from torchbenchmark.tasks import NLP
from torchbenchmark.util.framework.huggingface.model_factory import (
    HuggingFaceAuthMixin,
    HuggingFaceModel,
)


class Model(HuggingFaceModel, HuggingFaceAuthMixin):
    """Llama 3.1 8B canary benchmark model.

    Wraps the gated HuggingFace checkpoint ``meta-llama/Meta-Llama-3.1-8B``
    (registered under the torchbench name ``llama_v31_8b``). Only the eval
    path is implemented; training is expected to be provided by an FSDP
    harness elsewhere.
    """

    task = NLP.LANGUAGE_MODELING
    DEFAULT_TRAIN_BSIZE = 1
    DEFAULT_EVAL_BSIZE = 1
    # Model is too large to deepcopy per-iteration.
    DEEPCOPY = False

    def __init__(self, test, device, batch_size=None, extra_args=None):
        # NOTE: default changed from the buggy mutable default ``extra_args=[]``
        # to ``None``; behavior for callers is identical, but the list is no
        # longer shared across Model instances.
        # Authenticate first — the checkpoint is gated on the HF Hub.
        HuggingFaceAuthMixin.__init__(self)
        super().__init__(
            name="llama_v31_8b",
            test=test,
            device=device,
            batch_size=batch_size,
            extra_args=[] if extra_args is None else extra_args,
        )

    def train(self):
        # BUGFIX: the original *returned* NotImplementedError(...), handing the
        # caller an exception instance instead of signalling "not implemented".
        raise NotImplementedError("FSDP should implement a training loop")
import os

from torchbenchmark.util.framework.huggingface.patch_hf import cache_model

if __name__ == "__main__":
    # The torchbench model name is, by convention, the name of the directory
    # this install script lives in (llama_v31_8b); pre-cache that checkpoint.
    script_path = os.path.abspath(__file__)
    model_dir = os.path.dirname(script_path)
    cache_model(os.path.basename(model_dir))