from torchbenchmark.tasks import NLP
from torchbenchmark.util.framework.huggingface.model_factory import (
    HuggingFaceAuthMixin,
    HuggingFaceModel,
)


class Model(HuggingFaceModel, HuggingFaceAuthMixin):
    """Llama 3.1 8B canary benchmark model.

    Wraps the gated HuggingFace checkpoint ``meta-llama/Meta-Llama-3.1-8B``
    (registered under the torchbench name ``llama_v31_8b``). Only the eval
    path is implemented; training is expected to be provided by an FSDP
    harness elsewhere.
    """

    task = NLP.LANGUAGE_MODELING
    DEFAULT_TRAIN_BSIZE = 1
    DEFAULT_EVAL_BSIZE = 1
    # Model is too large to deepcopy per-iteration.
    DEEPCOPY = False

    def __init__(self, test, device, batch_size=None, extra_args=None):
        # NOTE: default changed from the buggy mutable default ``extra_args=[]``
        # to ``None``; behavior for callers is identical, but the list is no
        # longer shared across Model instances.
        # Authenticate first — the checkpoint is gated on the HF Hub.
        HuggingFaceAuthMixin.__init__(self)
        super().__init__(
            name="llama_v31_8b",
            test=test,
            device=device,
            batch_size=batch_size,
            extra_args=[] if extra_args is None else extra_args,
        )

    def train(self):
        # BUGFIX: the original *returned* NotImplementedError(...), handing the
        # caller an exception instance instead of signalling "not implemented".
        raise NotImplementedError("FSDP should implement a training loop")
import os

from torchbenchmark.util.framework.huggingface.patch_hf import cache_model

if __name__ == "__main__":
    # The torchbench model name is, by convention, the name of the directory
    # this install script lives in (llama_v31_8b); pre-cache that checkpoint.
    script_path = os.path.abspath(__file__)
    model_dir = os.path.dirname(script_path)
    cache_model(os.path.basename(model_dir))