diff --git a/examples/llama_cpp.yaml b/examples/llama_cpp.yaml
deleted file mode 100644
index 2b5d6610..00000000
--- a/examples/llama_cpp.yaml
+++ /dev/null
@@ -1,25 +0,0 @@
-defaults:
-  - benchmark
-  - scenario: inference
-  - launcher: inline
-  - backend: llama_cpp
-  - _base_
-  - _self_
-
-name: llama_cpp_llama_v2
-
-backend:
-  device: mps
-  model: TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF
-  task: text-generation
-  filename: tinyllama-1.1b-chat-v1.0.Q4_0.gguf
-
-
-scenario:
-  input_shapes:
-    batch_size: 1
-    sequence_length: 256
-    vocab_size: 32000
-  generate_kwargs:
-    max_new_tokens: 100
-    min_new_tokens: 100
diff --git a/examples/llama_cpp_mps.yaml b/examples/llama_cpp_text_generation.yaml
similarity index 100%
rename from examples/llama_cpp_mps.yaml
rename to examples/llama_cpp_text_generation.yaml
diff --git a/examples/llama_mps.yaml b/examples/llama_mps.yaml
deleted file mode 100644
index 863cfe36..00000000
--- a/examples/llama_mps.yaml
+++ /dev/null
@@ -1,23 +0,0 @@
-defaults:
-  - benchmark
-  - scenario: inference
-  - launcher: inline
-  - backend: pytorch
-  - _base_
-  - _self_
-
-name: llama_tiny_mps
-
-backend:
-  device: mps
-  model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
-  task: text-generation
-
-scenario:
-  input_shapes:
-    batch_size: 4
-    sequence_length: 256
-    vocab_size: 32000
-  generate_kwargs:
-    max_new_tokens: 100
-    min_new_tokens: 100
diff --git a/examples/llama_tiny_mps.yaml b/examples/llama_tiny_mps.yaml
deleted file mode 100644
index 863cfe36..00000000
--- a/examples/llama_tiny_mps.yaml
+++ /dev/null
@@ -1,23 +0,0 @@
-defaults:
-  - benchmark
-  - scenario: inference
-  - launcher: inline
-  - backend: pytorch
-  - _base_
-  - _self_
-
-name: llama_tiny_mps
-
-backend:
-  device: mps
-  model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
-  task: text-generation
-
-scenario:
-  input_shapes:
-    batch_size: 4
-    sequence_length: 256
-    vocab_size: 32000
-  generate_kwargs:
-    max_new_tokens: 100
-    min_new_tokens: 100
diff --git a/optimum_benchmark/backends/base.py b/optimum_benchmark/backends/base.py
index f6e529fc..d1b1b374 100644
--- a/optimum_benchmark/backends/base.py
+++ b/optimum_benchmark/backends/base.py
@@ -70,10 +70,7 @@ def __init__(self, config: BackendConfigT):
             self.generation_config = None
         elif self.config.library == "llama_cpp":
             self.logger.info("\t+ Benchmarking a Llama.cpp model")
-            self.pretrained_config = get_transformers_generation_config(self.config.model, **self.config.model_kwargs)
             self.model_shapes = {}
-            self.pretrained_processor = None
-            self.generation_config = None
         else:
             self.logger.info("\t+ Benchmarking a Transformers model")
             self.generation_config = get_transformers_generation_config(self.config.model, **self.config.model_kwargs)
diff --git a/tests/configs/_llama_cpp_.yaml b/tests/configs/_llama_cpp_.yaml
deleted file mode 100644
index e2ee9f91..00000000
--- a/tests/configs/_llama_cpp_.yaml
+++ /dev/null
@@ -1,4 +0,0 @@
-backend:
-  model: TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF
-  task: text-generation
-  filename: tinyllama-1.1b-chat-v1.0.Q4_0.gguf
\ No newline at end of file