Merge branch 'main' into nanotron_greedyuntil_fix

huggingface · Oct 8, 2024 · df7aa59 · df7aa59
2 parents e461548 + b8923aa
commit df7aa59
Show file tree

Hide file tree

Showing 2 changed files with 3 additions and 2 deletions.
diff --git a/src/lighteval/metrics/llm_as_judge.py b/src/lighteval/metrics/llm_as_judge.py
@@ -111,15 +111,15 @@ def __lazy_load_client(self):
 
                     self.sampling_params = SamplingParams(temperature=0.8, top_p=0.95, max_tokens=512)
                     self.tokenizer = get_tokenizer(self.model, tokenizer_mode="auto")
-                    self.pipe = LLM(model=self.model, max_model_len=2048, gpu_memory_utilization=0.5)
+                    self.pipe = LLM(model=self.model, max_model_len=2048, gpu_memory_utilization=0.5, dtype="float16")
                 return self.__call_vllm
             case "transformers":
                 if self.pipe is None:
                     import torch
                     from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 
                     transformers_model = AutoModelForCausalLM.from_pretrained(
-                        self.model, torch_dtype=torch.bfloat16, trust_remote_code=False, device_map="cuda"
+                        self.model, torch_dtype=torch.float16, trust_remote_code=False, device_map="cuda"
                     )
                     tokenizer = AutoTokenizer.from_pretrained(self.model)
                     self.pipe = pipeline(

diff --git a/src/lighteval/models/vllm_model.py b/src/lighteval/models/vllm_model.py
@@ -101,6 +101,7 @@ def tokenizer(self):
     def cleanup(self):
         destroy_model_parallel()
         del self.model.llm_engine.model_executor.driver_worker
+        self.model = None
         gc.collect()
         ray.shutdown()
         destroy_distributed_environment()