Skip to content

Commit

Permalink
Merge branch 'main' into nanotron_greedyuntil_fix
Browse files (browse the repository at this point in the history)
  • Loading branch information
NathanHB authored Oct 8, 2024
2 parents e461548 + b8923aa commit df7aa59
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 2 deletions.
4 changes: 2 additions & 2 deletions src/lighteval/metrics/llm_as_judge.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -111,15 +111,15 @@ def __lazy_load_client(self):

self.sampling_params = SamplingParams(temperature=0.8, top_p=0.95, max_tokens=512)
self.tokenizer = get_tokenizer(self.model, tokenizer_mode="auto")
- self.pipe = LLM(model=self.model, max_model_len=2048, gpu_memory_utilization=0.5)
+ self.pipe = LLM(model=self.model, max_model_len=2048, gpu_memory_utilization=0.5, dtype="float16")
return self.__call_vllm
case "transformers":
if self.pipe is None:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

transformers_model = AutoModelForCausalLM.from_pretrained(
- self.model, torch_dtype=torch.bfloat16, trust_remote_code=False, device_map="cuda"
+ self.model, torch_dtype=torch.float16, trust_remote_code=False, device_map="cuda"
)
tokenizer = AutoTokenizer.from_pretrained(self.model)
self.pipe = pipeline(
Expand Down
1 change: 1 addition & 0 deletions src/lighteval/models/vllm_model.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -101,6 +101,7 @@ def tokenizer(self):
def cleanup(self):
destroy_model_parallel()
del self.model.llm_engine.model_executor.driver_worker
+ self.model = None
gc.collect()
ray.shutdown()
destroy_distributed_environment()
Expand Down

0 comments on commit df7aa59

Please sign in to comment.