-
Notifications
You must be signed in to change notification settings - Fork 0
/
model.py
34 lines (27 loc) · 923 Bytes
/
model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
# This file takes a while to load.
# TOKENIZERS_PARALLELISM is set to "false" below, before transformers is imported,
# to avoid noisy warnings when main.py creates a subprocess (forks).
import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import Union
def init_model(model_name: str, device: Union[str, int]):
    """
    Load an InCoder checkpoint together with its tokenizer.

    Only "facebook/incoder-1B" and "facebook/incoder-6B" are accepted; any
    other name trips the assertion below. The 6B checkpoint is requested
    from its "float16" revision with torch.float16 weights and
    low_cpu_mem_usage enabled, while the 1B checkpoint is loaded with no
    extra arguments. In both cases the loaded model is passed through
    .half() and then moved to `device`.

    Returns a dictionary with the keys "model", "device" and "tokenizer".
    """
    assert model_name in ["facebook/incoder-1B", "facebook/incoder-6B"]

    # Extra from_pretrained arguments; only the 6B checkpoint needs any.
    load_options = {}
    if model_name == "facebook/incoder-6B":
        load_options = {
            "revision": "float16",
            "torch_dtype": torch.float16,
            "low_cpu_mem_usage": True,
        }

    # The model is loaded before the tokenizer, matching the evaluation
    # order of the result dictionary's entries.
    model = AutoModelForCausalLM.from_pretrained(model_name, **load_options)
    model = model.half().to(device)
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    return {"model": model, "device": device, "tokenizer": tokenizer}