Skip to content

Commit

Permalink
Download tokenizers from Hugging Face (with local revision-SHA caching)
Browse files Browse the repository at this point in the history
  • Loading branch information
goliaro committed Jul 30, 2023
1 parent 8bb5e33 commit b6d6bfa
Show file tree
Hide file tree
Showing 3 changed files with 75 additions and 13 deletions.
4 changes: 2 additions & 2 deletions inference/flexflow_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@
"14000",
"-ll:zsize",
"30000",
"-tokenizer",
"../inference/tokenizer/tokenizer.model",
# "-tokenizer",
# "../inference/tokenizer/tokenizer.model",
# "-output-file",
# "../inference/output/llama_python_inference.txt",
"-pipeline-parallelism-degree",
Expand Down
72 changes: 64 additions & 8 deletions python/flexflow/serve/serve.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

from flexflow.serve.models import FlexFlowLLAMA, FlexFlowOPT, FlexFlowFalcon
from flexflow.core import *
from transformers import AutoConfig, AutoModelForCausalLM
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer, LlamaTokenizer
from huggingface_hub import HfApi
import sys, torch

Expand Down Expand Up @@ -97,14 +97,69 @@ def download_hf_weights(self):
f"Loading '{self.hf_config._name_or_path}' model weights from the cache..."
)

def load_hf_tokenizer(self):
    """Ensure a tokenizer is available on disk, downloading from Hugging Face if needed.

    If ``self.tokenizer_path`` was supplied by the user, only validate that it
    points to an existing, non-empty location and return. Otherwise, place the
    tokenizer files under ``~/.cache/flexflow/tokenizers/<model-name>/`` and
    re-download only when the cached revision SHA (stored in ``rev_sha.txt``)
    differs from the latest revision reported by huggingface.co.

    Raises:
        FileNotFoundError: if a user-supplied tokenizer path does not exist,
            or is an empty directory.
    """
    print("Loading tokenizer...")
    if len(self.tokenizer_path) > 0:
        print(f"Using tokenizer from {self.tokenizer_path}")
        # Validate the user-supplied path instead of silently falling back.
        if not os.path.exists(self.tokenizer_path):
            raise FileNotFoundError(f"Path {self.tokenizer_path} does not exist")
        elif (
            os.path.isdir(self.tokenizer_path)
            and len(os.listdir(self.tokenizer_path)) == 0
        ):
            raise FileNotFoundError(f"Folder {self.tokenizer_path} is empty")
        return

    # Download tokenizer

    # Use local cache, or download new version
    self.tokenizer_path = os.path.expanduser(
        f"~/.cache/flexflow/tokenizers/{self.hf_config._name_or_path}/"
    )
    if not os.path.exists(self.tokenizer_path):
        print(f"Creating directory {self.tokenizer_path}...")
        os.makedirs(self.tokenizer_path, exist_ok=True)

    # Get local revision SHA, check if it matches latest one on huggingface
    local_revision = None
    local_revision_file = os.path.join(self.tokenizer_path, "rev_sha.txt")
    if os.path.exists(local_revision_file):
        # FIX: use a context manager so the file handle is closed
        # deterministically (the original leaked it via a bare open()).
        with open(local_revision_file) as f:
            local_revision = "".join(f.read().split())
    hf_api = HfApi()
    latest_revision = hf_api.model_info(self.hf_config._name_or_path).sha

    # Download if needed
    if local_revision != latest_revision:
        print(
            f"'{self.hf_config._name_or_path}' tokenizer not found in cache or outdated. Downloading from huggingface.co ..."
        )
        if self.model_type == ModelType.LLAMA:
            # LLAMA must use the slow SentencePiece tokenizer so that
            # save_pretrained() emits the tokenizer.model file the C++
            # runtime loads. FIX: dropped the misleading use_fast=True —
            # LlamaTokenizer is the slow class, so the kwarg was a no-op.
            hf_tokenizer = LlamaTokenizer.from_pretrained(
                self.hf_config._name_or_path
            )
        else:
            hf_tokenizer = AutoTokenizer.from_pretrained(
                self.hf_config._name_or_path
            )
        hf_tokenizer.save_pretrained(self.tokenizer_path)
        print("Done downloading HF tokenizer.")
        # Record the revision only after a successful save, so a failed
        # download is retried on the next run.
        with open(local_revision_file, "w+") as f:
            f.write(latest_revision)
        print("Loading the tokenizer...")
    else:
        print(
            f"Loading '{self.hf_config._name_or_path}' tokenizer from the cache..."
        )

def load_hf_weights(self):
print("Loading hf weights...")

if self.data_type == DataType.DT_HALF:
torch.set_default_tensor_type(torch.HalfTensor)

if len(self.weights_path) > 0:
print(f"Using weights from {self.weights_path.length}")
print(f"Using weights from {self.weights_path}")
# check that weights exist
if not os.path.exists(self.weights_path) or not os.path.isdir(
self.weights_path
Expand Down Expand Up @@ -165,17 +220,18 @@ def compile(
max_tokens_per_batch,
)

# Create request manager
self.rm = RequestManager()
self.rm.register_tokenizer(self.model_type, self.tokenizer_path)
self.rm.register_output_filepath(self.output_file)

# Create inference manager
self.im = InferenceManager()
self.im.compile_model_and_allocate_buffer(self.model.ffmodel)

# Download the weights from huggingface (if needed) and load them
# Download the weights and tokenizer from huggingface (if needed) and load them
self.load_hf_weights()
self.load_hf_tokenizer()

# Create request manager
self.rm = RequestManager()
self.rm.register_tokenizer(self.model_type, self.tokenizer_path)
self.rm.register_output_filepath(self.output_file)

self.im.init_operators_inference(self.model.ffmodel)

Expand Down
12 changes: 9 additions & 3 deletions src/runtime/request_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -69,12 +69,18 @@ void RequestManager::register_tokenizer(ModelType type,
std::string const &path) {
// bos id
this->model_type = type;
std::string tokenizer_folder =
(!path.empty() && path.back() != '/') ? path + '/' : path;
if (model_type == ModelType::LLAMA) {
bool path_to_file = !path.empty() &&
(path.size() >= strlen("tokenizer.model")) &&
path.find("tokenizer.model") ==
(path.size() - strlen("tokenizer.model"));
std::string tokenizer_filepath =
path_to_file ? path : tokenizer_folder + "tokenizer.model";
this->tokenizer_ =
Tokenizer::FromBlobSentencePiece(LoadBytesFromFile(path));
Tokenizer::FromBlobSentencePiece(LoadBytesFromFile(tokenizer_filepath));
} else if (model_type == ModelType::OPT) {
std::string tokenizer_folder =
(!path.empty() && path.back() != '/') ? path + '/' : path;
std::string vocab_file = tokenizer_folder + "gpt2-vocab.json";
std::string merges_file = tokenizer_folder + "gpt2-merges.txt";
std::string added_tokens_file = tokenizer_folder + "added_tokens.json";
Expand Down

0 comments on commit b6d6bfa

Please sign in to comment.