-
Notifications
You must be signed in to change notification settings - Fork 470
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #2 from 01-ai/wangye/add_generate_demo
feat: add base model generate demo
- Loading branch information
Showing
6 changed files
with
79 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,12 @@ | ||
# Demo | ||
## generate_demo.py | ||
- **说明**:语言模型生成 demo | ||
- **调用方式**:`python generate_demo.py` | ||
|
||
This is a placeholder folder | ||
## stream_generate_demo.py | ||
- **说明**:语言模型流式生成 demo | ||
- **调用方式**:`python stream_generate_demo.py` | ||
|
||
## tensor_parallel_generate_demo.py | ||
- **说明**:语言模型模型并行生成 demo | ||
- **调用方式**:`torchrun --nproc_per_node=2 tensor_parallel_generate_demo.py` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
# Demo | ||
## generate_demo.py | ||
- **Description**:LLM generate demo | ||
- **Usage**:`python generate_demo.py` | ||
|
||
## stream_generate_demo.py | ||
- **Description**:LLM stream generate demo | ||
- **Usage**:`python stream_generate_demo.py` | ||
|
||
## tensor_parallel_generate_demo.py | ||
- **Description**:LLM tensor-parallel generate demo | ||
- **Usage**:`torchrun --nproc_per_node=2 tensor_parallel_generate_demo.py` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
from transformers import AutoModelForCausalLM, AutoTokenizer | ||
|
||
# Load the tokenizer and the causal LM from the "Yi-6b" checkpoint.
# device_map="auto" lets the loader decide where the weights are placed.
tokenizer = AutoTokenizer.from_pretrained("Yi-6b", trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    "Yi-6b", device_map="auto", torch_dtype="auto", trust_remote_code=True
)

inputs = tokenizer("Hello", return_tensors="pt")
# Move the prompt to the device the model actually landed on rather than
# hard-coding CUDA: with device_map="auto" the model is not guaranteed to be
# on the current CUDA device (or on a GPU at all).
input_ids = inputs.input_ids.to(model.device)
outputs = model.generate(input_ids, max_new_tokens=1024)
print(tokenizer.decode(outputs[0]))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer | ||
|
||
# Load the tokenizer first: the streamer needs it to turn generated token ids
# back into text as they are produced.
tokenizer = AutoTokenizer.from_pretrained("Yi-6b", trust_remote_code=True)
streamer = TextStreamer(tokenizer)
model = AutoModelForCausalLM.from_pretrained(
    "Yi-6b", device_map="auto", torch_dtype="auto", trust_remote_code=True
)

inputs = tokenizer("Hello", return_tensors="pt")
# Move the prompt to the device the model actually landed on rather than
# hard-coding CUDA: with device_map="auto" the model is not guaranteed to be
# on the current CUDA device (or on a GPU at all).
input_ids = inputs.input_ids.to(model.device)
# The streamer emits the text during generation, so the returned ids are
# intentionally discarded.
_ = model.generate(input_ids, max_new_tokens=1024, streamer=streamer)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
import os | ||
|
||
import deepspeed | ||
import torch | ||
from deepspeed.module_inject import auto_tp | ||
from torch import nn | ||
from transformers import AutoModelForCausalLM, AutoTokenizer | ||
|
||
|
||
def is_load_module(module):
    """Return True when *module* is one of the layer kinds to be loaded.

    A module qualifies if its class is exactly ``nn.Linear``,
    ``nn.Embedding`` or ``nn.LayerNorm`` (subclasses do not match), or if
    the name reported by ``module._get_name()`` is one of the listed
    layer names.
    """
    # Exact class match, not isinstance: subclasses are deliberately excluded.
    if module.__class__ in (nn.Linear, nn.Embedding, nn.LayerNorm):
        return True

    # Layers recognised by name only, so their classes need not be imported.
    known_names = (
        "LPLayerNorm",
        "SharedEmbedding",
        "OPTLearnedPositionalEmbedding",
        "LlamaRMSNorm",
        "YiRMSNorm",
    )
    return module._get_name() in known_names
|
||
|
||
# Swap DeepSpeed's AutoTP module filter for the local one so that the layer
# names listed in is_load_module (including "YiRMSNorm") are accepted.
auto_tp.Loading.is_load_module = is_load_module

# Bind this process to the GPU index given by LOCAL_RANK (set by the launcher,
# e.g. torchrun) before any CUDA allocation happens.
torch.cuda.set_device(int(os.environ["LOCAL_RANK"]))
model = AutoModelForCausalLM.from_pretrained(
    "Yi-6b", device_map="cuda", torch_dtype="auto", trust_remote_code=True
)

# `mp_size` is deprecated in DeepSpeed's inference config; the tensor-parallel
# degree is passed through `tensor_parallel.tp_size` instead.
model = deepspeed.init_inference(
    model,
    tensor_parallel={"tp_size": int(os.environ["WORLD_SIZE"])},
    replace_with_kernel_inject=False,
)
# Release cached GPU memory that is no longer needed after init_inference.
torch.cuda.empty_cache()

tokenizer = AutoTokenizer.from_pretrained("Yi-6b", trust_remote_code=True)
inputs = tokenizer("Hello", return_tensors="pt")
# .cuda() targets the device selected by torch.cuda.set_device above.
outputs = model.generate(inputs.input_ids.cuda(), max_new_tokens=1024)
print(tokenizer.decode(outputs[0]))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
deepspeed==0.11.1 |