-
Notifications
You must be signed in to change notification settings - Fork 470
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
1bce7c7
commit c7dce07
Showing
6 changed files
with
79 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,12 @@ | ||
# Demo | ||
## generate_demo.py | ||
- **说明**:语言模型生成 demo | ||
- **调用方式**:`python generate_demo.py` | ||
|
||
This is a placeholder folder | ||
## stream_generate_demo.py | ||
- **说明**:语言模型流式生成 demo | ||
- **调用方式**:`python stream_generate_demo.py` | ||
|
||
## tensor_parallel_generate_demo.py | ||
- **说明**:语言模型模型并行生成 demo | ||
- **调用方式**:`torchrun --nproc_per_node=2 tensor_parallel_generate_demo.py` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
# Demo | ||
## generate_demo.py | ||
- **Description**: LLM text generation demo | ||
- **Usage**: `python generate_demo.py` | ||
|
||
## stream_generate_demo.py | ||
- **Description**: LLM streaming text generation demo | ||
- **Usage**: `python stream_generate_demo.py` | ||
|
||
## tensor_parallel_generate_demo.py | ||
- **Description**: LLM tensor-parallel text generation demo | ||
- **Usage**: `torchrun --nproc_per_node=2 tensor_parallel_generate_demo.py` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
from transformers import AutoModelForCausalLM, AutoTokenizer | ||
|
||
# Minimal text-generation demo: load the "Yi-6b" checkpoint, generate a
# continuation of the prompt "Hello", and print the decoded result.
# trust_remote_code=True lets transformers run model/tokenizer code shipped
# with the checkpoint.
tokenizer = AutoTokenizer.from_pretrained("Yi-6b", trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    "Yi-6b", device_map="auto", torch_dtype="auto", trust_remote_code=True
)

inputs = tokenizer("Hello", return_tensors="pt")
# device_map="auto" decides where the weights are placed, so move the prompt
# to the model's device instead of hard-coding CUDA.
outputs = model.generate(inputs.input_ids.to(model.device), max_new_tokens=1024)
print(tokenizer.decode(outputs[0]))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer | ||
|
||
# Streaming text-generation demo: load the "Yi-6b" checkpoint and generate a
# continuation of the prompt "Hello", handing tokens to a TextStreamer as
# they are produced.
# trust_remote_code=True lets transformers run model/tokenizer code shipped
# with the checkpoint.
tokenizer = AutoTokenizer.from_pretrained("Yi-6b", trust_remote_code=True)
streamer = TextStreamer(tokenizer)
model = AutoModelForCausalLM.from_pretrained(
    "Yi-6b", device_map="auto", torch_dtype="auto", trust_remote_code=True
)

inputs = tokenizer("Hello", return_tensors="pt")
# device_map="auto" decides where the weights are placed, so move the prompt
# to the model's device instead of hard-coding CUDA.
# The return value is discarded because the streamer already receives the
# generated tokens.
_ = model.generate(
    inputs.input_ids.to(model.device), max_new_tokens=1024, streamer=streamer
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
import os | ||
|
||
import deepspeed | ||
import torch | ||
from deepspeed.module_inject import auto_tp | ||
from torch import nn | ||
from transformers import AutoModelForCausalLM, AutoTokenizer | ||
|
||
|
||
# Layer classes accepted by is_load_module. Matching is by exact class, so
# subclasses of these types are not accepted through this tuple.
_LOAD_LAYER_TYPES = (nn.Linear, nn.Embedding, nn.LayerNorm)

# Layers accepted by the name reported by ``Module._get_name()``.
_LOAD_LAYER_NAMES = frozenset(
    {
        "LPLayerNorm",
        "SharedEmbedding",
        "OPTLearnedPositionalEmbedding",
        "LlamaRMSNorm",
        "YiRMSNorm",
    }
)


def is_load_module(module: nn.Module) -> bool:
    """Return whether *module* is one of the recognised "load" layers.

    A module qualifies when its exact class is ``nn.Linear``,
    ``nn.Embedding`` or ``nn.LayerNorm``, or when ``module._get_name()`` is
    one of the names in ``_LOAD_LAYER_NAMES`` (which includes ``YiRMSNorm``).

    Args:
        module: The module to classify.

    Returns:
        ``True`` if the module matches by class or by name, else ``False``.
    """
    return (
        module.__class__ in _LOAD_LAYER_TYPES
        or module._get_name() in _LOAD_LAYER_NAMES
    )
|
||
|
||
# Replace DeepSpeed's auto_tp.Loading.is_load_module with the local
# predicate, which additionally accepts layers named "YiRMSNorm".
auto_tp.Loading.is_load_module = is_load_module

# LOCAL_RANK and WORLD_SIZE are read from the environment; the script is
# meant to be started with torchrun, which sets both for every worker.
torch.cuda.set_device(int(os.environ["LOCAL_RANK"]))
model = AutoModelForCausalLM.from_pretrained(
    "Yi-6b", device_map="cuda", torch_dtype="auto", trust_remote_code=True
)

# Shard the model across all workers. ``tensor_parallel.tp_size`` is the
# current spelling of the deprecated ``mp_size`` argument.
model = deepspeed.init_inference(
    model,
    tensor_parallel={"tp_size": int(os.environ["WORLD_SIZE"])},
    replace_with_kernel_inject=False,
)
# Release cached CUDA memory that is no longer referenced after
# init_inference has processed the model.
torch.cuda.empty_cache()

tokenizer = AutoTokenizer.from_pretrained("Yi-6b", trust_remote_code=True)
inputs = tokenizer("Hello", return_tensors="pt")
# .cuda() targets the device selected by torch.cuda.set_device above.
outputs = model.generate(inputs.input_ids.cuda(), max_new_tokens=1024)
print(tokenizer.decode(outputs[0]))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
deepspeed==0.11.1 |