Commit db50618

partial conv changes and mcli update

KuuCi committed Aug 29, 2023
1 parent b6c095d commit db50618
Showing 13 changed files with 556 additions and 393 deletions.
7 changes: 4 additions & 3 deletions examples/end-to-end-examples/support_chatbot/app_demo.py
@@ -17,13 +17,13 @@ def parse_args() -> Namespace:
         '--endpoint_url',
         type=str,
         default='https://models.hosted-on.mosaicml.hosting/mpt-30b-chat/v1/predict',
-        #default='https://mpt-30b-composer-finetuned-2ed9el.inf.hosted-on.mosaicml.hosting/predict',
+        #default='https://mpt-30b-composer-finetuned-q8mjj9.inf.hosted-on.mosaicml.hosting/predict',
         required=False,
         help='The endpoint of our MosaicML LLM Model')
     parser.add_argument(
         '--max_length',
         type=int,
-        default=4000,
+        default=5000,
         required=False,
         help='The maximum size tokens in model')
     parser.add_argument(
@@ -87,7 +87,8 @@ def main(endpoint_url: str,
         inject_instruction_format=True,
         endpoint_url= endpoint_url,
         model_kwargs={
-            'max_new_tokens': max_length,
+            #'max_new_tokens': max_length,
+            'max_length': max_length,
             'top_k': model_k,
             'top_p': 0.95,
             'temperature': 0.1,
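Note on the kwarg swap above: in Hugging Face-style generation arguments, max_new_tokens caps only the generated continuation, while max_length caps prompt plus continuation together, so the same numeric limit behaves differently. The sketch below shows how these parameters might be sent to the hosted endpoint; it is illustrative only, and the payload field names ('inputs', 'parameters') and the example top_k value are assumptions rather than the schema actually used by app_demo.py.

import requests

ENDPOINT_URL = 'https://models.hosted-on.mosaicml.hosting/mpt-30b-chat/v1/predict'

def query_endpoint(prompt: str, max_length: int = 5000) -> str:
    """Illustrative sketch: POST a prompt to the hosted MPT-30B-chat endpoint.

    The payload layout below is an assumption for illustration; the real
    request schema is defined by the MosaicML inference server.
    """
    payload = {
        'inputs': [prompt],
        'parameters': {
            'max_length': max_length,  # caps prompt + generated tokens together
            'top_k': 10,               # placeholder for the model_k argument
            'top_p': 0.95,
            'temperature': 0.1,
        },
    }
    response = requests.post(ENDPOINT_URL, json=payload, timeout=120)
    response.raise_for_status()
    return response.json()['outputs'][0]  # assumed response shape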
1 change: 0 additions & 1 deletion examples/end-to-end-examples/support_chatbot/app_slack.py
@@ -50,7 +50,6 @@ def parse_args() -> Namespace:
 @app.route('/slack/events', methods=['POST'])
 def slack_events():
     data = request.json
-
     # Immediately respond to Slack's challenge
     if "challenge" in data:
         return jsonify({'challenge': data['challenge']})

Check warning

Code scanning / CodeQL

Reflected server-side cross-site scripting Medium

Cross-site scripting vulnerability due to a
user-provided value
.
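The CodeQL warning fires because the user-supplied challenge value is echoed back in the response. Slack's URL-verification handshake does require echoing that value, but it can be returned defensively by validating its shape and forcing a JSON content type. A minimal sketch of that pattern, assuming Slack's challenge tokens are URL-safe strings (the exact character set is an assumption), and not necessarily the fix the authors intended:

import re
from flask import Flask, jsonify, request

app = Flask(__name__)

@app.route('/slack/events', methods=['POST'])
def slack_events():
    data = request.get_json(silent=True) or {}

    # Slack URL verification: echo the challenge back, but only after
    # checking that it looks like the opaque token Slack sends.
    challenge = data.get('challenge')
    if challenge is not None:
        if not isinstance(challenge, str) or not re.fullmatch(r'[A-Za-z0-9_\-]+', challenge):
            return jsonify({'error': 'bad challenge'}), 400
        # jsonify sets Content-Type: application/json, so the echoed value
        # is not interpreted as HTML by a browser.
        return jsonify({'challenge': challenge})

    return jsonify({'ok': True})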
149 changes: 70 additions & 79 deletions examples/end-to-end-examples/support_chatbot/chatbot.py

Large diffs are not rendered by default.

426 changes: 426 additions & 0 deletions examples/end-to-end-examples/support_chatbot/custom_mpt_ft_handler.py

Large diffs are not rendered by default.

(file path not rendered)

@@ -7,7 +7,7 @@ compute:
   gpus: 0 # Number of GPUs to use

 ## These configurations are optional
-cluster: r7z17 # Name of the cluster to use for this run
+# cluster: r0z0 # Name of the cluster to use for this run
 # gpu_type: a100_80gb # Type of GPU to use.

 integrations:
@@ -22,8 +22,8 @@ integrations:
 command: |
   cd llm-foundry/scripts/inference
   python convert_composer_to_hf.py \
-    --composer_path oci://mosaicml-internal-checkpoints/support-bot-demo/checkpoints/mpt-30b-chat_composer_chatv2/latest-rank0.pt.symlink \
-    --hf_output_path oci://mosaicml-internal-checkpoints/support-bot-demo/converted_checkpoints/mpt-30b-chat_composer_chatv2-hf/ \
+    --composer_path CLOUD://BUCKET_NAME/support-bot-demo/checkpoints/mpt-30b-chat_composer_chatv2/latest-rank0.pt.symlink \
+    --hf_output_path CLOUD://BUCKET_NAME/support-bot-demo/converted_checkpoints/mpt-30b-chat_composer-codebase-hf/ \
     --output_precision bf16 \
 image: mosaicml/pytorch:1.13.1_cu117-python3.10-ubuntu20.04 # Use the Docker image provided by MosaicML
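Once convert_composer_to_hf.py has written a Hugging Face-format folder to --hf_output_path, the checkpoint loads with the standard transformers API. A minimal sketch, assuming the converted folder has been downloaded locally (the local path here is hypothetical):

from transformers import AutoModelForCausalLM, AutoTokenizer

# Hypothetical local copy of the --hf_output_path folder.
local_path = './mpt-30b-chat-hf'

tokenizer = AutoTokenizer.from_pretrained(local_path)
# MPT ships custom modeling code, hence trust_remote_code=True.
model = AutoModelForCausalLM.from_pretrained(local_path, trust_remote_code=True)

inputs = tokenizer('Hello, MPT!', return_tensors='pt')
print(tokenizer.decode(model.generate(**inputs, max_new_tokens=20)[0]))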
(file path not rendered)

@@ -23,17 +23,22 @@ integrations:
   # Install composer to use the cloud download helper
   command: |
     export PYTHONPATH=$PYTHONPATH:/code/llm-foundry:/code/examples:/code
+    pip uninstall packaging -y
+    rm /usr/lib/python3/dist-packages/packaging-23.1.dist-info/REQUESTED
     pip install composer[streaming,libcloud,oci]==0.16.0
+    pip install packaging==23.1
 model:
   backend: faster_transformers
   downloader: examples.end-to-end-examples.support_chatbot.scripts.deployment_download_helper.download_and_convert
   download_parameters:
-    remote_uri: oci://mosaicml-internal-checkpoints/support-bot-demo/converted_checkpoints/mpt-30b-chat_composer_chatv2-hf/
+    remote_uri: CLOUD://BUCKET_NAME/support-bot-demo/converted_checkpoints/mpt-30b-chat_composer-codebase-hf/
   gpus: 4
-  model_handler: examples.inference-deployments.mpt.mpt_ft_handler.MPTFTModelHandler # Use the provided MPT handler
+  model_handler: examples.end-to-end-examples.support_chatbot.custom_mpt_ft_handler.MPTFTModelHandler # Use the provided MPT handler
   model_parameters:
     ft_lib_path: /code/FasterTransformer/build/lib/libth_transformer.so
     # FT checkpoint path is hardcoded in MPTFTModelHandler at /tmp/mpt
-    model_name_or_path: mosaicml/mpt-30b-chat # This is used for the tokenzier
-    gpus: 4
+    model_name_or_path: mosaicml/mpt-30b # This is used for the tokenzier
+    gpus: 4

-image: mosaicml/inference:0.1.16
+image: mosaicml/inference:0.1.29
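This deployment now points model_handler at a handler vendored into the example (custom_mpt_ft_handler.py, the new 426-line file above) instead of the shared inference-deployments one. The inference server instantiates the handler with the model_parameters above and calls it per request; the skeleton below is schematic only, with the constructor arguments and predict signature assumed for illustration rather than taken from the actual server contract.

class MPTFTModelHandler:
    """Schematic sketch of a FasterTransformer model handler.

    Method names and request shapes here are assumptions for illustration;
    the real contract is defined by the MosaicML inference server and the
    vendored custom_mpt_ft_handler.py.
    """

    def __init__(self, model_name_or_path: str, ft_lib_path: str):
        # Tokenizer is resolved from the Hugging Face name (mosaicml/mpt-30b);
        # the FasterTransformer weights are read from the hardcoded /tmp/mpt.
        self.model_name_or_path = model_name_or_path
        self.ft_lib_path = ft_lib_path

    def predict(self, model_requests: list) -> list:
        outputs = []
        for req in model_requests:
            prompt = req['input']               # assumed request field
            params = req.get('parameters', {})  # e.g. max_length, top_k
            outputs.append(self._generate(prompt, **params))
        return outputs

    def _generate(self, prompt: str, **gen_kwargs) -> str:
        raise NotImplementedError  # FT generation elided in this sketch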
(file path not rendered)

@@ -1,21 +1,16 @@
-name: mpt-30b-PyPi_composer_chatv2
-
-scheduling:
-  resumable: true
-  priority: low
+name: mpt-30b-chat_composer_chatv2

 compute:
-  gpus: 64 # Number of GPUs to use
+  gpus: 8 # Number of GPUs to use

 ## These configurations are optional
 # cluster: r0z0 # Name of the cluster to use for this run
-gpu_type: h100_80gb # Type of GPU to use.
+# gpu_type: h100_80gb # Type of GPU to use.

 integrations:
   # Clone and install the llm-foundry repo so we can run scripts from it
   - integration_type: git_repo
     git_repo: mosaicml/llm-foundry
-    git_branch: davis/lion8b-v2
+    git_branch: dev
     pip_install: -e .[gpu]
     ssh_clone: false # Should be true if using a private repo
@@ -43,15 +38,17 @@ image: mosaicml/llm-foundry:2.0.1_cu118-latest # Use the Docker image provided by MosaicML
 # See LLM-foundry llmfoundry/scripts/train.py to see how the parameters are used in code
 parameters:
   # Path to load the weights from the previous step
-  load_path: oci://mosaicml-internal-checkpoints/support-bot-demo/checkpoints/mpt-30b_PyPi_composer/latest-rank0.pt.symlink
-  load_weights_only: false # Only load the weights for finetuning, discarding any other state from previous training
+  load_path: CLOUD://BUCKET_NAME/support-bot-demo/checkpoints/mpt-30b-chat_composer-codebase/latest-rank0.pt.symlink
+  load_weights_only: true # Only load the weights for finetuning, discarding any other state from previous training

   # Checkpoint to local filesystem or remote object store
-  save_interval: 1000ba # How frequently to save checkpoints
+  save_interval: 1ep # How frequently to save checkpoints
   save_num_checkpoints_to_keep: 1 # Important, this cleans up checkpoints saved to DISK
-  save_folder: oci://mosaicml-internal-checkpoints/support-bot-demo/checkpoints/mpt-30b-PyPi_composer_chatv2/
+  save_folder: CLOUD://BUCKET_NAME/support-bot-demo/checkpoints/mpt-30b-chat_composer_chatv2/
   save_weights_only: false

+  dist_timeout: 60000000
+
   # Maximum sequence length of the model
   # For MPT, you can change this to a different number if you would like to train on longer sequences
   # Note that you would also need to reprocess your data to contain longer sequences
@@ -126,11 +123,11 @@ parameters:
   # see LLM-foundry llmfoundry/utils/builders.py::build_optimizer for other built-in options
   optimizer:
     name: decoupled_lionw_8b
-    lr: 0.000006
+    lr: 0.0000005
     betas:
     - 0.9
     - 0.99
-    weight_decay: 0.000006
+    weight_decay: 0


 # Algorithms to apply
@@ -141,13 +138,13 @@ parameters:
     clipping_threshold: 1.0

   # Run configuration
-  max_duration: 4ep # Maximum duration of the run. Change to something shorter (e.g. 10ba) for a quick test run
-  eval_interval: 1ep # How frequently to evaluate the model
+  max_duration: 1ep # Maximum duration of the run. Change to something shorter (e.g. 10ba) for a quick test run
+  eval_interval: 2000ba # How frequently to evaluate the model
   eval_first: true # Whether to evaluate the model before training
   eval_subset_num_batches: -1 # How many batches to evaluate on. -1 means evaluate on the entire dataset
-  global_train_batch_size: 64 # Global batch size. This is the batch size across all GPUs
+  global_train_batch_size: 8 # Global batch size. This is the batch size across all GPUs
   seed: ${global_seed}
-  device_eval_batch_size: 1 # Evaluation batch size per GPU
+  device_eval_batch_size: 2 # Evaluation batch size per GPU
   device_train_microbatch_size: 1
   precision: amp_bf16
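With global_train_batch_size dropped to 8 on the 8 GPUs requested above and device_train_microbatch_size: 1, each GPU processes one sample per optimizer step and no gradient accumulation is needed. The relationship, spelled out as plain arithmetic (illustrative of how Composer-style trainers derive it):

# Microbatching arithmetic for the run configuration above (illustrative).
global_train_batch_size = 8       # samples per optimizer step across all GPUs
num_gpus = 8                      # from the compute section
device_train_microbatch_size = 1  # samples per forward/backward pass per GPU

device_train_batch_size = global_train_batch_size // num_gpus
grad_accum_steps = device_train_batch_size // device_train_microbatch_size

assert device_train_batch_size * num_gpus == global_train_batch_size
print(device_train_batch_size, grad_accum_steps)  # 1 1 -> no accumulation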
(file path not rendered)

@@ -3,19 +3,14 @@ name: mpt-support-bot-finetune-PyPi
 compute:
   gpus: 64 # Number of GPUs to use

-scheduling:
-  resumable: false
-  priority: low
-
 ## These configurations are optional
 # cluster: r0z0 # Name of the cluster to use for this run
-gpu_type: h100_80gb # Type of GPU to use.
+# gpu_type: h100_80gb # Type of GPU to use.

 integrations:
   # Clone and install the llm-foundry repo so we can run scripts from it
   - integration_type: git_repo
     git_repo: mosaicml/llm-foundry
-    git_branch: davis/lion8b-v2
+    git_branch: dev
     pip_install: -e .[gpu]
     ssh_clone: false # Should be true if using a private repo
@@ -43,9 +38,9 @@ image: mosaicml/llm-foundry:2.0.1_cu118-latest # Use the Docker image provided by MosaicML
 # See LLM-foundry llmfoundry/scripts/train.py to see how the parameters are used in code
 parameters:
   # Checkpoint to local filesystem or remote object store
-  save_interval: 3ep # How frequently to save checkpoints
+  save_interval: 2ep # How frequently to save checkpoints
   save_num_checkpoints_to_keep: 1 # Important, this cleans up checkpoints saved to DISK
-  save_folder: oci://mosaicml-internal-checkpoints/support-bot-demo/checkpoints/mpt-30b_PyPi/
+  save_folder: CLOUD://BUCKET_NAME/support-bot-demo/checkpoints/mpt-30b_PyPi/
   save_weights_only: True # Since we only need the weights for the next step, we can reduce the size of the checkpoint

 # Maximum sequence length of the model
@@ -85,7 +80,7 @@ parameters:
   # The dataset section is used by LLM-foundry to construct a StreamingDataset
   dataset:
     local: ./local-dataset-PyPi-cache
-    remote: oci://mosaicml-internal-checkpoints/support-bot-demo/data/PyPi
+    remote: CLOUD://BUCKET_NAME/support-bot-demo/data/PyPi
     split: train
     shuffle: true
     max_seq_len: ${max_seq_len}
@@ -99,7 +94,7 @@ parameters:
   # The dataset section is used by LLM-foundry to construct a StreamingDataset
   dataset:
     local: ./local-dataset-PyPi-cache
-    remote: oci://mosaicml-internal-checkpoints/support-bot-demo/data/PyPi
+    remote: CLOUD://BUCKET_NAME/support-bot-demo/data/PyPi
     split: validation
     shuffle: false
     max_seq_len: ${max_seq_len}
@@ -119,11 +114,11 @@ parameters:
   # see LLM-foundry llmfoundry/utils/builders.py::build_optimizer for other built-in options
   optimizer:
     name: decoupled_lionw_8b
-    lr: 0.000006
+    lr: 0.0000005
     betas:
     - 0.9
     - 0.99
-    weight_decay: 0.000006
+    weight_decay: 0.000000

 # Algorithms to apply
 # see https://docs.mosaicml.com/projects/composer/en/latest/trainer/algorithms.html
@@ -134,7 +129,7 @@ parameters:
     clipping_threshold: 1.0

   # Run configuration
-  max_duration: 3ep # Maximum duration of the run. Change to something shorter (e.g. 10ba) for a quick test run
+  max_duration: 2ep # Maximum duration of the run. Change to something shorter (e.g. 10ba) for a quick test run
   eval_interval: 500ba # How frequently to evaluate the model
   eval_first: true # Whether to evaluate the model before training
   eval_subset_num_batches: 1000 # How many batches to evaluate on. -1 means evaluate on the entire dataset
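The dataset blocks above are handed to LLM Foundry, which constructs a StreamingDataset that pulls shards from remote into the local cache on each node. A rough standalone equivalent using the mosaicml-streaming package; note that CLOUD://BUCKET_NAME is a placeholder that must be replaced with a real object-store URI such as s3:// or oci://:

from streaming import StreamingDataset

# Mirrors the train dataset block above. CLOUD://BUCKET_NAME is a
# placeholder; substitute a real object-store URI before running.
dataset = StreamingDataset(
    remote='CLOUD://BUCKET_NAME/support-bot-demo/data/PyPi',
    local='./local-dataset-PyPi-cache',
    split='train',
    shuffle=True,
)

for sample in dataset:
    ...  # pre-tokenized training examples, up to max_seq_len tokens each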
(file path not rendered)

@@ -1,22 +1,16 @@
-name: mpt-30b-composer
-
-scheduling:
-  resumable: false
-  priority: low
+name: mpt-30b_chat-composer

 compute:
-  gpus: 32 # Number of GPUs to use
+  gpus: 64 # Number of GPUs to use

 ## These configurations are optional
 # cluster: r0z0 # Name of the cluster to use for this run
-#gpu_type: h100_80gb # Type of GPU to use.
+gpu_type: h100_80gb # Type of GPU to use.

 integrations:
   # Clone and install the llm-foundry repo so we can run scripts from it
   - integration_type: git_repo
     git_repo: mosaicml/llm-foundry
-    git_branch: davis/lion8b-v2
+    git_branch: dev
     pip_install: -e .[gpu]
     ssh_clone: false # Should be true if using a private repo
@@ -45,18 +39,18 @@ image: mosaicml/llm-foundry:2.0.1_cu118-latest
 # See LLM-foundry llmfoundry/scripts/train.py to see how the parameters are used in code
 parameters:
   # Path to load the weights from the previous step
-  load_path: oci://mosaicml-internal-checkpoints/support-bot-demo/checkpoints/mpt-30b_PyPi/latest-rank0.pt.symlink
+  load_path: CLOUD://BUCKET_NAME/support-bot-demo/checkpoints/mpt-30b_PyPi/latest-rank0.pt.symlink
   load_weights_only: true # Only load the weights for finetuning, discarding any other state from previous training

   # Where to read the data from and save it to locally on the machine
-  data_remote: oci://mosaicml-internal-checkpoints/support-bot-demo/data/composer_30b/
+  data_remote: CLOUD://BUCKET_NAME/support-bot-demo/data/composer_30b/
   data_local: ./local-dataset-composercodebase-cache/

   # Checkpoint to local filesystem or remote object store
-  save_interval: 15ep # How frequently to save checkpoints
+  save_interval: 2ep # How frequently to save checkpoints
   save_num_checkpoints_to_keep: 1 # Important, this cleans up checkpoints saved to DISK
-  save_folder: oci://mosaicml-internal-checkpoints/support-bot-demo/checkpoints/mpt-30b_PyPi_composer/
-  save_weights_only: True # Since we only need the weights for the next step, we can reduce the size of the checkpoint
+  save_folder: CLOUD://BUCKET_NAME/support-bot-demo/checkpoints/mpt-30b-chat_composer-codebase/
+  save_weights_only: False # Since we only need the weights for the next step, we can reduce the size of the checkpoint

 # Maximum sequence length of the model
 # For MPT, you can change this to a different number if you would like to train on longer sequences
@@ -119,18 +113,18 @@ parameters:
   # see LLM-foundry llmfoundry/utils/builders.py::build_scheduler for other built-in options
   scheduler:
     name: cosine_with_warmup
-    t_warmup: 500ba
+    t_warmup: 10ba
     alpha_f: 0.1

   # Optimizer
   # see LLM-foundry llmfoundry/utils/builders.py::build_optimizer for other built-in options
   optimizer:
     name: decoupled_lionw_8b
-    lr: 0.000006
+    lr: 0.0000001
     betas:
     - 0.9
     - 0.99
-    weight_decay: 0.000006
+    weight_decay: 0

 # Algorithms to apply
 # see https://docs.mosaicml.com/projects/composer/en/latest/trainer/algorithms.html
@@ -141,13 +135,13 @@ parameters:
     clipping_threshold: 1.0

   # Run configuration
-  max_duration: 15ep # Maximum duration of the run. Change to something shorter (e.g. 10ba) for a quick test run
-  eval_interval: 3ep # How frequently to evaluate the model
+  max_duration: 8ep # Maximum duration of the run. Change to something shorter (e.g. 10ba) for a quick test run
+  eval_interval: 2ep # How frequently to evaluate the model
   eval_first: true # Whether to evaluate the model before training
   eval_subset_num_batches: -1 # How many batches to evaluate on. -1 means evaluate on the entire dataset
-  global_train_batch_size: 32 # Global batch size. This is the batch size across all GPUs
+  global_train_batch_size: 64 # Global batch size. This is the batch size across all GPUs
   seed: ${global_seed}
-  device_eval_batch_size: 1 # Evaluation batch size per GPU
+  device_eval_batch_size: 8 # Evaluation batch size per GPU
   device_train_microbatch_size: 1 # Automatically determine the microbatch size per GPU
   precision: amp_bf16
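The cosine_with_warmup schedule above ramps the learning rate linearly for t_warmup batches, then decays it along a cosine so that it ends at alpha_f times the peak. A small illustrative implementation of that shape (the standard formula, not LLM Foundry's code):

import math

def cosine_with_warmup(step: int, max_steps: int, peak_lr: float,
                       t_warmup: int = 10, alpha_f: float = 0.1) -> float:
    """Linear warmup followed by cosine decay to alpha_f * peak_lr."""
    if step < t_warmup:
        return peak_lr * step / max(t_warmup, 1)  # linear warmup
    frac = min((step - t_warmup) / max(max_steps - t_warmup, 1), 1.0)
    cosine = 0.5 * (1.0 + math.cos(math.pi * frac))
    return peak_lr * (alpha_f + (1.0 - alpha_f) * cosine)

# With the values above: peak_lr=0.0000001, t_warmup=10ba, alpha_f=0.1
print(cosine_with_warmup(0, 1000, 1e-7))     # 0.0 (start of warmup)
print(cosine_with_warmup(1000, 1000, 1e-7))  # 1e-8 (alpha_f * peak)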
(file path not rendered)

@@ -1,7 +1,8 @@
 langchain==0.0.205
-composer[streaming,libcloud,oci,nlp]==0.15.0
-mosaicml-cli==0.4.4
+composer[streaming,libcloud,oci,nlp]==0.16.0
+mosaicml-cli==0.4.17
 gradio==3.33.1
 faiss-cpu==1.7.4
 sentencepiece==0.1.97
-git+https://github.com/mosaicml/llm-foundry.git@main#egg=llm-foundry
+oauthlib>=2.1.0,<3.0.0
+git+https://github.com/mosaicml/llm-foundry.git@aabdb3c7b64679e8406a6905700dabdaa2c5e739#egg=llm-foundry
4 changes: 2 additions & 2 deletions examples/end-to-end-examples/support_chatbot/requirements.txt
@@ -1,2 +1,2 @@
-composer[nlp,streaming,wandb]==0.15.0
-git+https://github.com/mosaicml/llm-foundry.git@main#egg=llm-foundry[gpu] # TODO: main
+composer[nlp,streaming,wandb]==0.16.0
+git+https://github.com/mosaicml/llm-foundry.git@aabdb3c7b64679e8406a6905700dabdaa2c5e739#egg=llm-foundry[gpu] # TODO: main
Binary file not shown.