Commit db50618

partial conv changes and mcli update

KuuCi committed Aug 29, 2023
1 parent b6c095d commit db50618
Showing 13 changed files with 556 additions and 393 deletions.
7 changes: 4 additions & 3 deletions examples/end-to-end-examples/support_chatbot/app_demo.py
@@ -17,13 +17,13 @@ def parse_args() -> Namespace:
         '--endpoint_url',
         type=str,
         default='https://models.hosted-on.mosaicml.hosting/mpt-30b-chat/v1/predict',
-        #default='https://mpt-30b-composer-finetuned-2ed9el.inf.hosted-on.mosaicml.hosting/predict',
+        #default='https://mpt-30b-composer-finetuned-q8mjj9.inf.hosted-on.mosaicml.hosting/predict',
         required=False,
         help='The endpoint of our MosaicML LLM Model')
     parser.add_argument(
         '--max_length',
         type=int,
-        default=4000,
+        default=5000,
         required=False,
         help='The maximum size tokens in model')
     parser.add_argument(
@@ -87,7 +87,8 @@ def main(endpoint_url: str,
         inject_instruction_format=True,
         endpoint_url= endpoint_url,
         model_kwargs={
-            'max_new_tokens': max_length,
+            #'max_new_tokens': max_length,
+            'max_length': max_length,
             'top_k': model_k,
             'top_p': 0.95,
             'temperature': 0.1,
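Note on the kwarg swap above: in Hugging Face-style generation arguments, max_new_tokens caps only the generated continuation, while max_length caps prompt plus continuation together, so the same numeric limit behaves differently. The sketch below shows how these parameters might be sent to the hosted endpoint; it is illustrative only, and the payload field names ('inputs', 'parameters') and the example top_k value are assumptions rather than the schema actually used by app_demo.py.

import requests

ENDPOINT_URL = 'https://models.hosted-on.mosaicml.hosting/mpt-30b-chat/v1/predict'

def query_endpoint(prompt: str, max_length: int = 5000) -> str:
    """Illustrative sketch: POST a prompt to the hosted MPT-30B-chat endpoint.

    The payload layout below is an assumption for illustration; the real
    request schema is defined by the MosaicML inference server.
    """
    payload = {
        'inputs': [prompt],
        'parameters': {
            'max_length': max_length,  # caps prompt + generated tokens together
            'top_k': 10,               # placeholder for the model_k argument
            'top_p': 0.95,
            'temperature': 0.1,
        },
    }
    response = requests.post(ENDPOINT_URL, json=payload, timeout=120)
    response.raise_for_status()
    return response.json()['outputs'][0]  # assumed response shape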
1 change: 0 additions & 1 deletion examples/end-to-end-examples/support_chatbot/app_slack.py
@@ -50,7 +50,6 @@ def parse_args() -> Namespace:
 @app.route('/slack/events', methods=['POST'])
 def slack_events():
     data = request.json
-
     # Immediately respond to Slack's challenge
     if "challenge" in data:
         return jsonify({'challenge': data['challenge']})

Check warning

Code scanning / CodeQL

Reflected server-side cross-site scripting Medium

Cross-site scripting vulnerability due to a
user-provided value
.
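The CodeQL warning fires because the user-supplied challenge value is echoed back in the response. Slack's URL-verification handshake does require echoing that value, but it can be returned defensively by validating its shape and forcing a JSON content type. A minimal sketch of that pattern, assuming Slack's challenge tokens are URL-safe strings (the exact character set is an assumption), and not necessarily the fix the authors intended:

import re
from flask import Flask, jsonify, request

app = Flask(__name__)

@app.route('/slack/events', methods=['POST'])
def slack_events():
    data = request.get_json(silent=True) or {}

    # Slack URL verification: echo the challenge back, but only after
    # checking that it looks like the opaque token Slack sends.
    challenge = data.get('challenge')
    if challenge is not None:
        if not isinstance(challenge, str) or not re.fullmatch(r'[A-Za-z0-9_\-]+', challenge):
            return jsonify({'error': 'bad challenge'}), 400
        # jsonify sets Content-Type: application/json, so the echoed value
        # is not interpreted as HTML by a browser.
        return jsonify({'challenge': challenge})

    return jsonify({'ok': True})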
149 changes: 70 additions & 79 deletions examples/end-to-end-examples/support_chatbot/chatbot.py

Large diffs are not rendered by default.

426 changes: 426 additions & 0 deletions examples/end-to-end-examples/support_chatbot/custom_mpt_ft_handler.py

Large diffs are not rendered by default.

(file path not rendered)

@@ -7,7 +7,7 @@ compute:
   gpus: 0 # Number of GPUs to use

 ## These configurations are optional
-cluster: r7z17 # Name of the cluster to use for this run
+# cluster: r0z0 # Name of the cluster to use for this run
 # gpu_type: a100_80gb # Type of GPU to use.

 integrations:
@@ -22,8 +22,8 @@ integrations:
 command: |
   cd llm-foundry/scripts/inference
   python convert_composer_to_hf.py \
-    --composer_path oci://mosaicml-internal-checkpoints/support-bot-demo/checkpoints/mpt-30b-chat_composer_chatv2/latest-rank0.pt.symlink \
-    --hf_output_path oci://mosaicml-internal-checkpoints/support-bot-demo/converted_checkpoints/mpt-30b-chat_composer_chatv2-hf/ \
+    --composer_path CLOUD://BUCKET_NAME/support-bot-demo/checkpoints/mpt-30b-chat_composer_chatv2/latest-rank0.pt.symlink \
+    --hf_output_path CLOUD://BUCKET_NAME/support-bot-demo/converted_checkpoints/mpt-30b-chat_composer-codebase-hf/ \
     --output_precision bf16 \
 image: mosaicml/pytorch:1.13.1_cu117-python3.10-ubuntu20.04 # Use the Docker image provided by MosaicML
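Once convert_composer_to_hf.py has written a Hugging Face-format folder to --hf_output_path, the checkpoint loads with the standard transformers API. A minimal sketch, assuming the converted folder has been downloaded locally (the local path here is hypothetical):

from transformers import AutoModelForCausalLM, AutoTokenizer

# Hypothetical local copy of the --hf_output_path folder.
local_path = './mpt-30b-chat-hf'

tokenizer = AutoTokenizer.from_pretrained(local_path)
# MPT ships custom modeling code, hence trust_remote_code=True.
model = AutoModelForCausalLM.from_pretrained(local_path, trust_remote_code=True)

inputs = tokenizer('Hello, MPT!', return_tensors='pt')
print(tokenizer.decode(model.generate(**inputs, max_new_tokens=20)[0]))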
(file path not rendered)

@@ -23,17 +23,22 @@ integrations:
   # Install composer to use the cloud download helper
   command: |
     export PYTHONPATH=$PYTHONPATH:/code/llm-foundry:/code/examples:/code
+    pip uninstall packaging -y
+    rm /usr/lib/python3/dist-packages/packaging-23.1.dist-info/REQUESTED
     pip install composer[streaming,libcloud,oci]==0.16.0
+    pip install packaging==23.1
 model:
   backend: faster_transformers
   downloader: examples.end-to-end-examples.support_chatbot.scripts.deployment_download_helper.download_and_convert
   download_parameters:
-    remote_uri: oci://mosaicml-internal-checkpoints/support-bot-demo/converted_checkpoints/mpt-30b-chat_composer_chatv2-hf/
+    remote_uri: CLOUD://BUCKET_NAME/support-bot-demo/converted_checkpoints/mpt-30b-chat_composer-codebase-hf/
   gpus: 4
-  model_handler: examples.inference-deployments.mpt.mpt_ft_handler.MPTFTModelHandler # Use the provided MPT handler
+  model_handler: examples.end-to-end-examples.support_chatbot.custom_mpt_ft_handler.MPTFTModelHandler # Use the provided MPT handler
   model_parameters:
     ft_lib_path: /code/FasterTransformer/build/lib/libth_transformer.so
     # FT checkpoint path is hardcoded in MPTFTModelHandler at /tmp/mpt
-    model_name_or_path: mosaicml/mpt-30b-chat # This is used for the tokenzier
-    gpus: 4
+    model_name_or_path: mosaicml/mpt-30b # This is used for the tokenzier
+    gpus: 4

-image: mosaicml/inference:0.1.16
+image: mosaicml/inference:0.1.29
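This deployment now points model_handler at a handler vendored into the example (custom_mpt_ft_handler.py, the new 426-line file above) instead of the shared inference-deployments one. The inference server instantiates the handler with the model_parameters above and calls it per request; the skeleton below is schematic only, with the constructor arguments and predict signature assumed for illustration rather than taken from the actual server contract.

class MPTFTModelHandler:
    """Schematic sketch of a FasterTransformer model handler.

    Method names and request shapes here are assumptions for illustration;
    the real contract is defined by the MosaicML inference server and the
    vendored custom_mpt_ft_handler.py.
    """

    def __init__(self, model_name_or_path: str, ft_lib_path: str):
        # Tokenizer is resolved from the Hugging Face name (mosaicml/mpt-30b);
        # the FasterTransformer weights are read from the hardcoded /tmp/mpt.
        self.model_name_or_path = model_name_or_path
        self.ft_lib_path = ft_lib_path

    def predict(self, model_requests: list) -> list:
        outputs = []
        for req in model_requests:
            prompt = req['input']               # assumed request field
            params = req.get('parameters', {})  # e.g. max_length, top_k
            outputs.append(self._generate(prompt, **params))
        return outputs

    def _generate(self, prompt: str, **gen_kwargs) -> str:
        raise NotImplementedError  # FT generation elided in this sketch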
(file path not rendered)

@@ -1,21 +1,16 @@
-name: mpt-30b-PyPi_composer_chatv2
-
-scheduling:
-  resumable: true
-  priority: low
+name: mpt-30b-chat_composer_chatv2

 compute:
-  gpus: 64 # Number of GPUs to use
+  gpus: 8 # Number of GPUs to use

 ## These configurations are optional
 # cluster: r0z0 # Name of the cluster to use for this run
-gpu_type: h100_80gb # Type of GPU to use.
+# gpu_type: h100_80gb # Type of GPU to use.

 integrations:
   # Clone and install the llm-foundry repo so we can run scripts from it
   - integration_type: git_repo
     git_repo: mosaicml/llm-foundry
-    git_branch: davis/lion8b-v2
+    git_branch: dev
     pip_install: -e .[gpu]
     ssh_clone: false # Should be true if using a private repo
@@ -43,15 +38,17 @@ image: mosaicml/llm-foundry:2.0.1_cu118-latest # Use the Docker image provided by MosaicML
 # See LLM-foundry llmfoundry/scripts/train.py to see how the parameters are used in code
 parameters:
   # Path to load the weights from the previous step
-  load_path: oci://mosaicml-internal-checkpoints/support-bot-demo/checkpoints/mpt-30b_PyPi_composer/latest-rank0.pt.symlink
-  load_weights_only: false # Only load the weights for finetuning, discarding any other state from previous training
+  load_path: CLOUD://BUCKET_NAME/support-bot-demo/checkpoints/mpt-30b-chat_composer-codebase/latest-rank0.pt.symlink
+  load_weights_only: true # Only load the weights for finetuning, discarding any other state from previous training

   # Checkpoint to local filesystem or remote object store
-  save_interval: 1000ba # How frequently to save checkpoints
+  save_interval: 1ep # How frequently to save checkpoints
   save_num_checkpoints_to_keep: 1 # Important, this cleans up checkpoints saved to DISK
-  save_folder: oci://mosaicml-internal-checkpoints/support-bot-demo/checkpoints/mpt-30b-PyPi_composer_chatv2/
+  save_folder: CLOUD://BUCKET_NAME/support-bot-demo/checkpoints/mpt-30b-chat_composer_chatv2/
   save_weights_only: false

+  dist_timeout: 60000000
+
   # Maximum sequence length of the model
   # For MPT, you can change this to a different number if you would like to train on longer sequences
   # Note that you would also need to reprocess your data to contain longer sequences
@@ -126,11 +123,11 @@ parameters:
   # see LLM-foundry llmfoundry/utils/builders.py::build_optimizer for other built-in options
   optimizer:
     name: decoupled_lionw_8b
-    lr: 0.000006
+    lr: 0.0000005
     betas:
     - 0.9
     - 0.99
-    weight_decay: 0.000006
+    weight_decay: 0


 # Algorithms to apply
@@ -141,13 +138,13 @@ parameters:
     clipping_threshold: 1.0

   # Run configuration
-  max_duration: 4ep # Maximum duration of the run. Change to something shorter (e.g. 10ba) for a quick test run
-  eval_interval: 1ep # How frequently to evaluate the model
+  max_duration: 1ep # Maximum duration of the run. Change to something shorter (e.g. 10ba) for a quick test run
+  eval_interval: 2000ba # How frequently to evaluate the model
   eval_first: true # Whether to evaluate the model before training
   eval_subset_num_batches: -1 # How many batches to evaluate on. -1 means evaluate on the entire dataset
-  global_train_batch_size: 64 # Global batch size. This is the batch size across all GPUs
+  global_train_batch_size: 8 # Global batch size. This is the batch size across all GPUs
   seed: ${global_seed}
-  device_eval_batch_size: 1 # Evaluation batch size per GPU
+  device_eval_batch_size: 2 # Evaluation batch size per GPU
   device_train_microbatch_size: 1
   precision: amp_bf16
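With global_train_batch_size dropped to 8 on the 8 GPUs requested above and device_train_microbatch_size: 1, each GPU processes one sample per optimizer step and no gradient accumulation is needed. The relationship, spelled out as plain arithmetic (illustrative of how Composer-style trainers derive it):

# Microbatching arithmetic for the run configuration above (illustrative).
global_train_batch_size = 8       # samples per optimizer step across all GPUs
num_gpus = 8                      # from the compute section
device_train_microbatch_size = 1  # samples per forward/backward pass per GPU

device_train_batch_size = global_train_batch_size // num_gpus
grad_accum_steps = device_train_batch_size // device_train_microbatch_size

assert device_train_batch_size * num_gpus == global_train_batch_size
print(device_train_batch_size, grad_accum_steps)  # 1 1 -> no accumulation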
(file path not rendered)

@@ -3,19 +3,14 @@ name: mpt-support-bot-finetune-PyPi
 compute:
   gpus: 64 # Number of GPUs to use

-scheduling:
-  resumable: false
-  priority: low
-
 ## These configurations are optional
 # cluster: r0z0 # Name of the cluster to use for this run
-gpu_type: h100_80gb # Type of GPU to use.
+# gpu_type: h100_80gb # Type of GPU to use.

 integrations:
   # Clone and install the llm-foundry repo so we can run scripts from it
   - integration_type: git_repo
     git_repo: mosaicml/llm-foundry
-    git_branch: davis/lion8b-v2
+    git_branch: dev
     pip_install: -e .[gpu]
     ssh_clone: false # Should be true if using a private repo
@@ -43,9 +38,9 @@ image: mosaicml/llm-foundry:2.0.1_cu118-latest # Use the Docker image provided by MosaicML
 # See LLM-foundry llmfoundry/scripts/train.py to see how the parameters are used in code
 parameters:
   # Checkpoint to local filesystem or remote object store
-  save_interval: 3ep # How frequently to save checkpoints
+  save_interval: 2ep # How frequently to save checkpoints
   save_num_checkpoints_to_keep: 1 # Important, this cleans up checkpoints saved to DISK
-  save_folder: oci://mosaicml-internal-checkpoints/support-bot-demo/checkpoints/mpt-30b_PyPi/
+  save_folder: CLOUD://BUCKET_NAME/support-bot-demo/checkpoints/mpt-30b_PyPi/
   save_weights_only: True # Since we only need the weights for the next step, we can reduce the size of the checkpoint

 # Maximum sequence length of the model
@@ -85,7 +80,7 @@ parameters:
   # The dataset section is used by LLM-foundry to construct a StreamingDataset
   dataset:
     local: ./local-dataset-PyPi-cache
-    remote: oci://mosaicml-internal-checkpoints/support-bot-demo/data/PyPi
+    remote: CLOUD://BUCKET_NAME/support-bot-demo/data/PyPi
     split: train
     shuffle: true
     max_seq_len: ${max_seq_len}
@@ -99,7 +94,7 @@ parameters:
   # The dataset section is used by LLM-foundry to construct a StreamingDataset
   dataset:
     local: ./local-dataset-PyPi-cache
-    remote: oci://mosaicml-internal-checkpoints/support-bot-demo/data/PyPi
+    remote: CLOUD://BUCKET_NAME/support-bot-demo/data/PyPi
     split: validation
     shuffle: false
     max_seq_len: ${max_seq_len}
@@ -119,11 +114,11 @@ parameters:
   # see LLM-foundry llmfoundry/utils/builders.py::build_optimizer for other built-in options
   optimizer:
     name: decoupled_lionw_8b
-    lr: 0.000006
+    lr: 0.0000005
     betas:
     - 0.9
     - 0.99
-    weight_decay: 0.000006
+    weight_decay: 0.000000

 # Algorithms to apply
 # see https://docs.mosaicml.com/projects/composer/en/latest/trainer/algorithms.html
@@ -134,7 +129,7 @@ parameters:
     clipping_threshold: 1.0

   # Run configuration
-  max_duration: 3ep # Maximum duration of the run. Change to something shorter (e.g. 10ba) for a quick test run
+  max_duration: 2ep # Maximum duration of the run. Change to something shorter (e.g. 10ba) for a quick test run
   eval_interval: 500ba # How frequently to evaluate the model
   eval_first: true # Whether to evaluate the model before training
   eval_subset_num_batches: 1000 # How many batches to evaluate on. -1 means evaluate on the entire dataset
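The dataset blocks above are handed to LLM Foundry, which constructs a StreamingDataset that pulls shards from remote into the local cache on each node. A rough standalone equivalent using the mosaicml-streaming package; note that CLOUD://BUCKET_NAME is a placeholder that must be replaced with a real object-store URI such as s3:// or oci://:

from streaming import StreamingDataset

# Mirrors the train dataset block above. CLOUD://BUCKET_NAME is a
# placeholder; substitute a real object-store URI before running.
dataset = StreamingDataset(
    remote='CLOUD://BUCKET_NAME/support-bot-demo/data/PyPi',
    local='./local-dataset-PyPi-cache',
    split='train',
    shuffle=True,
)

for sample in dataset:
    ...  # pre-tokenized training examples, up to max_seq_len tokens each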
(file path not rendered)

@@ -1,22 +1,16 @@
-name: mpt-30b-composer
-
-scheduling:
-  resumable: false
-  priority: low
+name: mpt-30b_chat-composer

 compute:
-  gpus: 32 # Number of GPUs to use
+  gpus: 64 # Number of GPUs to use

 ## These configurations are optional
 # cluster: r0z0 # Name of the cluster to use for this run
-#gpu_type: h100_80gb # Type of GPU to use.
+gpu_type: h100_80gb # Type of GPU to use.

 integrations:
   # Clone and install the llm-foundry repo so we can run scripts from it
   - integration_type: git_repo
     git_repo: mosaicml/llm-foundry
-    git_branch: davis/lion8b-v2
+    git_branch: dev
     pip_install: -e .[gpu]
     ssh_clone: false # Should be true if using a private repo
@@ -45,18 +39,18 @@ image: mosaicml/llm-foundry:2.0.1_cu118-latest
 # See LLM-foundry llmfoundry/scripts/train.py to see how the parameters are used in code
 parameters:
   # Path to load the weights from the previous step
-  load_path: oci://mosaicml-internal-checkpoints/support-bot-demo/checkpoints/mpt-30b_PyPi/latest-rank0.pt.symlink
+  load_path: CLOUD://BUCKET_NAME/support-bot-demo/checkpoints/mpt-30b_PyPi/latest-rank0.pt.symlink
   load_weights_only: true # Only load the weights for finetuning, discarding any other state from previous training

   # Where to read the data from and save it to locally on the machine
-  data_remote: oci://mosaicml-internal-checkpoints/support-bot-demo/data/composer_30b/
+  data_remote: CLOUD://BUCKET_NAME/support-bot-demo/data/composer_30b/
   data_local: ./local-dataset-composercodebase-cache/

   # Checkpoint to local filesystem or remote object store
-  save_interval: 15ep # How frequently to save checkpoints
+  save_interval: 2ep # How frequently to save checkpoints
   save_num_checkpoints_to_keep: 1 # Important, this cleans up checkpoints saved to DISK
-  save_folder: oci://mosaicml-internal-checkpoints/support-bot-demo/checkpoints/mpt-30b_PyPi_composer/
-  save_weights_only: True # Since we only need the weights for the next step, we can reduce the size of the checkpoint
+  save_folder: CLOUD://BUCKET_NAME/support-bot-demo/checkpoints/mpt-30b-chat_composer-codebase/
+  save_weights_only: False # Since we only need the weights for the next step, we can reduce the size of the checkpoint

 # Maximum sequence length of the model
 # For MPT, you can change this to a different number if you would like to train on longer sequences
@@ -119,18 +113,18 @@ parameters:
   # see LLM-foundry llmfoundry/utils/builders.py::build_scheduler for other built-in options
   scheduler:
     name: cosine_with_warmup
-    t_warmup: 500ba
+    t_warmup: 10ba
     alpha_f: 0.1

   # Optimizer
   # see LLM-foundry llmfoundry/utils/builders.py::build_optimizer for other built-in options
   optimizer:
     name: decoupled_lionw_8b
-    lr: 0.000006
+    lr: 0.0000001
     betas:
     - 0.9
     - 0.99
-    weight_decay: 0.000006
+    weight_decay: 0

 # Algorithms to apply
 # see https://docs.mosaicml.com/projects/composer/en/latest/trainer/algorithms.html
@@ -141,13 +135,13 @@ parameters:
     clipping_threshold: 1.0

   # Run configuration
-  max_duration: 15ep # Maximum duration of the run. Change to something shorter (e.g. 10ba) for a quick test run
-  eval_interval: 3ep # How frequently to evaluate the model
+  max_duration: 8ep # Maximum duration of the run. Change to something shorter (e.g. 10ba) for a quick test run
+  eval_interval: 2ep # How frequently to evaluate the model
   eval_first: true # Whether to evaluate the model before training
   eval_subset_num_batches: -1 # How many batches to evaluate on. -1 means evaluate on the entire dataset
-  global_train_batch_size: 32 # Global batch size. This is the batch size across all GPUs
+  global_train_batch_size: 64 # Global batch size. This is the batch size across all GPUs
   seed: ${global_seed}
-  device_eval_batch_size: 1 # Evaluation batch size per GPU
+  device_eval_batch_size: 8 # Evaluation batch size per GPU
   device_train_microbatch_size: 1 # Automatically determine the microbatch size per GPU
   precision: amp_bf16
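The cosine_with_warmup schedule above ramps the learning rate linearly for t_warmup batches, then decays it along a cosine so that it ends at alpha_f times the peak. A small illustrative implementation of that shape (the standard formula, not LLM Foundry's code):

import math

def cosine_with_warmup(step: int, max_steps: int, peak_lr: float,
                       t_warmup: int = 10, alpha_f: float = 0.1) -> float:
    """Linear warmup followed by cosine decay to alpha_f * peak_lr."""
    if step < t_warmup:
        return peak_lr * step / max(t_warmup, 1)  # linear warmup
    frac = min((step - t_warmup) / max(max_steps - t_warmup, 1), 1.0)
    cosine = 0.5 * (1.0 + math.cos(math.pi * frac))
    return peak_lr * (alpha_f + (1.0 - alpha_f) * cosine)

# With the values above: peak_lr=0.0000001, t_warmup=10ba, alpha_f=0.1
print(cosine_with_warmup(0, 1000, 1e-7))     # 0.0 (start of warmup)
print(cosine_with_warmup(1000, 1000, 1e-7))  # 1e-8 (alpha_f * peak)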
(file path not rendered)

@@ -1,7 +1,8 @@
 langchain==0.0.205
-composer[streaming,libcloud,oci,nlp]==0.15.0
-mosaicml-cli==0.4.4
+composer[streaming,libcloud,oci,nlp]==0.16.0
+mosaicml-cli==0.4.17
 gradio==3.33.1
 faiss-cpu==1.7.4
 sentencepiece==0.1.97
-git+https://github.com/mosaicml/llm-foundry.git@main#egg=llm-foundry
+oauthlib>=2.1.0,<3.0.0
+git+https://github.com/mosaicml/llm-foundry.git@aabdb3c7b64679e8406a6905700dabdaa2c5e739#egg=llm-foundry
4 changes: 2 additions & 2 deletions examples/end-to-end-examples/support_chatbot/requirements.txt
@@ -1,2 +1,2 @@
-composer[nlp,streaming,wandb]==0.15.0
-git+https://github.com/mosaicml/llm-foundry.git@main#egg=llm-foundry[gpu] # TODO: main
+composer[nlp,streaming,wandb]==0.16.0
+git+https://github.com/mosaicml/llm-foundry.git@aabdb3c7b64679e8406a6905700dabdaa2c5e739#egg=llm-foundry[gpu] # TODO: main
Binary file not shown.