Update the MPT-30B FasterTransformer inference deployment examples.

For both mpt_30b_ft.yaml and mpt_30b_instruct_ft.yaml this patch:
  * changes the image tag from mosaicml/inference:0.1.1 to 0.1.4;
  * adds "gpus: 2" and "force_conversion: true" after hf_path in the
    downloader parameters;
  * adds "gpus: 2" after ft_lib_path in the model handler parameters.

NOTE(review): the leading whitespace of the hunk lines below was
reconstructed assuming 2-space YAML nesting; confirm it against the
target files before applying.

diff --git a/examples/inference-deployments/mpt/mpt_30b_ft.yaml b/examples/inference-deployments/mpt/mpt_30b_ft.yaml
index 9ccec3ab8..11e8ea3ed 100644
--- a/examples/inference-deployments/mpt/mpt_30b_ft.yaml
+++ b/examples/inference-deployments/mpt/mpt_30b_ft.yaml
@@ -2,7 +2,7 @@ name: mpt-30b-ft
 compute:
   gpus: 2
   gpu_type: a100_40gb
-image: mosaicml/inference:0.1.1
+image: mosaicml/inference:0.1.4
 replicas: 1
 command: |
   export PYTHONPATH=/code/llm-foundry:/code/examples:/code
@@ -20,7 +20,10 @@ model:
   downloader: examples.inference-deployments.mpt.mpt_ft_handler.download_convert
   download_parameters:
     hf_path: mosaicml/mpt-30b
+    gpus: 2
+    force_conversion: true
   model_handler: examples.inference-deployments.mpt.mpt_ft_handler.MPTFTModelHandler
   model_parameters:
     model_name_or_path: mosaicml/mpt-30b
     ft_lib_path: /code/FasterTransformer/build/lib/libth_transformer.so
+    gpus: 2
diff --git a/examples/inference-deployments/mpt/mpt_30b_instruct_ft.yaml b/examples/inference-deployments/mpt/mpt_30b_instruct_ft.yaml
index 43db81061..b9178b99e 100644
--- a/examples/inference-deployments/mpt/mpt_30b_instruct_ft.yaml
+++ b/examples/inference-deployments/mpt/mpt_30b_instruct_ft.yaml
@@ -2,7 +2,7 @@ name: mpt-30b-instruct-ft
 compute:
   gpus: 2
   gpu_type: a100_40gb
-image: mosaicml/inference:0.1.1
+image: mosaicml/inference:0.1.4
 replicas: 1
 command: |
   export PYTHONPATH=/code/llm-foundry:/code/examples:/code
@@ -20,7 +20,10 @@ model:
   downloader: examples.inference-deployments.mpt.mpt_ft_handler.download_convert
   download_parameters:
     hf_path: mosaicml/mpt-30b-instruct
+    gpus: 2
+    force_conversion: true
   model_handler: examples.inference-deployments.mpt.mpt_ft_handler.MPTFTModelHandler
   model_parameters:
     model_name_or_path: mosaicml/mpt-30b-instruct
     ft_lib_path: /code/FasterTransformer/build/lib/libth_transformer.so
+    gpus: 2