From 6e44d6b3933d2bafc513b4541ab9bec5a2871f01 Mon Sep 17 00:00:00 2001 From: Zhihao Lin <36994684+LZHgrla@users.noreply.github.com> Date: Tue, 5 Sep 2023 16:08:28 +0800 Subject: [PATCH] [Improve] Redesign convert tools (#96) * refactor tools * modify entry_point * modify docs * update docs * fix * fix * Update README.md * Update README.md * Update README.md * Update README_zh-CN.md * fix pre-commit * rename converter * update pth2hf * rename pth2hf to pth_to_hf * add fp32 for pth_to_hf * Update README.md * Update README_zh-CN.md * Update README_zh-CN.md * Update README.md * Update README_zh-CN.md * Update README_zh-CN.md * Update README.md * Update README.md * Update README_zh-CN.md * fix pre-commit --- README.md | 99 ++++---- README_zh-CN.md | 99 ++++---- docs/en/user_guides/chat.md | 36 +-- docs/zh_cn/user_guides/chat.md | 36 +-- xtuner/entry_point.py | 73 ++---- xtuner/tools/chat.py | 97 ++++---- xtuner/tools/chat_hf.py | 235 ------------------ .../tools/model_converters/adapter_pth2hf.py | 79 ------ .../{merge_adapter_hf.py => merge.py} | 7 +- .../tools/model_converters/merge_adapter.py | 79 ------ xtuner/tools/model_converters/pth_to_hf.py | 108 ++++++++ .../{split_hf_llm.py => split.py} | 0 12 files changed, 328 insertions(+), 620 deletions(-) delete mode 100644 xtuner/tools/chat_hf.py delete mode 100644 xtuner/tools/model_converters/adapter_pth2hf.py rename xtuner/tools/model_converters/{merge_adapter_hf.py => merge.py} (87%) delete mode 100644 xtuner/tools/model_converters/merge_adapter.py create mode 100644 xtuner/tools/model_converters/pth_to_hf.py rename xtuner/tools/model_converters/{split_hf_llm.py => split.py} (100%) diff --git a/README.md b/README.md index b7f06adbe..d28730113 100644 --- a/README.md +++ b/README.md @@ -26,9 +26,28 @@ XTuner is a toolkit for efficiently fine-tuning LLM, developed by the [MMRazor]( ## 🌟 Demos +- Ready-to-use models and datasets from XTuner API [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1eBI9yiOkX-t7P-0-t9vS8y1x5KmWrkoU?usp=sharing) + - QLoRA Fine-tune [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1QAEZVBfQ7LZURkMUtaq0b-5nEQII9G9Z?usp=sharing) + - Plugin-based Chat [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/144OuTVyT_GvFyDMtlSlTzcxYIfnRsklq?usp=sharing) -- Ready-to-use models and datasets from XTuner API [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1eBI9yiOkX-t7P-0-t9vS8y1x5KmWrkoU?usp=sharing) + + + + + + + + + + +
Examples of Plugin-based Chat 🔥🔥🔥
+ + + + + +
## 🔥 Supports @@ -123,35 +142,6 @@ XTuner is a toolkit for efficiently fine-tuning LLM, developed by the [MMRazor]( pip install -e '.[all]' ``` -### Chat [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/144OuTVyT_GvFyDMtlSlTzcxYIfnRsklq?usp=sharing) - - - - - - - - - - -
Examples of Plugins-based Chat 🔥🔥🔥
- - - - - -
-
-XTuner provides tools to chat with pretrained / fine-tuned LLMs.
-
-- For example, we can start the chat with Llama2-7B-Plugins by
-
-  ```shell
-  xtuner chat hf meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-moss-003-sft --bot-name Llama2 --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "<eoc>" --answer-stop-word "<eom>" --no-streamer
-  ```
-
-For more examples, please see [chat.md](./docs/en/user_guides/chat.md).
-
 ### Fine-tune [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1QAEZVBfQ7LZURkMUtaq0b-5nEQII9G9Z?usp=sharing)
 
 XTuner supports the efficient fine-tune (*e.g.*, QLoRA) for LLMs. Dataset prepare guides can be found on [dataset_prepare.md](./docs/en/user_guides/dataset_prepare.md).
@@ -165,10 +155,16 @@ XTuner supports the efficient fine-tune (*e.g.*, QLoRA) for LLMs. Dataset prepar
   Or, if the provided configs cannot meet the requirements, please copy the provided config to the specified directory and make specific modifications by
 
   ```shell
-  xtuner copy-cfg ${CONFIG_NAME} ${SAVE_DIR}
+  xtuner copy-cfg ${CONFIG_NAME} ${SAVE_PATH}
   ```
 
-- **Step 1**, start fine-tuning. For example, we can start the QLoRA fine-tuning of InternLM-7B with oasst1 dataset by
+- **Step 1**, start fine-tuning.
+
+  ```shell
+  xtuner train ${CONFIG_NAME_OR_PATH}
+  ```
+
+  For example, we can start the QLoRA fine-tuning of InternLM-7B with oasst1 dataset by
 
   ```shell
   # On a single GPU
   xtuner train internlm_7b_qlora_oasst1_e3
@@ -180,24 +176,37 @@ XTuner supports the efficient fine-tune (*e.g.*, QLoRA) for LLMs. Dataset prepar
   NPROC_PER_NODE=${GPU_NUM} xtuner train internlm_7b_qlora_oasst1_e3
   ```
 
 For more examples, please see [finetune.md](./docs/en/user_guides/finetune.md).
 
-### Deployment
-
-- **Step 0**, convert the pth adapter to HuggingFace adapter, by
+- **Step 2**, convert the saved PTH model (if using DeepSpeed, it will be a directory) to a HuggingFace model, by
 
   ```shell
-  xtuner convert adapter_pth2hf \
-      ${CONFIG} \
-      ${PATH_TO_PTH_ADAPTER} \
-      ${SAVE_PATH_TO_HF_ADAPTER}
+  xtuner convert pth_to_hf ${CONFIG_NAME_OR_PATH} ${PTH} ${SAVE_PATH}
   ```
 
-  or, directly merge the pth adapter to pretrained LLM, by
+### Chat [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/144OuTVyT_GvFyDMtlSlTzcxYIfnRsklq?usp=sharing)
+
+XTuner provides tools to chat with pretrained / fine-tuned LLMs.
+
+```shell
+xtuner chat ${NAME_OR_PATH_TO_LLM} --adapter ${NAME_OR_PATH_TO_ADAPTER} [optional arguments]
+```
+
+For example, we can chat with Llama2-7B, using the adapter trained on MOSS-003-SFT, by
+
+```shell
+xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-moss-003-sft --bot-name Llama2 --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "<eoc>" --answer-stop-word "<eom>" --no-streamer
+```
+
+For more examples, please see [chat.md](./docs/en/user_guides/chat.md).
+
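+Under the hood, `xtuner chat` roughly performs the steps below. This is a minimal, illustrative Python sketch based on the rewritten `xtuner/tools/chat.py` in this PR; prompt templates, plugins and quantization are omitted, and the model / adapter names simply reuse the example above:
+
+```python
+from peft import PeftModel
+from transformers import (AutoModelForCausalLM, AutoTokenizer,
+                          GenerationConfig)
+
+base = 'meta-llama/Llama-2-7b-hf'
+adapter = 'xtuner/Llama-2-7b-qlora-moss-003-sft'
+
+# Load the base LLM and tokenizer, then attach the LoRA adapter.
+model = AutoModelForCausalLM.from_pretrained(
+    base, device_map='auto', trust_remote_code=True)
+tokenizer = AutoTokenizer.from_pretrained(base, trust_remote_code=True)
+model = PeftModel.from_pretrained(model, adapter)
+
+# Encode a prompt and generate a reply (assumes a CUDA device, as chat.py does).
+ids = tokenizer.encode('Hello!', return_tensors='pt')
+out = model.generate(
+    inputs=ids.cuda(),
+    generation_config=GenerationConfig(max_new_tokens=64))
+print(tokenizer.decode(out[0][len(ids[0]):]))
+```
+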
+### Deployment
+
+- **Step 0**, merge the HuggingFace adapter into the pretrained LLM, by
 
   ```shell
-  xtuner convert merge_adapter \
-      ${CONFIG} \
-      ${PATH_TO_PTH_ADAPTER} \
-      ${SAVE_PATH_TO_MERGED_LLM} \
+  xtuner convert merge \
+      ${NAME_OR_PATH_TO_LLM} \
+      ${NAME_OR_PATH_TO_ADAPTER} \
+      ${SAVE_PATH} \
       --max-shard-size 2GB
   ```
 
@@ -212,7 +221,9 @@ XTuner supports the efficient fine-tune (*e.g.*, QLoRA) for LLMs. Dataset prepar
       --seed 0
   ```
 
-  🎯 We are woking closely with [LMDeploy](https://github.com/InternLM/lmdeploy), to implement the deployment of **plugins-based chat**!
+  🔥 Seeking efficient inference with less GPU memory? Try 4-bit quantization from [LMDeploy](https://github.com/InternLM/lmdeploy)! For more details, see [here](https://github.com/InternLM/lmdeploy/tree/main#quantization).
+
+  🎯 We are working closely with [LMDeploy](https://github.com/InternLM/lmdeploy) to implement the deployment of **plugin-based chat**!
 
 ### Evaluation
 
diff --git a/README_zh-CN.md b/README_zh-CN.md
index 30f512647..0ca037b7a 100644
--- a/README_zh-CN.md
+++ b/README_zh-CN.md
@@ -26,9 +26,28 @@ XTuner 是一个轻量级微调大语言模型的工具库,由 [MMRazor](https
 
 ## 🌟 示例
 
+- XTuner APIs所提供的开箱即用的模型与数据集 [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1eBI9yiOkX-t7P-0-t9vS8y1x5KmWrkoU?usp=sharing)
+  - QLoRA 微调 [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1QAEZVBfQ7LZURkMUtaq0b-5nEQII9G9Z?usp=sharing)
+  - 基于插件的对话 [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/144OuTVyT_GvFyDMtlSlTzcxYIfnRsklq?usp=sharing)
-- XTuner APIs所提供的开箱即用的模型与数据集 [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1eBI9yiOkX-t7P-0-t9vS8y1x5KmWrkoU?usp=sharing)
+
+
+
+
+
+
+
+
+
+
基于插件的对话 🔥🔥🔥
+ + + + + +
## 🔥 支持列表 @@ -123,35 +142,6 @@ XTuner 是一个轻量级微调大语言模型的工具库,由 [MMRazor](https pip install -e '.[all]' ``` -### 对话 [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/144OuTVyT_GvFyDMtlSlTzcxYIfnRsklq?usp=sharing) - - - - - - - - - - -
基于插件的对话 🔥🔥🔥
- - - - - -
-
-XTuner 提供与大语言模型对话的工具。
-
-- 例如,与基于插件微调获得的 Llama2-7B-Plugins 对话:
-
-  ```shell
-  xtuner chat hf meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-moss-003-sft --bot-name Llama2 --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "<eoc>" --answer-stop-word "<eom>" --no-streamer
-  ```
-
-更多示例,请查阅[文档](./docs/zh_cn/user_guides/chat.md)。
-
 ### 微调 [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1QAEZVBfQ7LZURkMUtaq0b-5nEQII9G9Z?usp=sharing)
 
 XTuner 支持微调大语言模型。数据集预处理指南请查阅[文档](./docs/zh_cn/user_guides/dataset_prepare.md)。
@@ -165,10 +155,16 @@ XTuner 支持微调大语言模型。数据集预处理指南请查阅[文档](.
   或者,如果所提供的配置文件不能满足使用需求,请导出所提供的配置文件并进行相应更改:
 
   ```shell
-  xtuner copy-cfg ${CONFIG_NAME} ${SAVE_DIR}
+  xtuner copy-cfg ${CONFIG_NAME} ${SAVE_PATH}
   ```
 
-- **步骤 1**,开始微调。例如,我们可以利用 QLoRA 算法在 oasst1 数据集上微调 InternLM-7B:
+- **步骤 1**,开始微调。
+
+  ```shell
+  xtuner train ${CONFIG_NAME_OR_PATH}
+  ```
+
+  例如,我们可以利用 QLoRA 算法在 oasst1 数据集上微调 InternLM-7B:
 
   ```shell
   # 单卡
   xtuner train internlm_7b_qlora_oasst1_e3
@@ -177,26 +173,39 @@ XTuner 支持微调大语言模型。数据集预处理指南请查阅[文档](.
   NPROC_PER_NODE=${GPU_NUM} xtuner train internlm_7b_qlora_oasst1_e3
   ```
 
-  更多示例,请查阅[文档](./docs/zh_cn/user_guides/finetune.md).
+  更多示例,请查阅[文档](./docs/zh_cn/user_guides/finetune.md)。
 
-### 部署
-
-- **步骤 0**,将 pth adapter 转换为 HuggingFace adapter:
+- **步骤 2**,将保存的 PTH 模型(如果使用了 DeepSpeed,则将会是一个文件夹)转换为 HuggingFace 模型:
 
   ```shell
-  xtuner convert adapter_pth2hf \
-      ${CONFIG} \
-      ${PATH_TO_PTH_ADAPTER} \
-      ${SAVE_PATH_TO_HF_ADAPTER}
+  xtuner convert pth_to_hf ${CONFIG_NAME_OR_PATH} ${PTH} ${SAVE_PATH}
   ```
 
-  或者,直接将 pth adapter 合并到大语言模型:
+### 对话 [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/144OuTVyT_GvFyDMtlSlTzcxYIfnRsklq?usp=sharing)
+
+XTuner 提供与大语言模型对话的工具。
+
+```shell
+xtuner chat ${NAME_OR_PATH_TO_LLM} --adapter ${NAME_OR_PATH_TO_ADAPTER} [optional arguments]
+```
+
+例如,与 Llama2-7B + MOSS-003-SFT adapter 对话:
+
+```shell
+xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-moss-003-sft --bot-name Llama2 --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "<eoc>" --answer-stop-word "<eom>" --no-streamer
+```
+
+更多示例,请查阅[文档](./docs/zh_cn/user_guides/chat.md)。
+
+### 部署
+
+- **步骤 0**,将 HuggingFace adapter 合并到大语言模型:
 
   ```shell
-  xtuner convert merge_adapter \
-      ${CONFIG} \
-      ${PATH_TO_PTH_ADAPTER} \
-      ${SAVE_PATH_TO_MERGED_LLM} \
+  xtuner convert merge \
+      ${NAME_OR_PATH_TO_LLM} \
+      ${NAME_OR_PATH_TO_ADAPTER} \
+      ${SAVE_PATH} \
       --max-shard-size 2GB
   ```
 
@@ -211,6 +220,8 @@ XTuner 支持微调大语言模型。数据集预处理指南请查阅[文档](.
       --seed 0
   ```
 
+  🔥 追求速度更快、显存占用更低的推理?欢迎体验 [LMDeploy](https://github.com/InternLM/lmdeploy) 提供的 4-bit 量化!使用指南请见[文档](https://github.com/InternLM/lmdeploy/tree/main#quantization)。
+
   🎯 我们正在与 [LMDeploy](https://github.com/InternLM/lmdeploy) 紧密合作,以实现基于插件对话的部署!
 
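+💡 参考:`xtuner convert merge` 的行为大致等价于下面的 Python 示意代码(仅为示意,基于 peft 的公开 API;模型与 adapter 名称沿用上文示例,保存路径 `./merged_llm` 为假设):
+
+```python
+from peft import PeftModel
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+base = 'meta-llama/Llama-2-7b-hf'
+adapter = 'xtuner/Llama-2-7b-qlora-moss-003-sft'
+
+model = AutoModelForCausalLM.from_pretrained(
+    base, torch_dtype='auto', trust_remote_code=True)
+model = PeftModel.from_pretrained(model, adapter)
+merged = model.merge_and_unload()  # 将 LoRA 权重合并回基座模型
+merged.save_pretrained('./merged_llm', max_shard_size='2GB')
+tokenizer = AutoTokenizer.from_pretrained(base, trust_remote_code=True)
+tokenizer.save_pretrained('./merged_llm')
+```
+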
### 评测 diff --git a/docs/en/user_guides/chat.md b/docs/en/user_guides/chat.md index 2914296f0..65725f7a3 100644 --- a/docs/en/user_guides/chat.md +++ b/docs/en/user_guides/chat.md @@ -5,49 +5,49 @@ - InternLM-7B, oasst1 ```shell - xtuner chat hf internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-oasst1 --prompt-template openassistant + xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-oasst1 --prompt-template openassistant ``` - InternLM-7B, Arxiv Gentitle ```shell - xtuner chat hf internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-arxiv-gentitle --prompt-template title + xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-arxiv-gentitle --prompt-template title ``` - InternLM-7B, Colorist ```shell - xtuner chat hf internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-colorist --prompt-template colorist + xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-colorist --prompt-template colorist ``` - InternLM-7B, Coder ```shell - xtuner chat hf internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-coder --prompt-template code + xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-coder --prompt-template code ``` - InternLM-7B, SQL ```shell - xtuner chat hf internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-sql --prompt-template sql + xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-sql --prompt-template sql ``` - InternLM-7B, Lawyer ```shell - xtuner chat hf internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-lawyer --prompt-template lawyer + xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-lawyer --prompt-template lawyer ``` - InternLM-7B, Open-Platypus ```shell - xtuner chat hf internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-open-platypus --prompt-template alpaca + xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-open-platypus --prompt-template alpaca ``` - InternLM-7B, Alpaca-enzh ```shell - xtuner chat hf internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-alpaca-enzh --prompt-template alpaca + xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-alpaca-enzh --prompt-template alpaca ``` ## Chat with [Llama2](https://github.com/facebookresearch/llama) @@ -58,19 +58,19 @@ ```shell export SERPER_API_KEY="xxx" # Please get the key from https://serper.dev to support google search! - xtuner chat hf meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-moss-003-sft --bot-name Llama2 --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "" --answer-stop-word "" --no-streamer + xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-moss-003-sft --bot-name Llama2 --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "" --answer-stop-word "" --no-streamer ``` - Llama2-7B, Arxiv Gentitle ```shell - xtuner chat hf meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-arxiv-gentitle --prompt-template title + xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-arxiv-gentitle --prompt-template title ``` - Llama2-7B, Colorist ```shell - xtuner chat hf meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-colorist --prompt-template colorist + xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-colorist --prompt-template colorist ``` ## Chat with [Qwen](https://github.com/QwenLM) @@ -79,25 +79,25 @@ ```shell export SERPER_API_KEY="xxx" # Please get the key from https://serper.dev to support google search! 
- xtuner chat hf Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-moss-003-sft --bot-name Qwen --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "" --answer-stop-word "" + xtuner chat Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-moss-003-sft --bot-name Qwen --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "" --answer-stop-word "" ``` - Qwen-7B, oasst1 ```shell - xtuner chat hf Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-oasst1 --prompt-template openassistant --answer-stop-word '<|endoftext|>' + xtuner chat Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-oasst1 --prompt-template openassistant --answer-stop-word '<|endoftext|>' ``` - Qwen-7B, Arxiv Gentitle ```shell - xtuner chat hf Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-arxiv-gentitle --prompt-template title --answer-stop-word '<|endoftext|>' + xtuner chat Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-arxiv-gentitle --prompt-template title --answer-stop-word '<|endoftext|>' ``` - Qwen-7B, Alpaca-enzh ```shell - xtuner chat hf Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-alpaca-enzh --prompt-template alpaca --answer-stop-word '<|endoftext|>' + xtuner chat Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-alpaca-enzh --prompt-template alpaca --answer-stop-word '<|endoftext|>' ``` ## Chat with [Baichuan](https://github.com/baichuan-inc) @@ -105,17 +105,17 @@ - Baichuan-7B, oasst1 ```shell - xtuner chat hf baichuan-inc/Baichuan-7B --adapter xtuner/Baichuan-7B-qlora-oasst1 --prompt-template openassistant + xtuner chat baichuan-inc/Baichuan-7B --adapter xtuner/Baichuan-7B-qlora-oasst1 --prompt-template openassistant ``` - Baichuan-7B, Arxiv Gentitle ```shell - xtuner chat hf baichuan-inc/Baichuan-7B --adapter xtuner/Baichuan-7B-qlora-arxiv-gentitle --prompt-template title --no-streamer + xtuner chat baichuan-inc/Baichuan-7B --adapter xtuner/Baichuan-7B-qlora-arxiv-gentitle --prompt-template title --no-streamer ``` - Baichuan-7B, Alpaca-enzh ```shell - xtuner chat hf baichuan-inc/Baichuan-7B --adapter xtuner/Baichuan-7B-qlora-alpaca-enzh --prompt-template alpaca + xtuner chat baichuan-inc/Baichuan-7B --adapter xtuner/Baichuan-7B-qlora-alpaca-enzh --prompt-template alpaca ``` diff --git a/docs/zh_cn/user_guides/chat.md b/docs/zh_cn/user_guides/chat.md index 1ae01388b..6fef1684c 100644 --- a/docs/zh_cn/user_guides/chat.md +++ b/docs/zh_cn/user_guides/chat.md @@ -5,49 +5,49 @@ - InternLM-7B, oasst1 ```shell - xtuner chat hf internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-oasst1 --prompt-template openassistant + xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-oasst1 --prompt-template openassistant ``` - InternLM-7B, Arxiv Gentitle ```shell - xtuner chat hf internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-arxiv-gentitle --prompt-template title + xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-arxiv-gentitle --prompt-template title ``` - InternLM-7B, Colorist ```shell - xtuner chat hf internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-colorist --prompt-template colorist + xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-colorist --prompt-template colorist ``` - InternLM-7B, Coder ```shell - xtuner chat hf internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-coder --prompt-template code + xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-coder --prompt-template code ``` - InternLM-7B, SQL ```shell - xtuner chat hf internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-sql --prompt-template sql + 
xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-sql --prompt-template sql ``` - InternLM-7B, Lawyer ```shell - xtuner chat hf internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-lawyer --prompt-template lawyer + xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-lawyer --prompt-template lawyer ``` - InternLM-7B, Open-Platypus ```shell - xtuner chat hf internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-open-platypus --prompt-template alpaca + xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-open-platypus --prompt-template alpaca ``` - InternLM-7B, Alpaca-enzh ```shell - xtuner chat hf internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-alpaca-enzh --prompt-template alpaca + xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-alpaca-enzh --prompt-template alpaca ``` ## 与微调后的 [Llama2](https://github.com/facebookresearch/llama) 对话 @@ -58,19 +58,19 @@ ```shell export SERPER_API_KEY="xxx" # 请从 https://serper.dev 获得API_KEY,以此支持谷歌搜索! - xtuner chat hf meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-moss-003-sft --bot-name Llama2 --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "" --answer-stop-word "" --no-streamer + xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-moss-003-sft --bot-name Llama2 --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "" --answer-stop-word "" --no-streamer ``` - Llama2-7B, Arxiv Gentitle ```shell - xtuner chat hf meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-arxiv-gentitle --prompt-template title + xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-arxiv-gentitle --prompt-template title ``` - Llama2-7B, Colorist ```shell - xtuner chat hf meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-colorist --prompt-template colorist + xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-colorist --prompt-template colorist ``` ## 与微调后的 [Qwen](https://github.com/QwenLM) 对话 @@ -79,25 +79,25 @@ ```shell export SERPER_API_KEY="xxx" # 请从 https://serper.dev 获得API_KEY,以此支持谷歌搜索! 
- xtuner chat hf Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-moss-003-sft --bot-name Qwen --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "" --answer-stop-word "" + xtuner chat Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-moss-003-sft --bot-name Qwen --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "" --answer-stop-word "" ``` - Qwen-7B, oasst1 ```shell - xtuner chat hf Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-oasst1 --prompt-template openassistant --answer-stop-word '<|endoftext|>' + xtuner chat Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-oasst1 --prompt-template openassistant --answer-stop-word '<|endoftext|>' ``` - Qwen-7B, Arxiv Gentitle ```shell - xtuner chat hf Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-arxiv-gentitle --prompt-template title --answer-stop-word '<|endoftext|>' + xtuner chat Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-arxiv-gentitle --prompt-template title --answer-stop-word '<|endoftext|>' ``` - Qwen-7B, Alpaca-enzh ```shell - xtuner chat hf Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-alpaca-enzh --prompt-template alpaca --answer-stop-word '<|endoftext|>' + xtuner chat Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-alpaca-enzh --prompt-template alpaca --answer-stop-word '<|endoftext|>' ``` ## 与微调后的 [Baichuan](https://github.com/baichuan-inc) 对话 @@ -105,17 +105,17 @@ - Baichuan-7B, oasst1 ```shell - xtuner chat hf baichuan-inc/Baichuan-7B --adapter xtuner/Baichuan-7B-qlora-oasst1 --prompt-template openassistant + xtuner chat baichuan-inc/Baichuan-7B --adapter xtuner/Baichuan-7B-qlora-oasst1 --prompt-template openassistant ``` - Baichuan-7B, Arxiv Gentitle ```shell - xtuner chat hf baichuan-inc/Baichuan-7B --adapter xtuner/Baichuan-7B-qlora-arxiv-gentitle --prompt-template title --no-streamer + xtuner chat baichuan-inc/Baichuan-7B --adapter xtuner/Baichuan-7B-qlora-arxiv-gentitle --prompt-template title --no-streamer ``` - Baichuan-7B, Alpaca-enzh ```shell - xtuner chat hf baichuan-inc/Baichuan-7B --adapter xtuner/Baichuan-7B-qlora-alpaca-enzh --prompt-template alpaca + xtuner chat baichuan-inc/Baichuan-7B --adapter xtuner/Baichuan-7B-qlora-alpaca-enzh --prompt-template alpaca ``` diff --git a/xtuner/entry_point.py b/xtuner/entry_point.py index 628610d38..f36f2cc39 100644 --- a/xtuner/entry_point.py +++ b/xtuner/entry_point.py @@ -8,10 +8,9 @@ from mmengine.logging import print_log import xtuner -from xtuner.tools import chat, chat_hf, copy_cfg, list_cfg, test, train +from xtuner.tools import chat, copy_cfg, list_cfg, test, train from xtuner.tools.data_preprocess import arxiv as arxiv_preprocess -from xtuner.tools.model_converters import (adapter_pth2hf, merge_adapter, - merge_adapter_hf, split_hf_llm) +from xtuner.tools.model_converters import merge, pth_to_hf, split # Define valid modes MODES = ('list-cfg', 'copy-cfg', 'train', 'test', 'chat', 'convert', @@ -37,18 +36,14 @@ xtuner train $CONFIG 3-2. Fine-tune LLMs by multiple GPUs: NPROC_PER_NODE=$NGPUS NNODES=$NNODES NODE_RANK=$NODE_RANK PORT=$PORT ADDR=$ADDR xtuner dist_train $CONFIG $GPUS - 4-1. Chat with LLMs with HuggingFace's model and adapter: - xtuner chat hf $NAME_OR_PATH_TO_HF_MODEL --adapter $NAME_OR_PATH_TO_HF_ADAPTER --prompt-template $PROMPT_TEMPLATE - 4-2. Chat with LLMs with XTuner's config and adapter: - xtuner chat xtuner $CONFIG --adapter $PATH_TO_PTH_ADAPTER --prompt $PROMPT_TEMPLATE - 5-1. 
Convert the pth adapter to HuggingFace's adapter:
-            xtuner convert adapter_pth2hf $CONFIG $PATH_TO_PTH_ADAPTER $SAVE_PATH_TO_HF_ADAPTER
-        5-2. Merge the HuggingFace's adapter to the pretrained LLM:
-            xtuner convert merge_adapter_hf $NAME_OR_PATH_TO_HF_MODEL $NAME_OR_PATH_TO_HF_ADAPTER $SAVE_PATH
-        5-3. Merge the XTuner's adapter to the pretraiend LLM:
-            xtuner convert merge_adapter $CONFIG $NAME_OR_PATH_TO_HF_ADAPTER $SAVE_PATH
-        5-4. Split HuggingFace's LLM to the smallest sharded one:
-            xtuner convert split_hf_llm $NAME_OR_PATH_TO_HF_MODEL $SAVE_PATH
+        4-1. Convert the pth model to a HuggingFace model:
+            xtuner convert pth_to_hf $CONFIG $PATH_TO_PTH_MODEL $SAVE_PATH_TO_HF_MODEL
+        4-2. Merge a HuggingFace adapter into the pretrained LLM:
+            xtuner convert merge $NAME_OR_PATH_TO_LLM $NAME_OR_PATH_TO_ADAPTER $SAVE_PATH
+        4-3. Split a HuggingFace LLM into the smallest possible shards:
+            xtuner convert split $NAME_OR_PATH_TO_LLM $SAVE_PATH
+        5. Chat with an LLM using a HuggingFace model and adapter:
+            xtuner chat $NAME_OR_PATH_TO_LLM --adapter $NAME_OR_PATH_TO_ADAPTER --prompt-template $PROMPT_TEMPLATE
 
         6-1. Preprocess arxiv dataset:
             xtuner preprocess arxiv $SRC_FILE $DST_FILE --start-date $START_DATE --categories $CATEGORIES
@@ -73,15 +68,12 @@
     Some usages for convert: (See more by using -h for specific command!)
 
-        1. Convert the pth adapter to HuggingFace's adapter:
-            xtuner convert adapter_pth2hf $CONFIG $PATH_TO_PTH_ADAPTER $SAVE_PATH_TO_HF_ADAPTER
-        2. Merge the HuggingFace's adapter to the pretrained LLM:
-            xtuner convert merge_adapter_hf $NAME_OR_PATH_TO_HF_MODEL $NAME_OR_PATH_TO_HF_ADAPTER $SAVE_PATH
-        3. Merge the XTuner's
-           adapter to the pretraiend LLM:
-            xtuner convert merge_adapter $CONFIG $NAME_OR_PATH_TO_HF_ADAPTER $SAVE_PATH
-        4. Split HuggingFace's LLM to the smallest sharded one:
-            xtuner convert split_hf_llm $NAME_OR_PATH_TO_HF_MODEL $SAVE_PATH
+        1. Convert the pth model to a HuggingFace model:
+            xtuner convert pth_to_hf $CONFIG $PATH_TO_PTH_MODEL $SAVE_PATH_TO_HF_MODEL
+        2. Merge a HuggingFace adapter into the pretrained LLM:
+            xtuner convert merge $NAME_OR_PATH_TO_LLM $NAME_OR_PATH_TO_ADAPTER $SAVE_PATH
+        3. Split a HuggingFace LLM into the smallest possible shards:
+            xtuner convert split $NAME_OR_PATH_TO_LLM $SAVE_PATH
 
     GitHub: https://github.com/InternLM/xtuner
     """  # noqa: E501
 
@@ -105,27 +97,6 @@ GitHub: https://github.com/InternLM/xtuner
     """  # noqa: E501
 
-
-CHAT_HELP_MSG = \
-    f"""
-    Arguments received: {str(['xtuner'] + sys.argv[1:])}. xtuner commands use the following syntax:
-
-        xtuner MODE MODE_ARGS ARGS
-
-        Where   MODE (required) is one of {MODES}
-                MODE_ARG (optional) is the argument for specific mode
-                ARGS (optional) are the arguments for specific command
-
-    Some usages for chat: (See more by using -h for specific command!)
-
-        1. Chat with LLMs with HuggingFace's model and adapter:
-            xtuner chat hf $NAME_OR_PATH_TO_HF_MODEL --adapter $NAME_OR_PATH_TO_HF_ADAPTER --prompt-template $PROMPT_TEMPLATE
-        2. 
Chat with LLMs with XTuner's config and adapter: - xtuner chat xtuner internlm_7b_qlora_alpaca --adapter $PATH_TO_PTH_ADAPTER --prompt $PROMPT_TEMPLATE - - GitHub: https://github.com/InternLM/xtuner - """ # noqa: E501 - special = { 'help': lambda: print_log(CLI_HELP_MSG, 'current'), 'version': lambda: print_log(xtuner.__version__, 'current') @@ -143,17 +114,11 @@ 'copy-cfg': copy_cfg.__file__, 'train': train.__file__, 'test': test.__file__, - 'chat': { - 'hf': chat_hf.__file__, - 'xtuner': chat.__file__, - '--help': lambda: print_log(CHAT_HELP_MSG, 'current'), - '-h': lambda: print_log(CHAT_HELP_MSG, 'current') - }, + 'chat': chat.__file__, 'convert': { - 'adapter_pth2hf': adapter_pth2hf.__file__, - 'merge_adapter': merge_adapter.__file__, - 'merge_adapter_hf': merge_adapter_hf.__file__, - 'split_hf_llm': split_hf_llm.__file__, + 'pth_to_hf': pth_to_hf.__file__, + 'merge': merge.__file__, + 'split': split.__file__, '--help': lambda: print_log(CONVERT_HELP_MSG, 'current'), '-h': lambda: print_log(CONVERT_HELP_MSG, 'current') }, diff --git a/xtuner/tools/chat.py b/xtuner/tools/chat.py index db3128e65..f3cf452c6 100644 --- a/xtuner/tools/chat.py +++ b/xtuner/tools/chat.py @@ -1,35 +1,45 @@ # Copyright (c) OpenMMLab. All rights reserved. import argparse -import os import re import torch -from mmengine.config import Config, DictAction -from transformers import GenerationConfig +from peft import PeftModel +from transformers import (AutoModelForCausalLM, AutoTokenizer, + BitsAndBytesConfig, GenerationConfig) -from xtuner.configs import cfgs_name_path -from xtuner.registry import BUILDER from xtuner.tools.utils import get_chat_utils, update_stop_criteria from xtuner.utils import PROMPT_TEMPLATE +def remove_prefix(state_dict, prefix): + new_state_dict = {} + for key, value in state_dict.items(): + if key.startswith(prefix): + new_key = key[len(prefix):] + new_state_dict[new_key] = value + else: + new_state_dict[key] = value + return new_state_dict + + def parse_args(): - parser = argparse.ArgumentParser( - description='Chat with a pretrained model') + parser = argparse.ArgumentParser(description='Chat with a HF model') parser.add_argument( - 'config', - help='config file name or path. Note: Please use the original ' - 'configs, instead of the automatically saved log configs.') - parser.add_argument('--adapter', default=None, help='adapter model') + 'model_name_or_path', help='Hugging Face model name or path') + parser.add_argument('--adapter', default=None, help='adapter name or path') parser.add_argument( '--prompt-template', choices=PROMPT_TEMPLATE.keys(), default=None, help='Specify a prompt option') parser.add_argument( - '--is-deepspeed', - action='store_true', - help='whether the adapter is saved from deepspeed') + '--bits', + type=int, + choices=[4, 8, None], + default=None, + help='LLM bits') + parser.add_argument( + '--bot-name', type=str, default='BOT', help='Name for Bot') parser.add_argument( '--with-plugins', nargs='+', @@ -67,16 +77,6 @@ def parse_args(): type=int, default=0, help='Random seed for reproducible text generation') - parser.add_argument( - '--cfg-options', - nargs='+', - action=DictAction, - help='override some settings in the used config, the key-value pair ' - 'in xxx=yyy format will be merged into config file. If the value to ' - 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' - 'It also allows nested list/tuple values, e.g. 
key="[(a,b),(c,d)]" ' - 'Note that the quotation marks are necessary and that no white space ' - 'is allowed.') args = parser.parse_args() return args @@ -119,29 +119,30 @@ def main(): torch.manual_seed(args.seed) - # parse config - if not os.path.isfile(args.config): - try: - args.config = cfgs_name_path[args.config] - except KeyError: - raise FileNotFoundError(f'Cannot find {args.config}') - - # load config - cfg = Config.fromfile(args.config) - if args.cfg_options is not None: - cfg.merge_from_dict(args.cfg_options) - - model = BUILDER.build(cfg.model) - # Cast to inference mode - model.llm.gradient_checkpointing_disable() - model.llm.config.use_cache = True - - tokenizer = BUILDER.build(cfg.tokenizer) - + # build model + quantization_config = None + load_in_8bit = False + if args.bits == 4: + quantization_config = BitsAndBytesConfig( + load_in_4bit=True, + load_in_8bit=False, + llm_int8_threshold=6.0, + llm_int8_has_fp16_weight=False, + bnb_4bit_compute_dtype=torch.float16, + bnb_4bit_use_double_quant=True, + bnb_4bit_quant_type='nf4') + elif args.bits == 8: + load_in_8bit = True + model = AutoModelForCausalLM.from_pretrained( + args.model_name_or_path, + quantization_config=quantization_config, + load_in_8bit=load_in_8bit, + device_map='auto', + trust_remote_code=True) + tokenizer = AutoTokenizer.from_pretrained( + args.model_name_or_path, trust_remote_code=True) if args.adapter is not None: - adapter = torch.load(args.adapter, map_location='cpu') - state_dict_key = 'module' if args.is_deepspeed else 'state_dict' - model.load_state_dict(adapter[state_dict_key], strict=False) + model = PeftModel.from_pretrained(model, args.adapter) print(f'Load adapter from {args.adapter}') Streamer, stop_criteria = get_chat_utils(model) @@ -173,10 +174,10 @@ def main(): template = PROMPT_TEMPLATE[args.prompt_template] if 'INSTRUCTION_START' in template and n_turn == 0: prompt_text = template['INSTRUCTION_START'].format( - input=text, round=n_turn + 1, **cfg) + input=text, round=n_turn + 1, bot_name=args.bot_name) else: prompt_text = template['INSTRUCTION'].format( - input=text, round=n_turn + 1, **cfg) + input=text, round=n_turn + 1, bot_name=args.bot_name) if args.prompt_template == 'moss_sft': if not inner_thoughts_open: prompt_text.replace('- Inner thoughts: enabled.', diff --git a/xtuner/tools/chat_hf.py b/xtuner/tools/chat_hf.py deleted file mode 100644 index 7cc81d89a..000000000 --- a/xtuner/tools/chat_hf.py +++ /dev/null @@ -1,235 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. 
-import argparse -import re - -import torch -from peft import PeftModel -from transformers import (AutoModelForCausalLM, AutoTokenizer, - BitsAndBytesConfig, GenerationConfig) - -from xtuner.tools.utils import get_chat_utils, update_stop_criteria -from xtuner.utils import PROMPT_TEMPLATE - - -def parse_args(): - parser = argparse.ArgumentParser(description='Chat with a HF model') - parser.add_argument( - 'model_name_or_path', help='Hugging Face model name or path') - parser.add_argument('--adapter', default=None, help='adapter name or path') - parser.add_argument( - '--prompt-template', - choices=PROMPT_TEMPLATE.keys(), - default=None, - help='Specify a prompt option') - parser.add_argument( - '--bot-name', type=str, default='BOT', help='Name for Bot') - parser.add_argument( - '--with-plugins', - nargs='+', - choices=['calculate', 'solve', 'search'], - help='Specify plugins to use') - parser.add_argument( - '--no-streamer', action='store_true', help='Whether to with streamer') - parser.add_argument('--command-stop-word', default=None, help='Stop key') - parser.add_argument('--answer-stop-word', default=None, help='Stop key') - parser.add_argument( - '--max-new-tokens', - type=int, - default=2048, - help='Maximum number of new tokens allowed in generated text') - parser.add_argument( - '--temperature', - type=float, - default=0.1, - help='The value used to modulate the next token probabilities.') - parser.add_argument( - '--top-k', - type=int, - default=40, - help='The number of highest probability vocabulary tokens to ' - 'keep for top-k-filtering.') - parser.add_argument( - '--top-p', - type=float, - default=0.75, - help='If set to float < 1, only the smallest set of most probable ' - 'tokens with probabilities that add up to top_p or higher are ' - 'kept for generation.') - parser.add_argument( - '--seed', - type=int, - default=0, - help='Random seed for reproducible text generation') - args = parser.parse_args() - return args - - -def get_input(): - """Helper function for getting input from users.""" - sentinel = '' # ends when this string is seen - result = None - while result is None: - print('\ndouble enter to end input >>> ', end='') - try: - result = '\n'.join(iter(input, sentinel)) - except UnicodeDecodeError: - print('Invalid characters detected. 
Please enter again.') - return result - - -def main(): - args = parse_args() - - if args.with_plugins is None: - inner_thoughts_open = False - calculate_open = False - solve_open = False - search_open = False - else: - assert args.prompt_template == 'moss_sft' - from plugins import plugins_api - inner_thoughts_open = True - calculate_open = 'calculate' in args.with_plugins - solve_open = 'solve' in args.with_plugins - search_open = 'search' in args.with_plugins - # pre-import for api and model preparation - if calculate_open: - from plugins import calculate # noqa: F401 - if solve_open: - from plugins import solve # noqa: F401 - if search_open: - from plugins import search # noqa: F401 - - torch.manual_seed(args.seed) - - # build model - quantization_config = BitsAndBytesConfig( - load_in_4bit=True, - load_in_8bit=False, - llm_int8_threshold=6.0, - llm_int8_has_fp16_weight=False, - bnb_4bit_compute_dtype=torch.float16, - bnb_4bit_use_double_quant=True, - bnb_4bit_quant_type='nf4') - model = AutoModelForCausalLM.from_pretrained( - args.model_name_or_path, - quantization_config=quantization_config, - trust_remote_code=True) - tokenizer = AutoTokenizer.from_pretrained( - args.model_name_or_path, trust_remote_code=True) - if args.adapter is not None: - model = PeftModel.from_pretrained(model, args.adapter) - print(f'Load adapter from {args.adapter}') - - Streamer, stop_criteria = get_chat_utils(model) - if args.no_streamer: - Streamer = None - - command_stop_cr, answer_stop_cr = update_stop_criteria( - base=stop_criteria, - tokenizer=tokenizer, - command_stop_word=args.command_stop_word, - answer_stop_word=args.answer_stop_word) - - gen_config = GenerationConfig( - max_new_tokens=args.max_new_tokens, - do_sample=args.temperature > 0, - temperature=args.temperature, - top_p=args.top_p, - top_k=args.top_k, - ) - - n_turn = 0 - inputs = '' - while True: - text = get_input() - - if text == 'exit': - exit(0) - if args.prompt_template is not None: - template = PROMPT_TEMPLATE[args.prompt_template] - if 'INSTRUCTION_START' in template and n_turn == 0: - prompt_text = template['INSTRUCTION_START'].format( - input=text, round=n_turn + 1, bot_name=args.bot_name) - else: - prompt_text = template['INSTRUCTION'].format( - input=text, round=n_turn + 1, bot_name=args.bot_name) - if args.prompt_template == 'moss_sft': - if not inner_thoughts_open: - prompt_text.replace('- Inner thoughts: enabled.', - '- Inner thoughts: disabled.') - if not calculate_open: - prompt_text.replace( - '- Calculator: enabled. API: Calculate(expression)', - '- Calculator: disabled.') - if not solve_open: - prompt_text.replace( - '- Equation solver: enabled. API: Solve(equation)', - '- Equation solver: disabled.') - if not search_open: - prompt_text.replace( - '- Web search: enabled. 
API: Search(query)', - '- Web search: disabled.') - - inputs += prompt_text - else: - inputs += text - ids = tokenizer.encode(inputs, return_tensors='pt') - streamer = Streamer(tokenizer) if Streamer is not None else None - if args.with_plugins is not None: - generate_output = model.generate( - inputs=ids.cuda(), - generation_config=gen_config, - streamer=streamer, - stopping_criteria=command_stop_cr).cpu() - generate_output_text = tokenizer.decode( - generate_output[0][len(ids[0]):]) - if streamer is None: - end = '' if generate_output_text[-1] == '\n' else '\n' - print(generate_output_text, end=end) - pattern = r'<\|Commands\|>:(.*?)' - command_text = ', '.join(re.findall(pattern, generate_output_text)) - extent_text = plugins_api( - command_text, - calculate_open=calculate_open, - solve_open=solve_open, - search_open=search_open) - end = '' if extent_text[-1] == '\n' else '\n' - print(extent_text, end=end) - extent_text_ids = tokenizer.encode( - extent_text, return_tensors='pt', add_special_tokens=False) - new_ids = torch.cat((generate_output, extent_text_ids), dim=1) - new_streamer = Streamer( - tokenizer) if Streamer is not None else None - generate_output = model.generate( - inputs=new_ids.cuda(), - generation_config=gen_config, - streamer=new_streamer, - stopping_criteria=answer_stop_cr) - if streamer is None: - output_text = tokenizer.decode( - generate_output[0][len(new_ids[0]):]) - end = '' if output_text[-1] == '\n' else '\n' - print(output_text, end=end) - else: - generate_output = model.generate( - inputs=ids.cuda(), - generation_config=gen_config, - streamer=streamer, - stopping_criteria=answer_stop_cr) - if streamer is None: - output_text = tokenizer.decode( - generate_output[0][len(ids[0]):]) - end = '' if output_text[-1] == '\n' else '\n' - print(output_text, end=end) - inputs = tokenizer.decode(generate_output[0]) - n_turn += 1 - if len(generate_output[0]) >= args.max_new_tokens: - print('Remove the memory of history responses, since ' - f'it exceeds the length limitation {args.max_new_tokens}.') - n_turn = 0 - inputs = '' - - -if __name__ == '__main__': - main() diff --git a/xtuner/tools/model_converters/adapter_pth2hf.py b/xtuner/tools/model_converters/adapter_pth2hf.py deleted file mode 100644 index 80ae2e209..000000000 --- a/xtuner/tools/model_converters/adapter_pth2hf.py +++ /dev/null @@ -1,79 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import argparse -import os -import shutil - -import torch -from mmengine.config import Config, DictAction -from mmengine.utils import mkdir_or_exist - -from xtuner.configs import cfgs_name_path -from xtuner.registry import BUILDER - - -def parse_args(): - parser = argparse.ArgumentParser( - description='Convert the pth adapter to HuggingFace adapter') - parser.add_argument( - 'config', - help='config file name or path. Note: Please use the original ' - 'configs, instead of the automatically saved log configs.') - parser.add_argument('adapter_checkpoint', help='adapter checkpoint file') - parser.add_argument( - 'save_dir', help='the directory to save the checkpoint') - parser.add_argument( - '--is-deepspeed', - action='store_true', - help='whether the adapter is saved from deepspeed') - parser.add_argument( - '--cfg-options', - nargs='+', - action=DictAction, - help='override some settings in the used config, the key-value pair ' - 'in xxx=yyy format will be merged into config file. If the value to ' - 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' - 'It also allows nested list/tuple values, e.g. 
key="[(a,b),(c,d)]" ' - 'Note that the quotation marks are necessary and that no white space ' - 'is allowed.') - args = parser.parse_args() - return args - - -def main(): - args = parse_args() - - # parse config - if not os.path.isfile(args.config): - try: - args.config = cfgs_name_path[args.config] - except KeyError: - raise FileNotFoundError(f'Cannot find {args.config}') - - # load config - cfg = Config.fromfile(args.config) - if args.cfg_options is not None: - cfg.merge_from_dict(args.cfg_options) - - # load on cpu - cfg.model.llm.device_map = 'cpu' - if cfg.model.llm.get('quantization_config'): - cfg.model.llm.quantization_config.\ - llm_int8_enable_fp32_cpu_offload = True - - model = BUILDER.build(cfg.model) - - adapter_checkpoint = torch.load( - args.adapter_checkpoint, map_location='cpu') - state_dict_key = 'module' if args.is_deepspeed else 'state_dict' - model.load_state_dict(adapter_checkpoint[state_dict_key], strict=False) - print(f'Load adapter from {args.adapter_checkpoint}') - - mkdir_or_exist(args.save_dir) - model.llm.save_pretrained(args.save_dir) - shutil.copyfile(args.config, os.path.join(args.save_dir, - 'xtuner_config.py')) - print(f'Save to {args.save_dir}') - - -if __name__ == '__main__': - main() diff --git a/xtuner/tools/model_converters/merge_adapter_hf.py b/xtuner/tools/model_converters/merge.py similarity index 87% rename from xtuner/tools/model_converters/merge_adapter_hf.py rename to xtuner/tools/model_converters/merge.py index 2de6bc23a..169cea620 100644 --- a/xtuner/tools/model_converters/merge_adapter_hf.py +++ b/xtuner/tools/model_converters/merge.py @@ -13,7 +13,12 @@ def parse_args(): parser.add_argument('adapter_name_or_path', help='adapter name or path') parser.add_argument( 'save_dir', help='the directory to save the merged model') - parser.add_argument('--max-shard-size', type=str, default='2GB') + parser.add_argument( + '--max-shard-size', + type=str, + default='2GB', + help='Only applicable for LLM. The maximum size for ' + 'each sharded checkpoint.') args = parser.parse_args() return args diff --git a/xtuner/tools/model_converters/merge_adapter.py b/xtuner/tools/model_converters/merge_adapter.py deleted file mode 100644 index 7383f23dc..000000000 --- a/xtuner/tools/model_converters/merge_adapter.py +++ /dev/null @@ -1,79 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import argparse -import os - -import torch -from mmengine.config import Config, DictAction - -from xtuner.configs import cfgs_name_path -from xtuner.registry import BUILDER - - -def parse_args(): - parser = argparse.ArgumentParser(description='Merge a pth adapter to LLM') - parser.add_argument( - 'config', - help='config file name or path. Note: Please use the original ' - 'configs, instead of the automatically saved log configs.') - parser.add_argument('adapter_checkpoint', help='adapter checkpoint file') - parser.add_argument( - 'save_dir', help='the directory to save the merged model') - parser.add_argument('--max-shard-size', type=str, default='2GB') - parser.add_argument( - '--is-deepspeed', - action='store_true', - help='whether the adapter is saved from deepspeed') - parser.add_argument( - '--cfg-options', - nargs='+', - action=DictAction, - help='override some settings in the used config, the key-value pair ' - 'in xxx=yyy format will be merged into config file. If the value to ' - 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' - 'It also allows nested list/tuple values, e.g. 
key="[(a,b),(c,d)]" ' - 'Note that the quotation marks are necessary and that no white space ' - 'is allowed.') - args = parser.parse_args() - return args - - -def main(): - args = parse_args() - - # parse config - if not os.path.isfile(args.config): - try: - args.config = cfgs_name_path[args.config] - except KeyError: - raise FileNotFoundError(f'Cannot find {args.config}') - - # load config - cfg = Config.fromfile(args.config) - if args.cfg_options is not None: - cfg.merge_from_dict(args.cfg_options) - - # load on cpu, with non-quantized - cfg.model.llm.device_map = 'cpu' - cfg.model.llm.quantization_config = None - cfg.model.llm.low_cpu_mem_usage = True - torch_dtype = cfg.model.llm.get('torch_dtype', torch.float16) - model = BUILDER.build(cfg.model) - tokenizer = BUILDER.build(cfg.tokenizer) - adapter_checkpoint = torch.load( - args.adapter_checkpoint, map_location='cpu') - state_dict_key = 'module' if args.is_deepspeed else 'state_dict' - model.load_state_dict(adapter_checkpoint[state_dict_key], strict=False) - print(f'Load adapter from {args.adapter_checkpoint}') - - model = model.llm - model_merged = model.merge_and_unload() - for param in model.parameters(): - param.data = param.data.to(torch_dtype) - model_merged.save_pretrained( - args.save_dir, max_shard_size=args.max_shard_size) - tokenizer.save_pretrained(args.save_dir) - print(f'Save to {args.save_dir}') - - -if __name__ == '__main__': - main() diff --git a/xtuner/tools/model_converters/pth_to_hf.py b/xtuner/tools/model_converters/pth_to_hf.py new file mode 100644 index 000000000..ccf15861b --- /dev/null +++ b/xtuner/tools/model_converters/pth_to_hf.py @@ -0,0 +1,108 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import argparse +import os +import shutil + +import torch +from mmengine.config import Config, DictAction + +from xtuner.configs import cfgs_name_path +from xtuner.registry import BUILDER + + +def parse_args(): + parser = argparse.ArgumentParser( + description='Convert the pth model to HuggingFace model') + parser.add_argument( + 'config', + help='config file name or path. Note: Please use the original ' + 'configs, instead of the automatically saved log configs.') + parser.add_argument('pth_model', help='pth model file') + parser.add_argument( + 'save_dir', help='the directory to save HuggingFace model') + parser.add_argument( + '--fp32', + action='store_true', + help='Save as fp32. If not set, fp16 will be used by default.') + parser.add_argument( + '--max-shard-size', + type=str, + default='2GB', + help='Only applicable for LLM. The maximum size for ' + 'each sharded checkpoint.') + parser.add_argument( + '--cfg-options', + nargs='+', + action=DictAction, + help='override some settings in the used config, the key-value pair ' + 'in xxx=yyy format will be merged into config file. If the value to ' + 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' + 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" ' + 'Note that the quotation marks are necessary and that no white space ' + 'is allowed.') + args = parser.parse_args() + return args + + +def guess_load_checkpoint(pth_model): + if os.path.isfile(pth_model): + state_dict = torch.load(pth_model, map_location='cpu') + if 'state_dict' in state_dict: + state_dict = state_dict['state_dict'] + elif os.path.isdir(pth_model): + try: + from deepspeed.utils.zero_to_fp32 import \ + get_fp32_state_dict_from_zero_checkpoint + except ImportError: + raise ImportError( + 'The provided PTH model appears to be a DeepSpeed checkpoint. 
'
+                'However, the DeepSpeed library is not detected in the '
+                'current environment. This suggests that DeepSpeed may not '
+                'be installed or is incorrectly configured. Please verify '
+                'your setup.')
+        state_dict = get_fp32_state_dict_from_zero_checkpoint(
+            os.path.dirname(pth_model), os.path.basename(pth_model))
+    else:
+        raise FileNotFoundError(f'Cannot find {pth_model}')
+    return state_dict
+
+
+def main():
+    args = parse_args()
+
+    # parse config
+    if not os.path.isfile(args.config):
+        try:
+            args.config = cfgs_name_path[args.config]
+        except KeyError:
+            raise FileNotFoundError(f'Cannot find {args.config}')
+
+    # load config
+    cfg = Config.fromfile(args.config)
+    if args.cfg_options is not None:
+        cfg.merge_from_dict(args.cfg_options)
+
+    model = BUILDER.build(cfg.model)
+
+    state_dict = guess_load_checkpoint(args.pth_model)
+    model.load_state_dict(state_dict, strict=False)
+    print(f'Load PTH model from {args.pth_model}')
+
+    if not args.fp32:
+        print('Convert weights to float16')
+        model.llm.half()
+
+    print(f'Saving HuggingFace model to {args.save_dir}')
+    model.llm.save_pretrained(
+        args.save_dir, max_shard_size=args.max_shard_size)
+    if 'PeftModel' not in model.llm.__class__.__name__:
+        print(f'Saving HuggingFace tokenizer to {args.save_dir}')
+        tokenizer = BUILDER.build(cfg.tokenizer)
+        tokenizer.save_pretrained(args.save_dir)
+    shutil.copyfile(args.config, os.path.join(args.save_dir,
+                                              'xtuner_config.py'))
+    print('All done!')
+
+
+if __name__ == '__main__':
+    main()
diff --git a/xtuner/tools/model_converters/split_hf_llm.py b/xtuner/tools/model_converters/split.py
similarity index 100%
rename from xtuner/tools/model_converters/split_hf_llm.py
rename to xtuner/tools/model_converters/split.py
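
As a quick usage reference for the new converter: the `guess_load_checkpoint` helper added in `pth_to_hf.py` accepts either a plain `.pth` file or a DeepSpeed checkpoint directory saved by `xtuner train`. A minimal sketch (the work-dir path below is illustrative, not from this PR):

```python
# Load a trained checkpoint with the helper added in pth_to_hf.py.
# Pass either a plain .pth file or a DeepSpeed checkpoint directory.
from xtuner.tools.model_converters.pth_to_hf import guess_load_checkpoint

state_dict = guess_load_checkpoint(
    'work_dirs/internlm_7b_qlora_oasst1_e3/epoch_1.pth')  # illustrative path
print(f'Loaded {len(state_dict)} tensors')
```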