From 6e44d6b3933d2bafc513b4541ab9bec5a2871f01 Mon Sep 17 00:00:00 2001
From: Zhihao Lin <36994684+LZHgrla@users.noreply.github.com>
Date: Tue, 5 Sep 2023 16:08:28 +0800
Subject: [PATCH] [Improve] Redesign convert tools (#96)
* refactor tools
* modify entry_point
* modify docs
* update docs
* fix
* fix
* Update README.md
* Update README.md
* Update README.md
* Update README_zh-CN.md
* fix pre-commit
* rename converter
* update pth2hf
* rename pth2hf to pth_to_hf
* add fp32 for pth_to_hf
* Update README.md
* Update README_zh-CN.md
* Update README_zh-CN.md
* Update README.md
* Update README_zh-CN.md
* Update README_zh-CN.md
* Update README.md
* Update README.md
* Update README_zh-CN.md
* fix pre-commit
---
README.md | 99 ++++----
README_zh-CN.md | 99 ++++----
docs/en/user_guides/chat.md | 36 +--
docs/zh_cn/user_guides/chat.md | 36 +--
xtuner/entry_point.py | 73 ++----
xtuner/tools/chat.py | 97 ++++----
xtuner/tools/chat_hf.py | 235 ------------------
.../tools/model_converters/adapter_pth2hf.py | 79 ------
.../{merge_adapter_hf.py => merge.py} | 7 +-
.../tools/model_converters/merge_adapter.py | 79 ------
xtuner/tools/model_converters/pth_to_hf.py | 108 ++++++++
.../{split_hf_llm.py => split.py} | 0
12 files changed, 328 insertions(+), 620 deletions(-)
delete mode 100644 xtuner/tools/chat_hf.py
delete mode 100644 xtuner/tools/model_converters/adapter_pth2hf.py
rename xtuner/tools/model_converters/{merge_adapter_hf.py => merge.py} (87%)
delete mode 100644 xtuner/tools/model_converters/merge_adapter.py
create mode 100644 xtuner/tools/model_converters/pth_to_hf.py
rename xtuner/tools/model_converters/{split_hf_llm.py => split.py} (100%)
diff --git a/README.md b/README.md
index b7f06adbe..d28730113 100644
--- a/README.md
+++ b/README.md
@@ -26,9 +26,28 @@ XTuner is a toolkit for efficiently fine-tuning LLM, developed by the [MMRazor](
## 🌟 Demos
+- Ready-to-use models and datasets from XTuner API [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1eBI9yiOkX-t7P-0-t9vS8y1x5KmWrkoU?usp=sharing)
+
- QLoRA Fine-tune [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1QAEZVBfQ7LZURkMUtaq0b-5nEQII9G9Z?usp=sharing)
+
- Plugin-based Chat [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/144OuTVyT_GvFyDMtlSlTzcxYIfnRsklq?usp=sharing)
-- Ready-to-use models and datasets from XTuner API [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1eBI9yiOkX-t7P-0-t9vS8y1x5KmWrkoU?usp=sharing)
+
+*Examples of Plugin-based Chat 🔥🔥🔥*
+
## 🔥 Supports
@@ -123,35 +142,6 @@ XTuner is a toolkit for efficiently fine-tuning LLM, developed by the [MMRazor](
pip install -e '.[all]'
```
-### Chat [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/144OuTVyT_GvFyDMtlSlTzcxYIfnRsklq?usp=sharing)
-
-
-*Examples of Plugins-based Chat 🔥🔥🔥*
-
-XTuner provides tools to chat with pretrained / fine-tuned LLMs.
-
-- For example, we can start the chat with Llama2-7B-Plugins by
-
- ```shell
- xtuner chat hf meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-moss-003-sft --bot-name Llama2 --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "<eoc>" --answer-stop-word "<eom>" --no-streamer
- ```
-
-For more examples, please see [chat.md](./docs/en/user_guides/chat.md).
-
### Fine-tune [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1QAEZVBfQ7LZURkMUtaq0b-5nEQII9G9Z?usp=sharing)
XTuner supports the efficient fine-tune (*e.g.*, QLoRA) for LLMs. Dataset prepare guides can be found on [dataset_prepare.md](./docs/en/user_guides/dataset_prepare.md).
@@ -165,10 +155,16 @@ XTuner supports the efficient fine-tune (*e.g.*, QLoRA) for LLMs. Dataset prepar
Or, if the provided configs cannot meet the requirements, please copy the provided config to the specified directory and make specific modifications by
```shell
- xtuner copy-cfg ${CONFIG_NAME} ${SAVE_DIR}
+ xtuner copy-cfg ${CONFIG_NAME} ${SAVE_PATH}
```
-- **Step 1**, start fine-tuning. For example, we can start the QLoRA fine-tuning of InternLM-7B with oasst1 dataset by
+- **Step 1**, start fine-tuning.
+
+ ```shell
+ xtuner train ${CONFIG_NAME_OR_PATH}
+ ```
+
+ For example, we can start the QLoRA fine-tuning of InternLM-7B with oasst1 dataset by
```shell
# On a single GPU
@@ -180,24 +176,37 @@ XTuner supports the efficient fine-tune (*e.g.*, QLoRA) for LLMs. Dataset prepar
For more examples, please see [finetune.md](./docs/en/user_guides/finetune.md).
-### Deployment
-
-- **Step 0**, convert the pth adapter to HuggingFace adapter, by
+- **Step 2**, convert the saved PTH model (if using DeepSpeed, it will be a directory) to a HuggingFace model, by
```shell
- xtuner convert adapter_pth2hf \
- ${CONFIG} \
- ${PATH_TO_PTH_ADAPTER} \
- ${SAVE_PATH_TO_HF_ADAPTER}
+ xtuner convert pth_to_hf ${CONFIG_NAME_OR_PATH} ${PTH} ${SAVE_PATH}
```
- or, directly merge the pth adapter to pretrained LLM, by
+### Chat [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/144OuTVyT_GvFyDMtlSlTzcxYIfnRsklq?usp=sharing)
+
+XTuner provides tools to chat with pretrained / fine-tuned LLMs.
+
+```shell
+xtuner chat ${NAME_OR_PATH_TO_LLM} --adapter ${NAME_OR_PATH_TO_ADAPTER} [optional arguments]
+```
+
+For example, we can start a chat with Llama2-7B, using the adapter trained on MOSS-003-SFT, by
+
+```shell
+xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-moss-003-sft --bot-name Llama2 --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "<eoc>" --answer-stop-word "<eom>" --no-streamer
+```
+
+For more examples, please see [chat.md](./docs/en/user_guides/chat.md).
+
+### Deployment
+
+- **Step 0**, merge the HuggingFace adapter into the pretrained LLM, by
```shell
xtuner convert merge_adapter \
- ${CONFIG} \
- ${PATH_TO_PTH_ADAPTER} \
- ${SAVE_PATH_TO_MERGED_LLM} \
+ ${NAME_OR_PATH_TO_LLM} \
+ ${NAME_OR_PATH_TO_ADAPTER} \
+ ${SAVE_PATH} \
--max-shard-size 2GB
```
@@ -212,7 +221,9 @@ XTuner supports the efficient fine-tune (*e.g.*, QLoRA) for LLMs. Dataset prepar
--seed 0
```
- 🎯 We are woking closely with [LMDeploy](https://github.com/InternLM/lmdeploy), to implement the deployment of **plugins-based chat**!
+ 🔥 Seeking efficient inference with less GPU memory? Try 4-bit quantization from [LMDeploy](https://github.com/InternLM/lmdeploy)! For more details, see [here](https://github.com/InternLM/lmdeploy/tree/main#quantization).
+
+ 🎯 We are working closely with [LMDeploy](https://github.com/InternLM/lmdeploy) to implement the deployment of **plugin-based chat**!
### Evaluation
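
For a LoRA/QLoRA config, the directory written by `xtuner convert pth_to_hf` is a standard PEFT adapter, so the new README workflow (train, convert, chat, merge) can also be exercised directly from Python. A minimal sketch follows; the base-model name and adapter path are hypothetical placeholders, not values taken from this patch.

```python
# Minimal sketch: load a pth_to_hf-converted LoRA/QLoRA adapter with peft/transformers.
# `base` and `adapter_dir` are hypothetical placeholders.
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

base = 'internlm/internlm-7b'
adapter_dir = './work_dirs/internlm_7b_qlora_oasst1_e3/hf_adapter'

tokenizer = AutoTokenizer.from_pretrained(base, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    base, torch_dtype=torch.float16, device_map='auto', trust_remote_code=True)
model = PeftModel.from_pretrained(model, adapter_dir)  # attach the converted adapter
model.eval()

ids = tokenizer('Give me three tips for staying healthy.', return_tensors='pt').input_ids
out = model.generate(ids.to(model.device), max_new_tokens=64)
print(tokenizer.decode(out[0][ids.shape[-1]:], skip_special_tokens=True))
```
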
diff --git a/README_zh-CN.md b/README_zh-CN.md
index 30f512647..0ca037b7a 100644
--- a/README_zh-CN.md
+++ b/README_zh-CN.md
@@ -26,9 +26,28 @@ XTuner 是一个轻量级微调大语言模型的工具库,由 [MMRazor](https
## 🌟 示例
+- XTuner APIs所提供的开箱即用的模型与数据集 [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1eBI9yiOkX-t7P-0-t9vS8y1x5KmWrkoU?usp=sharing)
+
- QLoRA 微调 [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1QAEZVBfQ7LZURkMUtaq0b-5nEQII9G9Z?usp=sharing)
+
- 基于插件的对话 [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/144OuTVyT_GvFyDMtlSlTzcxYIfnRsklq?usp=sharing)
-- XTuner APIs所提供的开箱即用的模型与数据集 [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1eBI9yiOkX-t7P-0-t9vS8y1x5KmWrkoU?usp=sharing)
+
+*基于插件的对话 🔥🔥🔥*
+
## 🔥 支持列表
@@ -123,35 +142,6 @@ XTuner 是一个轻量级微调大语言模型的工具库,由 [MMRazor](https
pip install -e '.[all]'
```
-### 对话 [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/144OuTVyT_GvFyDMtlSlTzcxYIfnRsklq?usp=sharing)
-
-*基于插件的对话 🔥🔥🔥*
-
-XTuner 提供与大语言模型对话的工具。
-
-- 例如,与基于插件微调获得的 Llama2-7B-Plugins 对话:
-
- ```shell
- xtuner chat hf meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-moss-003-sft --bot-name Llama2 --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "<eoc>" --answer-stop-word "<eom>" --no-streamer
- ```
-
-更多示例,请查阅[文档](./docs/zh_cn/user_guides/chat.md)。
-
### 微调 [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1QAEZVBfQ7LZURkMUtaq0b-5nEQII9G9Z?usp=sharing)
XTuner 支持微调大语言模型。数据集预处理指南请查阅[文档](./docs/zh_cn/user_guides/dataset_prepare.md)。
@@ -165,10 +155,16 @@ XTuner 支持微调大语言模型。数据集预处理指南请查阅[文档](.
或者,如果所提供的配置文件不能满足使用需求,请导出所提供的配置文件并进行相应更改:
```shell
- xtuner copy-cfg ${CONFIG_NAME} ${SAVE_DIR}
+ xtuner copy-cfg ${CONFIG_NAME} ${SAVE_PATH}
```
-- **步骤 1**,开始微调。例如,我们可以利用 QLoRA 算法在 oasst1 数据集上微调 InternLM-7B:
+- **步骤 1**,开始微调。
+
+ ```shell
+ xtuner train ${CONFIG_NAME_OR_PATH}
+ ```
+
+ 例如,我们可以利用 QLoRA 算法在 oasst1 数据集上微调 InternLM-7B:
```shell
# 单卡
@@ -177,26 +173,39 @@ XTuner 支持微调大语言模型。数据集预处理指南请查阅[文档](.
NPROC_PER_NODE=${GPU_NUM} xtuner train internlm_7b_qlora_oasst1_e3
```
- 更多示例,请查阅[文档](./docs/zh_cn/user_guides/finetune.md).
+ 更多示例,请查阅[文档](./docs/zh_cn/user_guides/finetune.md)。
-### 部署
-
-- **步骤 0**,将 pth adapter 转换为 HuggingFace adapter:
+- **步骤 2**,将保存的 PTH 模型(如果使用了 DeepSpeed,则将会是一个文件夹)转换为 HuggingFace 模型:
```shell
- xtuner convert adapter_pth2hf \
- ${CONFIG} \
- ${PATH_TO_PTH_ADAPTER} \
- ${SAVE_PATH_TO_HF_ADAPTER}
+ xtuner convert pth_to_hf ${CONFIG_NAME_OR_PATH} ${PTH} ${SAVE_PATH}
```
- 或者,直接将 pth adapter 合并到大语言模型:
+### 对话 [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/144OuTVyT_GvFyDMtlSlTzcxYIfnRsklq?usp=sharing)
+
+XTuner 提供与大语言模型对话的工具。
+
+```shell
+xtuner chat ${NAME_OR_PATH_TO_LLM} --adapter ${NAME_OR_PATH_TO_ADAPTER} [optional arguments]
+```
+
+例如,与 Llama2-7b + MOSS-003-SFT adapter 对话:
+
+```shell
+xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-moss-003-sft --bot-name Llama2 --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "<eoc>" --answer-stop-word "<eom>" --no-streamer
+```
+
+更多示例,请查阅[文档](./docs/zh_cn/user_guides/chat.md)。
+
+### 部署
+
+- **步骤 0**,将 HuggingFace adapter 合并到大语言模型:
```shell
xtuner convert merge_adapter \
- ${CONFIG} \
- ${PATH_TO_PTH_ADAPTER} \
- ${SAVE_PATH_TO_MERGED_LLM} \
+ ${NAME_OR_PATH_TO_LLM} \
+ ${NAME_OR_PATH_TO_ADAPTER} \
+ ${SAVE_PATH} \
--max-shard-size 2GB
```
@@ -211,6 +220,8 @@ XTuner 支持微调大语言模型。数据集预处理指南请查阅[文档](.
--seed 0
```
+ 🔥 追求速度更快、显存占用更低的推理?欢迎体验 [LMDeploy](https://github.com/InternLM/lmdeploy) 提供的 4-bit 量化!使用指南请见[文档](https://github.com/InternLM/lmdeploy/tree/main#quantization)。
+
🎯 我们正在与 [LMDeploy](https://github.com/InternLM/lmdeploy) 紧密合作,以实现基于插件对话的部署!
### 评测
diff --git a/docs/en/user_guides/chat.md b/docs/en/user_guides/chat.md
index 2914296f0..65725f7a3 100644
--- a/docs/en/user_guides/chat.md
+++ b/docs/en/user_guides/chat.md
@@ -5,49 +5,49 @@
- InternLM-7B, oasst1
```shell
- xtuner chat hf internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-oasst1 --prompt-template openassistant
+ xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-oasst1 --prompt-template openassistant
```
- InternLM-7B, Arxiv Gentitle
```shell
- xtuner chat hf internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-arxiv-gentitle --prompt-template title
+ xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-arxiv-gentitle --prompt-template title
```
- InternLM-7B, Colorist
```shell
- xtuner chat hf internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-colorist --prompt-template colorist
+ xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-colorist --prompt-template colorist
```
- InternLM-7B, Coder
```shell
- xtuner chat hf internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-coder --prompt-template code
+ xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-coder --prompt-template code
```
- InternLM-7B, SQL
```shell
- xtuner chat hf internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-sql --prompt-template sql
+ xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-sql --prompt-template sql
```
- InternLM-7B, Lawyer
```shell
- xtuner chat hf internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-lawyer --prompt-template lawyer
+ xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-lawyer --prompt-template lawyer
```
- InternLM-7B, Open-Platypus
```shell
- xtuner chat hf internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-open-platypus --prompt-template alpaca
+ xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-open-platypus --prompt-template alpaca
```
- InternLM-7B, Alpaca-enzh
```shell
- xtuner chat hf internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-alpaca-enzh --prompt-template alpaca
+ xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-alpaca-enzh --prompt-template alpaca
```
## Chat with [Llama2](https://github.com/facebookresearch/llama)
@@ -58,19 +58,19 @@
```shell
export SERPER_API_KEY="xxx" # Please get the key from https://serper.dev to support google search!
- xtuner chat hf meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-moss-003-sft --bot-name Llama2 --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "<eoc>" --answer-stop-word "<eom>" --no-streamer
+ xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-moss-003-sft --bot-name Llama2 --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "<eoc>" --answer-stop-word "<eom>" --no-streamer
```
- Llama2-7B, Arxiv Gentitle
```shell
- xtuner chat hf meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-arxiv-gentitle --prompt-template title
+ xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-arxiv-gentitle --prompt-template title
```
- Llama2-7B, Colorist
```shell
- xtuner chat hf meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-colorist --prompt-template colorist
+ xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-colorist --prompt-template colorist
```
## Chat with [Qwen](https://github.com/QwenLM)
@@ -79,25 +79,25 @@
```shell
export SERPER_API_KEY="xxx" # Please get the key from https://serper.dev to support google search!
- xtuner chat hf Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-moss-003-sft --bot-name Qwen --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "<eoc>" --answer-stop-word "<eom>"
+ xtuner chat Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-moss-003-sft --bot-name Qwen --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "<eoc>" --answer-stop-word "<eom>"
```
- Qwen-7B, oasst1
```shell
- xtuner chat hf Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-oasst1 --prompt-template openassistant --answer-stop-word '<|endoftext|>'
+ xtuner chat Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-oasst1 --prompt-template openassistant --answer-stop-word '<|endoftext|>'
```
- Qwen-7B, Arxiv Gentitle
```shell
- xtuner chat hf Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-arxiv-gentitle --prompt-template title --answer-stop-word '<|endoftext|>'
+ xtuner chat Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-arxiv-gentitle --prompt-template title --answer-stop-word '<|endoftext|>'
```
- Qwen-7B, Alpaca-enzh
```shell
- xtuner chat hf Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-alpaca-enzh --prompt-template alpaca --answer-stop-word '<|endoftext|>'
+ xtuner chat Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-alpaca-enzh --prompt-template alpaca --answer-stop-word '<|endoftext|>'
```
## Chat with [Baichuan](https://github.com/baichuan-inc)
@@ -105,17 +105,17 @@
- Baichuan-7B, oasst1
```shell
- xtuner chat hf baichuan-inc/Baichuan-7B --adapter xtuner/Baichuan-7B-qlora-oasst1 --prompt-template openassistant
+ xtuner chat baichuan-inc/Baichuan-7B --adapter xtuner/Baichuan-7B-qlora-oasst1 --prompt-template openassistant
```
- Baichuan-7B, Arxiv Gentitle
```shell
- xtuner chat hf baichuan-inc/Baichuan-7B --adapter xtuner/Baichuan-7B-qlora-arxiv-gentitle --prompt-template title --no-streamer
+ xtuner chat baichuan-inc/Baichuan-7B --adapter xtuner/Baichuan-7B-qlora-arxiv-gentitle --prompt-template title --no-streamer
```
- Baichuan-7B, Alpaca-enzh
```shell
- xtuner chat hf baichuan-inc/Baichuan-7B --adapter xtuner/Baichuan-7B-qlora-alpaca-enzh --prompt-template alpaca
+ xtuner chat baichuan-inc/Baichuan-7B --adapter xtuner/Baichuan-7B-qlora-alpaca-enzh --prompt-template alpaca
```
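
The commands above rely on `--answer-stop-word` (and, for plugins, `--command-stop-word`) to cut generation at template-specific tokens such as `<|endoftext|>`. The real stop-word handling lives in `update_stop_criteria` from `xtuner.tools.utils` and is not part of this patch; the following is only an illustrative sketch of how a stop word can be enforced with `transformers` stopping criteria.

```python
# Illustrative sketch only; xtuner's actual logic (update_stop_criteria) is not shown here.
from transformers import StoppingCriteria, StoppingCriteriaList


class StopWordCriteria(StoppingCriteria):
    """Stop generation once `stop_word` appears in the decoded tail of the sequence."""

    def __init__(self, tokenizer, stop_word, tail_tokens=16):
        self.tokenizer = tokenizer
        self.stop_word = stop_word
        self.tail_tokens = tail_tokens

    def __call__(self, input_ids, scores, **kwargs) -> bool:
        tail = self.tokenizer.decode(input_ids[0][-self.tail_tokens:])
        return self.stop_word in tail


# usage (hypothetical):
#   model.generate(..., stopping_criteria=StoppingCriteriaList(
#       [StopWordCriteria(tokenizer, '<|endoftext|>')]))
```
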
diff --git a/docs/zh_cn/user_guides/chat.md b/docs/zh_cn/user_guides/chat.md
index 1ae01388b..6fef1684c 100644
--- a/docs/zh_cn/user_guides/chat.md
+++ b/docs/zh_cn/user_guides/chat.md
@@ -5,49 +5,49 @@
- InternLM-7B, oasst1
```shell
- xtuner chat hf internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-oasst1 --prompt-template openassistant
+ xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-oasst1 --prompt-template openassistant
```
- InternLM-7B, Arxiv Gentitle
```shell
- xtuner chat hf internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-arxiv-gentitle --prompt-template title
+ xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-arxiv-gentitle --prompt-template title
```
- InternLM-7B, Colorist
```shell
- xtuner chat hf internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-colorist --prompt-template colorist
+ xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-colorist --prompt-template colorist
```
- InternLM-7B, Coder
```shell
- xtuner chat hf internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-coder --prompt-template code
+ xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-coder --prompt-template code
```
- InternLM-7B, SQL
```shell
- xtuner chat hf internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-sql --prompt-template sql
+ xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-sql --prompt-template sql
```
- InternLM-7B, Lawyer
```shell
- xtuner chat hf internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-lawyer --prompt-template lawyer
+ xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-lawyer --prompt-template lawyer
```
- InternLM-7B, Open-Platypus
```shell
- xtuner chat hf internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-open-platypus --prompt-template alpaca
+ xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-open-platypus --prompt-template alpaca
```
- InternLM-7B, Alpaca-enzh
```shell
- xtuner chat hf internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-alpaca-enzh --prompt-template alpaca
+ xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-alpaca-enzh --prompt-template alpaca
```
## 与微调后的 [Llama2](https://github.com/facebookresearch/llama) 对话
@@ -58,19 +58,19 @@
```shell
export SERPER_API_KEY="xxx" # 请从 https://serper.dev 获得API_KEY,以此支持谷歌搜索!
- xtuner chat hf meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-moss-003-sft --bot-name Llama2 --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "<eoc>" --answer-stop-word "<eom>" --no-streamer
+ xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-moss-003-sft --bot-name Llama2 --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "<eoc>" --answer-stop-word "<eom>" --no-streamer
```
- Llama2-7B, Arxiv Gentitle
```shell
- xtuner chat hf meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-arxiv-gentitle --prompt-template title
+ xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-arxiv-gentitle --prompt-template title
```
- Llama2-7B, Colorist
```shell
- xtuner chat hf meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-colorist --prompt-template colorist
+ xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-colorist --prompt-template colorist
```
## 与微调后的 [Qwen](https://github.com/QwenLM) 对话
@@ -79,25 +79,25 @@
```shell
export SERPER_API_KEY="xxx" # 请从 https://serper.dev 获得API_KEY,以此支持谷歌搜索!
- xtuner chat hf Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-moss-003-sft --bot-name Qwen --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "<eoc>" --answer-stop-word "<eom>"
+ xtuner chat Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-moss-003-sft --bot-name Qwen --prompt-template moss_sft --with-plugins calculate solve search --command-stop-word "<eoc>" --answer-stop-word "<eom>"
```
- Qwen-7B, oasst1
```shell
- xtuner chat hf Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-oasst1 --prompt-template openassistant --answer-stop-word '<|endoftext|>'
+ xtuner chat Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-oasst1 --prompt-template openassistant --answer-stop-word '<|endoftext|>'
```
- Qwen-7B, Arxiv Gentitle
```shell
- xtuner chat hf Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-arxiv-gentitle --prompt-template title --answer-stop-word '<|endoftext|>'
+ xtuner chat Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-arxiv-gentitle --prompt-template title --answer-stop-word '<|endoftext|>'
```
- Qwen-7B, Alpaca-enzh
```shell
- xtuner chat hf Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-alpaca-enzh --prompt-template alpaca --answer-stop-word '<|endoftext|>'
+ xtuner chat Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-alpaca-enzh --prompt-template alpaca --answer-stop-word '<|endoftext|>'
```
## 与微调后的 [Baichuan](https://github.com/baichuan-inc) 对话
@@ -105,17 +105,17 @@
- Baichuan-7B, oasst1
```shell
- xtuner chat hf baichuan-inc/Baichuan-7B --adapter xtuner/Baichuan-7B-qlora-oasst1 --prompt-template openassistant
+ xtuner chat baichuan-inc/Baichuan-7B --adapter xtuner/Baichuan-7B-qlora-oasst1 --prompt-template openassistant
```
- Baichuan-7B, Arxiv Gentitle
```shell
- xtuner chat hf baichuan-inc/Baichuan-7B --adapter xtuner/Baichuan-7B-qlora-arxiv-gentitle --prompt-template title --no-streamer
+ xtuner chat baichuan-inc/Baichuan-7B --adapter xtuner/Baichuan-7B-qlora-arxiv-gentitle --prompt-template title --no-streamer
```
- Baichuan-7B, Alpaca-enzh
```shell
- xtuner chat hf baichuan-inc/Baichuan-7B --adapter xtuner/Baichuan-7B-qlora-alpaca-enzh --prompt-template alpaca
+ xtuner chat baichuan-inc/Baichuan-7B --adapter xtuner/Baichuan-7B-qlora-alpaca-enzh --prompt-template alpaca
```
diff --git a/xtuner/entry_point.py b/xtuner/entry_point.py
index 628610d38..f36f2cc39 100644
--- a/xtuner/entry_point.py
+++ b/xtuner/entry_point.py
@@ -8,10 +8,9 @@
from mmengine.logging import print_log
import xtuner
-from xtuner.tools import chat, chat_hf, copy_cfg, list_cfg, test, train
+from xtuner.tools import chat, copy_cfg, list_cfg, test, train
from xtuner.tools.data_preprocess import arxiv as arxiv_preprocess
-from xtuner.tools.model_converters import (adapter_pth2hf, merge_adapter,
- merge_adapter_hf, split_hf_llm)
+from xtuner.tools.model_converters import merge, pth_to_hf, split
# Define valid modes
MODES = ('list-cfg', 'copy-cfg', 'train', 'test', 'chat', 'convert',
@@ -37,18 +36,14 @@
xtuner train $CONFIG
3-2. Fine-tune LLMs by multiple GPUs:
NPROC_PER_NODE=$NGPUS NNODES=$NNODES NODE_RANK=$NODE_RANK PORT=$PORT ADDR=$ADDR xtuner dist_train $CONFIG $GPUS
- 4-1. Chat with LLMs with HuggingFace's model and adapter:
- xtuner chat hf $NAME_OR_PATH_TO_HF_MODEL --adapter $NAME_OR_PATH_TO_HF_ADAPTER --prompt-template $PROMPT_TEMPLATE
- 4-2. Chat with LLMs with XTuner's config and adapter:
- xtuner chat xtuner $CONFIG --adapter $PATH_TO_PTH_ADAPTER --prompt $PROMPT_TEMPLATE
- 5-1. Convert the pth adapter to HuggingFace's adapter:
- xtuner convert adapter_pth2hf $CONFIG $PATH_TO_PTH_ADAPTER $SAVE_PATH_TO_HF_ADAPTER
- 5-2. Merge the HuggingFace's adapter to the pretrained LLM:
- xtuner convert merge_adapter_hf $NAME_OR_PATH_TO_HF_MODEL $NAME_OR_PATH_TO_HF_ADAPTER $SAVE_PATH
- 5-3. Merge the XTuner's adapter to the pretraiend LLM:
- xtuner convert merge_adapter $CONFIG $NAME_OR_PATH_TO_HF_ADAPTER $SAVE_PATH
- 5-4. Split HuggingFace's LLM to the smallest sharded one:
- xtuner convert split_hf_llm $NAME_OR_PATH_TO_HF_MODEL $SAVE_PATH
+ 4-1. Convert the pth model to HuggingFace's model:
+ xtuner convert pth_to_hf $CONFIG $PATH_TO_PTH_MODEL $SAVE_PATH_TO_HF_MODEL
+ 4-2. Merge the HuggingFace's adapter to the pretrained LLM:
+ xtuner convert merge $NAME_OR_PATH_TO_LLM $NAME_OR_PATH_TO_ADAPTER $SAVE_PATH
+ 4-3. Split HuggingFace's LLM to the smallest sharded one:
+ xtuner convert split $NAME_OR_PATH_TO_LLM $SAVE_PATH
+ 5. Chat with LLMs with HuggingFace's model and adapter:
+ xtuner chat $NAME_OR_PATH_TO_LLM --adapter $NAME_OR_PATH_TO_ADAPTER --prompt-template $PROMPT_TEMPLATE
6-1. Preprocess arxiv dataset:
xtuner preprocess arxiv $SRC_FILE $DST_FILE --start-date $START_DATE --categories $CATEGORIES
@@ -73,15 +68,12 @@
Some usages for convert: (See more by using -h for specific command!)
- 1. Convert the pth adapter to HuggingFace's adapter:
- xtuner convert adapter_pth2hf $CONFIG $PATH_TO_PTH_ADAPTER $SAVE_PATH_TO_HF_ADAPTER
+ 1. Convert the pth model to HuggingFace's model:
+ xtuner convert pth_to_hf $CONFIG $PATH_TO_PTH_MODEL $SAVE_PATH_TO_HF_MODEL
2. Merge the HuggingFace's adapter to the pretrained LLM:
- xtuner convert merge_adapter_hf $NAME_OR_PATH_TO_HF_MODEL $NAME_OR_PATH_TO_HF_ADAPTER $SAVE_PATH
- 3. Merge the XTuner's
- adapter to the pretraiend LLM:
- xtuner convert merge_adapter $CONFIG $NAME_OR_PATH_TO_HF_ADAPTER $SAVE_PATH
- 4. Split HuggingFace's LLM to the smallest sharded one:
- xtuner convert split_hf_llm $NAME_OR_PATH_TO_HF_MODEL $SAVE_PATH
+ xtuner convert merge $NAME_OR_PATH_TO_LLM $NAME_OR_PATH_TO_ADAPTER $SAVE_PATH
+ 3. Split HuggingFace's LLM to the smallest sharded one:
+ xtuner convert split $NAME_OR_PATH_TO_LLM $SAVE_PATH
GitHub: https://github.com/InternLM/xtuner
""" # noqa: E501
@@ -105,27 +97,6 @@
GitHub: https://github.com/InternLM/xtuner
""" # noqa: E501
-
-CHAT_HELP_MSG = \
- f"""
- Arguments received: {str(['xtuner'] + sys.argv[1:])}. xtuner commands use the following syntax:
-
- xtuner MODE MODE_ARGS ARGS
-
- Where MODE (required) is one of {MODES}
- MODE_ARG (optional) is the argument for specific mode
- ARGS (optional) are the arguments for specific command
-
- Some usages for chat: (See more by using -h for specific command!)
-
- 1. Chat with LLMs with HuggingFace's model and adapter:
- xtuner chat hf $NAME_OR_PATH_TO_HF_MODEL --adapter $NAME_OR_PATH_TO_HF_ADAPTER --prompt-template $PROMPT_TEMPLATE
- 2. Chat with LLMs with XTuner's config and adapter:
- xtuner chat xtuner internlm_7b_qlora_alpaca --adapter $PATH_TO_PTH_ADAPTER --prompt $PROMPT_TEMPLATE
-
- GitHub: https://github.com/InternLM/xtuner
- """ # noqa: E501
-
special = {
'help': lambda: print_log(CLI_HELP_MSG, 'current'),
'version': lambda: print_log(xtuner.__version__, 'current')
@@ -143,17 +114,11 @@
'copy-cfg': copy_cfg.__file__,
'train': train.__file__,
'test': test.__file__,
- 'chat': {
- 'hf': chat_hf.__file__,
- 'xtuner': chat.__file__,
- '--help': lambda: print_log(CHAT_HELP_MSG, 'current'),
- '-h': lambda: print_log(CHAT_HELP_MSG, 'current')
- },
+ 'chat': chat.__file__,
'convert': {
- 'adapter_pth2hf': adapter_pth2hf.__file__,
- 'merge_adapter': merge_adapter.__file__,
- 'merge_adapter_hf': merge_adapter_hf.__file__,
- 'split_hf_llm': split_hf_llm.__file__,
+ 'pth_to_hf': pth_to_hf.__file__,
+ 'merge': merge.__file__,
+ 'split': split.__file__,
'--help': lambda: print_log(CONVERT_HELP_MSG, 'current'),
'-h': lambda: print_log(CONVERT_HELP_MSG, 'current')
},
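
The table above flattens `chat` to a single script and points the `convert` sub-commands at the renamed converters. The argv handling around this table is outside the shown hunks; the sketch below resolves a nested mode table with a hypothetical subprocess-based runner, purely to illustrate the dispatch shape.

```python
# Sketch of resolving a nested mode table; the runner is hypothetical and not
# the actual entry_point.py logic (which is outside the shown hunks).
import subprocess
import sys

MODES_MAP = {
    'chat': 'xtuner/tools/chat.py',  # flat now: no 'hf' / 'xtuner' sub-mode
    'convert': {
        'pth_to_hf': 'xtuner/tools/model_converters/pth_to_hf.py',
        'merge': 'xtuner/tools/model_converters/merge.py',
        'split': 'xtuner/tools/model_converters/split.py',
    },
}


def dispatch(argv):
    node, consumed = MODES_MAP, 0
    for arg in argv:  # descend until a script path (str) is reached
        if isinstance(node, dict) and arg in node:
            node, consumed = node[arg], consumed + 1
        else:
            break
    if not isinstance(node, str):
        raise SystemExit(f'Unknown command: xtuner {" ".join(argv)}')
    # hand the remaining CLI arguments to the selected tool script
    return subprocess.run([sys.executable, node, *argv[consumed:]]).returncode


# e.g. dispatch(['convert', 'pth_to_hf', 'CONFIG', 'PTH', 'SAVE_PATH'])
```
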
diff --git a/xtuner/tools/chat.py b/xtuner/tools/chat.py
index db3128e65..f3cf452c6 100644
--- a/xtuner/tools/chat.py
+++ b/xtuner/tools/chat.py
@@ -1,35 +1,45 @@
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
-import os
import re
import torch
-from mmengine.config import Config, DictAction
-from transformers import GenerationConfig
+from peft import PeftModel
+from transformers import (AutoModelForCausalLM, AutoTokenizer,
+ BitsAndBytesConfig, GenerationConfig)
-from xtuner.configs import cfgs_name_path
-from xtuner.registry import BUILDER
from xtuner.tools.utils import get_chat_utils, update_stop_criteria
from xtuner.utils import PROMPT_TEMPLATE
+def remove_prefix(state_dict, prefix):
+ new_state_dict = {}
+ for key, value in state_dict.items():
+ if key.startswith(prefix):
+ new_key = key[len(prefix):]
+ new_state_dict[new_key] = value
+ else:
+ new_state_dict[key] = value
+ return new_state_dict
+
+
def parse_args():
- parser = argparse.ArgumentParser(
- description='Chat with a pretrained model')
+ parser = argparse.ArgumentParser(description='Chat with a HF model')
parser.add_argument(
- 'config',
- help='config file name or path. Note: Please use the original '
- 'configs, instead of the automatically saved log configs.')
- parser.add_argument('--adapter', default=None, help='adapter model')
+ 'model_name_or_path', help='Hugging Face model name or path')
+ parser.add_argument('--adapter', default=None, help='adapter name or path')
parser.add_argument(
'--prompt-template',
choices=PROMPT_TEMPLATE.keys(),
default=None,
help='Specify a prompt option')
parser.add_argument(
- '--is-deepspeed',
- action='store_true',
- help='whether the adapter is saved from deepspeed')
+ '--bits',
+ type=int,
+ choices=[4, 8, None],
+ default=None,
+ help='LLM bits')
+ parser.add_argument(
+ '--bot-name', type=str, default='BOT', help='Name for Bot')
parser.add_argument(
'--with-plugins',
nargs='+',
@@ -67,16 +77,6 @@ def parse_args():
type=int,
default=0,
help='Random seed for reproducible text generation')
- parser.add_argument(
- '--cfg-options',
- nargs='+',
- action=DictAction,
- help='override some settings in the used config, the key-value pair '
- 'in xxx=yyy format will be merged into config file. If the value to '
- 'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
- 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
- 'Note that the quotation marks are necessary and that no white space '
- 'is allowed.')
args = parser.parse_args()
return args
@@ -119,29 +119,30 @@ def main():
torch.manual_seed(args.seed)
- # parse config
- if not os.path.isfile(args.config):
- try:
- args.config = cfgs_name_path[args.config]
- except KeyError:
- raise FileNotFoundError(f'Cannot find {args.config}')
-
- # load config
- cfg = Config.fromfile(args.config)
- if args.cfg_options is not None:
- cfg.merge_from_dict(args.cfg_options)
-
- model = BUILDER.build(cfg.model)
- # Cast to inference mode
- model.llm.gradient_checkpointing_disable()
- model.llm.config.use_cache = True
-
- tokenizer = BUILDER.build(cfg.tokenizer)
-
+ # build model
+ quantization_config = None
+ load_in_8bit = False
+ if args.bits == 4:
+ quantization_config = BitsAndBytesConfig(
+ load_in_4bit=True,
+ load_in_8bit=False,
+ llm_int8_threshold=6.0,
+ llm_int8_has_fp16_weight=False,
+ bnb_4bit_compute_dtype=torch.float16,
+ bnb_4bit_use_double_quant=True,
+ bnb_4bit_quant_type='nf4')
+ elif args.bits == 8:
+ load_in_8bit = True
+ model = AutoModelForCausalLM.from_pretrained(
+ args.model_name_or_path,
+ quantization_config=quantization_config,
+ load_in_8bit=load_in_8bit,
+ device_map='auto',
+ trust_remote_code=True)
+ tokenizer = AutoTokenizer.from_pretrained(
+ args.model_name_or_path, trust_remote_code=True)
if args.adapter is not None:
- adapter = torch.load(args.adapter, map_location='cpu')
- state_dict_key = 'module' if args.is_deepspeed else 'state_dict'
- model.load_state_dict(adapter[state_dict_key], strict=False)
+ model = PeftModel.from_pretrained(model, args.adapter)
print(f'Load adapter from {args.adapter}')
Streamer, stop_criteria = get_chat_utils(model)
@@ -173,10 +174,10 @@ def main():
template = PROMPT_TEMPLATE[args.prompt_template]
if 'INSTRUCTION_START' in template and n_turn == 0:
prompt_text = template['INSTRUCTION_START'].format(
- input=text, round=n_turn + 1, **cfg)
+ input=text, round=n_turn + 1, bot_name=args.bot_name)
else:
prompt_text = template['INSTRUCTION'].format(
- input=text, round=n_turn + 1, **cfg)
+ input=text, round=n_turn + 1, bot_name=args.bot_name)
if args.prompt_template == 'moss_sft':
if not inner_thoughts_open:
prompt_text.replace('- Inner thoughts: enabled.',
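
The new chat.py also defines a `remove_prefix` helper whose call site falls outside the hunks shown above. For illustration only, it strips a leading module prefix from state-dict keys; the `'llm.'` prefix in this example is hypothetical.

```python
# Illustration of the remove_prefix helper defined in the diff above;
# the 'llm.' prefix is a hypothetical wrapper-module prefix.
def remove_prefix(state_dict, prefix):
    new_state_dict = {}
    for key, value in state_dict.items():
        if key.startswith(prefix):
            new_state_dict[key[len(prefix):]] = value
        else:
            new_state_dict[key] = value
    return new_state_dict


sd = {'llm.model.layers.0.attn.weight': 0, 'norm.weight': 1}
print(remove_prefix(sd, 'llm.'))
# -> {'model.layers.0.attn.weight': 0, 'norm.weight': 1}
```
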
diff --git a/xtuner/tools/chat_hf.py b/xtuner/tools/chat_hf.py
deleted file mode 100644
index 7cc81d89a..000000000
--- a/xtuner/tools/chat_hf.py
+++ /dev/null
@@ -1,235 +0,0 @@
-# Copyright (c) OpenMMLab. All rights reserved.
-import argparse
-import re
-
-import torch
-from peft import PeftModel
-from transformers import (AutoModelForCausalLM, AutoTokenizer,
- BitsAndBytesConfig, GenerationConfig)
-
-from xtuner.tools.utils import get_chat_utils, update_stop_criteria
-from xtuner.utils import PROMPT_TEMPLATE
-
-
-def parse_args():
- parser = argparse.ArgumentParser(description='Chat with a HF model')
- parser.add_argument(
- 'model_name_or_path', help='Hugging Face model name or path')
- parser.add_argument('--adapter', default=None, help='adapter name or path')
- parser.add_argument(
- '--prompt-template',
- choices=PROMPT_TEMPLATE.keys(),
- default=None,
- help='Specify a prompt option')
- parser.add_argument(
- '--bot-name', type=str, default='BOT', help='Name for Bot')
- parser.add_argument(
- '--with-plugins',
- nargs='+',
- choices=['calculate', 'solve', 'search'],
- help='Specify plugins to use')
- parser.add_argument(
- '--no-streamer', action='store_true', help='Whether to with streamer')
- parser.add_argument('--command-stop-word', default=None, help='Stop key')
- parser.add_argument('--answer-stop-word', default=None, help='Stop key')
- parser.add_argument(
- '--max-new-tokens',
- type=int,
- default=2048,
- help='Maximum number of new tokens allowed in generated text')
- parser.add_argument(
- '--temperature',
- type=float,
- default=0.1,
- help='The value used to modulate the next token probabilities.')
- parser.add_argument(
- '--top-k',
- type=int,
- default=40,
- help='The number of highest probability vocabulary tokens to '
- 'keep for top-k-filtering.')
- parser.add_argument(
- '--top-p',
- type=float,
- default=0.75,
- help='If set to float < 1, only the smallest set of most probable '
- 'tokens with probabilities that add up to top_p or higher are '
- 'kept for generation.')
- parser.add_argument(
- '--seed',
- type=int,
- default=0,
- help='Random seed for reproducible text generation')
- args = parser.parse_args()
- return args
-
-
-def get_input():
- """Helper function for getting input from users."""
- sentinel = '' # ends when this string is seen
- result = None
- while result is None:
- print('\ndouble enter to end input >>> ', end='')
- try:
- result = '\n'.join(iter(input, sentinel))
- except UnicodeDecodeError:
- print('Invalid characters detected. Please enter again.')
- return result
-
-
-def main():
- args = parse_args()
-
- if args.with_plugins is None:
- inner_thoughts_open = False
- calculate_open = False
- solve_open = False
- search_open = False
- else:
- assert args.prompt_template == 'moss_sft'
- from plugins import plugins_api
- inner_thoughts_open = True
- calculate_open = 'calculate' in args.with_plugins
- solve_open = 'solve' in args.with_plugins
- search_open = 'search' in args.with_plugins
- # pre-import for api and model preparation
- if calculate_open:
- from plugins import calculate # noqa: F401
- if solve_open:
- from plugins import solve # noqa: F401
- if search_open:
- from plugins import search # noqa: F401
-
- torch.manual_seed(args.seed)
-
- # build model
- quantization_config = BitsAndBytesConfig(
- load_in_4bit=True,
- load_in_8bit=False,
- llm_int8_threshold=6.0,
- llm_int8_has_fp16_weight=False,
- bnb_4bit_compute_dtype=torch.float16,
- bnb_4bit_use_double_quant=True,
- bnb_4bit_quant_type='nf4')
- model = AutoModelForCausalLM.from_pretrained(
- args.model_name_or_path,
- quantization_config=quantization_config,
- trust_remote_code=True)
- tokenizer = AutoTokenizer.from_pretrained(
- args.model_name_or_path, trust_remote_code=True)
- if args.adapter is not None:
- model = PeftModel.from_pretrained(model, args.adapter)
- print(f'Load adapter from {args.adapter}')
-
- Streamer, stop_criteria = get_chat_utils(model)
- if args.no_streamer:
- Streamer = None
-
- command_stop_cr, answer_stop_cr = update_stop_criteria(
- base=stop_criteria,
- tokenizer=tokenizer,
- command_stop_word=args.command_stop_word,
- answer_stop_word=args.answer_stop_word)
-
- gen_config = GenerationConfig(
- max_new_tokens=args.max_new_tokens,
- do_sample=args.temperature > 0,
- temperature=args.temperature,
- top_p=args.top_p,
- top_k=args.top_k,
- )
-
- n_turn = 0
- inputs = ''
- while True:
- text = get_input()
-
- if text == 'exit':
- exit(0)
- if args.prompt_template is not None:
- template = PROMPT_TEMPLATE[args.prompt_template]
- if 'INSTRUCTION_START' in template and n_turn == 0:
- prompt_text = template['INSTRUCTION_START'].format(
- input=text, round=n_turn + 1, bot_name=args.bot_name)
- else:
- prompt_text = template['INSTRUCTION'].format(
- input=text, round=n_turn + 1, bot_name=args.bot_name)
- if args.prompt_template == 'moss_sft':
- if not inner_thoughts_open:
- prompt_text.replace('- Inner thoughts: enabled.',
- '- Inner thoughts: disabled.')
- if not calculate_open:
- prompt_text.replace(
- '- Calculator: enabled. API: Calculate(expression)',
- '- Calculator: disabled.')
- if not solve_open:
- prompt_text.replace(
- '- Equation solver: enabled. API: Solve(equation)',
- '- Equation solver: disabled.')
- if not search_open:
- prompt_text.replace(
- '- Web search: enabled. API: Search(query)',
- '- Web search: disabled.')
-
- inputs += prompt_text
- else:
- inputs += text
- ids = tokenizer.encode(inputs, return_tensors='pt')
- streamer = Streamer(tokenizer) if Streamer is not None else None
- if args.with_plugins is not None:
- generate_output = model.generate(
- inputs=ids.cuda(),
- generation_config=gen_config,
- streamer=streamer,
- stopping_criteria=command_stop_cr).cpu()
- generate_output_text = tokenizer.decode(
- generate_output[0][len(ids[0]):])
- if streamer is None:
- end = '' if generate_output_text[-1] == '\n' else '\n'
- print(generate_output_text, end=end)
- pattern = r'<\|Commands\|>:(.*?)'
- command_text = ', '.join(re.findall(pattern, generate_output_text))
- extent_text = plugins_api(
- command_text,
- calculate_open=calculate_open,
- solve_open=solve_open,
- search_open=search_open)
- end = '' if extent_text[-1] == '\n' else '\n'
- print(extent_text, end=end)
- extent_text_ids = tokenizer.encode(
- extent_text, return_tensors='pt', add_special_tokens=False)
- new_ids = torch.cat((generate_output, extent_text_ids), dim=1)
- new_streamer = Streamer(
- tokenizer) if Streamer is not None else None
- generate_output = model.generate(
- inputs=new_ids.cuda(),
- generation_config=gen_config,
- streamer=new_streamer,
- stopping_criteria=answer_stop_cr)
- if streamer is None:
- output_text = tokenizer.decode(
- generate_output[0][len(new_ids[0]):])
- end = '' if output_text[-1] == '\n' else '\n'
- print(output_text, end=end)
- else:
- generate_output = model.generate(
- inputs=ids.cuda(),
- generation_config=gen_config,
- streamer=streamer,
- stopping_criteria=answer_stop_cr)
- if streamer is None:
- output_text = tokenizer.decode(
- generate_output[0][len(ids[0]):])
- end = '' if output_text[-1] == '\n' else '\n'
- print(output_text, end=end)
- inputs = tokenizer.decode(generate_output[0])
- n_turn += 1
- if len(generate_output[0]) >= args.max_new_tokens:
- print('Remove the memory of history responses, since '
- f'it exceeds the length limitation {args.max_new_tokens}.')
- n_turn = 0
- inputs = ''
-
-
-if __name__ == '__main__':
- main()
diff --git a/xtuner/tools/model_converters/adapter_pth2hf.py b/xtuner/tools/model_converters/adapter_pth2hf.py
deleted file mode 100644
index 80ae2e209..000000000
--- a/xtuner/tools/model_converters/adapter_pth2hf.py
+++ /dev/null
@@ -1,79 +0,0 @@
-# Copyright (c) OpenMMLab. All rights reserved.
-import argparse
-import os
-import shutil
-
-import torch
-from mmengine.config import Config, DictAction
-from mmengine.utils import mkdir_or_exist
-
-from xtuner.configs import cfgs_name_path
-from xtuner.registry import BUILDER
-
-
-def parse_args():
- parser = argparse.ArgumentParser(
- description='Convert the pth adapter to HuggingFace adapter')
- parser.add_argument(
- 'config',
- help='config file name or path. Note: Please use the original '
- 'configs, instead of the automatically saved log configs.')
- parser.add_argument('adapter_checkpoint', help='adapter checkpoint file')
- parser.add_argument(
- 'save_dir', help='the directory to save the checkpoint')
- parser.add_argument(
- '--is-deepspeed',
- action='store_true',
- help='whether the adapter is saved from deepspeed')
- parser.add_argument(
- '--cfg-options',
- nargs='+',
- action=DictAction,
- help='override some settings in the used config, the key-value pair '
- 'in xxx=yyy format will be merged into config file. If the value to '
- 'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
- 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
- 'Note that the quotation marks are necessary and that no white space '
- 'is allowed.')
- args = parser.parse_args()
- return args
-
-
-def main():
- args = parse_args()
-
- # parse config
- if not os.path.isfile(args.config):
- try:
- args.config = cfgs_name_path[args.config]
- except KeyError:
- raise FileNotFoundError(f'Cannot find {args.config}')
-
- # load config
- cfg = Config.fromfile(args.config)
- if args.cfg_options is not None:
- cfg.merge_from_dict(args.cfg_options)
-
- # load on cpu
- cfg.model.llm.device_map = 'cpu'
- if cfg.model.llm.get('quantization_config'):
- cfg.model.llm.quantization_config.\
- llm_int8_enable_fp32_cpu_offload = True
-
- model = BUILDER.build(cfg.model)
-
- adapter_checkpoint = torch.load(
- args.adapter_checkpoint, map_location='cpu')
- state_dict_key = 'module' if args.is_deepspeed else 'state_dict'
- model.load_state_dict(adapter_checkpoint[state_dict_key], strict=False)
- print(f'Load adapter from {args.adapter_checkpoint}')
-
- mkdir_or_exist(args.save_dir)
- model.llm.save_pretrained(args.save_dir)
- shutil.copyfile(args.config, os.path.join(args.save_dir,
- 'xtuner_config.py'))
- print(f'Save to {args.save_dir}')
-
-
-if __name__ == '__main__':
- main()
diff --git a/xtuner/tools/model_converters/merge_adapter_hf.py b/xtuner/tools/model_converters/merge.py
similarity index 87%
rename from xtuner/tools/model_converters/merge_adapter_hf.py
rename to xtuner/tools/model_converters/merge.py
index 2de6bc23a..169cea620 100644
--- a/xtuner/tools/model_converters/merge_adapter_hf.py
+++ b/xtuner/tools/model_converters/merge.py
@@ -13,7 +13,12 @@ def parse_args():
parser.add_argument('adapter_name_or_path', help='adapter name or path')
parser.add_argument(
'save_dir', help='the directory to save the merged model')
- parser.add_argument('--max-shard-size', type=str, default='2GB')
+ parser.add_argument(
+ '--max-shard-size',
+ type=str,
+ default='2GB',
+ help='Only applicable for LLM. The maximum size for '
+ 'each sharded checkpoint.')
args = parser.parse_args()
return args
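
Only the `--max-shard-size` help string changes in this hunk; the body of the renamed `merge.py` is not shown. A minimal sketch of the merge flow, assuming it follows `merge_adapter_hf.py` (the file it was renamed from): load the HuggingFace LLM and adapter, fold the adapter in with `merge_and_unload`, and save sharded checkpoints. The loading details (dtype, `low_cpu_mem_usage`) are illustrative rather than verbatim.

```python
# Minimal sketch, assuming merge.py keeps the flow of merge_adapter_hf.py.
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer


def merge(model_name_or_path, adapter_name_or_path, save_dir, max_shard_size='2GB'):
    model = AutoModelForCausalLM.from_pretrained(
        model_name_or_path,
        torch_dtype=torch.float16,
        low_cpu_mem_usage=True,
        trust_remote_code=True)
    tokenizer = AutoTokenizer.from_pretrained(
        model_name_or_path, trust_remote_code=True)
    model = PeftModel.from_pretrained(model, adapter_name_or_path)
    merged = model.merge_and_unload()  # fold LoRA weights into the base model
    merged.save_pretrained(save_dir, max_shard_size=max_shard_size)
    tokenizer.save_pretrained(save_dir)
```
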
diff --git a/xtuner/tools/model_converters/merge_adapter.py b/xtuner/tools/model_converters/merge_adapter.py
deleted file mode 100644
index 7383f23dc..000000000
--- a/xtuner/tools/model_converters/merge_adapter.py
+++ /dev/null
@@ -1,79 +0,0 @@
-# Copyright (c) OpenMMLab. All rights reserved.
-import argparse
-import os
-
-import torch
-from mmengine.config import Config, DictAction
-
-from xtuner.configs import cfgs_name_path
-from xtuner.registry import BUILDER
-
-
-def parse_args():
- parser = argparse.ArgumentParser(description='Merge a pth adapter to LLM')
- parser.add_argument(
- 'config',
- help='config file name or path. Note: Please use the original '
- 'configs, instead of the automatically saved log configs.')
- parser.add_argument('adapter_checkpoint', help='adapter checkpoint file')
- parser.add_argument(
- 'save_dir', help='the directory to save the merged model')
- parser.add_argument('--max-shard-size', type=str, default='2GB')
- parser.add_argument(
- '--is-deepspeed',
- action='store_true',
- help='whether the adapter is saved from deepspeed')
- parser.add_argument(
- '--cfg-options',
- nargs='+',
- action=DictAction,
- help='override some settings in the used config, the key-value pair '
- 'in xxx=yyy format will be merged into config file. If the value to '
- 'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
- 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
- 'Note that the quotation marks are necessary and that no white space '
- 'is allowed.')
- args = parser.parse_args()
- return args
-
-
-def main():
- args = parse_args()
-
- # parse config
- if not os.path.isfile(args.config):
- try:
- args.config = cfgs_name_path[args.config]
- except KeyError:
- raise FileNotFoundError(f'Cannot find {args.config}')
-
- # load config
- cfg = Config.fromfile(args.config)
- if args.cfg_options is not None:
- cfg.merge_from_dict(args.cfg_options)
-
- # load on cpu, with non-quantized
- cfg.model.llm.device_map = 'cpu'
- cfg.model.llm.quantization_config = None
- cfg.model.llm.low_cpu_mem_usage = True
- torch_dtype = cfg.model.llm.get('torch_dtype', torch.float16)
- model = BUILDER.build(cfg.model)
- tokenizer = BUILDER.build(cfg.tokenizer)
- adapter_checkpoint = torch.load(
- args.adapter_checkpoint, map_location='cpu')
- state_dict_key = 'module' if args.is_deepspeed else 'state_dict'
- model.load_state_dict(adapter_checkpoint[state_dict_key], strict=False)
- print(f'Load adapter from {args.adapter_checkpoint}')
-
- model = model.llm
- model_merged = model.merge_and_unload()
- for param in model.parameters():
- param.data = param.data.to(torch_dtype)
- model_merged.save_pretrained(
- args.save_dir, max_shard_size=args.max_shard_size)
- tokenizer.save_pretrained(args.save_dir)
- print(f'Save to {args.save_dir}')
-
-
-if __name__ == '__main__':
- main()
diff --git a/xtuner/tools/model_converters/pth_to_hf.py b/xtuner/tools/model_converters/pth_to_hf.py
new file mode 100644
index 000000000..ccf15861b
--- /dev/null
+++ b/xtuner/tools/model_converters/pth_to_hf.py
@@ -0,0 +1,108 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import argparse
+import os
+import shutil
+
+import torch
+from mmengine.config import Config, DictAction
+
+from xtuner.configs import cfgs_name_path
+from xtuner.registry import BUILDER
+
+
+def parse_args():
+ parser = argparse.ArgumentParser(
+ description='Convert the pth model to HuggingFace model')
+ parser.add_argument(
+ 'config',
+ help='config file name or path. Note: Please use the original '
+ 'configs, instead of the automatically saved log configs.')
+ parser.add_argument('pth_model', help='pth model file')
+ parser.add_argument(
+ 'save_dir', help='the directory to save HuggingFace model')
+ parser.add_argument(
+ '--fp32',
+ action='store_true',
+ help='Save as fp32. If not set, fp16 will be used by default.')
+ parser.add_argument(
+ '--max-shard-size',
+ type=str,
+ default='2GB',
+ help='Only applicable for LLM. The maximum size for '
+ 'each sharded checkpoint.')
+ parser.add_argument(
+ '--cfg-options',
+ nargs='+',
+ action=DictAction,
+ help='override some settings in the used config, the key-value pair '
+ 'in xxx=yyy format will be merged into config file. If the value to '
+ 'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
+ 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
+ 'Note that the quotation marks are necessary and that no white space '
+ 'is allowed.')
+ args = parser.parse_args()
+ return args
+
+
+def guess_load_checkpoint(pth_model):
+ if os.path.isfile(pth_model):
+ state_dict = torch.load(pth_model, map_location='cpu')
+ if 'state_dict' in state_dict:
+ state_dict = state_dict['state_dict']
+ elif os.path.isdir(pth_model):
+ try:
+ from deepspeed.utils.zero_to_fp32 import \
+ get_fp32_state_dict_from_zero_checkpoint
+ except ImportError:
+ raise ImportError(
+ 'The provided PTH model appears to be a DeepSpeed checkpoint. '
+ 'However, DeepSpeed library is not detected in current '
+ 'environment. This suggests that DeepSpeed may not be '
+ 'installed or is incorrectly configured. Please verify your '
+ 'setup.')
+ state_dict = get_fp32_state_dict_from_zero_checkpoint(
+ os.path.dirname(pth_model), os.path.basename(pth_model))
+ else:
+ raise FileNotFoundError(f'Cannot find {pth_model}')
+ return state_dict
+
+
+def main():
+ args = parse_args()
+
+ # parse config
+ if not os.path.isfile(args.config):
+ try:
+ args.config = cfgs_name_path[args.config]
+ except KeyError:
+ raise FileNotFoundError(f'Cannot find {args.config}')
+
+ # load config
+ cfg = Config.fromfile(args.config)
+ if args.cfg_options is not None:
+ cfg.merge_from_dict(args.cfg_options)
+
+ model = BUILDER.build(cfg.model)
+
+ state_dict = guess_load_checkpoint(args.pth_model)
+ model.load_state_dict(state_dict, strict=False)
+ print(f'Load PTH model from {args.pth_model}')
+
+ if not args.fp32:
+ print('Convert weights to float16')
+ model.llm.half()
+
+ print(f'Saving HuggingFace model to {args.save_dir}')
+ model.llm.save_pretrained(
+ args.save_dir, max_shard_size=args.max_shard_size)
+ if 'PeftModel' not in model.llm.__class__.__name__:
+ print(f'Saving HuggingFace tokenizer to {args.save_dir}')
+ tokenizer = BUILDER.build(cfg.tokenizer)
+ tokenizer.save_pretrained(args.save_dir)
+ shutil.copyfile(args.config, os.path.join(args.save_dir,
+ 'xtuner_config.py'))
+ print('All done!')
+
+
+if __name__ == '__main__':
+ main()
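
The new converter builds the model from the config, loads either a plain MMEngine `.pth` file or a DeepSpeed ZeRO checkpoint directory via `guess_load_checkpoint`, and saves `model.llm` in HuggingFace format. A self-contained illustration of the plain-file branch; the checkpoint name and state-dict key below are hypothetical.

```python
# Illustration of the plain-file branch of guess_load_checkpoint: MMEngine
# checkpoints wrap the weights under a 'state_dict' key, which is unwrapped here.
import os
import tempfile

import torch


def load_plain_pth(pth_model):
    state_dict = torch.load(pth_model, map_location='cpu')
    if 'state_dict' in state_dict:
        state_dict = state_dict['state_dict']
    return state_dict


with tempfile.TemporaryDirectory() as tmp:
    pth = os.path.join(tmp, 'iter_500.pth')  # hypothetical checkpoint name
    torch.save({'meta': {}, 'state_dict': {'llm.lora_A.weight': torch.zeros(2, 2)}}, pth)
    print(list(load_plain_pth(pth).keys()))  # -> ['llm.lora_A.weight']
```
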
diff --git a/xtuner/tools/model_converters/split_hf_llm.py b/xtuner/tools/model_converters/split.py
similarity index 100%
rename from xtuner/tools/model_converters/split_hf_llm.py
rename to xtuner/tools/model_converters/split.py