From 937e9937f5f60c21bc7bc4d510936dfc2ba671eb Mon Sep 17 00:00:00 2001 From: fanqiNO1 <75657629+fanqiNO1@users.noreply.github.com> Date: Mon, 24 Jun 2024 11:17:15 +0800 Subject: [PATCH] [Fix] Fix map_fn in custom_dataset/sft (#785) --- .../sft/baichuan/baichuan2_13b_chat_qlora_custom_sft_e1.py | 4 ++-- .../sft/baichuan/baichuan2_7b_chat_qlora_custom_sft_e1.py | 4 ++-- .../sft/baichuan/baichuan_13b_chat_qlora_custom_sft_e1.py | 4 ++-- .../sft/baichuan/baichuan_7b_qlora_custom_sft_e1.py | 4 ++-- .../sft/chatglm/chatglm2_6b_qlora_custom_sft_e1.py | 4 ++-- .../sft/chatglm/chatglm3_6b_qlora_custom_sft_e1.py | 4 ++-- .../sft/deepseek/deepseek_moe_16b_chat_qlora_custom_sft_e1.py | 4 ++-- .../deepseekcoder_6_7b_instruct_qlora_custom_sft_e1.py | 4 ++-- .../sft/gemma/gemma_2b_it_qlora_custom_sft_e1.py | 4 ++-- .../custom_dataset/sft/gemma/gemma_2b_qlora_custom_sft_e1.py | 4 ++-- .../sft/gemma/gemma_7b_it_qlora_custom_sft_e1.py | 4 ++-- .../custom_dataset/sft/gemma/gemma_7b_qlora_custom_sft_e1.py | 4 ++-- .../sft/internlm/internlm2_chat_1_8b_qlora_custom_sft_e1.py | 4 ++-- .../sft/internlm/internlm2_chat_20b_qlora_custom_sft_e1.py | 4 ++-- .../sft/internlm/internlm2_chat_7b_qlora_custom_sft_e1.py | 4 ++-- .../sft/llama/llama2_70b_qlora_custom_sft_e1.py | 4 ++-- .../sft/llama/llama2_7b_chat_qlora_custom_sft_e1.py | 4 ++-- .../sft/mistral/mistral_7b_full_finetune_custom_sft_e1.py | 4 ++-- .../sft/mixtral/mixtral_8x7b_instruct_qlora_custom_sft_e1.py | 4 ++-- .../sft/qwen/qwen1_5_0_5b_chat_qlora_custom_sft_e1.py | 4 ++-- .../sft/qwen/qwen1_5_14b_chat_qlora_custom_sft_e1.py | 4 ++-- .../sft/qwen/qwen1_5_1_8b_chat_qlora_custom_sft_e1.py | 4 ++-- .../sft/qwen/qwen1_5_4b_chat_qlora_custom_sft_e1.py | 4 ++-- .../sft/qwen/qwen1_5_72b_chat_qlora_custom_sft_e1.py | 4 ++-- .../sft/qwen/qwen1_5_7b_chat_qlora_custom_sft_e1.py | 4 ++-- .../sft/qwen/qwen_1_8b_chat_qlora_custom_sft_e1.py | 4 ++-- .../custom_dataset/sft/qwen/qwen_72b_qlora_custom_sft_e1.py | 4 ++-- .../sft/qwen/qwen_7b_chat_qlora_custom_sft_e1.py | 4 ++-- .../sft/starcoder/starcoder_qlora_custom_sft_e1.py | 4 ++-- .../custom_dataset/sft/yi/yi_34b_qlora_custom_sft_e1.py | 4 ++-- .../custom_dataset/sft/yi/yi_6b_qlora_custom_sft_e1.py | 4 ++-- .../sft/zephyr/zephyr_7b_beta_qlora_custom_sft_e1.py | 4 ++-- 32 files changed, 64 insertions(+), 64 deletions(-) diff --git a/xtuner/configs/custom_dataset/sft/baichuan/baichuan2_13b_chat_qlora_custom_sft_e1.py b/xtuner/configs/custom_dataset/sft/baichuan/baichuan2_13b_chat_qlora_custom_sft_e1.py index c1f79073d..558887c04 100644 --- a/xtuner/configs/custom_dataset/sft/baichuan/baichuan2_13b_chat_qlora_custom_sft_e1.py +++ b/xtuner/configs/custom_dataset/sft/baichuan/baichuan2_13b_chat_qlora_custom_sft_e1.py @@ -26,7 +26,7 @@ from xtuner.dataset import process_hf_dataset from xtuner.dataset.collate_fns import default_collate_fn -from xtuner.dataset.map_fns import template_map_fn_factory +from xtuner.dataset.map_fns import openai_map_fn, template_map_fn_factory from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook, VarlenAttnArgsToMessageHubHook) from xtuner.engine.runner import TrainLoop @@ -111,7 +111,7 @@ dataset=dict(type=load_dataset, path='json', data_files=data_files), tokenizer=tokenizer, max_length=max_length, - dataset_map_fn=None, + dataset_map_fn=openai_map_fn, template_map_fn=dict( type=template_map_fn_factory, template=prompt_template), remove_unused_columns=True, diff --git a/xtuner/configs/custom_dataset/sft/baichuan/baichuan2_7b_chat_qlora_custom_sft_e1.py b/xtuner/configs/custom_dataset/sft/baichuan/baichuan2_7b_chat_qlora_custom_sft_e1.py index a2f4d445c..8df388a67 100644 --- a/xtuner/configs/custom_dataset/sft/baichuan/baichuan2_7b_chat_qlora_custom_sft_e1.py +++ b/xtuner/configs/custom_dataset/sft/baichuan/baichuan2_7b_chat_qlora_custom_sft_e1.py @@ -26,7 +26,7 @@ from xtuner.dataset import process_hf_dataset from xtuner.dataset.collate_fns import default_collate_fn -from xtuner.dataset.map_fns import template_map_fn_factory +from xtuner.dataset.map_fns import openai_map_fn, template_map_fn_factory from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook, VarlenAttnArgsToMessageHubHook) from xtuner.engine.runner import TrainLoop @@ -111,7 +111,7 @@ dataset=dict(type=load_dataset, path='json', data_files=data_files), tokenizer=tokenizer, max_length=max_length, - dataset_map_fn=None, + dataset_map_fn=openai_map_fn, template_map_fn=dict( type=template_map_fn_factory, template=prompt_template), remove_unused_columns=True, diff --git a/xtuner/configs/custom_dataset/sft/baichuan/baichuan_13b_chat_qlora_custom_sft_e1.py b/xtuner/configs/custom_dataset/sft/baichuan/baichuan_13b_chat_qlora_custom_sft_e1.py index 178cee847..3dc38eb4f 100644 --- a/xtuner/configs/custom_dataset/sft/baichuan/baichuan_13b_chat_qlora_custom_sft_e1.py +++ b/xtuner/configs/custom_dataset/sft/baichuan/baichuan_13b_chat_qlora_custom_sft_e1.py @@ -26,7 +26,7 @@ from xtuner.dataset import process_hf_dataset from xtuner.dataset.collate_fns import default_collate_fn -from xtuner.dataset.map_fns import template_map_fn_factory +from xtuner.dataset.map_fns import openai_map_fn, template_map_fn_factory from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook, VarlenAttnArgsToMessageHubHook) from xtuner.engine.runner import TrainLoop @@ -111,7 +111,7 @@ dataset=dict(type=load_dataset, path='json', data_files=data_files), tokenizer=tokenizer, max_length=max_length, - dataset_map_fn=None, + dataset_map_fn=openai_map_fn, template_map_fn=dict( type=template_map_fn_factory, template=prompt_template), remove_unused_columns=True, diff --git a/xtuner/configs/custom_dataset/sft/baichuan/baichuan_7b_qlora_custom_sft_e1.py b/xtuner/configs/custom_dataset/sft/baichuan/baichuan_7b_qlora_custom_sft_e1.py index eecce4cce..dc15b6289 100644 --- a/xtuner/configs/custom_dataset/sft/baichuan/baichuan_7b_qlora_custom_sft_e1.py +++ b/xtuner/configs/custom_dataset/sft/baichuan/baichuan_7b_qlora_custom_sft_e1.py @@ -26,7 +26,7 @@ from xtuner.dataset import process_hf_dataset from xtuner.dataset.collate_fns import default_collate_fn -from xtuner.dataset.map_fns import template_map_fn_factory +from xtuner.dataset.map_fns import openai_map_fn, template_map_fn_factory from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook, VarlenAttnArgsToMessageHubHook) from xtuner.engine.runner import TrainLoop @@ -111,7 +111,7 @@ dataset=dict(type=load_dataset, path='json', data_files=data_files), tokenizer=tokenizer, max_length=max_length, - dataset_map_fn=None, + dataset_map_fn=openai_map_fn, template_map_fn=dict( type=template_map_fn_factory, template=prompt_template), remove_unused_columns=True, diff --git a/xtuner/configs/custom_dataset/sft/chatglm/chatglm2_6b_qlora_custom_sft_e1.py b/xtuner/configs/custom_dataset/sft/chatglm/chatglm2_6b_qlora_custom_sft_e1.py index 312044a88..09b354929 100644 --- a/xtuner/configs/custom_dataset/sft/chatglm/chatglm2_6b_qlora_custom_sft_e1.py +++ b/xtuner/configs/custom_dataset/sft/chatglm/chatglm2_6b_qlora_custom_sft_e1.py @@ -26,7 +26,7 @@ from xtuner.dataset import process_hf_dataset from xtuner.dataset.collate_fns import default_collate_fn -from xtuner.dataset.map_fns import template_map_fn_factory +from xtuner.dataset.map_fns import openai_map_fn, template_map_fn_factory from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook, VarlenAttnArgsToMessageHubHook) from xtuner.engine.runner import TrainLoop @@ -111,7 +111,7 @@ dataset=dict(type=load_dataset, path='json', data_files=data_files), tokenizer=tokenizer, max_length=max_length, - dataset_map_fn=None, + dataset_map_fn=openai_map_fn, template_map_fn=dict( type=template_map_fn_factory, template=prompt_template), remove_unused_columns=True, diff --git a/xtuner/configs/custom_dataset/sft/chatglm/chatglm3_6b_qlora_custom_sft_e1.py b/xtuner/configs/custom_dataset/sft/chatglm/chatglm3_6b_qlora_custom_sft_e1.py index c0bd6b2a7..7e3abba71 100644 --- a/xtuner/configs/custom_dataset/sft/chatglm/chatglm3_6b_qlora_custom_sft_e1.py +++ b/xtuner/configs/custom_dataset/sft/chatglm/chatglm3_6b_qlora_custom_sft_e1.py @@ -26,7 +26,7 @@ from xtuner.dataset import process_hf_dataset from xtuner.dataset.collate_fns import default_collate_fn -from xtuner.dataset.map_fns import template_map_fn_factory +from xtuner.dataset.map_fns import openai_map_fn, template_map_fn_factory from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook, VarlenAttnArgsToMessageHubHook) from xtuner.engine.runner import TrainLoop @@ -112,7 +112,7 @@ dataset=dict(type=load_dataset, path='json', data_files=data_files), tokenizer=tokenizer, max_length=max_length, - dataset_map_fn=None, + dataset_map_fn=openai_map_fn, template_map_fn=dict( type=template_map_fn_factory, template=prompt_template), remove_unused_columns=True, diff --git a/xtuner/configs/custom_dataset/sft/deepseek/deepseek_moe_16b_chat_qlora_custom_sft_e1.py b/xtuner/configs/custom_dataset/sft/deepseek/deepseek_moe_16b_chat_qlora_custom_sft_e1.py index d4cf2d9d9..f7621bc6c 100644 --- a/xtuner/configs/custom_dataset/sft/deepseek/deepseek_moe_16b_chat_qlora_custom_sft_e1.py +++ b/xtuner/configs/custom_dataset/sft/deepseek/deepseek_moe_16b_chat_qlora_custom_sft_e1.py @@ -26,7 +26,7 @@ from xtuner.dataset import process_hf_dataset from xtuner.dataset.collate_fns import default_collate_fn -from xtuner.dataset.map_fns import template_map_fn_factory +from xtuner.dataset.map_fns import openai_map_fn, template_map_fn_factory from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook, VarlenAttnArgsToMessageHubHook) from xtuner.engine.runner import TrainLoop @@ -111,7 +111,7 @@ dataset=dict(type=load_dataset, path='json', data_files=data_files), tokenizer=tokenizer, max_length=max_length, - dataset_map_fn=None, + dataset_map_fn=openai_map_fn, template_map_fn=dict( type=template_map_fn_factory, template=prompt_template), remove_unused_columns=True, diff --git a/xtuner/configs/custom_dataset/sft/deepseek/deepseekcoder_6_7b_instruct_qlora_custom_sft_e1.py b/xtuner/configs/custom_dataset/sft/deepseek/deepseekcoder_6_7b_instruct_qlora_custom_sft_e1.py index b0142dc76..629012f5b 100644 --- a/xtuner/configs/custom_dataset/sft/deepseek/deepseekcoder_6_7b_instruct_qlora_custom_sft_e1.py +++ b/xtuner/configs/custom_dataset/sft/deepseek/deepseekcoder_6_7b_instruct_qlora_custom_sft_e1.py @@ -26,7 +26,7 @@ from xtuner.dataset import process_hf_dataset from xtuner.dataset.collate_fns import default_collate_fn -from xtuner.dataset.map_fns import template_map_fn_factory +from xtuner.dataset.map_fns import openai_map_fn, template_map_fn_factory from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook, VarlenAttnArgsToMessageHubHook) from xtuner.engine.runner import TrainLoop @@ -115,7 +115,7 @@ dataset=dict(type=load_dataset, path='json', data_files=data_files), tokenizer=tokenizer, max_length=max_length, - dataset_map_fn=None, + dataset_map_fn=openai_map_fn, template_map_fn=dict( type=template_map_fn_factory, template=prompt_template), remove_unused_columns=True, diff --git a/xtuner/configs/custom_dataset/sft/gemma/gemma_2b_it_qlora_custom_sft_e1.py b/xtuner/configs/custom_dataset/sft/gemma/gemma_2b_it_qlora_custom_sft_e1.py index cc3d90a8b..122ddf023 100644 --- a/xtuner/configs/custom_dataset/sft/gemma/gemma_2b_it_qlora_custom_sft_e1.py +++ b/xtuner/configs/custom_dataset/sft/gemma/gemma_2b_it_qlora_custom_sft_e1.py @@ -26,7 +26,7 @@ from xtuner.dataset import process_hf_dataset from xtuner.dataset.collate_fns import default_collate_fn -from xtuner.dataset.map_fns import template_map_fn_factory +from xtuner.dataset.map_fns import openai_map_fn, template_map_fn_factory from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook, VarlenAttnArgsToMessageHubHook) from xtuner.engine.runner import TrainLoop @@ -111,7 +111,7 @@ dataset=dict(type=load_dataset, path='json', data_files=data_files), tokenizer=tokenizer, max_length=max_length, - dataset_map_fn=None, + dataset_map_fn=openai_map_fn, template_map_fn=dict( type=template_map_fn_factory, template=prompt_template), remove_unused_columns=True, diff --git a/xtuner/configs/custom_dataset/sft/gemma/gemma_2b_qlora_custom_sft_e1.py b/xtuner/configs/custom_dataset/sft/gemma/gemma_2b_qlora_custom_sft_e1.py index a993ecbab..9a3d36b30 100644 --- a/xtuner/configs/custom_dataset/sft/gemma/gemma_2b_qlora_custom_sft_e1.py +++ b/xtuner/configs/custom_dataset/sft/gemma/gemma_2b_qlora_custom_sft_e1.py @@ -26,7 +26,7 @@ from xtuner.dataset import process_hf_dataset from xtuner.dataset.collate_fns import default_collate_fn -from xtuner.dataset.map_fns import template_map_fn_factory +from xtuner.dataset.map_fns import openai_map_fn, template_map_fn_factory from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook, VarlenAttnArgsToMessageHubHook) from xtuner.engine.runner import TrainLoop @@ -111,7 +111,7 @@ dataset=dict(type=load_dataset, path='json', data_files=data_files), tokenizer=tokenizer, max_length=max_length, - dataset_map_fn=None, + dataset_map_fn=openai_map_fn, template_map_fn=dict( type=template_map_fn_factory, template=prompt_template), remove_unused_columns=True, diff --git a/xtuner/configs/custom_dataset/sft/gemma/gemma_7b_it_qlora_custom_sft_e1.py b/xtuner/configs/custom_dataset/sft/gemma/gemma_7b_it_qlora_custom_sft_e1.py index 5d5dab1c0..c677c9d09 100644 --- a/xtuner/configs/custom_dataset/sft/gemma/gemma_7b_it_qlora_custom_sft_e1.py +++ b/xtuner/configs/custom_dataset/sft/gemma/gemma_7b_it_qlora_custom_sft_e1.py @@ -26,7 +26,7 @@ from xtuner.dataset import process_hf_dataset from xtuner.dataset.collate_fns import default_collate_fn -from xtuner.dataset.map_fns import template_map_fn_factory +from xtuner.dataset.map_fns import openai_map_fn, template_map_fn_factory from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook, VarlenAttnArgsToMessageHubHook) from xtuner.engine.runner import TrainLoop @@ -111,7 +111,7 @@ dataset=dict(type=load_dataset, path='json', data_files=data_files), tokenizer=tokenizer, max_length=max_length, - dataset_map_fn=None, + dataset_map_fn=openai_map_fn, template_map_fn=dict( type=template_map_fn_factory, template=prompt_template), remove_unused_columns=True, diff --git a/xtuner/configs/custom_dataset/sft/gemma/gemma_7b_qlora_custom_sft_e1.py b/xtuner/configs/custom_dataset/sft/gemma/gemma_7b_qlora_custom_sft_e1.py index bc906731d..443a1e663 100644 --- a/xtuner/configs/custom_dataset/sft/gemma/gemma_7b_qlora_custom_sft_e1.py +++ b/xtuner/configs/custom_dataset/sft/gemma/gemma_7b_qlora_custom_sft_e1.py @@ -26,7 +26,7 @@ from xtuner.dataset import process_hf_dataset from xtuner.dataset.collate_fns import default_collate_fn -from xtuner.dataset.map_fns import template_map_fn_factory +from xtuner.dataset.map_fns import openai_map_fn, template_map_fn_factory from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook, VarlenAttnArgsToMessageHubHook) from xtuner.engine.runner import TrainLoop @@ -111,7 +111,7 @@ dataset=dict(type=load_dataset, path='json', data_files=data_files), tokenizer=tokenizer, max_length=max_length, - dataset_map_fn=None, + dataset_map_fn=openai_map_fn, template_map_fn=dict( type=template_map_fn_factory, template=prompt_template), remove_unused_columns=True, diff --git a/xtuner/configs/custom_dataset/sft/internlm/internlm2_chat_1_8b_qlora_custom_sft_e1.py b/xtuner/configs/custom_dataset/sft/internlm/internlm2_chat_1_8b_qlora_custom_sft_e1.py index af01dc2c7..2aaa6f24d 100644 --- a/xtuner/configs/custom_dataset/sft/internlm/internlm2_chat_1_8b_qlora_custom_sft_e1.py +++ b/xtuner/configs/custom_dataset/sft/internlm/internlm2_chat_1_8b_qlora_custom_sft_e1.py @@ -26,7 +26,7 @@ from xtuner.dataset import process_hf_dataset from xtuner.dataset.collate_fns import default_collate_fn -from xtuner.dataset.map_fns import template_map_fn_factory +from xtuner.dataset.map_fns import openai_map_fn, template_map_fn_factory from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook, VarlenAttnArgsToMessageHubHook) from xtuner.engine.runner import TrainLoop @@ -111,7 +111,7 @@ dataset=dict(type=load_dataset, path='json', data_files=data_files), tokenizer=tokenizer, max_length=max_length, - dataset_map_fn=None, + dataset_map_fn=openai_map_fn, template_map_fn=dict( type=template_map_fn_factory, template=prompt_template), remove_unused_columns=True, diff --git a/xtuner/configs/custom_dataset/sft/internlm/internlm2_chat_20b_qlora_custom_sft_e1.py b/xtuner/configs/custom_dataset/sft/internlm/internlm2_chat_20b_qlora_custom_sft_e1.py index f0c74ce81..dfb423839 100644 --- a/xtuner/configs/custom_dataset/sft/internlm/internlm2_chat_20b_qlora_custom_sft_e1.py +++ b/xtuner/configs/custom_dataset/sft/internlm/internlm2_chat_20b_qlora_custom_sft_e1.py @@ -26,7 +26,7 @@ from xtuner.dataset import process_hf_dataset from xtuner.dataset.collate_fns import default_collate_fn -from xtuner.dataset.map_fns import template_map_fn_factory +from xtuner.dataset.map_fns import openai_map_fn, template_map_fn_factory from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook, VarlenAttnArgsToMessageHubHook) from xtuner.engine.runner import TrainLoop @@ -111,7 +111,7 @@ dataset=dict(type=load_dataset, path='json', data_files=data_files), tokenizer=tokenizer, max_length=max_length, - dataset_map_fn=None, + dataset_map_fn=openai_map_fn, template_map_fn=dict( type=template_map_fn_factory, template=prompt_template), remove_unused_columns=True, diff --git a/xtuner/configs/custom_dataset/sft/internlm/internlm2_chat_7b_qlora_custom_sft_e1.py b/xtuner/configs/custom_dataset/sft/internlm/internlm2_chat_7b_qlora_custom_sft_e1.py index 645f9cbf6..313103992 100644 --- a/xtuner/configs/custom_dataset/sft/internlm/internlm2_chat_7b_qlora_custom_sft_e1.py +++ b/xtuner/configs/custom_dataset/sft/internlm/internlm2_chat_7b_qlora_custom_sft_e1.py @@ -26,7 +26,7 @@ from xtuner.dataset import process_hf_dataset from xtuner.dataset.collate_fns import default_collate_fn -from xtuner.dataset.map_fns import template_map_fn_factory +from xtuner.dataset.map_fns import openai_map_fn, template_map_fn_factory from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook, VarlenAttnArgsToMessageHubHook) from xtuner.engine.runner import TrainLoop @@ -111,7 +111,7 @@ dataset=dict(type=load_dataset, path='json', data_files=data_files), tokenizer=tokenizer, max_length=max_length, - dataset_map_fn=None, + dataset_map_fn=openai_map_fn, template_map_fn=dict( type=template_map_fn_factory, template=prompt_template), remove_unused_columns=True, diff --git a/xtuner/configs/custom_dataset/sft/llama/llama2_70b_qlora_custom_sft_e1.py b/xtuner/configs/custom_dataset/sft/llama/llama2_70b_qlora_custom_sft_e1.py index 53b380719..2b0f889b4 100644 --- a/xtuner/configs/custom_dataset/sft/llama/llama2_70b_qlora_custom_sft_e1.py +++ b/xtuner/configs/custom_dataset/sft/llama/llama2_70b_qlora_custom_sft_e1.py @@ -26,7 +26,7 @@ from xtuner.dataset import process_hf_dataset from xtuner.dataset.collate_fns import default_collate_fn -from xtuner.dataset.map_fns import template_map_fn_factory +from xtuner.dataset.map_fns import openai_map_fn, template_map_fn_factory from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook, VarlenAttnArgsToMessageHubHook) from xtuner.engine.runner import TrainLoop @@ -112,7 +112,7 @@ dataset=dict(type=load_dataset, path='json', data_files=data_files), tokenizer=tokenizer, max_length=max_length, - dataset_map_fn=None, + dataset_map_fn=openai_map_fn, template_map_fn=dict( type=template_map_fn_factory, template=prompt_template), remove_unused_columns=True, diff --git a/xtuner/configs/custom_dataset/sft/llama/llama2_7b_chat_qlora_custom_sft_e1.py b/xtuner/configs/custom_dataset/sft/llama/llama2_7b_chat_qlora_custom_sft_e1.py index 59d14cfb5..9aa9b6362 100644 --- a/xtuner/configs/custom_dataset/sft/llama/llama2_7b_chat_qlora_custom_sft_e1.py +++ b/xtuner/configs/custom_dataset/sft/llama/llama2_7b_chat_qlora_custom_sft_e1.py @@ -26,7 +26,7 @@ from xtuner.dataset import process_hf_dataset from xtuner.dataset.collate_fns import default_collate_fn -from xtuner.dataset.map_fns import template_map_fn_factory +from xtuner.dataset.map_fns import openai_map_fn, template_map_fn_factory from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook, VarlenAttnArgsToMessageHubHook) from xtuner.engine.runner import TrainLoop @@ -111,7 +111,7 @@ dataset=dict(type=load_dataset, path='json', data_files=data_files), tokenizer=tokenizer, max_length=max_length, - dataset_map_fn=None, + dataset_map_fn=openai_map_fn, template_map_fn=dict( type=template_map_fn_factory, template=prompt_template), remove_unused_columns=True, diff --git a/xtuner/configs/custom_dataset/sft/mistral/mistral_7b_full_finetune_custom_sft_e1.py b/xtuner/configs/custom_dataset/sft/mistral/mistral_7b_full_finetune_custom_sft_e1.py index f6aa0f574..0af78f79f 100644 --- a/xtuner/configs/custom_dataset/sft/mistral/mistral_7b_full_finetune_custom_sft_e1.py +++ b/xtuner/configs/custom_dataset/sft/mistral/mistral_7b_full_finetune_custom_sft_e1.py @@ -26,7 +26,7 @@ from xtuner.dataset import process_hf_dataset from xtuner.dataset.collate_fns import default_collate_fn -from xtuner.dataset.map_fns import template_map_fn_factory +from xtuner.dataset.map_fns import openai_map_fn, template_map_fn_factory from xtuner.dataset.samplers import InternRepoSampler from xtuner.engine import (DatasetInfoHook, EvaluateChatHook, ThroughputHook, VarlenAttnArgsToMessageHubHook) @@ -114,7 +114,7 @@ dataset=dict(type=load_dataset, path='json', data_files=data_files), tokenizer=tokenizer, max_length=max_length, - dataset_map_fn=None, + dataset_map_fn=openai_map_fn, template_map_fn=dict( type=template_map_fn_factory, template=prompt_template), remove_unused_columns=True, diff --git a/xtuner/configs/custom_dataset/sft/mixtral/mixtral_8x7b_instruct_qlora_custom_sft_e1.py b/xtuner/configs/custom_dataset/sft/mixtral/mixtral_8x7b_instruct_qlora_custom_sft_e1.py index 3eea2a9f5..91cda57ec 100644 --- a/xtuner/configs/custom_dataset/sft/mixtral/mixtral_8x7b_instruct_qlora_custom_sft_e1.py +++ b/xtuner/configs/custom_dataset/sft/mixtral/mixtral_8x7b_instruct_qlora_custom_sft_e1.py @@ -26,7 +26,7 @@ from xtuner.dataset import process_hf_dataset from xtuner.dataset.collate_fns import default_collate_fn -from xtuner.dataset.map_fns import template_map_fn_factory +from xtuner.dataset.map_fns import openai_map_fn, template_map_fn_factory from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook, VarlenAttnArgsToMessageHubHook) from xtuner.engine.runner import TrainLoop @@ -114,7 +114,7 @@ dataset=dict(type=load_dataset, path='json', data_files=data_files), tokenizer=tokenizer, max_length=max_length, - dataset_map_fn=None, + dataset_map_fn=openai_map_fn, template_map_fn=dict( type=template_map_fn_factory, template=prompt_template), remove_unused_columns=True, diff --git a/xtuner/configs/custom_dataset/sft/qwen/qwen1_5_0_5b_chat_qlora_custom_sft_e1.py b/xtuner/configs/custom_dataset/sft/qwen/qwen1_5_0_5b_chat_qlora_custom_sft_e1.py index 8f26fe5ad..3066f0be9 100644 --- a/xtuner/configs/custom_dataset/sft/qwen/qwen1_5_0_5b_chat_qlora_custom_sft_e1.py +++ b/xtuner/configs/custom_dataset/sft/qwen/qwen1_5_0_5b_chat_qlora_custom_sft_e1.py @@ -26,7 +26,7 @@ from xtuner.dataset import process_hf_dataset from xtuner.dataset.collate_fns import default_collate_fn -from xtuner.dataset.map_fns import template_map_fn_factory +from xtuner.dataset.map_fns import openai_map_fn, template_map_fn_factory from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook, VarlenAttnArgsToMessageHubHook) from xtuner.engine.runner import TrainLoop @@ -111,7 +111,7 @@ dataset=dict(type=load_dataset, path='json', data_files=data_files), tokenizer=tokenizer, max_length=max_length, - dataset_map_fn=None, + dataset_map_fn=openai_map_fn, template_map_fn=dict( type=template_map_fn_factory, template=prompt_template), remove_unused_columns=True, diff --git a/xtuner/configs/custom_dataset/sft/qwen/qwen1_5_14b_chat_qlora_custom_sft_e1.py b/xtuner/configs/custom_dataset/sft/qwen/qwen1_5_14b_chat_qlora_custom_sft_e1.py index 70aa8d7f6..642592f0c 100644 --- a/xtuner/configs/custom_dataset/sft/qwen/qwen1_5_14b_chat_qlora_custom_sft_e1.py +++ b/xtuner/configs/custom_dataset/sft/qwen/qwen1_5_14b_chat_qlora_custom_sft_e1.py @@ -26,7 +26,7 @@ from xtuner.dataset import process_hf_dataset from xtuner.dataset.collate_fns import default_collate_fn -from xtuner.dataset.map_fns import template_map_fn_factory +from xtuner.dataset.map_fns import openai_map_fn, template_map_fn_factory from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook, VarlenAttnArgsToMessageHubHook) from xtuner.engine.runner import TrainLoop @@ -111,7 +111,7 @@ dataset=dict(type=load_dataset, path='json', data_files=data_files), tokenizer=tokenizer, max_length=max_length, - dataset_map_fn=None, + dataset_map_fn=openai_map_fn, template_map_fn=dict( type=template_map_fn_factory, template=prompt_template), remove_unused_columns=True, diff --git a/xtuner/configs/custom_dataset/sft/qwen/qwen1_5_1_8b_chat_qlora_custom_sft_e1.py b/xtuner/configs/custom_dataset/sft/qwen/qwen1_5_1_8b_chat_qlora_custom_sft_e1.py index d7196c53a..3790006d7 100644 --- a/xtuner/configs/custom_dataset/sft/qwen/qwen1_5_1_8b_chat_qlora_custom_sft_e1.py +++ b/xtuner/configs/custom_dataset/sft/qwen/qwen1_5_1_8b_chat_qlora_custom_sft_e1.py @@ -26,7 +26,7 @@ from xtuner.dataset import process_hf_dataset from xtuner.dataset.collate_fns import default_collate_fn -from xtuner.dataset.map_fns import template_map_fn_factory +from xtuner.dataset.map_fns import openai_map_fn, template_map_fn_factory from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook, VarlenAttnArgsToMessageHubHook) from xtuner.engine.runner import TrainLoop @@ -111,7 +111,7 @@ dataset=dict(type=load_dataset, path='json', data_files=data_files), tokenizer=tokenizer, max_length=max_length, - dataset_map_fn=None, + dataset_map_fn=openai_map_fn, template_map_fn=dict( type=template_map_fn_factory, template=prompt_template), remove_unused_columns=True, diff --git a/xtuner/configs/custom_dataset/sft/qwen/qwen1_5_4b_chat_qlora_custom_sft_e1.py b/xtuner/configs/custom_dataset/sft/qwen/qwen1_5_4b_chat_qlora_custom_sft_e1.py index 0c2808485..36d3e6cd0 100644 --- a/xtuner/configs/custom_dataset/sft/qwen/qwen1_5_4b_chat_qlora_custom_sft_e1.py +++ b/xtuner/configs/custom_dataset/sft/qwen/qwen1_5_4b_chat_qlora_custom_sft_e1.py @@ -26,7 +26,7 @@ from xtuner.dataset import process_hf_dataset from xtuner.dataset.collate_fns import default_collate_fn -from xtuner.dataset.map_fns import template_map_fn_factory +from xtuner.dataset.map_fns import openai_map_fn, template_map_fn_factory from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook, VarlenAttnArgsToMessageHubHook) from xtuner.engine.runner import TrainLoop @@ -111,7 +111,7 @@ dataset=dict(type=load_dataset, path='json', data_files=data_files), tokenizer=tokenizer, max_length=max_length, - dataset_map_fn=None, + dataset_map_fn=openai_map_fn, template_map_fn=dict( type=template_map_fn_factory, template=prompt_template), remove_unused_columns=True, diff --git a/xtuner/configs/custom_dataset/sft/qwen/qwen1_5_72b_chat_qlora_custom_sft_e1.py b/xtuner/configs/custom_dataset/sft/qwen/qwen1_5_72b_chat_qlora_custom_sft_e1.py index 88bc7a3ed..d152c207d 100644 --- a/xtuner/configs/custom_dataset/sft/qwen/qwen1_5_72b_chat_qlora_custom_sft_e1.py +++ b/xtuner/configs/custom_dataset/sft/qwen/qwen1_5_72b_chat_qlora_custom_sft_e1.py @@ -26,7 +26,7 @@ from xtuner.dataset import process_hf_dataset from xtuner.dataset.collate_fns import default_collate_fn -from xtuner.dataset.map_fns import template_map_fn_factory +from xtuner.dataset.map_fns import openai_map_fn, template_map_fn_factory from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook, VarlenAttnArgsToMessageHubHook) from xtuner.engine.runner import TrainLoop @@ -111,7 +111,7 @@ dataset=dict(type=load_dataset, path='json', data_files=data_files), tokenizer=tokenizer, max_length=max_length, - dataset_map_fn=None, + dataset_map_fn=openai_map_fn, template_map_fn=dict( type=template_map_fn_factory, template=prompt_template), remove_unused_columns=True, diff --git a/xtuner/configs/custom_dataset/sft/qwen/qwen1_5_7b_chat_qlora_custom_sft_e1.py b/xtuner/configs/custom_dataset/sft/qwen/qwen1_5_7b_chat_qlora_custom_sft_e1.py index 67b02d626..1098c5ca8 100644 --- a/xtuner/configs/custom_dataset/sft/qwen/qwen1_5_7b_chat_qlora_custom_sft_e1.py +++ b/xtuner/configs/custom_dataset/sft/qwen/qwen1_5_7b_chat_qlora_custom_sft_e1.py @@ -26,7 +26,7 @@ from xtuner.dataset import process_hf_dataset from xtuner.dataset.collate_fns import default_collate_fn -from xtuner.dataset.map_fns import template_map_fn_factory +from xtuner.dataset.map_fns import openai_map_fn, template_map_fn_factory from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook, VarlenAttnArgsToMessageHubHook) from xtuner.engine.runner import TrainLoop @@ -111,7 +111,7 @@ dataset=dict(type=load_dataset, path='json', data_files=data_files), tokenizer=tokenizer, max_length=max_length, - dataset_map_fn=None, + dataset_map_fn=openai_map_fn, template_map_fn=dict( type=template_map_fn_factory, template=prompt_template), remove_unused_columns=True, diff --git a/xtuner/configs/custom_dataset/sft/qwen/qwen_1_8b_chat_qlora_custom_sft_e1.py b/xtuner/configs/custom_dataset/sft/qwen/qwen_1_8b_chat_qlora_custom_sft_e1.py index 327c3c7c3..2d517e897 100644 --- a/xtuner/configs/custom_dataset/sft/qwen/qwen_1_8b_chat_qlora_custom_sft_e1.py +++ b/xtuner/configs/custom_dataset/sft/qwen/qwen_1_8b_chat_qlora_custom_sft_e1.py @@ -26,7 +26,7 @@ from xtuner.dataset import process_hf_dataset from xtuner.dataset.collate_fns import default_collate_fn -from xtuner.dataset.map_fns import template_map_fn_factory +from xtuner.dataset.map_fns import openai_map_fn, template_map_fn_factory from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook, VarlenAttnArgsToMessageHubHook) from xtuner.engine.runner import TrainLoop @@ -112,7 +112,7 @@ dataset=dict(type=load_dataset, path='json', data_files=data_files), tokenizer=tokenizer, max_length=max_length, - dataset_map_fn=None, + dataset_map_fn=openai_map_fn, template_map_fn=dict( type=template_map_fn_factory, template=prompt_template), remove_unused_columns=True, diff --git a/xtuner/configs/custom_dataset/sft/qwen/qwen_72b_qlora_custom_sft_e1.py b/xtuner/configs/custom_dataset/sft/qwen/qwen_72b_qlora_custom_sft_e1.py index eb5fed1f6..e1156a1aa 100644 --- a/xtuner/configs/custom_dataset/sft/qwen/qwen_72b_qlora_custom_sft_e1.py +++ b/xtuner/configs/custom_dataset/sft/qwen/qwen_72b_qlora_custom_sft_e1.py @@ -26,7 +26,7 @@ from xtuner.dataset import process_hf_dataset from xtuner.dataset.collate_fns import default_collate_fn -from xtuner.dataset.map_fns import template_map_fn_factory +from xtuner.dataset.map_fns import openai_map_fn, template_map_fn_factory from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook, VarlenAttnArgsToMessageHubHook) from xtuner.engine.runner import TrainLoop @@ -112,7 +112,7 @@ dataset=dict(type=load_dataset, path='json', data_files=data_files), tokenizer=tokenizer, max_length=max_length, - dataset_map_fn=None, + dataset_map_fn=openai_map_fn, template_map_fn=dict( type=template_map_fn_factory, template=prompt_template), remove_unused_columns=True, diff --git a/xtuner/configs/custom_dataset/sft/qwen/qwen_7b_chat_qlora_custom_sft_e1.py b/xtuner/configs/custom_dataset/sft/qwen/qwen_7b_chat_qlora_custom_sft_e1.py index 22acb42c8..b6fcaacba 100644 --- a/xtuner/configs/custom_dataset/sft/qwen/qwen_7b_chat_qlora_custom_sft_e1.py +++ b/xtuner/configs/custom_dataset/sft/qwen/qwen_7b_chat_qlora_custom_sft_e1.py @@ -26,7 +26,7 @@ from xtuner.dataset import process_hf_dataset from xtuner.dataset.collate_fns import default_collate_fn -from xtuner.dataset.map_fns import template_map_fn_factory +from xtuner.dataset.map_fns import openai_map_fn, template_map_fn_factory from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook, VarlenAttnArgsToMessageHubHook) from xtuner.engine.runner import TrainLoop @@ -112,7 +112,7 @@ dataset=dict(type=load_dataset, path='json', data_files=data_files), tokenizer=tokenizer, max_length=max_length, - dataset_map_fn=None, + dataset_map_fn=openai_map_fn, template_map_fn=dict( type=template_map_fn_factory, template=prompt_template), remove_unused_columns=True, diff --git a/xtuner/configs/custom_dataset/sft/starcoder/starcoder_qlora_custom_sft_e1.py b/xtuner/configs/custom_dataset/sft/starcoder/starcoder_qlora_custom_sft_e1.py index d15b023da..d79484dcf 100644 --- a/xtuner/configs/custom_dataset/sft/starcoder/starcoder_qlora_custom_sft_e1.py +++ b/xtuner/configs/custom_dataset/sft/starcoder/starcoder_qlora_custom_sft_e1.py @@ -26,7 +26,7 @@ from xtuner.dataset import process_hf_dataset from xtuner.dataset.collate_fns import default_collate_fn -from xtuner.dataset.map_fns import template_map_fn_factory +from xtuner.dataset.map_fns import openai_map_fn, template_map_fn_factory from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook, VarlenAttnArgsToMessageHubHook) from xtuner.engine.runner import TrainLoop @@ -114,7 +114,7 @@ dataset=dict(type=load_dataset, path='json', data_files=data_files), tokenizer=tokenizer, max_length=max_length, - dataset_map_fn=None, + dataset_map_fn=openai_map_fn, template_map_fn=dict( type=template_map_fn_factory, template=prompt_template), max_dataset_length=max_dataset_length, diff --git a/xtuner/configs/custom_dataset/sft/yi/yi_34b_qlora_custom_sft_e1.py b/xtuner/configs/custom_dataset/sft/yi/yi_34b_qlora_custom_sft_e1.py index 2cbde95a7..4906ab5f7 100644 --- a/xtuner/configs/custom_dataset/sft/yi/yi_34b_qlora_custom_sft_e1.py +++ b/xtuner/configs/custom_dataset/sft/yi/yi_34b_qlora_custom_sft_e1.py @@ -26,7 +26,7 @@ from xtuner.dataset import process_hf_dataset from xtuner.dataset.collate_fns import default_collate_fn -from xtuner.dataset.map_fns import template_map_fn_factory +from xtuner.dataset.map_fns import openai_map_fn, template_map_fn_factory from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook, VarlenAttnArgsToMessageHubHook) from xtuner.engine.runner import TrainLoop @@ -111,7 +111,7 @@ dataset=dict(type=load_dataset, path='json', data_files=data_files), tokenizer=tokenizer, max_length=max_length, - dataset_map_fn=None, + dataset_map_fn=openai_map_fn, template_map_fn=dict( type=template_map_fn_factory, template=prompt_template), remove_unused_columns=True, diff --git a/xtuner/configs/custom_dataset/sft/yi/yi_6b_qlora_custom_sft_e1.py b/xtuner/configs/custom_dataset/sft/yi/yi_6b_qlora_custom_sft_e1.py index 5b1281964..96a684a22 100644 --- a/xtuner/configs/custom_dataset/sft/yi/yi_6b_qlora_custom_sft_e1.py +++ b/xtuner/configs/custom_dataset/sft/yi/yi_6b_qlora_custom_sft_e1.py @@ -26,7 +26,7 @@ from xtuner.dataset import process_hf_dataset from xtuner.dataset.collate_fns import default_collate_fn -from xtuner.dataset.map_fns import template_map_fn_factory +from xtuner.dataset.map_fns import openai_map_fn, template_map_fn_factory from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook, VarlenAttnArgsToMessageHubHook) from xtuner.engine.runner import TrainLoop @@ -111,7 +111,7 @@ dataset=dict(type=load_dataset, path='json', data_files=data_files), tokenizer=tokenizer, max_length=max_length, - dataset_map_fn=None, + dataset_map_fn=openai_map_fn, template_map_fn=dict( type=template_map_fn_factory, template=prompt_template), remove_unused_columns=True, diff --git a/xtuner/configs/custom_dataset/sft/zephyr/zephyr_7b_beta_qlora_custom_sft_e1.py b/xtuner/configs/custom_dataset/sft/zephyr/zephyr_7b_beta_qlora_custom_sft_e1.py index 7cb010a99..b2349c2da 100644 --- a/xtuner/configs/custom_dataset/sft/zephyr/zephyr_7b_beta_qlora_custom_sft_e1.py +++ b/xtuner/configs/custom_dataset/sft/zephyr/zephyr_7b_beta_qlora_custom_sft_e1.py @@ -26,7 +26,7 @@ from xtuner.dataset import process_hf_dataset from xtuner.dataset.collate_fns import default_collate_fn -from xtuner.dataset.map_fns import template_map_fn_factory +from xtuner.dataset.map_fns import openai_map_fn, template_map_fn_factory from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook, VarlenAttnArgsToMessageHubHook) from xtuner.engine.runner import TrainLoop @@ -111,7 +111,7 @@ dataset=dict(type=load_dataset, path='json', data_files=data_files), tokenizer=tokenizer, max_length=max_length, - dataset_map_fn=None, + dataset_map_fn=openai_map_fn, template_map_fn=dict( type=template_map_fn_factory, template=prompt_template), remove_unused_columns=True,