
Commit dcfcadf

fix qwen2.5 template (#2081)
1 parent 1f0b9f5 commit dcfcadf

10 files changed (+103, -97 lines)

docs/source/Instruction/LLM微调文档.md

Lines changed: 3 additions & 3 deletions
@@ -59,12 +59,12 @@ sft_args = SftArguments(
     dataset=[f'{DatasetName.blossom_math_zh}#2000'],
     output_dir='output')
 result = sft_main(sft_args)
-best_model_checkpoint = result['best_model_checkpoint']
-print(f'best_model_checkpoint: {best_model_checkpoint}')
+last_model_checkpoint = result['last_model_checkpoint']
+print(f'last_model_checkpoint: {last_model_checkpoint}')
 torch.cuda.empty_cache()

 infer_args = InferArguments(
-    ckpt_dir=best_model_checkpoint,
+    ckpt_dir=last_model_checkpoint,
     load_dataset_config=True)
 # merge_lora(infer_args, device_map='cpu')
 result = infer_main(infer_args)

docs/source/Instruction/支持的模型和数据集.md

Lines changed: 33 additions & 33 deletions
Large diffs are not rendered by default.

docs/source/LLM/Qwen1.5全流程最佳实践.md

Lines changed: 2 additions & 2 deletions
@@ -198,8 +198,8 @@ sft_args = SftArguments(
     model_name=['小黄', 'Xiao Huang'],
     model_author=['魔搭', 'ModelScope'])
 output = sft_main(sft_args)
-best_model_checkpoint = output['best_model_checkpoint']
-print(f'best_model_checkpoint: {best_model_checkpoint}')
+last_model_checkpoint = output['last_model_checkpoint']
+print(f'last_model_checkpoint: {last_model_checkpoint}')
 ```

 If you want to train on a 3090 machine, you can **reduce max_length** to 1024, use model parallelism, or use deepspeed-zero3.

docs/source/LLM/自我认知微调最佳实践.md

Lines changed: 8 additions & 8 deletions
@@ -119,8 +119,8 @@ sft_args = SftArguments(
     model_name=['小黄', 'Xiao Huang'],
     model_author=['魔搭', 'ModelScope'])
 output = sft_main(sft_args)
-best_model_checkpoint = output['best_model_checkpoint']
-print(f'best_model_checkpoint: {best_model_checkpoint}')
+last_model_checkpoint = output['last_model_checkpoint']
+print(f'last_model_checkpoint: {last_model_checkpoint}')

 """Out[0]
 [INFO:swift] The logging file will be saved in: /xxx/output/qwen2-7b-instruct/v2-20240607-101038/logging.jsonl
@@ -157,7 +157,7 @@ Train: 100%|██████████████████████
 [INFO:swift] best_model_checkpoint: /xxx/output/qwen2-7b-instruct/v2-20240607-101038/checkpoint-93
 [INFO:swift] images_dir: /xxx/output/qwen2-7b-instruct/v2-20240607-101038/images
 [INFO:swift] End time of running main: 2024-06-07 10:18:41.386561
-best_model_checkpoint: /xxx/output/qwen2-7b-instruct/v2-20240607-101038/checkpoint-93
+last_model_checkpoint: /xxx/output/qwen2-7b-instruct/v2-20240607-101038/checkpoint-93
 """
 ```

@@ -197,7 +197,7 @@ swift sft \
 ```

 ## Inference After Fine-Tuning
-You need to set the value of `best_model_checkpoint`, which will be printed out at the end of the sft.
+You need to set the value of `last_model_checkpoint`, which will be printed out at the end of the sft.

 Using Python:
 ```python
@@ -206,8 +206,8 @@ os.environ['CUDA_VISIBLE_DEVICES'] = '0'

 from swift.llm import InferArguments, merge_lora, infer_main

-best_model_checkpoint = 'qwen2-7b-instruct/vx-xxx/checkpoint-xxx'
-infer_args = InferArguments(ckpt_dir=best_model_checkpoint)
+last_model_checkpoint = 'qwen2-7b-instruct/vx-xxx/checkpoint-xxx'
+infer_args = InferArguments(ckpt_dir=last_model_checkpoint)
 merge_lora(infer_args, device_map='cpu')
 result = infer_main(infer_args)

@@ -281,8 +281,8 @@ os.environ['CUDA_VISIBLE_DEVICES'] = '0'

 from swift.llm import AppUIArguments, merge_lora, app_ui_main

-best_model_checkpoint = 'qwen2-7b-instruct/vx-xxx/checkpoint-xxx'
-app_ui_args = AppUIArguments(ckpt_dir=best_model_checkpoint)
+last_model_checkpoint = 'qwen2-7b-instruct/vx-xxx/checkpoint-xxx'
+app_ui_args = AppUIArguments(ckpt_dir=last_model_checkpoint)
 merge_lora(app_ui_args, device_map='cpu')
 result = app_ui_main(app_ui_args)
 ```

docs/source_en/Instruction/LLM-fine-tuning.md

Lines changed: 3 additions & 3 deletions
@@ -55,12 +55,12 @@ sft_args = SftArguments(
     dataset=[f'{DatasetName.blossom_math_zh}#2000'],
     output_dir='output')
 result = sft_main(sft_args)
-best_model_checkpoint = result['best_model_checkpoint']
-print(f'best_model_checkpoint: {best_model_checkpoint}')
+last_model_checkpoint = result['last_model_checkpoint']
+print(f'last_model_checkpoint: {last_model_checkpoint}')
 torch.cuda.empty_cache()

 infer_args = InferArguments(
-    ckpt_dir=best_model_checkpoint,
+    ckpt_dir=last_model_checkpoint,
     load_dataset_config=True)
 # merge_lora(infer_args, device_map='cpu')
 result = infer_main(infer_args)

docs/source_en/Instruction/Supported-models-datasets.md

Lines changed: 33 additions & 33 deletions
Large diffs are not rendered by default.

docs/source_en/LLM/Qwen1.5-best-practice.md

Lines changed: 2 additions & 2 deletions
@@ -196,8 +196,8 @@ sft_args = SftArguments(
     model_name=['小黄', 'Xiao Huang'],
     model_author=['魔搭', 'ModelScope'])
 output = sft_main(sft_args)
-best_model_checkpoint = output['best_model_checkpoint']
-print(f'best_model_checkpoint: {best_model_checkpoint}')
+last_model_checkpoint = output['last_model_checkpoint']
+print(f'last_model_checkpoint: {last_model_checkpoint}')
 ```

 If you want to train on a 3090 machine, you can **reduce max_length** to 1024, use model parallelism, or use deepspeed-zero3.

docs/source_en/LLM/Self-cognition-best-practice.md

Lines changed: 8 additions & 8 deletions
@@ -122,8 +122,8 @@ sft_args = SftArguments(
     model_name=['小黄', 'Xiao Huang'],
     model_author=['魔搭', 'ModelScope'])
 output = sft_main(sft_args)
-best_model_checkpoint = output['best_model_checkpoint']
-print(f'best_model_checkpoint: {best_model_checkpoint}')
+last_model_checkpoint = output['last_model_checkpoint']
+print(f'last_model_checkpoint: {last_model_checkpoint}')

 """Out[0]
 [INFO:swift] The logging file will be saved in: /xxx/output/qwen2-7b-instruct/v2-20240607-101038/logging.jsonl
@@ -160,7 +160,7 @@ Train: 100%|██████████████████████
 [INFO:swift] best_model_checkpoint: /xxx/output/qwen2-7b-instruct/v2-20240607-101038/checkpoint-93
 [INFO:swift] images_dir: /xxx/output/qwen2-7b-instruct/v2-20240607-101038/images
 [INFO:swift] End time of running main: 2024-06-07 10:18:41.386561
-best_model_checkpoint: /xxx/output/qwen2-7b-instruct/v2-20240607-101038/checkpoint-93
+last_model_checkpoint: /xxx/output/qwen2-7b-instruct/v2-20240607-101038/checkpoint-93
 """
 ```

@@ -200,7 +200,7 @@ swift sft \
 ```

 ## Inference After Fine-Tuning
-You need to set the value of `best_model_checkpoint`, which will be printed out at the end of the sft.
+You need to set the value of `last_model_checkpoint`, which will be printed out at the end of the sft.

 Using Python:
 ```python
@@ -209,8 +209,8 @@ os.environ['CUDA_VISIBLE_DEVICES'] = '0'

 from swift.llm import InferArguments, merge_lora, infer_main

-best_model_checkpoint = 'qwen2-7b-instruct/vx-xxx/checkpoint-xxx'
-infer_args = InferArguments(ckpt_dir=best_model_checkpoint)
+last_model_checkpoint = 'qwen2-7b-instruct/vx-xxx/checkpoint-xxx'
+infer_args = InferArguments(ckpt_dir=last_model_checkpoint)
 merge_lora(infer_args, device_map='cpu')
 result = infer_main(infer_args)

@@ -271,8 +271,8 @@ os.environ['CUDA_VISIBLE_DEVICES'] = '0'

 from swift.llm import AppUIArguments, merge_lora, app_ui_main

-best_model_checkpoint = 'qwen2-7b-instruct/vx-xxx/checkpoint-xxx'
-app_ui_args = AppUIArguments(ckpt_dir=best_model_checkpoint)
+last_model_checkpoint = 'qwen2-7b-instruct/vx-xxx/checkpoint-xxx'
+app_ui_args = AppUIArguments(ckpt_dir=last_model_checkpoint)
 merge_lora(app_ui_args, device_map='cpu')
 result = app_ui_main(app_ui_args)
 ```

swift/llm/utils/model.py

Lines changed: 5 additions & 5 deletions
@@ -3478,7 +3478,7 @@ def get_model_tokenizer_qwen2_chat(model_dir: str,
     f'qwen2_5-{model_size_lower}-instruct',
     f'qwen/Qwen2.5-{model_size}-Instruct',
     LoRATM.llama,
-    TemplateType.qwen,
+    TemplateType.qwen2_5,
     get_model_tokenizer_qwen2_chat,
     support_flash_attn=True,
     support_vllm=True,
@@ -3492,7 +3492,7 @@ def get_model_tokenizer_qwen2_chat(model_dir: str,
     f'qwen2_5-{model_size_lower}-instruct-{quant_type_lower}',
     f'qwen/Qwen2.5-{model_size}-Instruct-{quant_type}',
     LoRATM.llama,
-    TemplateType.qwen,
+    TemplateType.qwen2_5,
     get_model_tokenizer_qwen2_chat,
     support_flash_attn=True,
     support_vllm=True,
@@ -3505,7 +3505,7 @@ def get_model_tokenizer_qwen2_chat(model_dir: str,
     f'qwen2_5-{model_size_lower}-instruct-awq',
     f'qwen/Qwen2.5-{model_size}-Instruct-AWQ',
     LoRATM.llama,
-    TemplateType.qwen,
+    TemplateType.qwen2_5,
     get_model_tokenizer_qwen2_chat,
     support_flash_attn=True,
     support_vllm=True,
@@ -3531,7 +3531,7 @@ def get_model_tokenizer_qwen2_chat(model_dir: str,
     f'qwen2_5-math-{model_size_lower}-instruct',
     f'qwen/Qwen2.5-Math-{model_size}-Instruct',
     LoRATM.llama,
-    TemplateType.qwen,
+    TemplateType.qwen2_5,
     get_model_tokenizer_qwen2_chat,
     support_flash_attn=True,
     support_vllm=True,
@@ -3556,7 +3556,7 @@ def get_model_tokenizer_qwen2_chat(model_dir: str,
     f'qwen2_5-coder-{model_size_lower}-instruct',
     f'qwen/Qwen2.5-Coder-{model_size}-Instruct',
     LoRATM.llama,
-    TemplateType.qwen,
+    TemplateType.qwen2_5,
     get_model_tokenizer_qwen2_chat,
     support_flash_attn=True,
     support_vllm=True,
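
Every hunk above flips the template argument of a Qwen2.5 registration from `TemplateType.qwen` to `TemplateType.qwen2_5`. Pieced together from the fragments visible in this diff, one affected registration looks roughly like the sketch below; the inline comments and the example values for `model_size`/`model_size_lower` are assumptions for illustration, not code taken from the commit.

```python
# Rough reconstruction from the diff fragments above; the surrounding code in
# swift/llm/utils/model.py (e.g. the loop that supplies model_size) may differ.
model_size, model_size_lower = '7B', '7b'  # assumed example values

register_model(
    f'qwen2_5-{model_size_lower}-instruct',  # model_type key, e.g. 'qwen2_5-7b-instruct'
    f'qwen/Qwen2.5-{model_size}-Instruct',   # ModelScope repo id
    LoRATM.llama,                            # LLaMA-style LoRA target modules
    TemplateType.qwen2_5,                    # was TemplateType.qwen before this fix
    get_model_tokenizer_qwen2_chat,          # model/tokenizer loader
    support_flash_attn=True,
    support_vllm=True)
```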

swift/llm/utils/template.py

Lines changed: 6 additions & 0 deletions
@@ -45,6 +45,7 @@ class TemplateType:
     # chat
     default = 'default'
     qwen = 'qwen'
+    qwen2_5 = 'qwen2_5'
     qwen_vl = 'qwen-vl'
     qwen_audio = 'qwen-audio'
     qwen2_audio = 'qwen2-audio'
@@ -1269,7 +1270,12 @@ def replace_box(self, index: int, example: Dict[str, Any]) -> List[Context]:
 ]


+class Qwen2_5Template(QwenTemplate):
+    system = 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.'
+
+
 register_template(TemplateType.qwen, QwenTemplate())
+register_template(TemplateType.qwen2_5, Qwen2_5Template())


 class QwenVLTemplate(_QwenVLTemplateMixin, QwenTemplate):
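
The fix itself is two lines: `Qwen2_5Template` subclasses `QwenTemplate` and overrides only the default system prompt, using the one Qwen2.5's chat models ship with. Below is a minimal sketch of the single-turn prompt this should produce, assuming the standard ChatML layout that the Qwen templates render; `render_chatml` is a hypothetical helper for illustration, not part of swift.

```python
# Illustrative only: assumes QwenTemplate renders standard ChatML; this helper
# is not part of the swift codebase.
QWEN2_5_SYSTEM = 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.'

def render_chatml(query: str, system: str = QWEN2_5_SYSTEM) -> str:
    """Render a single-turn ChatML prompt with Qwen2.5's default system message."""
    return (f'<|im_start|>system\n{system}<|im_end|>\n'
            f'<|im_start|>user\n{query}<|im_end|>\n'
            f'<|im_start|>assistant\n')

print(render_chatml('Who are you?'))
```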
