
Commit dcfcadf

fix qwen2.5 template (#2081)
1 parent 1f0b9f5 commit dcfcadf

10 files changed (+103, -97 lines)

docs/source/Instruction/LLM微调文档.md

Lines changed: 3 additions & 3 deletions
@@ -59,12 +59,12 @@ sft_args = SftArguments(
     dataset=[f'{DatasetName.blossom_math_zh}#2000'],
     output_dir='output')
 result = sft_main(sft_args)
-best_model_checkpoint = result['best_model_checkpoint']
-print(f'best_model_checkpoint: {best_model_checkpoint}')
+last_model_checkpoint = result['last_model_checkpoint']
+print(f'last_model_checkpoint: {last_model_checkpoint}')
 torch.cuda.empty_cache()

 infer_args = InferArguments(
-    ckpt_dir=best_model_checkpoint,
+    ckpt_dir=last_model_checkpoint,
     load_dataset_config=True)
 # merge_lora(infer_args, device_map='cpu')
 result = infer_main(infer_args)

docs/source/Instruction/支持的模型和数据集.md

Lines changed: 33 additions & 33 deletions
Large diffs are not rendered by default.

docs/source/LLM/Qwen1.5全流程最佳实践.md

Lines changed: 2 additions & 2 deletions
@@ -198,8 +198,8 @@ sft_args = SftArguments(
     model_name=['小黄', 'Xiao Huang'],
     model_author=['魔搭', 'ModelScope'])
 output = sft_main(sft_args)
-best_model_checkpoint = output['best_model_checkpoint']
-print(f'best_model_checkpoint: {best_model_checkpoint}')
+last_model_checkpoint = output['last_model_checkpoint']
+print(f'last_model_checkpoint: {last_model_checkpoint}')
 ```

 If you want to train on a 3090 machine, you can **reduce max_length** to 1024, use model parallelism, or use deepspeed-zero3.

docs/source/LLM/自我认知微调最佳实践.md

Lines changed: 8 additions & 8 deletions
@@ -119,8 +119,8 @@ sft_args = SftArguments(
     model_name=['小黄', 'Xiao Huang'],
     model_author=['魔搭', 'ModelScope'])
 output = sft_main(sft_args)
-best_model_checkpoint = output['best_model_checkpoint']
-print(f'best_model_checkpoint: {best_model_checkpoint}')
+last_model_checkpoint = output['last_model_checkpoint']
+print(f'last_model_checkpoint: {last_model_checkpoint}')

 """Out[0]
 [INFO:swift] The logging file will be saved in: /xxx/output/qwen2-7b-instruct/v2-20240607-101038/logging.jsonl
@@ -157,7 +157,7 @@ Train: 100%|██████████████████████
 [INFO:swift] best_model_checkpoint: /xxx/output/qwen2-7b-instruct/v2-20240607-101038/checkpoint-93
 [INFO:swift] images_dir: /xxx/output/qwen2-7b-instruct/v2-20240607-101038/images
 [INFO:swift] End time of running main: 2024-06-07 10:18:41.386561
-best_model_checkpoint: /xxx/output/qwen2-7b-instruct/v2-20240607-101038/checkpoint-93
+last_model_checkpoint: /xxx/output/qwen2-7b-instruct/v2-20240607-101038/checkpoint-93
 """
 ```

@@ -197,7 +197,7 @@ swift sft \
 ```

 ## Inference After Fine-Tuning
-You need to set the value of `best_model_checkpoint`, which will be printed out at the end of the sft.
+You need to set the value of `last_model_checkpoint`, which will be printed out at the end of the sft.

 Using Python:
 ```python
@@ -206,8 +206,8 @@ os.environ['CUDA_VISIBLE_DEVICES'] = '0'

 from swift.llm import InferArguments, merge_lora, infer_main

-best_model_checkpoint = 'qwen2-7b-instruct/vx-xxx/checkpoint-xxx'
-infer_args = InferArguments(ckpt_dir=best_model_checkpoint)
+last_model_checkpoint = 'qwen2-7b-instruct/vx-xxx/checkpoint-xxx'
+infer_args = InferArguments(ckpt_dir=last_model_checkpoint)
 merge_lora(infer_args, device_map='cpu')
 result = infer_main(infer_args)

@@ -281,8 +281,8 @@ os.environ['CUDA_VISIBLE_DEVICES'] = '0'

 from swift.llm import AppUIArguments, merge_lora, app_ui_main

-best_model_checkpoint = 'qwen2-7b-instruct/vx-xxx/checkpoint-xxx'
-app_ui_args = AppUIArguments(ckpt_dir=best_model_checkpoint)
+last_model_checkpoint = 'qwen2-7b-instruct/vx-xxx/checkpoint-xxx'
+app_ui_args = AppUIArguments(ckpt_dir=last_model_checkpoint)
 merge_lora(app_ui_args, device_map='cpu')
 result = app_ui_main(app_ui_args)
 ```

docs/source_en/Instruction/LLM-fine-tuning.md

Lines changed: 3 additions & 3 deletions
@@ -55,12 +55,12 @@ sft_args = SftArguments(
     dataset=[f'{DatasetName.blossom_math_zh}#2000'],
     output_dir='output')
 result = sft_main(sft_args)
-best_model_checkpoint = result['best_model_checkpoint']
-print(f'best_model_checkpoint: {best_model_checkpoint}')
+last_model_checkpoint = result['last_model_checkpoint']
+print(f'last_model_checkpoint: {last_model_checkpoint}')
 torch.cuda.empty_cache()

 infer_args = InferArguments(
-    ckpt_dir=best_model_checkpoint,
+    ckpt_dir=last_model_checkpoint,
     load_dataset_config=True)
 # merge_lora(infer_args, device_map='cpu')
 result = infer_main(infer_args)

docs/source_en/Instruction/Supported-models-datasets.md

Lines changed: 33 additions & 33 deletions
Large diffs are not rendered by default.

docs/source_en/LLM/Qwen1.5-best-practice.md

Lines changed: 2 additions & 2 deletions
@@ -196,8 +196,8 @@ sft_args = SftArguments(
     model_name=['小黄', 'Xiao Huang'],
     model_author=['魔搭', 'ModelScope'])
 output = sft_main(sft_args)
-best_model_checkpoint = output['best_model_checkpoint']
-print(f'best_model_checkpoint: {best_model_checkpoint}')
+last_model_checkpoint = output['last_model_checkpoint']
+print(f'last_model_checkpoint: {last_model_checkpoint}')
 ```

 If you want to train on a 3090 machine, you can **reduce max_length** to 1024, use model parallelism, or use deepspeed-zero3.

docs/source_en/LLM/Self-cognition-best-practice.md

Lines changed: 8 additions & 8 deletions
@@ -122,8 +122,8 @@ sft_args = SftArguments(
     model_name=['小黄', 'Xiao Huang'],
     model_author=['魔搭', 'ModelScope'])
 output = sft_main(sft_args)
-best_model_checkpoint = output['best_model_checkpoint']
-print(f'best_model_checkpoint: {best_model_checkpoint}')
+last_model_checkpoint = output['last_model_checkpoint']
+print(f'last_model_checkpoint: {last_model_checkpoint}')

 """Out[0]
 [INFO:swift] The logging file will be saved in: /xxx/output/qwen2-7b-instruct/v2-20240607-101038/logging.jsonl
@@ -160,7 +160,7 @@ Train: 100%|██████████████████████
 [INFO:swift] best_model_checkpoint: /xxx/output/qwen2-7b-instruct/v2-20240607-101038/checkpoint-93
 [INFO:swift] images_dir: /xxx/output/qwen2-7b-instruct/v2-20240607-101038/images
 [INFO:swift] End time of running main: 2024-06-07 10:18:41.386561
-best_model_checkpoint: /xxx/output/qwen2-7b-instruct/v2-20240607-101038/checkpoint-93
+last_model_checkpoint: /xxx/output/qwen2-7b-instruct/v2-20240607-101038/checkpoint-93
 """
 ```

@@ -200,7 +200,7 @@ swift sft \
 ```

 ## Inference After Fine-Tuning
-You need to set the value of `best_model_checkpoint`, which will be printed out at the end of the sft.
+You need to set the value of `last_model_checkpoint`, which will be printed out at the end of the sft.

 Using Python:
 ```python
@@ -209,8 +209,8 @@ os.environ['CUDA_VISIBLE_DEVICES'] = '0'

 from swift.llm import InferArguments, merge_lora, infer_main

-best_model_checkpoint = 'qwen2-7b-instruct/vx-xxx/checkpoint-xxx'
-infer_args = InferArguments(ckpt_dir=best_model_checkpoint)
+last_model_checkpoint = 'qwen2-7b-instruct/vx-xxx/checkpoint-xxx'
+infer_args = InferArguments(ckpt_dir=last_model_checkpoint)
 merge_lora(infer_args, device_map='cpu')
 result = infer_main(infer_args)

@@ -271,8 +271,8 @@ os.environ['CUDA_VISIBLE_DEVICES'] = '0'

 from swift.llm import AppUIArguments, merge_lora, app_ui_main

-best_model_checkpoint = 'qwen2-7b-instruct/vx-xxx/checkpoint-xxx'
-app_ui_args = AppUIArguments(ckpt_dir=best_model_checkpoint)
+last_model_checkpoint = 'qwen2-7b-instruct/vx-xxx/checkpoint-xxx'
+app_ui_args = AppUIArguments(ckpt_dir=last_model_checkpoint)
 merge_lora(app_ui_args, device_map='cpu')
 result = app_ui_main(app_ui_args)
 ```

swift/llm/utils/model.py

Lines changed: 5 additions & 5 deletions
@@ -3478,7 +3478,7 @@ def get_model_tokenizer_qwen2_chat(model_dir: str,
     f'qwen2_5-{model_size_lower}-instruct',
     f'qwen/Qwen2.5-{model_size}-Instruct',
     LoRATM.llama,
-    TemplateType.qwen,
+    TemplateType.qwen2_5,
     get_model_tokenizer_qwen2_chat,
     support_flash_attn=True,
     support_vllm=True,
@@ -3492,7 +3492,7 @@ def get_model_tokenizer_qwen2_chat(model_dir: str,
     f'qwen2_5-{model_size_lower}-instruct-{quant_type_lower}',
     f'qwen/Qwen2.5-{model_size}-Instruct-{quant_type}',
     LoRATM.llama,
-    TemplateType.qwen,
+    TemplateType.qwen2_5,
     get_model_tokenizer_qwen2_chat,
     support_flash_attn=True,
     support_vllm=True,
@@ -3505,7 +3505,7 @@ def get_model_tokenizer_qwen2_chat(model_dir: str,
     f'qwen2_5-{model_size_lower}-instruct-awq',
     f'qwen/Qwen2.5-{model_size}-Instruct-AWQ',
     LoRATM.llama,
-    TemplateType.qwen,
+    TemplateType.qwen2_5,
     get_model_tokenizer_qwen2_chat,
     support_flash_attn=True,
     support_vllm=True,
@@ -3531,7 +3531,7 @@ def get_model_tokenizer_qwen2_chat(model_dir: str,
     f'qwen2_5-math-{model_size_lower}-instruct',
     f'qwen/Qwen2.5-Math-{model_size}-Instruct',
     LoRATM.llama,
-    TemplateType.qwen,
+    TemplateType.qwen2_5,
     get_model_tokenizer_qwen2_chat,
     support_flash_attn=True,
     support_vllm=True,
@@ -3556,7 +3556,7 @@ def get_model_tokenizer_qwen2_chat(model_dir: str,
     f'qwen2_5-coder-{model_size_lower}-instruct',
     f'qwen/Qwen2.5-Coder-{model_size}-Instruct',
     LoRATM.llama,
-    TemplateType.qwen,
+    TemplateType.qwen2_5,
     get_model_tokenizer_qwen2_chat,
     support_flash_attn=True,
     support_vllm=True,
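
Every hunk above flips the template argument of a Qwen2.5 registration from `TemplateType.qwen` to `TemplateType.qwen2_5`. Pieced together from the fragments visible in this diff, one affected registration looks roughly like the sketch below; the inline comments and the example values for `model_size`/`model_size_lower` are assumptions for illustration, not code taken from the commit.

```python
# Rough reconstruction from the diff fragments above; the surrounding code in
# swift/llm/utils/model.py (e.g. the loop that supplies model_size) may differ.
model_size, model_size_lower = '7B', '7b'  # assumed example values

register_model(
    f'qwen2_5-{model_size_lower}-instruct',  # model_type key, e.g. 'qwen2_5-7b-instruct'
    f'qwen/Qwen2.5-{model_size}-Instruct',   # ModelScope repo id
    LoRATM.llama,                            # LLaMA-style LoRA target modules
    TemplateType.qwen2_5,                    # was TemplateType.qwen before this fix
    get_model_tokenizer_qwen2_chat,          # model/tokenizer loader
    support_flash_attn=True,
    support_vllm=True)
```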

swift/llm/utils/template.py

Lines changed: 6 additions & 0 deletions
@@ -45,6 +45,7 @@ class TemplateType:
     # chat
     default = 'default'
     qwen = 'qwen'
+    qwen2_5 = 'qwen2_5'
     qwen_vl = 'qwen-vl'
     qwen_audio = 'qwen-audio'
     qwen2_audio = 'qwen2-audio'
@@ -1269,7 +1270,12 @@ def replace_box(self, index: int, example: Dict[str, Any]) -> List[Context]:
 ]


+class Qwen2_5Template(QwenTemplate):
+    system = 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.'
+
+
 register_template(TemplateType.qwen, QwenTemplate())
+register_template(TemplateType.qwen2_5, Qwen2_5Template())


 class QwenVLTemplate(_QwenVLTemplateMixin, QwenTemplate):
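
The fix itself is two lines: `Qwen2_5Template` subclasses `QwenTemplate` and overrides only the default system prompt, using the one Qwen2.5's chat models ship with. Below is a minimal sketch of the single-turn prompt this should produce, assuming the standard ChatML layout that the Qwen templates render; `render_chatml` is a hypothetical helper for illustration, not part of swift.

```python
# Illustrative only: assumes QwenTemplate renders standard ChatML; this helper
# is not part of the swift codebase.
QWEN2_5_SYSTEM = 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.'

def render_chatml(query: str, system: str = QWEN2_5_SYSTEM) -> str:
    """Render a single-turn ChatML prompt with Qwen2.5's default system message."""
    return (f'<|im_start|>system\n{system}<|im_end|>\n'
            f'<|im_start|>user\n{query}<|im_end|>\n'
            f'<|im_start|>assistant\n')

print(render_chatml('Who are you?'))
```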
