Merged
docs/source/Instruction/LLM微调文档.md (6 changes: 3 additions & 3 deletions)

@@ -59,12 +59,12 @@ sft_args = SftArguments(
     dataset=[f'{DatasetName.blossom_math_zh}#2000'],
     output_dir='output')
 result = sft_main(sft_args)
-best_model_checkpoint = result['best_model_checkpoint']
-print(f'best_model_checkpoint: {best_model_checkpoint}')
+last_model_checkpoint = result['last_model_checkpoint']
+print(f'last_model_checkpoint: {last_model_checkpoint}')
 torch.cuda.empty_cache()

 infer_args = InferArguments(
-    ckpt_dir=best_model_checkpoint,
+    ckpt_dir=last_model_checkpoint,
     load_dataset_config=True)
 # merge_lora(infer_args, device_map='cpu')
 result = infer_main(infer_args)

docs/source/Instruction/支持的模型和数据集.md (66 changes: 33 additions & 33 deletions)

Large diffs are not rendered by default.

docs/source/LLM/Qwen1.5全流程最佳实践.md (4 changes: 2 additions & 2 deletions)

@@ -198,8 +198,8 @@ sft_args = SftArguments(
     model_name=['小黄', 'Xiao Huang'],
     model_author=['魔搭', 'ModelScope'])
 output = sft_main(sft_args)
-best_model_checkpoint = output['best_model_checkpoint']
-print(f'best_model_checkpoint: {best_model_checkpoint}')
+last_model_checkpoint = output['last_model_checkpoint']
+print(f'last_model_checkpoint: {last_model_checkpoint}')
 ```

 If you want to train on a 3090 machine, you can **reduce max_length** to 1024, use model parallelism, or use deepspeed-zero3.

docs/source/LLM/自我认知微调最佳实践.md (16 changes: 8 additions & 8 deletions)

@@ -119,8 +119,8 @@ sft_args = SftArguments(
     model_name=['小黄', 'Xiao Huang'],
     model_author=['魔搭', 'ModelScope'])
 output = sft_main(sft_args)
-best_model_checkpoint = output['best_model_checkpoint']
-print(f'best_model_checkpoint: {best_model_checkpoint}')
+last_model_checkpoint = output['last_model_checkpoint']
+print(f'last_model_checkpoint: {last_model_checkpoint}')

 """Out[0]
 [INFO:swift] The logging file will be saved in: /xxx/output/qwen2-7b-instruct/v2-20240607-101038/logging.jsonl
@@ -157,7 +157,7 @@ Train: 100%|██████████████████████
 [INFO:swift] best_model_checkpoint: /xxx/output/qwen2-7b-instruct/v2-20240607-101038/checkpoint-93
 [INFO:swift] images_dir: /xxx/output/qwen2-7b-instruct/v2-20240607-101038/images
 [INFO:swift] End time of running main: 2024-06-07 10:18:41.386561
-best_model_checkpoint: /xxx/output/qwen2-7b-instruct/v2-20240607-101038/checkpoint-93
+last_model_checkpoint: /xxx/output/qwen2-7b-instruct/v2-20240607-101038/checkpoint-93
 """
 ```

@@ -197,7 +197,7 @@ swift sft \
 ```

 ## Inference After Fine-Tuning
-You need to set the value of `best_model_checkpoint`, which will be printed out at the end of the sft.
+You need to set the value of `last_model_checkpoint`, which will be printed out at the end of the sft.

 Using Python:
 ```python
@@ -206,8 +206,8 @@ os.environ['CUDA_VISIBLE_DEVICES'] = '0'

 from swift.llm import InferArguments, merge_lora, infer_main

-best_model_checkpoint = 'qwen2-7b-instruct/vx-xxx/checkpoint-xxx'
-infer_args = InferArguments(ckpt_dir=best_model_checkpoint)
+last_model_checkpoint = 'qwen2-7b-instruct/vx-xxx/checkpoint-xxx'
+infer_args = InferArguments(ckpt_dir=last_model_checkpoint)
 merge_lora(infer_args, device_map='cpu')
 result = infer_main(infer_args)

@@ -281,8 +281,8 @@ os.environ['CUDA_VISIBLE_DEVICES'] = '0'

 from swift.llm import AppUIArguments, merge_lora, app_ui_main

-best_model_checkpoint = 'qwen2-7b-instruct/vx-xxx/checkpoint-xxx'
-app_ui_args = AppUIArguments(ckpt_dir=best_model_checkpoint)
+last_model_checkpoint = 'qwen2-7b-instruct/vx-xxx/checkpoint-xxx'
+app_ui_args = AppUIArguments(ckpt_dir=last_model_checkpoint)
 merge_lora(app_ui_args, device_map='cpu')
 result = app_ui_main(app_ui_args)
 ```

docs/source_en/Instruction/LLM-fine-tuning.md (6 changes: 3 additions & 3 deletions)

@@ -55,12 +55,12 @@ sft_args = SftArguments(
     dataset=[f'{DatasetName.blossom_math_zh}#2000'],
     output_dir='output')
 result = sft_main(sft_args)
-best_model_checkpoint = result['best_model_checkpoint']
-print(f'best_model_checkpoint: {best_model_checkpoint}')
+last_model_checkpoint = result['last_model_checkpoint']
+print(f'last_model_checkpoint: {last_model_checkpoint}')
 torch.cuda.empty_cache()

 infer_args = InferArguments(
-    ckpt_dir=best_model_checkpoint,
+    ckpt_dir=last_model_checkpoint,
     load_dataset_config=True)
 # merge_lora(infer_args, device_map='cpu')
 result = infer_main(infer_args)

docs/source_en/Instruction/Supported-models-datasets.md (66 changes: 33 additions & 33 deletions)

Large diffs are not rendered by default.

docs/source_en/LLM/Qwen1.5-best-practice.md (4 changes: 2 additions & 2 deletions)

@@ -196,8 +196,8 @@ sft_args = SftArguments(
     model_name=['小黄', 'Xiao Huang'],
     model_author=['魔搭', 'ModelScope'])
 output = sft_main(sft_args)
-best_model_checkpoint = output['best_model_checkpoint']
-print(f'best_model_checkpoint: {best_model_checkpoint}')
+last_model_checkpoint = output['last_model_checkpoint']
+print(f'last_model_checkpoint: {last_model_checkpoint}')
 ```

 If you want to train on a 3090 machine, you can **reduce max_length** to 1024, use model parallelism, or use deepspeed-zero3.

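To make the 3090 tip above concrete, here is a minimal sketch of a reduced-max_length run. It reuses the `SftArguments`/`sft_main` API shown in the diffs; the `model_type` and dataset strings are illustrative assumptions, not values taken from this PR.

```python
# Minimal sketch: fit fine-tuning on a single 24 GB 3090 by capping max_length.
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

from swift.llm import SftArguments, sft_main

sft_args = SftArguments(
    model_type='qwen1half-7b-chat',    # assumed model_type; substitute your own
    dataset=['blossom-math-zh#2000'],  # illustrative dataset spec
    max_length=1024,                   # reduced to lower activation memory
    output_dir='output')
output = sft_main(sft_args)
print(f"last_model_checkpoint: {output['last_model_checkpoint']}")
```
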
docs/source_en/LLM/Self-cognition-best-practice.md (16 changes: 8 additions & 8 deletions)

@@ -122,8 +122,8 @@ sft_args = SftArguments(
     model_name=['小黄', 'Xiao Huang'],
     model_author=['魔搭', 'ModelScope'])
 output = sft_main(sft_args)
-best_model_checkpoint = output['best_model_checkpoint']
-print(f'best_model_checkpoint: {best_model_checkpoint}')
+last_model_checkpoint = output['last_model_checkpoint']
+print(f'last_model_checkpoint: {last_model_checkpoint}')

 """Out[0]
 [INFO:swift] The logging file will be saved in: /xxx/output/qwen2-7b-instruct/v2-20240607-101038/logging.jsonl
@@ -160,7 +160,7 @@ Train: 100%|██████████████████████
 [INFO:swift] best_model_checkpoint: /xxx/output/qwen2-7b-instruct/v2-20240607-101038/checkpoint-93
 [INFO:swift] images_dir: /xxx/output/qwen2-7b-instruct/v2-20240607-101038/images
 [INFO:swift] End time of running main: 2024-06-07 10:18:41.386561
-best_model_checkpoint: /xxx/output/qwen2-7b-instruct/v2-20240607-101038/checkpoint-93
+last_model_checkpoint: /xxx/output/qwen2-7b-instruct/v2-20240607-101038/checkpoint-93
 """
 ```

@@ -200,7 +200,7 @@ swift sft \
 ```

 ## Inference After Fine-Tuning
-You need to set the value of `best_model_checkpoint`, which will be printed out at the end of the sft.
+You need to set the value of `last_model_checkpoint`, which will be printed out at the end of the sft.

 Using Python:
 ```python
@@ -209,8 +209,8 @@ os.environ['CUDA_VISIBLE_DEVICES'] = '0'

 from swift.llm import InferArguments, merge_lora, infer_main

-best_model_checkpoint = 'qwen2-7b-instruct/vx-xxx/checkpoint-xxx'
-infer_args = InferArguments(ckpt_dir=best_model_checkpoint)
+last_model_checkpoint = 'qwen2-7b-instruct/vx-xxx/checkpoint-xxx'
+infer_args = InferArguments(ckpt_dir=last_model_checkpoint)
 merge_lora(infer_args, device_map='cpu')
 result = infer_main(infer_args)

@@ -271,8 +271,8 @@ os.environ['CUDA_VISIBLE_DEVICES'] = '0'

 from swift.llm import AppUIArguments, merge_lora, app_ui_main

-best_model_checkpoint = 'qwen2-7b-instruct/vx-xxx/checkpoint-xxx'
-app_ui_args = AppUIArguments(ckpt_dir=best_model_checkpoint)
+last_model_checkpoint = 'qwen2-7b-instruct/vx-xxx/checkpoint-xxx'
+app_ui_args = AppUIArguments(ckpt_dir=last_model_checkpoint)
 merge_lora(app_ui_args, device_map='cpu')
 result = app_ui_main(app_ui_args)
 ```

swift/llm/utils/model.py (10 changes: 5 additions & 5 deletions)

@@ -3478,7 +3478,7 @@ def get_model_tokenizer_qwen2_chat(model_dir: str,
     f'qwen2_5-{model_size_lower}-instruct',
     f'qwen/Qwen2.5-{model_size}-Instruct',
     LoRATM.llama,
-    TemplateType.qwen,
+    TemplateType.qwen2_5,
     get_model_tokenizer_qwen2_chat,
     support_flash_attn=True,
     support_vllm=True,
@@ -3492,7 +3492,7 @@ def get_model_tokenizer_qwen2_chat(model_dir: str,
     f'qwen2_5-{model_size_lower}-instruct-{quant_type_lower}',
     f'qwen/Qwen2.5-{model_size}-Instruct-{quant_type}',
     LoRATM.llama,
-    TemplateType.qwen,
+    TemplateType.qwen2_5,
     get_model_tokenizer_qwen2_chat,
     support_flash_attn=True,
     support_vllm=True,
@@ -3505,7 +3505,7 @@ def get_model_tokenizer_qwen2_chat(model_dir: str,
     f'qwen2_5-{model_size_lower}-instruct-awq',
     f'qwen/Qwen2.5-{model_size}-Instruct-AWQ',
     LoRATM.llama,
-    TemplateType.qwen,
+    TemplateType.qwen2_5,
     get_model_tokenizer_qwen2_chat,
     support_flash_attn=True,
     support_vllm=True,
@@ -3531,7 +3531,7 @@ def get_model_tokenizer_qwen2_chat(model_dir: str,
     f'qwen2_5-math-{model_size_lower}-instruct',
     f'qwen/Qwen2.5-Math-{model_size}-Instruct',
     LoRATM.llama,
-    TemplateType.qwen,
+    TemplateType.qwen2_5,
     get_model_tokenizer_qwen2_chat,
     support_flash_attn=True,
     support_vllm=True,
@@ -3556,7 +3556,7 @@ def get_model_tokenizer_qwen2_chat(model_dir: str,
     f'qwen2_5-coder-{model_size_lower}-instruct',
     f'qwen/Qwen2.5-Coder-{model_size}-Instruct',
     LoRATM.llama,
-    TemplateType.qwen,
+    TemplateType.qwen2_5,
     get_model_tokenizer_qwen2_chat,
     support_flash_attn=True,
     support_vllm=True,

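Net effect of the model.py changes: every Qwen2.5 instruct registration (base, GPTQ/AWQ quantized, Math, Coder) now defaults to the new `qwen2_5` template instead of the generic `qwen` one. A minimal sketch of how one might check this, assuming swift's `get_default_template_type` helper; the `qwen2_5-7b-instruct` model_type string is an assumption following the f-string pattern above:

```python
# Sketch: look up the default template for a Qwen2.5 instruct model type.
from swift.llm import get_default_template_type

template_type = get_default_template_type('qwen2_5-7b-instruct')
print(template_type)  # expected 'qwen2_5'; before this PR it resolved to 'qwen'
```
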
swift/llm/utils/template.py (6 changes: 6 additions & 0 deletions)

@@ -45,6 +45,7 @@ class TemplateType:
     # chat
     default = 'default'
     qwen = 'qwen'
+    qwen2_5 = 'qwen2_5'
     qwen_vl = 'qwen-vl'
     qwen_audio = 'qwen-audio'
     qwen2_audio = 'qwen2-audio'
@@ -1269,7 +1270,12 @@ def replace_box(self, index: int, example: Dict[str, Any]) -> List[Context]:
 ]


+class Qwen2_5Template(QwenTemplate):
+    system = 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.'
+
+
 register_template(TemplateType.qwen, QwenTemplate())
+register_template(TemplateType.qwen2_5, Qwen2_5Template())


 class QwenVLTemplate(_QwenVLTemplateMixin, QwenTemplate):

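For orientation: `Qwen2_5Template` only overrides the default system prompt of the existing ChatML-based `QwenTemplate`. A prompt rendered with no explicit system message should therefore look like the following sketch (assembled by hand from the template pieces above for illustration, not output captured from this PR):

```python
# Sketch of the ChatML prompt implied by Qwen2_5Template's default system.
prompt = (
    '<|im_start|>system\n'
    'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\n'
    '<|im_start|>user\n'
    'Hello!<|im_end|>\n'
    '<|im_start|>assistant\n'
)
print(prompt)
```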