2 changes: 1 addition & 1 deletion docs/source/Instruction/LLM推理文档.md
@@ -362,7 +362,7 @@ from swift.llm import (
     get_model_tokenizer, get_template, inference, ModelType, get_default_template_type,
 )
 from swift.utils import seed_everything
-from modelscope import BitsAndBytesConfig
+from transformers import BitsAndBytesConfig
 import torch
 
 model_type = ModelType.chatglm3_6b
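`modelscope` re-exports many `transformers` classes, so the old import worked but obscured the real dependency; importing `BitsAndBytesConfig` directly from `transformers` is the canonical path. A minimal sketch of the corrected import in use, assuming (as in the surrounding docs example) that `get_model_tokenizer` forwards `quantization_config` through its `model_kwargs` dict:

```python
import torch
from transformers import BitsAndBytesConfig  # was: from modelscope import BitsAndBytesConfig

from swift.llm import ModelType, get_model_tokenizer

# 4-bit quantized load, computing in bf16.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16)
model, tokenizer = get_model_tokenizer(
    ModelType.chatglm3_6b, torch.bfloat16,
    {'device_map': 'auto', 'quantization_config': quantization_config})
```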
32 changes: 16 additions & 16 deletions docs/source/Instruction/支持的模型和数据集.md

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/source/Instruction/自定义与拓展.md
@@ -280,7 +280,7 @@ if __name__ == '__main__':
 ```python
 from typing import Any, Dict
 import torch
-from modelscope import AutoConfig, AutoModelForCausalLM, AutoTokenizer
+from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer
 
 from transformers.utils.versions import require_version
 
2 changes: 1 addition & 1 deletion docs/source_en/Instruction/Customization.md
@@ -277,7 +277,7 @@ The following is an example of **custom models**. The complete py file can be vi
 from typing import Any, Dict
 import torch
 
-from modelscope import AutoConfig, AutoModelForCausalLM, AutoTokenizer
+from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer
 
 from transformers.utils.versions import require_version
 
2 changes: 1 addition & 1 deletion docs/source_en/Instruction/LLM-inference.md
@@ -361,7 +361,7 @@ from swift.llm import (
     get_model_tokenizer, get_template, inference, ModelType, get_default_template_type,
 )
 from swift.utils import seed_everything
-from modelscope import BitsAndBytesConfig
+from transformers import BitsAndBytesConfig
 import torch
 
 model_type = ModelType.chatglm3_6b
32 changes: 16 additions & 16 deletions docs/source_en/Instruction/Supported-models-datasets.md

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions examples/pytorch/llm/custom.py
@@ -1,9 +1,9 @@
 # Copyright (c) Alibaba, Inc. and its affiliates.
-from typing import Any, Dict, Optional, Tuple
+from typing import Any, Dict
 
 import torch
 from datasets import Dataset as HfDataset
-from modelscope import AutoConfig, AutoModelForCausalLM, AutoTokenizer, MsDataset
+from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer
 from transformers.utils.versions import require_version
 
 from swift.llm import (LoRATM, Template, TemplateType, dataset_map, get_dataset, get_dataset_from_repo,
2 changes: 1 addition & 1 deletion swift/llm/deploy.py
@@ -14,9 +14,9 @@
 import torch
 from fastapi import FastAPI, Request
 from fastapi.responses import JSONResponse, StreamingResponse
-from modelscope import GenerationConfig
 from packaging import version
 from peft import PeftModel
+from transformers import GenerationConfig
 
 from swift.utils import get_logger, get_main, get_seed, seed_everything
 from .agent import split_action_action_input
2 changes: 1 addition & 1 deletion swift/llm/eval.py
@@ -16,9 +16,9 @@
 from evalscope.run import run_task
 from evalscope.summarizer import Summarizer
 from evalscope.utils import EvalBackend
-from modelscope import GenerationConfig
 from openai import APIConnectionError
 from tqdm import tqdm
+from transformers import GenerationConfig
 
 from swift.utils import append_to_jsonl, get_logger, get_main, seed_everything
 from .infer import merge_lora, prepare_model_template
3 changes: 1 addition & 2 deletions swift/llm/infer.py
@@ -8,9 +8,8 @@
 import json
 import numpy as np
 import torch
-from modelscope import BitsAndBytesConfig, GenerationConfig
 from tqdm import tqdm
-from transformers import PreTrainedModel, PreTrainedTokenizerBase
+from transformers import BitsAndBytesConfig, GenerationConfig, PreTrainedModel, PreTrainedTokenizerBase
 from transformers.utils import is_torch_npu_available
 
 from swift.tuners import Swift
2 changes: 1 addition & 1 deletion swift/llm/rome.py
@@ -3,7 +3,7 @@
 
 import json
 import torch
-from modelscope import GenerationConfig
+from transformers import GenerationConfig
 
 from swift.tuners import Swift
 from swift.tuners.rome import RomeConfig
3 changes: 1 addition & 2 deletions swift/llm/sft.py
@@ -7,9 +7,8 @@
 import torch
 import transformers
 from datasets import Dataset as HfDataset
-from modelscope import BitsAndBytesConfig, GenerationConfig
 from packaging import version
-from transformers import IntervalStrategy
+from transformers import BitsAndBytesConfig, GenerationConfig, IntervalStrategy
 from transformers.integrations import is_deepspeed_zero3_enabled
 from transformers.utils import is_torch_npu_available, strtobool
 
17 changes: 8 additions & 9 deletions swift/llm/utils/model.py
@@ -9,16 +9,15 @@
 from typing import Any, Callable, Dict, List, Literal, NamedTuple, Optional, Tuple, Type, Union
 
 import torch
 import torch.distributed as dist
 import torch.nn.functional as F
 import torch.utils.checkpoint
 import transformers
 from accelerate.utils import find_device
-from modelscope import (AutoConfig, AutoModel, AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig,
-                        GenerationConfig, GPTQConfig, snapshot_download)
+from modelscope import snapshot_download
 from modelscope.hub.utils.utils import get_cache_dir
 from packaging import version
-from transformers import PretrainedConfig, PreTrainedModel, PreTrainedTokenizerBase
+from transformers import (AutoConfig, AutoModel, AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig,
+                          GenerationConfig, GPTQConfig, PretrainedConfig, PreTrainedModel, PreTrainedTokenizerBase)
 from transformers.dynamic_module_utils import get_class_from_dynamic_module
 from transformers.models.auto.tokenization_auto import get_tokenizer_config
 from transformers.utils import is_torch_bf16_gpu_available, strtobool
@@ -2729,7 +2728,7 @@ def get_model_tokenizer_with_flash_attn(model_dir: str,
     TemplateType.mplug_owl3,
     requires=['transformers>=4.36', 'icecream'],  # decord
     support_flash_attn=True,
-    tags=['multi-modal', 'vision'],
+    tags=['multi-modal', 'vision', 'video'],
     hf_model_id='mPLUG/mPLUG-Owl3-7B-240728')
 def get_model_tokenizer_mplug_owl3(model_dir: str,
                                    torch_dtype: torch.dtype,
@@ -3664,7 +3663,7 @@ def _read_from_stream(container: 'av.container.Container', start_offset: float,
         support_vllm=True,
         placeholder_tokens=['<|image_pad|>', '<|video_pad|>'],
         requires=['transformers>=4.45.0.dev0', 'qwen_vl_utils'],
-        tags=['multi-modal', 'vision'],
+        tags=['multi-modal', 'vision', 'video'],
         hf_model_id=f'Qwen/Qwen2-VL-{model_size}')
     register_model(
         f'qwen2-vl-{model_size_lower}-instruct',
@@ -3676,7 +3675,7 @@ def _read_from_stream(container: 'av.container.Container', start_offset: float,
         support_vllm=True,
         placeholder_tokens=['<|image_pad|>', '<|video_pad|>'],
         requires=['transformers>=4.45.0.dev0', 'qwen_vl_utils'],  # 'pyav'
-        tags=['multi-modal', 'vision'],
+        tags=['multi-modal', 'vision', 'video'],
         hf_model_id=f'Qwen/Qwen2-VL-{model_size}-Instruct')
     for quant_bits in [4, 8]:
         quant_type = f'GPTQ-Int{quant_bits}'
@@ -3691,7 +3690,7 @@ def _read_from_stream(container: 'av.container.Container', start_offset: float,
             support_vllm=True,
             placeholder_tokens=['<|image_pad|>', '<|video_pad|>'],
             requires=['transformers>=4.45.0.dev0', 'qwen_vl_utils', 'auto_gptq>=0.5'],
-            tags=['multi-modal', 'vision'],
+            tags=['multi-modal', 'vision', 'video'],
             function_kwargs={'gptq_bits': quant_bits},
             torch_dtype=torch.float16,
             hf_model_id=f'Qwen/Qwen2-VL-{model_size}-Instruct-{quant_type}')
@@ -3706,7 +3705,7 @@ def _read_from_stream(container: 'av.container.Container', start_offset: float,
         support_vllm=True,
         placeholder_tokens=['<|image_pad|>', '<|video_pad|>'],
         requires=['transformers>=4.45.0.dev0', 'qwen_vl_utils', 'autoawq'],
-        tags=['multi-modal', 'vision'],
+        tags=['multi-modal', 'vision', 'video'],
         function_kwargs={'is_awq': True},
         torch_dtype=torch.float16,
         hf_model_id=f'Qwen/Qwen2-VL-{model_size}-Instruct-AWQ')
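The import reshuffle in `model.py` makes the division of labor explicit: `modelscope` is now used only to resolve and download checkpoints (`snapshot_download`, `get_cache_dir`), while every config/tokenizer/model class comes from `transformers`. A minimal sketch of that split, using a hypothetical model id:

```python
from modelscope import snapshot_download
from transformers import AutoModelForCausalLM, AutoTokenizer

# modelscope: download the checkpoint to a local directory.
model_dir = snapshot_download('ZhipuAI/chatglm3-6b')  # hypothetical model id

# transformers: all loading happens from that directory.
tokenizer = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(model_dir, trust_remote_code=True)
```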
2 changes: 1 addition & 1 deletion swift/llm/utils/preprocess.py
@@ -41,7 +41,7 @@ def new_call_func(self, dataset: DATASET_TYPE) -> DATASET_TYPE:
         self.shared_shm_name = shm.name
         buffer = shm.buf
         self.column_state = np.ndarray((len(self.key_mapping), ), dtype=np.bool_, buffer=buffer)
-        self.column_state[:] = 0
+        self.column_state[:] = False
         dataset = call_func(self, dataset)
         if isinstance(dataset, HfIterableDataset) and dataset.features is None:
             features = next(iter(dataset)).keys()
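For context on the `preprocess.py` hunk: `column_state` is a numpy boolean array laid over a `multiprocessing.shared_memory` buffer, so every dataset worker observes the same column flags; writing `False` rather than `0` is the type-correct spelling for `dtype=np.bool_` (same bytes, clearer intent). A standalone sketch of the pattern, with illustrative names:

```python
import numpy as np
from multiprocessing.shared_memory import SharedMemory

num_columns = 4
shm = SharedMemory(create=True, size=num_columns)  # one byte per bool flag
column_state = np.ndarray((num_columns,), dtype=np.bool_, buffer=shm.buf)
column_state[:] = False  # initialize all flags; visible to attached processes

# ... worker processes attach via SharedMemory(name=shm.name) and flip flags ...

shm.close()
shm.unlink()
```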
22 changes: 18 additions & 4 deletions swift/llm/utils/vllm_utils.py
@@ -2,15 +2,16 @@
 import inspect
 import os
 import time
+from contextlib import contextmanager
 from copy import deepcopy
+from functools import wraps
 from typing import Any, Dict, Iterator, List, Optional, Tuple
 
 import torch
 import vllm
-from modelscope import GenerationConfig
 from packaging import version
 from tqdm import tqdm
-from transformers import PreTrainedTokenizerBase
+from transformers import AutoTokenizer, GenerationConfig, PreTrainedTokenizerBase
 from vllm import AsyncEngineArgs, AsyncLLMEngine, EngineArgs, LLMEngine, SamplingParams
 
 from swift.utils import get_logger
@@ -26,6 +27,19 @@
 logger = get_logger()
 
 
+@contextmanager
+def _patch_auto_tokenizer(tokenizer):
+    _old_from_pretrained = AutoTokenizer.from_pretrained
+
+    @wraps(_old_from_pretrained)
+    def _from_pretrained(self, *args, **kwargs):
+        return tokenizer
+
+    AutoTokenizer.from_pretrained = _from_pretrained
+    yield
+    AutoTokenizer.from_pretrained = _old_from_pretrained
+
+
 def get_vllm_engine(
         model_type: str,
         torch_dtype: Optional[torch.dtype] = None,
@@ -105,8 +119,8 @@ def get_vllm_engine(
     os.environ.pop('VLLM_USE_MODELSCOPE', None)
     if version.parse(vllm.__version__) >= version.parse('0.5.1'):
         os.environ['VLLM_WORKER_MULTIPROC_METHOD'] = 'spawn'
-
-    llm_engine = llm_engine_cls.from_engine_args(engine_args)
+    with _patch_auto_tokenizer(tokenizer):
+        llm_engine = llm_engine_cls.from_engine_args(engine_args)
     llm_engine.engine_args = engine_args
     llm_engine.model_dir = model_dir
     llm_engine.model_type = model_type
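The new `_patch_auto_tokenizer` context manager exists because `from_engine_args` has vLLM load its own tokenizer via `AutoTokenizer.from_pretrained`; temporarily swapping that method for a closure returning the already-configured tokenizer avoids a second load and preserves any modifications swift made to it. A generic sketch of the same patch-and-restore pattern, with a `try`/`finally` added for illustration (an assumption on my part; the PR version restores only on the success path):

```python
from contextlib import contextmanager

from transformers import AutoTokenizer


@contextmanager
def patched_auto_tokenizer(tokenizer):
    original = AutoTokenizer.from_pretrained
    # Any from_pretrained call inside the block returns the preloaded tokenizer.
    AutoTokenizer.from_pretrained = lambda *args, **kwargs: tokenizer
    try:
        yield
    finally:
        AutoTokenizer.from_pretrained = original  # restored even on error
```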