diff --git a/vllm/model_executor/models/qwen2_5_vl.py b/vllm/model_executor/models/qwen2_5_vl.py index 6aec99b3f964..632ecaf65f2f 100644 --- a/vllm/model_executor/models/qwen2_5_vl.py +++ b/vllm/model_executor/models/qwen2_5_vl.py @@ -33,10 +33,11 @@ import torch.nn.functional as F from einops import rearrange from transformers import BatchFeature -from transformers.models.qwen2_5_vl import (Qwen2_5_VLImageProcessor, - Qwen2_5_VLProcessor) +from transformers.models.qwen2_5_vl import Qwen2_5_VLProcessor from transformers.models.qwen2_5_vl.configuration_qwen2_5_vl import ( Qwen2_5_VLConfig, Qwen2_5_VLVisionConfig) +from transformers.models.qwen2_vl import (Qwen2VLImageProcessor, + Qwen2VLImageProcessorFast) from vllm.attention import AttentionMetadata from vllm.config import VllmConfig @@ -693,7 +694,8 @@ def get_hf_processor( ) -> Qwen2_5_VLProcessor: hf_processor = self.ctx.get_hf_processor(Qwen2_5_VLProcessor) image_processor = hf_processor.image_processor # type: ignore - assert isinstance(image_processor, Qwen2_5_VLImageProcessor) + assert isinstance(image_processor, + (Qwen2VLImageProcessor, Qwen2VLImageProcessorFast)) if min_pixels: image_processor.min_pixels = min_pixels @@ -713,14 +715,15 @@ def get_image_processor( min_pixels: Optional[int] = None, max_pixels: Optional[int] = None, fps: Optional[float] = 2.0, - ) -> Qwen2_5_VLImageProcessor: + ) -> Union[Qwen2VLImageProcessor, Qwen2VLImageProcessorFast]: hf_processor = self.get_hf_processor( min_pixels=min_pixels, max_pixels=max_pixels, fps=fps, ) image_processor = hf_processor.image_processor # type: ignore - assert isinstance(image_processor, Qwen2_5_VLImageProcessor) + assert isinstance(image_processor, + (Qwen2VLImageProcessor, Qwen2VLImageProcessorFast)) return image_processor diff --git a/vllm/model_executor/models/qwen2_vl.py b/vllm/model_executor/models/qwen2_vl.py index 961f53cef137..b50f35c08b11 100644 --- a/vllm/model_executor/models/qwen2_vl.py +++ b/vllm/model_executor/models/qwen2_vl.py @@ -31,7 +31,9 @@ import torch.nn as nn import torch.nn.functional as F from einops import rearrange, repeat +from packaging.version import Version from transformers import BatchFeature +from transformers import __version__ as TRANSFORMERS_VERSION from transformers.models.qwen2_vl import (Qwen2VLImageProcessor, Qwen2VLProcessor) from transformers.models.qwen2_vl.configuration_qwen2_vl import ( @@ -759,7 +761,13 @@ def get_image_processor( hf_processor = self.get_hf_processor(min_pixels=min_pixels, max_pixels=max_pixels) image_processor = hf_processor.image_processor # type: ignore - assert isinstance(image_processor, Qwen2VLImageProcessor) + if Version(TRANSFORMERS_VERSION) >= Version("4.49"): + from transformers.models.qwen2_vl import Qwen2VLImageProcessorFast + assert isinstance( + image_processor, + (Qwen2VLImageProcessor, Qwen2VLImageProcessorFast)) + else: + assert isinstance(image_processor, Qwen2VLImageProcessor) return image_processor def get_supported_mm_limits(self) -> Mapping[str, Optional[int]]: