7 changes: 1 addition & 6 deletions tests/models/multimodal/processing/test_h2ovl.py
@@ -10,7 +10,6 @@
 from vllm.multimodal import MULTIMODAL_REGISTRY
 from vllm.multimodal.image import rescale_image_size
 from vllm.multimodal.processing import BaseMultiModalProcessor
-from vllm.transformers_utils.tokenizer import cached_tokenizer_from_config
 
 from ....conftest import _ImageAssets
 from ...utils import build_model_context
@@ -156,11 +155,7 @@ def test_processor_override(
         mm_processor_kwargs=mm_processor_kwargs if kwargs_on_init else None,
         limit_mm_per_prompt={"image": len(size_factors)},
     )
-    tokenizer = cached_tokenizer_from_config(ctx.model_config)
-    processor = MULTIMODAL_REGISTRY.create_processor(
-        ctx.model_config,
-        tokenizer=tokenizer,
-    )
+    processor = MULTIMODAL_REGISTRY.create_processor(ctx.model_config)
     hf_processor_mm_kwargs = {} if kwargs_on_init else mm_processor_kwargs
 
     min_num = min_dynamic_patch if dynamic_image_size else 1
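Note: every test file in this PR makes the same change, so one sketch covers the pattern. Before, each test built a tokenizer with cached_tokenizer_from_config and passed it in; now the registry resolves the tokenizer from the model config itself. A minimal usage sketch (model_config stands for any vllm ModelConfig of a registered multi-modal model, e.g. ctx.model_config from build_model_context above; it is an assumed input, not defined here):

from vllm.multimodal import MULTIMODAL_REGISTRY

# New, single-argument form: the registry derives the tokenizer internally.
processor = MULTIMODAL_REGISTRY.create_processor(model_config)

# Old pattern removed by this PR, for comparison:
#   tokenizer = cached_tokenizer_from_config(model_config)
#   processor = MULTIMODAL_REGISTRY.create_processor(model_config,
#                                                    tokenizer=tokenizer)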
7 changes: 1 addition & 6 deletions tests/models/multimodal/processing/test_idefics3.py
@@ -4,7 +4,6 @@
 from transformers import Idefics3Config
 
 from vllm.multimodal import MULTIMODAL_REGISTRY
-from vllm.transformers_utils.tokenizer import cached_tokenizer_from_config
 
 from ....conftest import _ImageAssets
 from ...utils import build_model_context
@@ -38,11 +37,7 @@ def test_processor_override(
         mm_processor_kwargs=mm_processor_kwargs if kwargs_on_init else None,
         limit_mm_per_prompt={"image": num_imgs},
     )
-    tokenizer = cached_tokenizer_from_config(ctx.model_config)
-    processor = MULTIMODAL_REGISTRY.create_processor(
-        ctx.model_config,
-        tokenizer=tokenizer,
-    )
+    processor = MULTIMODAL_REGISTRY.create_processor(ctx.model_config)
     hf_processor_mm_kwargs = {} if kwargs_on_init else mm_processor_kwargs
 
     # Build the image str / prompt based on the number of images we pass
7 changes: 1 addition & 6 deletions tests/models/multimodal/processing/test_internvl.py
@@ -10,7 +10,6 @@
 from vllm.multimodal import MULTIMODAL_REGISTRY
 from vllm.multimodal.image import rescale_image_size
 from vllm.multimodal.processing import BaseMultiModalProcessor
-from vllm.transformers_utils.tokenizer import cached_tokenizer_from_config
 
 from ....conftest import _ImageAssets
 from ...utils import build_model_context
@@ -113,11 +112,7 @@ def test_processor_override(
         mm_processor_kwargs=mm_processor_kwargs if kwargs_on_init else None,
         limit_mm_per_prompt={"image": len(size_factors)},
     )
-    tokenizer = cached_tokenizer_from_config(ctx.model_config)
-    processor = MULTIMODAL_REGISTRY.create_processor(
-        ctx.model_config,
-        tokenizer=tokenizer,
-    )
+    processor = MULTIMODAL_REGISTRY.create_processor(ctx.model_config)
     hf_processor_mm_kwargs = {} if kwargs_on_init else mm_processor_kwargs
 
     min_num = min_dynamic_patch if dynamic_image_size else 1
16 changes: 3 additions & 13 deletions tests/models/multimodal/processing/test_llava_next.py
@@ -10,7 +10,6 @@
 from vllm.multimodal import MULTIMODAL_REGISTRY
 from vllm.multimodal.parse import ImageSize
 from vllm.multimodal.processing import BaseMultiModalProcessor
-from vllm.transformers_utils.tokenizer import cached_tokenizer_from_config
 
 from ...utils import build_model_context
 
@@ -40,10 +39,7 @@ def test_processor_max_tokens(model_id):
         mm_processor_kwargs=None,
         limit_mm_per_prompt={"image": 1},
     )
-    processor = MULTIMODAL_REGISTRY.create_processor(
-        ctx.model_config,
-        tokenizer=cached_tokenizer_from_config(ctx.model_config),
-    )
+    processor = MULTIMODAL_REGISTRY.create_processor(ctx.model_config)
     info = processor.info
 
     seen_aspect_ratios = set[float]()
@@ -139,10 +135,7 @@ def test_processor_prompt_replacements_regression(model_id, num_imgs):
         mm_processor_kwargs=None,
         limit_mm_per_prompt={"image": num_imgs},
     )
-    processor = MULTIMODAL_REGISTRY.create_processor(
-        ctx.model_config,
-        tokenizer=cached_tokenizer_from_config(ctx.model_config),
-    )
+    processor = MULTIMODAL_REGISTRY.create_processor(ctx.model_config)
 
     image_ratios = [(171, 152), (184, 161), (198, 176), (333, 296), (369, 328),
                     (488, 183), (2560, 1669)]
@@ -168,10 +161,7 @@ def test_processor_prompt_replacements_all(model_id, num_imgs):
         mm_processor_kwargs=None,
         limit_mm_per_prompt={"image": num_imgs},
     )
-    processor = MULTIMODAL_REGISTRY.create_processor(
-        ctx.model_config,
-        tokenizer=cached_tokenizer_from_config(ctx.model_config),
-    )
+    processor = MULTIMODAL_REGISTRY.create_processor(ctx.model_config)
 
     seen_aspect_ratios = set[float]()
     image_sizes = list[ImageSize]()
16 changes: 3 additions & 13 deletions tests/models/multimodal/processing/test_llava_onevision.py
@@ -10,7 +10,6 @@
 from vllm.multimodal import MULTIMODAL_REGISTRY
 from vllm.multimodal.parse import ImageSize
 from vllm.multimodal.processing import BaseMultiModalProcessor
-from vllm.transformers_utils.tokenizer import cached_tokenizer_from_config
 
 from ...utils import build_model_context
 
@@ -41,10 +40,7 @@ def test_processor_max_tokens(model_id):
         mm_processor_kwargs=None,
         limit_mm_per_prompt={"image": 1},
     )
-    processor = MULTIMODAL_REGISTRY.create_processor(
-        ctx.model_config,
-        tokenizer=cached_tokenizer_from_config(ctx.model_config),
-    )
+    processor = MULTIMODAL_REGISTRY.create_processor(ctx.model_config)
     info = processor.info
 
     seen_aspect_ratios = set[float]()
@@ -139,10 +135,7 @@ def test_processor_prompt_replacements_regression(model_id, num_imgs):
         mm_processor_kwargs=None,
         limit_mm_per_prompt={"image": num_imgs},
     )
-    processor = MULTIMODAL_REGISTRY.create_processor(
-        ctx.model_config,
-        tokenizer=cached_tokenizer_from_config(ctx.model_config),
-    )
+    processor = MULTIMODAL_REGISTRY.create_processor(ctx.model_config)
 
     image_ratios = [(171, 152), (184, 161), (198, 176), (333, 296), (369, 328),
                     (488, 183), (2560, 1669)]
@@ -169,10 +162,7 @@ def test_processor_prompt_replacements_all(model_id, num_imgs):
         mm_processor_kwargs=None,
         limit_mm_per_prompt={"image": num_imgs},
     )
-    processor = MULTIMODAL_REGISTRY.create_processor(
-        ctx.model_config,
-        tokenizer=cached_tokenizer_from_config(ctx.model_config),
-    )
+    processor = MULTIMODAL_REGISTRY.create_processor(ctx.model_config)
 
     seen_aspect_ratios = set[float]()
     image_sizes = list[ImageSize]()
7 changes: 1 addition & 6 deletions tests/models/multimodal/processing/test_phi3v.py
@@ -3,7 +3,6 @@
 import pytest
 
 from vllm.multimodal import MULTIMODAL_REGISTRY
-from vllm.transformers_utils.tokenizer import cached_tokenizer_from_config
 
 from ....conftest import _ImageAssets
 from ...utils import build_model_context
@@ -39,11 +38,7 @@ def test_processor_override(
         mm_processor_kwargs=mm_processor_kwargs if kwargs_on_init else None,
         limit_mm_per_prompt={"image": num_imgs},
     )
-    tokenizer = cached_tokenizer_from_config(ctx.model_config)
-    processor = MULTIMODAL_REGISTRY.create_processor(
-        ctx.model_config,
-        tokenizer=tokenizer,
-    )
+    processor = MULTIMODAL_REGISTRY.create_processor(ctx.model_config)
     hf_processor_mm_kwargs = {} if kwargs_on_init else mm_processor_kwargs
 
     # Build the image str / prompt based on the number of images we pass
8 changes: 2 additions & 6 deletions tests/models/multimodal/processing/test_qwen2_vl.py
@@ -3,7 +3,6 @@
 import pytest
 
 from vllm.multimodal import MULTIMODAL_REGISTRY
-from vllm.transformers_utils.tokenizer import cached_tokenizer_from_config
 
 from ....conftest import _ImageAssets
 from ...utils import build_model_context
@@ -34,11 +33,8 @@ def test_processor_override(
         mm_processor_kwargs=mm_processor_kwargs if kwargs_on_init else None,
         limit_mm_per_prompt={"image": num_imgs},
     )
-    tokenizer = cached_tokenizer_from_config(ctx.model_config)
-    processor = MULTIMODAL_REGISTRY.create_processor(
-        ctx.model_config,
-        tokenizer=tokenizer,
-    )
+    processor = MULTIMODAL_REGISTRY.create_processor(ctx.model_config)
+    tokenizer = processor.info.get_tokenizer()
     hf_processor_mm_kwargs = {} if kwargs_on_init else mm_processor_kwargs
 
     # Build the image str / prompt based on the number of images we pass
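Note: test_qwen2_vl.py is the one test file that still needs a tokenizer afterwards (to build prompts), and its added line shows where the tokenizer now comes from: the processor itself. A short sketch, assuming a processor created as in the hunk above:

processor = MULTIMODAL_REGISTRY.create_processor(ctx.model_config)

# The tokenizer is read back from the processor's info object instead of
# being constructed separately via cached_tokenizer_from_config.
tokenizer = processor.info.get_tokenizer()
prompt_ids = tokenizer.encode("a prompt used to build test inputs")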
18 changes: 4 additions & 14 deletions tests/multimodal/test_processing.py
@@ -28,8 +28,7 @@
                                         replace_token_matches)
 # yapf: enable
 from vllm.multimodal.profiling import MultiModalProfiler
-from vllm.transformers_utils.tokenizer import (AnyTokenizer,
-                                               cached_tokenizer_from_config)
+from vllm.transformers_utils.tokenizer import AnyTokenizer
 from vllm.utils import full_groupby
 
 from .utils import random_image
@@ -955,10 +954,7 @@ def test_limit_mm_per_prompt_dummy(model_id, limit, num_supported, is_valid):
         limit_mm_per_prompt=limit_mm_per_prompt,
     )
 
-    processor = MULTIMODAL_REGISTRY.create_processor(
-        model_config,
-        tokenizer=cached_tokenizer_from_config(model_config),
-    )
+    processor = MULTIMODAL_REGISTRY.create_processor(model_config)
     profiler = MultiModalProfiler(processor)
 
     mock_supported_mm_limits = MagicMock(return_value={"image": num_supported})
@@ -994,10 +990,7 @@ def test_limit_mm_per_prompt_apply(model_id, num_images, limit, is_valid):
         limit_mm_per_prompt=limit_mm_per_prompt,
     )
 
-    processor = MULTIMODAL_REGISTRY.create_processor(
-        model_config,
-        tokenizer=cached_tokenizer_from_config(model_config),
-    )
+    processor = MULTIMODAL_REGISTRY.create_processor(model_config)
 
     rng = np.random.RandomState(0)
     image = random_image(rng, min_wh=128, max_wh=256)
@@ -1066,10 +1059,7 @@ def test_hf_processor_kwargs(model_id, call_kwargs, expected_kwargs):
         revision=None,
     )
 
-    processor = MULTIMODAL_REGISTRY.create_processor(
-        model_config,
-        tokenizer=cached_tokenizer_from_config(model_config),
-    )
+    processor = MULTIMODAL_REGISTRY.create_processor(model_config)
     orig_get_hf_processor = processor.info.get_hf_processor
 
     def get_hf_processor(self, **kwargs):
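Note: in tests/multimodal/test_processing.py the processor feeds a MultiModalProfiler, and the surrounding test mocks the model's supported limits. A sketch of that wiring; the exact attribute being patched lies outside the visible diff, so treat that line as an assumption inferred from the test's variable naming:

from unittest.mock import MagicMock

from vllm.multimodal import MULTIMODAL_REGISTRY
from vllm.multimodal.profiling import MultiModalProfiler

processor = MULTIMODAL_REGISTRY.create_processor(model_config)
profiler = MultiModalProfiler(processor)

# Assumption: the test patches the processor's supported-limit lookup so the
# profiler sees a model that accepts a fixed number of images per prompt.
processor.info.get_supported_mm_limits = MagicMock(
    return_value={"image": 2})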
12 changes: 6 additions & 6 deletions vllm/inputs/preprocess.py
@@ -261,13 +261,13 @@ def _process_multimodal(
         # initialized without a tokenizer while using also multi-modal
         # input.
         if not self.tokenizer:
-            tokenizer = None
+            tokenizer = object()  # Dummy
         else:
             tokenizer_group = self.get_tokenizer_group()
             tokenizer = tokenizer_group.get_lora_tokenizer(lora_request)
 
-        mm_processor = self.mm_registry.create_processor(
-            self.model_config, tokenizer)
+        mm_processor = self.mm_registry.create_processor(self.model_config,
+                                                         tokenizer=tokenizer)
 
         if mm_processor_kwargs is None:
             mm_processor_kwargs = {}
@@ -288,14 +288,14 @@ async def _process_multimodal_async(
         # initialized without a tokenizer while using also multi-modal
         # input.
         if not self.tokenizer:
-            tokenizer = None
+            tokenizer = object()  # Dummy
         else:
             tokenizer_group = self.get_tokenizer_group()
             tokenizer = await tokenizer_group.get_lora_tokenizer_async(
                 lora_request)
 
-        mm_processor = self.mm_registry.create_processor(
-            self.model_config, tokenizer)
+        mm_processor = self.mm_registry.create_processor(self.model_config,
+                                                         tokenizer=tokenizer)
         if mm_processor_kwargs is None:
             mm_processor_kwargs = {}
 
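Note: the preprocess.py change replaces tokenizer = None with a throwaway object(). A plausible reading: create_processor now insists on receiving some tokenizer argument, while this tokenizer-free code path never actually calls it, so any placeholder satisfies the presence check. A standalone sketch of the pattern (all names here are illustrative, not vllm's API):

from typing import Any, Optional

def create_processor_stub(tokenizer: Any) -> str:
    # Stand-in for mm_registry.create_processor: requires a tokenizer-like
    # argument but does not invoke it on this code path.
    if tokenizer is None:
        raise ValueError("tokenizer must be provided")
    return "processor"

def process(real_tokenizer: Optional[Any]) -> str:
    if real_tokenizer is None:
        tokenizer: Any = object()  # Dummy, as in the diff above
    else:
        tokenizer = real_tokenizer
    return create_processor_stub(tokenizer)

print(process(None))          # dummy object passes the presence check
print(process("tokenizer"))   # a real tokenizer is forwarded unchanged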
25 changes: 17 additions & 8 deletions vllm/inputs/registry.py
@@ -13,8 +13,7 @@
 
 from vllm.logger import init_logger
 from vllm.transformers_utils.processor import cached_processor_from_config
-from vllm.transformers_utils.tokenizer import (AnyTokenizer,
-                                               cached_tokenizer_from_config)
+from vllm.transformers_utils.tokenizer import AnyTokenizer
 from vllm.utils import (ClassRegistry, get_allowed_kwarg_only_overrides,
                         resolve_mm_processor_kwargs)
 
@@ -329,17 +328,27 @@ def dummy_data_for_profiling(
         from vllm.model_executor.model_loader import get_model_architecture
         from vllm.multimodal import MultiModalKwargs
         from vllm.multimodal.profiling import MultiModalProfiler
+        from vllm.sequence import SequenceData
 
         if mm_registry.has_processor(model_config):
-            tokenizer = cached_tokenizer_from_config(model_config)
             processor = mm_registry.create_processor(model_config,
-                                                     tokenizer,
                                                      disable_cache=True)
             profiler = MultiModalProfiler(processor)
-            dummy_data_factory = (profiler.get_encoder_dummy_data
-                                  if is_encoder_data else
-                                  profiler.get_decoder_dummy_data)
-            dummy_data = dummy_data_factory(seq_len)
+
+            dummy_data_v1 = (profiler.get_encoder_dummy_data(seq_len)
+                             if is_encoder_data else
+                             profiler.get_decoder_dummy_data(seq_len))
+            _seq_data = SequenceData.from_seqs(
+                dummy_data_v1.prompt_token_ids)  # type: ignore[attr-defined]
+
+            dummy_data = DummyData(
+                seq_data=_seq_data,
+                multi_modal_data=getattr(dummy_data_v1, "multi_modal_data",
+                                         None),
+                multi_modal_placeholders=getattr(dummy_data_v1,
+                                                 "multi_modal_placeholders",
+                                                 None),
+            )
         else:
             model_cls, _ = get_model_architecture(model_config)
             if is_encoder_data:
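Note: the registry.py hunk adapts the new profiler output to the legacy DummyData struct: prompt_token_ids is wrapped into a SequenceData, and the multi-modal fields are read with getattr defaults because some dummy-data variants (e.g. encoder data) may not carry them. A rough standalone sketch of the adaptation, with class and field names simplified to stand-ins for vllm's internals:

from dataclasses import dataclass
from typing import Any, Optional

@dataclass
class DummyDataV1:  # shape of what the profiler returns
    prompt_token_ids: list[int]
    multi_modal_data: Optional[dict[str, Any]] = None

@dataclass
class LegacyDummyData:  # shape of what the V0 caller still expects
    seq_data: list[int]
    multi_modal_data: Optional[dict[str, Any]]
    multi_modal_placeholders: Optional[dict[str, Any]]

def adapt(v1: DummyDataV1) -> LegacyDummyData:
    # getattr with a default tolerates dummy-data variants that have no
    # multi-modal attributes at all.
    return LegacyDummyData(
        seq_data=list(v1.prompt_token_ids),
        multi_modal_data=getattr(v1, "multi_modal_data", None),
        multi_modal_placeholders=getattr(v1, "multi_modal_placeholders",
                                         None),
    )

print(adapt(DummyDataV1(prompt_token_ids=[1, 2, 3])))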