From 7957134be33dfbb5cb0813b60f9a3d54225898f0 Mon Sep 17 00:00:00 2001
From: Isotr0py <2037008807@qq.com>
Date: Wed, 15 Jan 2025 15:02:27 +0800
Subject: [PATCH 1/5] add support for deepseek-vl2-tiny

Signed-off-by: Isotr0py <2037008807@qq.com>
---
 docs/source/models/supported_models.md        |  5 ++---
 examples/offline_inference/vision_language.py |  2 +-
 vllm/model_executor/models/deepseek_vl2.py    | 11 ++++++++---
 3 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/docs/source/models/supported_models.md b/docs/source/models/supported_models.md
index 642ef3c9655b..74f4f8a510ad 100644
--- a/docs/source/models/supported_models.md
+++ b/docs/source/models/supported_models.md
@@ -613,7 +613,7 @@ See [this page](#generative-models) for more information on how to use generativ
 * - `DeepseekVLV2ForCausalLM`
   - DeepSeek-VL2
   - T + I<sup>+</sup>
-  - `deepseek-ai/deepseek-vl2-tiny`(WIP), `deepseek-ai/deepseek-vl2-small`, `deepseek-ai/deepseek-vl2` etc. (see note)
+  - `deepseek-ai/deepseek-vl2-tiny`, `deepseek-ai/deepseek-vl2-small`, `deepseek-ai/deepseek-vl2` etc. (see note)
   -
   - ✅︎
   - ✅︎
@@ -763,9 +763,8 @@ See [this page](#generative-models) for more information on how to use generativ
 <sup>+</sup> Multiple items can be inputted per text prompt for this modality.
 
 ````{note}
-The `deepseek-ai/deepseek-vl2-tiny` is not supported yet.
-
 To use `DeepSeek-VL2` series models, you need to install a fork version `deepseek_vl2` package:
+
 ```shell
 pip install git+https://github.com/Isotr0py/DeepSeek-VL2.git
 ```

diff --git a/examples/offline_inference/vision_language.py b/examples/offline_inference/vision_language.py
index ad32b9fe242e..8bc715a50e0d 100644
--- a/examples/offline_inference/vision_language.py
+++ b/examples/offline_inference/vision_language.py
@@ -70,7 +70,7 @@ def run_chameleon(question: str, modality: str):
 def run_deepseek_vl2(question: str, modality: str):
     assert modality == "image"
 
-    model_name = "deepseek-ai/deepseek-vl2-small"
+    model_name = "deepseek-ai/deepseek-vl2-tiny"
 
     llm = LLM(model=model_name,
               max_model_len=4096,

diff --git a/vllm/model_executor/models/deepseek_vl2.py b/vllm/model_executor/models/deepseek_vl2.py
index 99fa941c055d..455369502216 100644
--- a/vllm/model_executor/models/deepseek_vl2.py
+++ b/vllm/model_executor/models/deepseek_vl2.py
@@ -356,13 +356,18 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
             f"Only 2D tile_tag is supported currently, got: {self.tile_tag}"
         )
 
+        if self.text_config.topk_method == "noaux_tc":
+            architectures = ["DeepseekV3ForCausalLM"]
+        elif not self.text_config.use_mla:
+            architectures = ["DeepseekForCausalLM"]
+        else:
+            architectures = ["DeepseekV2ForCausalLM"]
+
         self.language_model = init_vllm_registered_model(
             vllm_config=vllm_config,
             hf_config=self.text_config,
             prefix=maybe_prefix(prefix, "language"),
-            architectures=["DeepseekV3ForCausalLM"]
-            if self.text_config.topk_method == "noaux_tc" else
-            ["DeepseekV2ForCausalLM"],
+            architectures=architectures,
         )
 
         self.make_empty_intermediate_tensors = (

From d8c0deaff07901a24a768631f7fb7b19ee5c5f4d Mon Sep 17 00:00:00 2001
From: Isotr0py <2037008807@qq.com>
Date: Wed, 15 Jan 2025 21:59:22 +0800
Subject: [PATCH 2/5] use tiny for test

Signed-off-by: Isotr0py <2037008807@qq.com>
---
 .../decoder_only/vision_language/test_models.py | 17 ++++-------------
 1 file changed, 4 insertions(+), 13 deletions(-)

diff --git a/tests/models/decoder_only/vision_language/test_models.py b/tests/models/decoder_only/vision_language/test_models.py
index 7620ed1107e8..8ea49c9c9de9 100644
--- a/tests/models/decoder_only/vision_language/test_models.py
+++ b/tests/models/decoder_only/vision_language/test_models.py
@@ -189,31 +189,22 @@
         dtype="bfloat16",
     ),
     "deepseek_vl_v2": VLMTestInfo(
-        models=["deepseek-ai/deepseek-vl2-small"],
+        models=["deepseek-ai/deepseek-vl2-tiny"],
         test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
-        dtype="bfloat16",
         prompt_formatter=lambda img_prompt: f"<|User|>: {img_prompt}\n\n<|Assistant|>: ",  # noqa: E501
         max_model_len=4096,
         max_num_seqs=2,
         single_image_prompts=IMAGE_ASSETS.prompts({
-            "stop_sign": "<image>\nWhat's the color of the stop sign and car?",
-            "cherry_blossom": "<image>\nWhat's the color of the tower?",
+            "stop_sign": "<image>\nWhat's the content in the center of the image?",  # noqa: E501
+            "cherry_blossom": "<image>\nPlease infer the season with reason in details.",  # noqa: E501
         }),
         multi_image_prompt="image_1:<image>\nimage_2:<image>\nDescribe the two images shortly.",  # noqa: E501
         vllm_runner_kwargs={"hf_overrides": {"architectures": ["DeepseekVLV2ForCausalLM"]}},  # noqa: E501
-        image_size_factors=[(0.10, 0.15)],
         patch_hf_runner=model_utils.deepseekvl2_patch_hf_runner,
         postprocess_inputs=model_utils.cast_dtype_post_processor("images"),
         hf_output_post_proc=model_utils.deepseekvl2_trunc_hf_output,
         stop_str=["<|end▁of▁sentence|>", "<|begin▁of▁sentence|>"],  # noqa: E501
-        num_logprobs=5,
-        marks=[
-            pytest.mark.skipif(
-                not is_flash_attn_2_available(),
-                reason="Model needs flash-attn for numeric convergence.",
-            ),
-            large_gpu_mark(min_gb=48),
-        ],
+        image_size_factors=[(), (1.0, ), (1.0, 1.0, 1.0), (0.1, 0.5, 1.0)],
     ),
     "fuyu": VLMTestInfo(
         models=["adept/fuyu-8b"],

From 421505c680081f78a202a07c07af8fde87b1e677 Mon Sep 17 00:00:00 2001
From: Isotr0py <2037008807@qq.com>
Date: Wed, 15 Jan 2025 22:43:48 +0800
Subject: [PATCH 3/5] fix multi-images test

Signed-off-by: Isotr0py <2037008807@qq.com>
---
 tests/models/decoder_only/vision_language/test_models.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/models/decoder_only/vision_language/test_models.py b/tests/models/decoder_only/vision_language/test_models.py
index 8ea49c9c9de9..acbb78e6e498 100644
--- a/tests/models/decoder_only/vision_language/test_models.py
+++ b/tests/models/decoder_only/vision_language/test_models.py
@@ -198,7 +198,7 @@
             "stop_sign": "<image>\nWhat's the content in the center of the image?",  # noqa: E501
             "cherry_blossom": "<image>\nPlease infer the season with reason in details.",  # noqa: E501
         }),
-        multi_image_prompt="image_1:<image>\nimage_2:<image>\nDescribe the two images shortly.",  # noqa: E501
+        multi_image_prompt="image_1:<image>\nimage_2:<image>\nWhich image can we see the car and the tower?",  # noqa: E501
         vllm_runner_kwargs={"hf_overrides": {"architectures": ["DeepseekVLV2ForCausalLM"]}},  # noqa: E501
         patch_hf_runner=model_utils.deepseekvl2_patch_hf_runner,
         postprocess_inputs=model_utils.cast_dtype_post_processor("images"),

From 26ed011e7756b275215214159fdfcc42426d89ff Mon Sep 17 00:00:00 2001
From: Isotr0py <2037008807@qq.com>
Date: Wed, 15 Jan 2025 23:21:58 +0800
Subject: [PATCH 4/5] replace small to tiny

Signed-off-by: Isotr0py <2037008807@qq.com>
---
 examples/offline_inference/vision_language_multi_image.py | 2 +-
 tests/models/registry.py                                  | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/examples/offline_inference/vision_language_multi_image.py b/examples/offline_inference/vision_language_multi_image.py
index c6cf3f30c31c..33ef5f316f04 100644
--- a/examples/offline_inference/vision_language_multi_image.py
+++ b/examples/offline_inference/vision_language_multi_image.py
@@ -55,7 +55,7 @@ def load_aria(question, image_urls: List[str]) -> ModelRequestData:
 
 
 def load_deepseek_vl2(question: str, image_urls: List[str]):
-    model_name = "deepseek-ai/deepseek-vl2-small"
+    model_name = "deepseek-ai/deepseek-vl2-tiny"
 
     llm = LLM(model=model_name,
               max_model_len=4096,

diff --git a/tests/models/registry.py b/tests/models/registry.py
index d079725b2f78..3bbaf5e038a2 100644
--- a/tests/models/registry.py
+++ b/tests/models/registry.py
@@ -179,8 +179,7 @@ class _HfExamplesInfo:
                                     trust_remote_code=True),
     "ChatGLMForConditionalGeneration": _HfExamplesInfo("chatglm2-6b",
                                                        is_available_online=False),
-    # TODO(Isotr0py): Use deepseek-vl2-tiny for test after it's supported
-    "DeepseekVLV2ForCausalLM": _HfExamplesInfo("deepseek-ai/deepseek-vl2-small"),  # noqa: E501
+    "DeepseekVLV2ForCausalLM": _HfExamplesInfo("deepseek-ai/deepseek-vl2-tiny"),  # noqa: E501
     "FuyuForCausalLM": _HfExamplesInfo("adept/fuyu-8b"),
     "H2OVLChatModel": _HfExamplesInfo("h2oai/h2ovl-mississippi-800m"),
     "InternVLChatModel": _HfExamplesInfo("OpenGVLab/InternVL2-1B",

From 9419436875a6b7540e8c2eb87cb45f1009ea28c3 Mon Sep 17 00:00:00 2001
From: Isotr0py <2037008807@qq.com>
Date: Thu, 16 Jan 2025 21:56:16 +0800
Subject: [PATCH 5/5] add test mark

Signed-off-by: Isotr0py <2037008807@qq.com>
---
 tests/models/decoder_only/vision_language/test_models.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/tests/models/decoder_only/vision_language/test_models.py b/tests/models/decoder_only/vision_language/test_models.py
index acbb78e6e498..5710303548c3 100644
--- a/tests/models/decoder_only/vision_language/test_models.py
+++ b/tests/models/decoder_only/vision_language/test_models.py
@@ -9,6 +9,7 @@
 
 import pytest
 from transformers import AutoModelForVision2Seq
+from transformers import __version__ as TRANSFORMERS_VERSION
 from transformers.utils import is_flash_attn_2_available
 
 from vllm.platforms import current_platform
@@ -205,6 +206,12 @@
         hf_output_post_proc=model_utils.deepseekvl2_trunc_hf_output,
         stop_str=["<|end▁of▁sentence|>", "<|begin▁of▁sentence|>"],  # noqa: E501
         image_size_factors=[(), (1.0, ), (1.0, 1.0, 1.0), (0.1, 0.5, 1.0)],
+        marks=[
+            pytest.mark.skipif(
+                TRANSFORMERS_VERSION >= "4.48.0",
+                reason="HF model is not compatible with transformers>=4.48.0",
+            )
+        ],
     ),
     "fuyu": VLMTestInfo(
         models=["adept/fuyu-8b"],
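
A reading note on PATCH 1/5: the language backbone is picked from two fields of the checkpoint's text config. The sketch below restates that dispatch as a standalone function; the attribute names `topk_method` and `use_mla` come straight from the diff, while the per-checkpoint comments are an assumption based on the released DeepSeek-VL2 variants, not verified output.

```python
# Minimal sketch of the backbone dispatch added in PATCH 1/5, assuming the
# text config exposes the same `topk_method` and `use_mla` fields as the diff.
def select_language_arch(text_config) -> str:
    if getattr(text_config, "topk_method", None) == "noaux_tc":
        # DeepSeek-V3-style no-aux-loss top-k routing.
        return "DeepseekV3ForCausalLM"
    if not getattr(text_config, "use_mla", True):
        # No multi-head latent attention; presumably the tiny checkpoint,
        # which is why this new branch unblocks deepseek-vl2-tiny.
        return "DeepseekForCausalLM"
    # Default: MLA-based DeepSeek-V2 backbone (small and base checkpoints).
    return "DeepseekV2ForCausalLM"
```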
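
A caveat on PATCH 5/5: `TRANSFORMERS_VERSION >= "4.48.0"` compares version strings lexicographically. That gives the right answer for the recent 4.4x releases this mark targets, but it misorders versions in general (as strings, `"4.5.0" >= "4.48.0"` is true even though 4.5.0 is the far older release). A sketch of a more robust variant, assuming the standard `packaging` library is available; this is an alternative, not what the patch ships:

```python
# Hedged alternative to the string comparison in PATCH 5/5: parse versions
# with `packaging` so that e.g. "4.5.0" is not treated as newer than "4.48.0".
import pytest
from packaging.version import Version
from transformers import __version__ as TRANSFORMERS_VERSION

skip_on_new_transformers = pytest.mark.skipif(
    Version(TRANSFORMERS_VERSION) >= Version("4.48.0"),
    reason="HF model is not compatible with transformers>=4.48.0",
)
```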
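
Taken together, the series makes the tiny checkpoint the default entry point. Below is a usage sketch assembled only from values that appear in these diffs (the model name, `max_model_len`, `max_num_seqs`, the `hf_overrides` from the test config, and the `<|User|>`/`<image>` prompt format); the image path is a placeholder, and the forked `deepseek_vl2` package from the docs note is assumed to be installed.

```python
# Usage sketch assembled from the values in this series; example.jpg is a
# stand-in for any local image.
from PIL import Image

from vllm import LLM

llm = LLM(
    model="deepseek-ai/deepseek-vl2-tiny",
    max_model_len=4096,
    max_num_seqs=2,
    hf_overrides={"architectures": ["DeepseekVLV2ForCausalLM"]},
)

prompt = "<|User|>: <image>\nWhat's the content in the center of the image?\n\n<|Assistant|>:"
outputs = llm.generate({
    "prompt": prompt,
    "multi_modal_data": {"image": Image.open("example.jpg")},
})
print(outputs[0].outputs[0].text)
```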