diff --git a/examples/models/core/multimodal/requirements-qwen2vl.txt b/examples/models/core/multimodal/requirements-qwen2vl.txt
index 50f14d1d809..c75f6c32e1f 100644
--- a/examples/models/core/multimodal/requirements-qwen2vl.txt
+++ b/examples/models/core/multimodal/requirements-qwen2vl.txt
@@ -1,2 +1,3 @@
 accelerate
 qwen-vl-utils==0.0.8 # 0.0.9 has bug https://github.com/QwenLM/Qwen2-VL/pull/673, rollback until a newer version is released
+transformers==4.51.0 # nvbugs/5385987
diff --git a/tensorrt_llm/tools/multimodal_builder.py b/tensorrt_llm/tools/multimodal_builder.py
index d93d1a8bfc8..9a2096852b7 100644
--- a/tensorrt_llm/tools/multimodal_builder.py
+++ b/tensorrt_llm/tools/multimodal_builder.py
@@ -1324,6 +1324,7 @@ def rot_pos_emb(grid_thw, rotary_pos_emb_func):
 
 
 def build_qwen2_vl_engine(args):
+    import transformers
     from qwen_vl_utils import process_vision_info
     from transformers import AutoProcessor, Qwen2VLForConditionalGeneration
     from transformers.models.qwen2_vl.configuration_qwen2_vl import \
@@ -1391,8 +1392,21 @@ def build_qwen2_vl_engine(args):
     class VisionAttentionOpt(VisionAttention):
 
         def __init__(self, config: Qwen2VLVisionConfig):
-            super().__init__(config)
-            self.head_dim = config.embed_dim // config.num_heads
+            # VisionAttention takes the config object from transformers
+            # 4.53 on; older releases (kept for certain nvbugs/tests) take
+            # (dim, num_heads). Compare numerically: as strings,
+            # '4.100.0' >= '4.53.0' is False and '4.6.0' >= '4.53.0' is True.
+            major_minor = tuple(
+                int(part)
+                for part in transformers.__version__.split('.')[:2])
+            if major_minor >= (4, 53):
+                super().__init__(config)
+                self.head_dim = config.embed_dim // config.num_heads
+            else:
+                num_heads = config.num_heads
+                dim = config.embed_dim
+                super().__init__(dim, num_heads)
+                self.head_dim = dim // num_heads
 
         def forward(self,
                     hidden_states: torch.Tensor,
diff --git a/tests/integration/test_lists/waives.txt b/tests/integration/test_lists/waives.txt
index d6463637208..27eef079984 100644
--- a/tests/integration/test_lists/waives.txt
+++ b/tests/integration/test_lists/waives.txt
@@ -235,7 +235,6 @@ stress_test/stress_test.py::test_run_stress_test[llama-v3-8b-instruct-hf_tp1-str
 accuracy/test_llm_api_pytorch.py::TestGemma3_1BInstruct::test_auto_dtype SKIP (https://nvbugs/5375620)
 test_e2e.py::test_ptp_quickstart_advanced_8gpus[Llama3.1-405B-FP8-llama-3.1-model/Llama-3.1-405B-Instruct-FP8] SKIP (https://nvbugs/5380570)
 test_e2e.py::test_ptp_quickstart_advanced_8gpus[Nemotron-Ultra-253B-nemotron-nas/Llama-3_1-Nemotron-Ultra-253B-v1] SKIP (https://nvbugs/5380570)
-examples/test_multimodal.py::test_llm_fp8_multimodal_general[fp8-fp8-cnn_dailymail-Qwen2-VL-7B-Instruct-pp:1-tp:1-bfloat16-bs:1-cpp_e2e:False] SKIP (https://nvbugs/5385987)
 examples/test_multimodal.py::test_llm_multimodal_general[Phi-4-multimodal-instruct-pp:1-tp:1-float16-bs:1-cpp_e2e:False-nb:1] SKIP (https://nvbugs/5385992)
 examples/test_multimodal.py::test_llm_multimodal_general[kosmos-2-pp:1-tp:1-float16-bs:1-cpp_e2e:False-nb:1] SKIP (https://nvbugs/5387422)
 examples/test_multimodal.py::test_llm_multimodal_general[fuyu-8b-pp:1-tp:1-float16-bs:1-cpp_e2e:False-nb:1] SKIP (https://nvbugs/5387424)