11 changes: 11 additions & 0 deletions tensorrt_llm/_torch/pyexecutor/py_executor.py
@@ -1181,6 +1181,17 @@ def _forward_step_inter_pp(self, scheduled_batch) -> SampleState:

def _validate_request(self, request: LlmRequest):
if isinstance(self.model_engine.model, DecoderModelForCausalLM):
+            # Only skip token-range checks for Llama4 when the request has multimodal data
+            from ..models.modeling_llama import Llama4ForConditionalGeneration
+            if isinstance(self.model_engine.model,
+                          Llama4ForConditionalGeneration):
+                has_mm = bool(request.py_multimodal_data)
+                if has_mm:
+                    logger.debug(
+                        f"Skipping token-range validation for {type(self.model_engine.model).__name__} "
+                        "(multimodal request)")
+                    return
+
# FIXME: This check is necessary because of how Qwen2ForProcessRewardModel
# subclasses DecoderModelForCausalLM. Perhaps the functionality
# of DecoderModelForCausalLM reused by Qwen2ForProcessRewardModel
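The hunk shows only the new early-return; the token-range validation it bypasses sits further down in _validate_request and is not part of this diff. As a rough, self-contained illustration of the resulting rule, using hypothetical stand-ins (FakeRequest, validate_tokens, and an assumed vocabulary-size bound) rather than the real PyExecutor and LlmRequest classes:

    # Hypothetical sketch, not the PR's code: multimodal Llama4 requests skip the
    # range check, text-only requests are still validated against a vocab bound.
    from dataclasses import dataclass, field

    @dataclass
    class FakeRequest:
        tokens: list[int]
        multimodal_data: dict = field(default_factory=dict)  # stands in for py_multimodal_data

    def validate_tokens(req: FakeRequest, vocab_size: int, is_llama4: bool) -> None:
        if is_llama4 and req.multimodal_data:
            # Multimodal prompt: placeholder token IDs may fall outside the
            # text vocabulary, so skip the range check entirely.
            return
        out_of_range = [t for t in req.tokens if not 0 <= t < vocab_size]
        if out_of_range:
            raise ValueError(f"token ids out of range [0, {vocab_size}): {out_of_range}")

    # Text-only request must stay in range ...
    validate_tokens(FakeRequest(tokens=[1, 5, 7]), vocab_size=32000, is_llama4=True)
    # ... while a multimodal request with an out-of-range placeholder ID passes through.
    validate_tokens(FakeRequest(tokens=[1, 200090], multimodal_data={"image": ["<tensor>"]}),
                    vocab_size=32000, is_llama4=True)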
19 changes: 9 additions & 10 deletions tests/unittest/_torch/multi_gpu_modeling/test_llama4.py
@@ -1,6 +1,7 @@
from difflib import SequenceMatcher

import pytest
+import torch
from utils.llm_data import llm_models_root

from tensorrt_llm import LLM, SamplingParams
@@ -43,19 +44,17 @@ def test_llama4(model_name, backend, tp_size, use_cuda_graph,
"This is a very long prompt to exercise long context. Count up to 10000 from 1, 2, 3,"
+ ", ".join(str(i) for i in range(4, 9000))
},
-        # TODO: Fix multimodal test.
-        # {
-        #     "prompt": "<|image|>This image is of color",
-        #     "multi_modal_data": {
-        #         "image": [torch.ones(3, 1024, 1024)]
-        #     }
-        # },
+        {
+            "prompt": "<|image|>This image is of color",
+            "multi_modal_data": {
+                "image": [torch.ones(3, 1024, 1024)]
+            }
+        },
]

expected_outputs = [
" the head of state and head of government of the",
", 9000, 9001, ",
# " white. What is the color of the background of" # TODO: Fix multimodal test.
" the head of state and head of government of the", ", 9000, 9001, ",
" white. What is the color of the background of"
]

pytorch_config = dict(attn_backend=backend)
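Taken on its own, the re-enabled multimodal case amounts to the usage pattern sketched below. This is a hedged illustration, not the test's exact body: the checkpoint path, tensor_parallel_size, and max_tokens values are placeholders (the test resolves the model path from llm_models_root() and parametrizes tp_size), and only the prompt dict and the expected output prefix are taken from the diff above.

    # Hypothetical sketch of the multimodal path the test re-enables.
    import torch

    from tensorrt_llm import LLM, SamplingParams

    llm = LLM(
        model="<path-to-llama4-checkpoint>",  # placeholder
        tensor_parallel_size=8,               # placeholder; the test parametrizes tp_size
    )
    prompts = [{
        "prompt": "<|image|>This image is of color",
        "multi_modal_data": {
            # A solid all-ones tensor stands in for a real RGB image (C, H, W).
            "image": [torch.ones(3, 1024, 1024)]
        },
    }]
    outputs = llm.generate(prompts, SamplingParams(max_tokens=10))
    # The test checks the generated text against the expected prefix
    # " white. What is the color of the background of" (SequenceMatcher is
    # imported at the top of the test for this comparison).
    print(outputs[0].outputs[0].text)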