From cf8ca7be898bdffe3c09aad69c5f3dd5c5558a08 Mon Sep 17 00:00:00 2001
From: qqiao
Date: Sun, 24 Aug 2025 23:10:20 -0700
Subject: [PATCH 1/2] Waive failed tests on main branch

Signed-off-by: qqiao
---
 tests/integration/test_lists/waives.txt                 | 6 ++++++
 tests/unittest/_torch/multi_gpu_modeling/test_llama4.py | 1 +
 tests/unittest/llmapi/apps/_test_openai_chat.py         | 1 +
 tests/unittest/llmapi/test_executor.py                  | 4 ++++
 4 files changed, 12 insertions(+)

diff --git a/tests/integration/test_lists/waives.txt b/tests/integration/test_lists/waives.txt
index a4aa94fb6d2..f9f8f715c12 100644
--- a/tests/integration/test_lists/waives.txt
+++ b/tests/integration/test_lists/waives.txt
@@ -321,3 +321,9 @@ full:L40S/accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_
 full:L40S/accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_tp_pp_symmetric[MMLU-tp2pp2] SKIP (https://nvbugs/5471108)
 test_e2e.py::test_multi_nodes_eval[llama4-models/nvidia/Llama-4-Maverick-17B-128E-Instruct-FP8-tp8pp2-mmlu] SKIP (https://nvbugs/5473781)
 accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backend=CUTLASS-mtp_nextn=0-tp4-fp8kv=True-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=True] SKIP (https://nvbugs/5476580)
+disaggregated/test_disaggregated_single_gpu.py::test_disaggregated_llama_context_capacity[False-False-DeepSeek-V3-Lite-fp8/fp8] SKIP (https://nvbugs/5477404)
+triton_server/test_triton.py::test_python_bls_unit_tests[python-bls-unit-tests] SKIP (https://nvbugs/5477392)
+triton_server/test_triton.py::test_mistral_ib[mistral-ib] SKIP (https://nvbugs/5477399)
+triton_server/test_triton.py::test_eagle[eagle] SKIP (https://nvbugs/5477378)
+examples/test_mixtral.py::test_llm_mixtral_moe_plugin_lora_4gpus[Mixtral-8x7B-v0.1-chinese-mixtral-lora] SKIP (https://nvbugs/5477421)
+accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[throughput_tp8] SKIP (https://nvbugs/5455140)
diff --git a/tests/unittest/_torch/multi_gpu_modeling/test_llama4.py b/tests/unittest/_torch/multi_gpu_modeling/test_llama4.py
index 6149201d582..4910d1351fa 100644
--- a/tests/unittest/_torch/multi_gpu_modeling/test_llama4.py
+++ b/tests/unittest/_torch/multi_gpu_modeling/test_llama4.py
@@ -8,6 +8,7 @@
 from tensorrt_llm.llmapi import CudaGraphConfig, KvCacheConfig
 
 
+@pytest.mark.skip(reason="https://nvbugs/5418673")
 @pytest.mark.parametrize(
     "model_name",
     ["Llama-4-Maverick-17B-128E-Instruct", "Llama-4-Scout-17B-16E-Instruct"],
diff --git a/tests/unittest/llmapi/apps/_test_openai_chat.py b/tests/unittest/llmapi/apps/_test_openai_chat.py
index 6e58b094783..a4f655b506d 100644
--- a/tests/unittest/llmapi/apps/_test_openai_chat.py
+++ b/tests/unittest/llmapi/apps/_test_openai_chat.py
@@ -14,6 +14,7 @@ from .utils import (invalid_logit_bias_helper, logit_bias_effect_helper,
                     make_server_with_custom_sampler_fixture)
 
+pytestmark = pytest.mark.skip(reason="https://nvbugs/5477444")
 pytestmark = pytest.mark.threadleak(enabled=False)
 
 
diff --git a/tests/unittest/llmapi/test_executor.py b/tests/unittest/llmapi/test_executor.py
index ecdb6d9ad25..cf34aa66f81 100644
--- a/tests/unittest/llmapi/test_executor.py
+++ b/tests/unittest/llmapi/test_executor.py
@@ -277,6 +277,7 @@ def create_rsp(id, finished: bool = False):
     return tllm.Response(request_id=0, result=result, client_id=0)
 
 
+@pytest.mark.skip(reason="https://nvbugs/5477359")
 def test_GenerationResultBase():
     sampling_params = SamplingParams(max_tokens=4)
     result = GenerationResultBase(
@@ -291,6 +292,7 @@ def test_GenerationResultBase():
     assert result._done
 
 
+@pytest.mark.skip(reason="https://nvbugs/5477359")
 def test_GenerationResult():
     request = GenerationRequest(prompt_token_ids=[12, 23, 34],
                                 sampling_params=SamplingParams(max_tokens=4))
@@ -303,6 +305,7 @@ def test_GenerationResult():
     assert result._done
 
 
+@pytest.mark.skip(reason="https://nvbugs/5477359")
 def test_DetokenizedGenerationResultBase():
     sampling_params = SamplingParams(max_tokens=4)
     model_path = llm_models_root() / "llama-models/llama-7b-hf"
@@ -434,6 +437,7 @@ def ResponsePostprocessWorker_worker_task(pull_pipe_addr, push_pipe_addr,
     worker.start()
 
 
+@pytest.mark.skip(reason="https://nvbugs/5477369")
 def test_ResponsePostprocessWorker():
     input_pipe = ZeroMqQueue(is_server=True)

From 43801a9646e368258359d14105010a17d7b224d1 Mon Sep 17 00:00:00 2001
From: qqiao
Date: Mon, 25 Aug 2025 00:00:27 -0700
Subject: [PATCH 2/2] Update some waives based on comments

Signed-off-by: qqiao
---
 tests/integration/test_lists/waives.txt                 | 2 ++
 tests/unittest/_torch/multi_gpu_modeling/test_llama4.py | 1 -
 tests/unittest/llmapi/apps/_test_openai_chat.py         | 1 -
 3 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/integration/test_lists/waives.txt b/tests/integration/test_lists/waives.txt
index f9f8f715c12..27059a35994 100644
--- a/tests/integration/test_lists/waives.txt
+++ b/tests/integration/test_lists/waives.txt
@@ -327,3 +327,5 @@ triton_server/test_triton.py::test_mistral_ib[mistral-ib] SKIP (https://nvbugs/5
 triton_server/test_triton.py::test_eagle[eagle] SKIP (https://nvbugs/5477378)
 examples/test_mixtral.py::test_llm_mixtral_moe_plugin_lora_4gpus[Mixtral-8x7B-v0.1-chinese-mixtral-lora] SKIP (https://nvbugs/5477421)
 accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[throughput_tp8] SKIP (https://nvbugs/5455140)
+unittest/_torch/multi_gpu_modeling/test_llama4.py::test_llama4[pp1-ep4-enable_adp-enable_graph-tp8-trtllm-scout] SKIP (https://nvbugs/5477730)
+test_e2e.py::test_openai_chat_example[trt] SKIP (https://nvbugs/5477444)
diff --git a/tests/unittest/_torch/multi_gpu_modeling/test_llama4.py b/tests/unittest/_torch/multi_gpu_modeling/test_llama4.py
index 4910d1351fa..6149201d582 100644
--- a/tests/unittest/_torch/multi_gpu_modeling/test_llama4.py
+++ b/tests/unittest/_torch/multi_gpu_modeling/test_llama4.py
@@ -8,7 +8,6 @@
 from tensorrt_llm.llmapi import CudaGraphConfig, KvCacheConfig
 
 
-@pytest.mark.skip(reason="https://nvbugs/5418673")
 @pytest.mark.parametrize(
     "model_name",
     ["Llama-4-Maverick-17B-128E-Instruct", "Llama-4-Scout-17B-16E-Instruct"],
diff --git a/tests/unittest/llmapi/apps/_test_openai_chat.py b/tests/unittest/llmapi/apps/_test_openai_chat.py
index a4f655b506d..6e58b094783 100644
--- a/tests/unittest/llmapi/apps/_test_openai_chat.py
+++ b/tests/unittest/llmapi/apps/_test_openai_chat.py
@@ -14,7 +14,6 @@ from .utils import (invalid_logit_bias_helper, logit_bias_effect_helper,
                     make_server_with_custom_sampler_fixture)
 
-pytestmark = pytest.mark.skip(reason="https://nvbugs/5477444")
 pytestmark = pytest.mark.threadleak(enabled=False)