From cf8ca7be898bdffe3c09aad69c5f3dd5c5558a08 Mon Sep 17 00:00:00 2001
From: qqiao
Date: Sun, 24 Aug 2025 23:10:20 -0700
Subject: [PATCH 1/2] Waive failed tests on main branch

Signed-off-by: qqiao
---
 tests/integration/test_lists/waives.txt                 | 6 ++++++
 tests/unittest/_torch/multi_gpu_modeling/test_llama4.py | 1 +
 tests/unittest/llmapi/apps/_test_openai_chat.py         | 1 +
 tests/unittest/llmapi/test_executor.py                  | 4 ++++
 4 files changed, 12 insertions(+)

diff --git a/tests/integration/test_lists/waives.txt b/tests/integration/test_lists/waives.txt
index a4aa94fb6d2..f9f8f715c12 100644
--- a/tests/integration/test_lists/waives.txt
+++ b/tests/integration/test_lists/waives.txt
@@ -321,3 +321,9 @@ full:L40S/accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_
 full:L40S/accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_tp_pp_symmetric[MMLU-tp2pp2] SKIP (https://nvbugs/5471108)
 test_e2e.py::test_multi_nodes_eval[llama4-models/nvidia/Llama-4-Maverick-17B-128E-Instruct-FP8-tp8pp2-mmlu] SKIP (https://nvbugs/5473781)
 accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backend=CUTLASS-mtp_nextn=0-tp4-fp8kv=True-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=True] SKIP (https://nvbugs/5476580)
+disaggregated/test_disaggregated_single_gpu.py::test_disaggregated_llama_context_capacity[False-False-DeepSeek-V3-Lite-fp8/fp8] SKIP (https://nvbugs/5477404)
+triton_server/test_triton.py::test_python_bls_unit_tests[python-bls-unit-tests] SKIP (https://nvbugs/5477392)
+triton_server/test_triton.py::test_mistral_ib[mistral-ib] SKIP (https://nvbugs/5477399)
+triton_server/test_triton.py::test_eagle[eagle] SKIP (https://nvbugs/5477378)
+examples/test_mixtral.py::test_llm_mixtral_moe_plugin_lora_4gpus[Mixtral-8x7B-v0.1-chinese-mixtral-lora] SKIP (https://nvbugs/5477421)
+accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[throughput_tp8] SKIP (https://nvbugs/5455140)
diff --git a/tests/unittest/_torch/multi_gpu_modeling/test_llama4.py b/tests/unittest/_torch/multi_gpu_modeling/test_llama4.py
index 6149201d582..4910d1351fa 100644
--- a/tests/unittest/_torch/multi_gpu_modeling/test_llama4.py
+++ b/tests/unittest/_torch/multi_gpu_modeling/test_llama4.py
@@ -8,6 +8,7 @@
 from tensorrt_llm.llmapi import CudaGraphConfig, KvCacheConfig
 
 
+@pytest.mark.skip(reason="https://nvbugs/5418673")
 @pytest.mark.parametrize(
     "model_name",
     ["Llama-4-Maverick-17B-128E-Instruct", "Llama-4-Scout-17B-16E-Instruct"],
diff --git a/tests/unittest/llmapi/apps/_test_openai_chat.py b/tests/unittest/llmapi/apps/_test_openai_chat.py
index 6e58b094783..a4f655b506d 100644
--- a/tests/unittest/llmapi/apps/_test_openai_chat.py
+++ b/tests/unittest/llmapi/apps/_test_openai_chat.py
@@ -14,6 +14,7 @@ from .utils import (invalid_logit_bias_helper, logit_bias_effect_helper,
                     make_server_with_custom_sampler_fixture)
 
+pytestmark = pytest.mark.skip(reason="https://nvbugs/5477444")
 pytestmark = pytest.mark.threadleak(enabled=False)
 
 
diff --git a/tests/unittest/llmapi/test_executor.py b/tests/unittest/llmapi/test_executor.py
index ecdb6d9ad25..cf34aa66f81 100644
--- a/tests/unittest/llmapi/test_executor.py
+++ b/tests/unittest/llmapi/test_executor.py
@@ -277,6 +277,7 @@ def create_rsp(id, finished: bool = False):
     return tllm.Response(request_id=0, result=result, client_id=0)
 
 
+@pytest.mark.skip(reason="https://nvbugs/5477359")
 def test_GenerationResultBase():
     sampling_params = SamplingParams(max_tokens=4)
     result = GenerationResultBase(
@@ -291,6 +292,7 @@ def test_GenerationResultBase():
     assert result._done
 
 
+@pytest.mark.skip(reason="https://nvbugs/5477359")
 def test_GenerationResult():
     request = GenerationRequest(prompt_token_ids=[12, 23, 34],
                                 sampling_params=SamplingParams(max_tokens=4))
@@ -303,6 +305,7 @@ def test_GenerationResult():
     assert result._done
 
 
+@pytest.mark.skip(reason="https://nvbugs/5477359")
 def test_DetokenizedGenerationResultBase():
     sampling_params = SamplingParams(max_tokens=4)
     model_path = llm_models_root() / "llama-models/llama-7b-hf"
@@ -434,6 +437,7 @@ def ResponsePostprocessWorker_worker_task(pull_pipe_addr, push_pipe_addr,
     worker.start()
 
 
+@pytest.mark.skip(reason="https://nvbugs/5477369")
 def test_ResponsePostprocessWorker():
     input_pipe = ZeroMqQueue(is_server=True)

From 43801a9646e368258359d14105010a17d7b224d1 Mon Sep 17 00:00:00 2001
From: qqiao
Date: Mon, 25 Aug 2025 00:00:27 -0700
Subject: [PATCH 2/2] Update some waives based on comments

Signed-off-by: qqiao
---
 tests/integration/test_lists/waives.txt                 | 2 ++
 tests/unittest/_torch/multi_gpu_modeling/test_llama4.py | 1 -
 tests/unittest/llmapi/apps/_test_openai_chat.py         | 1 -
 3 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/integration/test_lists/waives.txt b/tests/integration/test_lists/waives.txt
index f9f8f715c12..27059a35994 100644
--- a/tests/integration/test_lists/waives.txt
+++ b/tests/integration/test_lists/waives.txt
@@ -327,3 +327,5 @@ triton_server/test_triton.py::test_mistral_ib[mistral-ib] SKIP (https://nvbugs/5
 triton_server/test_triton.py::test_eagle[eagle] SKIP (https://nvbugs/5477378)
 examples/test_mixtral.py::test_llm_mixtral_moe_plugin_lora_4gpus[Mixtral-8x7B-v0.1-chinese-mixtral-lora] SKIP (https://nvbugs/5477421)
 accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[throughput_tp8] SKIP (https://nvbugs/5455140)
+unittest/_torch/multi_gpu_modeling/test_llama4.py::test_llama4[pp1-ep4-enable_adp-enable_graph-tp8-trtllm-scout] SKIP (https://nvbugs/5477730)
+test_e2e.py::test_openai_chat_example[trt] SKIP (https://nvbugs/5477444)
diff --git a/tests/unittest/_torch/multi_gpu_modeling/test_llama4.py b/tests/unittest/_torch/multi_gpu_modeling/test_llama4.py
index 4910d1351fa..6149201d582 100644
--- a/tests/unittest/_torch/multi_gpu_modeling/test_llama4.py
+++ b/tests/unittest/_torch/multi_gpu_modeling/test_llama4.py
@@ -8,7 +8,6 @@
 from tensorrt_llm.llmapi import CudaGraphConfig, KvCacheConfig
 
 
-@pytest.mark.skip(reason="https://nvbugs/5418673")
 @pytest.mark.parametrize(
     "model_name",
     ["Llama-4-Maverick-17B-128E-Instruct", "Llama-4-Scout-17B-16E-Instruct"],
diff --git a/tests/unittest/llmapi/apps/_test_openai_chat.py b/tests/unittest/llmapi/apps/_test_openai_chat.py
index a4f655b506d..6e58b094783 100644
--- a/tests/unittest/llmapi/apps/_test_openai_chat.py
+++ b/tests/unittest/llmapi/apps/_test_openai_chat.py
@@ -14,7 +14,6 @@ from .utils import (invalid_logit_bias_helper, logit_bias_effect_helper,
                     make_server_with_custom_sampler_fixture)
 
-pytestmark = pytest.mark.skip(reason="https://nvbugs/5477444")
 pytestmark = pytest.mark.threadleak(enabled=False)