NVIDIA · chzblych · Aug 25, 2025 · Aug 25, 2025 · Aug 25, 2025
diff --git a/tests/integration/test_lists/waives.txt b/tests/integration/test_lists/waives.txt
@@ -321,3 +321,11 @@ full:L40S/accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_
 full:L40S/accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_tp_pp_symmetric[MMLU-tp2pp2] SKIP (https://nvbugs/5471108)
 test_e2e.py::test_multi_nodes_eval[llama4-models/nvidia/Llama-4-Maverick-17B-128E-Instruct-FP8-tp8pp2-mmlu] SKIP (https://nvbugs/5473781)
 accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backend=CUTLASS-mtp_nextn=0-tp4-fp8kv=True-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=True] SKIP (https://nvbugs/5476580)
+disaggregated/test_disaggregated_single_gpu.py::test_disaggregated_llama_context_capacity[False-False-DeepSeek-V3-Lite-fp8/fp8] SKIP (https://nvbugs/5477404)
+triton_server/test_triton.py::test_python_bls_unit_tests[python-bls-unit-tests] SKIP (https://nvbugs/5477392)
+triton_server/test_triton.py::test_mistral_ib[mistral-ib] SKIP (https://nvbugs/5477399)
+triton_server/test_triton.py::test_eagle[eagle] SKIP (https://nvbugs/5477378)
+examples/test_mixtral.py::test_llm_mixtral_moe_plugin_lora_4gpus[Mixtral-8x7B-v0.1-chinese-mixtral-lora] SKIP (https://nvbugs/5477421)
+accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[throughput_tp8] SKIP (https://nvbugs/5455140)
+unittest/_torch/multi_gpu_modeling/test_llama4.py::test_llama4[pp1-ep4-enable_adp-enable_graph-tp8-trtllm-scout] SKIP (https://nvbugs/5477730)
+test_e2e.py::test_openai_chat_example[trt] SKIP (https://nvbugs/5477444)
diff --git a/tests/unittest/llmapi/test_executor.py b/tests/unittest/llmapi/test_executor.py
@@ -277,6 +277,7 @@ def create_rsp(id, finished: bool = False):
     return tllm.Response(request_id=0, result=result, client_id=0)
 
 
+@pytest.mark.skip(reason="https://nvbugs/5477359")
 def test_GenerationResultBase():
     sampling_params = SamplingParams(max_tokens=4)
     result = GenerationResultBase(
@@ -291,6 +292,7 @@ def test_GenerationResultBase():
     assert result._done
 
 
+@pytest.mark.skip(reason="https://nvbugs/5477359")
 def test_GenerationResult():
     request = GenerationRequest(prompt_token_ids=[12, 23, 34],
                                 sampling_params=SamplingParams(max_tokens=4))
@@ -303,6 +305,7 @@ def test_GenerationResult():
     assert result._done
 
 
+@pytest.mark.skip(reason="https://nvbugs/5477359")
 def test_DetokenizedGenerationResultBase():
     sampling_params = SamplingParams(max_tokens=4)
     model_path = llm_models_root() / "llama-models/llama-7b-hf"
@@ -434,6 +437,7 @@ def ResponsePostprocessWorker_worker_task(pull_pipe_addr, push_pipe_addr,
     worker.start()
 
 
+@pytest.mark.skip(reason="https://nvbugs/5477369")
 def test_ResponsePostprocessWorker():
 
     input_pipe = ZeroMqQueue(is_server=True)