diff --git a/tests/integration/test_lists/waives.txt b/tests/integration/test_lists/waives.txt
index a4aa94fb6d2..27059a35994 100644
--- a/tests/integration/test_lists/waives.txt
+++ b/tests/integration/test_lists/waives.txt
@@ -321,3 +321,11 @@ full:L40S/accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_
 full:L40S/accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_tp_pp_symmetric[MMLU-tp2pp2] SKIP (https://nvbugs/5471108)
 test_e2e.py::test_multi_nodes_eval[llama4-models/nvidia/Llama-4-Maverick-17B-128E-Instruct-FP8-tp8pp2-mmlu] SKIP (https://nvbugs/5473781)
 accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backend=CUTLASS-mtp_nextn=0-tp4-fp8kv=True-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=True] SKIP (https://nvbugs/5476580)
+disaggregated/test_disaggregated_single_gpu.py::test_disaggregated_llama_context_capacity[False-False-DeepSeek-V3-Lite-fp8/fp8] SKIP (https://nvbugs/5477404)
+triton_server/test_triton.py::test_python_bls_unit_tests[python-bls-unit-tests] SKIP (https://nvbugs/5477392)
+triton_server/test_triton.py::test_mistral_ib[mistral-ib] SKIP (https://nvbugs/5477399)
+triton_server/test_triton.py::test_eagle[eagle] SKIP (https://nvbugs/5477378)
+examples/test_mixtral.py::test_llm_mixtral_moe_plugin_lora_4gpus[Mixtral-8x7B-v0.1-chinese-mixtral-lora] SKIP (https://nvbugs/5477421)
+accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[throughput_tp8] SKIP (https://nvbugs/5455140)
+unittest/_torch/multi_gpu_modeling/test_llama4.py::test_llama4[pp1-ep4-enable_adp-enable_graph-tp8-trtllm-scout] SKIP (https://nvbugs/5477730)
+test_e2e.py::test_openai_chat_example[trt] SKIP (https://nvbugs/5477444)
diff --git a/tests/unittest/llmapi/test_executor.py b/tests/unittest/llmapi/test_executor.py
index ecdb6d9ad25..cf34aa66f81 100644
--- a/tests/unittest/llmapi/test_executor.py
+++ b/tests/unittest/llmapi/test_executor.py
@@ -277,6 +277,7 @@ def create_rsp(id, finished: bool = False):
     return tllm.Response(request_id=0, result=result, client_id=0)
 
 
+@pytest.mark.skip(reason="https://nvbugs/5477359")
 def test_GenerationResultBase():
     sampling_params = SamplingParams(max_tokens=4)
     result = GenerationResultBase(
@@ -291,6 +292,7 @@ def test_GenerationResultBase():
     assert result._done
 
 
+@pytest.mark.skip(reason="https://nvbugs/5477359")
 def test_GenerationResult():
     request = GenerationRequest(prompt_token_ids=[12, 23, 34],
                                 sampling_params=SamplingParams(max_tokens=4))
@@ -303,6 +305,7 @@ def test_GenerationResult():
     assert result._done
 
 
+@pytest.mark.skip(reason="https://nvbugs/5477359")
 def test_DetokenizedGenerationResultBase():
     sampling_params = SamplingParams(max_tokens=4)
     model_path = llm_models_root() / "llama-models/llama-7b-hf"
@@ -434,6 +437,7 @@ def ResponsePostprocessWorker_worker_task(pull_pipe_addr, push_pipe_addr,
     worker.start()
 
 
+@pytest.mark.skip(reason="https://nvbugs/5477369")
 def test_ResponsePostprocessWorker():
     input_pipe = ZeroMqQueue(is_server=True)