diff --git a/tests/integration/defs/accuracy/test_disaggregated_serving.py b/tests/integration/defs/accuracy/test_disaggregated_serving.py index 6a70628f904..733122c3229 100644 --- a/tests/integration/defs/accuracy/test_disaggregated_serving.py +++ b/tests/integration/defs/accuracy/test_disaggregated_serving.py @@ -697,9 +697,9 @@ def test_nixl_backend(self): task.evaluate(llm) @pytest.mark.skip_less_device(8) + @skip_pre_hopper @parametrize_with_ids("overlap_scheduler", [True, False]) - @parametrize_with_ids("mtp_nextn", - [0, pytest.param(2, marks=skip_pre_hopper)]) + @parametrize_with_ids("mtp_nextn", [0, 2]) @pytest.mark.skip_less_device(8) def test_auto_dtype(self, overlap_scheduler, mtp_nextn): ctx_server_config = {"disable_overlap_scheduler": True} diff --git a/tests/integration/defs/accuracy/test_llm_api_pytorch.py b/tests/integration/defs/accuracy/test_llm_api_pytorch.py index c416bb49744..305dd3c67d9 100644 --- a/tests/integration/defs/accuracy/test_llm_api_pytorch.py +++ b/tests/integration/defs/accuracy/test_llm_api_pytorch.py @@ -1130,23 +1130,21 @@ def test_nvfp4_tp2(self): # This class has extensively parameterized test methods, which yield totally 200 test cases. # This is because this model requires high test coverage over the feature combinations. # Normally we should not parameterize test methods so extensively -- just test on the typical/important feature combinations. +@skip_pre_hopper class TestDeepSeekV3Lite(LlmapiAccuracyTestHarness): MODEL_NAME = "deepseek-ai/DeepSeek-V3-Lite" MODEL_PATH = f"{llm_models_root()}/DeepSeek-V3-Lite/bf16" @pytest.mark.skip_less_device_memory(60000) # Chunked Prefill for MLA can only be enabled on SM100 - @parametrize_with_ids( - "enable_chunked_prefill", - [False, pytest.param(True, marks=skip_pre_hopper)]) + @parametrize_with_ids("enable_chunked_prefill", [False, True]) @parametrize_with_ids("torch_compile", [False, True]) @parametrize_with_ids("attention_dp,cuda_graph,overlap_scheduler", [(False, False, False), (True, False, False), (False, True, False), (False, False, True), (False, True, True), (True, True, True)]) # Only Hopper and Blackwell MLA kernel supports MTP - @parametrize_with_ids("mtp_nextn", - [0, pytest.param(2, marks=skip_pre_hopper)]) + @parametrize_with_ids("mtp_nextn", [0, 2]) def test_bfloat16(self, mtp_nextn, attention_dp, cuda_graph, overlap_scheduler, torch_compile, enable_chunked_prefill): kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.75) diff --git a/tests/integration/test_lists/waives.txt b/tests/integration/test_lists/waives.txt index a0ca2e1ee56..faf0b1af975 100644 --- a/tests/integration/test_lists/waives.txt +++ b/tests/integration/test_lists/waives.txt @@ -283,9 +283,7 @@ disaggregated/test_disaggregated.py::test_disaggregated_diff_max_tokens[TinyLlam disaggregated/test_disaggregated.py::test_disaggregated_deepseek_v3_lite_fp8_tp1_single_gpu_mtp[DeepSeek-V3-Lite-fp8] SKIP (https://nvbugs/5465642) examples/test_multimodal.py::test_llm_multimodal_general[Mistral-Small-3.1-24B-Instruct-2503-pp:1-tp:1-bfloat16-bs:1-cpp_e2e:False-nb:1] SKIP (https://nvbugs/5431146) accuracy/test_disaggregated_serving.py::TestQwen3_30B_A3B::test_mixed_ctx_gen_model[ctxpp2gentp2] SKIP (https://nvbugs/5470769) -full:L40S/accuracy/test_disaggregated_serving.py::TestDeepSeekV3Lite::test_auto_dtype[mtp_nextn=0-overlap_scheduler=False] SKIP (https://nvbugs/5347051) full:L40S/accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_tp_pp_symmetric[MMLU-tp2pp2] SKIP (https://nvbugs/5471108) -full:L20/accuracy/test_disaggregated_serving.py::TestDeepSeekV3Lite::test_auto_dtype[mtp_nextn=0-overlap_scheduler=False] SKIP (https://nvbugs/5347051) full:L20/accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_tp_pp_symmetric[MMLU-tp2pp2] SKIP (https://nvbugs/5471108) test_e2e.py::test_multi_nodes_eval[llama4-models/nvidia/Llama-4-Maverick-17B-128E-Instruct-FP8-tp8pp2-mmlu] SKIP (https://nvbugs/5473781) disaggregated/test_disaggregated_single_gpu.py::test_disaggregated_llama_context_capacity[False-False-DeepSeek-V3-Lite-fp8/fp8] SKIP (https://nvbugs/5477404) @@ -337,9 +335,6 @@ full:H100/accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_fp8 full:H100/accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_fp8[tp8ep4-cuda_graph=True] SKIP (https://nvbugs/5512734) full:H100/accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_fp8[tp8-cuda_graph=True] SKIP (https://nvbugs/5512734) full:H100/accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_fp8_eagle3[tp8-torch_compile=True] SKIP (https://nvbugs/5483534) -full:A100/accuracy/test_disaggregated_serving.py::TestDeepSeekV3Lite::test_auto_dtype[mtp_nextn=0-overlap_scheduler=False] SKIP (https://nvbugs/5519462) -full:A100/accuracy/test_disaggregated_serving.py::TestDeepSeekV3Lite::test_auto_dtype[mtp_nextn=0-overlap_scheduler=True] SKIP (https://nvbugs/5519462) -full:A100/accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[mtp_nextn=0-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False-enable_chunked_prefill=False] SKIP (https://nvbugs/5347051) full:A100/test_e2e.py::test_ptp_quickstart_multimodal[NVILA-8B-FP16-vila/NVILA-8B-video-False] SKIP (https://nvbugs/5453725) test_e2e.py::test_ptp_scaffolding[DeepSeek-R1-Distill-Qwen-7B-DeepSeek-R1/DeepSeek-R1-Distill-Qwen-7B] SKIP (https://nvbugs/5517260) accuracy/test_llm_api_pytorch.py::TestLlama4ScoutInstruct::test_fp8_chunked_prefill[tp4ep4-cuda_graph=True] SKIP (https://nvbugs/5522462)