diff --git a/tests/integration/defs/accuracy/test_disaggregated_serving.py b/tests/integration/defs/accuracy/test_disaggregated_serving.py index 0b7dcf3782a..69f42867b50 100644 --- a/tests/integration/defs/accuracy/test_disaggregated_serving.py +++ b/tests/integration/defs/accuracy/test_disaggregated_serving.py @@ -174,6 +174,7 @@ def generate_async(prompt: str, disaggregated_server.wait() +@pytest.mark.timeout(3600) class TestLlama3_1_8BInstruct(LlmapiAccuracyTestHarness): MODEL_NAME = "meta-llama/Llama-3.1-8B-Instruct" MODEL_PATH = f"{llm_models_root()}/llama-3.1-model/Llama-3.1-8B-Instruct" @@ -208,11 +209,12 @@ def test_auto_dtype(self, disable_overlap_scheduler): task.evaluate(llm) +@pytest.mark.timeout(3600) +@pytest.mark.skip_less_device_memory(140000) class TestLlama4ScoutInstruct(LlmapiAccuracyTestHarness): MODEL_NAME = "meta-llama/Llama-4-Scout-17B-16E-Instruct" MODEL_PATH = f"{llm_models_root()}/llama4-models/Llama-4-Scout-17B-16E-Instruct" - @pytest.mark.skip_device_not_contain(["H200"]) @pytest.mark.parametrize("overlap_scheduler", [False, True]) def test_auto_dtype(self, overlap_scheduler): ctx_server_config = {"disable_overlap_scheduler": True} @@ -241,6 +243,7 @@ def test_auto_dtype(self, overlap_scheduler): task.evaluate(llm) +@pytest.mark.timeout(3600) class TestDeepSeekV3Lite(LlmapiAccuracyTestHarness): MODEL_NAME = "deepseek-ai/DeepSeek-V3-Lite" MODEL_PATH = f"{llm_models_root()}/DeepSeek-V3-Lite/bf16" diff --git a/tests/integration/defs/conftest.py b/tests/integration/defs/conftest.py index 4a368f2b808..6e13dc2023d 100644 --- a/tests/integration/defs/conftest.py +++ b/tests/integration/defs/conftest.py @@ -16,6 +16,7 @@ import datetime import os +import platform import re import shutil import subprocess as sp @@ -283,7 +284,6 @@ def gemma_example_root(llm_root, llm_venv): # and caused pipeline to fail. We manually install gemma dependency as a WAR. llm_venv.run_cmd(["-m", "pip", "install", "safetensors~=0.4.1", "nltk"]) # Install Jax because it breaks dependency - import platform google_extension = [ "-f", "https://storage.googleapis.com/jax-releases/jax_cuda_releases.html" @@ -1722,8 +1722,6 @@ def qcache_dir(llm_venv, llm_root): quantization_root = os.path.join(llm_root, "examples", "quantization") - import platform - # Fix the issue that the requirements.txt is not available on aarch64. if "aarch64" not in platform.machine() and get_sm_version() >= 89: llm_venv.run_cmd([ @@ -1874,7 +1872,11 @@ def check_device_contain(keyword_list): reason="This test is only supported in Hopper architecture") skip_no_sm120 = pytest.mark.skipif(get_sm_version() != 120, - reason="This test is for Blackwell SM120") + reason="This test is for SM120") + +skip_arm = pytest.mark.skipif( + "aarch64" in platform.machine(), + reason="This test is not supported on ARM architecture") def skip_fp8_pre_ada(use_fp8): diff --git a/tests/integration/defs/disaggregated/test_disaggregated.py b/tests/integration/defs/disaggregated/test_disaggregated.py index 2377ac7d4d3..a2438a8e15c 100644 --- a/tests/integration/defs/disaggregated/test_disaggregated.py +++ b/tests/integration/defs/disaggregated/test_disaggregated.py @@ -17,7 +17,7 @@ import subprocess import pytest -from defs.conftest import skip_no_hopper +from defs.conftest import skip_arm, skip_no_hopper from defs.trt_test_alternative import check_call, popen from tensorrt_llm.logger import logger @@ -528,6 +528,7 @@ def test_disaggregated_deepseek_v3_lite_fp8_tp1_single_gpu_mtp( @skip_no_hopper +@skip_arm @pytest.mark.skip_less_device(4) @pytest.mark.parametrize("deepseek_v3_model_root", ['DeepSeek-V3-Lite-fp8'], indirect=True) @@ -554,6 +555,7 @@ def test_disaggregated_deepseek_v3_lite_fp8_ucx(disaggregated_test_root, @skip_no_hopper +@skip_arm @pytest.mark.parametrize("deepseek_v3_model_root", ['DeepSeek-V3-Lite-fp8'], indirect=True) def test_disaggregated_deepseek_v3_lite_fp8_nixl(disaggregated_test_root, @@ -579,6 +581,7 @@ def test_disaggregated_deepseek_v3_lite_fp8_nixl(disaggregated_test_root, @skip_no_hopper +@skip_arm @pytest.mark.parametrize("deepseek_v3_model_root", ['DeepSeek-V3-Lite-fp8'], indirect=True) def test_disaggregated_deepseek_v3_lite_fp8_ucx_tp1_single_gpu( diff --git a/tests/integration/test_lists/waives.txt b/tests/integration/test_lists/waives.txt index a70516d2996..fdaa8403780 100644 --- a/tests/integration/test_lists/waives.txt +++ b/tests/integration/test_lists/waives.txt @@ -443,3 +443,5 @@ test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-8B-NVFP4-nvfp4-quantized/Meta test_e2e.py::test_ptp_quickstart_advanced[Mixtral-8x7B-NVFP4-nvfp4-quantized/Mixtral-8x7B-Instruct-v0.1] SKIP (https://nvbugs/5333659) test_e2e.py::test_ptp_quickstart_advanced[Nemotron-Super-49B-v1-NVFP4-nvfp4-quantized/Llama-3_3-Nemotron-Super-49B-v1_nvfp4_hf] SKIP (https://nvbugs/5333659) examples/test_multimodal.py::test_llm_multimodal_general[Mistral-Small-3.1-24B-Instruct-2503-pp:1-tp:1-bfloat16-bs:8-cpp_e2e:False-nb:1] SKIP (https://nvbugs/5331031) +accuracy/test_disaggregated_serving.py::TestLlama4ScoutInstruct::test_auto_dtype[True] SKIP (https://nvbugs/5336321) +accuracy/test_disaggregated_serving.py::TestLlama4ScoutInstruct::test_auto_dtype[False] SKIP (https://nvbugs/5336321)