NVIDIA · LarryXFly · Jun 11, 2025 · Jun 11, 2025
diff --git a/tests/integration/defs/accuracy/test_disaggregated_serving.py b/tests/integration/defs/accuracy/test_disaggregated_serving.py
@@ -174,6 +174,7 @@ def generate_async(prompt: str,
         disaggregated_server.wait()
 
 
+@pytest.mark.timeout(3600)
 class TestLlama3_1_8BInstruct(LlmapiAccuracyTestHarness):
     MODEL_NAME = "meta-llama/Llama-3.1-8B-Instruct"
     MODEL_PATH = f"{llm_models_root()}/llama-3.1-model/Llama-3.1-8B-Instruct"
@@ -208,11 +209,12 @@ def test_auto_dtype(self, disable_overlap_scheduler):
             task.evaluate(llm)
 
 
+@pytest.mark.timeout(3600)
+@pytest.mark.skip_less_device_memory(140000)
 class TestLlama4ScoutInstruct(LlmapiAccuracyTestHarness):
     MODEL_NAME = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
     MODEL_PATH = f"{llm_models_root()}/llama4-models/Llama-4-Scout-17B-16E-Instruct"
 
-    @pytest.mark.skip_device_not_contain(["H200"])
     @pytest.mark.parametrize("overlap_scheduler", [False, True])
     def test_auto_dtype(self, overlap_scheduler):
         ctx_server_config = {"disable_overlap_scheduler": True}
@@ -241,6 +243,7 @@ def test_auto_dtype(self, overlap_scheduler):
             task.evaluate(llm)
 
 
+@pytest.mark.timeout(3600)
 class TestDeepSeekV3Lite(LlmapiAccuracyTestHarness):
     MODEL_NAME = "deepseek-ai/DeepSeek-V3-Lite"
     MODEL_PATH = f"{llm_models_root()}/DeepSeek-V3-Lite/bf16"

diff --git a/tests/integration/defs/conftest.py b/tests/integration/defs/conftest.py
@@ -16,6 +16,7 @@
 
 import datetime
 import os
+import platform
 import re
 import shutil
 import subprocess as sp
@@ -283,7 +284,6 @@ def gemma_example_root(llm_root, llm_venv):
     # and caused pipeline to fail. We manually install gemma dependency as a WAR.
     llm_venv.run_cmd(["-m", "pip", "install", "safetensors~=0.4.1", "nltk"])
     # Install Jax because it breaks dependency
-    import platform
     google_extension = [
         "-f",
         "https://storage.googleapis.com/jax-releases/jax_cuda_releases.html"
@@ -1722,8 +1722,6 @@ def qcache_dir(llm_venv, llm_root):
 
     quantization_root = os.path.join(llm_root, "examples", "quantization")
 
-    import platform
-
     # Fix the issue that the requirements.txt is not available on aarch64.
     if "aarch64" not in platform.machine() and get_sm_version() >= 89:
         llm_venv.run_cmd([
@@ -1874,7 +1872,11 @@ def check_device_contain(keyword_list):
     reason="This test is only  supported in Hopper architecture")
 
 skip_no_sm120 = pytest.mark.skipif(get_sm_version() != 120,
-                                   reason="This test is for Blackwell SM120")
+                                   reason="This test is for SM120")
+
+skip_arm = pytest.mark.skipif(
+    "aarch64" in platform.machine(),  # NOTE: does not match "arm64"
+    reason="This test is not supported on ARM architecture")
 
 
 def skip_fp8_pre_ada(use_fp8):

diff --git a/tests/integration/defs/disaggregated/test_disaggregated.py b/tests/integration/defs/disaggregated/test_disaggregated.py
@@ -17,7 +17,7 @@
 import subprocess
 
 import pytest
-from defs.conftest import skip_no_hopper
+from defs.conftest import skip_arm, skip_no_hopper
 from defs.trt_test_alternative import check_call, popen
 
 from tensorrt_llm.logger import logger
@@ -528,6 +528,7 @@ def test_disaggregated_deepseek_v3_lite_fp8_tp1_single_gpu_mtp(
 
 
 @skip_no_hopper
+@skip_arm
 @pytest.mark.skip_less_device(4)
 @pytest.mark.parametrize("deepseek_v3_model_root", ['DeepSeek-V3-Lite-fp8'],
                          indirect=True)
@@ -554,6 +555,7 @@ def test_disaggregated_deepseek_v3_lite_fp8_ucx(disaggregated_test_root,
 
 
 @skip_no_hopper
+@skip_arm
 @pytest.mark.parametrize("deepseek_v3_model_root", ['DeepSeek-V3-Lite-fp8'],
                          indirect=True)
 def test_disaggregated_deepseek_v3_lite_fp8_nixl(disaggregated_test_root,
@@ -579,6 +581,7 @@ def test_disaggregated_deepseek_v3_lite_fp8_nixl(disaggregated_test_root,
 
 
 @skip_no_hopper
+@skip_arm
 @pytest.mark.parametrize("deepseek_v3_model_root", ['DeepSeek-V3-Lite-fp8'],
                          indirect=True)
 def test_disaggregated_deepseek_v3_lite_fp8_ucx_tp1_single_gpu(

diff --git a/tests/integration/test_lists/waives.txt b/tests/integration/test_lists/waives.txt
@@ -443,3 +443,5 @@ test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-8B-NVFP4-nvfp4-quantized/Meta
 test_e2e.py::test_ptp_quickstart_advanced[Mixtral-8x7B-NVFP4-nvfp4-quantized/Mixtral-8x7B-Instruct-v0.1] SKIP (https://nvbugs/5333659)
 test_e2e.py::test_ptp_quickstart_advanced[Nemotron-Super-49B-v1-NVFP4-nvfp4-quantized/Llama-3_3-Nemotron-Super-49B-v1_nvfp4_hf] SKIP (https://nvbugs/5333659)
 examples/test_multimodal.py::test_llm_multimodal_general[Mistral-Small-3.1-24B-Instruct-2503-pp:1-tp:1-bfloat16-bs:8-cpp_e2e:False-nb:1] SKIP (https://nvbugs/5331031)
+accuracy/test_disaggregated_serving.py::TestLlama4ScoutInstruct::test_auto_dtype[True] SKIP (https://nvbugs/5336321)
+accuracy/test_disaggregated_serving.py::TestLlama4ScoutInstruct::test_auto_dtype[False] SKIP (https://nvbugs/5336321)