Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,7 @@ def generate_async(prompt: str,
disaggregated_server.wait()


@pytest.mark.timeout(3600)
class TestLlama3_1_8BInstruct(LlmapiAccuracyTestHarness):
MODEL_NAME = "meta-llama/Llama-3.1-8B-Instruct"
MODEL_PATH = f"{llm_models_root()}/llama-3.1-model/Llama-3.1-8B-Instruct"
Expand Down Expand Up @@ -208,11 +209,12 @@ def test_auto_dtype(self, disable_overlap_scheduler):
task.evaluate(llm)


@pytest.mark.timeout(3600)
@pytest.mark.skip_less_device_memory(140000)
class TestLlama4ScoutInstruct(LlmapiAccuracyTestHarness):
MODEL_NAME = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
MODEL_PATH = f"{llm_models_root()}/llama4-models/Llama-4-Scout-17B-16E-Instruct"

@pytest.mark.skip_device_not_contain(["H200"])
@pytest.mark.parametrize("overlap_scheduler", [False, True])
def test_auto_dtype(self, overlap_scheduler):
ctx_server_config = {"disable_overlap_scheduler": True}
Expand Down Expand Up @@ -241,6 +243,7 @@ def test_auto_dtype(self, overlap_scheduler):
task.evaluate(llm)


@pytest.mark.timeout(3600)
class TestDeepSeekV3Lite(LlmapiAccuracyTestHarness):
MODEL_NAME = "deepseek-ai/DeepSeek-V3-Lite"
MODEL_PATH = f"{llm_models_root()}/DeepSeek-V3-Lite/bf16"
Expand Down
10 changes: 6 additions & 4 deletions tests/integration/defs/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

import datetime
import os
import platform
import re
import shutil
import subprocess as sp
Expand Down Expand Up @@ -283,7 +284,6 @@ def gemma_example_root(llm_root, llm_venv):
# and caused the pipeline to fail. We manually install the gemma dependencies as a workaround (WAR).
llm_venv.run_cmd(["-m", "pip", "install", "safetensors~=0.4.1", "nltk"])
# Install Jax because it breaks dependency
import platform
google_extension = [
"-f",
"https://storage.googleapis.com/jax-releases/jax_cuda_releases.html"
Expand Down Expand Up @@ -1722,8 +1722,6 @@ def qcache_dir(llm_venv, llm_root):

quantization_root = os.path.join(llm_root, "examples", "quantization")

import platform

# Work around requirements.txt not being available on aarch64.
if "aarch64" not in platform.machine() and get_sm_version() >= 89:
llm_venv.run_cmd([
Expand Down Expand Up @@ -1874,7 +1872,11 @@ def check_device_contain(keyword_list):
reason="This test is only supported in Hopper architecture")

skip_no_sm120 = pytest.mark.skipif(get_sm_version() != 120,
reason="This test is for Blackwell SM120")
reason="This test is for SM120")

# Skip marker for tests that cannot run on ARM hosts: the condition is true
# whenever the machine string reported by platform.machine() contains
# "aarch64". It is evaluated once, when this module is imported.
skip_arm = pytest.mark.skipif(
"aarch64" in platform.machine(),
reason="This test is not supported on ARM architecture")


def skip_fp8_pre_ada(use_fp8):
Expand Down
5 changes: 4 additions & 1 deletion tests/integration/defs/disaggregated/test_disaggregated.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
import subprocess

import pytest
from defs.conftest import skip_no_hopper
from defs.conftest import skip_arm, skip_no_hopper
from defs.trt_test_alternative import check_call, popen

from tensorrt_llm.logger import logger
Expand Down Expand Up @@ -528,6 +528,7 @@ def test_disaggregated_deepseek_v3_lite_fp8_tp1_single_gpu_mtp(


@skip_no_hopper
@skip_arm
@pytest.mark.skip_less_device(4)
@pytest.mark.parametrize("deepseek_v3_model_root", ['DeepSeek-V3-Lite-fp8'],
indirect=True)
Expand All @@ -554,6 +555,7 @@ def test_disaggregated_deepseek_v3_lite_fp8_ucx(disaggregated_test_root,


@skip_no_hopper
@skip_arm
@pytest.mark.parametrize("deepseek_v3_model_root", ['DeepSeek-V3-Lite-fp8'],
indirect=True)
def test_disaggregated_deepseek_v3_lite_fp8_nixl(disaggregated_test_root,
Expand All @@ -579,6 +581,7 @@ def test_disaggregated_deepseek_v3_lite_fp8_nixl(disaggregated_test_root,


@skip_no_hopper
@skip_arm
@pytest.mark.parametrize("deepseek_v3_model_root", ['DeepSeek-V3-Lite-fp8'],
indirect=True)
def test_disaggregated_deepseek_v3_lite_fp8_ucx_tp1_single_gpu(
Expand Down
2 changes: 2 additions & 0 deletions tests/integration/test_lists/waives.txt
Original file line number Diff line number Diff line change
Expand Up @@ -443,3 +443,5 @@ test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-8B-NVFP4-nvfp4-quantized/Meta
test_e2e.py::test_ptp_quickstart_advanced[Mixtral-8x7B-NVFP4-nvfp4-quantized/Mixtral-8x7B-Instruct-v0.1] SKIP (https://nvbugs/5333659)
test_e2e.py::test_ptp_quickstart_advanced[Nemotron-Super-49B-v1-NVFP4-nvfp4-quantized/Llama-3_3-Nemotron-Super-49B-v1_nvfp4_hf] SKIP (https://nvbugs/5333659)
examples/test_multimodal.py::test_llm_multimodal_general[Mistral-Small-3.1-24B-Instruct-2503-pp:1-tp:1-bfloat16-bs:8-cpp_e2e:False-nb:1] SKIP (https://nvbugs/5331031)
accuracy/test_disaggregated_serving.py::TestLlama4ScoutInstruct::test_auto_dtype[True] SKIP (https://nvbugs/5336321)
accuracy/test_disaggregated_serving.py::TestLlama4ScoutInstruct::test_auto_dtype[False] SKIP (https://nvbugs/5336321)