1 change: 0 additions & 1 deletion tests/integration/test_lists/qa/llm_function_l20.txt
@@ -19,7 +19,6 @@ accuracy/test_llm_api.py::TestMistralNemo12B::test_fp8
 accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_chunked_prefill[attn_backend=FLASHINFER]
 accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_chunked_prefill[attn_backend=TRTLLM]
 accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8_llm_sampler
-accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8_beam_search
 accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_eagle3[eagle3_one_model=True-overlap_scheduler=True]
 accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_eagle3[eagle3_one_model=False-overlap_scheduler=False]
 accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_ngram
3 changes: 3 additions & 0 deletions tests/integration/test_lists/waives.txt
@@ -346,3 +346,6 @@ accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8[fp8kv=False-
 accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8[fp8kv=True-attn_backend=FLASHINFER-torch_compile=True] SKIP (https://nvbugs/5485102)
 accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_bfloat16_4gpus[tp4-attn_backend=FLASHINFER-torch_compile=True] SKIP (https://nvbugs/5485109)
 accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8_4gpus[tp4-fp8kv=False-attn_backend=FLASHINFER-torch_compile=True] SKIP (https://nvbugs/5485116)
+accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16_4gpus_online_eplb[mtp_nextn=2] SKIP (https://nvbugs/5444687)
+accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus_online_eplb[fp8kv=True] SKIP (https://nvbugs/5444687)
+accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8_4gpus[tp4-fp8kv=True-attn_backend=FLASHINFER-torch_compile=True] SKIP (https://nvbugs/5488580)
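Note: each waives.txt entry is a full pytest test ID followed by the SKIP keyword and the NVBugs ticket tracking the failure, i.e. "<test-id> SKIP (<bug-url>)". A minimal parsing sketch of that format, inferred from the lines above; the helper name and regex are illustrative assumptions, not code from this repository:

import re

# Assumed entry format (inferred from the waive lines above):
#   <pytest-test-id> SKIP (<tracking-bug-url>)
WAIVE_RE = re.compile(r"^(?P<test_id>\S+)\s+SKIP\s+\((?P<bug>[^)]+)\)$")

def parse_waives(path):
    """Return a dict mapping each waived test ID to its tracking-bug URL."""
    waives = {}
    with open(path) as f:
        for raw in f:
            m = WAIVE_RE.match(raw.strip())
            if m:
                waives[m.group("test_id")] = m.group("bug")
    return waives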
2 changes: 2 additions & 0 deletions tests/unittest/llmapi/test_executor.py
@@ -78,6 +78,7 @@ def llama_7b_tp2_path(engine_path: Path) -> Path:
     return path
 
 
+@pytest.mark.skip(reason="https://nvbugs/5488280")
 @pytest.mark.skipif(WORLD_SIZE != 1, reason="Must run on single MPI rank")
 def test_generation_bs2(llama_7b_bs2_path: Path):
     tokenizer = TransformersTokenizer.from_pretrained(llama_7b_bs2_path)
@@ -99,6 +100,7 @@ def test_generation_bs2(llama_7b_bs2_path: Path):
                        'E F G H I K L M')
 
 
+@pytest.mark.skip(reason="https://nvbugs/5488280")
 @pytest.mark.skipif(WORLD_SIZE != 1, reason="Must run on single MPI rank")
 def test_sync_generation(llama_7b_path: Path):
     tokenizer = TransformersTokenizer.from_pretrained(llama_7b_path)
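For reference, the marker added here behaves differently from the one already on these tests: pytest.mark.skip disables a test unconditionally (until the tracked bug is resolved), while pytest.mark.skipif skips only when its condition holds. A minimal sketch of the two side by side; the test body and the WORLD_SIZE placeholder are illustrative, not part of this PR:

import pytest

WORLD_SIZE = 1  # placeholder; in test_executor.py this comes from the MPI environment

@pytest.mark.skip(reason="https://nvbugs/5488280")  # always skipped; bug URL recorded as the reason
@pytest.mark.skipif(WORLD_SIZE != 1, reason="Must run on single MPI rank")  # skipped only if the condition is true
def test_placeholder():
    assert True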
3 changes: 3 additions & 0 deletions tests/unittest/trt/model_api/test_model_level_api.py
@@ -3,6 +3,7 @@
 import tempfile
 from contextlib import contextmanager
 
+import pytest
 from profile_utils import profile
 from transformers import AutoTokenizer
 from utils.llm_data import llm_models_root
@@ -42,6 +43,7 @@ def workspace(suffix, prefix="./trtllm_workspace"):
 # 233s on ipp1-1197: loading weights 37s, network/engine 27s, save engine: 35s, load engine (14GB) about 100s
 @profile("save-and-load")
 @force_ampere
+@pytest.mark.skip(reason="https://nvbugs/5488280")
 def test_save_load():
     '''When the engine_dir parameter of to_trt and generate is not None
     to_trt() saves the engine to disk.
@@ -102,6 +104,7 @@ def test_high_level_fake_weights():
 
 
 @force_ampere
+@pytest.mark.skip(reason="https://nvbugs/5488280")
 def test_async_io():
     max_batch_size, max_isl, max_osl = 8, 256, 256
     hf_model_dir = str(llm_models_root() / "llama-models/llama-7b-hf")
3 changes: 3 additions & 0 deletions tests/unittest/trt/model_api/test_model_quantization.py
@@ -1,5 +1,6 @@
 import tempfile
 
+import pytest
 from transformers import AutoTokenizer
 from utils.llm_data import llm_models_root
 from utils.util import force_ampere, skip_no_modelopt, skip_pre_ada
@@ -20,6 +21,7 @@
 ]
 
 
+@pytest.mark.skip(reason="https://nvbugs/5488280")
 @force_ampere
 @skip_no_modelopt
 def test_int4_awq_quantization():
@@ -63,6 +65,7 @@ def test_int4_awq_quantization():
 # TODO: TRTLLM-185, check the score when the test infra is ready, hard coded value is not stable, cause flaky tests in L0
 
 
+@pytest.mark.skip(reason="https://nvbugs/5488280")
 @skip_pre_ada
 @skip_no_modelopt
 def test_fp8_quantization():
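To confirm locally which tests these markers disable, pytest can report skipped tests together with their reasons via the -rs flag, for example:

    pytest tests/unittest/trt/model_api -rs

Each skipped test is then listed with the nvbugs URL given above as its reason.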