Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions jenkins/L0_Test.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -1967,6 +1967,7 @@ def launchTestJobs(pipeline, testFilter)
x86TestConfigs = [
"DGX_H100-4_GPUs-PyTorch-DeepSeek-1": ["dgx-h100-x4", "l0_dgx_h100", 1, 2, 4],
"DGX_H100-4_GPUs-PyTorch-DeepSeek-2": ["dgx-h100-x4", "l0_dgx_h100", 2, 2, 4],
"DGX_H100-2_GPUs-PyTorch-Others-1": ["dgx-h100-x2", "l0_dgx_h100", 1, 1, 2],
"DGX_H100-4_GPUs-PyTorch-Others-1": ["dgx-h100-x4", "l0_dgx_h100", 1, 1, 4],
"DGX_H100-4_GPUs-CPP-1": ["dgx-h100-x4", "l0_dgx_h100", 1, 1, 4],
"A10-PyTorch-1": ["a10", "l0_a10", 1, 1],
Expand Down Expand Up @@ -2353,9 +2354,9 @@ def launchTestJobs(pipeline, testFilter)
}, {}, true)
}]}

multiGpuJobs = parallelJobs.findAll{(it.key.contains("4_GPUs") || it.key.contains("8_GPUs")) && !it.key.contains("Post-Merge")}
multiGpuJobs = parallelJobs.findAll{(it.key.contains("2_GPUs") || it.key.contains("4_GPUs") || it.key.contains("8_GPUs")) && !it.key.contains("Post-Merge")}
println multiGpuJobs.keySet()
multiGpuJobsPostMerge = parallelJobs.findAll{(it.key.contains("4_GPUs") || it.key.contains("8_GPUs")) && it.key.contains("Post-Merge")}
multiGpuJobsPostMerge = parallelJobs.findAll{(it.key.contains("2_GPUs") || it.key.contains("4_GPUs") || it.key.contains("8_GPUs")) && it.key.contains("Post-Merge")}

parallelJobs += docBuildJobs
parallelJobs += sanityCheckJobs
Expand Down
57 changes: 36 additions & 21 deletions tests/integration/test_lists/test-db/l0_dgx_h100.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,40 @@
version: 0.0.1
l0_dgx_h100:
- condition:
ranges:
system_gpu_count:
gte: 2
lte: 2
wildcards:
gpu:
- '*h100*'
linux_distribution_name: ubuntu*
terms:
stage: pre_merge
backend: pytorch
auto_trigger: others
tests:
- unittest/llmapi/test_llm_multi_gpu_pytorch.py -m "gpu2"
- unittest/_torch/multi_gpu -m "not post_merge" TIMEOUT (90)
- unittest/_torch/auto_deploy/unit/multigpu
- unittest/_torch/modeling/test_modeling_pixtral.py::test_tensor_parallelism
- accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_eagle3[eagle3_one_model=False-overlap_scheduler=False]
- accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_eagle3[eagle3_one_model=True-overlap_scheduler=True]
- accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_guided_decoding[xgrammar]
- accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_guided_decoding_with_eagle3[xgrammar-eagle3_one_model=True]
- accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_guided_decoding_with_eagle3[xgrammar-eagle3_one_model=False]
- accuracy/test_disaggregated_serving.py::TestQwen3_8B::test_auto_dtype[False]
- accuracy/test_disaggregated_serving.py::TestQwen3_8B::test_auto_dtype[True]
- accuracy/test_disaggregated_serving.py::TestQwen3_8B::test_chunked_prefill
- accuracy/test_disaggregated_serving.py::TestQwen3_8B::test_nixl_backend
- accuracy/test_disaggregated_serving.py::TestDeepSeekV3Lite::test_nixl_backend
- accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_ngram
- accuracy/test_disaggregated_serving.py::TestGemma3_1BInstruct::test_auto_dtype[False]
- accuracy/test_disaggregated_serving.py::TestGemma3_1BInstruct::test_auto_dtype[True]
- accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_auto_dtype[False]
- accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_auto_dtype[True]
# ------------- AutoDeploy tests ---------------
- accuracy/test_llm_api_autodeploy.py::TestLlama3_1_8B::test_auto_dtype
- condition:
ranges:
system_gpu_count:
Expand All @@ -15,9 +50,7 @@ l0_dgx_h100:
auto_trigger: others
tests:
# ------------- PyTorch tests ---------------
- unittest/_torch/multi_gpu -m "not post_merge" TIMEOUT (90)
- unittest/_torch/auto_deploy/unit/multigpu
- unittest/llmapi/test_llm_multi_gpu_pytorch.py -m "gpu4 or gpu2"
- unittest/llmapi/test_llm_multi_gpu_pytorch.py -m "gpu4"
- accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_bfloat16_4gpus[tp4-attn_backend=TRTLLM-torch_compile=False]
- accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_bfloat16_4gpus[tp2pp2-attn_backend=TRTLLM-torch_compile=False]
- accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_bfloat16_4gpus[tp2pp2-attn_backend=TRTLLM-torch_compile=True]
Expand All @@ -35,19 +68,6 @@ l0_dgx_h100:
- disaggregated/test_disaggregated.py::test_disaggregated_ctxpp2_gentp2[TinyLlama-1.1B-Chat-v1.0]
- disaggregated/test_disaggregated.py::test_disaggregated_ctxpp4_gentp4[TinyLlama-1.1B-Chat-v1.0]
- disaggregated/test_disaggregated.py::test_disaggregated_genbs1[TinyLlama-1.1B-Chat-v1.0]
- accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_auto_dtype[False]
- accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_auto_dtype[True]
- accuracy/test_disaggregated_serving.py::TestGemma3_1BInstruct::test_auto_dtype[False]
- accuracy/test_disaggregated_serving.py::TestGemma3_1BInstruct::test_auto_dtype[True]
- accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_ngram
- accuracy/test_disaggregated_serving.py::TestQwen3_8B::test_auto_dtype[False]
- accuracy/test_disaggregated_serving.py::TestQwen3_8B::test_auto_dtype[True]
- accuracy/test_disaggregated_serving.py::TestQwen3_8B::test_chunked_prefill
- accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_eagle3[eagle3_one_model=False-overlap_scheduler=False]
- accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_eagle3[eagle3_one_model=True-overlap_scheduler=True]
- accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_guided_decoding[xgrammar]
- accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_guided_decoding_with_eagle3[xgrammar-eagle3_one_model=True]
- accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_guided_decoding_with_eagle3[xgrammar-eagle3_one_model=False]
- accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_tp_pp_symmetric[GSM8K-tp1pp2]
- accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_tp_pp_symmetric[MMLU-tp1pp2]
- accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_tp_pp_symmetric[GSM8K-tp2pp1]
Expand All @@ -58,13 +78,8 @@ l0_dgx_h100:
- accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_ctx_pp_gen_tp_asymmetric[MMLU-gen_tp=2-ctx_pp=2]
- accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_multi_instance[GSM8K]
- accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_multi_instance[MMLU]
- accuracy/test_disaggregated_serving.py::TestQwen3_8B::test_nixl_backend
- accuracy/test_disaggregated_serving.py::TestDeepSeekV3Lite::test_nixl_backend
- test_e2e.py::test_ptp_quickstart_advanced_bs1
- test_e2e.py::test_ptp_quickstart_advanced_deepseek_v3_lite_4gpus_adp_balance[DeepSeek-V3-Lite-FP8-DeepSeek-V3-Lite/fp8]
- unittest/_torch/modeling/test_modeling_pixtral.py::test_tensor_parallelism
# ------------- AutoDeploy tests ---------------
- accuracy/test_llm_api_autodeploy.py::TestLlama3_1_8B::test_auto_dtype
- condition:
ranges:
system_gpu_count:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -249,9 +249,9 @@ def test(self, mode="acc"):
ids=lambda x: f"size{x}")
@pytest.mark.parametrize(
"mpi_pool_executor",
[2, 4], # 8
ids=["tp_size_2", "tp_size_4"],
indirect=True) # "tp_size_8"
[2], # 4, 8
ids=["tp_size_2"],
indirect=True) # "tp_size_4", "tp_size_8"
def test_lowprecision_allreduce_acc(dtype, strategy, message_size,
mpi_pool_executor):
"""
Expand Down