diff --git a/cpp/include/tensorrt_llm/deep_gemm/scheduler.cuh b/cpp/include/tensorrt_llm/deep_gemm/scheduler.cuh index 9d6666b1deb..ed44e4bdb64 100644 --- a/cpp/include/tensorrt_llm/deep_gemm/scheduler.cuh +++ b/cpp/include/tensorrt_llm/deep_gemm/scheduler.cuh @@ -379,7 +379,7 @@ struct GroupedMaskedScheduler } }; -// Need to keep the same as the one in tests/unittest/_torch/thop/deep_gemm_tests.py +// Need to keep the same as the one in tests/unittest/_torch/thop/parallel/deep_gemm_tests.py template __host__ __device__ __forceinline__ T_offset compute_padded_offset(T_offset offset, T_index problem_idx) { diff --git a/tests/integration/defs/.test_durations b/tests/integration/defs/.test_durations index 9a95171adb9..2f19d53e10b 100644 --- a/tests/integration/defs/.test_durations +++ b/tests/integration/defs/.test_durations @@ -146,7 +146,8 @@ "test_unittests.py::test_unittests_v2[unittest/_torch/multimodal]": 23.54, "test_unittests.py::test_unittests_v2[unittest/_torch/sampler]": 107.66, "test_unittests.py::test_unittests_v2[unittest/_torch/speculative]": 1850.16, - "test_unittests.py::test_unittests_v2[unittest/_torch/thop]": 852.56, + "test_unittests.py::test_unittests_v2[unittest/_torch/thop/parallel]": 311.58, + "test_unittests.py::test_unittests_v2[unittest/_torch/thop/serial]": 18.96, "test_unittests.py::test_unittests_v2[unittest/_torch/modeling -k \"modeling_mixtral\"]": 208.1838396479725, "test_unittests.py::test_unittests_v2[unittest/_torch/multi_gpu_modeling -k \"deepseek\"]": 393.0210295501165, "test_e2e.py::test_ptp_quickstart_bert[TRTLLM-BertForSequenceClassification-bert/bert-base-uncased-yelp-polarity]": 21.019993914989755, diff --git a/tests/integration/defs/agg_unit_mem_df.csv b/tests/integration/defs/agg_unit_mem_df.csv index 258895ab937..2629c91b541 100644 --- a/tests/integration/defs/agg_unit_mem_df.csv +++ b/tests/integration/defs/agg_unit_mem_df.csv @@ -101,9 +101,17 @@ unittest/trt/model/test_mamba.py,NVIDIA H100,10, "unittest/trt/attention/test_gpt_attention.py -k ""partition2""",NVIDIA L40,6, "unittest/trt/attention/test_gpt_attention.py -k ""partition3""",NVIDIA L40,6, "unittest/trt/attention/test_gpt_attention.py -k ""xqa_generic""",NVIDIA L40,3, +unittest/_torch/attention,NVIDIA Graphics Device,4,B200 Bring Up Board +unittest/_torch/misc,NVIDIA Graphics Device,4,B200 Bring Up Board unittest/_torch/speculative,NVIDIA Graphics Device,4,B200 Bring Up Board -unittest/_torch/thop,NVIDIA Graphics Device,32,B200 Bring Up Board +unittest/_torch/thop/parallel,NVIDIA Graphics Device,4,B200 Bring Up Board "unittest/_torch/auto_deploy/unit/singlegpu -k ""not test_trtllm_bench_backend_comparison""",NVIDIA Graphics Device,4,B200 Bring Up Board +unittest/_torch/attention,NVIDIA B200,4, +unittest/_torch/misc,NVIDIA B200,4, unittest/_torch/speculative,NVIDIA B200,4, -unittest/_torch/thop,NVIDIA B200,32, +unittest/_torch/thop/parallel,NVIDIA B200,4, "unittest/_torch/auto_deploy/unit/singlegpu -k ""not test_trtllm_bench_backend_comparison""",NVIDIA B200,4, +unittest/_torch/attention,NVIDIA H100,4, +unittest/_torch/misc,NVIDIA H100,4, +unittest/_torch/speculative,NVIDIA H100,2, +unittest/_torch/thop/parallel,NVIDIA H100,4, diff --git a/tests/integration/test_lists/test-db/l0_b200.yml b/tests/integration/test_lists/test-db/l0_b200.yml index 503d029f569..22a039fc9d4 100644 --- a/tests/integration/test_lists/test-db/l0_b200.yml +++ b/tests/integration/test_lists/test-db/l0_b200.yml @@ -51,7 +51,8 @@ l0_b200: - unittest/_torch/multimodal - unittest/_torch/sampler - unittest/_torch/speculative - - unittest/_torch/thop + - unittest/_torch/thop/parallel + - unittest/_torch/thop/serial - unittest/_torch/modeling -k "modeling_llama" - unittest/_torch/modeling -k "modeling_mixtral" - unittest/_torch/modeling -k "modeling_deepseek" diff --git a/tests/integration/test_lists/test-db/l0_h100.yml b/tests/integration/test_lists/test-db/l0_h100.yml index 97156021fbe..10cac3033ca 100644 --- a/tests/integration/test_lists/test-db/l0_h100.yml +++ b/tests/integration/test_lists/test-db/l0_h100.yml @@ -23,7 +23,8 @@ l0_h100: - unittest/_torch/multimodal - unittest/_torch/sampler - unittest/_torch/speculative - - unittest/_torch/thop + - unittest/_torch/thop/parallel + - unittest/_torch/thop/serial # Only key models in H100: llama/mixtral/nemotron/deepseek - unittest/_torch/modeling -k "modeling_llama" - unittest/_torch/modeling -k "modeling_mixtral" diff --git a/tests/unittest/_torch/thop/deep_gemm_tests.py b/tests/unittest/_torch/thop/parallel/deep_gemm_tests.py similarity index 100% rename from tests/unittest/_torch/thop/deep_gemm_tests.py rename to tests/unittest/_torch/thop/parallel/deep_gemm_tests.py diff --git a/tests/unittest/_torch/thop/test_causal_conv1d_op.py b/tests/unittest/_torch/thop/parallel/test_causal_conv1d_op.py similarity index 100% rename from tests/unittest/_torch/thop/test_causal_conv1d_op.py rename to tests/unittest/_torch/thop/parallel/test_causal_conv1d_op.py diff --git a/tests/unittest/_torch/thop/test_cublas_mm.py b/tests/unittest/_torch/thop/parallel/test_cublas_mm.py similarity index 100% rename from tests/unittest/_torch/thop/test_cublas_mm.py rename to tests/unittest/_torch/thop/parallel/test_cublas_mm.py diff --git a/tests/unittest/_torch/thop/test_custom_ops.py b/tests/unittest/_torch/thop/parallel/test_custom_ops.py similarity index 100% rename from tests/unittest/_torch/thop/test_custom_ops.py rename to tests/unittest/_torch/thop/parallel/test_custom_ops.py diff --git a/tests/unittest/_torch/thop/test_dsv3_fused_a_gemm.py b/tests/unittest/_torch/thop/parallel/test_dsv3_fused_a_gemm.py similarity index 100% rename from tests/unittest/_torch/thop/test_dsv3_fused_a_gemm.py rename to tests/unittest/_torch/thop/parallel/test_dsv3_fused_a_gemm.py diff --git a/tests/unittest/_torch/thop/test_dsv3_router_gemm.py b/tests/unittest/_torch/thop/parallel/test_dsv3_router_gemm.py similarity index 100% rename from tests/unittest/_torch/thop/test_dsv3_router_gemm.py rename to tests/unittest/_torch/thop/parallel/test_dsv3_router_gemm.py diff --git a/tests/unittest/_torch/thop/test_finegrained_mixed_dtype_gemm.py b/tests/unittest/_torch/thop/parallel/test_finegrained_mixed_dtype_gemm.py similarity index 100% rename from tests/unittest/_torch/thop/test_finegrained_mixed_dtype_gemm.py rename to tests/unittest/_torch/thop/parallel/test_finegrained_mixed_dtype_gemm.py diff --git a/tests/unittest/_torch/thop/test_fp4_bmm_quantize.py b/tests/unittest/_torch/thop/parallel/test_fp4_bmm_quantize.py similarity index 100% rename from tests/unittest/_torch/thop/test_fp4_bmm_quantize.py rename to tests/unittest/_torch/thop/parallel/test_fp4_bmm_quantize.py diff --git a/tests/unittest/_torch/thop/test_fp4_calculate_global_scale.py b/tests/unittest/_torch/thop/parallel/test_fp4_calculate_global_scale.py similarity index 100% rename from tests/unittest/_torch/thop/test_fp4_calculate_global_scale.py rename to tests/unittest/_torch/thop/parallel/test_fp4_calculate_global_scale.py diff --git a/tests/unittest/_torch/thop/test_fp4_gemm_quantize.py b/tests/unittest/_torch/thop/parallel/test_fp4_gemm_quantize.py similarity index 100% rename from tests/unittest/_torch/thop/test_fp4_gemm_quantize.py rename to tests/unittest/_torch/thop/parallel/test_fp4_gemm_quantize.py diff --git a/tests/unittest/_torch/thop/test_fp4_linear.py b/tests/unittest/_torch/thop/parallel/test_fp4_linear.py similarity index 100% rename from tests/unittest/_torch/thop/test_fp4_linear.py rename to tests/unittest/_torch/thop/parallel/test_fp4_linear.py diff --git a/tests/unittest/_torch/thop/test_fp4_swizzle.py b/tests/unittest/_torch/thop/parallel/test_fp4_swizzle.py similarity index 100% rename from tests/unittest/_torch/thop/test_fp4_swizzle.py rename to tests/unittest/_torch/thop/parallel/test_fp4_swizzle.py diff --git a/tests/unittest/_torch/thop/test_fp8_block_scale_gemm.py b/tests/unittest/_torch/thop/parallel/test_fp8_block_scale_gemm.py similarity index 100% rename from tests/unittest/_torch/thop/test_fp8_block_scale_gemm.py rename to tests/unittest/_torch/thop/parallel/test_fp8_block_scale_gemm.py diff --git a/tests/unittest/_torch/thop/test_fp8_linear.py b/tests/unittest/_torch/thop/parallel/test_fp8_linear.py similarity index 100% rename from tests/unittest/_torch/thop/test_fp8_linear.py rename to tests/unittest/_torch/thop/parallel/test_fp8_linear.py diff --git a/tests/unittest/_torch/thop/test_fp8_per_tensor_scale_tllmg_gemm.py b/tests/unittest/_torch/thop/parallel/test_fp8_per_tensor_scale_tllmg_gemm.py similarity index 100% rename from tests/unittest/_torch/thop/test_fp8_per_tensor_scale_tllmg_gemm.py rename to tests/unittest/_torch/thop/parallel/test_fp8_per_tensor_scale_tllmg_gemm.py diff --git a/tests/unittest/_torch/thop/test_fp8_quantize.py b/tests/unittest/_torch/thop/parallel/test_fp8_quantize.py similarity index 100% rename from tests/unittest/_torch/thop/test_fp8_quantize.py rename to tests/unittest/_torch/thop/parallel/test_fp8_quantize.py diff --git a/tests/unittest/_torch/thop/test_fp8_rowwise_linear.py b/tests/unittest/_torch/thop/parallel/test_fp8_rowwise_linear.py similarity index 100% rename from tests/unittest/_torch/thop/test_fp8_rowwise_linear.py rename to tests/unittest/_torch/thop/parallel/test_fp8_rowwise_linear.py diff --git a/tests/unittest/_torch/thop/test_fused_qk_norm_rope.py b/tests/unittest/_torch/thop/parallel/test_fused_qk_norm_rope.py similarity index 100% rename from tests/unittest/_torch/thop/test_fused_qk_norm_rope.py rename to tests/unittest/_torch/thop/parallel/test_fused_qk_norm_rope.py diff --git a/tests/unittest/_torch/thop/test_logits_bitmask_op.py b/tests/unittest/_torch/thop/parallel/test_logits_bitmask_op.py similarity index 100% rename from tests/unittest/_torch/thop/test_logits_bitmask_op.py rename to tests/unittest/_torch/thop/parallel/test_logits_bitmask_op.py diff --git a/tests/unittest/_torch/thop/test_mamba2_chunk_ss_update.py b/tests/unittest/_torch/thop/parallel/test_mamba2_chunk_ss_update.py similarity index 100% rename from tests/unittest/_torch/thop/test_mamba2_chunk_ss_update.py rename to tests/unittest/_torch/thop/parallel/test_mamba2_chunk_ss_update.py diff --git a/tests/unittest/_torch/thop/test_mamba_conv1d_op.py b/tests/unittest/_torch/thop/parallel/test_mamba_conv1d_op.py similarity index 100% rename from tests/unittest/_torch/thop/test_mamba_conv1d_op.py rename to tests/unittest/_torch/thop/parallel/test_mamba_conv1d_op.py diff --git a/tests/unittest/_torch/thop/test_moe.py b/tests/unittest/_torch/thop/parallel/test_moe.py similarity index 100% rename from tests/unittest/_torch/thop/test_moe.py rename to tests/unittest/_torch/thop/parallel/test_moe.py diff --git a/tests/unittest/_torch/thop/test_noaux_tc.py b/tests/unittest/_torch/thop/parallel/test_noaux_tc.py similarity index 100% rename from tests/unittest/_torch/thop/test_noaux_tc.py rename to tests/unittest/_torch/thop/parallel/test_noaux_tc.py diff --git a/tests/unittest/_torch/thop/test_scaled_mm.py b/tests/unittest/_torch/thop/parallel/test_scaled_mm.py similarity index 100% rename from tests/unittest/_torch/thop/test_scaled_mm.py rename to tests/unittest/_torch/thop/parallel/test_scaled_mm.py diff --git a/tests/unittest/_torch/thop/test_selective_scan_op.py b/tests/unittest/_torch/thop/parallel/test_selective_scan_op.py similarity index 100% rename from tests/unittest/_torch/thop/test_selective_scan_op.py rename to tests/unittest/_torch/thop/parallel/test_selective_scan_op.py diff --git a/tests/unittest/_torch/thop/test_tllmg_bmm.py b/tests/unittest/_torch/thop/parallel/test_tllmg_bmm.py similarity index 100% rename from tests/unittest/_torch/thop/test_tllmg_bmm.py rename to tests/unittest/_torch/thop/parallel/test_tllmg_bmm.py diff --git a/tests/unittest/_torch/thop/test_w4a16_linear.py b/tests/unittest/_torch/thop/parallel/test_w4a16_linear.py similarity index 100% rename from tests/unittest/_torch/thop/test_w4a16_linear.py rename to tests/unittest/_torch/thop/parallel/test_w4a16_linear.py diff --git a/tests/unittest/_torch/thop/test_w4a8_linear.py b/tests/unittest/_torch/thop/parallel/test_w4a8_linear.py similarity index 100% rename from tests/unittest/_torch/thop/test_w4a8_linear.py rename to tests/unittest/_torch/thop/parallel/test_w4a8_linear.py diff --git a/tests/unittest/_torch/thop/test_w4a8_mxfp4_mxfp8_gemm.py b/tests/unittest/_torch/thop/parallel/test_w4a8_mxfp4_mxfp8_gemm.py similarity index 100% rename from tests/unittest/_torch/thop/test_w4a8_mxfp4_mxfp8_gemm.py rename to tests/unittest/_torch/thop/parallel/test_w4a8_mxfp4_mxfp8_gemm.py diff --git a/tests/unittest/_torch/thop/test_weight_only_quant_gemm.py b/tests/unittest/_torch/thop/parallel/test_weight_only_quant_gemm.py similarity index 100% rename from tests/unittest/_torch/thop/test_weight_only_quant_gemm.py rename to tests/unittest/_torch/thop/parallel/test_weight_only_quant_gemm.py diff --git a/tests/unittest/_torch/thop/test_weight_only_quant_linear.py b/tests/unittest/_torch/thop/parallel/test_weight_only_quant_linear.py similarity index 100% rename from tests/unittest/_torch/thop/test_weight_only_quant_linear.py rename to tests/unittest/_torch/thop/parallel/test_weight_only_quant_linear.py diff --git a/tests/unittest/_torch/thop/test_moe_alltoall.py b/tests/unittest/_torch/thop/serial/test_moe_alltoall.py similarity index 100% rename from tests/unittest/_torch/thop/test_moe_alltoall.py rename to tests/unittest/_torch/thop/serial/test_moe_alltoall.py