From b41098aac0cc9b4843671ff5d93a68b6bf7662fe Mon Sep 17 00:00:00 2001 From: Amit Zuker <203509407+amitz-nv@users.noreply.github.com> Date: Wed, 18 Jun 2025 15:57:29 +0000 Subject: [PATCH 1/2] Fixed ModelConfig.get_bindings_model_config to calculate fields like gptJsonConfig.cpp::createModelConfig Signed-off-by: Amit Zuker <203509407+amitz-nv@users.noreply.github.com> --- tensorrt_llm/_torch/model_config.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tensorrt_llm/_torch/model_config.py b/tensorrt_llm/_torch/model_config.py index 24802ec2e0f..97a816c3d47 100644 --- a/tensorrt_llm/_torch/model_config.py +++ b/tensorrt_llm/_torch/model_config.py @@ -282,6 +282,7 @@ def get_bindings_model_config(self) -> "ModelConfigCpp": num_heads = self.pretrained_config.num_attention_heads // ( self.mapping.tp_size * self.mapping.cp_size) + hidden_size = self.pretrained_config.hidden_size // self.mapping.tp_size model_config_cpp = ModelConfigCpp( vocab_size=self.pretrained_config.vocab_size, @@ -289,7 +290,7 @@ def get_bindings_model_config(self) -> "ModelConfigCpp": num_attention_layers=self.pretrained_config.num_hidden_layers, num_rnn_layers=0, num_heads=num_heads, - hidden_size=self.pretrained_config.hidden_size, + hidden_size=hidden_size, data_type=torch_dtype_to_binding( self.pretrained_config.torch_dtype)) @@ -317,7 +318,7 @@ def get_bindings_model_config(self) -> "ModelConfigCpp": if "head_size" in self.pretrained_config: head_size = self.pretrained_config.head_size else: - head_size = self.pretrained_config.hidden_size // num_heads + head_size = hidden_size // num_heads model_config_cpp.mlp_hidden_size = mlp_hidden_size model_config_cpp.size_per_head = head_size From 7b2d3de3d7867516cb371e4dede7463e9377af91 Mon Sep 17 00:00:00 2001 From: Amit Zuker <203509407+amitz-nv@users.noreply.github.com> Date: Wed, 18 Jun 2025 16:01:30 +0000 Subject: [PATCH 2/2] Configure pytorch LoRA TP tests to run on DGX Signed-off-by: Amit Zuker <203509407+amitz-nv@users.noreply.github.com> --- tests/integration/test_lists/test-db/l0_dgx_h100.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_lists/test-db/l0_dgx_h100.yml b/tests/integration/test_lists/test-db/l0_dgx_h100.yml index 489e5415d8e..5620d508b12 100644 --- a/tests/integration/test_lists/test-db/l0_dgx_h100.yml +++ b/tests/integration/test_lists/test-db/l0_dgx_h100.yml @@ -17,7 +17,7 @@ l0_dgx_h100: # ------------- PyTorch tests --------------- - unittest/_torch/multi_gpu - unittest/_torch/auto_deploy/unit/multigpu - - unittest/llmapi/test_llm_multi_gpu_pytorch.py -m "gpu4" + - unittest/llmapi/test_llm_multi_gpu_pytorch.py -m "gpu4 or gpu2" - accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_bfloat16_4gpus[tp4-attn_backend=TRTLLM-torch_compile=False] - accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_bfloat16_4gpus[tp4-attn_backend=TRTLLM-torch_compile=True] - accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_bfloat16_4gpus[tp2pp2-attn_backend=TRTLLM-torch_compile=False]