
Commit 701822c

[linting] Enable ruff on more files (wave 2/N)
Signed-off-by: William Zhang <[email protected]>
1 parent 8454640 commit 701822c

24 files changed (+1784 additions, -1692 deletions)

.git-blame-ignore-revs

Lines changed: 2 additions & 0 deletions
@@ -0,0 +1,2 @@
1+
# Ruff formatting / linting adoption.
2+
dc52b67492b2f6531e310bed90f88c8427ad3908

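Adding the Ruff adoption commit to `.git-blame-ignore-revs` keeps `git blame` pointing at the real authors of a line rather than at the bulk reformatting. Git only honors the file once it is configured; a minimal local setup, run from the repository root, is:

    git config blame.ignoreRevsFile .git-blame-ignore-revs

IDEs and blame viewers that respect this setting will then skip the listed revision as well.
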
pyproject.toml

Lines changed: 57 additions & 2 deletions
@@ -33,6 +33,20 @@ extend_skip_glob = [
     "tensorrt_llm/top_model_mixin.py",
     "tests/unittest/_torch/modeling/test_modeling_mistral.py",
     "tests/unittest/_torch/modeling/test_modeling_pixtral.py",
+    # Phase 2.
+    "tensorrt_llm/_tensorrt_engine/*.py",
+    "tensorrt_llm/_torch/auto_deploy/custom_ops/torch_libs/*.py",
+    "tensorrt_llm/_torch/debug/*.py",
+    "tensorrt_llm/_torch/shared_tensor/*.py",
+    "tensorrt_llm/_torch/peft/*.py",
+    "tensorrt_llm/evaluate/lm_eval_tasks/gpqa/cot_zeroshot_aa/*.py",
+    "tensorrt_llm/models/clip/*.py",
+    "tensorrt_llm/models/internlm/*.py",
+    "tensorrt_llm/models/mmdit_sd3/*.py",
+    "tensorrt_llm/models/multimodal_encoders/*.py",
+    "tensorrt_llm/models/skywork/*.py",
+    "tensorrt_llm/models/stdit/*.py",
+    "tensorrt_llm/scaffolding/contrib/**/*.py",
 ]
 
 [tool.yapf]
@@ -63,6 +77,20 @@ ignore_patterns = [
     "tensorrt_llm/top_model_mixin.py",
     "tests/unittest/_torch/modeling/test_modeling_mistral.py",
     "tests/unittest/_torch/modeling/test_modeling_pixtral.py",
+    # Phase 2.
+    "tensorrt_llm/_tensorrt_engine/*.py",
+    "tensorrt_llm/_torch/auto_deploy/custom_ops/torch_libs/*.py",
+    "tensorrt_llm/_torch/debug/*.py",
+    "tensorrt_llm/_torch/shared_tensor/*.py",
+    "tensorrt_llm/_torch/peft/*.py",
+    "tensorrt_llm/evaluate/lm_eval_tasks/gpqa/cot_zeroshot_aa/*.py",
+    "tensorrt_llm/models/clip/*.py",
+    "tensorrt_llm/models/internlm/*.py",
+    "tensorrt_llm/models/mmdit_sd3/*.py",
+    "tensorrt_llm/models/multimodal_encoders/*.py",
+    "tensorrt_llm/models/skywork/*.py",
+    "tensorrt_llm/models/stdit/*.py",
+    "tensorrt_llm/scaffolding/contrib/**/*.py",
 ]
 
 [tool.codespell]
@@ -74,8 +102,7 @@ ignore-words-list = "rouge,inout,atleast,strat,nd,subtile,thrid,improbe,NotIn,te
 in-place = true
 remove_all_unused_imports = true
 remove_unused_variables = true
-# This should match the `include` in `[tool.ruff]`. See the comments in that section for why this
-# is necessary.
+# This should match the `include` in `[tool.ruff]`. The reason is that it is setup to run on the entire codebase.
 exclude = [
     "**/auto_deploy/**",
     "tensorrt_llm/_common.py",
@@ -97,6 +124,20 @@ exclude = [
     "tensorrt_llm/top_model_mixin.py",
     "tests/unittest/_torch/modeling/test_modeling_mistral.py",
     "tests/unittest/_torch/modeling/test_modeling_pixtral.py",
+    # Phase 2.
+    "tensorrt_llm/_tensorrt_engine/*.py",
+    "tensorrt_llm/_torch/auto_deploy/custom_ops/torch_libs/*.py",
+    "tensorrt_llm/_torch/debug/*.py",
+    "tensorrt_llm/_torch/shared_tensor/*.py",
+    "tensorrt_llm/_torch/peft/*.py",
+    "tensorrt_llm/evaluate/lm_eval_tasks/gpqa/cot_zeroshot_aa/*.py",
+    "tensorrt_llm/models/clip/*.py",
+    "tensorrt_llm/models/internlm/*.py",
+    "tensorrt_llm/models/mmdit_sd3/*.py",
+    "tensorrt_llm/models/multimodal_encoders/*.py",
+    "tensorrt_llm/models/skywork/*.py",
+    "tensorrt_llm/models/stdit/*.py",
+    "tensorrt_llm/scaffolding/contrib/**/*.py",
 ]
 
@@ -140,6 +181,20 @@ include = [
     "tensorrt_llm/top_model_mixin.py",
     "tests/unittest/_torch/modeling/test_modeling_mistral.py",
     "tests/unittest/_torch/modeling/test_modeling_pixtral.py",
+    # Phase 2.
+    "tensorrt_llm/_tensorrt_engine/*.py",
+    "tensorrt_llm/_torch/auto_deploy/custom_ops/torch_libs/*.py",
+    "tensorrt_llm/_torch/debug/*.py",
+    "tensorrt_llm/_torch/shared_tensor/*.py",
+    "tensorrt_llm/_torch/peft/*.py",
+    "tensorrt_llm/evaluate/lm_eval_tasks/gpqa/cot_zeroshot_aa/*.py",
+    "tensorrt_llm/models/clip/*.py",
+    "tensorrt_llm/models/internlm/*.py",
+    "tensorrt_llm/models/mmdit_sd3/*.py",
+    "tensorrt_llm/models/multimodal_encoders/*.py",
+    "tensorrt_llm/models/skywork/*.py",
+    "tensorrt_llm/models/stdit/*.py",
+    "tensorrt_llm/scaffolding/contrib/**/*.py",
 ]
 exclude = [
     "**3rdparty/**",

Lines changed: 1 addition & 1 deletion
@@ -1,3 +1,3 @@
 from tensorrt_llm.llmapi.llm import _TrtLLM as LLM
 
-__all__ = ['LLM']
+__all__ = ["LLM"]

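The quote flip above is simply what `ruff format` produces: like Black, it normalizes string literals to double quotes by default, so no per-file configuration is needed for this change.
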
tensorrt_llm/_torch/debug/debug_hook.py

Lines changed: 21 additions & 25 deletions
@@ -98,8 +98,7 @@ def get_module_indices_tree(self):
         return self.layer_inner_counter
 
     def get_current_model_loop_index(self):
-        return self.layer_inner_counter[0] + 1 if len(
-            self.layer_inner_counter) >= 1 else 0
+        return self.layer_inner_counter[0] + 1 if len(self.layer_inner_counter) >= 1 else 0
 
     def do_actions(self, module, tensors, actions):
         assert isinstance(actions, list), "Actions shall be list."
@@ -109,7 +108,6 @@ def do_actions(self, module, tensors, actions):
 
 
 class Filter:
-
     def __init__(self):
         pass
 
@@ -151,14 +149,12 @@ def pre_forward(module: nn.Module, args, kwargs):
     if len(debug_ctx.get_module_indices_tree()) == 0:
         debug_ctx.get_module_indices_tree().append(0)
 
-    if len(debug_ctx.get_current_modules_tree()) >= len(
-            debug_ctx.get_module_indices_tree()):
+    if len(debug_ctx.get_current_modules_tree()) >= len(debug_ctx.get_module_indices_tree()):
         debug_ctx.get_module_indices_tree().append(0)
 
-    debug_ctx.get_module_indices_tree()[
-        len(debug_ctx.get_current_modules_tree()) -
-        1] = debug_ctx.get_module_indices_tree()[
-            len(debug_ctx.get_current_modules_tree()) - 1] + 1
+    debug_ctx.get_module_indices_tree()[len(debug_ctx.get_current_modules_tree()) - 1] = (
+        debug_ctx.get_module_indices_tree()[len(debug_ctx.get_current_modules_tree()) - 1] + 1
+    )
     debug_ctx.do_actions(module, args, debug_ctx.get_pre_forward_action())
     return None
 
@@ -179,8 +175,7 @@ def after_forward(module: nn.Module, args, kwargs, output):
     """
     debug_ctx = get_current_debug_ctx()
     debug_ctx.mark_in_pre_forward(False)
-    debug_ctx.do_actions(module, [args, output],
-                         debug_ctx.get_after_forward_action())
+    debug_ctx.do_actions(module, [args, output], debug_ctx.get_after_forward_action())
     name = module.name if hasattr(module, "name") else module.__class__.__name__
     old_name = debug_ctx.get_current_modules_tree().pop(-1)
     assert name == old_name, "module mismatch"
@@ -189,9 +184,9 @@ def after_forward(module: nn.Module, args, kwargs, output):
     return None
 
 
-def enable_debug(model: nn.Module,
-                 dest_folder: Optional[str] = None,
-                 filter: Optional[Filter] = None):
+def enable_debug(
+    model: nn.Module, dest_folder: Optional[str] = None, filter: Optional[Filter] = None
+):
     """
     The function style to interface to enable debugger on model.
     If filter is provided, it will be used to filter out satisfied module to register hook.
@@ -231,16 +226,16 @@ def enable_debug(model: nn.Module,
         if submodule not in debug_ctx.forward_hook_handles:
             do_hook = filter(submodule) if filter is not None else True
             if do_hook:
-                debug_ctx.forward_hook_handles[
-                    submodule] = submodule.register_forward_hook(
-                        after_forward, with_kwargs=True, always_call=True)
+                debug_ctx.forward_hook_handles[submodule] = submodule.register_forward_hook(
+                    after_forward, with_kwargs=True, always_call=True
+                )
 
         if submodule not in debug_ctx.forward_pre_hook_handles:
             do_hook = filter(submodule) if filter is not None else True
             if do_hook:
-                debug_ctx.forward_pre_hook_handles[
-                    submodule] = submodule.register_forward_pre_hook(
-                        pre_forward, with_kwargs=True)
+                debug_ctx.forward_pre_hook_handles[submodule] = submodule.register_forward_pre_hook(
+                    pre_forward, with_kwargs=True
+                )
 
 
 def disable_debug():
@@ -262,9 +257,9 @@ def disable_debug():
 
 
 @contextmanager
-def debug_mode(model: nn.Module,
-               dest_folder: Optional[str] = None,
-               filter: Optional[Filter] = None):
+def debug_mode(
+    model: nn.Module, dest_folder: Optional[str] = None, filter: Optional[Filter] = None
+):
     """
     The context manager style interface to enable debugger on model.
     If filter is provided, it will be used to filter out satisfied module to register hook.
@@ -329,8 +324,9 @@ def dump_tensor(module: nn.Module, data_tensor, debug_ctx: DebuggerContext):
     def get_dump_file_path(tensor):
         nonlocal tensor_counter
         nonlocal input_tensor_names
-        assert debug_ctx.get_log_folder(
-        ) is not None, "Log folder shall be initialized by DebugContext."
+        assert debug_ctx.get_log_folder() is not None, (
+            "Log folder shall be initialized by DebugContext."
+        )
 
         name_parts = []
         for idx in range(len(debug_ctx.get_current_modules_tree())):

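The changes above are formatting only; the debugger's two entry points keep their behavior, with `enable_debug()` registering forward / pre-forward hooks on each (optionally filtered) submodule and `debug_mode()` wrapping the same logic in a context manager. A minimal usage sketch based on the signatures shown in this diff (the model, input, and dump folder are illustrative placeholders, not part of this commit):

    import torch
    import torch.nn as nn

    from tensorrt_llm._torch.debug.debug_hook import debug_mode

    model = nn.Sequential(nn.Linear(16, 16), nn.ReLU())  # stand-in for a real module
    x = torch.randn(2, 16)

    # While the context is active, tensors seen by the hooked submodules are
    # dumped under dest_folder (assumed here to be a writable path).
    with debug_mode(model, dest_folder="/tmp/trtllm_debug"):
        model(x)
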
tensorrt_llm/_torch/peft/lora/layer.py

Lines changed: 36 additions & 27 deletions
@@ -10,6 +10,7 @@ class LoraModuleType(IntEnum):
     This enum maps to the different attention and MLP components in a transformer model
     that can be adapted using LoRA weights.
     """
+
     ATTENTION_QKV = 0  # Combined QKV projection
     ATTENTION_Q = 1  # Query projection
     ATTENTION_K = 2  # Key projection
@@ -60,32 +61,37 @@ def from_string(cls, name: str) -> "LoraModuleType":
     def is_attention(self) -> bool:
         """Check if this is an attention module type."""
         return self in {
-            self.ATTENTION_QKV, self.ATTENTION_Q, self.ATTENTION_K,
-            self.ATTENTION_V, self.ATTENTION_DENSE, self.CROSS_ATTENTION_QKV,
-            self.CROSS_ATTENTION_Q, self.CROSS_ATTENTION_K,
-            self.CROSS_ATTENTION_V, self.CROSS_ATTENTION_DENSE
+            self.ATTENTION_QKV,
+            self.ATTENTION_Q,
+            self.ATTENTION_K,
+            self.ATTENTION_V,
+            self.ATTENTION_DENSE,
+            self.CROSS_ATTENTION_QKV,
+            self.CROSS_ATTENTION_Q,
+            self.CROSS_ATTENTION_K,
+            self.CROSS_ATTENTION_V,
+            self.CROSS_ATTENTION_DENSE,
         }
 
     @property
     def is_mlp(self) -> bool:
         """Check if this is an MLP module type."""
         return self in {
-            self.MLP_H_TO_4H, self.MLP_4H_TO_H, self.MLP_GATE, self.MLP_GATE_UP,
-            self.MLP_ROUTER
+            self.MLP_H_TO_4H,
+            self.MLP_4H_TO_H,
+            self.MLP_GATE,
+            self.MLP_GATE_UP,
+            self.MLP_ROUTER,
         }
 
     @property
     def is_moe(self) -> bool:
         """Check if this is a Mixture of Experts (MoE) module type."""
-        return self in {
-            self.MOE_H_TO_4H, self.MOE_4H_TO_H, self.MOE_GATE, self.MOE_ROUTER
-        }
+        return self in {self.MOE_H_TO_4H, self.MOE_4H_TO_H, self.MOE_GATE, self.MOE_ROUTER}
 
 
 class LoraLayer(torch.nn.Module):
-
-    def __init__(self, lora_module_types: List[LoraModuleType],
-                 output_hidden_sizes: List[int]):
+    def __init__(self, lora_module_types: List[LoraModuleType], output_hidden_sizes: List[int]):
         super().__init__()
 
         self.lora_module_types = lora_module_types
98104
lora_params: Dict,
99105
layer_idx: int,
100106
) -> Optional[torch.Tensor]:
101-
102107
if bool(lora_params):
103108
lora_ranks = []
104109
lora_weight_pointers = []
@@ -108,23 +113,23 @@ def forward(
108113
if module_idx in lora_params[layer_idx]:
109114
active_lora_module_ids.append(module_idx)
110115
# TODO (dafrimi): needs to pass this is_dora arg
111-
lora_params[layer_idx][module_idx]['is_dora']
112-
lora_ranks.append(
113-
lora_params[layer_idx][module_idx]['adapter_size'])
116+
lora_params[layer_idx][module_idx]["is_dora"]
117+
lora_ranks.append(lora_params[layer_idx][module_idx]["adapter_size"])
114118
lora_weight_pointers.append(
115-
lora_params[layer_idx][module_idx]['weight_pointers'])
119+
lora_params[layer_idx][module_idx]["weight_pointers"]
120+
)
116121

117-
num_seqs = lora_params['num_seqs']
122+
num_seqs = lora_params["num_seqs"]
118123

119124
if len(active_lora_module_ids) == 0:
120125
return None
121126
else:
122127
lora_outputs = torch.ops.trtllm.lora_grouped_gemm(
123128
x,
124-
lora_params['host_request_types'][:num_seqs],
129+
lora_params["host_request_types"][:num_seqs],
125130
lora_ranks,
126131
lora_weight_pointers,
127-
lora_params['prompt_lens_cpu'][:num_seqs],
132+
lora_params["prompt_lens_cpu"][:num_seqs],
128133
self.output_hidden_sizes,
129134
False, # transA
130135
True, # transB
@@ -144,13 +149,17 @@ def forward(
144149
lora_output.append(lora_outputs.pop(0))
145150
else:
146151
lora_output.append(
147-
torch.zeros(list(x.shape[:-1]) + [
148-
self.output_hidden_sizes[
149-
self.lora_module_types.index(
150-
module_idx)]
151-
],
152-
dtype=x.dtype,
153-
device=x.device))
152+
torch.zeros(
153+
list(x.shape[:-1])
154+
+ [
155+
self.output_hidden_sizes[
156+
self.lora_module_types.index(module_idx)
157+
]
158+
],
159+
dtype=x.dtype,
160+
device=x.device,
161+
)
162+
)
154163
lora_output = torch.cat(lora_output, dim=-1)
155164
return lora_output
156165

Lines changed: 2 additions & 3 deletions
@@ -1,9 +1,8 @@
-from .shared_tensor import (SharedTensorContainer,
-                            _SharedTensorRebuildMethodRegistry)
+from .shared_tensor import SharedTensorContainer, _SharedTensorRebuildMethodRegistry
 
 # Initialize the registry when the package is imported
 _SharedTensorRebuildMethodRegistry.initialize()
 
 __all__ = [
-    'SharedTensorContainer',
+    "SharedTensorContainer",
 ]
