Skip to content

Commit cbce511

Browse files
authored
Merge branch 'main' into user/dongfengy/fix_ref
2 parents 5957a02 + e0253ee commit cbce511

File tree

95 files changed

+4557
-1212
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

95 files changed

+4557
-1212
lines changed

.gitattributes

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,3 +9,6 @@ triton_backend/tools/gpt/input_data.json filter=lfs diff=lfs merge=lfs -text
99
docs/source/blogs/media/tech_blog3_mla_absorb.png filter=lfs diff=lfs merge=lfs -text
1010
tests/integration/test_input_files/*.png filter=lfs diff=lfs merge=lfs -text
1111
tests/integration/test_input_files/*.jpg filter=lfs diff=lfs merge=lfs -text
12+
docs/source/blogs/media/tech_blog10_baseline_performance_detail.png filter=lfs diff=lfs merge=lfs -text
13+
docs/source/blogs/media/tech_blog10_full_strategy_performance.png filter=lfs diff=lfs merge=lfs -text
14+
docs/source/blogs/media/tech_blog10_context_wait_performance.png filter=lfs diff=lfs merge=lfs -text

.github/pull_request_template.md

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,20 @@ Please explain the issue and the solution in short.
4040
Please clearly list the relevant test(s) that safeguard the changes in this PR. This helps us ensure we have sufficient test coverage for the PR.
4141
-->
4242

43+
## PR Checklist
44+
45+
Please review the following before submitting your PR:
46+
- PR description clearly explains what and why. If using CodeRabbit's summary, please make sure it makes sense.
47+
- PR follows [TRT-LLM CODING GUIDELINES](https://github.com/NVIDIA/TensorRT-LLM/blob/main/CODING_GUIDELINES.md) to the best of your knowledge.
48+
- Test cases are provided for new code paths (see [test instructions](https://github.com/NVIDIA/TensorRT-LLM/tree/main/tests#1-how-does-the-ci-work))
49+
- Any new dependencies have been scanned for license and vulnerabilities
50+
- [CODEOWNERS](https://github.com/NVIDIA/TensorRT-LLM/blob/main/.github/CODEOWNERS) updated if ownership changes
51+
- Documentation updated as needed
52+
- The reviewers assigned automatically/manually are appropriate for the PR.
53+
54+
55+
- [ ] Please check this after reviewing the above items as appropriate for this PR.
56+
4357
## GitHub Bot Help
4458

4559
`/bot [-h] ['run', 'kill', 'skip', 'reuse-pipeline'] ...`
Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
#!/usr/bin/env python3
2+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3+
# SPDX-License-Identifier: Apache-2.0
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
17+
import os
18+
import re
19+
import sys
20+
from typing import List
21+
22+
# Regex for a single Markdown task-list line of the PR body, for example
# "- [ ] Task description" or "* [x] Task description".
#   group 1 -> checkbox state: ' ' is unchecked, 'x' / 'X' is checked
#   group 2 -> the text that follows the checkbox (the task description)
TASK_PATTERN = re.compile(r'^\s*[-*]\s+\[( |x|X)\]\s*(.*)')
27+
28+
29+
def find_all_tasks(pr_body: str) -> List[str]:
    """Collect every checklist line of the PR body, checked or unchecked.

    Each entry is the text matched by ``TASK_PATTERN`` on that line with
    surrounding whitespace removed; entries keep the order of the body.
    """
    candidates = (TASK_PATTERN.match(row) for row in pr_body.splitlines())
    return [hit.group(0).strip() for hit in candidates if hit]
37+
38+
39+
def _is_fully_struck_through(text: str) -> bool:
    """Return True when *text* is one ``~~...~~`` span around non-empty content."""
    stripped = text.strip()
    # The opening and closing markers must not overlap: "~~", "~~~" and
    # "~~~~" start and end with "~~" but strike through nothing.
    if len(stripped) <= 4:
        return False
    if not (stripped.startswith('~~') and stripped.endswith('~~')):
        return False
    inner = stripped[2:-2]
    # A "~~" inside means the span closed early, e.g. "~~a~~ b ~~c~~" leaves
    # "b" visible, so the item is not struck through in its entirety.
    return bool(inner.strip()) and '~~' not in inner


def find_unresolved_tasks(pr_body: str) -> List[str]:
    """Return the checklist items of *pr_body* that are still unresolved.

    A task is considered resolved if it is checked (``[x]`` or ``[X]``)
    or if its whole text is struck through with a single pair of ``~~``
    markers around non-empty content.

    Args:
        pr_body: Raw Markdown text of the pull request description.

    Returns:
        The unresolved checklist lines, stripped of surrounding whitespace,
        in the order they appear in the body.
    """
    unresolved: List[str] = []
    for line in pr_body.splitlines():
        match = TASK_PATTERN.match(line)
        if match is None:
            continue
        state, content = match.groups()
        if state.lower() == 'x' or _is_fully_struck_through(content):
            continue
        unresolved.append(match.group(0).strip())
    return unresolved
58+
59+
60+
def check_pr_checklist_section(pr_body: str) -> tuple[bool, str]:
    """Verify that the PR template's checklist section is still in the body.

    Two things are required, checked in this order: a ``## PR Checklist``
    header (matched case-insensitively) and the final confirmation checkbox,
    which may be checked or unchecked.

    Returns:
        tuple: (is_valid, error_message); the message is empty when valid.
    """
    # (regex, flags, message reported when the regex is not found)
    requirements = (
        (
            r'^##\s+PR\s+Checklist',
            re.IGNORECASE | re.MULTILINE,
            "Missing '## PR Checklist' header. Please ensure you haven't removed the PR template section.",
        ),
        (
            r'^\s*[-*]\s+\[( |x|X)\]\s+Please check this after reviewing the above items',
            re.MULTILINE,
            "Missing the required final checkbox '- [ ] Please check this after reviewing the above items as appropriate for this PR.' Please ensure you haven't removed this from the PR template.",
        ),
    )
    for regex, flags, complaint in requirements:
        if re.search(regex, pr_body, flags) is None:
            return False, complaint
    return True, ""
80+
81+
82+
def main() -> None:
    """Validate the PR description's checklist and exit non-zero on failure.

    Reads the description from the ``PR_BODY`` environment variable. When
    ``ENFORCE_PR_HAS_CHECKLIST`` is ``true`` (case-insensitive), the template's
    checklist section and at least one checklist item are also required.
    Exits with status 1 on any violation or unresolved item.
    """
    body = os.environ.get("PR_BODY", "")
    strict = os.environ.get("ENFORCE_PR_HAS_CHECKLIST", "false").lower() == "true"

    # In strict mode the template section must be present before anything else.
    if strict:
        section_ok, reason = check_pr_checklist_section(body)
        if not section_ok:
            print(f"Error: {reason}")
            sys.exit(1)

    tasks = find_all_tasks(body)
    pending = find_unresolved_tasks(body)

    # Strict mode also demands that the body carries at least one item.
    if strict and not tasks:
        print(
            "Error: PR body must contain at least one checklist item when ENFORCE_PR_HAS_CHECKLIST is enabled."
        )
        print(
            "Expected format: - [ ] Task description or * [ ] Task description")
        sys.exit(1)

    # Any item that is neither checked nor struck through fails the check.
    if pending:
        print("Unresolved checklist items found:")
        for entry in pending:
            print(entry)
        sys.exit(1)

    print("All checklist items resolved."
          if tasks else "No checklist items found in PR body.")


if __name__ == "__main__":
    main()

.github/workflows/pr-check.yml

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,3 +53,21 @@ jobs:
5353
echo " - [#1234][doc] Update documentation"
5454
echo " - [None][chore] Minor clean-up"
5555
exit 1
56+
57+
# Runs .github/scripts/pr_checklist_check.py against the pull request description.
check-pr-body-checklist:
  name: Check PR Checklist Resolution
  runs-on: ubuntu-latest
  steps:
    - name: Checkout repository
      uses: actions/checkout@v4

    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: '3.10'

    - name: Validate PR Checklist
      env:
        # The PR description reaches the script through the environment
        # instead of being interpolated into the command line.
        PR_BODY: ${{ github.event.pull_request.body }}
        # The script compares this value (lower-cased) against "true".
        ENFORCE_PR_HAS_CHECKLIST: false
      run: python .github/scripts/pr_checklist_check.py

cpp/include/tensorrt_llm/deep_gemm/scheduler.cuh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -379,7 +379,7 @@ struct GroupedMaskedScheduler
379379
}
380380
};
381381

382-
// Need to keep the same as the one in tests/unittest/_torch/thop/deep_gemm_tests.py
382+
// Need to keep the same as the one in tests/unittest/_torch/thop/parallel/deep_gemm_tests.py
383383
template <typename T_offset, typename T_index>
384384
__host__ __device__ __forceinline__ T_offset compute_padded_offset(T_offset offset, T_index problem_idx)
385385
{

0 commit comments

Comments
 (0)