From 72be28c61643044888c76b74b7b7e5d15b386ede Mon Sep 17 00:00:00 2001 From: Corentin Jabot Date: Tue, 12 Sep 2023 19:48:47 +0200 Subject: [PATCH 01/11] [Github] Escape `@` and html in the
block * This avoid pinging folks on all issue when they got pinged on bugzilla eaons ago * Avoid formating bugs when there is html in the issue description --- llvm/utils/git/github-automation.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/llvm/utils/git/github-automation.py b/llvm/utils/git/github-automation.py index eac5816b5499f..02e4779412658 100755 --- a/llvm/utils/git/github-automation.py +++ b/llvm/utils/git/github-automation.py @@ -12,6 +12,7 @@ from git import Repo # type: ignore import html import github +import html import os import re import requests @@ -46,6 +47,9 @@ def _get_curent_team(team_name, teams) -> Optional[github.Team.Team]: return team return None +def escape_description(str): + # https://github.com/github/markup/issues/1168#issuecomment-494946168 + return html.escape(str.replace("@", "@"), False) class IssueSubscriber: @property @@ -67,12 +71,15 @@ def run(self) -> bool: if team.slug == "issue-subscribers-good-first-issue": comment = "{}\n".format(beginner_comment) - comment = ( - f"@llvm/{team.slug}" - + "\n\n
\n" - + f"{self.issue.body}\n" - + "
" - ) + body = escape_description(self.issue.body) + + comment = ( f""" +@llvm/{team.slug} + +
+{body} +
+""" ) self.issue.create_comment(comment) return True From 42bb7d5940a1dfb6e385838687c770bc37c6512a Mon Sep 17 00:00:00 2001 From: Corentin Jabot Date: Tue, 12 Sep 2023 19:57:49 +0200 Subject: [PATCH 02/11] Fix order of operations: we should escape html first so the comment does not get escaped --- llvm/utils/git/github-automation.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/utils/git/github-automation.py b/llvm/utils/git/github-automation.py index 02e4779412658..c1885ea4af801 100755 --- a/llvm/utils/git/github-automation.py +++ b/llvm/utils/git/github-automation.py @@ -49,7 +49,8 @@ def _get_curent_team(team_name, teams) -> Optional[github.Team.Team]: def escape_description(str): # https://github.com/github/markup/issues/1168#issuecomment-494946168 - return html.escape(str.replace("@", "@"), False) + str = html.escape(str, False) + return str.replace("@", "@") class IssueSubscriber: @property From b4b4e8f48390212d5bcc296f121050db4c3fb3be Mon Sep 17 00:00:00 2001 From: Corentin Jabot Date: Tue, 12 Sep 2023 20:05:41 +0200 Subject: [PATCH 03/11] Formatting --- llvm/utils/git/github-automation.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/llvm/utils/git/github-automation.py b/llvm/utils/git/github-automation.py index c1885ea4af801..eeba5eaff97fb 100755 --- a/llvm/utils/git/github-automation.py +++ b/llvm/utils/git/github-automation.py @@ -47,11 +47,13 @@ def _get_curent_team(team_name, teams) -> Optional[github.Team.Team]: return team return None + def escape_description(str): # https://github.com/github/markup/issues/1168#issuecomment-494946168 str = html.escape(str, False) return str.replace("@", "@") + class IssueSubscriber: @property def team_name(self) -> str: @@ -74,13 +76,13 @@ def run(self) -> bool: body = escape_description(self.issue.body) - comment = ( f""" + comment = f""" @llvm/{team.slug}
{body}
-""" ) +""" self.issue.create_comment(comment) return True From d12147f1587e1f8ac29b31e159eb4e5efaefb810 Mon Sep 17 00:00:00 2001 From: Corentin Jabot Date: Wed, 13 Sep 2023 07:21:08 +0200 Subject: [PATCH 04/11] * Escape # * Escape PR description * Trunkate the list of files if it's > 20K * Color the diff on github --- llvm/utils/git/github-automation.py | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/llvm/utils/git/github-automation.py b/llvm/utils/git/github-automation.py index eeba5eaff97fb..2a372622954b4 100755 --- a/llvm/utils/git/github-automation.py +++ b/llvm/utils/git/github-automation.py @@ -51,7 +51,7 @@ def _get_curent_team(team_name, teams) -> Optional[github.Team.Team]: def escape_description(str): # https://github.com/github/markup/issues/1168#issuecomment-494946168 str = html.escape(str, False) - return str.replace("@", "@") + return str.replace("@", "@").replace("#", "#") class IssueSubscriber: @@ -123,6 +123,11 @@ def run(self) -> bool: print(f"couldn't find team named {self.team_name}") return False + # GitHub limits comments to 65,536 characters, let's limit the diff + # and the file list to 20kB each. + STAT_LIMIT = 20 * 1024 + DIFF_LIMIT = 20 * 1024 + # Get statistics for each file diff_stats = f"{self.pr.changed_files} Files Affected:\n\n" for file in self.pr.get_files(): @@ -133,37 +138,40 @@ def run(self) -> bool: diff_stats += f"-{file.deletions}" diff_stats += ") " if file.status == "renamed": - print(f"(from {file.previous_filename})") + print(f"(from {file.previous_filename})" diff_stats += "\n" - diff_stats += "\n" + if len(diff_stats) > STAT_LIMIT) + break # Get the diff try: patch = html.escape(requests.get(self.pr.diff_url).text) except: patch = "" - diff_stats += "\n
\n" + html.escape(patch)
 
         # GitHub limits comments to 65,536 characters, let's limit the diff to 20kB.
-        DIFF_LIMIT = 20 * 1024
         patch_link = f"Full diff: {self.pr.diff_url}\n"
         if len(patch) > DIFF_LIMIT:
             patch_link = f"\nPatch is {human_readable_size(len(patch))}, truncated to {human_readable_size(DIFF_LIMIT)} below, full version: {self.pr.diff_url}\n"
-            diff_stats = diff_stats[0:DIFF_LIMIT] + "...\n\n"
-        diff_stats += "
" + patch = html.escape(patch[0:DIFF_LIMIT]) + "...\n\n" team_mention = "@llvm/{}".format(team.slug) - body = self.pr.body + body = escape_description(self.pr.body) comment = f""" {self.COMMENT_TAG} {team_mention} - +
Changes {body} -- {patch_link} + {diff_stats} + +
+{patch}
+
""" From b6456169661b49556dbc6b73baed1d64561bbf56 Mon Sep 17 00:00:00 2001 From: Corentin Jabot Date: Wed, 13 Sep 2023 07:25:29 +0200 Subject: [PATCH 05/11] Formatting --- llvm/utils/git/github-automation.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/llvm/utils/git/github-automation.py b/llvm/utils/git/github-automation.py index 2a372622954b4..55a08a6eee17a 100755 --- a/llvm/utils/git/github-automation.py +++ b/llvm/utils/git/github-automation.py @@ -12,7 +12,6 @@ from git import Repo # type: ignore import html import github -import html import os import re import requests @@ -123,8 +122,8 @@ def run(self) -> bool: print(f"couldn't find team named {self.team_name}") return False - # GitHub limits comments to 65,536 characters, let's limit the diff - # and the file list to 20kB each. + # GitHub limits comments to 65,536 characters, let's limit the diff + # and the file list to 20kB each. STAT_LIMIT = 20 * 1024 DIFF_LIMIT = 20 * 1024 @@ -138,9 +137,9 @@ def run(self) -> bool: diff_stats += f"-{file.deletions}" diff_stats += ") " if file.status == "renamed": - print(f"(from {file.previous_filename})" + print(f"(from {file.previous_filename})") diff_stats += "\n" - if len(diff_stats) > STAT_LIMIT) + if len(diff_stats) > STAT_LIMIT: break # Get the diff From 8c0b3bc07d236816f1a11f280f2c56e36adf48c9 Mon Sep 17 00:00:00 2001 From: Corentin Jabot Date: Thu, 14 Sep 2023 10:18:46 +0200 Subject: [PATCH 06/11] Only escape # and @ when they could be part of an issue number/handle --- llvm/utils/git/github-automation.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/llvm/utils/git/github-automation.py b/llvm/utils/git/github-automation.py index 55a08a6eee17a..11d21967cf210 100755 --- a/llvm/utils/git/github-automation.py +++ b/llvm/utils/git/github-automation.py @@ -50,7 +50,11 @@ def _get_curent_team(team_name, teams) -> Optional[github.Team.Team]: def escape_description(str): # https://github.com/github/markup/issues/1168#issuecomment-494946168 str = html.escape(str, False) - return str.replace("@", "@").replace("#", "#") + # '@' followed by alphanum is a user name + str = re.sub("@(?=\w+)","@", str) + # '#' followed by digits is considered an issue number + str = re.sub("#(?=\d+\s)", "#", str) + return str class IssueSubscriber: From 47ad7a04f072c4c0a8822bb8a074369a13f4a15d Mon Sep 17 00:00:00 2001 From: Corentin Jabot Date: Thu, 14 Sep 2023 21:00:05 +0200 Subject: [PATCH 07/11] Use a markdown codeblock for the diff. This way we can render < and & properly. This only work if there is a new line before the 3 ticks. --- llvm/utils/git/github-automation.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/llvm/utils/git/github-automation.py b/llvm/utils/git/github-automation.py index 11d21967cf210..9de02a9a71da7 100755 --- a/llvm/utils/git/github-automation.py +++ b/llvm/utils/git/github-automation.py @@ -56,6 +56,9 @@ def escape_description(str): str = re.sub("#(?=\d+\s)", "#", str) return str +def sanitize_markdown_code_block(str): + # remove codeblocks terminators + return re.sub("^\s*```\s*$", r"` ` `", str) class IssueSubscriber: @property @@ -148,18 +151,21 @@ def run(self) -> bool: # Get the diff try: - patch = html.escape(requests.get(self.pr.diff_url).text) + patch = requests.get(self.pr.diff_url).text except: patch = "" - # GitHub limits comments to 65,536 characters, let's limit the diff to 20kB. + patch = sanitize_markdown_code_block(patch) + patch_link = f"Full diff: {self.pr.diff_url}\n" if len(patch) > DIFF_LIMIT: patch_link = f"\nPatch is {human_readable_size(len(patch))}, truncated to {human_readable_size(DIFF_LIMIT)} below, full version: {self.pr.diff_url}\n" - patch = html.escape(patch[0:DIFF_LIMIT]) + "...\n\n" + patch = patch[0:DIFF_LIMIT] + "...\n[truncated]\n" team_mention = "@llvm/{}".format(team.slug) body = escape_description(self.pr.body) +# Note: the comment is in markdown and the code below +# is sensible to line break comment = f""" {self.COMMENT_TAG} {team_mention} @@ -172,9 +178,10 @@ def run(self) -> bool: {diff_stats} -
+```diff
 {patch}
-
+``` +
""" From 1c7b435c7960b31a2cfbc6d074619ebbb593bf33 Mon Sep 17 00:00:00 2001 From: Corentin Jabot Date: Thu, 14 Sep 2023 21:14:20 +0200 Subject: [PATCH 08/11] formatting --- llvm/utils/git/github-automation.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/llvm/utils/git/github-automation.py b/llvm/utils/git/github-automation.py index 9de02a9a71da7..d5cf511ebeecb 100755 --- a/llvm/utils/git/github-automation.py +++ b/llvm/utils/git/github-automation.py @@ -56,6 +56,7 @@ def escape_description(str): str = re.sub("#(?=\d+\s)", "#", str) return str + def sanitize_markdown_code_block(str): # remove codeblocks terminators return re.sub("^\s*```\s*$", r"` ` `", str) @@ -164,8 +165,8 @@ def run(self) -> bool: team_mention = "@llvm/{}".format(team.slug) body = escape_description(self.pr.body) -# Note: the comment is in markdown and the code below -# is sensible to line break + # Note: the comment is in markdown and the code below + # is sensible to line break comment = f""" {self.COMMENT_TAG} {team_mention} From 96c4b4760afafc97a2e9b08a49ffbb1bd3575222 Mon Sep 17 00:00:00 2001 From: Corentin Jabot Date: Thu, 14 Sep 2023 21:17:54 +0200 Subject: [PATCH 09/11] formatting again using the correct base revision --- llvm/utils/git/github-automation.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/utils/git/github-automation.py b/llvm/utils/git/github-automation.py index d5cf511ebeecb..7f4ea46a07412 100755 --- a/llvm/utils/git/github-automation.py +++ b/llvm/utils/git/github-automation.py @@ -51,7 +51,7 @@ def escape_description(str): # https://github.com/github/markup/issues/1168#issuecomment-494946168 str = html.escape(str, False) # '@' followed by alphanum is a user name - str = re.sub("@(?=\w+)","@", str) + str = re.sub("@(?=\w+)", "@", str) # '#' followed by digits is considered an issue number str = re.sub("#(?=\d+\s)", "#", str) return str @@ -61,6 +61,7 @@ def sanitize_markdown_code_block(str): # remove codeblocks terminators return re.sub("^\s*```\s*$", r"` ` `", str) + class IssueSubscriber: @property def team_name(self) -> str: From b67d0faa371385b103f5dc39b6efb3c33a1430b5 Mon Sep 17 00:00:00 2001 From: Corentin Jabot Date: Fri, 15 Sep 2023 08:50:53 +0200 Subject: [PATCH 10/11] add a line break to make sure the description is markdown-formatted --- llvm/utils/git/github-automation.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/utils/git/github-automation.py b/llvm/utils/git/github-automation.py index 7f4ea46a07412..8578a83262717 100755 --- a/llvm/utils/git/github-automation.py +++ b/llvm/utils/git/github-automation.py @@ -174,8 +174,9 @@ def run(self) -> bool:
Changes + {body} --- +--- {patch_link} {diff_stats} From 6a15e622f9b9ef163f07ebc862beb887f3f25e92 Mon Sep 17 00:00:00 2001 From: Corentin Jabot Date: Fri, 15 Sep 2023 09:50:56 +0200 Subject: [PATCH 11/11] Use more backticks to start the code block That way we do not have to escape ` ``` ` --- llvm/utils/git/github-automation.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/llvm/utils/git/github-automation.py b/llvm/utils/git/github-automation.py index 8578a83262717..5c90ca8bb7150 100755 --- a/llvm/utils/git/github-automation.py +++ b/llvm/utils/git/github-automation.py @@ -57,11 +57,6 @@ def escape_description(str): return str -def sanitize_markdown_code_block(str): - # remove codeblocks terminators - return re.sub("^\s*```\s*$", r"` ` `", str) - - class IssueSubscriber: @property def team_name(self) -> str: @@ -157,8 +152,6 @@ def run(self) -> bool: except: patch = "" - patch = sanitize_markdown_code_block(patch) - patch_link = f"Full diff: {self.pr.diff_url}\n" if len(patch) > DIFF_LIMIT: patch_link = f"\nPatch is {human_readable_size(len(patch))}, truncated to {human_readable_size(DIFF_LIMIT)} below, full version: {self.pr.diff_url}\n" @@ -181,9 +174,9 @@ def run(self) -> bool: {diff_stats} -```diff +``````````diff {patch} -``` +``````````
"""