Skip to content

Commit 67a6dce

Browse files
committed
Only show GitHub check annotations on changed doc paragraphs
1 parent dc8fdf5 commit 67a6dce

File tree

2 files changed

+165
-28
lines changed

2 files changed

+165
-28
lines changed

.github/workflows/reusable-docs.yml

+3-8
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ jobs:
1818
timeout-minutes: 60
1919
steps:
2020
- uses: actions/checkout@v3
21+
with:
22+
fetch-depth: 0
2123
- name: 'Set up Python'
2224
uses: actions/setup-python@v4
2325
with:
@@ -28,13 +30,6 @@ jobs:
2830
run: make -C Doc/ venv
2931

3032
# To annotate PRs with Sphinx nitpicks (missing references)
31-
- name: 'Get list of changed files'
32-
if: github.event_name == 'pull_request'
33-
id: changed_files
34-
uses: Ana06/[email protected]
35-
with:
36-
filter: "Doc/**"
37-
format: csv # works for paths with spaces
3833
- name: 'Build HTML documentation'
3934
continue-on-error: true
4035
run: |
@@ -45,7 +40,7 @@ jobs:
4540
if: github.event_name == 'pull_request'
4641
run: |
4742
python Doc/tools/check-warnings.py \
48-
--check-and-annotate '${{ steps.changed_files.outputs.added_modified }}' \
43+
--check-and-annotate-changed 'origin/${{ github.base_ref }}' '${{ github.sha }}' \
4944
--fail-if-regression \
5045
--fail-if-improved
5146

Doc/tools/check-warnings.py

+162-20
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,13 @@
33
Check the output of running Sphinx in nit-picky mode (missing references).
44
"""
55
import argparse
6-
import csv
6+
import itertools
77
import os
88
import re
9+
import subprocess
910
import sys
1011
from pathlib import Path
12+
from typing import TextIO
1113

1214
# Exclude these whether they're dirty or clean,
1315
# because they trigger a rebuild of dirty files.
@@ -24,28 +26,167 @@
2426
"venv",
2527
}
2628

27-
PATTERN = re.compile(r"(?P<file>[^:]+):(?P<line>\d+): WARNING: (?P<msg>.+)")
29+
# Regex pattern to match the parts of a Sphinx warning
30+
WARNING_PATTERN = re.compile(
31+
r"(?P<file>([A-Za-z]:[\\/])?[^:]+):(?P<line>\d+): WARNING: (?P<msg>.+)"
32+
)
2833

34+
# Regex pattern to match the line numbers in a Git unified diff
35+
DIFF_PATTERN = re.compile(
36+
r"^@@ -(?P<linea>\d+)(?:,(?P<removed>\d+))? \+(?P<lineb>\d+)(?:,(?P<added>\d+))? @@",
37+
flags=re.MULTILINE,
38+
)
2939

30-
def check_and_annotate(warnings: list[str], files_to_check: str) -> None:
40+
41+
def get_diff_files(ref_a: str, ref_b: str, filter_mode: str = "") -> set[Path]:
42+
"""List the files changed between two Gif refs, filtered by change type."""
43+
added_files_result = subprocess.run(
44+
[
45+
"git",
46+
"diff",
47+
f"--diff-filter={filter_mode}",
48+
"--name-only",
49+
f"{ref_a}...{ref_b}",
50+
"--",
51+
],
52+
stdout=subprocess.PIPE,
53+
check=True,
54+
text=True,
55+
encoding="UTF-8",
56+
)
57+
58+
added_files = added_files_result.stdout.strip().split("\n")
59+
return {Path(file.strip()) for file in added_files if file.strip()}
60+
61+
62+
def get_diff_lines(ref_a: str, ref_b: str, file: Path) -> list[int]:
63+
"""List the lines changed between two Gif refs for a specific file."""
64+
diff_output = subprocess.run(
65+
[
66+
"git",
67+
"diff",
68+
"--unified=0",
69+
f"{ref_a}...{ref_b}",
70+
"--",
71+
str(file),
72+
],
73+
stdout=subprocess.PIPE,
74+
check=True,
75+
text=True,
76+
encoding="UTF-8",
77+
)
78+
79+
# Scrape line offsets + lengths from diff and convert to line numbers
80+
line_matches = DIFF_PATTERN.finditer(diff_output.stdout)
81+
line_match_values = [
82+
line_match.groupdict(default=1) for line_match in line_matches
83+
]
84+
line_ints = [
85+
(int(match_value["lineb"]), int(match_value["added"]))
86+
for match_value in line_match_values
87+
]
88+
line_ranges = [
89+
range(line_b, line_b + added) for line_b, added in line_ints
90+
]
91+
line_numbers = list(itertools.chain(*line_ranges))
92+
93+
return line_numbers
94+
95+
96+
def get_para_line_numbers(file_obj: TextIO) -> list[list[int]]:
97+
"""Get the line numbers of text in a file object, grouped by paragraph."""
98+
paragraphs = []
99+
prev_line = None
100+
for lineno, line in enumerate(file_obj):
101+
lineno = lineno + 1
102+
if prev_line is None or (line.strip() and not prev_line.strip()):
103+
paragraph = [lineno - 1]
104+
paragraphs.append(paragraph)
105+
paragraph.append(lineno)
106+
prev_line = line
107+
return paragraphs
108+
109+
110+
def parse_and_filter_warnings(
111+
warnings: list[str], files: set[Path]
112+
) -> list[re.Match[str]]:
113+
"""Get the warnings matching passed files and parse them with regex."""
114+
filtered_warnings = [
115+
warning
116+
for warning in warnings
117+
if any(str(file) in warning for file in files)
118+
]
119+
warning_matches = [
120+
WARNING_PATTERN.fullmatch(warning.strip())
121+
for warning in filtered_warnings
122+
]
123+
non_null_matches = [warning for warning in warning_matches if warning]
124+
return non_null_matches
125+
126+
127+
def process_touched_warnings(
128+
warnings: list[str], ref_a: str, ref_b: str
129+
) -> list[re.Match[str]]:
130+
"""Filter a list of Sphinx warnings to those affecting touched lines."""
131+
added_files, modified_files = tuple(
132+
get_diff_files(ref_a, ref_b, filter_mode=mode) for mode in ("A", "M")
133+
)
134+
135+
warnings_added = parse_and_filter_warnings(warnings, added_files)
136+
warnings_modified = parse_and_filter_warnings(warnings, modified_files)
137+
138+
modified_files_warned = {
139+
file
140+
for file in modified_files
141+
if any(str(file) in warning["file"] for warning in warnings_modified)
142+
}
143+
144+
warnings_touched = warnings_added.copy()
145+
for file in modified_files_warned:
146+
diff_lines = get_diff_lines(ref_a, ref_b, file)
147+
with file.open(encoding="UTF-8") as file_obj:
148+
paragraphs = get_para_line_numbers(file_obj)
149+
touched_paras = [
150+
para_lines
151+
for para_lines in paragraphs
152+
if set(diff_lines) & set(para_lines)
153+
]
154+
touched_para_lines = set(itertools.chain(*touched_paras))
155+
warnings_infile = [
156+
warning
157+
for warning in warnings_modified
158+
if str(file) in warning["file"]
159+
]
160+
warnings_touched += [
161+
warning
162+
for warning in warnings_infile
163+
if int(warning["line"]) in touched_para_lines
164+
]
165+
166+
return warnings_touched
167+
168+
169+
def check_and_annotate_changed(
170+
warnings: list[str], ref_a: str = "main", ref_b: str = "HEAD"
171+
) -> None:
31172
"""
32-
Convert Sphinx warning messages to GitHub Actions.
173+
Convert Sphinx warning messages to GitHub Actions for changed paragraphs.
33174
34175
Converts lines like:
35176
.../Doc/library/cgi.rst:98: WARNING: reference target not found
36177
to:
37178
::warning file=.../Doc/library/cgi.rst,line=98::reference target not found
38179
39-
Non-matching lines are echoed unchanged.
40-
41-
see:
180+
See:
42181
https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#setting-a-warning-message
43182
"""
44-
files_to_check = next(csv.reader([files_to_check]))
45-
for warning in warnings:
46-
if any(filename in warning for filename in files_to_check):
47-
if match := PATTERN.fullmatch(warning):
48-
print("::warning file={file},line={line}::{msg}".format_map(match))
183+
warnings_touched = process_touched_warnings(warnings, ref_a, ref_b)
184+
print("Emitting doc warnings matching modified lines:")
185+
for warning in warnings_touched:
186+
print(warning[0])
187+
print("::warning file={file},line={line}::{msg}".format_map(warning))
188+
if not warnings_touched:
189+
print("None")
49190

50191

51192
def fail_if_regression(
@@ -68,7 +209,7 @@ def fail_if_regression(
68209
print(filename)
69210
for warning in warnings:
70211
if filename in warning:
71-
if match := PATTERN.fullmatch(warning):
212+
if match := WARNING_PATTERN.fullmatch(warning):
72213
print(" {line}: {msg}".format_map(match))
73214
return -1
74215
return 0
@@ -94,9 +235,10 @@ def fail_if_improved(
94235
def main() -> int:
95236
parser = argparse.ArgumentParser()
96237
parser.add_argument(
97-
"--check-and-annotate",
98-
help="Comma-separated list of files to check, "
99-
"and annotate those with warnings on GitHub Actions",
238+
"--check-and-annotate-changed",
239+
nargs=2,
240+
help="Annotate lines changed between two refs "
241+
"with warnings on GitHub Actions",
100242
)
101243
parser.add_argument(
102244
"--fail-if-regression",
@@ -114,7 +256,7 @@ def main() -> int:
114256
wrong_directory_msg = "Must run this script from the repo root"
115257
assert Path("Doc").exists() and Path("Doc").is_dir(), wrong_directory_msg
116258

117-
with Path("Doc/sphinx-warnings.txt").open() as f:
259+
with Path("Doc/sphinx-warnings.txt").open(encoding="UTF-8") as f:
118260
warnings = f.read().splitlines()
119261

120262
cwd = str(Path.cwd()) + os.path.sep
@@ -124,15 +266,15 @@ def main() -> int:
124266
if "Doc/" in warning
125267
}
126268

127-
with Path("Doc/tools/.nitignore").open() as clean_files:
269+
with Path("Doc/tools/.nitignore").open(encoding="UTF-8") as clean_files:
128270
files_with_expected_nits = {
129271
filename.strip()
130272
for filename in clean_files
131273
if filename.strip() and not filename.startswith("#")
132274
}
133275

134-
if args.check_and_annotate:
135-
check_and_annotate(warnings, args.check_and_annotate)
276+
if args.check_and_annotate_changed:
277+
check_and_annotate_changed(warnings, *args.check_and_annotate_changed)
136278

137279
if args.fail_if_regression:
138280
exit_code += fail_if_regression(

0 commit comments

Comments
 (0)