Skip to content

Commit d4f66b9

Browse files
committed
Only show GitHub check annotations on changed doc paragraphs
1 parent dc8fdf5 commit d4f66b9

File tree

2 files changed

+147
-27
lines changed

2 files changed

+147
-27
lines changed

.github/workflows/reusable-docs.yml

+3-8
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ jobs:
1818
timeout-minutes: 60
1919
steps:
2020
- uses: actions/checkout@v3
21+
with:
22+
fetch-depth: 0
2123
- name: 'Set up Python'
2224
uses: actions/setup-python@v4
2325
with:
@@ -28,13 +30,6 @@ jobs:
2830
run: make -C Doc/ venv
2931

3032
# To annotate PRs with Sphinx nitpicks (missing references)
31-
- name: 'Get list of changed files'
32-
if: github.event_name == 'pull_request'
33-
id: changed_files
34-
uses: Ana06/[email protected]
35-
with:
36-
filter: "Doc/**"
37-
format: csv # works for paths with spaces
3833
- name: 'Build HTML documentation'
3934
continue-on-error: true
4035
run: |
@@ -45,7 +40,7 @@ jobs:
4540
if: github.event_name == 'pull_request'
4641
run: |
4742
python Doc/tools/check-warnings.py \
48-
--check-and-annotate '${{ steps.changed_files.outputs.added_modified }}' \
43+
--check-and-annotate-changed 'origin/${{ github.base_ref }}' '${{ github.sha }}' \
4944
--fail-if-regression \
5045
--fail-if-improved
5146

Doc/tools/check-warnings.py

+144-19
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,13 @@
33
Check the output of running Sphinx in nit-picky mode (missing references).
44
"""
55
import argparse
6-
import csv
6+
import itertools
77
import os
88
import re
9+
import subprocess
910
import sys
1011
from pathlib import Path
12+
from typing import TextIO
1113

1214
# Exclude these whether they're dirty or clean,
1315
# because they trigger a rebuild of dirty files.
@@ -24,28 +26,150 @@
2426
"venv",
2527
}
2628

27-
PATTERN = re.compile(r"(?P<file>[^:]+):(?P<line>\d+): WARNING: (?P<msg>.+)")
29+
# Regex pattern to match the parts of a Sphinx warning
30+
WARNING_PATTERN = re.compile(
31+
r"(?P<file>([A-Za-z]:[\\/])?[^:]+):(?P<line>\d+): WARNING: (?P<msg>.+)"
32+
)
2833

34+
# Regex pattern to match the line numbers in a Git unified diff
35+
DIFF_PATTERN = re.compile(
36+
r"^@@ -(?P<linea>\d+),(?P<removed>\d+) \+(?P<lineb>\d+),(?P<added>\d+) @@",
37+
flags=re.MULTILINE,
38+
)
2939

30-
def check_and_annotate(warnings: list[str], files_to_check: str) -> None:
40+
41+
def get_diff_files(ref_a: str, ref_b: str, filter_mode: str = "") -> set[Path]:
42+
"""List the files changed between two Gif refs, filtered by change type."""
43+
added_files_result = subprocess.run(
44+
[
45+
"git",
46+
"diff",
47+
f"--diff-filter={filter_mode}",
48+
"--name-only",
49+
f"{ref_a}...{ref_b}",
50+
"--",
51+
],
52+
stdout=subprocess.PIPE,
53+
check=True,
54+
text=True,
55+
)
56+
57+
added_files = added_files_result.stdout.strip().split("\n")
58+
return {Path(file.strip()) for file in added_files if file.strip()}
59+
60+
61+
def get_diff_lines(ref_a: str, ref_b: str, file: os.PathLike) -> list[int]:
62+
"""List the lines changed between two Gif refs for a specific file."""
63+
diff_output = subprocess.run(
64+
[
65+
"git",
66+
"diff",
67+
"--unified=0",
68+
"--diff-filter=M",
69+
f"{ref_a}...{ref_b}",
70+
"--",
71+
str(file),
72+
],
73+
stdout=subprocess.PIPE,
74+
check=True,
75+
text=True,
76+
)
77+
78+
line_matches = DIFF_PATTERN.finditer(diff_output.stdout)
79+
line_ints = [
80+
(int(line_match["lineb"]), int(line_match["added"]))
81+
for line_match in line_matches
82+
]
83+
line_ranges = [
84+
range(line_b, line_b + added) for line_b, added in line_ints
85+
]
86+
line_numbers = list(itertools.chain(*line_ranges))
87+
return line_numbers
88+
89+
90+
def get_para_line_numbers(file_obj: TextIO) -> list[list[int]]:
91+
"""Get the line numbers of text in a file object, grouped by paragraph."""
92+
paragraphs = []
93+
prev_line = None
94+
for lineno, line in enumerate(file_obj):
95+
lineno = lineno + 1
96+
if prev_line is None or (line.strip() and not prev_line.strip()):
97+
paragraph = [lineno - 1]
98+
paragraphs.append(paragraph)
99+
paragraph.append(lineno)
100+
prev_line = line
101+
return paragraphs
102+
103+
104+
def process_touched_warnings(
105+
warnings: list[str], ref_a: str, ref_b: str
106+
) -> list[re.Match]:
107+
"""Filter a list of Sphinx warnings to those affecting touched lines."""
108+
added_files, modified_files = tuple(
109+
get_diff_files(ref_a, ref_b, filter_mode=mode) for mode in ("A", "M")
110+
)
111+
112+
warnings_added, warnings_modified = tuple(
113+
[
114+
WARNING_PATTERN.fullmatch(warning.strip())
115+
for warning in warnings
116+
if any(str(file) in warning for file in files)
117+
]
118+
for files in (added_files, modified_files)
119+
)
120+
warnings_added, warnings_modified = tuple(
121+
[warning for warning in warning_matches if warning]
122+
for warning_matches in (warnings_added, warnings_modified)
123+
)
124+
125+
modified_files_warned = {
126+
file
127+
for file in modified_files
128+
if any(str(file) in warning["file"] for warning in warnings_modified)
129+
}
130+
131+
warnings_touched = warnings_added.copy()
132+
for file in modified_files_warned:
133+
diff_lines = get_diff_lines(ref_a, ref_b, file)
134+
with file.open(encoding="UTF-8") as file_obj:
135+
paragraphs = get_para_line_numbers(file_obj)
136+
touched_paras = [
137+
para_lines
138+
for para_lines in paragraphs
139+
if set(diff_lines) & set(para_lines)
140+
]
141+
touched_para_lines = set(itertools.chain(*touched_paras))
142+
warnings_infile = [
143+
warning
144+
for warning in warnings_modified
145+
if str(file) in warning["file"]
146+
]
147+
warnings_touched += [
148+
warning
149+
for warning in warnings_infile
150+
if int(warning["line"]) in touched_para_lines
151+
]
152+
153+
return warnings_touched
154+
155+
156+
def check_and_annotate_changed(
157+
warnings: list[str], ref_a: str = "main", ref_b: str = "HEAD"
158+
) -> None:
31159
"""
32-
Convert Sphinx warning messages to GitHub Actions.
160+
Convert Sphinx warning messages to GitHub Actions for changed paragraphs.
33161
34162
Converts lines like:
35163
.../Doc/library/cgi.rst:98: WARNING: reference target not found
36164
to:
37165
::warning file=.../Doc/library/cgi.rst,line=98::reference target not found
38166
39-
Non-matching lines are echoed unchanged.
40-
41-
see:
167+
See:
42168
https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#setting-a-warning-message
43169
"""
44-
files_to_check = next(csv.reader([files_to_check]))
45-
for warning in warnings:
46-
if any(filename in warning for filename in files_to_check):
47-
if match := PATTERN.fullmatch(warning):
48-
print("::warning file={file},line={line}::{msg}".format_map(match))
170+
warnings_touched = process_touched_warnings(warnings, ref_a, ref_b)
171+
for warning in warnings_touched:
172+
print("::warning file={file},line={line}::{msg}".format_map(warning))
49173

50174

51175
def fail_if_regression(
@@ -68,7 +192,7 @@ def fail_if_regression(
68192
print(filename)
69193
for warning in warnings:
70194
if filename in warning:
71-
if match := PATTERN.fullmatch(warning):
195+
if match := WARNING_PATTERN.fullmatch(warning):
72196
print(" {line}: {msg}".format_map(match))
73197
return -1
74198
return 0
@@ -94,9 +218,10 @@ def fail_if_improved(
94218
def main() -> int:
95219
parser = argparse.ArgumentParser()
96220
parser.add_argument(
97-
"--check-and-annotate",
98-
help="Comma-separated list of files to check, "
99-
"and annotate those with warnings on GitHub Actions",
221+
"--check-and-annotate-changed",
222+
nargs=2,
223+
help="Annotate lines changed between two refs "
224+
"with warnings on GitHub Actions",
100225
)
101226
parser.add_argument(
102227
"--fail-if-regression",
@@ -114,7 +239,7 @@ def main() -> int:
114239
wrong_directory_msg = "Must run this script from the repo root"
115240
assert Path("Doc").exists() and Path("Doc").is_dir(), wrong_directory_msg
116241

117-
with Path("Doc/sphinx-warnings.txt").open() as f:
242+
with Path("Doc/sphinx-warnings.txt").open(encoding="UTF-8") as f:
118243
warnings = f.read().splitlines()
119244

120245
cwd = str(Path.cwd()) + os.path.sep
@@ -131,8 +256,8 @@ def main() -> int:
131256
if filename.strip() and not filename.startswith("#")
132257
}
133258

134-
if args.check_and_annotate:
135-
check_and_annotate(warnings, args.check_and_annotate)
259+
if args.check_and_annotate_changed:
260+
check_and_annotate_changed(warnings, *args.check_and_annotate_changed)
136261

137262
if args.fail_if_regression:
138263
exit_code += fail_if_regression(

0 commit comments

Comments
 (0)