Skip to content

Commit dce6ae1

Browse files
committed
Improve filter_by_diff.py and friends
* Attempts to cope with invalid UTF-8 (present in a few CBMC files) using iconv * Splits the filter-by-diff job into two stages (diff -> JSON description of interesting lines and JSON -> filtered output) This means that run_diff.sh CPPLINT is now practical, and produces a report of the linting problems in develop but not in master (around 150 lines as of the time of writing) in about 2 minutes.
1 parent 3dd8fe5 commit dce6ae1

File tree

4 files changed

+120
-60
lines changed

4 files changed

+120
-60
lines changed

scripts/diff_to_added_lines.py

+46
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
#!/usr/bin/env python
2+
3+
from __future__ import print_function
4+
5+
def diff_to_added_lines(diff_file, repository_root, out_stream):
6+
7+
import unidiff
8+
import os.path
9+
import json
10+
11+
# Create a set of all the files and the specific lines within that file that are in the diff
12+
added_lines = dict()
13+
14+
for file_in_diff in unidiff.PatchSet.from_filename(diff_file):
15+
filename = file_in_diff.target_file
16+
# Skip files deleted in the tip (b side of the diff):
17+
if filename == "/dev/null":
18+
continue
19+
assert filename.startswith("b/")
20+
filename = os.path.join(repository_root, filename[2:])
21+
if filename not in added_lines:
22+
added_lines[filename] = []
23+
added_lines[filename].append(0)
24+
for diff_hunk in file_in_diff:
25+
for diff_line in diff_hunk:
26+
if diff_line.line_type == "+":
27+
if filename not in added_lines:
28+
added_lines[filename] = []
29+
added_lines[filename].append(diff_line.target_line_no)
30+
31+
json.dump(added_lines, out_stream)
32+
33+
if __name__ == "__main__":
34+
35+
import sys
36+
37+
if len(sys.argv) != 3:
38+
print("diff_to_added_lines.py: converts a unified-diff file into a JSON dictionary mapping filenames onto an array of added or modified line numbers", file=sys.stderr)
39+
print("Usage: diff_to_added_lines.py diff.patch repository_root_directory", file=sys.stderr)
40+
41+
sys.exit(1)
42+
43+
diff_to_added_lines(sys.argv[1], sys.argv[2], sys.stdout)
44+
45+
diff_file = sys.argv[1]
46+
repository_root = sys.argv[2]

scripts/filter_by_diff.py

-53
This file was deleted.

scripts/filter_by_lines.py

+56
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
#!/usr/bin/env python
2+
3+
from __future__ import print_function
4+
5+
def filter_by_lines(diffed_file, added_lines_file, repository_root, in_stream, out_stream):
6+
7+
import os.path
8+
import json
9+
10+
diffed_file = sys.argv[1]
11+
added_lines_file = sys.argv[2]
12+
repository_root = sys.argv[3]
13+
14+
# Get a set of all the files and the specific lines within that file to keep:
15+
with open(added_lines_file, "r") as f:
16+
added_lines = json.load(f)
17+
18+
# added_lines is a dict filename -> line_number list
19+
# Make those into line_number sets instead:
20+
21+
for k in added_lines:
22+
added_lines[k] = set(added_lines[k])
23+
24+
# Print the lines that are in the set
25+
found = False
26+
for line in in_stream:
27+
line_parts = line.split(":")
28+
if len(line_parts) < 3:
29+
if found:
30+
# Print lines between a matching warning and the next warning
31+
out_stream.write(line)
32+
continue
33+
try:
34+
linenum = int(line_parts[1])
35+
found = False
36+
filename = line_parts[0]
37+
if not repository_root in filename:
38+
filename = os.path.join(repository_root, line_parts[0])
39+
if filename in added_lines and linenum in added_lines[filename]:
40+
found = True
41+
out_stream.write(line)
42+
except ValueError:
43+
if found:
44+
out_stream.write(line)
45+
continue
46+
47+
if __name__ == "__main__":
48+
49+
import sys
50+
51+
if len(sys.argv) != 4:
52+
print("filter_by_lines.py: filters lines of the form filename:line_number:message, retaining those matching a particular filename and list of line numbers", file=sys.stderr)
53+
print("Usage: filter_by_lines.py diffed_file added_lines.json repository_root_directory < warnings.txt", file=sys.stderr)
54+
sys.exit(1)
55+
56+
filter_by_lines(sys.argv[1], sys.argv[2], sys.argv[3], sys.stdin, sys.stdout)

scripts/run_diff.sh

+18-7
Original file line numberDiff line numberDiff line change
@@ -16,15 +16,21 @@ then
1616
exit 1
1717
fi
1818

19-
if ! [[ -e $script_folder/filter_by_diff.py ]]
19+
if ! [[ -e $script_folder/filter_by_lines.py ]]
2020
then
2121
echo "Filter script could not be found in the $script_folder directory"
22-
echo "Ensure filter_by_diff.py is inside the $script_folder directory then run again"
22+
echo "Ensure filter_by_lines.py is inside the $script_folder directory then run again"
2323
exit 1
2424
fi
2525

2626
if [[ "$mode" == "CPPLINT" ]]
2727
then
28+
if ! which iconv >/dev/null
29+
then
30+
echo "Couldn't find iconv in current path. Please install and try again"
31+
exit 1
32+
fi
33+
2834
if ! [[ -e $script_folder/cpplint.py ]]
2935
then
3036
echo "Lint script could not be found in the $script_folder directory"
@@ -76,17 +82,22 @@ git_start=`git merge-base $git_start $git_merge_base_end`
7682

7783
cleanup()
7884
{
79-
rm -f $diff_file
85+
rm -f $diff_file $added_lines_file
8086
}
8187

8288
trap cleanup EXIT
8389

8490
diff_file=`mktemp`
91+
added_lines_file=`mktemp`
92+
93+
# Pass the output through iconv to remove any invalid UTF-8 (diff_to_added_lines.py will die otherwise)
94+
95+
git diff $git_start $git_end | iconv -t utf-8 -c > $diff_file
8596

86-
git diff $git_start $git_end > $diff_file
97+
# Get the list of files that have changed, that end with lintable extensions
98+
diff_files=`git diff --name-only $git_start $git_end | grep "\.\(\(cpp\)\|\(hh\)\|\(cc\)\|h\)$" || true`
8799

88-
# Get the list of files that have changed
89-
diff_files=`git diff --name-only $git_start $git_end`
100+
$script_folder/diff_to_added_lines.py $diff_file $absolute_repository_root > $added_lines_file
90101

91102
for file in $diff_files; do
92103
file=$absolute_repository_root/$file
@@ -98,7 +109,7 @@ for file in $diff_files; do
98109

99110
# Run the linting script and filter:
100111
# The errors from the linter go to STDERR so must be redirected to STDOUT
101-
result=`eval $cmd | $script_folder/filter_by_diff.py $file $diff_file $absolute_repository_root`
112+
result=`eval $cmd | $script_folder/filter_by_diff.py $file $added_lines_file $absolute_repository_root`
102113

103114
# Providing some errors were relevant we print them out
104115
if [ "$result" ]

0 commit comments

Comments
 (0)