Skip to content

Commit 2531d02

Browse files
authored
Add a script to split minimal tests into distinct directories depending on metrics differences (mozilla#623)
* Add a script to split minimal tests This script splits minimal tests into distinct directories depending on their metric differences * Use the new script in the metric-checker
1 parent b829b9c commit 2531d02

File tree

2 files changed

+134
-21
lines changed

2 files changed

+134
-21
lines changed

check-grammars-crates.sh

Lines changed: 4 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -92,29 +92,12 @@ if [ "$(ls -A $COMPARE)" ]; then
9292
# Maximum number of considered minimal tests for a metric
9393
MT_THRESHOLD=30
9494

95-
# Array containing the considered metrics
96-
# TODO: Implement a command into rust-code-analysis-cli that returns all
97-
# computed metrics https://github.com/mozilla/rust-code-analysis/issues/478
98-
METRICS=("cognitive" "sloc" "ploc" "lloc" "cloc" "blank" "cyclomatic" "halstead" "nom" "nexits" "nargs")
99-
100-
# Output directory name
95+
# Output directory path
10196
OUTPUT_DIR=/tmp/output-$TREE_SITTER_CRATE
10297

103-
# Create output directory
104-
mkdir -p $OUTPUT_DIR
105-
106-
# Retrieve minimal tests for a metric
107-
for METRIC in "${METRICS[@]}"
108-
do
109-
110-
PREFIX_METRIC="\.$METRIC"
111-
FILES=`grep -r -i -l $PREFIX_METRIC $COMPARE | head -$MT_THRESHOLD`
112-
if [ -n "$FILES" ]
113-
then
114-
mkdir -p $OUTPUT_DIR/$METRIC
115-
cp $FILES $OUTPUT_DIR/$METRIC
116-
fi
117-
done
98+
# Split files into distinct directories depending on
99+
# their metric differences
100+
./split-minimal-tests.py -i $COMPARE -o $OUTPUT_DIR -t $MT_THRESHOLD
118101

119102
tar -czvf /tmp/json-diffs-and-minimal-tests.tar.gz $COMPARE $OUTPUT_DIR
120103
fi

split-minimal-tests.py

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
#!/usr/bin/env python3
2+
3+
"""split-minimal-tests
4+
This script splits the HTML minimal tests produced by a tool called
`json-minimal-tests` into distinct directories depending on metric differences.
6+
7+
Usage:
8+
9+
./split-minimal-tests.py -i INPUT_DIR -o OUTPUT_DIR [-t MT_THRESHOLD]
10+
11+
NOTE: OUTPUT_DIR is the path to the output directory to be created.
This directory will contain one subdirectory per metric that presents
differences, each named after its metric, or will be empty if no metric
differences are found.
MT_THRESHOLD determines the maximum number of minimal tests considered
for a metric.
17+
"""
18+
19+
import argparse
20+
import pathlib
21+
import re
22+
import shutil
23+
import typing as T
24+
25+
# Names of the metrics whose differences are looked for in the minimal tests.
# A metric is considered "different" in a file when the text `.<metric>`
# appears in it (see the search performed in `main`).
# TODO: Implement a command into rust-code-analysis-cli that returns all
# computed metrics https://github.com/mozilla/rust-code-analysis/issues/478
METRICS = [
    "cognitive",
    "sloc",
    "ploc",
    "lloc",
    "cloc",
    "blank",
    "cyclomatic",
    "halstead",
    "nom",
    "nexits",
    "nargs",
]
41+
42+
43+
def _non_negative_int(value: str) -> int:
    """Convert *value* to an ``int`` >= 0 (used as an argparse ``type``).

    Raises:
        argparse.ArgumentTypeError: if *value* is not an integer or is
            negative. A negative threshold would otherwise be used as a
            slice bound and silently drop files from the end of the list.
    """
    try:
        number = int(value)
    except ValueError:
        raise argparse.ArgumentTypeError(
            f"invalid integer value: {value!r}"
        ) from None
    if number < 0:
        raise argparse.ArgumentTypeError(
            f"must be a non-negative integer, got {number}"
        )
    return number


def _find_metric_files(
    input_dir: pathlib.Path, metrics: T.Iterable[str]
) -> T.Dict[str, T.List[pathlib.Path]]:
    """Map each metric name to the ``*.html`` files of *input_dir* mentioning it.

    A file "mentions" a metric when the text ``.<metric>`` occurs outside of
    any ``<pre>...</pre>`` element.
    """
    # Compile every pattern once, outside the per-file loop.
    pre_block = re.compile(r"<pre>.*?</pre>", re.DOTALL)
    patterns = {
        name: re.compile(rf"\.{re.escape(name)}") for name in metrics
    }
    found: T.Dict[str, T.List[pathlib.Path]] = {name: [] for name in patterns}

    # Sort the paths: glob order is unspecified, and the threshold keeps only
    # the first files, so sorting makes the selection reproducible.
    for path in sorted(input_dir.glob("*.html")):
        if not path.is_file():
            continue

        # NOTE(review): the reports are assumed to be UTF-8; undecodable
        # bytes are replaced rather than aborting the whole run.
        text = path.read_text(encoding="utf-8", errors="replace")

        # Drop the code inside <pre></pre> tags so that source snippets
        # cannot be mistaken for metric names.
        text = pre_block.sub("", text)

        for name, pattern in patterns.items():
            if pattern.search(text):
                found[name].append(path)

    return found


def _copy_metric_files(
    output_dir: pathlib.Path,
    metric_files: T.Dict[str, T.List[pathlib.Path]],
    threshold: T.Optional[int],
) -> None:
    """Copy the files of each metric into ``output_dir/<metric>``.

    A metric directory is created only when at least one file mentions the
    metric. At most *threshold* files are copied per metric; ``None`` means
    no limit.
    """
    for name, files in metric_files.items():
        if not files:
            continue

        metric_dir = output_dir / name
        metric_dir.mkdir(parents=True, exist_ok=True)

        # `is None` (not truthiness) so that a threshold of 0 means
        # "copy nothing" instead of "copy everything".
        selected = files if threshold is None else files[:threshold]
        for path in selected:
            shutil.copy(path, metric_dir)


def main(
    argv: T.Optional[T.Sequence[str]] = None,
    metrics: T.Optional[T.Iterable[str]] = None,
) -> None:
    """Split HTML minimal tests into one directory per differing metric.

    Args:
        argv: Command-line arguments, without the program name. Defaults to
            ``sys.argv[1:]`` when ``None``.
        metrics: Metric names to look for. Defaults to the module-level
            ``METRICS`` list when ``None``.

    Raises:
        SystemExit: raised by argparse on invalid or missing arguments.
    """
    parser = argparse.ArgumentParser(
        prog="split-minimal-tests",
        description="This tool splits HTML minimal-tests, produced by "
        "a software called `json-minimal-tests`, into distinct directories "
        "depending on metric differences.",
        epilog="The source code of this program can be found on "
        "GitHub at https://github.com/mozilla/rust-code-analysis",
    )

    # Arguments
    parser.add_argument(
        "--input",
        "-i",
        type=pathlib.Path,
        required=True,
        help="Input directory containing HTML minimal tests.",
    )

    parser.add_argument(
        "--output",
        "-o",
        type=pathlib.Path,
        required=True,
        help="Path to the output directory.",
    )

    # Optional arguments
    parser.add_argument(
        "--threshold",
        "-t",
        type=_non_negative_int,
        help="Maximum number of considered minimal tests for a metric.",
    )

    args = parser.parse_args(argv)

    # The output directory is created even when no metric difference is found.
    args.output.mkdir(parents=True, exist_ok=True)

    metric_names = METRICS if metrics is None else list(metrics)
    metric_files = _find_metric_files(args.input, metric_names)
    _copy_metric_files(args.output, metric_files, args.threshold)
128+
129+
# Run the command-line interface only when executed as a script,
# not when the module is imported.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)