Skip to content

Commit d6e714b

Browse files
authored
[libc++] Rewrite the transitive header checking machinery (#110554)
Since we don't generate a full dependency graph of headers, we can greatly simplify the script that parses the result of --trace-includes. At the same time, we also unify the mechanism for detecting whether a header is a public/C compat/internal/etc header with the existing mechanism in header_information.py. As a drive-by this fixes the headers_in_modulemap.sh.py test which had been disabled by mistake because it used its own way of determining the list of libc++ headers. By consistently using header_information.py to get that information, problems like this shouldn't happen anymore. This should also unblock #110303, which was blocked because of a brittle implementation of the transitive includes check which broke when the repository was cloned at a path like /path/__something/more.
1 parent c623df3 commit d6e714b

8 files changed

+297
-298
lines changed

libcxx/test/libcxx/header_inclusions.gen.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
from libcxx.header_information import lit_header_restrictions, public_headers, mandatory_inclusions
1717

1818
for header in public_headers:
19-
header_guard = lambda h: f"_LIBCPP_{h.upper().replace('.', '_').replace('/', '_')}"
19+
header_guard = lambda h: f"_LIBCPP_{str(h).upper().replace('.', '_').replace('/', '_')}"
2020

2121
# <cassert> has no header guards
2222
if header == 'cassert':

libcxx/test/libcxx/headers_in_modulemap.sh.py

+6-16
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,15 @@
1-
# RUN: %{python} %s %{libcxx-dir}/utils %{include-dir}
1+
# RUN: %{python} %s %{libcxx-dir}/utils
22

33
import sys
4-
54
sys.path.append(sys.argv[1])
5+
from libcxx.header_information import all_headers, libcxx_include
66

7-
import pathlib
8-
import sys
9-
from libcxx.header_information import is_modulemap_header, is_header
10-
11-
headers = list(pathlib.Path(sys.argv[2]).rglob("*"))
12-
modulemap = open(f"{sys.argv[2]}/module.modulemap").read()
7+
with open(libcxx_include / "module.modulemap") as f:
8+
modulemap = f.read()
139

1410
isHeaderMissing = False
15-
16-
for header in headers:
17-
if not is_header(header):
18-
continue
19-
20-
header = header.relative_to(pathlib.Path(sys.argv[2])).as_posix()
21-
22-
if not is_modulemap_header(header):
11+
for header in all_headers:
12+
if not header.is_in_modulemap():
2313
continue
2414

2515
if not str(header) in modulemap:

libcxx/test/libcxx/transitive_includes.gen.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -42,10 +42,10 @@
4242

4343
all_traces = []
4444
for header in sorted(public_headers):
45-
if header.endswith(".h"): # Skip C compatibility or detail headers
45+
if header.is_C_compatibility() or header.is_internal():
4646
continue
4747

48-
normalized_header = re.sub("/", "_", header)
48+
normalized_header = re.sub("/", "_", str(header))
4949
print(
5050
f"""\
5151
// RUN: echo "#include <{header}>" | %{{cxx}} -xc++ - %{{flags}} %{{compile_flags}} --trace-includes -fshow-skipped-includes --preprocess > /dev/null 2> %t/trace-includes.{normalized_header}.txt
@@ -55,17 +55,17 @@
5555

5656
print(
5757
f"""\
58-
// RUN: %{{python}} %{{libcxx-dir}}/test/libcxx/transitive_includes_to_csv.py {' '.join(all_traces)} > %{{libcxx-dir}}/test/libcxx/transitive_includes/%{{cxx_std}}.csv
58+
// RUN: %{{python}} %{{libcxx-dir}}/test/libcxx/transitive_includes/to_csv.py {' '.join(all_traces)} > %{{libcxx-dir}}/test/libcxx/transitive_includes/%{{cxx_std}}.csv
5959
"""
6060
)
6161

6262
else:
6363
for header in public_headers:
64-
if header.endswith(".h"): # Skip C compatibility or detail headers
64+
if header.is_C_compatibility() or header.is_internal():
6565
continue
6666

6767
# Escape slashes for the awk command below
68-
escaped_header = header.replace("/", "\\/")
68+
escaped_header = str(header).replace("/", "\\/")
6969

7070
print(
7171
f"""\
@@ -92,7 +92,7 @@
9292
9393
// RUN: mkdir %t
9494
// RUN: %{{cxx}} %s %{{flags}} %{{compile_flags}} --trace-includes -fshow-skipped-includes --preprocess > /dev/null 2> %t/trace-includes.txt
95-
// RUN: %{{python}} %{{libcxx-dir}}/test/libcxx/transitive_includes_to_csv.py %t/trace-includes.txt > %t/actual_transitive_includes.csv
95+
// RUN: %{{python}} %{{libcxx-dir}}/test/libcxx/transitive_includes/to_csv.py %t/trace-includes.txt > %t/actual_transitive_includes.csv
9696
// RUN: cat %{{libcxx-dir}}/test/libcxx/transitive_includes/%{{cxx_std}}.csv | awk '/^{escaped_header} / {{ print }}' > %t/expected_transitive_includes.csv
9797
// RUN: diff -w %t/expected_transitive_includes.csv %t/actual_transitive_includes.csv
9898
#include <{header}>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
#!/usr/bin/env python
2+
# ===----------------------------------------------------------------------===##
3+
#
4+
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5+
# See https://llvm.org/LICENSE.txt for license information.
6+
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7+
#
8+
# ===----------------------------------------------------------------------===##
9+
10+
from typing import List, Tuple, Optional
11+
import argparse
12+
import io
13+
import itertools
14+
import os
15+
import pathlib
16+
import re
17+
import sys
18+
19+
libcxx_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
20+
sys.path.append(os.path.join(libcxx_root, "utils"))
21+
from libcxx.header_information import Header
22+
23+
def parse_line(line: str) -> Tuple[int, str]:
    """
    Parse a single line of --trace-includes output.

    A valid line starts with one or more periods, followed by a single space
    and the name of the file being included. The number of periods in front
    of the file name is the nesting level of that inclusion.

    Returns the inclusion level and the raw file name being included.

    Raises ValueError if the line does not have the expected shape.
    """
    match = re.match(r"(\.+) (.+)", line)
    if not match:
        # Raise a builtin exception: a bare `ArgumentError` is not defined in
        # this module, so referencing it would produce a NameError instead of
        # the intended diagnostic.
        raise ValueError(f"Line {line} contains invalid data.")

    dots, included_file = match.groups()
    return (len(dots), included_file)
36+
37+
def make_cxx_v1_relative(header: str) -> Optional[str]:
    """
    Strip everything up to and including the last `c++/v<N>/` directory from a path.

    Returns the remainder of the path after that directory, or None if the path
    contains no such directory. This is the heuristic used to decide whether a
    header is a libc++ header.
    """
    # Path separators are either a forward slash or, on Windows, a backslash.
    # Clang prints a backslash escaped as two consecutive backslashes, and each
    # of those must itself be escaped in the regular expression. Raw strings
    # avoid yet another level of escaping in the Python literal.
    separator = r"(?:/|\\\\)"
    pattern = "".join([r"^.*c\+\+", separator, r"v[0-9]+", separator, r"(.+)$"])

    found = re.match(pattern, header)
    return found.group(1) if found else None
56+
57+
def parse_file(file: io.TextIOBase) -> List[Tuple[Header, Header]]:
    """
    Extract the transitive libc++ includes from a file of --trace-includes output.

    Returns a list of (includer, included) pairs, where the includer is the
    most recent libc++ header seen at nesting level 1 and the included header
    is any libc++ header seen at a deeper level after it.
    """
    pairs = []
    top_level = None
    for raw_line in file.readlines():
        depth, path = parse_line(raw_line)
        libcxx_path = make_cxx_v1_relative(path)

        # Anything that does not live under c++/v<N> is not a libc++ header.
        if libcxx_path is None:
            continue

        if depth == 1:
            # A first-level header is the one doing the including. There is
            # usually exactly one, unless the compiler was passed a file with
            # `-include`.
            top_level = Header(libcxx_path)
        else:
            # Deeper levels are attributed to the current top-level includer.
            assert top_level is not None
            pairs.append((top_level, Header(libcxx_path)))
    return pairs
83+
84+
def print_csv(includes: List[Tuple[Header, Header]]) -> None:
    """
    Write the transitive includes to standard output as space-delimited CSV.

    Lines are grouped by includer and each included header is listed once per
    includer, in sorted order. Only included headers that are public and are
    not C compatibility headers are printed.
    """
    def includer_of(pair):
        return pair[0]

    # groupby only merges adjacent entries, so sort by the same key first.
    ordered = sorted(includes, key=includer_of)

    for includer, group in itertools.groupby(ordered, key=includer_of):
        unique_includees = {included for (_, included) in group}
        for header in sorted(unique_includees):
            if not header.is_public() or header.is_C_compatibility():
                continue
            print(f"{includer} {header}")
99+
100+
def main(argv):
    """
    Command-line entry point.

    `argv` is the list of command-line arguments, without the program name.
    Every input file is parsed as --trace-includes output, and the combined
    transitive includes are printed as space-delimited CSV.
    """
    parser = argparse.ArgumentParser(
        description="""
        Given a list of headers produced by --trace-includes, produce a list of libc++ headers in that output.

        Note that -fshow-skipped-includes must also be passed to the compiler in order to get sufficient
        information for this script to run.

        The output of this script is provided in space-delimited CSV format where each line contains:

            <header performing inclusion> <header being included>
        """)
    parser.add_argument("inputs", type=argparse.FileType("r"), nargs='+', default=None,
                        help="One or more files containing the result of --trace-includes")
    args = parser.parse_args(argv)

    includes = []
    for file in args.inputs:
        # argparse.FileType opens the files but never closes them, so close
        # each one as soon as it has been parsed.
        with file:
            includes.extend(parse_file(file))
    print_csv(includes)
118+
119+
if __name__ == "__main__":
120+
main(sys.argv[1:])

libcxx/test/libcxx/transitive_includes_to_csv.py

-147
This file was deleted.

libcxx/utils/generate_iwyu_mapping.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ def main(argv: typing.List[str]):
7171

7272
mappings = [] # Pairs of (header, public_header)
7373
for header in libcxx.header_information.all_headers:
74-
public_headers = IWYU_mapping(header)
74+
public_headers = IWYU_mapping(str(header))
7575
if public_headers is not None:
7676
mappings.extend((header, public) for public in public_headers)
7777

libcxx/utils/generate_libcxx_cppm_in.py

+3-11
Original file line numberDiff line numberDiff line change
@@ -9,19 +9,11 @@
99
import os.path
1010
import sys
1111

12-
from libcxx.header_information import module_c_headers
13-
from libcxx.header_information import module_headers
14-
from libcxx.header_information import header_restrictions
15-
from libcxx.header_information import headers_not_available
12+
from libcxx.header_information import module_c_headers, module_headers, header_restrictions, headers_not_available, libcxx_root
1613

1714

1815
def write_file(module):
19-
libcxx_module_directory = os.path.join(
20-
os.path.dirname(os.path.dirname(os.path.realpath(__file__))), "modules"
21-
)
22-
with open(
23-
os.path.join(libcxx_module_directory, f"{module}.cppm.in"), "w"
24-
) as module_cpp_in:
16+
with open(libcxx_root / "modules" / f"{module}.cppm.in", "w") as module_cpp_in:
2517
module_cpp_in.write(
2618
"""\
2719
// -*- C++ -*-
@@ -45,7 +37,7 @@ def write_file(module):
4537
// and the headers of Table 25: C++ headers for C library facilities [tab:headers.cpp.c]
4638
"""
4739
)
48-
for header in module_headers if module == "std" else module_c_headers:
40+
for header in sorted(module_headers if module == "std" else module_c_headers):
4941
if header in header_restrictions:
5042
module_cpp_in.write(
5143
f"""\

0 commit comments

Comments
 (0)