|
| 1 | +#!/usr/bin/env python |
| 2 | +# ===----------------------------------------------------------------------===## |
| 3 | +# |
| 4 | +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 5 | +# See https://llvm.org/LICENSE.txt for license information. |
| 6 | +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 7 | +# |
| 8 | +# ===----------------------------------------------------------------------===## |
| 9 | + |
| 10 | +from typing import List, Tuple, Optional |
| 11 | +import argparse |
| 12 | +import io |
| 13 | +import itertools |
| 14 | +import os |
| 15 | +import pathlib |
| 16 | +import re |
| 17 | +import sys |
| 18 | + |
| 19 | +libcxx_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) |
| 20 | +sys.path.append(os.path.join(libcxx_root, "utils")) |
| 21 | +from libcxx.header_information import Header |
| 22 | + |
def parse_line(line: str) -> Tuple[int, str]:
    """
    Parse a single line of --trace-includes output.

    A valid line consists of one or more periods, a single space, and the name
    of the file being included (e.g. ".. /path/to/c++/v1/vector").

    Returns the inclusion level and the raw file name being included.

    Raises ValueError if the line does not have that shape.
    """
    match = re.match(r"(\.+) (.+)", line)
    if not match:
        # ValueError is the builtin exception for malformed data; the previously
        # used `ArgumentError` name is not defined anywhere in this script.
        raise ValueError(f"Line {line} contains invalid data.")

    # The number of periods in front of the header name is the nesting level of
    # that header.
    return (len(match.group(1)), match.group(2))
| 36 | + |
def make_cxx_v1_relative(header: str) -> Optional[str]:
    """
    Strip everything up to and including the `c++/v<N>/` directory from a header path.

    Returns the remainder of the path (i.e. the path relative to
    <whatever>/c++/v<N>), or None when the path has no such component.

    The presence of that directory is the heuristic used to decide whether a
    header belongs to libc++.
    """
    # Clang may print Windows paths with either forward slashes or backslashes.
    # Backslashes are printed escaped (two in a row), and each of them has to be
    # escaped again inside the regular expression. Raw strings keep us from
    # needing yet another level of escaping in the Python literals.
    separator = r"(?:/|\\\\)"
    pattern = r"^.*c\+\+" + separator + r"v[0-9]+" + separator + r"(.+)$"
    found = re.match(pattern, header)
    return found.group(1) if found else None
| 56 | + |
def parse_file(file: io.TextIOBase) -> List[Tuple[Header, Header]]:
    """
    Parse a file containing --trace-includes output to generate a list of the
    transitive includes contained in it.

    Each element of the returned list is a (top-level includer, included header)
    pair. Only headers recognized as libc++ headers by make_cxx_v1_relative()
    are considered; every other line is skipped.

    Raises ValueError if a nested libc++ header shows up before any top-level
    libc++ header has been seen. Malformed lines are rejected by parse_line().
    """
    result: List[Tuple[Header, Header]] = []
    includer = None
    # Iterate over the file lazily rather than loading every line up front.
    for line in file:
        (level, header) = parse_line(line)
        relative = make_cxx_v1_relative(header)

        # Not a libc++ header
        if relative is None:
            continue

        # If we're at the first level, remember this header as being the one who includes other headers.
        # There's usually exactly one, except if the compiler is passed a file with `-include`.
        if level == 1:
            includer = Header(relative)
            continue

        # Otherwise, take note that this header is being included by the top-level includer.
        # This depends on the input, so it is checked explicitly: an `assert` would be
        # stripped when Python runs with -O.
        if includer is None:
            raise ValueError(f"Header {relative} is included before any top-level libc++ header.")
        result.append((includer, Header(relative)))
    return result
| 83 | + |
def print_csv(includes: List[Tuple[Header, Header]]) -> None:
    """
    Write the transitive includes to standard output, one
    "<includer> <includee>" pair per line (space-delimited CSV).

    Pairs are ordered by includer and then by includee, duplicates are dropped,
    and only includees that are public libc++ headers and not C compatibility
    headers are printed.
    """
    def includer_of(pair):
        return pair[0]

    # groupby() only merges adjacent entries, so sort by the grouping key first.
    ordered = sorted(includes, key=includer_of)
    for includer, group in itertools.groupby(ordered, key=includer_of):
        unique_includees = {includee for (_, includee) in group}
        for includee in sorted(unique_includees):
            if not includee.is_public() or includee.is_C_compatibility():
                continue
            print(f"{includer} {includee}")
| 99 | + |
def main(argv):
    """
    Command-line entry point.

    `argv` is the list of command-line arguments without the program name. Each
    argument names a file containing --trace-includes output; every file is
    parsed with parse_file() and the combined result is printed with print_csv().
    """
    parser = argparse.ArgumentParser(
        description="""
        Given a list of headers produced by --trace-includes, produce a list of libc++ headers in that output.

        Note that -fshow-skipped-includes must also be passed to the compiler in order to get sufficient
        information for this script to run.

        The output of this script is provided in space-delimited CSV format where each line contains:

            <header performing inclusion> <header being included>
        """)
    parser.add_argument("inputs", type=argparse.FileType("r"), nargs='+',
                        help="One or more files containing the result of --trace-includes")
    args = parser.parse_args(argv)

    includes = []
    try:
        for file in args.inputs:
            includes.extend(parse_file(file))
    finally:
        # argparse.FileType opens the inputs while parsing the arguments and never
        # closes them, so release them here. "-" maps to sys.stdin, which is not
        # ours to close.
        for file in args.inputs:
            if file is not sys.stdin:
                file.close()
    print_csv(includes)

if __name__ == "__main__":
    main(sys.argv[1:])
0 commit comments