Skip to content

Commit b4c83a1

Browse files
committed
[Tooling/DependencyScanning & Preprocessor] Refactor dependency scanning to produce pre-lexed preprocessor directive tokens, instead of minimized sources
This is a commit with the following changes: * Remove `ExcludedPreprocessorDirectiveSkipMapping` and related functionality Removes `ExcludedPreprocessorDirectiveSkipMapping`; its intended benefit for fast skipping of excluded directive blocks will be superseded by a follow-up patch in the series that will use dependency scanning lexing for the same purpose. * Refactor dependency scanning to produce pre-lexed preprocessor directive tokens, instead of minimized sources Replaces the "source minimization" mechanism with a mechanism that produces lexed dependency directive tokens. * Make the special lexing for dependency scanning a first-class feature of the `Preprocessor` and `Lexer` This is bringing the following benefits: * Full access to the preprocessor state during dependency scanning. E.g. a component can see what includes were taken and where they were located in the actual sources. * Improved performance for dependency scanning. Measurements with a release+thin-LTO build show a ~11% reduction in wall time. * Opportunity to use dependency scanning lexing to speed up skipping of excluded conditional blocks during normal preprocessing (as follow-up, not part of this patch). For normal preprocessing, measurements show that differences are below the noise level. Since, after this change, we don't minimize sources and pass them in place of the real sources, `DependencyScanningFilesystem` is not technically necessary, but it has valuable performance benefits for caching file `stat`s along with the results of scanning the sources. So the setup of using the `DependencyScanningFilesystem` during a dependency scan remains. Differential Revision: https://reviews.llvm.org/D125486 Differential Revision: https://reviews.llvm.org/D125487 Differential Revision: https://reviews.llvm.org/D125488
1 parent b58a420 commit b4c83a1

19 files changed

+787
-920
lines changed

clang/include/clang/Lex/DependencyDirectivesScanner.h

Lines changed: 56 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -19,15 +19,41 @@
1919

2020
#include "clang/Basic/SourceLocation.h"
2121
#include "llvm/ADT/ArrayRef.h"
22-
#include "llvm/ADT/SmallVector.h"
23-
#include "llvm/ADT/StringRef.h"
2422

2523
namespace clang {
2624

25+
namespace tok {
26+
enum TokenKind : unsigned short;
27+
}
28+
2729
class DiagnosticsEngine;
2830

2931
namespace dependency_directives_scan {
3032

33+
/// Token lexed as part of dependency directive scanning.
34+
struct Token {
35+
/// Offset into the original source input.
36+
unsigned Offset;
37+
/// Length of the token; \c getEnd() is computed as \c Offset + \c Length.
/// NOTE(review): presumably measured in bytes of the source input - confirm.
unsigned Length;
38+
/// The kind of this token, compared against by \c is(), \c isNot() and
/// \c isOneOf().
tok::TokenKind Kind;
39+
/// Per-token flag bits.
/// NOTE(review): presumably mirrors the flag bits of \c clang::Token so the
/// lexer can convert this token into a regular one - confirm against the
/// scanner implementation.
unsigned short Flags;
40+
41+
/// Constructs a token by storing each argument in the member of the same
/// name.
Token(unsigned Offset, unsigned Length, tok::TokenKind Kind,
42+
unsigned short Flags)
43+
: Offset(Offset), Length(Length), Kind(Kind), Flags(Flags) {}
44+
45+
/// \returns the offset one past the end of the token (\c Offset + \c Length).
unsigned getEnd() const { return Offset + Length; }
46+
47+
/// \returns true if this token's kind is exactly \p K.
bool is(tok::TokenKind K) const { return Kind == K; }
48+
/// \returns true if this token's kind is anything other than \p K.
bool isNot(tok::TokenKind K) const { return Kind != K; }
49+
/// \returns true if this token's kind is either \p K1 or \p K2.
bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const {
50+
return is(K1) || is(K2);
51+
}
52+
/// Variadic form: \returns true if this token's kind matches \p K1 or any
/// of the remaining kinds in \p Ks. Recurses on the tail of the pack until
/// the two-argument overload terminates it.
template <typename... Ts> bool isOneOf(tok::TokenKind K1, Ts... Ks) const {
53+
return is(K1) || isOneOf(Ks...);
54+
}
55+
};
56+
3157
/// Represents the kind of preprocessor directive or a module declaration that
3258
/// is tracked by the scanner in its token output.
3359
enum DirectiveKind : uint8_t {
@@ -52,63 +78,59 @@ enum DirectiveKind : uint8_t {
5278
pp_else,
5379
pp_endif,
5480
decl_at_import,
55-
cxx_export_decl,
5681
cxx_module_decl,
5782
cxx_import_decl,
83+
cxx_export_module_decl,
84+
cxx_export_import_decl,
5885
pp_eof,
5986
};
6087

6188
/// Represents a directive that's lexed as part of the dependency directives
6289
/// scanning. It's used to track various preprocessor directives that could
6390
/// potentially have an effect on the dependencies.
6491
struct Directive {
92+
ArrayRef<Token> Tokens;
93+
6594
/// The kind of directive.
6695
DirectiveKind Kind = pp_none;
6796

68-
/// Offset into the output byte stream of where the directive begins.
69-
int Offset = -1;
70-
71-
Directive(DirectiveKind K, int Offset) : Kind(K), Offset(Offset) {}
72-
};
73-
74-
/// Simplified token range to track the range of a potentially skippable PP
75-
/// directive.
76-
struct SkippedRange {
77-
/// Offset into the output byte stream of where the skipped directive begins.
78-
int Offset;
79-
80-
/// The number of bytes that can be skipped before the preprocessing must
81-
/// resume.
82-
int Length;
97+
Directive() = default;
98+
Directive(DirectiveKind K, ArrayRef<Token> Tokens)
99+
: Tokens(Tokens), Kind(K) {}
83100
};
84101

85-
/// Computes the potential source ranges that can be skipped by the preprocessor
86-
/// when skipping a directive like #if, #ifdef or #elsif.
87-
///
88-
/// \returns false on success, true on error.
89-
bool computeSkippedRanges(ArrayRef<Directive> Input,
90-
llvm::SmallVectorImpl<SkippedRange> &Range);
91-
92102
} // end namespace dependency_directives_scan
93103

94-
/// Minimize the input down to the preprocessor directives that might have
104+
/// Scan the input for the preprocessor directives that might have
95105
/// an effect on the dependencies for a compilation unit.
96106
///
97-
/// This function deletes all non-preprocessor code, and strips anything that
98-
/// can't affect what gets included. It canonicalizes whitespace where
99-
/// convenient to stabilize the output against formatting changes in the input.
100-
///
101-
/// Clears the output vectors at the beginning of the call.
107+
/// This function ignores all non-preprocessor code and anything that
108+
/// can't affect what gets included.
102109
///
103110
/// \returns false on success, true on error. If the diagnostic engine is not
104111
/// null, an appropriate error is reported using the given input location
105-
/// with the offset that corresponds to the minimizer's current buffer offset.
112+
/// with the offset that corresponds to the \p Input buffer offset.
106113
bool scanSourceForDependencyDirectives(
107-
llvm::StringRef Input, llvm::SmallVectorImpl<char> &Output,
108-
llvm::SmallVectorImpl<dependency_directives_scan::Directive> &Directives,
114+
StringRef Input, SmallVectorImpl<dependency_directives_scan::Token> &Tokens,
115+
SmallVectorImpl<dependency_directives_scan::Directive> &Directives,
109116
DiagnosticsEngine *Diags = nullptr,
110117
SourceLocation InputSourceLoc = SourceLocation());
111118

119+
/// Print the previously scanned dependency directives as minimized source text.
120+
///
121+
/// \param Source The original source text that the dependency directives were
122+
/// scanned from.
123+
/// \param Directives The previously scanned dependency
124+
/// directives.
125+
/// \param OS the stream to print the dependency directives on.
126+
///
127+
/// This is used primarily for testing purposes, during dependency scanning the
128+
/// \p Lexer uses the tokens directly, not their printed version.
129+
void printDependencyDirectivesAsSource(
130+
StringRef Source,
131+
ArrayRef<dependency_directives_scan::Directive> Directives,
132+
llvm::raw_ostream &OS);
133+
112134
} // end namespace clang
113135

114136
#endif // LLVM_CLANG_LEX_DEPENDENCYDIRECTIVESSCANNER_H

clang/include/clang/Lex/Lexer.h

Lines changed: 27 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include "clang/Basic/LangOptions.h"
1717
#include "clang/Basic/SourceLocation.h"
1818
#include "clang/Basic/TokenKinds.h"
19+
#include "clang/Lex/DependencyDirectivesScanner.h"
1920
#include "clang/Lex/PreprocessorLexer.h"
2021
#include "clang/Lex/Token.h"
2122
#include "llvm/ADT/Optional.h"
@@ -149,6 +150,13 @@ class Lexer : public PreprocessorLexer {
149150
// CurrentConflictMarkerState - The kind of conflict marker we are handling.
150151
ConflictMarkerKind CurrentConflictMarkerState;
151152

153+
/// Non-empty if this \p Lexer is \p isDependencyDirectivesLexer().
154+
ArrayRef<dependency_directives_scan::Directive> DepDirectives;
155+
156+
/// If this \p Lexer is \p isDependencyDirectivesLexer(), it represents the
157+
/// next token to use from the current dependency directive.
158+
unsigned NextDepDirectiveTokenIndex = 0;
159+
152160
void InitLexer(const char *BufStart, const char *BufPtr, const char *BufEnd);
153161

154162
public:
@@ -195,6 +203,23 @@ class Lexer : public PreprocessorLexer {
195203
/// return the tok::eof token. This implicitly involves the preprocessor.
196204
bool Lex(Token &Result);
197205

206+
/// Called when the preprocessor is in 'dependency scanning lexing mode'.
207+
bool LexDependencyDirectiveToken(Token &Result);
208+
209+
/// Called when the preprocessor is in 'dependency scanning lexing mode' and
210+
/// is skipping a conditional block.
211+
bool LexDependencyDirectiveTokenWhileSkipping(Token &Result);
212+
213+
/// True when the preprocessor is in 'dependency scanning lexing mode' and
214+
/// created this \p Lexer for lexing a set of dependency directive tokens.
215+
bool isDependencyDirectivesLexer() const { return !DepDirectives.empty(); }
216+
217+
/// Initializes \p Result with data from \p DDTok and advances \p BufferPtr to
218+
/// the position just after the token.
219+
/// \returns the buffer pointer at the beginning of the token.
220+
const char *convertDependencyDirectiveToken(
221+
const dependency_directives_scan::Token &DDTok, Token &Result);
222+
198223
public:
199224
/// isPragmaLexer - Returns true if this Lexer is being used to lex a pragma.
200225
bool isPragmaLexer() const { return Is_PragmaLexer; }
@@ -288,14 +313,8 @@ class Lexer : public PreprocessorLexer {
288313
return BufferPtr - BufferStart;
289314
}
290315

291-
/// Skip over \p NumBytes bytes.
292-
///
293-
/// If the skip is successful, the next token will be lexed from the new
294-
/// offset. The lexer also assumes that we skipped to the start of the line.
295-
///
296-
/// \returns true if the skip failed (new offset would have been past the
297-
/// end of the buffer), false otherwise.
298-
bool skipOver(unsigned NumBytes);
316+
/// Set the lexer's buffer pointer to \p Offset.
317+
void seek(unsigned Offset, bool IsAtStartOfLine);
299318

300319
/// Stringify - Convert the specified string into a C string by i) escaping
301320
/// '\\' and " characters and ii) replacing newline character(s) with "\\n".

clang/include/clang/Lex/Preprocessor.h

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@
2929
#include "clang/Lex/ModuleLoader.h"
3030
#include "clang/Lex/ModuleMap.h"
3131
#include "clang/Lex/PPCallbacks.h"
32-
#include "clang/Lex/PreprocessorExcludedConditionalDirectiveSkipMapping.h"
3332
#include "clang/Lex/Token.h"
3433
#include "clang/Lex/TokenLexer.h"
3534
#include "llvm/ADT/ArrayRef.h"
@@ -558,6 +557,7 @@ class Preprocessor {
558557
CLK_Lexer,
559558
CLK_TokenLexer,
560559
CLK_CachingLexer,
560+
CLK_DependencyDirectivesLexer,
561561
CLK_LexAfterModuleImport
562562
} CurLexerKind = CLK_Lexer;
563563

@@ -2595,14 +2595,6 @@ class Preprocessor {
25952595
void emitMacroDeprecationWarning(const Token &Identifier) const;
25962596
void emitRestrictExpansionWarning(const Token &Identifier) const;
25972597
void emitFinalMacroWarning(const Token &Identifier, bool IsUndef) const;
2598-
2599-
Optional<unsigned>
2600-
getSkippedRangeForExcludedConditionalBlock(SourceLocation HashLoc);
2601-
2602-
/// Contains the currently active skipped range mappings for skipping excluded
2603-
/// conditional directives.
2604-
ExcludedPreprocessorDirectiveSkipMapping
2605-
*ExcludedConditionalDirectiveSkipMappings;
26062598
};
26072599

26082600
/// Abstract base class that describes a handler that will receive

clang/include/clang/Lex/PreprocessorExcludedConditionalDirectiveSkipMapping.h

Lines changed: 0 additions & 30 deletions
This file was deleted.

clang/include/clang/Lex/PreprocessorOptions.h

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,9 @@
1010
#define LLVM_CLANG_LEX_PREPROCESSOROPTIONS_H_
1111

1212
#include "clang/Basic/BitmaskEnum.h"
13+
#include "clang/Basic/FileEntry.h"
1314
#include "clang/Basic/LLVM.h"
14-
#include "clang/Lex/PreprocessorExcludedConditionalDirectiveSkipMapping.h"
15+
#include "clang/Lex/DependencyDirectivesScanner.h"
1516
#include "llvm/ADT/StringRef.h"
1617
#include "llvm/ADT/StringSet.h"
1718
#include <functional>
@@ -200,13 +201,18 @@ class PreprocessorOptions {
200201
/// build it again.
201202
std::shared_ptr<FailedModulesSet> FailedModules;
202203

203-
/// Contains the currently active skipped range mappings for skipping excluded
204-
/// conditional directives.
204+
/// Function for getting the dependency preprocessor directives of a file.
205205
///
206-
/// The pointer is passed to the Preprocessor when it's constructed. The
207-
/// pointer is unowned, the client is responsible for its lifetime.
208-
ExcludedPreprocessorDirectiveSkipMapping
209-
*ExcludedConditionalDirectiveSkipMappings = nullptr;
206+
/// These are directives derived from a special form of lexing where the
207+
/// source input is scanned for the preprocessor directives that might have an
208+
/// effect on the dependencies for a compilation unit.
209+
///
210+
/// Enables a client to cache the directives for a file and provide them
211+
/// across multiple compiler invocations.
212+
/// FIXME: Allow returning an error.
213+
std::function<Optional<ArrayRef<dependency_directives_scan::Directive>>(
214+
FileEntryRef)>
215+
DependencyDirectivesForFile;
210216

211217
/// Set up preprocessor for RunAnalysis action.
212218
bool SetUpStaticAnalyzer = false;

0 commit comments

Comments
 (0)