Skip to content

Commit fcf503d

Browse files
committed
[clang][cas] IncludeTree filesystem merges from imported PCH/modules
Instead of looking through the PCH or modules for input files and loading them into the current FileManager while scanning, merge them directly from the include-tree that was used to build that PCH and/or module. This makes it easier to understand where files come from in the include-tree, and eliminates certain issues where the scanner module may include more files than are necessary, or include them with different paths in the case of VFS/symlinks. It also avoids needing to re-stat these files if they are not used during scanning the importing TU. As part of this, we teach IncludeTreeFilesystem to handle ./ stripping in paths to match what ASTWriter does. (cherry picked from commit 4c3f73e)
1 parent 843daa7 commit fcf503d

File tree

4 files changed

+81
-34
lines changed

4 files changed

+81
-34
lines changed

clang/include/clang/CAS/IncludeTree.h

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -483,4 +483,26 @@ createIncludeTreeFileSystem(IncludeTreeRoot &Root);
483483
} // namespace cas
484484
} // namespace clang
485485

486+
namespace llvm {
487+
template <> struct DenseMapInfo<clang::cas::IncludeTree::FileList::FileEntry> {
488+
using FileEntry = clang::cas::IncludeTree::FileList::FileEntry;
489+
490+
static FileEntry getEmptyKey() {
491+
return {cas::ObjectRef::getDenseMapEmptyKey(), 0};
492+
}
493+
494+
static FileEntry getTombstoneKey() {
495+
return {cas::ObjectRef::getDenseMapTombstoneKey(), 0};
496+
}
497+
498+
static unsigned getHashValue(FileEntry F) {
499+
return F.FileRef.getDenseMapHash();
500+
}
501+
502+
static bool isEqual(FileEntry LHS, FileEntry RHS) {
503+
return LHS.FileRef == RHS.FileRef && LHS.Size == RHS.Size;
504+
}
505+
};
506+
} // namespace llvm
507+
486508
#endif

clang/lib/CAS/IncludeTree.cpp

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -461,9 +461,8 @@ class IncludeTreeFileSystem : public llvm::vfs::FileSystem {
461461
Directories{Alloc};
462462

463463
llvm::ErrorOr<llvm::vfs::Status> status(const Twine &Path) override {
464-
SmallString<128> FilenameBuffer;
465-
StringRef Filename = Path.toStringRef(FilenameBuffer);
466-
464+
SmallString<128> Filename;
465+
getPath(Path, Filename);
467466
auto FileEntry = Files.find(Filename);
468467
if (FileEntry != Files.end()) {
469468
return makeStatus(Filename, FileEntry->second.Size,
@@ -483,8 +482,8 @@ class IncludeTreeFileSystem : public llvm::vfs::FileSystem {
483482

484483
llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
485484
openFileForRead(const Twine &Path) override {
486-
SmallString<128> FilenameBuffer;
487-
StringRef Filename = Path.toStringRef(FilenameBuffer);
485+
SmallString<128> Filename;
486+
getPath(Path, Filename);
488487
auto MaterializedFile = materialize(Filename);
489488
if (!MaterializedFile)
490489
return llvm::errorToErrorCode(MaterializedFile.takeError());
@@ -507,6 +506,15 @@ class IncludeTreeFileSystem : public llvm::vfs::FileSystem {
507506
return MaterializedFile{ContentsBlob->getData(), Entry->second};
508507
}
509508

509+
/// Produce a filename compatible with our StringMaps. See comment in
510+
/// \c createIncludeTreeFileSystem.
511+
void getPath(const Twine &Path, SmallVectorImpl<char> &Out) {
512+
Path.toVector(Out);
513+
// Strip dots, but do not eliminate a path consisting only of '.'
514+
if (Out.size() != 1)
515+
llvm::sys::path::remove_dots(Out);
516+
}
517+
510518
static llvm::vfs::Status makeStatus(StringRef Filename, uint64_t Size,
511519
llvm::sys::fs::UniqueID UniqueID,
512520
llvm::sys::fs::file_type Type) {
@@ -552,7 +560,11 @@ cas::createIncludeTreeFileSystem(IncludeTreeRoot &Root) {
552560
auto FilenameBlob = File.getFilename();
553561
if (!FilenameBlob)
554562
return FilenameBlob.takeError();
555-
StringRef Filename = FilenameBlob->getData();
563+
SmallString<128> Filename(FilenameBlob->getData());
564+
// Strip './' in the filename to match the behaviour of ASTWriter; we
565+
// also strip './' in IncludeTreeFileSystem::getPath.
566+
assert(Filename != ".");
567+
llvm::sys::path::remove_dots(Filename);
556568

557569
StringRef DirName = llvm::sys::path::parent_path(Filename);
558570
if (DirName.empty())

clang/lib/Tooling/DependencyScanning/IncludeTreeActionController.cpp

Lines changed: 40 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@ class IncludeTreeBuilder {
9999
llvm::SmallBitVector HasIncludeChecks;
100100
};
101101

102+
Error addModuleInputs(ASTReader &Reader);
102103
Expected<cas::ObjectRef> getObjectForFile(Preprocessor &PP, FileID FID);
103104
Expected<cas::ObjectRef>
104105
getObjectForFileNonCached(FileManager &FM, const SrcMgr::FileInfo &FI);
@@ -128,7 +129,7 @@ class IncludeTreeBuilder {
128129
// are recorded in the PCH, ordered by \p FileEntry::UID index.
129130
SmallVector<StringRef> PreIncludedFileNames;
130131
llvm::BitVector SeenIncludeFiles;
131-
SmallVector<cas::IncludeTree::FileList::FileEntry> IncludedFiles;
132+
llvm::SetVector<cas::IncludeTree::FileList::FileEntry> IncludedFiles;
132133
std::optional<cas::ObjectRef> PredefinesBufferRef;
133134
std::optional<cas::ObjectRef> ModuleIncludesBufferRef;
134135
std::optional<cas::ObjectRef> ModuleMapFileRef;
@@ -495,30 +496,8 @@ IncludeTreeBuilder::finishIncludeTree(CompilerInstance &ScanInstance,
495496
return Error::success(); // no need for additional work.
496497

497498
// Go through all the recorded input files.
498-
SmallVector<const FileEntry *, 32> NotSeenIncludes;
499-
for (serialization::ModuleFile &MF : Reader->getModuleManager()) {
500-
if (hasErrorOccurred())
501-
break;
502-
Reader->visitInputFiles(
503-
MF, /*IncludeSystem=*/true, /*Complain=*/false,
504-
[&](const serialization::InputFile &IF, bool isSystem) {
505-
OptionalFileEntryRef FE = IF.getFile();
506-
assert(FE);
507-
if (FE->getUID() >= SeenIncludeFiles.size() ||
508-
!SeenIncludeFiles[FE->getUID()])
509-
NotSeenIncludes.push_back(*FE);
510-
});
511-
}
512-
// Sort so we can visit the files in deterministic order.
513-
llvm::sort(NotSeenIncludes, [](const FileEntry *LHS, const FileEntry *RHS) {
514-
return LHS->getUID() < RHS->getUID();
515-
});
516-
517-
for (const FileEntry *FE : NotSeenIncludes) {
518-
auto FileNode = addToFileList(FM, FE);
519-
if (!FileNode)
520-
return FileNode.takeError();
521-
}
499+
if (Error E = addModuleInputs(*Reader))
500+
return E;
522501

523502
PreprocessorOptions &PPOpts = NewInvocation.getPreprocessorOpts();
524503
if (PPOpts.ImplicitPCHInclude.empty())
@@ -544,7 +523,8 @@ IncludeTreeBuilder::finishIncludeTree(CompilerInstance &ScanInstance,
544523
getCASTreeForFileIncludes(IncludeStack.pop_back_val());
545524
if (!MainIncludeTree)
546525
return MainIncludeTree.takeError();
547-
auto FileList = cas::IncludeTree::FileList::create(DB, IncludedFiles);
526+
auto FileList =
527+
cas::IncludeTree::FileList::create(DB, IncludedFiles.getArrayRef());
548528
if (!FileList)
549529
return FileList.takeError();
550530

@@ -553,6 +533,39 @@ IncludeTreeBuilder::finishIncludeTree(CompilerInstance &ScanInstance,
553533
ModuleMapFileRef);
554534
}
555535

536+
Error IncludeTreeBuilder::addModuleInputs(ASTReader &Reader) {
537+
for (serialization::ModuleFile &MF : Reader.getModuleManager()) {
538+
// Only add direct imports to avoid duplication. Each include tree is a
539+
// superset of its imported modules' include trees.
540+
if (!MF.isDirectlyImported())
541+
continue;
542+
543+
assert(!MF.IncludeTreeID.empty() && "missing include-tree for import");
544+
545+
std::optional<cas::CASID> ID;
546+
if (Error E = DB.parseID(MF.IncludeTreeID).moveInto(ID))
547+
return E;
548+
std::optional<cas::ObjectRef> Ref = DB.getReference(*ID);
549+
if (!Ref)
550+
return DB.createUnknownObjectError(*ID);
551+
std::optional<cas::IncludeTreeRoot> Root;
552+
if (Error E = cas::IncludeTreeRoot::get(DB, *Ref).moveInto(Root))
553+
return E;
554+
std::optional<cas::IncludeTree::FileList> Files;
555+
if (Error E = Root->getFileList().moveInto(Files))
556+
return E;
557+
558+
Error E = Files->forEachFile([&](auto IF, auto Size) -> Error {
559+
IncludedFiles.insert({IF.getRef(), Size});
560+
return Error::success();
561+
});
562+
if (E)
563+
return E;
564+
}
565+
566+
return Error::success();
567+
}
568+
556569
Expected<cas::ObjectRef> IncludeTreeBuilder::getObjectForFile(Preprocessor &PP,
557570
FileID FID) {
558571
SourceManager &SM = PP.getSourceManager();
@@ -628,7 +641,7 @@ IncludeTreeBuilder::addToFileList(FileManager &FM, const FileEntry *FE) {
628641
auto FileNode = createIncludeFile(Filename, **CASContents);
629642
if (!FileNode)
630643
return FileNode.takeError();
631-
IncludedFiles.push_back(
644+
IncludedFiles.insert(
632645
{FileNode->getRef(),
633646
static_cast<cas::IncludeTree::FileList::FileSizeTy>(FE->getSize())});
634647
return FileNode->getRef();

clang/test/ClangScanDeps/modules-include-tree.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,8 +77,8 @@
7777
// fmodule-file, and fmodule-file-cache-key options.
7878

7979
// CHECK: Files:
80-
// CHECK: [[PREFIX]]/Left.h llvmcas://{{[[:xdigit:]]+}}
8180
// CHECK: [[PREFIX]]/module.modulemap llvmcas://{{[[:xdigit:]]+}}
81+
// CHECK: [[PREFIX]]/Left.h llvmcas://{{[[:xdigit:]]+}}
8282
// CHECK: [[PREFIX]]/Top.h llvmcas://{{[[:xdigit:]]+}}
8383
// CHECK: [[PREFIX]]/Right.h llvmcas://{{[[:xdigit:]]+}}
8484

0 commit comments

Comments
 (0)