Skip to content

Llvm modules on demand bmi #71773

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 4 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,12 @@ class ObjectFilePCHContainerWriter : public PCHContainerWriter {
const std::string &OutputFileName,
std::unique_ptr<llvm::raw_pwrite_stream> OS,
std::shared_ptr<PCHBuffer> Buffer) const override;

/// Override of PCHContainerWriter::CreatePCHDeferredContainerGenerator.
/// The definition for this writer is a placeholder: it asserts and returns
/// nullptr.
std::unique_ptr<ASTConsumer> CreatePCHDeferredContainerGenerator(
CompilerInstance &CI, const std::string &MainFileName,
const std::string &OutputFileName,
std::unique_ptr<llvm::raw_pwrite_stream> OS,
std::shared_ptr<PCHBuffer> Buffer) const override;
};

/// A PCHContainerReader implementation that uses LLVM to
Expand Down
5 changes: 5 additions & 0 deletions clang/include/clang/Sema/Sema.h
Original file line number Diff line number Diff line change
Expand Up @@ -2389,6 +2389,11 @@ class Sema final {

bool isModuleVisible(const Module *M, bool ModulePrivate = false);

/// Report whether the last entry of ModuleScopes is an implementation scope,
/// i.e. its ModuleInterface flag is not set. Yields false when ModuleScopes
/// is empty.
bool isModuleImplementation() const {
  if (ModuleScopes.empty())
    return false;
  return !ModuleScopes.back().ModuleInterface;
}

// When loading non-modular PCH files, this is used to restore module
// visibility.
void makeModuleVisible(Module *Mod, SourceLocation ImportLoc) {
Expand Down
3 changes: 2 additions & 1 deletion clang/include/clang/Serialization/ASTWriter.h
Original file line number Diff line number Diff line change
Expand Up @@ -807,6 +807,7 @@ class PCHGenerator : public SemaConsumer {
ASTWriter Writer;
bool AllowASTWithErrors;
bool ShouldCacheASTInMemory;
bool IsForBMI;

protected:
ASTWriter &getWriter() { return Writer; }
Expand All @@ -820,7 +821,7 @@ class PCHGenerator : public SemaConsumer {
ArrayRef<std::shared_ptr<ModuleFileExtension>> Extensions,
bool AllowASTWithErrors = false, bool IncludeTimestamps = true,
bool BuildingImplicitModule = false,
bool ShouldCacheASTInMemory = false);
bool ShouldCacheASTInMemory = false, bool IsForBMI = false);
~PCHGenerator() override;

void InitializeSema(Sema &S) override { SemaPtr = &S; }
Expand Down
16 changes: 16 additions & 0 deletions clang/include/clang/Serialization/PCHContainerOperations.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ class CompilerInstance;

/// State shared between a PCHGenerator and the container generator that is
/// chained after it.
struct PCHBuffer {
  /// Signature returned by the AST writer for the serialized AST.
  ASTFileSignature Signature;
  /// File name the serialized AST is expected to be written to; stays empty
  /// unless the producer fills it in.
  std::string PresumedFileName;
  /// The serialized AST bytes.
  llvm::SmallVector<char, 0> Data;
  /// Whether Data holds a finished AST. Initialized to false so that a
  /// default-initialized buffer is never mistaken for a complete one.
  bool IsComplete = false;
};
Expand All @@ -47,6 +48,15 @@ class PCHContainerWriter {
const std::string &OutputFileName,
std::unique_ptr<llvm::raw_pwrite_stream> OS,
std::shared_ptr<PCHBuffer> Buffer) const = 0;

/// Return an ASTConsumer that can be chained after a PCHGenerator and that
/// emits the serialized AST bitstream held in \p Buffer.
///
/// NOTE(review): the "deferred" variant appears intended to postpone opening
/// the output until it is known to be needed, taking the file name from
/// Buffer->PresumedFileName rather than from \p OS — confirm against the
/// implementations.
virtual std::unique_ptr<ASTConsumer> CreatePCHDeferredContainerGenerator(
CompilerInstance &CI, const std::string &MainFileName,
const std::string &OutputFileName,
std::unique_ptr<llvm::raw_pwrite_stream> OS,
std::shared_ptr<PCHBuffer> Buffer) const = 0;
};

/// This abstract interface provides operations for unwrapping
Expand Down Expand Up @@ -74,6 +84,12 @@ class RawPCHContainerWriter : public PCHContainerWriter {
const std::string &OutputFileName,
std::unique_ptr<llvm::raw_pwrite_stream> OS,
std::shared_ptr<PCHBuffer> Buffer) const override;

/// Override of PCHContainerWriter::CreatePCHDeferredContainerGenerator. The
/// definition returns a consumer that shares \p Buffer and, once the
/// translation unit has been handled, writes Buffer->Data to
/// Buffer->PresumedFileName if the buffer is complete and that name is set.
std::unique_ptr<ASTConsumer> CreatePCHDeferredContainerGenerator(
CompilerInstance &CI, const std::string &MainFileName,
const std::string &OutputFileName,
std::unique_ptr<llvm::raw_pwrite_stream> OS,
std::shared_ptr<PCHBuffer> Buffer) const override;
};

/// Implements read operations for a raw pass-through PCH container.
Expand Down
10 changes: 10 additions & 0 deletions clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -357,6 +357,16 @@ ArrayRef<StringRef> ObjectFilePCHContainerReader::getFormats() const {
return Formats;
}

/// Placeholder for the object-file container writer: this path is not
/// expected to be taken. Asserts in builds with assertions enabled and
/// returns nullptr.
///
/// NOTE(review): with NDEBUG the assert is compiled out and callers receive a
/// null consumer; FrontendAction::CreateWrappedASTConsumer pushes the result
/// straight into its consumer list — confirm a null entry is tolerated, or
/// report a proper diagnostic here instead.
std::unique_ptr<ASTConsumer>
ObjectFilePCHContainerWriter::CreatePCHDeferredContainerGenerator(
CompilerInstance &CI, const std::string &MainFileName,
const std::string &OutputFileName,
std::unique_ptr<llvm::raw_pwrite_stream> OS,
std::shared_ptr<PCHBuffer> Buffer) const {
assert(0 && "Did not mean to arrive here");
return nullptr;
}

StringRef
ObjectFilePCHContainerReader::ExtractPCH(llvm::MemoryBufferRef Buffer) const {
StringRef PCH;
Expand Down
78 changes: 73 additions & 5 deletions clang/lib/Frontend/FrontendAction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include "clang/Basic/LangStandard.h"
#include "clang/Basic/Sarif.h"
#include "clang/Basic/Stack.h"
#include "clang/Frontend/ASTConsumers.h"
#include "clang/Frontend/ASTUnit.h"
#include "clang/Frontend/CompilerInstance.h"
#include "clang/Frontend/FrontendDiagnostic.h"
Expand All @@ -33,6 +34,7 @@
#include "clang/Sema/MultiplexExternalSemaSource.h"
#include "clang/Serialization/ASTDeserializationListener.h"
#include "clang/Serialization/ASTReader.h"
#include "clang/Serialization/ASTWriter.h"
#include "clang/Serialization/GlobalModuleIndex.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/Support/BuryPointer.h"
Expand Down Expand Up @@ -184,17 +186,83 @@ FrontendAction::CreateWrappedASTConsumer(CompilerInstance &CI,
if (!FoundAllPlugins)
return nullptr;

// If there are no registered plugins we don't need to wrap the consumer
if (FrontendPluginRegistry::begin() == FrontendPluginRegistry::end())
return Consumer;

// If this is a code completion run, avoid invoking the plugin consumers
if (CI.hasCodeCompletionConsumer())
return Consumer;

// Do we want to emit a BMI as a second artefact of the compile, on demand if
// the source generates an interface?
// We do this for C++20 modules, if the input is source, we do not stop after
// the Preprocessor and we intend to emit an output. Note that we do not need
// to consider the case in which a BMI is explicitly the main output of the
// compilation.
InputKind IK = getCurrentFileKind();
bool EmitBMI = CI.getLangOpts().CPlusPlusModules &&
IK.getFormat() == InputKind::Format::Source &&
!this->usesPreprocessorOnly() &&
(CI.getFrontendOpts().ProgramAction == frontend::EmitObj ||
CI.getFrontendOpts().ProgramAction == frontend::EmitAssembly ||
CI.getFrontendOpts().ProgramAction == frontend::EmitBC ||
CI.getFrontendOpts().ProgramAction == frontend::EmitLLVM ||
CI.getFrontendOpts().ProgramAction == frontend::EmitLLVMOnly ||
CI.getFrontendOpts().ProgramAction == frontend::EmitCodeGenOnly);

// If there are no registered plugins and we do not need to emit a BMI, we
// do not need to wrap the consumer in a MultiplexConsumer.
if (!EmitBMI &&
FrontendPluginRegistry::begin() == FrontendPluginRegistry::end())
return Consumer;

// List of AST consumers for this source.
std::vector<std::unique_ptr<ASTConsumer>> Consumers;

// First, add a pair of consumers that will write the AST as a CMI for this
// module. ??? : Should any plugins that precede the main consumer also be
// run before this?
if (EmitBMI) {
// Make a default output filename (this would be overwritten by the one
// derived from the module to BMI name-mapping determined from any export
// statement). We do not open this on the output stream, but provide it
// as a fallback.
// The default here is to output the pcm alongside the main output.
std::string XOut
= llvm::sys::path::parent_path(CI.getFrontendOpts().OutputFile).str();
std::string XIn = llvm::sys::path::filename(InFile).str();
if (!XOut.empty()) {
XOut += llvm::sys::path::get_separator();
XOut += XIn;
} else
XOut = XIn;

SmallString<128> Path(XOut);
llvm::sys::path::replace_extension(Path, "pcm");
XOut = std::string(Path.str());

std::unique_ptr<raw_pwrite_stream> OS;
std::string Sysroot;
auto Buffer = std::make_shared<PCHBuffer>();

// Add a job to build the CMI from the AST.
// ??? : change the CTOR flags to note that this is a CXX20 module?
Consumers.push_back(std::make_unique<PCHGenerator>(
CI.getPreprocessor(), CI.getModuleCache(), XOut, Sysroot, Buffer,
CI.getFrontendOpts().ModuleFileExtensions,
/*AllowASTWithErrors=*/false,
/*IncludeTimestamps=*/false,
/*BuildingImplicitModule=*/false,
/*ShouldCacheASTInMemory=*/false,
/*IsForBMI=*/true));

// This writes the CMI (if one is needed), but does not open the output
// file unless/until it is required.
Consumers.push_back(CI.getPCHContainerWriter()
.CreatePCHDeferredContainerGenerator(
CI, std::string(InFile), XOut, std::move(OS), Buffer));
}

// Collect the list of plugins that go before the main action (in Consumers)
// or after it (in AfterConsumers)
std::vector<std::unique_ptr<ASTConsumer>> Consumers;

std::vector<std::unique_ptr<ASTConsumer>> AfterConsumers;
for (const FrontendPluginRegistry::entry &Plugin :
FrontendPluginRegistry::entries()) {
Expand Down
51 changes: 45 additions & 6 deletions clang/lib/Serialization/GeneratePCH.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

#include "clang/AST/ASTContext.h"
#include "clang/Lex/HeaderSearch.h"
#include "clang/Lex/HeaderSearchOptions.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Sema/SemaConsumer.h"
#include "clang/Serialization/ASTWriter.h"
Expand All @@ -25,13 +26,13 @@ PCHGenerator::PCHGenerator(
StringRef OutputFile, StringRef isysroot, std::shared_ptr<PCHBuffer> Buffer,
ArrayRef<std::shared_ptr<ModuleFileExtension>> Extensions,
bool AllowASTWithErrors, bool IncludeTimestamps,
bool BuildingImplicitModule, bool ShouldCacheASTInMemory)
bool BuildingImplicitModule, bool ShouldCacheASTInMemory, bool IsForBMI)
: PP(PP), OutputFile(OutputFile), isysroot(isysroot.str()),
SemaPtr(nullptr), Buffer(std::move(Buffer)), Stream(this->Buffer->Data),
Writer(Stream, this->Buffer->Data, ModuleCache, Extensions,
IncludeTimestamps, BuildingImplicitModule),
AllowASTWithErrors(AllowASTWithErrors),
ShouldCacheASTInMemory(ShouldCacheASTInMemory) {
ShouldCacheASTInMemory(ShouldCacheASTInMemory), IsForBMI(IsForBMI) {
this->Buffer->IsComplete = false;
}

Expand All @@ -48,23 +49,61 @@ void PCHGenerator::HandleTranslationUnit(ASTContext &Ctx) {
return;

Module *Module = nullptr;
if (PP.getLangOpts().isCompilingModule()) {
if (PP.getLangOpts().isCompilingModule() || IsForBMI) {
Module = PP.getHeaderSearchInfo().lookupModule(
PP.getLangOpts().CurrentModule, SourceLocation(),
/*AllowSearch*/ false);
if (!Module) {
assert(hasErrors && "emitting module but current module doesn't exist");
// If we have errors, then that might have prevented the creation of the
// module - otherwise, for the case we are compiling a module, it must be
// present.
// Conversely, IsForBMI output is speculative and only produced for TUs
// in which module interfaces are discovered, thus it is not an error to
// find that there is no module in this case.
assert((hasErrors || IsForBMI) &&
"emitting module but current module doesn't exist");
return;
}
}
} // else, non-modular PCH.

// Errors that do not prevent the PCH from being written should not cause the
// overall compilation to fail either.
if (AllowASTWithErrors)
PP.getDiagnostics().getClient()->clear();

// Emit the PCH file to the Buffer.
assert(SemaPtr && "No Sema?");

// A module implementation implicitly pulls in its interface module.
// Since it has the same name as the implementation, it will be found
// by the lookup above. Fortunately, Sema records the difference in
// the ModuleScopes; we do not need to output the BMI in that case.
if (IsForBMI && SemaPtr->isModuleImplementation())
return;

if (IsForBMI) {

assert(Module && !Module->IsFromModuleFile &&
"trying to re-write a module?");

// Here we would ideally use a P1184 server to find the module name.
// However, in the short-term we are going to (ab-)use the name/file pairs
// that can be specified with -fmodule-file=Name=Path. If there is no
// entry there, then we fall back to the default CMI name, based on the
// source file name.
HeaderSearch &HS = PP.getHeaderSearchInfo();
const HeaderSearchOptions &HSOpts = HS.getHeaderSearchOpts();
std::string ModuleFilename;
if (!HSOpts.PrebuiltModuleFiles.empty() ||
!HSOpts.PrebuiltModulePaths.empty())
ModuleFilename = HS.getPrebuiltModuleFileName(Module->Name);

if (!ModuleFilename.empty())
OutputFile = ModuleFilename;

// So now attach that name to the buffer we are about to create.
Buffer->PresumedFileName = OutputFile;
}

Buffer->Signature = Writer.WriteAST(*SemaPtr, OutputFile, Module, isysroot,
ShouldCacheASTInMemory);

Expand Down
52 changes: 52 additions & 0 deletions clang/lib/Serialization/PCHContainerOperations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,11 @@

#include "clang/Serialization/PCHContainerOperations.h"
#include "clang/AST/ASTConsumer.h"
#include "clang/Frontend/CompilerInstance.h"
#include "clang/Lex/ModuleLoader.h"
#include "llvm/Bitstream/BitstreamReader.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
#include <utility>

Expand Down Expand Up @@ -48,6 +51,46 @@ class RawPCHContainerGenerator : public ASTConsumer {
}
};

/// A PCHContainerGenerator that writes out the PCH to a flat file if the
/// action is needed (and the filename is determined at the time the output
/// is done).
class RawPCHDeferredContainerGenerator : public ASTConsumer {
std::shared_ptr<PCHBuffer> Buffer;

public:
RawPCHDeferredContainerGenerator(std::shared_ptr<PCHBuffer> Buffer)
: Buffer(std::move(Buffer)) {}

~RawPCHDeferredContainerGenerator() override = default;

void HandleTranslationUnit(ASTContext &Ctx) override {
if (Buffer->IsComplete && !Buffer->PresumedFileName.empty()) {
std::error_code EC;
StringRef Parent = llvm::sys::path::parent_path(Buffer->PresumedFileName);
if (!Parent.empty())
EC = llvm::sys::fs::create_directory(Parent);
if (!EC) {
int FD;
EC = llvm::sys::fs::openFileForWrite(Buffer->PresumedFileName, FD);
if (!EC) {
std::unique_ptr<raw_pwrite_stream> OS;
OS.reset(new llvm::raw_fd_ostream(FD, /*shouldClose=*/true));
*OS << Buffer->Data;
OS->flush(); // Make sure it hits disk now.
// Here we would notify P1184 servers that the module is created
} else
llvm::dbgs() << " Problem creating : " << Buffer->PresumedFileName
<< "\n";
} else
llvm::dbgs() << " Problem creating dir : " << Parent << "\n";
}

// Free the space of the temporary buffer.
llvm::SmallVector<char, 0> Empty;
Buffer->Data = std::move(Empty);
}
};

} // anonymous namespace

std::unique_ptr<ASTConsumer> RawPCHContainerWriter::CreatePCHContainerGenerator(
Expand All @@ -62,6 +105,15 @@ ArrayRef<llvm::StringRef> RawPCHContainerReader::getFormats() const {
return ArrayRef(Raw);
}

/// Create the deferred raw container generator. Only \p Buffer is forwarded
/// to the consumer; \p CI, \p MainFileName, \p OutputFileName and \p OS are
/// not used here.
std::unique_ptr<ASTConsumer>
RawPCHContainerWriter::CreatePCHDeferredContainerGenerator(
    CompilerInstance &CI, const std::string &MainFileName,
    const std::string &OutputFileName,
    std::unique_ptr<llvm::raw_pwrite_stream> OS,
    std::shared_ptr<PCHBuffer> Buffer) const {
  // Buffer is a by-value parameter; hand it over instead of copying it.
  return std::make_unique<RawPCHDeferredContainerGenerator>(std::move(Buffer));
}

StringRef
RawPCHContainerReader::ExtractPCH(llvm::MemoryBufferRef Buffer) const {
return Buffer.getBuffer();
Expand Down