Jan Svoboda 1688c3062a
[clang] Do not share ownership of PreprocessorOptions (#133467)
This PR makes it so that `CompilerInvocation` is the sole owner of the
`PreprocessorOptions` instance.
2025-04-04 10:11:14 -07:00

563 lines
19 KiB
C++

//===----------------- ModulesBuilder.cpp ------------------------*- C++-*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "ModulesBuilder.h"
#include "Compiler.h"
#include "support/Logger.h"
#include "clang/Frontend/FrontendAction.h"
#include "clang/Frontend/FrontendActions.h"
#include "clang/Serialization/ASTReader.h"
#include "clang/Serialization/ModuleCache.h"
#include "llvm/ADT/ScopeExit.h"
#include <queue>
namespace clang {
namespace clangd {
namespace {
// Create a path to store module files. Generally it should be:
//
// {TEMP_DIRS}/clangd/module_files/{hashed-file-name}-%%-%%-%%-%%-%%-%%/.
//
// {TEMP_DIRS} is the temporary directory for the system, e.g., "/var/tmp"
// or "C:/TEMP".
//
// '%%' means random value to make the generated path unique.
//
// \param MainFile is used to get the root of the project from global
// compilation database.
//
// TODO: Move these module fils out of the temporary directory if the module
// files are persistent.
llvm::SmallString<256> getUniqueModuleFilesPath(PathRef MainFile) {
llvm::SmallString<128> HashedPrefix = llvm::sys::path::filename(MainFile);
// There might be multiple files with the same name in a project. So appending
// the hash value of the full path to make sure they won't conflict.
HashedPrefix += std::to_string(llvm::hash_value(MainFile));
llvm::SmallString<256> ResultPattern;
llvm::sys::path::system_temp_directory(/*erasedOnReboot=*/true,
ResultPattern);
llvm::sys::path::append(ResultPattern, "clangd");
llvm::sys::path::append(ResultPattern, "module_files");
llvm::sys::path::append(ResultPattern, HashedPrefix);
ResultPattern.append("-%%-%%-%%-%%-%%-%%");
llvm::SmallString<256> Result;
llvm::sys::fs::createUniquePath(ResultPattern, Result,
/*MakeAbsolute=*/false);
llvm::sys::fs::create_directories(Result);
return Result;
}
// Get a unique module file path under \param ModuleFilesPrefix.
std::string getModuleFilePath(llvm::StringRef ModuleName,
PathRef ModuleFilesPrefix) {
llvm::SmallString<256> ModuleFilePath(ModuleFilesPrefix);
auto [PrimaryModuleName, PartitionName] = ModuleName.split(':');
llvm::sys::path::append(ModuleFilePath, PrimaryModuleName);
if (!PartitionName.empty()) {
ModuleFilePath.append("-");
ModuleFilePath.append(PartitionName);
}
ModuleFilePath.append(".pcm");
return std::string(ModuleFilePath);
}
// FailedPrerequisiteModules - stands for the PrerequisiteModules which has
// errors happened during the building process.
class FailedPrerequisiteModules : public PrerequisiteModules {
public:
~FailedPrerequisiteModules() override = default;
// We shouldn't adjust the compilation commands based on
// FailedPrerequisiteModules.
void adjustHeaderSearchOptions(HeaderSearchOptions &Options) const override {
}
// FailedPrerequisiteModules can never be reused.
bool
canReuse(const CompilerInvocation &CI,
llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem>) const override {
return false;
}
};
struct ModuleFile {
ModuleFile(StringRef ModuleName, PathRef ModuleFilePath)
: ModuleName(ModuleName.str()), ModuleFilePath(ModuleFilePath.str()) {}
ModuleFile() = delete;
ModuleFile(const ModuleFile &) = delete;
ModuleFile operator=(const ModuleFile &) = delete;
// The move constructor is needed for llvm::SmallVector.
ModuleFile(ModuleFile &&Other)
: ModuleName(std::move(Other.ModuleName)),
ModuleFilePath(std::move(Other.ModuleFilePath)) {
Other.ModuleName.clear();
Other.ModuleFilePath.clear();
}
ModuleFile &operator=(ModuleFile &&Other) {
if (this == &Other)
return *this;
this->~ModuleFile();
new (this) ModuleFile(std::move(Other));
return *this;
}
~ModuleFile() {
if (!ModuleFilePath.empty())
llvm::sys::fs::remove(ModuleFilePath);
}
StringRef getModuleName() const { return ModuleName; }
StringRef getModuleFilePath() const { return ModuleFilePath; }
private:
std::string ModuleName;
std::string ModuleFilePath;
};
// ReusablePrerequisiteModules - stands for PrerequisiteModules for which all
// the required modules are built successfully. All the module files
// are owned by the modules builder.
class ReusablePrerequisiteModules : public PrerequisiteModules {
public:
ReusablePrerequisiteModules() = default;
ReusablePrerequisiteModules(const ReusablePrerequisiteModules &Other) =
default;
ReusablePrerequisiteModules &
operator=(const ReusablePrerequisiteModules &) = default;
ReusablePrerequisiteModules(ReusablePrerequisiteModules &&) = delete;
ReusablePrerequisiteModules
operator=(ReusablePrerequisiteModules &&) = delete;
~ReusablePrerequisiteModules() override = default;
void adjustHeaderSearchOptions(HeaderSearchOptions &Options) const override {
// Appending all built module files.
for (const auto &RequiredModule : RequiredModules)
Options.PrebuiltModuleFiles.insert_or_assign(
RequiredModule->getModuleName().str(),
RequiredModule->getModuleFilePath().str());
}
bool canReuse(const CompilerInvocation &CI,
llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem>) const override;
bool isModuleUnitBuilt(llvm::StringRef ModuleName) const {
return BuiltModuleNames.contains(ModuleName);
}
void addModuleFile(std::shared_ptr<const ModuleFile> ModuleFile) {
BuiltModuleNames.insert(ModuleFile->getModuleName());
RequiredModules.emplace_back(std::move(ModuleFile));
}
private:
llvm::SmallVector<std::shared_ptr<const ModuleFile>, 8> RequiredModules;
// A helper class to speedup the query if a module is built.
llvm::StringSet<> BuiltModuleNames;
};
bool IsModuleFileUpToDate(PathRef ModuleFilePath,
const PrerequisiteModules &RequisiteModules,
llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS) {
HeaderSearchOptions HSOpts;
RequisiteModules.adjustHeaderSearchOptions(HSOpts);
HSOpts.ForceCheckCXX20ModulesInputFiles = true;
HSOpts.ValidateASTInputFilesContent = true;
clang::clangd::IgnoreDiagnostics IgnoreDiags;
IntrusiveRefCntPtr<DiagnosticsEngine> Diags =
CompilerInstance::createDiagnostics(*VFS, new DiagnosticOptions,
&IgnoreDiags,
/*ShouldOwnClient=*/false);
LangOptions LangOpts;
LangOpts.SkipODRCheckInGMF = true;
FileManager FileMgr(FileSystemOptions(), VFS);
SourceManager SourceMgr(*Diags, FileMgr);
HeaderSearch HeaderInfo(HSOpts, SourceMgr, *Diags, LangOpts,
/*Target=*/nullptr);
PreprocessorOptions PPOpts;
TrivialModuleLoader ModuleLoader;
Preprocessor PP(PPOpts, *Diags, LangOpts, SourceMgr, HeaderInfo,
ModuleLoader);
IntrusiveRefCntPtr<ModuleCache> ModCache = createCrossProcessModuleCache();
PCHContainerOperations PCHOperations;
ASTReader Reader(PP, *ModCache, /*ASTContext=*/nullptr,
PCHOperations.getRawReader(), {});
// We don't need any listener here. By default it will use a validator
// listener.
Reader.setListener(nullptr);
if (Reader.ReadAST(ModuleFilePath, serialization::MK_MainFile,
SourceLocation(),
ASTReader::ARR_None) != ASTReader::Success)
return false;
bool UpToDate = true;
Reader.getModuleManager().visit([&](serialization::ModuleFile &MF) -> bool {
Reader.visitInputFiles(
MF, /*IncludeSystem=*/false, /*Complain=*/false,
[&](const serialization::InputFile &IF, bool isSystem) {
if (!IF.getFile() || IF.isOutOfDate())
UpToDate = false;
});
return !UpToDate;
});
return UpToDate;
}
bool IsModuleFilesUpToDate(
llvm::SmallVector<PathRef> ModuleFilePaths,
const PrerequisiteModules &RequisiteModules,
llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS) {
return llvm::all_of(
ModuleFilePaths, [&RequisiteModules, VFS](auto ModuleFilePath) {
return IsModuleFileUpToDate(ModuleFilePath, RequisiteModules, VFS);
});
}
/// Build a module file for module with `ModuleName`. The information of built
/// module file are stored in \param BuiltModuleFiles.
llvm::Expected<ModuleFile>
buildModuleFile(llvm::StringRef ModuleName, PathRef ModuleUnitFileName,
const GlobalCompilationDatabase &CDB, const ThreadsafeFS &TFS,
const ReusablePrerequisiteModules &BuiltModuleFiles) {
// Try cheap operation earlier to boil-out cheaply if there are problems.
auto Cmd = CDB.getCompileCommand(ModuleUnitFileName);
if (!Cmd)
return llvm::createStringError(
llvm::formatv("No compile command for {0}", ModuleUnitFileName));
llvm::SmallString<256> ModuleFilesPrefix =
getUniqueModuleFilesPath(ModuleUnitFileName);
Cmd->Output = getModuleFilePath(ModuleName, ModuleFilesPrefix);
ParseInputs Inputs;
Inputs.TFS = &TFS;
Inputs.CompileCommand = std::move(*Cmd);
IgnoreDiagnostics IgnoreDiags;
auto CI = buildCompilerInvocation(Inputs, IgnoreDiags);
if (!CI)
return llvm::createStringError("Failed to build compiler invocation");
auto FS = Inputs.TFS->view(Inputs.CompileCommand.Directory);
auto Buf = FS->getBufferForFile(Inputs.CompileCommand.Filename);
if (!Buf)
return llvm::createStringError("Failed to create buffer");
// In clang's driver, we will suppress the check for ODR violation in GMF.
// See the implementation of RenderModulesOptions in Clang.cpp.
CI->getLangOpts().SkipODRCheckInGMF = true;
// Hash the contents of input files and store the hash value to the BMI files.
// So that we can check if the files are still valid when we want to reuse the
// BMI files.
CI->getHeaderSearchOpts().ValidateASTInputFilesContent = true;
BuiltModuleFiles.adjustHeaderSearchOptions(CI->getHeaderSearchOpts());
CI->getFrontendOpts().OutputFile = Inputs.CompileCommand.Output;
auto Clang =
prepareCompilerInstance(std::move(CI), /*Preamble=*/nullptr,
std::move(*Buf), std::move(FS), IgnoreDiags);
if (!Clang)
return llvm::createStringError("Failed to prepare compiler instance");
GenerateReducedModuleInterfaceAction Action;
Clang->ExecuteAction(Action);
if (Clang->getDiagnostics().hasErrorOccurred())
return llvm::createStringError("Compilation failed");
return ModuleFile{ModuleName, Inputs.CompileCommand.Output};
}
bool ReusablePrerequisiteModules::canReuse(
const CompilerInvocation &CI,
llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS) const {
if (RequiredModules.empty())
return true;
llvm::SmallVector<llvm::StringRef> BMIPaths;
for (auto &MF : RequiredModules)
BMIPaths.push_back(MF->getModuleFilePath());
return IsModuleFilesUpToDate(BMIPaths, *this, VFS);
}
class ModuleFileCache {
public:
ModuleFileCache(const GlobalCompilationDatabase &CDB) : CDB(CDB) {}
const GlobalCompilationDatabase &getCDB() const { return CDB; }
std::shared_ptr<const ModuleFile> getModule(StringRef ModuleName);
void add(StringRef ModuleName, std::shared_ptr<const ModuleFile> ModuleFile) {
std::lock_guard<std::mutex> Lock(ModuleFilesMutex);
ModuleFiles[ModuleName] = ModuleFile;
}
void remove(StringRef ModuleName);
private:
const GlobalCompilationDatabase &CDB;
llvm::StringMap<std::weak_ptr<const ModuleFile>> ModuleFiles;
// Mutex to guard accesses to ModuleFiles.
std::mutex ModuleFilesMutex;
};
std::shared_ptr<const ModuleFile>
ModuleFileCache::getModule(StringRef ModuleName) {
std::lock_guard<std::mutex> Lock(ModuleFilesMutex);
auto Iter = ModuleFiles.find(ModuleName);
if (Iter == ModuleFiles.end())
return nullptr;
if (auto Res = Iter->second.lock())
return Res;
ModuleFiles.erase(Iter);
return nullptr;
}
void ModuleFileCache::remove(StringRef ModuleName) {
std::lock_guard<std::mutex> Lock(ModuleFilesMutex);
ModuleFiles.erase(ModuleName);
}
class ModuleNameToSourceCache {
public:
std::string getSourceForModuleName(llvm::StringRef ModuleName) {
std::lock_guard<std::mutex> Lock(CacheMutex);
auto Iter = ModuleNameToSourceCache.find(ModuleName);
if (Iter != ModuleNameToSourceCache.end())
return Iter->second;
return "";
}
void addEntry(llvm::StringRef ModuleName, PathRef Source) {
std::lock_guard<std::mutex> Lock(CacheMutex);
ModuleNameToSourceCache[ModuleName] = Source.str();
}
void eraseEntry(llvm::StringRef ModuleName) {
std::lock_guard<std::mutex> Lock(CacheMutex);
ModuleNameToSourceCache.erase(ModuleName);
}
private:
std::mutex CacheMutex;
llvm::StringMap<std::string> ModuleNameToSourceCache;
};
class CachingProjectModules : public ProjectModules {
public:
CachingProjectModules(std::unique_ptr<ProjectModules> MDB,
ModuleNameToSourceCache &Cache)
: MDB(std::move(MDB)), Cache(Cache) {
assert(this->MDB && "CachingProjectModules should only be created with a "
"valid underlying ProjectModules");
}
std::vector<std::string> getRequiredModules(PathRef File) override {
return MDB->getRequiredModules(File);
}
std::string getModuleNameForSource(PathRef File) override {
return MDB->getModuleNameForSource(File);
}
std::string getSourceForModuleName(llvm::StringRef ModuleName,
PathRef RequiredSrcFile) override {
std::string CachedResult = Cache.getSourceForModuleName(ModuleName);
// Verify Cached Result by seeing if the source declaring the same module
// as we query.
if (!CachedResult.empty()) {
std::string ModuleNameOfCachedSource =
MDB->getModuleNameForSource(CachedResult);
if (ModuleNameOfCachedSource == ModuleName)
return CachedResult;
// Cached Result is invalid. Clear it.
Cache.eraseEntry(ModuleName);
}
auto Result = MDB->getSourceForModuleName(ModuleName, RequiredSrcFile);
Cache.addEntry(ModuleName, Result);
return Result;
}
private:
std::unique_ptr<ProjectModules> MDB;
ModuleNameToSourceCache &Cache;
};
/// Collect the directly and indirectly required module names for \param
/// ModuleName in topological order. The \param ModuleName is guaranteed to
/// be the last element in \param ModuleNames.
llvm::SmallVector<StringRef> getAllRequiredModules(PathRef RequiredSource,
CachingProjectModules &MDB,
StringRef ModuleName) {
llvm::SmallVector<llvm::StringRef> ModuleNames;
llvm::StringSet<> ModuleNamesSet;
auto VisitDeps = [&](StringRef ModuleName, auto Visitor) -> void {
ModuleNamesSet.insert(ModuleName);
for (StringRef RequiredModuleName : MDB.getRequiredModules(
MDB.getSourceForModuleName(ModuleName, RequiredSource)))
if (ModuleNamesSet.insert(RequiredModuleName).second)
Visitor(RequiredModuleName, Visitor);
ModuleNames.push_back(ModuleName);
};
VisitDeps(ModuleName, VisitDeps);
return ModuleNames;
}
} // namespace
class ModulesBuilder::ModulesBuilderImpl {
public:
ModulesBuilderImpl(const GlobalCompilationDatabase &CDB) : Cache(CDB) {}
ModuleNameToSourceCache &getProjectModulesCache() {
return ProjectModulesCache;
}
const GlobalCompilationDatabase &getCDB() const { return Cache.getCDB(); }
llvm::Error
getOrBuildModuleFile(PathRef RequiredSource, StringRef ModuleName,
const ThreadsafeFS &TFS, CachingProjectModules &MDB,
ReusablePrerequisiteModules &BuiltModuleFiles);
private:
ModuleFileCache Cache;
ModuleNameToSourceCache ProjectModulesCache;
};
llvm::Error ModulesBuilder::ModulesBuilderImpl::getOrBuildModuleFile(
PathRef RequiredSource, StringRef ModuleName, const ThreadsafeFS &TFS,
CachingProjectModules &MDB, ReusablePrerequisiteModules &BuiltModuleFiles) {
if (BuiltModuleFiles.isModuleUnitBuilt(ModuleName))
return llvm::Error::success();
std::string ModuleUnitFileName =
MDB.getSourceForModuleName(ModuleName, RequiredSource);
/// It is possible that we're meeting third party modules (modules whose
/// source are not in the project. e.g, the std module may be a third-party
/// module for most project) or something wrong with the implementation of
/// ProjectModules.
/// FIXME: How should we treat third party modules here? If we want to ignore
/// third party modules, we should return true instead of false here.
/// Currently we simply bail out.
if (ModuleUnitFileName.empty())
return llvm::createStringError(
llvm::formatv("Don't get the module unit for module {0}", ModuleName));
// Get Required modules in topological order.
auto ReqModuleNames = getAllRequiredModules(RequiredSource, MDB, ModuleName);
for (llvm::StringRef ReqModuleName : ReqModuleNames) {
if (BuiltModuleFiles.isModuleUnitBuilt(ModuleName))
continue;
if (auto Cached = Cache.getModule(ReqModuleName)) {
if (IsModuleFileUpToDate(Cached->getModuleFilePath(), BuiltModuleFiles,
TFS.view(std::nullopt))) {
log("Reusing module {0} from {1}", ModuleName,
Cached->getModuleFilePath());
BuiltModuleFiles.addModuleFile(std::move(Cached));
continue;
}
Cache.remove(ReqModuleName);
}
llvm::Expected<ModuleFile> MF = buildModuleFile(
ModuleName, ModuleUnitFileName, getCDB(), TFS, BuiltModuleFiles);
if (llvm::Error Err = MF.takeError())
return Err;
log("Built module {0} to {1}", ModuleName, MF->getModuleFilePath());
auto BuiltModuleFile = std::make_shared<const ModuleFile>(std::move(*MF));
Cache.add(ModuleName, BuiltModuleFile);
BuiltModuleFiles.addModuleFile(std::move(BuiltModuleFile));
}
return llvm::Error::success();
}
std::unique_ptr<PrerequisiteModules>
ModulesBuilder::buildPrerequisiteModulesFor(PathRef File,
const ThreadsafeFS &TFS) {
std::unique_ptr<ProjectModules> MDB = Impl->getCDB().getProjectModules(File);
if (!MDB) {
elog("Failed to get Project Modules information for {0}", File);
return std::make_unique<FailedPrerequisiteModules>();
}
CachingProjectModules CachedMDB(std::move(MDB),
Impl->getProjectModulesCache());
std::vector<std::string> RequiredModuleNames =
CachedMDB.getRequiredModules(File);
if (RequiredModuleNames.empty())
return std::make_unique<ReusablePrerequisiteModules>();
auto RequiredModules = std::make_unique<ReusablePrerequisiteModules>();
for (llvm::StringRef RequiredModuleName : RequiredModuleNames) {
// Return early if there is any error.
if (llvm::Error Err = Impl->getOrBuildModuleFile(
File, RequiredModuleName, TFS, CachedMDB, *RequiredModules.get())) {
elog("Failed to build module {0}; due to {1}", RequiredModuleName,
toString(std::move(Err)));
return std::make_unique<FailedPrerequisiteModules>();
}
}
return std::move(RequiredModules);
}
ModulesBuilder::ModulesBuilder(const GlobalCompilationDatabase &CDB) {
Impl = std::make_unique<ModulesBuilderImpl>(CDB);
}
ModulesBuilder::~ModulesBuilder() {}
} // namespace clangd
} // namespace clang