[clang][deps] Make clang-scan-deps write modules in raw format

We have no use for debug info for the scanner modules, and writing raw
ast files speeds up scanning ~15% in some cases. Note that the compile
commands produced by the scanner will still build the obj format (if
requested), and the scanner can *read* obj format pcms, e.g. from a PCH.

rdar://108807592

Differential Revision: https://reviews.llvm.org/D149693
This commit is contained in:
Ben Langmuir 2023-05-02 13:55:46 -07:00
parent 3b01fa264c
commit 8fe8d69ddf
10 changed files with 94 additions and 15 deletions

View File

@ -73,7 +73,7 @@ void disableUnsupportedOptions(CompilerInvocation &CI) {
// Always default to raw container format as clangd doesn't register any other
// and clang dies when faced with unknown formats.
CI.getHeaderSearchOpts().ModuleFormat =
PCHContainerOperations().getRawReader().getFormat().str();
PCHContainerOperations().getRawReader().getFormats().front().str();
CI.getFrontendOpts().Plugins.clear();
CI.getFrontendOpts().AddPluginActions.clear();

View File

@ -32,7 +32,7 @@ class ObjectFilePCHContainerWriter : public PCHContainerWriter {
/// A PCHContainerReader implementation that uses LLVM to
/// read Clang modules wrapped inside a COFF, ELF, or Mach-O container.
class ObjectFilePCHContainerReader : public PCHContainerReader {
StringRef getFormat() const override { return "obj"; }
ArrayRef<StringRef> getFormats() const override;
/// Returns the serialized AST inside the PCH container Buffer.
StringRef ExtractPCH(llvm::MemoryBufferRef Buffer) const override;

View File

@ -56,7 +56,7 @@ class PCHContainerReader {
public:
virtual ~PCHContainerReader() = 0;
/// Equivalent to the format passed to -fmodule-format=
virtual llvm::StringRef getFormat() const = 0;
virtual llvm::ArrayRef<llvm::StringRef> getFormats() const = 0;
/// Returns the serialized AST inside the PCH container Buffer.
virtual llvm::StringRef ExtractPCH(llvm::MemoryBufferRef Buffer) const = 0;
@ -78,8 +78,7 @@ class RawPCHContainerWriter : public PCHContainerWriter {
/// Implements read operations for a raw pass-through PCH container.
class RawPCHContainerReader : public PCHContainerReader {
llvm::StringRef getFormat() const override { return "raw"; }
llvm::ArrayRef<llvm::StringRef> getFormats() const override;
/// Simply returns the buffer contained in Buffer.
llvm::StringRef ExtractPCH(llvm::MemoryBufferRef Buffer) const override;
};
@ -87,7 +86,9 @@ class RawPCHContainerReader : public PCHContainerReader {
/// A registry of PCHContainerWriter and -Reader objects for different formats.
class PCHContainerOperations {
llvm::StringMap<std::unique_ptr<PCHContainerWriter>> Writers;
llvm::StringMap<std::unique_ptr<PCHContainerReader>> Readers;
llvm::StringMap<PCHContainerReader *> Readers;
llvm::SmallVector<std::unique_ptr<PCHContainerReader>> OwnedReaders;
public:
/// Automatically registers a RawPCHContainerWriter and
/// RawPCHContainerReader.
@ -96,13 +97,17 @@ public:
Writers[Writer->getFormat()] = std::move(Writer);
}
void registerReader(std::unique_ptr<PCHContainerReader> Reader) {
Readers[Reader->getFormat()] = std::move(Reader);
assert(!Reader->getFormats().empty() &&
"PCHContainerReader must handle >=1 format");
for (llvm::StringRef Fmt : Reader->getFormats())
Readers[Fmt] = Reader.get();
OwnedReaders.push_back(std::move(Reader));
}
const PCHContainerWriter *getWriterOrNull(llvm::StringRef Format) {
return Writers[Format].get();
}
const PCHContainerReader *getReaderOrNull(llvm::StringRef Format) {
return Readers[Format].get();
return Readers[Format];
}
const PCHContainerReader &getRawReader() {
return *getReaderOrNull("raw");

View File

@ -349,6 +349,11 @@ ObjectFilePCHContainerWriter::CreatePCHContainerGenerator(
CI, MainFileName, OutputFileName, std::move(OS), Buffer);
}
/// Returns the module format names this reader reports: "obj" and "raw".
/// The first entry is what callers obtain via getFormats().front().
ArrayRef<StringRef> ObjectFilePCHContainerReader::getFormats() const {
  // The table is never modified, so keep it const; the returned ArrayRef
  // refers to this static storage.
  static const StringRef Formats[] = {"obj", "raw"};
  return Formats;
}
StringRef
ObjectFilePCHContainerReader::ExtractPCH(llvm::MemoryBufferRef Buffer) const {
StringRef PCH;

View File

@ -811,7 +811,7 @@ std::unique_ptr<ASTUnit> ASTUnit::LoadFromASTFile(
UserFilesAreVolatile);
AST->ModuleCache = new InMemoryModuleCache;
AST->HSOpts = std::make_shared<HeaderSearchOptions>();
AST->HSOpts->ModuleFormat = std::string(PCHContainerRdr.getFormat());
AST->HSOpts->ModuleFormat = std::string(PCHContainerRdr.getFormats().front());
AST->HeaderInfo.reset(new HeaderSearch(AST->HSOpts,
AST->getSourceManager(),
AST->getDiagnostics(),

View File

@ -57,6 +57,11 @@ std::unique_ptr<ASTConsumer> RawPCHContainerWriter::CreatePCHContainerGenerator(
return std::make_unique<RawPCHContainerGenerator>(std::move(OS), Buffer);
}
/// Returns the single module format name this reader reports: "raw".
ArrayRef<llvm::StringRef> RawPCHContainerReader::getFormats() const {
  // The name is never modified, so keep it const; the returned one-element
  // ArrayRef refers to this static storage.
  static const StringRef Raw("raw");
  return ArrayRef(Raw);
}
StringRef
RawPCHContainerReader::ExtractPCH(llvm::MemoryBufferRef Buffer) const {
return Buffer.getBuffer();

View File

@ -181,6 +181,7 @@ public:
ScanInstance.getFrontendOpts().GenerateGlobalModuleIndex = false;
ScanInstance.getFrontendOpts().UseGlobalModuleIndex = false;
ScanInstance.getFrontendOpts().ModulesShareFileManager = false;
ScanInstance.getHeaderSearchOpts().ModuleFormat = "raw";
ScanInstance.setFileManager(FileMgr);
// Support for virtual file system overlays.
@ -309,12 +310,11 @@ DependencyScanningWorker::DependencyScanningWorker(
: Format(Service.getFormat()), OptimizeArgs(Service.canOptimizeArgs()),
EagerLoadModules(Service.shouldEagerLoadModules()) {
PCHContainerOps = std::make_shared<PCHContainerOperations>();
// We need to read object files from PCH built outside the scanner.
PCHContainerOps->registerReader(
std::make_unique<ObjectFilePCHContainerReader>());
// We don't need to write object files, but the current PCH implementation
// requires the writer to be registered as well.
PCHContainerOps->registerWriter(
std::make_unique<ObjectFilePCHContainerWriter>());
// The scanner itself writes only raw ast files.
PCHContainerOps->registerWriter(std::make_unique<RawPCHContainerWriter>());
switch (Service.getMode()) {
case ScanningMode::DependencyDirectivesScan:

View File

@ -0,0 +1,64 @@
// Check that the scanner produces raw ast files, even when builds produce the
// obj format, and that the scanner can read obj format from PCH and modules
// imported by PCH.
// Unsupported on AIX because we don't support the requisite "__clangast"
// section in XCOFF yet.
// UNSUPPORTED: target={{.*}}-aix{{.*}}
// REQUIRES: shell
// RUN: rm -rf %t && mkdir %t
// RUN: cp %S/Inputs/modules-pch/* %t
// Scan dependencies of the PCH:
//
// RUN: rm -f %t/cdb_pch.json
// RUN: sed "s|DIR|%/t|g" %S/Inputs/modules-pch/cdb_pch.json > %t/cdb_pch.json
// RUN: clang-scan-deps -compilation-database %t/cdb_pch.json -format experimental-full \
// RUN: -module-files-dir %t/build > %t/result_pch.json
// Explicitly build the PCH:
//
// RUN: %deps-to-rsp %t/result_pch.json --module-name=ModCommon1 > %t/mod_common_1.cc1.rsp
// RUN: %deps-to-rsp %t/result_pch.json --module-name=ModCommon2 > %t/mod_common_2.cc1.rsp
// RUN: %deps-to-rsp %t/result_pch.json --module-name=ModPCH > %t/mod_pch.cc1.rsp
// RUN: %deps-to-rsp %t/result_pch.json --tu-index=0 > %t/pch.rsp
//
// RUN: %clang @%t/mod_common_1.cc1.rsp
// RUN: %clang @%t/mod_common_2.cc1.rsp
// RUN: %clang @%t/mod_pch.cc1.rsp
// RUN: %clang @%t/pch.rsp
// Scan dependencies of the TU:
//
// RUN: rm -f %t/cdb_tu.json
// RUN: sed "s|DIR|%/t|g" %S/Inputs/modules-pch/cdb_tu.json > %t/cdb_tu.json
// RUN: clang-scan-deps -compilation-database %t/cdb_tu.json -format experimental-full \
// RUN: -module-files-dir %t/build > %t/result_tu.json
// Explicitly build the TU:
//
// RUN: %deps-to-rsp %t/result_tu.json --module-name=ModTU > %t/mod_tu.cc1.rsp
// RUN: %deps-to-rsp %t/result_tu.json --tu-index=0 > %t/tu.rsp
//
// RUN: %clang @%t/mod_tu.cc1.rsp
// RUN: %clang @%t/tu.rsp
// Check the module format for scanner modules:
//
// RUN: find %t/cache -name "*.pcm" -exec %clang_cc1 -module-file-info "{}" ";" | FileCheck %s -check-prefix=SCAN
// SCAN: Module format: raw
// SCAN: Module format: raw
// SCAN: Module format: raw
// SCAN: Module format: raw
// Check the module format for built modules:
//
// RUN: find %t/build -name "*.pcm" -exec %clang_cc1 -module-file-info "{}" ";" | FileCheck %s -check-prefix=BUILD
// BUILD: Module format: obj
// BUILD: Module format: obj
// BUILD: Module format: obj
// BUILD: Module format: obj
// FIXME: check pch format as well; -module-file-info does not work with a PCH

View File

@ -3964,7 +3964,7 @@ clang_parseTranslationUnit_Impl(CXIndex CIdx, const char *source_filename,
TUKind, CacheCodeCompletionResults, IncludeBriefCommentsInCodeCompletion,
/*AllowPCHWithCompilerErrors=*/true, SkipFunctionBodies, SingleFileParse,
/*UserFilesAreVolatile=*/true, ForSerialization, RetainExcludedCB,
CXXIdx->getPCHContainerOperations()->getRawReader().getFormat(),
CXXIdx->getPCHContainerOperations()->getRawReader().getFormats().front(),
&ErrUnit));
// Early failures in LoadFromCommandLine may return with ErrUnit unset.

View File

@ -552,7 +552,7 @@ static CXErrorCode clang_indexSourceFile_Impl(
// Make sure to use the raw module format.
CInvok->getHeaderSearchOpts().ModuleFormat = std::string(
CXXIdx->getPCHContainerOperations()->getRawReader().getFormat());
CXXIdx->getPCHContainerOperations()->getRawReader().getFormats().front());
auto Unit = ASTUnit::create(CInvok, Diags, CaptureDiagnostics,
/*UserFilesAreVolatile=*/true);