[C++20][Modules] Load function body from the module that gives canonical decl (#111992)

Summary:
Fix crash from reproducer provided in
https://github.com/llvm/llvm-project/pull/109167#issuecomment-2405289565
Also fix issues with merged inline friend functions merged during deserialization.

Test Plan: check-clang
This commit is contained in:
Dmitry Polukhin 2024-12-16 12:22:43 +00:00 committed by GitHub
parent 671095b452
commit 38b3d87bd3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 283 additions and 60 deletions

View File

@ -724,11 +724,9 @@ enum ASTRecordTypes {
/// Record code for vtables to emit.
VTABLES_TO_EMIT = 70,
/// Record code for the FunctionDecl to lambdas mapping. These lambdas have to
/// be loaded right after the function they belong to. It is required to have
/// canonical declaration for the lambda class from the same module as
/// enclosing function.
FUNCTION_DECL_TO_LAMBDAS_MAP = 71,
/// Record code for related declarations that have to be deserialized together
/// from the same module.
RELATED_DECLS_MAP = 71,
/// Record code for Sema's vector of functions/blocks with effects to
/// be verified.

View File

@ -537,17 +537,14 @@ private:
/// namespace as if it is not delayed.
DelayedNamespaceOffsetMapTy DelayedNamespaceOffsetMap;
/// Mapping from FunctionDecl IDs to the corresponding lambda IDs.
/// Mapping from main decl ID to the related decls IDs.
///
/// These lambdas have to be loaded right after the function they belong to.
/// It is required to have canonical declaration for lambda class from the
/// same module as enclosing function. This is required to correctly resolve
/// captured variables in the lambda. Without this, due to lazy
/// deserialization, canonical declarations for the function and lambdas can
/// be selected from different modules and DeclRefExprs may refer to the AST
/// nodes that don't exist in the function.
llvm::DenseMap<GlobalDeclID, SmallVector<GlobalDeclID, 4>>
FunctionToLambdasMap;
/// These related decls have to be loaded right after the main decl.
/// It is required to have canonical declaration for related decls from the
/// same module as the enclosing main decl. Without this, due to lazy
/// deserialization, canonical declarations for the main decl and related can
/// be selected from different modules.
llvm::DenseMap<GlobalDeclID, SmallVector<GlobalDeclID, 4>> RelatedDeclsMap;
struct PendingUpdateRecord {
Decl *D;

View File

@ -230,13 +230,12 @@ private:
/// instead of comparing the result of `getDeclID()` or `GetDeclRef()`.
llvm::SmallPtrSet<const Decl *, 32> PredefinedDecls;
/// Mapping from FunctionDecl ID to the list of lambda IDs inside the
/// function.
/// Mapping from the main decl to related decls inside the main decls.
///
/// These lambdas have to be loaded right after the function they belong to.
/// In order to have canonical declaration for lambda class from the same
/// module as enclosing function during deserialization.
llvm::DenseMap<LocalDeclID, SmallVector<LocalDeclID, 4>> FunctionToLambdasMap;
/// These related decls have to be loaded right after the main decl they
/// belong to. In order to have canonical declaration for related decls from
/// the same module as the main decl during deserialization.
llvm::DenseMap<LocalDeclID, SmallVector<LocalDeclID, 4>> RelatedDeclsMap;
/// Offset of each declaration in the bitstream, indexed by
/// the declaration's ID.

View File

@ -2146,6 +2146,23 @@ Decl *TemplateDeclInstantiator::VisitFunctionDecl(
// Check whether there is already a function template specialization for
// this declaration.
FunctionTemplateDecl *FunctionTemplate = D->getDescribedFunctionTemplate();
bool isFriend;
if (FunctionTemplate)
isFriend = (FunctionTemplate->getFriendObjectKind() != Decl::FOK_None);
else
isFriend = (D->getFriendObjectKind() != Decl::FOK_None);
// Friend function defined withing class template may stop being function
// definition during AST merges from different modules, in this case decl
// with function body should be used for instantiation.
if (isFriend) {
const FunctionDecl *Defn = nullptr;
if (D->hasBody(Defn)) {
D = const_cast<FunctionDecl *>(Defn);
FunctionTemplate = Defn->getDescribedFunctionTemplate();
}
}
if (FunctionTemplate && !TemplateParams) {
ArrayRef<TemplateArgument> Innermost = TemplateArgs.getInnermost();
@ -2158,12 +2175,6 @@ Decl *TemplateDeclInstantiator::VisitFunctionDecl(
return SpecFunc;
}
bool isFriend;
if (FunctionTemplate)
isFriend = (FunctionTemplate->getFriendObjectKind() != Decl::FOK_None);
else
isFriend = (D->getFriendObjectKind() != Decl::FOK_None);
bool MergeWithParentScope = (TemplateParams != nullptr) ||
Owner->isFunctionOrMethod() ||
!(isa<Decl>(Owner) &&

View File

@ -3963,14 +3963,14 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F,
break;
}
case FUNCTION_DECL_TO_LAMBDAS_MAP:
case RELATED_DECLS_MAP:
for (unsigned I = 0, N = Record.size(); I != N; /*in loop*/) {
GlobalDeclID ID = ReadDeclID(F, Record, I);
auto &Lambdas = FunctionToLambdasMap[ID];
auto &RelatedDecls = RelatedDeclsMap[ID];
unsigned NN = Record[I++];
Lambdas.reserve(NN);
RelatedDecls.reserve(NN);
for (unsigned II = 0; II < NN; II++)
Lambdas.push_back(ReadDeclID(F, Record, I));
RelatedDecls.push_back(ReadDeclID(F, Record, I));
}
break;
@ -10278,19 +10278,6 @@ void ASTReader::finishPendingActions() {
PBEnd = PendingBodies.end();
PB != PBEnd; ++PB) {
if (FunctionDecl *FD = dyn_cast<FunctionDecl>(PB->first)) {
// For a function defined inline within a class template, force the
// canonical definition to be the one inside the canonical definition of
// the template. This ensures that we instantiate from a correct view
// of the template.
//
// Sadly we can't do this more generally: we can't be sure that all
// copies of an arbitrary class definition will have the same members
// defined (eg, some member functions may not be instantiated, and some
// special members may or may not have been implicitly defined).
if (auto *RD = dyn_cast<CXXRecordDecl>(FD->getLexicalParent()))
if (RD->isDependentContext() && !RD->isThisDeclarationADefinition())
continue;
// FIXME: Check for =delete/=default?
const FunctionDecl *Defn = nullptr;
if (!getContext().getLangOpts().Modules || !FD->hasBody(Defn)) {

View File

@ -4329,13 +4329,12 @@ void ASTReader::loadDeclUpdateRecords(PendingUpdateRecord &Record) {
DC->setHasExternalVisibleStorage(true);
}
// Load any pending lambdas for the function.
if (auto *FD = dyn_cast<FunctionDecl>(D); FD && FD->isCanonicalDecl()) {
if (auto IT = FunctionToLambdasMap.find(ID);
IT != FunctionToLambdasMap.end()) {
// Load any pending related decls.
if (D->isCanonicalDecl()) {
if (auto IT = RelatedDeclsMap.find(ID); IT != RelatedDeclsMap.end()) {
for (auto LID : IT->second)
GetDecl(LID);
FunctionToLambdasMap.erase(IT);
RelatedDeclsMap.erase(IT);
}
}

View File

@ -941,7 +941,7 @@ void ASTWriter::WriteBlockInfoBlock() {
RECORD(PENDING_IMPLICIT_INSTANTIATIONS);
RECORD(UPDATE_VISIBLE);
RECORD(DELAYED_NAMESPACE_LEXICAL_VISIBLE_RECORD);
RECORD(FUNCTION_DECL_TO_LAMBDAS_MAP);
RECORD(RELATED_DECLS_MAP);
RECORD(DECL_UPDATE_OFFSETS);
RECORD(DECL_UPDATES);
RECORD(CUDA_SPECIAL_DECL_REFS);
@ -5972,23 +5972,23 @@ void ASTWriter::WriteDeclAndTypes(ASTContext &Context) {
Stream.EmitRecord(DELAYED_NAMESPACE_LEXICAL_VISIBLE_RECORD,
DelayedNamespaceRecord);
if (!FunctionToLambdasMap.empty()) {
// TODO: on disk hash table for function to lambda mapping might be more
if (!RelatedDeclsMap.empty()) {
// TODO: on disk hash table for related decls mapping might be more
// efficent becuase it allows lazy deserialization.
RecordData FunctionToLambdasMapRecord;
for (const auto &Pair : FunctionToLambdasMap) {
FunctionToLambdasMapRecord.push_back(Pair.first.getRawValue());
FunctionToLambdasMapRecord.push_back(Pair.second.size());
RecordData RelatedDeclsMapRecord;
for (const auto &Pair : RelatedDeclsMap) {
RelatedDeclsMapRecord.push_back(Pair.first.getRawValue());
RelatedDeclsMapRecord.push_back(Pair.second.size());
for (const auto &Lambda : Pair.second)
FunctionToLambdasMapRecord.push_back(Lambda.getRawValue());
RelatedDeclsMapRecord.push_back(Lambda.getRawValue());
}
auto Abv = std::make_shared<llvm::BitCodeAbbrev>();
Abv->Add(llvm::BitCodeAbbrevOp(FUNCTION_DECL_TO_LAMBDAS_MAP));
Abv->Add(llvm::BitCodeAbbrevOp(RELATED_DECLS_MAP));
Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Array));
Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::VBR, 6));
unsigned FunctionToLambdaMapAbbrev = Stream.EmitAbbrev(std::move(Abv));
Stream.EmitRecord(FUNCTION_DECL_TO_LAMBDAS_MAP, FunctionToLambdasMapRecord,
Stream.EmitRecord(RELATED_DECLS_MAP, RelatedDeclsMapRecord,
FunctionToLambdaMapAbbrev);
}

View File

@ -798,6 +798,17 @@ void ASTDeclWriter::VisitFunctionDecl(FunctionDecl *D) {
}
}
if (D->getFriendObjectKind()) {
// For a function defined inline within a class template, we have to force
// the canonical definition to be the one inside the canonical definition of
// the template. Remember this relation to deserialize them together.
if (auto *RD = dyn_cast<CXXRecordDecl>(D->getLexicalParent()))
if (RD->isDependentContext() && RD->isThisDeclarationADefinition()) {
Writer.RelatedDeclsMap[Writer.GetDeclRef(RD)].push_back(
Writer.GetDeclRef(D));
}
}
Record.push_back(D->param_size());
for (auto *P : D->parameters())
Record.AddDeclRef(P);
@ -1563,7 +1574,7 @@ void ASTDeclWriter::VisitCXXRecordDecl(CXXRecordDecl *D) {
// For lambdas inside canonical FunctionDecl remember the mapping.
if (auto FD = llvm::dyn_cast_or_null<FunctionDecl>(D->getDeclContext());
FD && FD->isCanonicalDecl()) {
Writer.FunctionToLambdasMap[Writer.GetDeclRef(FD)].push_back(
Writer.RelatedDeclsMap[Writer.GetDeclRef(FD)].push_back(
Writer.GetDeclRef(D));
}
} else {

View File

@ -0,0 +1,110 @@
// RUN: rm -fR %t
// RUN: split-file %s %t
// RUN: cd %t
// RUN: %clang_cc1 -verify -std=c++20 -x c++ -fmodule-map-file=modules.map -fmodule-name=foo1 -emit-module modules.map -o foo1.pcm
// RUN: %clang_cc1 -verify -std=c++20 -x c++ -fmodule-map-file=modules.map -fmodule-name=foo2 -emit-module modules.map -o foo2.pcm
// RUN: %clang_cc1 -verify -std=c++20 -x c++ -fmodule-map-file=modules.map -fmodule-file=foo1.pcm -fmodule-file=foo2.pcm server.cc
//--- functional
#pragma once
namespace std {
template <class> class function {};
} // namespace std
//--- foo.h
#pragma once
class MethodHandler {
public:
virtual ~MethodHandler() {}
struct HandlerParameter {
HandlerParameter();
};
virtual void RunHandler(const HandlerParameter &param);
};
template <class RequestType, class ResponseType>
class CallbackUnaryHandler : public MethodHandler {
public:
explicit CallbackUnaryHandler();
void RunHandler(const HandlerParameter &param) final {
void *call = nullptr;
(void)[call](bool){};
}
};
//--- foo1.h
// expected-no-diagnostics
#pragma once
#include "functional"
#include "foo.h"
class A;
class ClientAsyncResponseReaderHelper {
public:
using t = std::function<void(A)>;
static void SetupRequest(t finish);
};
//--- foo2.h
// expected-no-diagnostics
#pragma once
#include "foo.h"
template <class BaseClass>
class a : public BaseClass {
public:
a() { [[maybe_unused]] CallbackUnaryHandler<int, int> a; }
};
//--- modules.map
module "foo" {
export *
module "foo.h" {
export *
textual header "foo.h"
}
}
module "foo1" {
export *
module "foo1.h" {
export *
header "foo1.h"
}
use "foo"
}
module "foo2" {
export *
module "foo2.h" {
export *
header "foo2.h"
}
use "foo"
}
//--- server.cc
// expected-no-diagnostics
#include "functional"
#include "foo1.h"
#include "foo2.h"
std::function<void()> on_emit;
template <class RequestType, class ResponseType>
class CallbackUnaryHandler;
class s {};
class hs final : public a<s> {
explicit hs() {}
};

View File

@ -0,0 +1,111 @@
// RUN: rm -fR %t
// RUN: split-file %s %t
// RUN: cd %t
// RUN: %clang_cc1 -std=c++20 -fmodule-map-file=modules.map -xc++ -emit-module -fmodule-name=internal modules.map -o internal.pcm
// RUN: %clang_cc1 -std=c++20 -fmodule-map-file=modules.map -xc++ -emit-module -fmodule-name=interface modules.map -o interface.pcm
// RUN: %clang_cc1 -std=c++20 -fmodule-map-file=modules.map -xc++ -emit-module -fmodule-name=foo modules.map -o foo.pcm -fmodule-file=interface.pcm -fmodule-file=internal.pcm
// RUN: %clang_cc1 -std=c++20 -fmodule-map-file=modules.map -O1 -emit-obj main.cc -verify -fmodule-file=foo.pcm
//--- modules.map
module "interface" {
export *
module "interface.h" {
export *
header "interface.h"
}
}
module "internal" {
export *
module "internal.h" {
export *
header "internal.h"
}
}
module "foo" {
export *
module "foo.h" {
export *
header "foo.h"
}
}
//--- foo.h
#ifndef FOO_H
#define FOO_H
#include "strong_int.h"
DEFINE_STRONG_INT_TYPE(CallbackId, int);
#define CALL_HASH(id) \
(void)[&]() { AbslHashValue(0, id); };
void test(CallbackId id) {
CALL_HASH(id);
}
#include "internal.h"
#include "interface.h"
#endif
//--- interface.h
#ifndef INTERFACE_H
#define INTERFACE_H
#include "strong_int.h"
DEFINE_STRONG_INT_TYPE(EndpointToken, int);
#endif
//--- internal.h
#ifndef INTERNAL_H
#define INTERNAL_H
#include "strong_int.h"
DEFINE_STRONG_INT_TYPE(OrderedListSortKey, int);
DEFINE_STRONG_INT_TYPE(OrderedListId, int);
#endif
//--- strong_int.h
#ifndef STRONG_INT_H
#define STRONG_INT_H
namespace util_intops {
template <typename TagType, typename NativeType>
class StrongInt2;
template <typename TagType, typename NativeType>
class StrongInt2 {
public:
template <typename H>
friend H AbslHashValue(H h, const StrongInt2& i) {
return h;
}
};
} // namespace util_intops
#define DEFINE_STRONG_INT_TYPE(type_name, value_type) \
struct type_name##_strong_int_tag_ {}; \
typedef ::util_intops::StrongInt2<type_name##_strong_int_tag_, value_type> \
type_name;
#endif
//--- main.cc
// expected-no-diagnostics
#include "foo.h"
#include "strong_int.h"
DEFINE_STRONG_INT_TYPE(ArchiveId2, int);
void partial(ArchiveId2 id) {
CALL_HASH(id);
}