[OPENMP][NVPTX] Enable support for lastprivates in SPMD constructs.
Previously, lastprivate clauses could not be used in SPMD constructs; this patch adds support for lastprivates in SPMD constructs with the uninitialized runtime.

llvm-svn: 342738
commit 2adecff1aa
parent 022bf16b41
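For illustration (an example added here, not part of the commit): a construct of this shape can now be compiled in SPMD mode, where previously the lastprivate clause alone forced the slower generic (non-SPMD) code path:

    int last = 0;
    int a[1000];
    #pragma omp target teams distribute parallel for lastprivate(last) map(from: last)
    for (int i = 0; i < 1000; ++i)
      last = a[i]; // after the construct, 'last' holds the value from the last iteration

As the hunks below show, the lastprivate copies are globalized: they are collected into an implicit per-team record (_globalized_locals_ty) that is materialized on the data-sharing stack at kernel entry.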
@@ -179,6 +179,54 @@ enum NamedBarrier : unsigned {
   NB_Parallel = 1,
 };
 
+typedef std::pair<CharUnits /*Align*/, const ValueDecl *> VarsDataTy;
+static bool stable_sort_comparator(const VarsDataTy P1, const VarsDataTy P2) {
+  return P1.first > P2.first;
+}
+
+static RecordDecl *buildRecordForGlobalizedVars(
+    ASTContext &C, ArrayRef<const ValueDecl *> EscapedDecls,
+    llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>
+        &MappedDeclsFields) {
+  if (EscapedDecls.empty())
+    return nullptr;
+  SmallVector<VarsDataTy, 4> GlobalizedVars;
+  for (const ValueDecl *D : EscapedDecls)
+    GlobalizedVars.emplace_back(C.getDeclAlign(D), D);
+  std::stable_sort(GlobalizedVars.begin(), GlobalizedVars.end(),
+                   stable_sort_comparator);
+  // Build struct _globalized_locals_ty {
+  //   /* globalized vars */
+  // };
+  RecordDecl *GlobalizedRD = C.buildImplicitRecord("_globalized_locals_ty");
+  GlobalizedRD->startDefinition();
+  for (const auto &Pair : GlobalizedVars) {
+    const ValueDecl *VD = Pair.second;
+    QualType Type = VD->getType();
+    if (Type->isLValueReferenceType())
+      Type = C.getPointerType(Type.getNonReferenceType());
+    else
+      Type = Type.getNonReferenceType();
+    SourceLocation Loc = VD->getLocation();
+    auto *Field =
+        FieldDecl::Create(C, GlobalizedRD, Loc, Loc, VD->getIdentifier(), Type,
+                          C.getTrivialTypeSourceInfo(Type, SourceLocation()),
+                          /*BW=*/nullptr, /*Mutable=*/false,
+                          /*InitStyle=*/ICIS_NoInit);
+    Field->setAccess(AS_public);
+    GlobalizedRD->addDecl(Field);
+    if (VD->hasAttrs()) {
+      for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
+           E(VD->getAttrs().end());
+           I != E; ++I)
+        Field->addAttr(*I);
+    }
+    MappedDeclsFields.try_emplace(VD, Field);
+  }
+  GlobalizedRD->completeDefinition();
+  return GlobalizedRD;
+}
+
 /// Get the list of variables that can escape their declaration context.
 class CheckVarsEscapingDeclContext final
     : public ConstStmtVisitor<CheckVarsEscapingDeclContext> {
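A note on the stable_sort above (an illustration added here, not part of the patch): fields are laid out in order of decreasing alignment, so the implicit record needs no interior padding. Globalizing, say, double D; int I; short S; would yield roughly:

    struct _globalized_locals_ty {
      double D; // align 8
      int I;    // align 4
      short S;  // align 2
    };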
@@ -292,51 +340,11 @@ class CheckVarsEscapingDeclContext final
     }
   }
 
-  typedef std::pair<CharUnits /*Align*/, const ValueDecl *> VarsDataTy;
-  static bool stable_sort_comparator(const VarsDataTy P1, const VarsDataTy P2) {
-    return P1.first > P2.first;
-  }
-
   void buildRecordForGlobalizedVars() {
     assert(!GlobalizedRD &&
            "Record for globalized variables is built already.");
-    if (EscapedDecls.empty())
-      return;
-    ASTContext &C = CGF.getContext();
-    SmallVector<VarsDataTy, 4> GlobalizedVars;
-    for (const ValueDecl *D : EscapedDecls)
-      GlobalizedVars.emplace_back(C.getDeclAlign(D), D);
-    std::stable_sort(GlobalizedVars.begin(), GlobalizedVars.end(),
-                     stable_sort_comparator);
-    // Build struct _globalized_locals_ty {
-    //   /* globalized vars */
-    // };
-    GlobalizedRD = C.buildImplicitRecord("_globalized_locals_ty");
-    GlobalizedRD->startDefinition();
-    for (const auto &Pair : GlobalizedVars) {
-      const ValueDecl *VD = Pair.second;
-      QualType Type = VD->getType();
-      if (Type->isLValueReferenceType())
-        Type = C.getPointerType(Type.getNonReferenceType());
-      else
-        Type = Type.getNonReferenceType();
-      SourceLocation Loc = VD->getLocation();
-      auto *Field = FieldDecl::Create(
-          C, GlobalizedRD, Loc, Loc, VD->getIdentifier(), Type,
-          C.getTrivialTypeSourceInfo(Type, SourceLocation()),
-          /*BW=*/nullptr, /*Mutable=*/false,
-          /*InitStyle=*/ICIS_NoInit);
-      Field->setAccess(AS_public);
-      GlobalizedRD->addDecl(Field);
-      if (VD->hasAttrs()) {
-        for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
-             E(VD->getAttrs().end());
-             I != E; ++I)
-          Field->addAttr(*I);
-      }
-      MappedDeclsFields.try_emplace(VD, Field);
-    }
-    GlobalizedRD->completeDefinition();
+    GlobalizedRD = ::buildRecordForGlobalizedVars(
+        CGF.getContext(), EscapedDecls.getArrayRef(), MappedDeclsFields);
   }
 
 public:
@@ -672,13 +680,6 @@ static bool hasParallelIfNumThreadsClause(ASTContext &Ctx,
   return false;
 }
 
-/// Checks if the directive is the distribute clause with the lastprivate
-/// clauses. This construct does not support SPMD execution mode.
-static bool hasDistributeWithLastprivateClauses(const OMPExecutableDirective &D) {
-  return isOpenMPDistributeDirective(D.getDirectiveKind()) &&
-         D.hasClausesOfKind<OMPLastprivateClause>();
-}
-
 /// Check for inner (nested) SPMD construct, if any
 static bool hasNestedSPMDDirective(ASTContext &Ctx,
                                    const OMPExecutableDirective &D) {
@@ -692,8 +693,7 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx,
     switch (D.getDirectiveKind()) {
     case OMPD_target:
       if (isOpenMPParallelDirective(DKind) &&
-          !hasParallelIfNumThreadsClause(Ctx, *NestedDir) &&
-          !hasDistributeWithLastprivateClauses(*NestedDir))
+          !hasParallelIfNumThreadsClause(Ctx, *NestedDir))
         return true;
       if (DKind == OMPD_teams) {
         Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
@@ -704,16 +704,14 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx,
         if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
           DKind = NND->getDirectiveKind();
           if (isOpenMPParallelDirective(DKind) &&
-              !hasParallelIfNumThreadsClause(Ctx, *NND) &&
-              !hasDistributeWithLastprivateClauses(*NND))
+              !hasParallelIfNumThreadsClause(Ctx, *NND))
             return true;
         }
       }
       return false;
     case OMPD_target_teams:
       return isOpenMPParallelDirective(DKind) &&
-             !hasParallelIfNumThreadsClause(Ctx, *NestedDir) &&
-             !hasDistributeWithLastprivateClauses(*NestedDir);
+             !hasParallelIfNumThreadsClause(Ctx, *NestedDir);
     case OMPD_target_simd:
     case OMPD_target_parallel:
     case OMPD_target_parallel_for:
@@ -786,8 +784,7 @@ static bool supportsSPMDExecutionMode(ASTContext &Ctx,
   case OMPD_target_teams_distribute_parallel_for:
   case OMPD_target_teams_distribute_parallel_for_simd:
-    // Distribute with lastprivates requires non-SPMD execution mode.
-    return !hasParallelIfNumThreadsClause(Ctx, D) &&
-           !hasDistributeWithLastprivateClauses(D);
+    return !hasParallelIfNumThreadsClause(Ctx, D);
   case OMPD_target_simd:
   case OMPD_target_teams_distribute:
   case OMPD_target_teams_distribute_simd:
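Taken together (a summary sketch added here, not text from the commit), SPMD eligibility for these combined directives now depends only on if/num_threads clauses, not on lastprivate:

    // Still compiled in generic (non-SPMD) mode: the if clause may serialize
    // the parallel region, so the SPMD layout cannot be assumed.
    #pragma omp target teams distribute parallel for if(parallel: cond)
    for (int i = 0; i < N; ++i) a[i] = i;

    // Previously fell back to generic mode; now compiled as SPMD.
    #pragma omp target teams distribute parallel for lastprivate(last)
    for (int i = 0; i < N; ++i) last = i;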
@@ -1799,28 +1796,88 @@ llvm::Value *CGOpenMPRuntimeNVPTX::emitParallelOutlinedFunction(
   return OutlinedFun;
 }
 
+/// Get list of lastprivate variables from the teams distribute ... or
+/// teams {distribute ...} directives.
+static void
+getDistributeLastprivateVars(const OMPExecutableDirective &D,
+                             llvm::SmallVectorImpl<const ValueDecl *> &Vars) {
+  assert(isOpenMPTeamsDirective(D.getDirectiveKind()) &&
+         "expected teams directive.");
+  const OMPExecutableDirective *Dir = &D;
+  if (!isOpenMPDistributeDirective(D.getDirectiveKind())) {
+    if (const Stmt *S = getSingleCompoundChild(
+            D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers(
+                /*IgnoreCaptured=*/true))) {
+      Dir = dyn_cast<OMPExecutableDirective>(S);
+      if (Dir && !isOpenMPDistributeDirective(Dir->getDirectiveKind()))
+        Dir = nullptr;
+    }
+  }
+  if (!Dir)
+    return;
+  for (const OMPLastprivateClause *C :
+       Dir->getClausesOfKind<OMPLastprivateClause>()) {
+    for (const Expr *E : C->getVarRefs()) {
+      const auto *DE = cast<DeclRefExpr>(E->IgnoreParens());
+      Vars.push_back(cast<ValueDecl>(DE->getDecl()->getCanonicalDecl()));
+    }
+  }
+}
+
 llvm::Value *CGOpenMPRuntimeNVPTX::emitTeamsOutlinedFunction(
     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
   SourceLocation Loc = D.getBeginLoc();
 
+  const RecordDecl *GlobalizedRD = nullptr;
+  llvm::SmallVector<const ValueDecl *, 4> LastPrivates;
+  llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> MappedDeclsFields;
+  if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD) {
+    getDistributeLastprivateVars(D, LastPrivates);
+    if (!LastPrivates.empty())
+      GlobalizedRD = buildRecordForGlobalizedVars(
+          CGM.getContext(), LastPrivates, MappedDeclsFields);
+  }
+
   // Emit target region as a standalone region.
   class NVPTXPrePostActionTy : public PrePostActionTy {
     SourceLocation &Loc;
+    const RecordDecl *GlobalizedRD;
+    llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>
+        &MappedDeclsFields;
 
   public:
-    NVPTXPrePostActionTy(SourceLocation &Loc) : Loc(Loc) {}
+    NVPTXPrePostActionTy(
+        SourceLocation &Loc, const RecordDecl *GlobalizedRD,
+        llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>
+            &MappedDeclsFields)
+        : Loc(Loc), GlobalizedRD(GlobalizedRD),
+          MappedDeclsFields(MappedDeclsFields) {}
     void Enter(CodeGenFunction &CGF) override {
-      static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime())
-          .emitGenericVarsProlog(CGF, Loc);
+      auto &Rt =
+          static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime());
+      if (GlobalizedRD) {
+        auto I = Rt.FunctionGlobalizedDecls.try_emplace(CGF.CurFn).first;
+        I->getSecond().GlobalRecord = GlobalizedRD;
+        I->getSecond().MappedParams =
+            llvm::make_unique<CodeGenFunction::OMPMapVars>();
+        DeclToAddrMapTy &Data = I->getSecond().LocalVarData;
+        for (const auto &Pair : MappedDeclsFields) {
+          assert(Pair.getFirst()->isCanonicalDecl() &&
+                 "Expected canonical declaration");
+          Data.insert(std::make_pair(
+              Pair.getFirst(),
+              std::make_pair(Pair.getSecond(), Address::invalid())));
+        }
+      }
+      Rt.emitGenericVarsProlog(CGF, Loc);
     }
     void Exit(CodeGenFunction &CGF) override {
       static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime())
          .emitGenericVarsEpilog(CGF);
     }
-  } Action(Loc);
-  if (getExecutionMode() != CGOpenMPRuntimeNVPTX::EM_SPMD)
-    CodeGen.setAction(Action);
+  } Action(Loc, GlobalizedRD, MappedDeclsFields);
+  CodeGen.setAction(Action);
   llvm::Value *OutlinedFunVal = CGOpenMPRuntime::emitTeamsOutlinedFunction(
       D, ThreadIDVar, InnermostKind, CodeGen);
   llvm::Function *OutlinedFun = cast<llvm::Function>(OutlinedFunVal);
@@ -1834,7 +1891,8 @@ llvm::Value *CGOpenMPRuntimeNVPTX::emitTeamsOutlinedFunction(
 void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  bool WithSPMDCheck) {
-  if (getDataSharingMode(CGM) != CGOpenMPRuntimeNVPTX::Generic)
+  if (getDataSharingMode(CGM) != CGOpenMPRuntimeNVPTX::Generic &&
+      getExecutionMode() != CGOpenMPRuntimeNVPTX::EM_SPMD)
     return;
 
   CGBuilderTy &Bld = CGF.Builder;
@@ -1892,8 +1950,6 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(CodeGenFunction &CGF,
     I->getSecond().GlobalRecordAddr = Phi;
     I->getSecond().IsInSPMDModeFlag = IsSPMD;
   } else {
-    assert(getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_NonSPMD &&
-           "Expected Non-SPMD construct.");
     // TODO: allow the usage of shared memory to be controlled by
     // the user, for now, default to global.
     llvm::Value *GlobalRecordSizeArg[] = {
|
||||
|
||||
void CGOpenMPRuntimeNVPTX::emitGenericVarsEpilog(CodeGenFunction &CGF,
|
||||
bool WithSPMDCheck) {
|
||||
if (getDataSharingMode(CGM) != CGOpenMPRuntimeNVPTX::Generic)
|
||||
if (getDataSharingMode(CGM) != CGOpenMPRuntimeNVPTX::Generic &&
|
||||
getExecutionMode() != CGOpenMPRuntimeNVPTX::EM_SPMD)
|
||||
return;
|
||||
|
||||
const auto I = FunctionGlobalizedDecls.find(CGF.CurFn);
|
||||
@@ -1997,8 +2054,6 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsEpilog(CodeGenFunction &CGF,
         CGF.EmitCastToVoidPtr(I->getSecond().GlobalRecordAddr));
     CGF.EmitBlock(ExitBB);
   } else {
-    assert(getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_NonSPMD &&
-           "Expected Non-SPMD mode.");
     CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(
                             OMPRTL_NVPTX__kmpc_data_sharing_pop_stack),
                         I->getSecond().GlobalRecordAddr);
@@ -3950,6 +4005,9 @@ void CGOpenMPRuntimeNVPTX::emitFunctionProlog(CodeGenFunction &CGF,
   } else if (const auto *CD = dyn_cast<CapturedDecl>(D)) {
     Body = CD->getBody();
     NeedToDelayGlobalization = CGF.CapturedStmtInfo->getKind() == CR_OpenMP;
+    if (NeedToDelayGlobalization &&
+        getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD)
+      return;
   }
   if (!Body)
     return;

@@ -8,8 +8,6 @@
 #ifndef HEADER
 #define HEADER
 
 // CHECK-NOT: @__omp_offloading_{{.+}}_exec_mode = weak constant i8 1
-// CHECK: @__omp_offloading_{{.+}}_l52_exec_mode = weak constant i8 1
-// CHECK-NOT: @__omp_offloading_{{.+}}_exec_mode = weak constant i8 1
 
 void foo() {
@@ -42,7 +40,7 @@ void foo() {
   for (int i = 0; i < 10; ++i)
     ;
   int a;
-// CHECK: call void @__kmpc_kernel_init(
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
 // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
 // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 0, i16 0)
 // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})

@@ -8,8 +8,6 @@
 #ifndef HEADER
 #define HEADER
 
 // CHECK-NOT: @__omp_offloading_{{.+}}_exec_mode = weak constant i8 1
-// CHECK: @__omp_offloading_{{.+}}_l52_exec_mode = weak constant i8 1
-// CHECK-NOT: @__omp_offloading_{{.+}}_exec_mode = weak constant i8 1
 
 void foo() {
@@ -42,7 +40,7 @@ void foo() {
   for (int i = 0; i < 10; ++i)
     ;
   int a;
-// CHECK: call void @__kmpc_kernel_init(
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
 // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
 // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})
 // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1, i16 {{.+}})

@@ -8,13 +8,12 @@
 #ifndef HEADER
 #define HEADER
 
-// Check that the execution mode of the target region with lastprivates on the gpu is set to Non-SPMD Mode.
-// CHECK-DAG: {{@__omp_offloading_.+l33}}_exec_mode = weak constant i8 1
-// Check that the execution mode of all 4 target regions on the gpu is set to SPMD Mode.
-// CHECK-DAG: {{@__omp_offloading_.+l39}}_exec_mode = weak constant i8 0
-// CHECK-DAG: {{@__omp_offloading_.+l44}}_exec_mode = weak constant i8 0
-// CHECK-DAG: {{@__omp_offloading_.+l49}}_exec_mode = weak constant i8 0
-// CHECK-DAG: {{@__omp_offloading_.+l57}}_exec_mode = weak constant i8 0
+// Check that the execution mode of all 5 target regions on the gpu is set to SPMD Mode.
+// CHECK-DAG: {{@__omp_offloading_.+l32}}_exec_mode = weak constant i8 0
+// CHECK-DAG: {{@__omp_offloading_.+l38}}_exec_mode = weak constant i8 0
+// CHECK-DAG: {{@__omp_offloading_.+l43}}_exec_mode = weak constant i8 0
+// CHECK-DAG: {{@__omp_offloading_.+l48}}_exec_mode = weak constant i8 0
+// CHECK-DAG: {{@__omp_offloading_.+l56}}_exec_mode = weak constant i8 0
 
 #define N 1000
 #define M 10
@@ -68,14 +67,16 @@ int bar(int n){
   return a;
 }
 
-// CHECK_LABEL: define internal void @__omp_offloading_{{.+}}_l33_worker()
-
-// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l33(
-// CHECK: call void @__kmpc_kernel_init(i32 %{{.+}}, i16 1)
+// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l32(
+// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0)
+// CHECK: [[TEAM_ALLOC:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i{{[0-9]+}} 4, i16 0)
+// CHECK: [[BC:%.+]] = bitcast i8* [[TEAM_ALLOC]] to [[REC:%.+]]*
+// CHECK: getelementptr inbounds [[REC]], [[REC]]* [[BC]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91,
-// CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* [[OUTL1:@__omp_outlined.*]]_wrapper to i8*), i16 1)
+// CHECK: {{call|invoke}} void [[OUTL1:@.+]](
 // CHECK: call void @__kmpc_for_static_fini(
-// CHECK: call void @__kmpc_kernel_deinit(i16 1)
+// CHECK: call void @__kmpc_spmd_kernel_deinit()
 // CHECK: ret void
 
 // CHECK: define internal void [[OUTL1]](
@@ -127,7 +128,7 @@ int bar(int n){
 // CHECK: call void @__kmpc_for_static_fini(
 // CHECK: ret void
 
-// CHECK: define weak void @__omp_offloading_{{.*}}_l57(i[[SZ:64|32]] %{{[^,]+}}, [1000 x i32]* dereferenceable{{.*}}, i32* %{{[^)]+}})
+// CHECK: define weak void @__omp_offloading_{{.*}}_l56(i[[SZ:64|32]] %{{[^,]+}}, [1000 x i32]* dereferenceable{{.*}}, i32* %{{[^)]+}})
 // CHECK: call void [[OUTLINED:@__omp_outlined.*]](i32* %{{.+}}, i32* %{{.+}}, i[[SZ]] %{{.*}}, i[[SZ]] %{{.*}}, i[[SZ]] %{{.*}}, [1000 x i32]* %{{.*}}, i32* %{{.*}})
 // CHECK: define internal void [[OUTLINED]](i32* noalias %{{.*}}, i32* noalias %{{.*}} i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, [1000 x i32]* dereferenceable{{.*}}, i32* %{{.*}})
 

@@ -8,12 +8,11 @@
 #ifndef HEADER
 #define HEADER
 
-// Check that the execution mode of the target region with lastprivates on the gpu is set to Non-SPMD Mode.
-// CHECK-DAG: {{@__omp_offloading_.+l31}}_exec_mode = weak constant i8 1
-// Check that the execution mode of all 3 target regions on the gpu is set to SPMD Mode.
-// CHECK-DAG: {{@__omp_offloading_.+l37}}_exec_mode = weak constant i8 0
-// CHECK-DAG: {{@__omp_offloading_.+l42}}_exec_mode = weak constant i8 0
-// CHECK-DAG: {{@__omp_offloading_.+l47}}_exec_mode = weak constant i8 0
+// Check that the execution mode of all 4 target regions on the gpu is set to SPMD Mode.
+// CHECK-DAG: {{@__omp_offloading_.+l30}}_exec_mode = weak constant i8 0
+// CHECK-DAG: {{@__omp_offloading_.+l36}}_exec_mode = weak constant i8 0
+// CHECK-DAG: {{@__omp_offloading_.+l41}}_exec_mode = weak constant i8 0
+// CHECK-DAG: {{@__omp_offloading_.+l46}}_exec_mode = weak constant i8 0
 
 #define N 1000
 #define M 10
@@ -63,14 +62,16 @@ int bar(int n){
   return a;
 }
 
-// CHECK_LABEL: define internal void @__omp_offloading_{{.+}}_l31_worker()
-
-// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l31(
-// CHECK: call void @__kmpc_kernel_init(i32 %{{.+}}, i16 1)
+// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l30(
+// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0)
+// CHECK: [[TEAM_ALLOC:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i{{[0-9]+}} 4, i16 0)
+// CHECK: [[BC:%.+]] = bitcast i8* [[TEAM_ALLOC]] to [[REC:%.+]]*
+// CHECK: getelementptr inbounds [[REC]], [[REC]]* [[BC]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91,
-// CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* [[OUTL1:@__omp_outlined.*]]_wrapper to i8*), i16 1)
+// CHECK: {{call|invoke}} void [[OUTL1:@.+]](
 // CHECK: call void @__kmpc_for_static_fini(
-// CHECK: call void @__kmpc_kernel_deinit(i16 1)
+// CHECK: call void @__kmpc_spmd_kernel_deinit()
 // CHECK: ret void
 
 // CHECK: define internal void [[OUTL1]](