2019-03-19 03:18:21 +00:00
|
|
|
//===------ LoopGeneratorsGOMP.cpp - IR helper to create loops ------------===//
|
|
|
|
//
|
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
|
|
|
// This file contains functions to create parallel loops as LLVM-IR.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
#include "polly/CodeGen/LoopGeneratorsGOMP.h"
|
[Polly] Use separate DT/LI/SE for outlined subfn. NFC. (#102460)
DominatorTree, LoopInfo, and ScalarEvolution are function-level analyses
that expect to be called only on instructions and basic blocks of the
function they were originally created for. When Polly outlined a parallel
loop body into a separate function, it reused the same analyses, which
seemed to work until the new checks that are to be added in #101198.
This patch creates new analyses for the subfunctions. GenDT, GenLI, and
GenSE now refer to the analyses of the current region of code. Outside
of an outlined function, they refer to the same analysis as used for the
SCoP, but are substituted within an outlined function.
Additionally to the cross-function queries of DT/LI/SE, we must not
create SCEVs that refer to a mix of expressions for old and generated
values. Currently, SCEVs themselves do not "remember" which
ScalarEvolution analysis they were created for, but mixing them is just
as unexpected as using DT/LI across function boundaries. Hence
`SCEVLoopAddRecRewriter` was combined into `ScopExpander`.
`SCEVLoopAddRecRewriter` only replaced induction variables but left
SCEVUnknowns to reference the old function. `SCEVParameterRewriter`
would have done so but its job was effectively superseded by
`ScopExpander`, and now also `SCEVLoopAddRecRewriter`. Some issues
persist but are marked with a FIXME in the code. Changing them would
possibly cause this patch to no longer be NFC.
2024-08-10 14:25:15 +02:00
|
|
|
#include "llvm/Analysis/LoopInfo.h"
|
2019-03-19 03:18:21 +00:00
|
|
|
#include "llvm/IR/Dominators.h"
|
|
|
|
#include "llvm/IR/Module.h"
|
|
|
|
|
|
|
|
using namespace llvm;
|
|
|
|
using namespace polly;
|
|
|
|
|
|
|
|
/// Emit a call to GOMP_parallel_loop_runtime_start at the current insert
/// point.
///
/// The call receives, in order: \p SubFn, \p SubFnParam, PollyNumThreads as
/// an i32 constant, \p LB, \p UB and \p Stride. If the module does not yet
/// contain the runtime function, an external declaration is added first.
void ParallelLoopGeneratorGOMP::createCallSpawnThreads(Value *SubFn,
                                                       Value *SubFnParam,
                                                       Value *LB, Value *UB,
                                                       Value *Stride) {
  const char *const RuntimeFnName = "GOMP_parallel_loop_runtime_start";

  Function *Callee = M->getFunction(RuntimeFnName);
  if (Callee == nullptr) {
    // Not declared in this module yet: void(ptr, ptr, i32, long, long, long).
    Type *ArgTypes[] = {Builder.getPtrTy(), Builder.getPtrTy(),
                        Builder.getInt32Ty(), LongType,
                        LongType,             LongType};
    FunctionType *CalleeTy =
        FunctionType::get(Builder.getVoidTy(), ArgTypes, /*isVarArg=*/false);
    Callee = Function::Create(CalleeTy, Function::ExternalLinkage,
                              RuntimeFnName, M);
  }

  Value *CallArgs[] = {SubFn, SubFnParam, Builder.getInt32(PollyNumThreads),
                       LB,    UB,         Stride};
  CallInst *SpawnCall = Builder.CreateCall(Callee, CallArgs);
  SpawnCall->setDebugLoc(DLGenerated);
}
|
|
|
|
|
2020-04-03 14:57:12 -07:00
|
|
|
void ParallelLoopGeneratorGOMP::deployParallelExecution(Function *SubFn,
|
2019-03-19 03:18:21 +00:00
|
|
|
Value *SubFnParam,
|
|
|
|
Value *LB, Value *UB,
|
|
|
|
Value *Stride) {
|
|
|
|
// Tell the runtime we start a parallel loop
|
|
|
|
createCallSpawnThreads(SubFn, SubFnParam, LB, UB, Stride);
|
2022-07-26 19:02:21 -05:00
|
|
|
CallInst *Call = Builder.CreateCall(SubFn, SubFnParam);
|
|
|
|
Call->setDebugLoc(DLGenerated);
|
2019-03-19 03:18:21 +00:00
|
|
|
createCallJoinThreads();
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Create the still-empty outlined function that belongs to \p F.
///
/// The new function is added to the module with internal linkage, has the
/// signature `void(ptr)`, and is named after \p F with the suffix
/// "_polly_subfn". Its only argument is named "polly.par.userContext".
///
/// \returns the newly created function.
Function *ParallelLoopGeneratorGOMP::prepareSubFnDefinition(Function *F) const {
  Type *ArgTypes[] = {Builder.getPtrTy()};
  FunctionType *SubFnTy =
      FunctionType::get(Builder.getVoidTy(), ArgTypes, /*isVarArg=*/false);

  Function *Outlined = Function::Create(SubFnTy, Function::InternalLinkage,
                                        F->getName() + "_polly_subfn", M);

  // Give the single argument a readable name.
  Outlined->arg_begin()->setName("polly.par.userContext");
  return Outlined;
}
|
|
|
|
|
|
|
|
// Create a subfunction of the following (preliminary) structure:
|
|
|
|
//
|
|
|
|
// PrevBB
|
|
|
|
// |
|
|
|
|
// v
|
|
|
|
// HeaderBB
|
|
|
|
// | _____
|
|
|
|
// v v |
|
|
|
|
// CheckNextBB PreHeaderBB
|
|
|
|
// |\ |
|
|
|
|
// | \______/
|
|
|
|
// |
|
|
|
|
// v
|
|
|
|
// ExitBB
|
|
|
|
//
|
|
|
|
// HeaderBB will hold allocations and loading of variables.
|
|
|
|
// CheckNextBB will check for more work.
|
|
|
|
// If there is more work to do: go to PreHeaderBB, otherwise go to ExitBB.
|
|
|
|
// PreHeaderBB loads the new boundaries (& will lead to the loop body later on).
|
|
|
|
// ExitBB marks the end of the parallel execution.
|
|
|
|
std::tuple<Value *, Function *>
|
|
|
|
ParallelLoopGeneratorGOMP::createSubFn(Value *Stride, AllocaInst *StructData,
|
|
|
|
SetVector<Value *> Data,
|
|
|
|
ValueMapT &Map) {
|
|
|
|
if (PollyScheduling != OMPGeneralSchedulingType::Runtime) {
|
|
|
|
// User tried to influence the scheduling type (currently not supported)
|
|
|
|
errs() << "warning: Polly's GNU OpenMP backend solely "
|
|
|
|
"supports the scheduling type 'runtime'.\n";
|
|
|
|
}
|
|
|
|
|
|
|
|
if (PollyChunkSize != 0) {
|
|
|
|
// User tried to influence the chunk size (currently not supported)
|
|
|
|
errs() << "warning: Polly's GNU OpenMP backend solely "
|
|
|
|
"supports the default chunk size.\n";
|
|
|
|
}
|
|
|
|
|
|
|
|
Function *SubFn = createSubFnDefinition();
|
|
|
|
LLVMContext &Context = SubFn->getContext();
|
|
|
|
|
|
|
|
// Create basic blocks.
|
|
|
|
BasicBlock *HeaderBB = BasicBlock::Create(Context, "polly.par.setup", SubFn);
|
[Polly] Use separate DT/LI/SE for outlined subfn. NFC. (#102460)
DominatorTree, LoopInfo, and ScalarEvolution are function-level analyses
that expect to be called only on instructions and basic blocks of the
function they were original created for. When Polly outlined a parallel
loop body into a separate function, it reused the same analyses seemed
to work until new checks to be added in #101198.
This patch creates new analyses for the subfunctions. GenDT, GenLI, and
GenSE now refer to the analyses of the current region of code. Outside
of an outlined function, they refer to the same analysis as used for the
SCoP, but are substituted within an outlined function.
Additionally to the cross-function queries of DT/LI/SE, we must not
create SCEVs that refer to a mix of expressions for old and generated
values. Currently, SCEVs themselves do not "remember" which
ScalarEvolution analysis they were created for, but mixing them is just
as unexpected as using DT/LI across function boundaries. Hence
`SCEVLoopAddRecRewriter` was combined into `ScopExpander`.
`SCEVLoopAddRecRewriter` only replaced induction variables but left
SCEVUnknowns to reference the old function. `SCEVParameterRewriter`
would have done so but its job was effectively superseded by
`ScopExpander`, and now also `SCEVLoopAddRecRewriter`. Some issues
persist put marked with a FIXME in the code. Changing them would
possibly cause this patch to be not NFC anymore.
2024-08-10 14:25:15 +02:00
|
|
|
SubFnDT = std::make_unique<DominatorTree>(*SubFn);
|
|
|
|
SubFnLI = std::make_unique<LoopInfo>(*SubFnDT);
|
|
|
|
|
2019-03-19 03:18:21 +00:00
|
|
|
BasicBlock *ExitBB = BasicBlock::Create(Context, "polly.par.exit", SubFn);
|
|
|
|
BasicBlock *CheckNextBB =
|
|
|
|
BasicBlock::Create(Context, "polly.par.checkNext", SubFn);
|
|
|
|
BasicBlock *PreHeaderBB =
|
|
|
|
BasicBlock::Create(Context, "polly.par.loadIVBounds", SubFn);
|
|
|
|
|
[Polly] Use separate DT/LI/SE for outlined subfn. NFC. (#102460)
DominatorTree, LoopInfo, and ScalarEvolution are function-level analyses
that expect to be called only on instructions and basic blocks of the
function they were original created for. When Polly outlined a parallel
loop body into a separate function, it reused the same analyses seemed
to work until new checks to be added in #101198.
This patch creates new analyses for the subfunctions. GenDT, GenLI, and
GenSE now refer to the analyses of the current region of code. Outside
of an outlined function, they refer to the same analysis as used for the
SCoP, but are substituted within an outlined function.
Additionally to the cross-function queries of DT/LI/SE, we must not
create SCEVs that refer to a mix of expressions for old and generated
values. Currently, SCEVs themselves do not "remember" which
ScalarEvolution analysis they were created for, but mixing them is just
as unexpected as using DT/LI across function boundaries. Hence
`SCEVLoopAddRecRewriter` was combined into `ScopExpander`.
`SCEVLoopAddRecRewriter` only replaced induction variables but left
SCEVUnknowns to reference the old function. `SCEVParameterRewriter`
would have done so but its job was effectively superseded by
`ScopExpander`, and now also `SCEVLoopAddRecRewriter`. Some issues
persist put marked with a FIXME in the code. Changing them would
possibly cause this patch to be not NFC anymore.
2024-08-10 14:25:15 +02:00
|
|
|
SubFnDT->addNewBlock(ExitBB, HeaderBB);
|
|
|
|
SubFnDT->addNewBlock(CheckNextBB, HeaderBB);
|
|
|
|
SubFnDT->addNewBlock(PreHeaderBB, HeaderBB);
|
2019-03-19 03:18:21 +00:00
|
|
|
|
|
|
|
// Fill up basic block HeaderBB.
|
|
|
|
Builder.SetInsertPoint(HeaderBB);
|
|
|
|
Value *LBPtr = Builder.CreateAlloca(LongType, nullptr, "polly.par.LBPtr");
|
|
|
|
Value *UBPtr = Builder.CreateAlloca(LongType, nullptr, "polly.par.UBPtr");
|
2023-03-17 15:58:52 +01:00
|
|
|
Value *UserContext = &*SubFn->arg_begin();
|
2019-03-19 03:18:21 +00:00
|
|
|
|
|
|
|
extractValuesFromStruct(Data, StructData->getAllocatedType(), UserContext,
|
|
|
|
Map);
|
|
|
|
Builder.CreateBr(CheckNextBB);
|
|
|
|
|
|
|
|
// Add code to check if another set of iterations will be executed.
|
|
|
|
Builder.SetInsertPoint(CheckNextBB);
|
|
|
|
Value *Next = createCallGetWorkItem(LBPtr, UBPtr);
|
|
|
|
Value *HasNextSchedule = Builder.CreateTrunc(
|
|
|
|
Next, Builder.getInt1Ty(), "polly.par.hasNextScheduleBlock");
|
|
|
|
Builder.CreateCondBr(HasNextSchedule, PreHeaderBB, ExitBB);
|
|
|
|
|
|
|
|
// Add code to load the iv bounds for this set of iterations.
|
|
|
|
Builder.SetInsertPoint(PreHeaderBB);
|
2021-02-12 00:07:23 +01:00
|
|
|
Value *LB = Builder.CreateLoad(LongType, LBPtr, "polly.par.LB");
|
|
|
|
Value *UB = Builder.CreateLoad(LongType, UBPtr, "polly.par.UB");
|
2019-03-19 03:18:21 +00:00
|
|
|
|
|
|
|
// Subtract one as the upper bound provided by OpenMP is a < comparison
|
|
|
|
// whereas the codegenForSequential function creates a <= comparison.
|
|
|
|
UB = Builder.CreateSub(UB, ConstantInt::get(LongType, 1),
|
|
|
|
"polly.par.UBAdjusted");
|
|
|
|
|
|
|
|
Builder.CreateBr(CheckNextBB);
|
|
|
|
Builder.SetInsertPoint(&*--Builder.GetInsertPoint());
|
|
|
|
BasicBlock *AfterBB;
|
|
|
|
Value *IV =
|
[Polly] Use separate DT/LI/SE for outlined subfn. NFC. (#102460)
DominatorTree, LoopInfo, and ScalarEvolution are function-level analyses
that expect to be called only on instructions and basic blocks of the
function they were original created for. When Polly outlined a parallel
loop body into a separate function, it reused the same analyses seemed
to work until new checks to be added in #101198.
This patch creates new analyses for the subfunctions. GenDT, GenLI, and
GenSE now refer to the analyses of the current region of code. Outside
of an outlined function, they refer to the same analysis as used for the
SCoP, but are substituted within an outlined function.
Additionally to the cross-function queries of DT/LI/SE, we must not
create SCEVs that refer to a mix of expressions for old and generated
values. Currently, SCEVs themselves do not "remember" which
ScalarEvolution analysis they were created for, but mixing them is just
as unexpected as using DT/LI across function boundaries. Hence
`SCEVLoopAddRecRewriter` was combined into `ScopExpander`.
`SCEVLoopAddRecRewriter` only replaced induction variables but left
SCEVUnknowns to reference the old function. `SCEVParameterRewriter`
would have done so but its job was effectively superseded by
`ScopExpander`, and now also `SCEVLoopAddRecRewriter`. Some issues
persist put marked with a FIXME in the code. Changing them would
possibly cause this patch to be not NFC anymore.
2024-08-10 14:25:15 +02:00
|
|
|
createLoop(LB, UB, Stride, Builder, *SubFnLI, *SubFnDT, AfterBB,
|
|
|
|
ICmpInst::ICMP_SLE, nullptr, true, /* UseGuard */ false);
|
2019-03-19 03:18:21 +00:00
|
|
|
|
|
|
|
BasicBlock::iterator LoopBody = Builder.GetInsertPoint();
|
|
|
|
|
|
|
|
// Add code to terminate this subfunction.
|
|
|
|
Builder.SetInsertPoint(ExitBB);
|
|
|
|
createCallCleanupThread();
|
|
|
|
Builder.CreateRetVoid();
|
|
|
|
|
|
|
|
Builder.SetInsertPoint(&*LoopBody);
|
|
|
|
|
[Polly] Use separate DT/LI/SE for outlined subfn. NFC. (#102460)
DominatorTree, LoopInfo, and ScalarEvolution are function-level analyses
that expect to be called only on instructions and basic blocks of the
function they were original created for. When Polly outlined a parallel
loop body into a separate function, it reused the same analyses seemed
to work until new checks to be added in #101198.
This patch creates new analyses for the subfunctions. GenDT, GenLI, and
GenSE now refer to the analyses of the current region of code. Outside
of an outlined function, they refer to the same analysis as used for the
SCoP, but are substituted within an outlined function.
Additionally to the cross-function queries of DT/LI/SE, we must not
create SCEVs that refer to a mix of expressions for old and generated
values. Currently, SCEVs themselves do not "remember" which
ScalarEvolution analysis they were created for, but mixing them is just
as unexpected as using DT/LI across function boundaries. Hence
`SCEVLoopAddRecRewriter` was combined into `ScopExpander`.
`SCEVLoopAddRecRewriter` only replaced induction variables but left
SCEVUnknowns to reference the old function. `SCEVParameterRewriter`
would have done so but its job was effectively superseded by
`ScopExpander`, and now also `SCEVLoopAddRecRewriter`. Some issues
persist put marked with a FIXME in the code. Changing them would
possibly cause this patch to be not NFC anymore.
2024-08-10 14:25:15 +02:00
|
|
|
// FIXME: Call SubFnDT->verify() and SubFnLI->verify() to check that the
|
|
|
|
// DominatorTree/LoopInfo has been created correctly. Alternatively, recreate
|
|
|
|
// from scratch since it is not needed here directly.
|
|
|
|
|
2019-03-19 03:18:21 +00:00
|
|
|
return std::make_tuple(IV, SubFn);
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Emit a call to GOMP_loop_runtime_next and compare its result to zero.
///
/// \p LBPtr and \p UBPtr are forwarded to the runtime function unchanged. If
/// the module does not yet contain the runtime function, it is declared as
/// `i8(ptr, ptr)` with external linkage.
///
/// \returns an `icmp ne` of the call result against a zero of the same type.
Value *ParallelLoopGeneratorGOMP::createCallGetWorkItem(Value *LBPtr,
                                                        Value *UBPtr) {
  const char *const RuntimeFnName = "GOMP_loop_runtime_next";

  Function *Callee = M->getFunction(RuntimeFnName);
  if (Callee == nullptr) {
    // Not declared in this module yet.
    Type *ArgTypes[] = {Builder.getPtrTy(0), Builder.getPtrTy(0)};
    FunctionType *CalleeTy =
        FunctionType::get(Builder.getInt8Ty(), ArgTypes, /*isVarArg=*/false);
    Callee = Function::Create(CalleeTy, Function::ExternalLinkage,
                              RuntimeFnName, M);
  }

  Value *CallArgs[] = {LBPtr, UBPtr};
  CallInst *NextCall = Builder.CreateCall(Callee, CallArgs);
  NextCall->setDebugLoc(DLGenerated);

  // Widen `false` to the call's result type and test for inequality.
  Value *Zero = Builder.CreateZExt(Builder.getFalse(), NextCall->getType());
  return Builder.CreateICmpNE(NextCall, Zero);
}
|
|
|
|
|
|
|
|
void ParallelLoopGeneratorGOMP::createCallJoinThreads() {
|
|
|
|
const std::string Name = "GOMP_parallel_end";
|
|
|
|
|
|
|
|
Function *F = M->getFunction(Name);
|
|
|
|
|
|
|
|
// If F is not available, declare it.
|
|
|
|
if (!F) {
|
|
|
|
GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
|
|
|
|
|
|
|
|
FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), false);
|
|
|
|
F = Function::Create(Ty, Linkage, Name, M);
|
|
|
|
}
|
|
|
|
|
2022-07-26 19:02:21 -05:00
|
|
|
CallInst *Call = Builder.CreateCall(F, {});
|
|
|
|
Call->setDebugLoc(DLGenerated);
|
2019-03-19 03:18:21 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void ParallelLoopGeneratorGOMP::createCallCleanupThread() {
|
|
|
|
const std::string Name = "GOMP_loop_end_nowait";
|
|
|
|
|
|
|
|
Function *F = M->getFunction(Name);
|
|
|
|
|
|
|
|
// If F is not available, declare it.
|
|
|
|
if (!F) {
|
|
|
|
GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
|
|
|
|
|
|
|
|
FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), false);
|
|
|
|
F = Function::Create(Ty, Linkage, Name, M);
|
|
|
|
}
|
|
|
|
|
2022-07-26 19:02:21 -05:00
|
|
|
CallInst *Call = Builder.CreateCall(F, {});
|
|
|
|
Call->setDebugLoc(DLGenerated);
|
2019-03-19 03:18:21 +00:00
|
|
|
}
|