[mlir][OpenMP][NFC] delayed privatisation cleanup (#115298)

Upstreaming some code cleanups ahead of supporting delayed task
execution.
- Make allocatePrivateVars not need to be a template (it will need to
operate separately on firstprivate and private variables for delayed
task execution so it can't index into lists of all variables in the
operation).
 - Use llvm::SmallVectorImpl for function arguments
- collectPrivatizationDecls already reserves size for privateDecls so we
don't need to do that in callers
 - Use llvm::zip_equal instead of C-style array indexing
This commit is contained in:
Tom Eccles 2024-11-07 12:27:31 +00:00 committed by GitHub
parent b358f218a1
commit 8269c400b4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -1254,13 +1254,13 @@ static LogicalResult allocAndInitializeReductionVars(
/// Allocate delayed private variables. Returns the basic block which comes
/// after all of these allocations. llvm::Value * for each of these private
/// variables are populated in llvmPrivateVars.
template <class OP>
static llvm::Expected<llvm::BasicBlock *>
allocatePrivateVars(OP opInst, llvm::IRBuilderBase &builder,
allocatePrivateVars(llvm::IRBuilderBase &builder,
LLVM::ModuleTranslation &moduleTranslation,
MutableArrayRef<BlockArgument> privateBlockArgs,
MutableArrayRef<omp::PrivateClauseOp> privateDecls,
llvm::SmallVector<llvm::Value *> &llvmPrivateVars,
MutableArrayRef<mlir::Value> mlirPrivateVars,
llvm::SmallVectorImpl<llvm::Value *> &llvmPrivateVars,
const llvm::OpenMPIRBuilder::InsertPointTy &allocaIP) {
// Allocate private vars
llvm::BranchInst *allocaTerminator =
@ -1285,19 +1285,18 @@ allocatePrivateVars(OP opInst, llvm::IRBuilderBase &builder,
llvm::BasicBlock *privAllocBlock = nullptr;
if (!privateBlockArgs.empty())
privAllocBlock = splitBB(builder, true, "omp.private.latealloc");
for (unsigned i = 0; i < privateBlockArgs.size(); ++i) {
Region &allocRegion = privateDecls[i].getAllocRegion();
for (auto [privDecl, mlirPrivVar, blockArg] :
llvm::zip_equal(privateDecls, mlirPrivateVars, privateBlockArgs)) {
Region &allocRegion = privDecl.getAllocRegion();
// map allocation region block argument
llvm::Value *nonPrivateVar =
moduleTranslation.lookupValue(opInst.getPrivateVars()[i]);
llvm::Value *nonPrivateVar = moduleTranslation.lookupValue(mlirPrivVar);
assert(nonPrivateVar);
moduleTranslation.mapValue(privateDecls[i].getAllocMoldArg(),
nonPrivateVar);
moduleTranslation.mapValue(privDecl.getAllocMoldArg(), nonPrivateVar);
// in-place convert the private allocation region
SmallVector<llvm::Value *, 1> phis;
if (privateDecls[i].getAllocMoldArg().getUses().empty()) {
if (privDecl.getAllocMoldArg().getUses().empty()) {
// TODO this should use
// allocaIP.getBlock()->getFirstNonPHIOrDbgOrAlloca() so it goes before
// the code for fetching the thread id. Not doing this for now to avoid
@ -1313,7 +1312,7 @@ allocatePrivateVars(OP opInst, llvm::IRBuilderBase &builder,
assert(phis.size() == 1 && "expected one allocation to be yielded");
moduleTranslation.mapValue(privateBlockArgs[i], phis[0]);
moduleTranslation.mapValue(blockArg, phis[0]);
llvmPrivateVars.push_back(phis[0]);
// clear alloc region block argument mapping in case it needs to be
@ -1561,11 +1560,14 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
// Collect delayed privatisation declarations
MutableArrayRef<BlockArgument> privateBlockArgs =
cast<omp::BlockArgOpenMPOpInterface>(*taskOp).getPrivateBlockArgs();
SmallVector<mlir::Value> mlirPrivateVars;
SmallVector<llvm::Value *> llvmPrivateVars;
SmallVector<omp::PrivateClauseOp> privateDecls;
mlirPrivateVars.reserve(privateBlockArgs.size());
llvmPrivateVars.reserve(privateBlockArgs.size());
privateDecls.reserve(privateBlockArgs.size());
collectPrivatizationDecls(taskOp, privateDecls);
for (mlir::Value privateVar : taskOp.getPrivateVars())
mlirPrivateVars.push_back(privateVar);
auto bodyCB = [&](InsertPointTy allocaIP,
InsertPointTy codegenIP) -> llvm::Error {
@ -1575,8 +1577,8 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
moduleTranslation, allocaIP);
llvm::Expected<llvm::BasicBlock *> afterAllocas = allocatePrivateVars(
taskOp, builder, moduleTranslation, privateBlockArgs, privateDecls,
llvmPrivateVars, allocaIP);
builder, moduleTranslation, privateBlockArgs, privateDecls,
mlirPrivateVars, llvmPrivateVars, allocaIP);
if (handleError(afterAllocas, *taskOp).failed())
return llvm::make_error<PreviouslyReportedError>();
@ -1879,11 +1881,14 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
// Collect delayed privatization declarations
MutableArrayRef<BlockArgument> privateBlockArgs =
cast<omp::BlockArgOpenMPOpInterface>(*opInst).getPrivateBlockArgs();
SmallVector<mlir::Value> mlirPrivateVars;
SmallVector<llvm::Value *> llvmPrivateVars;
SmallVector<omp::PrivateClauseOp> privateDecls;
mlirPrivateVars.reserve(privateBlockArgs.size());
llvmPrivateVars.reserve(privateBlockArgs.size());
privateDecls.reserve(privateBlockArgs.size());
collectPrivatizationDecls(opInst, privateDecls);
for (mlir::Value privateVar : opInst.getPrivateVars())
mlirPrivateVars.push_back(privateVar);
// Collect reduction declarations
SmallVector<omp::DeclareReductionOp> reductionDecls;
@ -1895,8 +1900,8 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
auto bodyGenCB = [&](InsertPointTy allocaIP,
InsertPointTy codeGenIP) -> llvm::Error {
llvm::Expected<llvm::BasicBlock *> afterAllocas = allocatePrivateVars(
opInst, builder, moduleTranslation, privateBlockArgs, privateDecls,
llvmPrivateVars, allocaIP);
builder, moduleTranslation, privateBlockArgs, privateDecls,
mlirPrivateVars, llvmPrivateVars, allocaIP);
if (handleError(afterAllocas, *opInst).failed())
return llvm::make_error<PreviouslyReportedError>();