//===-- OpenMP.cpp -- Open MP directive lowering --------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/ // //===----------------------------------------------------------------------===// #include "flang/Lower/OpenMP.h" #include "ClauseProcessor.h" #include "Clauses.h" #include "DataSharingProcessor.h" #include "Decomposer.h" #include "DirectivesCommon.h" #include "ReductionProcessor.h" #include "Utils.h" #include "flang/Common/idioms.h" #include "flang/Lower/Bridge.h" #include "flang/Lower/ConvertExpr.h" #include "flang/Lower/ConvertVariable.h" #include "flang/Lower/StatementContext.h" #include "flang/Lower/SymbolMap.h" #include "flang/Optimizer/Builder/BoxValue.h" #include "flang/Optimizer/Builder/FIRBuilder.h" #include "flang/Optimizer/Builder/Todo.h" #include "flang/Optimizer/Dialect/FIRType.h" #include "flang/Optimizer/HLFIR/HLFIROps.h" #include "flang/Parser/parse-tree.h" #include "flang/Semantics/openmp-directive-sets.h" #include "flang/Semantics/tools.h" #include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h" #include "mlir/Dialect/OpenMP/OpenMPDialect.h" #include "mlir/Transforms/RegionUtils.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Frontend/OpenMP/OMPConstants.h" using namespace Fortran::lower::omp; //===----------------------------------------------------------------------===// // Code generation helper functions //===----------------------------------------------------------------------===// static void genOMPDispatch(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item); static lower::pft::Evaluation * getCollapsedLoopEval(lower::pft::Evaluation &eval, int collapseValue) { // Return the Evaluation of the innermost collapsed loop, or the current one // if there was no COLLAPSE. if (collapseValue == 0) return &eval; lower::pft::Evaluation *curEval = &eval.getFirstNestedEvaluation(); for (int i = 1; i < collapseValue; i++) { // The nested evaluations should be DoConstructs (i.e. they should form // a loop nest). Each DoConstruct is a tuple . assert(curEval->isA()); curEval = &*std::next(curEval->getNestedEvaluations().begin()); } return curEval; } static void genNestedEvaluations(lower::AbstractConverter &converter, lower::pft::Evaluation &eval, int collapseValue = 0) { lower::pft::Evaluation *curEval = getCollapsedLoopEval(eval, collapseValue); for (lower::pft::Evaluation &e : curEval->getNestedEvaluations()) converter.genEval(e); } static fir::GlobalOp globalInitialization(lower::AbstractConverter &converter, fir::FirOpBuilder &firOpBuilder, const semantics::Symbol &sym, const lower::pft::Variable &var, mlir::Location currentLocation) { mlir::Type ty = converter.genType(sym); std::string globalName = converter.mangleName(sym); mlir::StringAttr linkage = firOpBuilder.createInternalLinkage(); fir::GlobalOp global = firOpBuilder.createGlobal(currentLocation, ty, globalName, linkage); // Create default initialization for non-character scalar. if (semantics::IsAllocatableOrObjectPointer(&sym)) { mlir::Type baseAddrType = mlir::dyn_cast(ty).getEleTy(); lower::createGlobalInitialization( firOpBuilder, global, [&](fir::FirOpBuilder &b) { mlir::Value nullAddr = b.createNullConstant(currentLocation, baseAddrType); mlir::Value box = b.create(currentLocation, ty, nullAddr); b.create(currentLocation, box); }); } else { lower::createGlobalInitialization( firOpBuilder, global, [&](fir::FirOpBuilder &b) { mlir::Value undef = b.create(currentLocation, ty); b.create(currentLocation, undef); }); } return global; } // Get the extended value for \p val by extracting additional variable // information from \p base. static fir::ExtendedValue getExtendedValue(fir::ExtendedValue base, mlir::Value val) { return base.match( [&](const fir::MutableBoxValue &box) -> fir::ExtendedValue { return fir::MutableBoxValue(val, box.nonDeferredLenParams(), {}); }, [&](const auto &) -> fir::ExtendedValue { return fir::substBase(base, val); }); } #ifndef NDEBUG static bool isThreadPrivate(lower::SymbolRef sym) { if (const auto *details = sym->detailsIf()) { for (const auto &obj : details->objects()) if (!obj->test(semantics::Symbol::Flag::OmpThreadprivate)) return false; return true; } return sym->test(semantics::Symbol::Flag::OmpThreadprivate); } #endif static void threadPrivatizeVars(lower::AbstractConverter &converter, lower::pft::Evaluation &eval) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); mlir::Location currentLocation = converter.getCurrentLocation(); mlir::OpBuilder::InsertionGuard guard(firOpBuilder); firOpBuilder.setInsertionPointToStart(firOpBuilder.getAllocaBlock()); // If the symbol corresponds to the original ThreadprivateOp, use the symbol // value from that operation to create one ThreadprivateOp copy operation // inside the parallel region. // In some cases, however, the symbol will correspond to the original, // non-threadprivate variable. This can happen, for instance, with a common // block, declared in a separate module, used by a parent procedure and // privatized in its child procedure. auto genThreadprivateOp = [&](lower::SymbolRef sym) -> mlir::Value { assert(isThreadPrivate(sym)); mlir::Value symValue = converter.getSymbolAddress(sym); mlir::Operation *op = symValue.getDefiningOp(); if (auto declOp = mlir::dyn_cast(op)) op = declOp.getMemref().getDefiningOp(); if (mlir::isa(op)) symValue = mlir::dyn_cast(op).getSymAddr(); return firOpBuilder.create( currentLocation, symValue.getType(), symValue); }; llvm::SetVector threadprivateSyms; converter.collectSymbolSet(eval, threadprivateSyms, semantics::Symbol::Flag::OmpThreadprivate, /*collectSymbols=*/true, /*collectHostAssociatedSymbols=*/true); std::set threadprivateSymNames; // For a COMMON block, the ThreadprivateOp is generated for itself instead of // its members, so only bind the value of the new copied ThreadprivateOp // inside the parallel region to the common block symbol only once for // multiple members in one COMMON block. llvm::SetVector commonSyms; for (std::size_t i = 0; i < threadprivateSyms.size(); i++) { const semantics::Symbol *sym = threadprivateSyms[i]; mlir::Value symThreadprivateValue; // The variable may be used more than once, and each reference has one // symbol with the same name. Only do once for references of one variable. if (threadprivateSymNames.find(sym->name()) != threadprivateSymNames.end()) continue; threadprivateSymNames.insert(sym->name()); if (const semantics::Symbol *common = semantics::FindCommonBlockContaining(sym->GetUltimate())) { mlir::Value commonThreadprivateValue; if (commonSyms.contains(common)) { commonThreadprivateValue = converter.getSymbolAddress(*common); } else { commonThreadprivateValue = genThreadprivateOp(*common); converter.bindSymbol(*common, commonThreadprivateValue); commonSyms.insert(common); } symThreadprivateValue = lower::genCommonBlockMember( converter, currentLocation, *sym, commonThreadprivateValue); } else { symThreadprivateValue = genThreadprivateOp(*sym); } fir::ExtendedValue sexv = converter.getSymbolExtendedValue(*sym); fir::ExtendedValue symThreadprivateExv = getExtendedValue(sexv, symThreadprivateValue); converter.bindSymbol(*sym, symThreadprivateExv); } } static mlir::Operation * createAndSetPrivatizedLoopVar(lower::AbstractConverter &converter, mlir::Location loc, mlir::Value indexVal, const semantics::Symbol *sym) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); mlir::OpBuilder::InsertPoint insPt = firOpBuilder.saveInsertionPoint(); firOpBuilder.setInsertionPointToStart(firOpBuilder.getAllocaBlock()); mlir::Type tempTy = converter.genType(*sym); assert(converter.isPresentShallowLookup(*sym) && "Expected symbol to be in symbol table."); firOpBuilder.restoreInsertionPoint(insPt); mlir::Value cvtVal = firOpBuilder.createConvert(loc, tempTy, indexVal); mlir::Operation *storeOp = firOpBuilder.create( loc, cvtVal, converter.getSymbolAddress(*sym)); return storeOp; } // This helper function implements the functionality of "promoting" // non-CPTR arguments of use_device_ptr to use_device_addr // arguments (automagic conversion of use_device_ptr -> // use_device_addr in these cases). The way we do so currently is // through the shuffling of operands from the devicePtrOperands to // deviceAddrOperands where neccesary and re-organizing the types, // locations and symbols to maintain the correct ordering of ptr/addr // input -> BlockArg. // // This effectively implements some deprecated OpenMP functionality // that some legacy applications unfortunately depend on // (deprecated in specification version 5.2): // // "If a list item in a use_device_ptr clause is not of type C_PTR, // the behavior is as if the list item appeared in a use_device_addr // clause. Support for such list items in a use_device_ptr clause // is deprecated." static void promoteNonCPtrUseDevicePtrArgsToUseDeviceAddr( llvm::SmallVectorImpl &useDeviceAddrVars, llvm::SmallVectorImpl &useDevicePtrVars, llvm::SmallVectorImpl &useDeviceTypes, llvm::SmallVectorImpl &useDeviceLocs, llvm::SmallVectorImpl &useDeviceSymbols) { auto moveElementToBack = [](size_t idx, auto &vector) { auto *iter = std::next(vector.begin(), idx); vector.push_back(*iter); vector.erase(iter); }; // Iterate over our use_device_ptr list and shift all non-cptr arguments into // use_device_addr. for (auto *it = useDevicePtrVars.begin(); it != useDevicePtrVars.end();) { if (!fir::isa_builtin_cptr_type(fir::unwrapRefType(it->getType()))) { useDeviceAddrVars.push_back(*it); // We have to shuffle the symbols around as well, to maintain // the correct Input -> BlockArg for use_device_ptr/use_device_addr. // NOTE: However, as map's do not seem to be included currently // this isn't as pertinent, but we must try to maintain for // future alterations. I believe the reason they are not currently // is that the BlockArg assign/lowering needs to be extended // to a greater set of types. auto idx = std::distance(useDevicePtrVars.begin(), it); moveElementToBack(idx, useDeviceTypes); moveElementToBack(idx, useDeviceLocs); moveElementToBack(idx, useDeviceSymbols); it = useDevicePtrVars.erase(it); continue; } ++it; } } /// Extract the list of function and variable symbols affected by the given /// 'declare target' directive and return the intended device type for them. static void getDeclareTargetInfo( lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, const parser::OpenMPDeclareTargetConstruct &declareTargetConstruct, mlir::omp::DeclareTargetOperands &clauseOps, llvm::SmallVectorImpl &symbolAndClause) { const auto &spec = std::get(declareTargetConstruct.t); if (const auto *objectList{parser::Unwrap(spec.u)}) { ObjectList objects{makeObjects(*objectList, semaCtx)}; // Case: declare target(func, var1, var2) gatherFuncAndVarSyms(objects, mlir::omp::DeclareTargetCaptureClause::to, symbolAndClause); } else if (const auto *clauseList{ parser::Unwrap(spec.u)}) { List clauses = makeClauses(*clauseList, semaCtx); if (clauses.empty() && (!eval.getOwningProcedure()->isMainProgram() || eval.getOwningProcedure()->getMainProgramSymbol())) { // Case: declare target, implicit capture of function symbolAndClause.emplace_back( mlir::omp::DeclareTargetCaptureClause::to, eval.getOwningProcedure()->getSubprogramSymbol()); } ClauseProcessor cp(converter, semaCtx, clauses); cp.processDeviceType(clauseOps); cp.processEnter(symbolAndClause); cp.processLink(symbolAndClause); cp.processTo(symbolAndClause); cp.processTODO(converter.getCurrentLocation(), llvm::omp::Directive::OMPD_declare_target); } } static void collectDeferredDeclareTargets( lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, const parser::OpenMPDeclareTargetConstruct &declareTargetConstruct, llvm::SmallVectorImpl &deferredDeclareTarget) { mlir::omp::DeclareTargetOperands clauseOps; llvm::SmallVector symbolAndClause; getDeclareTargetInfo(converter, semaCtx, eval, declareTargetConstruct, clauseOps, symbolAndClause); // Return the device type only if at least one of the targets for the // directive is a function or subroutine mlir::ModuleOp mod = converter.getFirOpBuilder().getModule(); for (const DeclareTargetCapturePair &symClause : symbolAndClause) { mlir::Operation *op = mod.lookupSymbol( converter.mangleName(std::get(symClause))); if (!op) { deferredDeclareTarget.push_back({std::get<0>(symClause), clauseOps.deviceType, std::get<1>(symClause)}); } } } static std::optional getDeclareTargetFunctionDevice( lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, const parser::OpenMPDeclareTargetConstruct &declareTargetConstruct) { mlir::omp::DeclareTargetOperands clauseOps; llvm::SmallVector symbolAndClause; getDeclareTargetInfo(converter, semaCtx, eval, declareTargetConstruct, clauseOps, symbolAndClause); // Return the device type only if at least one of the targets for the // directive is a function or subroutine mlir::ModuleOp mod = converter.getFirOpBuilder().getModule(); for (const DeclareTargetCapturePair &symClause : symbolAndClause) { mlir::Operation *op = mod.lookupSymbol( converter.mangleName(std::get(symClause))); if (mlir::isa_and_nonnull(op)) return clauseOps.deviceType; } return std::nullopt; } /// Set up the entry block of the given `omp.loop_nest` operation, adding a /// block argument for each loop induction variable and allocating and /// initializing a private value to hold each of them. /// /// This function can also bind the symbols of any variables that should match /// block arguments on parent loop wrapper operations attached to the same /// loop. This allows the introduction of any necessary `hlfir.declare` /// operations inside of the entry block of the `omp.loop_nest` operation and /// not directly under any of the wrappers, which would invalidate them. /// /// \param [in] op - the loop nest operation. /// \param [in] converter - PFT to MLIR conversion interface. /// \param [in] loc - location. /// \param [in] args - symbols of induction variables. /// \param [in] wrapperSyms - symbols of variables to be mapped to loop wrapper /// entry block arguments. /// \param [in] wrapperArgs - entry block arguments of parent loop wrappers. static void genLoopVars(mlir::Operation *op, lower::AbstractConverter &converter, mlir::Location &loc, llvm::ArrayRef args, llvm::ArrayRef wrapperSyms = {}, llvm::ArrayRef wrapperArgs = {}) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); auto ®ion = op->getRegion(0); std::size_t loopVarTypeSize = 0; for (const semantics::Symbol *arg : args) loopVarTypeSize = std::max(loopVarTypeSize, arg->GetUltimate().size()); mlir::Type loopVarType = getLoopVarType(converter, loopVarTypeSize); llvm::SmallVector tiv(args.size(), loopVarType); llvm::SmallVector locs(args.size(), loc); firOpBuilder.createBlock(®ion, {}, tiv, locs); // Bind the entry block arguments of parent wrappers to the corresponding // symbols. for (auto [arg, prv] : llvm::zip_equal(wrapperSyms, wrapperArgs)) converter.bindSymbol(*arg, prv); // The argument is not currently in memory, so make a temporary for the // argument, and store it there, then bind that location to the argument. mlir::Operation *storeOp = nullptr; for (auto [argIndex, argSymbol] : llvm::enumerate(args)) { mlir::Value indexVal = fir::getBase(region.front().getArgument(argIndex)); storeOp = createAndSetPrivatizedLoopVar(converter, loc, indexVal, argSymbol); } firOpBuilder.setInsertionPointAfter(storeOp); } static void genReductionVars(mlir::Operation *op, lower::AbstractConverter &converter, mlir::Location &loc, llvm::ArrayRef reductionArgs, llvm::ArrayRef reductionTypes) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); llvm::SmallVector blockArgLocs(reductionArgs.size(), loc); mlir::Block *entryBlock = firOpBuilder.createBlock( &op->getRegion(0), {}, reductionTypes, blockArgLocs); // Bind the reduction arguments to their block arguments. for (auto [arg, prv] : llvm::zip_equal(reductionArgs, entryBlock->getArguments())) { converter.bindSymbol(*arg, prv); } } static void markDeclareTarget(mlir::Operation *op, lower::AbstractConverter &converter, mlir::omp::DeclareTargetCaptureClause captureClause, mlir::omp::DeclareTargetDeviceType deviceType) { // TODO: Add support for program local variables with declare target applied auto declareTargetOp = llvm::dyn_cast(op); if (!declareTargetOp) fir::emitFatalError( converter.getCurrentLocation(), "Attempt to apply declare target on unsupported operation"); // The function or global already has a declare target applied to it, very // likely through implicit capture (usage in another declare target // function/subroutine). It should be marked as any if it has been assigned // both host and nohost, else we skip, as there is no change if (declareTargetOp.isDeclareTarget()) { if (declareTargetOp.getDeclareTargetDeviceType() != deviceType) declareTargetOp.setDeclareTarget(mlir::omp::DeclareTargetDeviceType::any, captureClause); return; } declareTargetOp.setDeclareTarget(deviceType, captureClause); } /// For an operation that takes `omp.private` values as region args, this util /// merges the private vars info into the region arguments list. /// /// \tparam OMPOP - the OpenMP op that takes `omp.private` inputs. /// \tparam InfoTy - the type of private info we want to merge; e.g. mlir::Type /// or mlir::Location fields of the private var list. /// /// \param [in] op - the op accepting `omp.private` inputs. /// \param [in] currentList - the current list of region info that we /// want to merge private info with. For example this could be the list of types /// or locations of previous arguments to \op's region. /// \param [in] infoAccessor - for a private variable, this returns the /// data we want to merge: type or location. /// \param [out] allRegionArgsInfo - the merged list of region info. template static void mergePrivateVarsInfo(OMPOp op, llvm::ArrayRef currentList, llvm::function_ref infoAccessor, llvm::SmallVectorImpl &allRegionArgsInfo) { mlir::OperandRange privateVars = op.getPrivateVars(); llvm::transform(currentList, std::back_inserter(allRegionArgsInfo), [](InfoTy i) { return i; }); llvm::transform(privateVars, std::back_inserter(allRegionArgsInfo), infoAccessor); } //===----------------------------------------------------------------------===// // Op body generation helper structures and functions //===----------------------------------------------------------------------===// struct OpWithBodyGenInfo { /// A type for a code-gen callback function. This takes as argument the op for /// which the code is being generated and returns the arguments of the op's /// region. using GenOMPRegionEntryCBFn = std::function( mlir::Operation *)>; OpWithBodyGenInfo(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, mlir::Location loc, lower::pft::Evaluation &eval, llvm::omp::Directive dir) : converter(converter), symTable(symTable), semaCtx(semaCtx), loc(loc), eval(eval), dir(dir) {} OpWithBodyGenInfo &setClauses(const List *value) { clauses = value; return *this; } OpWithBodyGenInfo &setDataSharingProcessor(DataSharingProcessor *value) { dsp = value; return *this; } OpWithBodyGenInfo &setGenRegionEntryCb(GenOMPRegionEntryCBFn value) { genRegionEntryCB = value; return *this; } /// [inout] converter to use for the clauses. lower::AbstractConverter &converter; /// [in] Symbol table lower::SymMap &symTable; /// [in] Semantics context semantics::SemanticsContext &semaCtx; /// [in] location in source code. mlir::Location loc; /// [in] current PFT node/evaluation. lower::pft::Evaluation &eval; /// [in] leaf directive for which to generate the op body. llvm::omp::Directive dir; /// [in] list of clauses to process. const List *clauses = nullptr; /// [in] if provided, processes the construct's data-sharing attributes. DataSharingProcessor *dsp = nullptr; /// [in] if provided, emits the op's region entry. Otherwise, an emtpy block /// is created in the region. GenOMPRegionEntryCBFn genRegionEntryCB = nullptr; }; /// Create the body (block) for an OpenMP Operation. /// /// \param [in] op - the operation the body belongs to. /// \param [in] info - options controlling code-gen for the construction. /// \param [in] queue - work queue with nested constructs. /// \param [in] item - item in the queue to generate body for. static void createBodyOfOp(mlir::Operation &op, const OpWithBodyGenInfo &info, const ConstructQueue &queue, ConstructQueue::const_iterator item) { fir::FirOpBuilder &firOpBuilder = info.converter.getFirOpBuilder(); auto insertMarker = [](fir::FirOpBuilder &builder) { mlir::Value undef = builder.create(builder.getUnknownLoc(), builder.getIndexType()); return undef.getDefiningOp(); }; // If an argument for the region is provided then create the block with that // argument. Also update the symbol's address with the mlir argument value. // e.g. For loops the argument is the induction variable. And all further // uses of the induction variable should use this mlir value. auto regionArgs = [&]() -> llvm::SmallVector { if (info.genRegionEntryCB != nullptr) { return info.genRegionEntryCB(&op); } firOpBuilder.createBlock(&op.getRegion(0)); return {}; }(); // Mark the earliest insertion point. mlir::Operation *marker = insertMarker(firOpBuilder); // If it is an unstructured region, create empty blocks for all evaluations. if (info.eval.lowerAsUnstructured()) lower::createEmptyRegionBlocks( firOpBuilder, info.eval.getNestedEvaluations()); // Start with privatization, so that the lowering of the nested // code will use the right symbols. bool isLoop = llvm::omp::getDirectiveAssociation(info.dir) == llvm::omp::Association::Loop; bool privatize = info.clauses; firOpBuilder.setInsertionPoint(marker); std::optional tempDsp; if (privatize && !info.dsp) { tempDsp.emplace(info.converter, info.semaCtx, *info.clauses, info.eval, Fortran::lower::omp::isLastItemInQueue(item, queue)); tempDsp->processStep1(); } if (info.dir == llvm::omp::Directive::OMPD_parallel) { threadPrivatizeVars(info.converter, info.eval); if (info.clauses) { firOpBuilder.setInsertionPoint(marker); ClauseProcessor(info.converter, info.semaCtx, *info.clauses) .processCopyin(); } } if (ConstructQueue::const_iterator next = std::next(item); next != queue.end()) { genOMPDispatch(info.converter, info.symTable, info.semaCtx, info.eval, info.loc, queue, next); } else { // genFIR(Evaluation&) tries to patch up unterminated blocks, causing // a lot of complications for our approach if the terminator generation // is delayed past this point. Insert a temporary terminator here, then // delete it. firOpBuilder.setInsertionPointToEnd(&op.getRegion(0).back()); auto *temp = lower::genOpenMPTerminator(firOpBuilder, &op, info.loc); firOpBuilder.setInsertionPointAfter(marker); genNestedEvaluations(info.converter, info.eval); temp->erase(); } // Get or create a unique exiting block from the given region, or // return nullptr if there is no exiting block. auto getUniqueExit = [&](mlir::Region ®ion) -> mlir::Block * { // Find the blocks where the OMP terminator should go. In simple cases // it is the single block in the operation's region. When the region // is more complicated, especially with unstructured control flow, there // may be multiple blocks, and some of them may have non-OMP terminators // resulting from lowering of the code contained within the operation. // All the remaining blocks are potential exit points from the op's region. // // Explicit control flow cannot exit any OpenMP region (other than via // STOP), and that is enforced by semantic checks prior to lowering. STOP // statements are lowered to a function call. // Collect unterminated blocks. llvm::SmallVector exits; for (mlir::Block &b : region) { if (b.empty() || !b.back().hasTrait()) exits.push_back(&b); } if (exits.empty()) return nullptr; // If there already is a unique exiting block, do not create another one. // Additionally, some ops (e.g. omp.sections) require only 1 block in // its region. if (exits.size() == 1) return exits[0]; mlir::Block *exit = firOpBuilder.createBlock(®ion); for (mlir::Block *b : exits) { firOpBuilder.setInsertionPointToEnd(b); firOpBuilder.create(info.loc, exit); } return exit; }; if (auto *exitBlock = getUniqueExit(op.getRegion(0))) { firOpBuilder.setInsertionPointToEnd(exitBlock); auto *term = lower::genOpenMPTerminator(firOpBuilder, &op, info.loc); // Only insert lastprivate code when there actually is an exit block. // Such a block may not exist if the nested code produced an infinite // loop (this may not make sense in production code, but a user could // write that and we should handle it). firOpBuilder.setInsertionPoint(term); if (privatize) { // DataSharingProcessor::processStep2() may create operations before/after // the one passed as argument. We need to treat loop wrappers and their // nested loop as a unit, so we need to pass the top level wrapper (if // present). Otherwise, these operations will be inserted within a // wrapper region. mlir::Operation *privatizationTopLevelOp = &op; if (auto loopNest = llvm::dyn_cast(op)) { llvm::SmallVector wrappers; loopNest.gatherWrappers(wrappers); if (!wrappers.empty()) privatizationTopLevelOp = &*wrappers.back(); } if (!info.dsp) { assert(tempDsp.has_value()); tempDsp->processStep2(privatizationTopLevelOp, isLoop); } else { if (isLoop && regionArgs.size() > 0) { for (const auto ®ionArg : regionArgs) { info.dsp->pushLoopIV(info.converter.getSymbolAddress(*regionArg)); } } info.dsp->processStep2(privatizationTopLevelOp, isLoop); } } } firOpBuilder.setInsertionPointAfter(marker); marker->erase(); } static void genBodyOfTargetDataOp( lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::omp::TargetDataOp &dataOp, llvm::ArrayRef useDeviceTypes, llvm::ArrayRef useDeviceLocs, llvm::ArrayRef useDeviceSymbols, const mlir::Location ¤tLocation, const ConstructQueue &queue, ConstructQueue::const_iterator item) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); mlir::Region ®ion = dataOp.getRegion(); firOpBuilder.createBlock(®ion, {}, useDeviceTypes, useDeviceLocs); for (auto [argIndex, argSymbol] : llvm::enumerate(useDeviceSymbols)) { const mlir::BlockArgument &arg = region.front().getArgument(argIndex); fir::ExtendedValue extVal = converter.getSymbolExtendedValue(*argSymbol); if (auto refType = mlir::dyn_cast(arg.getType())) { if (fir::isa_builtin_cptr_type(refType.getElementType())) { converter.bindSymbol(*argSymbol, arg); } else { // Avoid capture of a reference to a structured binding. const semantics::Symbol *sym = argSymbol; extVal.match( [&](const fir::MutableBoxValue &mbv) { converter.bindSymbol( *sym, fir::MutableBoxValue( arg, fir::factory::getNonDeferredLenParams(extVal), {})); }, [&](const auto &) { TODO(converter.getCurrentLocation(), "use_device clause operand unsupported type"); }); } } else { TODO(converter.getCurrentLocation(), "use_device clause operand unsupported type"); } } // Insert dummy instruction to remember the insertion position. The // marker will be deleted by clean up passes since there are no uses. // Remembering the position for further insertion is important since // there are hlfir.declares inserted above while setting block arguments // and new code from the body should be inserted after that. mlir::Value undefMarker = firOpBuilder.create( dataOp.getOperation()->getLoc(), firOpBuilder.getIndexType()); // Create blocks for unstructured regions. This has to be done since // blocks are initially allocated with the function as the parent region. if (eval.lowerAsUnstructured()) { lower::createEmptyRegionBlocks( firOpBuilder, eval.getNestedEvaluations()); } firOpBuilder.create(currentLocation); // Set the insertion point after the marker. firOpBuilder.setInsertionPointAfter(undefMarker.getDefiningOp()); if (ConstructQueue::const_iterator next = std::next(item); next != queue.end()) { genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue, next); } else { genNestedEvaluations(converter, eval); } } // This generates intermediate common block member accesses within a region // and then rebinds the members symbol to the intermediate accessors we have // generated so that subsequent code generation will utilise these instead. // // When the scope changes, the bindings to the intermediate accessors should // be dropped in place of the original symbol bindings. // // This is for utilisation with TargetOp. static void genIntermediateCommonBlockAccessors( Fortran::lower::AbstractConverter &converter, const mlir::Location ¤tLocation, mlir::Region ®ion, llvm::ArrayRef mapSyms) { for (auto [argIndex, argSymbol] : llvm::enumerate(mapSyms)) { if (auto *details = argSymbol->detailsIf()) { for (auto obj : details->objects()) { auto targetCBMemberBind = Fortran::lower::genCommonBlockMember( converter, currentLocation, *obj, region.getArgument(argIndex)); fir::ExtendedValue sexv = converter.getSymbolExtendedValue(*obj); fir::ExtendedValue targetCBExv = getExtendedValue(sexv, targetCBMemberBind); converter.bindSymbol(*obj, targetCBExv); } } } } // This functions creates a block for the body of the targetOp's region. It adds // all the symbols present in mapSymbols as block arguments to this block. static void genBodyOfTargetOp( lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::omp::TargetOp &targetOp, llvm::ArrayRef mapSyms, llvm::ArrayRef mapSymLocs, llvm::ArrayRef mapSymTypes, DataSharingProcessor &dsp, const mlir::Location ¤tLocation, const ConstructQueue &queue, ConstructQueue::const_iterator item) { assert(mapSymTypes.size() == mapSymLocs.size()); fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); mlir::Region ®ion = targetOp.getRegion(); llvm::SmallVector allRegionArgTypes; mergePrivateVarsInfo(targetOp, mapSymTypes, llvm::function_ref{ [](mlir::Value v) { return v.getType(); }}, allRegionArgTypes); llvm::SmallVector allRegionArgLocs; mergePrivateVarsInfo(targetOp, mapSymLocs, llvm::function_ref{ [](mlir::Value v) { return v.getLoc(); }}, allRegionArgLocs); auto *regionBlock = firOpBuilder.createBlock(®ion, {}, allRegionArgTypes, allRegionArgLocs); // Clones the `bounds` placing them inside the target region and returns them. auto cloneBound = [&](mlir::Value bound) { if (mlir::isMemoryEffectFree(bound.getDefiningOp())) { mlir::Operation *clonedOp = bound.getDefiningOp()->clone(); regionBlock->push_back(clonedOp); return clonedOp->getResult(0); } TODO(converter.getCurrentLocation(), "target map clause operand unsupported bound type"); }; auto cloneBounds = [cloneBound](llvm::ArrayRef bounds) { llvm::SmallVector clonedBounds; for (mlir::Value bound : bounds) clonedBounds.emplace_back(cloneBound(bound)); return clonedBounds; }; // Bind the symbols to their corresponding block arguments. for (auto [argIndex, argSymbol] : llvm::enumerate(mapSyms)) { const mlir::BlockArgument &arg = region.getArgument(argIndex); // Avoid capture of a reference to a structured binding. const semantics::Symbol *sym = argSymbol; // Structure component symbols don't have bindings. if (sym->owner().IsDerivedType()) continue; fir::ExtendedValue extVal = converter.getSymbolExtendedValue(*sym); extVal.match( [&](const fir::BoxValue &v) { converter.bindSymbol(*sym, fir::BoxValue(arg, cloneBounds(v.getLBounds()), v.getExplicitParameters(), v.getExplicitExtents())); }, [&](const fir::MutableBoxValue &v) { converter.bindSymbol( *sym, fir::MutableBoxValue(arg, cloneBounds(v.getLBounds()), v.getMutableProperties())); }, [&](const fir::ArrayBoxValue &v) { converter.bindSymbol( *sym, fir::ArrayBoxValue(arg, cloneBounds(v.getExtents()), cloneBounds(v.getLBounds()), v.getSourceBox())); }, [&](const fir::CharArrayBoxValue &v) { converter.bindSymbol( *sym, fir::CharArrayBoxValue(arg, cloneBound(v.getLen()), cloneBounds(v.getExtents()), cloneBounds(v.getLBounds()))); }, [&](const fir::CharBoxValue &v) { converter.bindSymbol(*sym, fir::CharBoxValue(arg, cloneBound(v.getLen()))); }, [&](const fir::UnboxedValue &v) { converter.bindSymbol(*sym, arg); }, [&](const auto &) { TODO(converter.getCurrentLocation(), "target map clause operand unsupported type"); }); } for (auto [argIndex, argSymbol] : llvm::enumerate(dsp.getAllSymbolsToPrivatize())) { argIndex = mapSyms.size() + argIndex; const mlir::BlockArgument &arg = region.getArgument(argIndex); converter.bindSymbol(*argSymbol, hlfir::translateToExtendedValue( currentLocation, firOpBuilder, hlfir::Entity{arg}, /*contiguousHint=*/ evaluate::IsSimplyContiguous( *argSymbol, converter.getFoldingContext())) .first); } // Check if cloning the bounds introduced any dependency on the outer region. // If so, then either clone them as well if they are MemoryEffectFree, or else // copy them to a new temporary and add them to the map and block_argument // lists and replace their uses with the new temporary. llvm::SetVector valuesDefinedAbove; mlir::getUsedValuesDefinedAbove(region, valuesDefinedAbove); while (!valuesDefinedAbove.empty()) { for (mlir::Value val : valuesDefinedAbove) { mlir::Operation *valOp = val.getDefiningOp(); if (mlir::isMemoryEffectFree(valOp)) { mlir::Operation *clonedOp = valOp->clone(); regionBlock->push_front(clonedOp); val.replaceUsesWithIf( clonedOp->getResult(0), [regionBlock](mlir::OpOperand &use) { return use.getOwner()->getBlock() == regionBlock; }); } else { auto savedIP = firOpBuilder.getInsertionPoint(); firOpBuilder.setInsertionPointAfter(valOp); auto copyVal = firOpBuilder.createTemporary(val.getLoc(), val.getType()); firOpBuilder.createStoreWithConvert(copyVal.getLoc(), val, copyVal); llvm::SmallVector bounds; std::stringstream name; firOpBuilder.setInsertionPoint(targetOp); mlir::Value mapOp = createMapInfoOp( firOpBuilder, copyVal.getLoc(), copyVal, /*varPtrPtr=*/mlir::Value{}, name.str(), bounds, /*members=*/llvm::SmallVector{}, /*membersIndex=*/mlir::DenseIntElementsAttr{}, static_cast< std::underlying_type_t>( llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT), mlir::omp::VariableCaptureKind::ByCopy, copyVal.getType()); targetOp.getMapVarsMutable().append(mapOp); mlir::Value clonedValArg = region.addArgument(copyVal.getType(), copyVal.getLoc()); firOpBuilder.setInsertionPointToStart(regionBlock); auto loadOp = firOpBuilder.create(clonedValArg.getLoc(), clonedValArg); val.replaceUsesWithIf( loadOp->getResult(0), [regionBlock](mlir::OpOperand &use) { return use.getOwner()->getBlock() == regionBlock; }); firOpBuilder.setInsertionPoint(regionBlock, savedIP); } } valuesDefinedAbove.clear(); mlir::getUsedValuesDefinedAbove(region, valuesDefinedAbove); } // Insert dummy instruction to remember the insertion position. The // marker will be deleted since there are not uses. // In the HLFIR flow there are hlfir.declares inserted above while // setting block arguments. mlir::Value undefMarker = firOpBuilder.create( targetOp.getOperation()->getLoc(), firOpBuilder.getIndexType()); // Create blocks for unstructured regions. This has to be done since // blocks are initially allocated with the function as the parent region. if (eval.lowerAsUnstructured()) { lower::createEmptyRegionBlocks( firOpBuilder, eval.getNestedEvaluations()); } firOpBuilder.create(currentLocation); // Create the insertion point after the marker. firOpBuilder.setInsertionPointAfter(undefMarker.getDefiningOp()); // If we map a common block using it's symbol e.g. map(tofrom: /common_block/) // and accessing it's members within the target region, there is a large // chance we will end up with uses external to the region accessing the common // resolve these, we do so by generating new common block member accesses // within the region, binding them to the member symbol for the scope of the // region so that subsequent code generation within the region will utilise // our new member accesses we have created. genIntermediateCommonBlockAccessors(converter, currentLocation, region, mapSyms); if (ConstructQueue::const_iterator next = std::next(item); next != queue.end()) { genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue, next); } else { genNestedEvaluations(converter, eval); } dsp.processStep2(targetOp, /*isLoop=*/false); } template static OpTy genOpWithBody(const OpWithBodyGenInfo &info, const ConstructQueue &queue, ConstructQueue::const_iterator item, Args &&...args) { auto op = info.converter.getFirOpBuilder().create( info.loc, std::forward(args)...); createBodyOfOp(*op, info, queue, item); return op; } template static OpTy genWrapperOp(lower::AbstractConverter &converter, mlir::Location loc, const ClauseOpsTy &clauseOps, llvm::ArrayRef blockArgTypes) { static_assert( OpTy::template hasTrait(), "expected a loop wrapper"); fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); // Create wrapper. auto op = firOpBuilder.create(loc, clauseOps); // Create entry block with arguments. llvm::SmallVector locs(blockArgTypes.size(), loc); firOpBuilder.createBlock(&op.getRegion(), /*insertPt=*/{}, blockArgTypes, locs); firOpBuilder.setInsertionPoint( lower::genOpenMPTerminator(firOpBuilder, op, loc)); return op; } //===----------------------------------------------------------------------===// // Code generation functions for clauses //===----------------------------------------------------------------------===// static void genCriticalDeclareClauses( lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, const List &clauses, mlir::Location loc, mlir::omp::CriticalDeclareOperands &clauseOps, llvm::StringRef name) { ClauseProcessor cp(converter, semaCtx, clauses); cp.processHint(clauseOps); clauseOps.symName = mlir::StringAttr::get(converter.getFirOpBuilder().getContext(), name); } static void genDistributeClauses(lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, lower::StatementContext &stmtCtx, const List &clauses, mlir::Location loc, mlir::omp::DistributeOperands &clauseOps) { ClauseProcessor cp(converter, semaCtx, clauses); cp.processAllocate(clauseOps); cp.processDistSchedule(stmtCtx, clauseOps); cp.processOrder(clauseOps); // TODO Support delayed privatization. } static void genFlushClauses(lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, const ObjectList &objects, const List &clauses, mlir::Location loc, llvm::SmallVectorImpl &operandRange) { if (!objects.empty()) genObjectList(objects, converter, operandRange); ClauseProcessor cp(converter, semaCtx, clauses); cp.processTODO(loc, llvm::omp::OMPD_flush); } static void genLoopNestClauses(lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, const List &clauses, mlir::Location loc, mlir::omp::LoopNestOperands &clauseOps, llvm::SmallVectorImpl &iv) { ClauseProcessor cp(converter, semaCtx, clauses); cp.processCollapse(loc, eval, clauseOps, iv); clauseOps.loopInclusive = converter.getFirOpBuilder().getUnitAttr(); } static void genMaskedClauses(lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, lower::StatementContext &stmtCtx, const List &clauses, mlir::Location loc, mlir::omp::MaskedOperands &clauseOps) { ClauseProcessor cp(converter, semaCtx, clauses); cp.processFilter(stmtCtx, clauseOps); } static void genOrderedRegionClauses(lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, const List &clauses, mlir::Location loc, mlir::omp::OrderedRegionOperands &clauseOps) { ClauseProcessor cp(converter, semaCtx, clauses); cp.processTODO(loc, llvm::omp::Directive::OMPD_ordered); } static void genParallelClauses( lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, lower::StatementContext &stmtCtx, const List &clauses, mlir::Location loc, mlir::omp::ParallelOperands &clauseOps, llvm::SmallVectorImpl &reductionTypes, llvm::SmallVectorImpl &reductionSyms) { ClauseProcessor cp(converter, semaCtx, clauses); cp.processAllocate(clauseOps); cp.processIf(llvm::omp::Directive::OMPD_parallel, clauseOps); cp.processNumThreads(stmtCtx, clauseOps); cp.processProcBind(clauseOps); cp.processReduction(loc, clauseOps, &reductionTypes, &reductionSyms); } static void genSectionsClauses( lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, const List &clauses, mlir::Location loc, mlir::omp::SectionsOperands &clauseOps, llvm::SmallVectorImpl &reductionTypes, llvm::SmallVectorImpl &reductionSyms) { ClauseProcessor cp(converter, semaCtx, clauses); cp.processAllocate(clauseOps); cp.processNowait(clauseOps); cp.processReduction(loc, clauseOps, &reductionTypes, &reductionSyms); // TODO Support delayed privatization. } static void genSimdClauses(lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, const List &clauses, mlir::Location loc, mlir::omp::SimdOperands &clauseOps) { ClauseProcessor cp(converter, semaCtx, clauses); cp.processAligned(clauseOps); cp.processIf(llvm::omp::Directive::OMPD_simd, clauseOps); cp.processOrder(clauseOps); cp.processReduction(loc, clauseOps); cp.processSafelen(clauseOps); cp.processSimdlen(clauseOps); // TODO Support delayed privatization. cp.processTODO( loc, llvm::omp::Directive::OMPD_simd); } static void genSingleClauses(lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, const List &clauses, mlir::Location loc, mlir::omp::SingleOperands &clauseOps) { ClauseProcessor cp(converter, semaCtx, clauses); cp.processAllocate(clauseOps); cp.processCopyprivate(loc, clauseOps); cp.processNowait(clauseOps); // TODO Support delayed privatization. } static void genTargetClauses( lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, lower::StatementContext &stmtCtx, const List &clauses, mlir::Location loc, bool processHostOnlyClauses, mlir::omp::TargetOperands &clauseOps, llvm::SmallVectorImpl &mapSyms, llvm::SmallVectorImpl &mapLocs, llvm::SmallVectorImpl &mapTypes, llvm::SmallVectorImpl &deviceAddrSyms, llvm::SmallVectorImpl &deviceAddrLocs, llvm::SmallVectorImpl &deviceAddrTypes, llvm::SmallVectorImpl &devicePtrSyms, llvm::SmallVectorImpl &devicePtrLocs, llvm::SmallVectorImpl &devicePtrTypes) { ClauseProcessor cp(converter, semaCtx, clauses); cp.processDepend(clauseOps); cp.processDevice(stmtCtx, clauseOps); cp.processHasDeviceAddr(clauseOps, deviceAddrTypes, deviceAddrLocs, deviceAddrSyms); cp.processIf(llvm::omp::Directive::OMPD_target, clauseOps); cp.processIsDevicePtr(clauseOps, devicePtrTypes, devicePtrLocs, devicePtrSyms); cp.processMap(loc, stmtCtx, clauseOps, &mapSyms, &mapLocs, &mapTypes); if (processHostOnlyClauses) cp.processNowait(clauseOps); cp.processThreadLimit(stmtCtx, clauseOps); cp.processTODO( loc, llvm::omp::Directive::OMPD_target); // `target private(..)` is only supported in delayed privatization mode. if (!enableDelayedPrivatizationStaging) cp.processTODO(loc, llvm::omp::Directive::OMPD_target); } static void genTargetDataClauses( lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, lower::StatementContext &stmtCtx, const List &clauses, mlir::Location loc, mlir::omp::TargetDataOperands &clauseOps, llvm::SmallVectorImpl &useDeviceTypes, llvm::SmallVectorImpl &useDeviceLocs, llvm::SmallVectorImpl &useDeviceSyms) { ClauseProcessor cp(converter, semaCtx, clauses); cp.processDevice(stmtCtx, clauseOps); cp.processIf(llvm::omp::Directive::OMPD_target_data, clauseOps); cp.processMap(loc, stmtCtx, clauseOps); cp.processUseDeviceAddr(clauseOps, useDeviceTypes, useDeviceLocs, useDeviceSyms); cp.processUseDevicePtr(clauseOps, useDeviceTypes, useDeviceLocs, useDeviceSyms); // This function implements the deprecated functionality of use_device_ptr // that allows users to provide non-CPTR arguments to it with the caveat // that the compiler will treat them as use_device_addr. A lot of legacy // code may still depend on this functionality, so we should support it // in some manner. We do so currently by simply shifting non-cptr operands // from the use_device_ptr list into the front of the use_device_addr list // whilst maintaining the ordering of useDeviceLocs, useDeviceSyms and // useDeviceTypes to use_device_ptr/use_device_addr input for BlockArg // ordering. // TODO: Perhaps create a user provideable compiler option that will // re-introduce a hard-error rather than a warning in these cases. promoteNonCPtrUseDevicePtrArgsToUseDeviceAddr( clauseOps.useDeviceAddrVars, clauseOps.useDevicePtrVars, useDeviceTypes, useDeviceLocs, useDeviceSyms); } static void genTargetEnterExitUpdateDataClauses( lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, lower::StatementContext &stmtCtx, const List &clauses, mlir::Location loc, llvm::omp::Directive directive, mlir::omp::TargetEnterExitUpdateDataOperands &clauseOps) { ClauseProcessor cp(converter, semaCtx, clauses); cp.processDepend(clauseOps); cp.processDevice(stmtCtx, clauseOps); cp.processIf(directive, clauseOps); if (directive == llvm::omp::Directive::OMPD_target_update) { cp.processMotionClauses(stmtCtx, clauseOps); cp.processMotionClauses(stmtCtx, clauseOps); } else { cp.processMap(loc, stmtCtx, clauseOps); } cp.processNowait(clauseOps); } static void genTaskClauses(lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, lower::StatementContext &stmtCtx, const List &clauses, mlir::Location loc, mlir::omp::TaskOperands &clauseOps) { ClauseProcessor cp(converter, semaCtx, clauses); cp.processAllocate(clauseOps); cp.processDepend(clauseOps); cp.processFinal(stmtCtx, clauseOps); cp.processIf(llvm::omp::Directive::OMPD_task, clauseOps); cp.processMergeable(clauseOps); cp.processPriority(stmtCtx, clauseOps); cp.processUntied(clauseOps); // TODO Support delayed privatization. cp.processTODO( loc, llvm::omp::Directive::OMPD_task); } static void genTaskgroupClauses(lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, const List &clauses, mlir::Location loc, mlir::omp::TaskgroupOperands &clauseOps) { ClauseProcessor cp(converter, semaCtx, clauses); cp.processAllocate(clauseOps); cp.processTODO(loc, llvm::omp::Directive::OMPD_taskgroup); } static void genTaskwaitClauses(lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, const List &clauses, mlir::Location loc, mlir::omp::TaskwaitOperands &clauseOps) { ClauseProcessor cp(converter, semaCtx, clauses); cp.processTODO( loc, llvm::omp::Directive::OMPD_taskwait); } static void genTeamsClauses(lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, lower::StatementContext &stmtCtx, const List &clauses, mlir::Location loc, mlir::omp::TeamsOperands &clauseOps) { ClauseProcessor cp(converter, semaCtx, clauses); cp.processAllocate(clauseOps); cp.processIf(llvm::omp::Directive::OMPD_teams, clauseOps); cp.processNumTeams(stmtCtx, clauseOps); cp.processThreadLimit(stmtCtx, clauseOps); // TODO Support delayed privatization. cp.processTODO(loc, llvm::omp::Directive::OMPD_teams); } static void genWsloopClauses( lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, lower::StatementContext &stmtCtx, const List &clauses, mlir::Location loc, mlir::omp::WsloopOperands &clauseOps, llvm::SmallVectorImpl &reductionTypes, llvm::SmallVectorImpl &reductionSyms) { ClauseProcessor cp(converter, semaCtx, clauses); cp.processNowait(clauseOps); cp.processOrder(clauseOps); cp.processOrdered(clauseOps); cp.processReduction(loc, clauseOps, &reductionTypes, &reductionSyms); cp.processSchedule(stmtCtx, clauseOps); // TODO Support delayed privatization. cp.processTODO( loc, llvm::omp::Directive::OMPD_do); } //===----------------------------------------------------------------------===// // Code generation functions for leaf constructs //===----------------------------------------------------------------------===// static mlir::omp::BarrierOp genBarrierOp(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item) { return converter.getFirOpBuilder().create(loc); } static mlir::omp::CriticalOp genCriticalOp(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item, const std::optional &name) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); mlir::FlatSymbolRefAttr nameAttr; if (name) { std::string nameStr = name->ToString(); mlir::ModuleOp mod = firOpBuilder.getModule(); auto global = mod.lookupSymbol(nameStr); if (!global) { mlir::omp::CriticalDeclareOperands clauseOps; genCriticalDeclareClauses(converter, semaCtx, item->clauses, loc, clauseOps, nameStr); mlir::OpBuilder modBuilder(mod.getBodyRegion()); global = modBuilder.create(loc, clauseOps); } nameAttr = mlir::FlatSymbolRefAttr::get(firOpBuilder.getContext(), global.getSymName()); } return genOpWithBody( OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval, llvm::omp::Directive::OMPD_critical), queue, item, nameAttr); } static mlir::omp::FlushOp genFlushOp(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ObjectList &objects, const ConstructQueue &queue, ConstructQueue::const_iterator item) { llvm::SmallVector operandRange; genFlushClauses(converter, semaCtx, objects, item->clauses, loc, operandRange); return converter.getFirOpBuilder().create( converter.getCurrentLocation(), operandRange); } static mlir::omp::LoopNestOp genLoopNestOp(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item, mlir::omp::LoopNestOperands &clauseOps, llvm::ArrayRef iv, llvm::ArrayRef wrapperSyms, llvm::ArrayRef wrapperArgs, llvm::omp::Directive directive, DataSharingProcessor &dsp) { assert(wrapperSyms.size() == wrapperArgs.size() && "Number of symbols and wrapper block arguments must match"); auto ivCallback = [&](mlir::Operation *op) { genLoopVars(op, converter, loc, iv, wrapperSyms, wrapperArgs); return llvm::SmallVector(iv); }; auto *nestedEval = getCollapsedLoopEval(eval, getCollapseValue(item->clauses)); return genOpWithBody( OpWithBodyGenInfo(converter, symTable, semaCtx, loc, *nestedEval, directive) .setClauses(&item->clauses) .setDataSharingProcessor(&dsp) .setGenRegionEntryCb(ivCallback), queue, item, clauseOps); } static mlir::omp::MaskedOp genMaskedOp(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item) { lower::StatementContext stmtCtx; mlir::omp::MaskedOperands clauseOps; genMaskedClauses(converter, semaCtx, stmtCtx, item->clauses, loc, clauseOps); return genOpWithBody( OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval, llvm::omp::Directive::OMPD_masked), queue, item, clauseOps); } static mlir::omp::MasterOp genMasterOp(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item) { return genOpWithBody( OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval, llvm::omp::Directive::OMPD_master), queue, item); } static mlir::omp::OrderedOp genOrderedOp(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item) { TODO(loc, "OMPD_ordered"); return nullptr; } static mlir::omp::OrderedRegionOp genOrderedRegionOp(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item) { mlir::omp::OrderedRegionOperands clauseOps; genOrderedRegionClauses(converter, semaCtx, item->clauses, loc, clauseOps); return genOpWithBody( OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval, llvm::omp::Directive::OMPD_ordered), queue, item, clauseOps); } static mlir::omp::ParallelOp genParallelOp(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item, mlir::omp::ParallelOperands &clauseOps, llvm::ArrayRef reductionSyms, llvm::ArrayRef reductionTypes) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); auto reductionCallback = [&](mlir::Operation *op) { genReductionVars(op, converter, loc, reductionSyms, reductionTypes); return llvm::SmallVector(reductionSyms); }; OpWithBodyGenInfo genInfo = OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval, llvm::omp::Directive::OMPD_parallel) .setClauses(&item->clauses) .setGenRegionEntryCb(reductionCallback); if (!enableDelayedPrivatization) return genOpWithBody(genInfo, queue, item, clauseOps); DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval, lower::omp::isLastItemInQueue(item, queue), /*useDelayedPrivatization=*/true, &symTable); dsp.processStep1(&clauseOps); auto genRegionEntryCB = [&](mlir::Operation *op) { auto parallelOp = llvm::cast(op); llvm::SmallVector reductionLocs( clauseOps.reductionVars.size(), loc); llvm::SmallVector allRegionArgTypes; mergePrivateVarsInfo(parallelOp, reductionTypes, llvm::function_ref{ [](mlir::Value v) { return v.getType(); }}, allRegionArgTypes); llvm::SmallVector allRegionArgLocs; mergePrivateVarsInfo(parallelOp, llvm::ArrayRef(reductionLocs), llvm::function_ref{ [](mlir::Value v) { return v.getLoc(); }}, allRegionArgLocs); mlir::Region ®ion = parallelOp.getRegion(); firOpBuilder.createBlock(®ion, /*insertPt=*/{}, allRegionArgTypes, allRegionArgLocs); llvm::SmallVector allSymbols(reductionSyms); allSymbols.append(dsp.getAllSymbolsToPrivatize().begin(), dsp.getAllSymbolsToPrivatize().end()); unsigned argIdx = 0; for (const semantics::Symbol *arg : allSymbols) { auto bind = [&](const semantics::Symbol *sym) { mlir::BlockArgument blockArg = region.getArgument(argIdx); ++argIdx; converter.bindSymbol(*sym, hlfir::translateToExtendedValue( loc, firOpBuilder, hlfir::Entity{blockArg}, /*contiguousHint=*/ evaluate::IsSimplyContiguous( *sym, converter.getFoldingContext())) .first); }; if (const auto *commonDet = arg->detailsIf()) { for (const auto &mem : commonDet->objects()) bind(&*mem); } else bind(arg); } return allSymbols; }; genInfo.setGenRegionEntryCb(genRegionEntryCB).setDataSharingProcessor(&dsp); return genOpWithBody(genInfo, queue, item, clauseOps); } /// This breaks the normal prototype of the gen*Op functions: adding the /// sectionBlocks argument so that the enclosed section constructs can be /// lowered here with correct reduction symbol remapping. static mlir::omp::SectionsOp genSectionsOp(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item, const parser::OmpSectionBlocks §ionBlocks) { llvm::SmallVector reductionTypes; llvm::SmallVector reductionSyms; mlir::omp::SectionsOperands clauseOps; genSectionsClauses(converter, semaCtx, item->clauses, loc, clauseOps, reductionTypes, reductionSyms); auto &builder = converter.getFirOpBuilder(); // Insert privatizations before SECTIONS symTable.pushScope(); DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval, lower::omp::isLastItemInQueue(item, queue)); dsp.processStep1(); List nonDsaClauses; List lastprivates; for (const Clause &clause : item->clauses) { if (clause.id == llvm::omp::Clause::OMPC_lastprivate) { lastprivates.push_back(&std::get(clause.u)); } else { switch (clause.id) { case llvm::omp::Clause::OMPC_firstprivate: case llvm::omp::Clause::OMPC_private: case llvm::omp::Clause::OMPC_shared: break; default: nonDsaClauses.push_back(clause); } } } // SECTIONS construct. auto sectionsOp = builder.create(loc, clauseOps); // create entry block with reduction variables as arguments llvm::SmallVector blockArgLocs(reductionSyms.size(), loc); builder.createBlock(§ionsOp->getRegion(0), {}, reductionTypes, blockArgLocs); mlir::Operation *terminator = lower::genOpenMPTerminator(builder, sectionsOp, loc); auto reductionCallback = [&](mlir::Operation *op) { genReductionVars(op, converter, loc, reductionSyms, reductionTypes); return reductionSyms; }; // Generate nested SECTION constructs. // This is done here rather than in genOMP([...], OpenMPSectionConstruct ) // because we need to run genReductionVars on each omp.section so that the // reduction variable gets mapped to the private version for (auto [construct, nestedEval] : llvm::zip(sectionBlocks.v, eval.getNestedEvaluations())) { const auto *sectionConstruct = std::get_if(&construct.u); if (!sectionConstruct) { assert(false && "unexpected construct nested inside of SECTIONS construct"); continue; } ConstructQueue sectionQueue{buildConstructQueue( converter.getFirOpBuilder().getModule(), semaCtx, nestedEval, sectionConstruct->source, llvm::omp::Directive::OMPD_section, {})}; builder.setInsertionPoint(terminator); genOpWithBody( OpWithBodyGenInfo(converter, symTable, semaCtx, loc, nestedEval, llvm::omp::Directive::OMPD_section) .setClauses(§ionQueue.begin()->clauses) .setGenRegionEntryCb(reductionCallback), sectionQueue, sectionQueue.begin()); } if (!lastprivates.empty()) { mlir::Region §ionsBody = sectionsOp.getRegion(); assert(sectionsBody.hasOneBlock()); mlir::Block &body = sectionsBody.front(); auto lastSectionOp = llvm::find_if( llvm::reverse(body.getOperations()), [](const mlir::Operation &op) { return llvm::isa(op); }); assert(lastSectionOp != body.rend()); for (const clause::Lastprivate *lastp : lastprivates) { builder.setInsertionPoint( lastSectionOp->getRegion(0).back().getTerminator()); mlir::OpBuilder::InsertPoint insp = builder.saveInsertionPoint(); const auto &objList = std::get(lastp->t); for (const Object &object : objList) { semantics::Symbol *sym = object.sym(); converter.copyHostAssociateVar(*sym, &insp); } } } // Perform DataSharingProcessor's step2 out of SECTIONS builder.setInsertionPointAfter(sectionsOp.getOperation()); dsp.processStep2(sectionsOp, false); // Emit implicit barrier to synchronize threads and avoid data // races on post-update of lastprivate variables when `nowait` // clause is present. if (clauseOps.nowait && !lastprivates.empty()) builder.create(loc); symTable.popScope(); return sectionsOp; } static mlir::omp::SingleOp genSingleOp(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item) { mlir::omp::SingleOperands clauseOps; genSingleClauses(converter, semaCtx, item->clauses, loc, clauseOps); return genOpWithBody( OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval, llvm::omp::Directive::OMPD_single) .setClauses(&item->clauses), queue, item, clauseOps); } static mlir::omp::TargetOp genTargetOp(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); lower::StatementContext stmtCtx; bool processHostOnlyClauses = !llvm::cast(*converter.getModuleOp()) .getIsTargetDevice(); mlir::omp::TargetOperands clauseOps; llvm::SmallVector mapSyms, devicePtrSyms, deviceAddrSyms; llvm::SmallVector mapLocs, devicePtrLocs, deviceAddrLocs; llvm::SmallVector mapTypes, devicePtrTypes, deviceAddrTypes; genTargetClauses(converter, semaCtx, stmtCtx, item->clauses, loc, processHostOnlyClauses, clauseOps, mapSyms, mapLocs, mapTypes, deviceAddrSyms, deviceAddrLocs, deviceAddrTypes, devicePtrSyms, devicePtrLocs, devicePtrTypes); llvm::SmallVector privateSyms; DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval, /*shouldCollectPreDeterminedSymbols=*/ lower::omp::isLastItemInQueue(item, queue), /*useDelayedPrivatization=*/true, &symTable); dsp.processStep1(&clauseOps); // 5.8.1 Implicit Data-Mapping Attribute Rules // The following code follows the implicit data-mapping rules to map all the // symbols used inside the region that do not have explicit data-environment // attribute clauses (neither data-sharing; e.g. `private`, nor `map` // clauses). auto captureImplicitMap = [&](const semantics::Symbol &sym) { if (dsp.getAllSymbolsToPrivatize().contains(&sym)) return; // if the symbol is part of an already mapped common block, do not make a // map for it. if (const Fortran::semantics::Symbol *common = Fortran::semantics::FindCommonBlockContaining(sym.GetUltimate())) if (llvm::is_contained(mapSyms, common)) return; if (!llvm::is_contained(mapSyms, &sym)) { mlir::Value baseOp = converter.getSymbolAddress(sym); if (!baseOp) if (const auto *details = sym.template detailsIf()) { baseOp = converter.getSymbolAddress(details->symbol()); converter.copySymbolBinding(details->symbol(), sym); } if (baseOp) { llvm::SmallVector bounds; std::stringstream name; fir::ExtendedValue dataExv = converter.getSymbolExtendedValue(sym); name << sym.name().ToString(); lower::AddrAndBoundsInfo info = getDataOperandBaseAddr( converter, firOpBuilder, sym, converter.getCurrentLocation()); if (mlir::isa( fir::unwrapRefType(info.addr.getType()))) bounds = lower::genBoundsOpsFromBox( firOpBuilder, converter.getCurrentLocation(), dataExv, info); if (mlir::isa( fir::unwrapRefType(info.addr.getType()))) { bool dataExvIsAssumedSize = semantics::IsAssumedSizeArray(sym.GetUltimate()); bounds = lower::genBaseBoundsOps( firOpBuilder, converter.getCurrentLocation(), dataExv, dataExvIsAssumedSize); } llvm::omp::OpenMPOffloadMappingFlags mapFlag = llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT; mlir::omp::VariableCaptureKind captureKind = mlir::omp::VariableCaptureKind::ByRef; mlir::Type eleType = baseOp.getType(); if (auto refType = mlir::dyn_cast(baseOp.getType())) eleType = refType.getElementType(); // If a variable is specified in declare target link and if device // type is not specified as `nohost`, it needs to be mapped tofrom mlir::ModuleOp mod = firOpBuilder.getModule(); mlir::Operation *op = mod.lookupSymbol(converter.mangleName(sym)); auto declareTargetOp = llvm::dyn_cast_if_present(op); if (declareTargetOp && declareTargetOp.isDeclareTarget()) { if (declareTargetOp.getDeclareTargetCaptureClause() == mlir::omp::DeclareTargetCaptureClause::link && declareTargetOp.getDeclareTargetDeviceType() != mlir::omp::DeclareTargetDeviceType::nohost) { mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO; mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM; } } else if (fir::isa_trivial(eleType) || fir::isa_char(eleType)) { captureKind = mlir::omp::VariableCaptureKind::ByCopy; } else if (!fir::isa_builtin_cptr_type(eleType)) { mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO; mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM; } auto location = mlir::NameLoc::get(mlir::StringAttr::get(firOpBuilder.getContext(), sym.name().ToString()), baseOp.getLoc()); mlir::Value mapOp = createMapInfoOp( firOpBuilder, location, baseOp, /*varPtrPtr=*/mlir::Value{}, name.str(), bounds, /*members=*/{}, /*membersIndex=*/mlir::DenseIntElementsAttr{}, static_cast< std::underlying_type_t>( mapFlag), captureKind, baseOp.getType()); clauseOps.mapVars.push_back(mapOp); mapSyms.push_back(&sym); mapLocs.push_back(baseOp.getLoc()); mapTypes.push_back(baseOp.getType()); } } }; lower::pft::visitAllSymbols(eval, captureImplicitMap); auto targetOp = firOpBuilder.create(loc, clauseOps); genBodyOfTargetOp(converter, symTable, semaCtx, eval, targetOp, mapSyms, mapLocs, mapTypes, dsp, loc, queue, item); return targetOp; } static mlir::omp::TargetDataOp genTargetDataOp(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item) { lower::StatementContext stmtCtx; mlir::omp::TargetDataOperands clauseOps; llvm::SmallVector useDeviceTypes; llvm::SmallVector useDeviceLocs; llvm::SmallVector useDeviceSyms; genTargetDataClauses(converter, semaCtx, stmtCtx, item->clauses, loc, clauseOps, useDeviceTypes, useDeviceLocs, useDeviceSyms); auto targetDataOp = converter.getFirOpBuilder().create(loc, clauseOps); genBodyOfTargetDataOp(converter, symTable, semaCtx, eval, targetDataOp, useDeviceTypes, useDeviceLocs, useDeviceSyms, loc, queue, item); return targetDataOp; } template static OpTy genTargetEnterExitUpdateDataOp( lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); lower::StatementContext stmtCtx; // GCC 9.3.0 emits a (probably) bogus warning about an unused variable. [[maybe_unused]] llvm::omp::Directive directive; if constexpr (std::is_same_v) { directive = llvm::omp::Directive::OMPD_target_enter_data; } else if constexpr (std::is_same_v) { directive = llvm::omp::Directive::OMPD_target_exit_data; } else if constexpr (std::is_same_v) { directive = llvm::omp::Directive::OMPD_target_update; } else { llvm_unreachable("Unexpected TARGET DATA construct"); } mlir::omp::TargetEnterExitUpdateDataOperands clauseOps; genTargetEnterExitUpdateDataClauses(converter, semaCtx, stmtCtx, item->clauses, loc, directive, clauseOps); return firOpBuilder.create(loc, clauseOps); } static mlir::omp::TaskOp genTaskOp(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item) { lower::StatementContext stmtCtx; mlir::omp::TaskOperands clauseOps; genTaskClauses(converter, semaCtx, stmtCtx, item->clauses, loc, clauseOps); return genOpWithBody( OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval, llvm::omp::Directive::OMPD_task) .setClauses(&item->clauses), queue, item, clauseOps); } static mlir::omp::TaskgroupOp genTaskgroupOp(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item) { mlir::omp::TaskgroupOperands clauseOps; genTaskgroupClauses(converter, semaCtx, item->clauses, loc, clauseOps); return genOpWithBody( OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval, llvm::omp::Directive::OMPD_taskgroup) .setClauses(&item->clauses), queue, item, clauseOps); } static mlir::omp::TaskwaitOp genTaskwaitOp(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item) { mlir::omp::TaskwaitOperands clauseOps; genTaskwaitClauses(converter, semaCtx, item->clauses, loc, clauseOps); return converter.getFirOpBuilder().create(loc, clauseOps); } static mlir::omp::TaskyieldOp genTaskyieldOp(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item) { return converter.getFirOpBuilder().create(loc); } static mlir::omp::TeamsOp genTeamsOp(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item) { lower::StatementContext stmtCtx; mlir::omp::TeamsOperands clauseOps; genTeamsClauses(converter, semaCtx, stmtCtx, item->clauses, loc, clauseOps); return genOpWithBody( OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval, llvm::omp::Directive::OMPD_teams) .setClauses(&item->clauses), queue, item, clauseOps); } //===----------------------------------------------------------------------===// // Code generation functions for the standalone version of constructs that can // also be a leaf of a composite construct //===----------------------------------------------------------------------===// static void genStandaloneDistribute( lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item, DataSharingProcessor &dsp) { lower::StatementContext stmtCtx; mlir::omp::DistributeOperands distributeClauseOps; genDistributeClauses(converter, semaCtx, stmtCtx, item->clauses, loc, distributeClauseOps); mlir::omp::LoopNestOperands loopNestClauseOps; llvm::SmallVector iv; genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc, loopNestClauseOps, iv); // TODO: Populate entry block arguments with private variables. auto distributeOp = genWrapperOp( converter, loc, distributeClauseOps, /*blockArgTypes=*/{}); genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item, loopNestClauseOps, iv, /*wrapperSyms=*/{}, distributeOp.getRegion().getArguments(), llvm::omp::Directive::OMPD_distribute, dsp); } static void genStandaloneDo(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item, DataSharingProcessor &dsp) { lower::StatementContext stmtCtx; mlir::omp::WsloopOperands wsloopClauseOps; llvm::SmallVector reductionSyms; llvm::SmallVector reductionTypes; genWsloopClauses(converter, semaCtx, stmtCtx, item->clauses, loc, wsloopClauseOps, reductionTypes, reductionSyms); mlir::omp::LoopNestOperands loopNestClauseOps; llvm::SmallVector iv; genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc, loopNestClauseOps, iv); // TODO: Add private variables to entry block arguments. auto wsloopOp = genWrapperOp( converter, loc, wsloopClauseOps, reductionTypes); genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item, loopNestClauseOps, iv, reductionSyms, wsloopOp.getRegion().getArguments(), llvm::omp::Directive::OMPD_do, dsp); } static void genStandaloneParallel(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item) { lower::StatementContext stmtCtx; mlir::omp::ParallelOperands clauseOps; llvm::SmallVector reductionSyms; llvm::SmallVector reductionTypes; genParallelClauses(converter, semaCtx, stmtCtx, item->clauses, loc, clauseOps, reductionTypes, reductionSyms); genParallelOp(converter, symTable, semaCtx, eval, loc, queue, item, clauseOps, reductionSyms, reductionTypes); } static void genStandaloneSimd(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item, DataSharingProcessor &dsp) { mlir::omp::SimdOperands simdClauseOps; genSimdClauses(converter, semaCtx, item->clauses, loc, simdClauseOps); mlir::omp::LoopNestOperands loopNestClauseOps; llvm::SmallVector iv; genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc, loopNestClauseOps, iv); // TODO: Populate entry block arguments with reduction and private variables. auto simdOp = genWrapperOp(converter, loc, simdClauseOps, /*blockArgTypes=*/{}); genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item, loopNestClauseOps, iv, /*wrapperSyms=*/{}, simdOp.getRegion().getArguments(), llvm::omp::Directive::OMPD_simd, dsp); } static void genStandaloneTaskloop( lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item, DataSharingProcessor &dsp) { TODO(loc, "Taskloop construct"); } //===----------------------------------------------------------------------===// // Code generation functions for composite constructs //===----------------------------------------------------------------------===// static void genCompositeDistributeParallelDo( lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item, DataSharingProcessor &dsp) { assert(std::distance(item, queue.end()) == 3 && "Invalid leaf constructs"); TODO(loc, "Composite DISTRIBUTE PARALLEL DO"); } static void genCompositeDistributeParallelDoSimd( lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item, DataSharingProcessor &dsp) { assert(std::distance(item, queue.end()) == 4 && "Invalid leaf constructs"); TODO(loc, "Composite DISTRIBUTE PARALLEL DO SIMD"); } static void genCompositeDistributeSimd( lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item, DataSharingProcessor &dsp) { lower::StatementContext stmtCtx; assert(std::distance(item, queue.end()) == 2 && "Invalid leaf constructs"); ConstructQueue::const_iterator distributeItem = item; ConstructQueue::const_iterator simdItem = std::next(distributeItem); // Clause processing. mlir::omp::DistributeOperands distributeClauseOps; genDistributeClauses(converter, semaCtx, stmtCtx, distributeItem->clauses, loc, distributeClauseOps); mlir::omp::SimdOperands simdClauseOps; genSimdClauses(converter, semaCtx, simdItem->clauses, loc, simdClauseOps); // Pass the innermost leaf construct's clauses because that's where COLLAPSE // is placed by construct decomposition. mlir::omp::LoopNestOperands loopNestClauseOps; llvm::SmallVector iv; genLoopNestClauses(converter, semaCtx, eval, simdItem->clauses, loc, loopNestClauseOps, iv); // Operation creation. // TODO: Populate entry block arguments with private variables. auto distributeOp = genWrapperOp( converter, loc, distributeClauseOps, /*blockArgTypes=*/{}); distributeOp.setComposite(/*val=*/true); // TODO: Populate entry block arguments with reduction and private variables. auto simdOp = genWrapperOp(converter, loc, simdClauseOps, /*blockArgTypes=*/{}); simdOp.setComposite(/*val=*/true); // Construct wrapper entry block list and associated symbols. It is important // that the symbol order and the block argument order match, so that the // symbol-value bindings created are correct. // TODO: Add omp.distribute private and omp.simd private and reduction args. auto wrapperArgs = llvm::to_vector( llvm::concat(distributeOp.getRegion().getArguments(), simdOp.getRegion().getArguments())); genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, simdItem, loopNestClauseOps, iv, /*wrapperSyms=*/{}, wrapperArgs, llvm::omp::Directive::OMPD_distribute_simd, dsp); } static void genCompositeDoSimd(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item, DataSharingProcessor &dsp) { lower::StatementContext stmtCtx; assert(std::distance(item, queue.end()) == 2 && "Invalid leaf constructs"); ConstructQueue::const_iterator doItem = item; ConstructQueue::const_iterator simdItem = std::next(doItem); // Clause processing. mlir::omp::WsloopOperands wsloopClauseOps; llvm::SmallVector wsloopReductionSyms; llvm::SmallVector wsloopReductionTypes; genWsloopClauses(converter, semaCtx, stmtCtx, doItem->clauses, loc, wsloopClauseOps, wsloopReductionTypes, wsloopReductionSyms); mlir::omp::SimdOperands simdClauseOps; genSimdClauses(converter, semaCtx, simdItem->clauses, loc, simdClauseOps); // Pass the innermost leaf construct's clauses because that's where COLLAPSE // is placed by construct decomposition. mlir::omp::LoopNestOperands loopNestClauseOps; llvm::SmallVector iv; genLoopNestClauses(converter, semaCtx, eval, simdItem->clauses, loc, loopNestClauseOps, iv); // Operation creation. // TODO: Add private variables to entry block arguments. auto wsloopOp = genWrapperOp( converter, loc, wsloopClauseOps, wsloopReductionTypes); wsloopOp.setComposite(/*val=*/true); // TODO: Populate entry block arguments with reduction and private variables. auto simdOp = genWrapperOp(converter, loc, simdClauseOps, /*blockArgTypes=*/{}); simdOp.setComposite(/*val=*/true); // Construct wrapper entry block list and associated symbols. It is important // that the symbol and block argument order match, so that the symbol-value // bindings created are correct. // TODO: Add omp.wsloop private and omp.simd private and reduction args. auto wrapperArgs = llvm::to_vector(llvm::concat( wsloopOp.getRegion().getArguments(), simdOp.getRegion().getArguments())); genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, simdItem, loopNestClauseOps, iv, wsloopReductionSyms, wrapperArgs, llvm::omp::Directive::OMPD_do_simd, dsp); } static void genCompositeTaskloopSimd( lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item, DataSharingProcessor &dsp) { assert(std::distance(item, queue.end()) == 2 && "Invalid leaf constructs"); TODO(loc, "Composite TASKLOOP SIMD"); } //===----------------------------------------------------------------------===// // Dispatch //===----------------------------------------------------------------------===// static bool genOMPCompositeDispatch( lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item, DataSharingProcessor &dsp) { using llvm::omp::Directive; using lower::omp::matchLeafSequence; if (matchLeafSequence(item, queue, Directive::OMPD_distribute_parallel_do)) genCompositeDistributeParallelDo(converter, symTable, semaCtx, eval, loc, queue, item, dsp); else if (matchLeafSequence(item, queue, Directive::OMPD_distribute_parallel_do_simd)) genCompositeDistributeParallelDoSimd(converter, symTable, semaCtx, eval, loc, queue, item, dsp); else if (matchLeafSequence(item, queue, Directive::OMPD_distribute_simd)) genCompositeDistributeSimd(converter, symTable, semaCtx, eval, loc, queue, item, dsp); else if (matchLeafSequence(item, queue, Directive::OMPD_do_simd)) genCompositeDoSimd(converter, symTable, semaCtx, eval, loc, queue, item, dsp); else if (matchLeafSequence(item, queue, Directive::OMPD_taskloop_simd)) genCompositeTaskloopSimd(converter, symTable, semaCtx, eval, loc, queue, item, dsp); else return false; return true; } static void genOMPDispatch(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item) { assert(item != queue.end()); std::optional loopDsp; bool loopLeaf = llvm::omp::getDirectiveAssociation(item->id) == llvm::omp::Association::Loop; if (loopLeaf) { symTable.pushScope(); // TODO: Use one DataSharingProcessor for each leaf of a composite // construct. loopDsp.emplace(converter, semaCtx, item->clauses, eval, /*shouldCollectPreDeterminedSymbols=*/true, /*useDelayedPrivatization=*/false, &symTable); loopDsp->processStep1(); if (genOMPCompositeDispatch(converter, symTable, semaCtx, eval, loc, queue, item, *loopDsp)) { symTable.popScope(); return; } } switch (llvm::omp::Directive dir = item->id) { case llvm::omp::Directive::OMPD_barrier: genBarrierOp(converter, symTable, semaCtx, eval, loc, queue, item); break; case llvm::omp::Directive::OMPD_distribute: genStandaloneDistribute(converter, symTable, semaCtx, eval, loc, queue, item, *loopDsp); break; case llvm::omp::Directive::OMPD_do: genStandaloneDo(converter, symTable, semaCtx, eval, loc, queue, item, *loopDsp); break; case llvm::omp::Directive::OMPD_loop: TODO(loc, "Unhandled directive " + llvm::omp::getOpenMPDirectiveName(dir)); break; case llvm::omp::Directive::OMPD_masked: genMaskedOp(converter, symTable, semaCtx, eval, loc, queue, item); break; case llvm::omp::Directive::OMPD_master: genMasterOp(converter, symTable, semaCtx, eval, loc, queue, item); break; case llvm::omp::Directive::OMPD_ordered: // Block-associated "ordered" construct. genOrderedRegionOp(converter, symTable, semaCtx, eval, loc, queue, item); break; case llvm::omp::Directive::OMPD_parallel: genStandaloneParallel(converter, symTable, semaCtx, eval, loc, queue, item); break; case llvm::omp::Directive::OMPD_section: llvm_unreachable("genOMPDispatch: OMPD_section"); // Lowered in the enclosing genSectionsOp. break; case llvm::omp::Directive::OMPD_sections: // Called directly from genOMP([...], OpenMPSectionsConstruct) because it // has a different prototype. // This code path is still taken when iterating through the construct queue // in genBodyOfOp break; case llvm::omp::Directive::OMPD_simd: genStandaloneSimd(converter, symTable, semaCtx, eval, loc, queue, item, *loopDsp); break; case llvm::omp::Directive::OMPD_single: genSingleOp(converter, symTable, semaCtx, eval, loc, queue, item); break; case llvm::omp::Directive::OMPD_target: genTargetOp(converter, symTable, semaCtx, eval, loc, queue, item); break; case llvm::omp::Directive::OMPD_target_data: genTargetDataOp(converter, symTable, semaCtx, eval, loc, queue, item); break; case llvm::omp::Directive::OMPD_target_enter_data: genTargetEnterExitUpdateDataOp( converter, symTable, semaCtx, loc, queue, item); break; case llvm::omp::Directive::OMPD_target_exit_data: genTargetEnterExitUpdateDataOp( converter, symTable, semaCtx, loc, queue, item); break; case llvm::omp::Directive::OMPD_target_update: genTargetEnterExitUpdateDataOp( converter, symTable, semaCtx, loc, queue, item); break; case llvm::omp::Directive::OMPD_task: genTaskOp(converter, symTable, semaCtx, eval, loc, queue, item); break; case llvm::omp::Directive::OMPD_taskgroup: genTaskgroupOp(converter, symTable, semaCtx, eval, loc, queue, item); break; case llvm::omp::Directive::OMPD_taskloop: genStandaloneTaskloop(converter, symTable, semaCtx, eval, loc, queue, item, *loopDsp); break; case llvm::omp::Directive::OMPD_taskwait: genTaskwaitOp(converter, symTable, semaCtx, eval, loc, queue, item); break; case llvm::omp::Directive::OMPD_taskyield: genTaskyieldOp(converter, symTable, semaCtx, eval, loc, queue, item); break; case llvm::omp::Directive::OMPD_teams: genTeamsOp(converter, symTable, semaCtx, eval, loc, queue, item); break; case llvm::omp::Directive::OMPD_tile: case llvm::omp::Directive::OMPD_unroll: TODO(loc, "Unhandled loop directive (" + llvm::omp::getOpenMPDirectiveName(dir) + ")"); // case llvm::omp::Directive::OMPD_workdistribute: case llvm::omp::Directive::OMPD_workshare: // FIXME: Workshare is not a commonly used OpenMP construct, an // implementation for this feature will come later. For the codes // that use this construct, add a single construct for now. genSingleOp(converter, symTable, semaCtx, eval, loc, queue, item); break; default: // Combined and composite constructs should have been split into a sequence // of leaf constructs when building the construct queue. assert(!llvm::omp::isLeafConstruct(dir) && "Unexpected compound construct."); break; } if (loopLeaf) symTable.popScope(); } //===----------------------------------------------------------------------===// // OpenMPDeclarativeConstruct visitors //===----------------------------------------------------------------------===// static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, const parser::OpenMPDeclarativeAllocate &declarativeAllocate) { TODO(converter.getCurrentLocation(), "OpenMPDeclarativeAllocate"); } static void genOMP( lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, const parser::OpenMPDeclareReductionConstruct &declareReductionConstruct) { TODO(converter.getCurrentLocation(), "OpenMPDeclareReductionConstruct"); } static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, const parser::OpenMPDeclareSimdConstruct &declareSimdConstruct) { TODO(converter.getCurrentLocation(), "OpenMPDeclareSimdConstruct"); } static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, const parser::OpenMPDeclareTargetConstruct &declareTargetConstruct) { mlir::omp::DeclareTargetOperands clauseOps; llvm::SmallVector symbolAndClause; mlir::ModuleOp mod = converter.getFirOpBuilder().getModule(); getDeclareTargetInfo(converter, semaCtx, eval, declareTargetConstruct, clauseOps, symbolAndClause); for (const DeclareTargetCapturePair &symClause : symbolAndClause) { mlir::Operation *op = mod.lookupSymbol( converter.mangleName(std::get(symClause))); // Some symbols are deferred until later in the module, these are handled // upon finalization of the module for OpenMP inside of Bridge, so we simply // skip for now. if (!op) continue; markDeclareTarget( op, converter, std::get(symClause), clauseOps.deviceType); } } static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, const parser::OpenMPRequiresConstruct &requiresConstruct) { // Requires directives are gathered and processed in semantics and // then combined in the lowering bridge before triggering codegen // just once. Hence, there is no need to lower each individual // occurrence here. } static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, const parser::OpenMPThreadprivate &threadprivate) { // The directive is lowered when instantiating the variable to // support the case of threadprivate variable declared in module. } static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, const parser::OpenMPDeclarativeConstruct &ompDeclConstruct) { Fortran::common::visit( [&](auto &&s) { return genOMP(converter, symTable, semaCtx, eval, s); }, ompDeclConstruct.u); } //===----------------------------------------------------------------------===// // OpenMPStandaloneConstruct visitors //===----------------------------------------------------------------------===// static void genOMP( lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, const parser::OpenMPSimpleStandaloneConstruct &simpleStandaloneConstruct) { const auto &directive = std::get( simpleStandaloneConstruct.t); List clauses = makeClauses( std::get(simpleStandaloneConstruct.t), semaCtx); mlir::Location currentLocation = converter.genLocation(directive.source); ConstructQueue queue{ buildConstructQueue(converter.getFirOpBuilder().getModule(), semaCtx, eval, directive.source, directive.v, clauses)}; if (directive.v == llvm::omp::Directive::OMPD_ordered) { // Standalone "ordered" directive. genOrderedOp(converter, symTable, semaCtx, eval, currentLocation, queue, queue.begin()); } else { // Dispatch handles the "block-associated" variant of "ordered". genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue, queue.begin()); } } static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, const parser::OpenMPFlushConstruct &flushConstruct) { const auto &verbatim = std::get(flushConstruct.t); const auto &objectList = std::get>(flushConstruct.t); const auto &clauseList = std::get>>( flushConstruct.t); ObjectList objects = objectList ? makeObjects(*objectList, semaCtx) : ObjectList{}; List clauses = clauseList ? makeList(*clauseList, [&](auto &&s) { return makeClause(s.v, semaCtx); }) : List{}; mlir::Location currentLocation = converter.genLocation(verbatim.source); ConstructQueue queue{buildConstructQueue( converter.getFirOpBuilder().getModule(), semaCtx, eval, verbatim.source, llvm::omp::Directive::OMPD_flush, clauses)}; genFlushOp(converter, symTable, semaCtx, eval, currentLocation, objects, queue, queue.begin()); } static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, const parser::OpenMPCancelConstruct &cancelConstruct) { TODO(converter.getCurrentLocation(), "OpenMPCancelConstruct"); } static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, const parser::OpenMPCancellationPointConstruct &cancellationPointConstruct) { TODO(converter.getCurrentLocation(), "OpenMPCancelConstruct"); } static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, const parser::OpenMPStandaloneConstruct &standaloneConstruct) { Fortran::common::visit( [&](auto &&s) { return genOMP(converter, symTable, semaCtx, eval, s); }, standaloneConstruct.u); } //===----------------------------------------------------------------------===// // OpenMPConstruct visitors //===----------------------------------------------------------------------===// static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, const parser::OpenMPAllocatorsConstruct &allocsConstruct) { TODO(converter.getCurrentLocation(), "OpenMPAllocatorsConstruct"); } static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, const parser::OpenMPAtomicConstruct &atomicConstruct) { Fortran::common::visit( common::visitors{ [&](const parser::OmpAtomicRead &atomicRead) { mlir::Location loc = converter.genLocation(atomicRead.source); lower::genOmpAccAtomicRead( converter, atomicRead, loc); }, [&](const parser::OmpAtomicWrite &atomicWrite) { mlir::Location loc = converter.genLocation(atomicWrite.source); lower::genOmpAccAtomicWrite( converter, atomicWrite, loc); }, [&](const parser::OmpAtomic &atomicConstruct) { mlir::Location loc = converter.genLocation(atomicConstruct.source); lower::genOmpAtomic( converter, atomicConstruct, loc); }, [&](const parser::OmpAtomicUpdate &atomicUpdate) { mlir::Location loc = converter.genLocation(atomicUpdate.source); lower::genOmpAccAtomicUpdate( converter, atomicUpdate, loc); }, [&](const parser::OmpAtomicCapture &atomicCapture) { mlir::Location loc = converter.genLocation(atomicCapture.source); lower::genOmpAccAtomicCapture( converter, atomicCapture, loc); }, }, atomicConstruct.u); } static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, const parser::OpenMPBlockConstruct &blockConstruct) { const auto &beginBlockDirective = std::get(blockConstruct.t); const auto &endBlockDirective = std::get(blockConstruct.t); mlir::Location currentLocation = converter.genLocation(beginBlockDirective.source); const auto origDirective = std::get(beginBlockDirective.t).v; List clauses = makeClauses( std::get(beginBlockDirective.t), semaCtx); clauses.append(makeClauses( std::get(endBlockDirective.t), semaCtx)); assert(llvm::omp::blockConstructSet.test(origDirective) && "Expected block construct"); (void)origDirective; for (const Clause &clause : clauses) { mlir::Location clauseLocation = converter.genLocation(clause.source); if (!std::holds_alternative(clause.u) && !std::holds_alternative(clause.u) && !std::holds_alternative(clause.u) && !std::holds_alternative(clause.u) && !std::holds_alternative(clause.u) && !std::holds_alternative(clause.u) && !std::holds_alternative(clause.u) && !std::holds_alternative(clause.u) && !std::holds_alternative(clause.u) && !std::holds_alternative(clause.u) && !std::holds_alternative(clause.u) && !std::holds_alternative(clause.u) && !std::holds_alternative(clause.u) && !std::holds_alternative(clause.u) && !std::holds_alternative(clause.u) && !std::holds_alternative(clause.u) && !std::holds_alternative(clause.u) && !std::holds_alternative(clause.u) && !std::holds_alternative(clause.u) && !std::holds_alternative(clause.u) && !std::holds_alternative(clause.u) && !std::holds_alternative(clause.u) && !std::holds_alternative(clause.u) && !std::holds_alternative(clause.u) && !std::holds_alternative(clause.u)) { TODO(clauseLocation, "OpenMP Block construct clause"); } } llvm::omp::Directive directive = std::get(beginBlockDirective.t).v; const parser::CharBlock &source = std::get(beginBlockDirective.t).source; ConstructQueue queue{ buildConstructQueue(converter.getFirOpBuilder().getModule(), semaCtx, eval, source, directive, clauses)}; genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue, queue.begin()); } static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, const parser::OpenMPCriticalConstruct &criticalConstruct) { const auto &cd = std::get(criticalConstruct.t); List clauses = makeClauses(std::get(cd.t), semaCtx); ConstructQueue queue{buildConstructQueue( converter.getFirOpBuilder().getModule(), semaCtx, eval, cd.source, llvm::omp::Directive::OMPD_critical, clauses)}; const auto &name = std::get>(cd.t); mlir::Location currentLocation = converter.getCurrentLocation(); genCriticalOp(converter, symTable, semaCtx, eval, currentLocation, queue, queue.begin(), name); } static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, const parser::OpenMPExecutableAllocate &execAllocConstruct) { TODO(converter.getCurrentLocation(), "OpenMPExecutableAllocate"); } static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, const parser::OpenMPLoopConstruct &loopConstruct) { const auto &beginLoopDirective = std::get(loopConstruct.t); List clauses = makeClauses( std::get(beginLoopDirective.t), semaCtx); if (auto &endLoopDirective = std::get>( loopConstruct.t)) { clauses.append(makeClauses( std::get(endLoopDirective->t), semaCtx)); } mlir::Location currentLocation = converter.genLocation(beginLoopDirective.source); llvm::omp::Directive directive = std::get(beginLoopDirective.t).v; const parser::CharBlock &source = std::get(beginLoopDirective.t).source; ConstructQueue queue{ buildConstructQueue(converter.getFirOpBuilder().getModule(), semaCtx, eval, source, directive, clauses)}; genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue, queue.begin()); } static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, const parser::OpenMPSectionConstruct §ionConstruct) { // Do nothing here. SECTION is lowered inside of the lowering for Sections } static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, const parser::OpenMPSectionsConstruct §ionsConstruct) { const auto &beginSectionsDirective = std::get(sectionsConstruct.t); List clauses = makeClauses( std::get(beginSectionsDirective.t), semaCtx); const auto &endSectionsDirective = std::get(sectionsConstruct.t); const auto §ionBlocks = std::get(sectionsConstruct.t); clauses.append(makeClauses( std::get(endSectionsDirective.t), semaCtx)); mlir::Location currentLocation = converter.getCurrentLocation(); llvm::omp::Directive directive = std::get(beginSectionsDirective.t).v; const parser::CharBlock &source = std::get(beginSectionsDirective.t).source; ConstructQueue queue{ buildConstructQueue(converter.getFirOpBuilder().getModule(), semaCtx, eval, source, directive, clauses)}; ConstructQueue::iterator next = queue.begin(); // Generate constructs that come first e.g. Parallel while (next != queue.end() && next->id != llvm::omp::Directive::OMPD_sections) { genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue, next); next = std::next(next); } // call genSectionsOp directly (not via genOMPDispatch) so that we can add the // sectionBlocks argument assert(next != queue.end()); assert(next->id == llvm::omp::Directive::OMPD_sections); genSectionsOp(converter, symTable, semaCtx, eval, currentLocation, queue, next, sectionBlocks); assert(std::next(next) == queue.end()); } static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, const parser::OpenMPConstruct &ompConstruct) { Fortran::common::visit( [&](auto &&s) { return genOMP(converter, symTable, semaCtx, eval, s); }, ompConstruct.u); } //===----------------------------------------------------------------------===// // Public functions //===----------------------------------------------------------------------===// mlir::Operation *Fortran::lower::genOpenMPTerminator(fir::FirOpBuilder &builder, mlir::Operation *op, mlir::Location loc) { if (mlir::isa(op)) return builder.create(loc); return builder.create(loc); } void Fortran::lower::genOpenMPConstruct(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, const parser::OpenMPConstruct &omp) { symTable.pushScope(); genOMP(converter, symTable, semaCtx, eval, omp); symTable.popScope(); } void Fortran::lower::genOpenMPDeclarativeConstruct( lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, const parser::OpenMPDeclarativeConstruct &omp) { genOMP(converter, symTable, semaCtx, eval, omp); genNestedEvaluations(converter, eval); } void Fortran::lower::genOpenMPSymbolProperties( lower::AbstractConverter &converter, const lower::pft::Variable &var) { assert(var.hasSymbol() && "Expecting Symbol"); const semantics::Symbol &sym = var.getSymbol(); if (sym.test(semantics::Symbol::Flag::OmpThreadprivate)) lower::genThreadprivateOp(converter, var); if (sym.test(semantics::Symbol::Flag::OmpDeclareTarget)) lower::genDeclareTargetIntGlobal(converter, var); } int64_t Fortran::lower::getCollapseValue(const parser::OmpClauseList &clauseList) { for (const parser::OmpClause &clause : clauseList.v) { if (const auto &collapseClause = std::get_if(&clause.u)) { const auto *expr = semantics::GetExpr(collapseClause->v); return evaluate::ToInt64(*expr).value(); } } return 1; } void Fortran::lower::genThreadprivateOp(lower::AbstractConverter &converter, const lower::pft::Variable &var) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); mlir::Location currentLocation = converter.getCurrentLocation(); const semantics::Symbol &sym = var.getSymbol(); mlir::Value symThreadprivateValue; if (const semantics::Symbol *common = semantics::FindCommonBlockContaining(sym.GetUltimate())) { mlir::Value commonValue = converter.getSymbolAddress(*common); if (mlir::isa(commonValue.getDefiningOp())) { // Generate ThreadprivateOp for a common block instead of its members and // only do it once for a common block. return; } // Generate ThreadprivateOp and rebind the common block. mlir::Value commonThreadprivateValue = firOpBuilder.create( currentLocation, commonValue.getType(), commonValue); converter.bindSymbol(*common, commonThreadprivateValue); // Generate the threadprivate value for the common block member. symThreadprivateValue = genCommonBlockMember(converter, currentLocation, sym, commonThreadprivateValue); } else if (!var.isGlobal()) { // Non-global variable which can be in threadprivate directive must be one // variable in main program, and it has implicit SAVE attribute. Take it as // with SAVE attribute, so to create GlobalOp for it to simplify the // translation to LLVM IR. // Avoids performing multiple globalInitializations. fir::GlobalOp global; auto module = converter.getModuleOp(); std::string globalName = converter.mangleName(sym); if (module.lookupSymbol(globalName)) global = module.lookupSymbol(globalName); else global = globalInitialization(converter, firOpBuilder, sym, var, currentLocation); mlir::Value symValue = firOpBuilder.create( currentLocation, global.resultType(), global.getSymbol()); symThreadprivateValue = firOpBuilder.create( currentLocation, symValue.getType(), symValue); } else { mlir::Value symValue = converter.getSymbolAddress(sym); // The symbol may be use-associated multiple times, and nothing needs to be // done after the original symbol is mapped to the threadprivatized value // for the first time. Use the threadprivatized value directly. mlir::Operation *op; if (auto declOp = symValue.getDefiningOp()) op = declOp.getMemref().getDefiningOp(); else op = symValue.getDefiningOp(); if (mlir::isa(op)) return; symThreadprivateValue = firOpBuilder.create( currentLocation, symValue.getType(), symValue); } fir::ExtendedValue sexv = converter.getSymbolExtendedValue(sym); fir::ExtendedValue symThreadprivateExv = getExtendedValue(sexv, symThreadprivateValue); converter.bindSymbol(sym, symThreadprivateExv); } // This function replicates threadprivate's behaviour of generating // an internal fir.GlobalOp for non-global variables in the main program // that have the implicit SAVE attribute, to simplifiy LLVM-IR and MLIR // generation. void Fortran::lower::genDeclareTargetIntGlobal( lower::AbstractConverter &converter, const lower::pft::Variable &var) { if (!var.isGlobal()) { // A non-global variable which can be in a declare target directive must // be a variable in the main program, and it has the implicit SAVE // attribute. We create a GlobalOp for it to simplify the translation to // LLVM IR. globalInitialization(converter, converter.getFirOpBuilder(), var.getSymbol(), var, converter.getCurrentLocation()); } } bool Fortran::lower::isOpenMPTargetConstruct( const parser::OpenMPConstruct &omp) { llvm::omp::Directive dir = llvm::omp::Directive::OMPD_unknown; if (const auto *block = std::get_if(&omp.u)) { const auto &begin = std::get(block->t); dir = std::get(begin.t).v; } else if (const auto *loop = std::get_if(&omp.u)) { const auto &begin = std::get(loop->t); dir = std::get(begin.t).v; } return llvm::omp::allTargetSet.test(dir); } void Fortran::lower::gatherOpenMPDeferredDeclareTargets( lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, const parser::OpenMPDeclarativeConstruct &ompDecl, llvm::SmallVectorImpl &deferredDeclareTarget) { Fortran::common::visit( common::visitors{ [&](const parser::OpenMPDeclareTargetConstruct &ompReq) { collectDeferredDeclareTargets(converter, semaCtx, eval, ompReq, deferredDeclareTarget); }, [&](const auto &) {}, }, ompDecl.u); } bool Fortran::lower::isOpenMPDeviceDeclareTarget( lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, const parser::OpenMPDeclarativeConstruct &ompDecl) { return Fortran::common::visit( common::visitors{ [&](const parser::OpenMPDeclareTargetConstruct &ompReq) { mlir::omp::DeclareTargetDeviceType targetType = getDeclareTargetFunctionDevice(converter, semaCtx, eval, ompReq) .value_or(mlir::omp::DeclareTargetDeviceType::host); return targetType != mlir::omp::DeclareTargetDeviceType::host; }, [&](const auto &) { return false; }, }, ompDecl.u); } // In certain cases such as subroutine or function interfaces which declare // but do not define or directly call the subroutine or function in the same // module, their lowering is delayed until after the declare target construct // itself is processed, so there symbol is not within the table. // // This function will also return true if we encounter any device declare // target cases, to satisfy checking if we require the requires attributes // on the module. bool Fortran::lower::markOpenMPDeferredDeclareTargetFunctions( mlir::Operation *mod, llvm::SmallVectorImpl &deferredDeclareTargets, AbstractConverter &converter) { bool deviceCodeFound = false; auto modOp = llvm::cast(mod); for (auto declTar : deferredDeclareTargets) { mlir::Operation *op = modOp.lookupSymbol(converter.mangleName(declTar.sym)); // Due to interfaces being optionally emitted on usage in a module, // not finding an operation at this point cannot be a hard error, we // simply ignore it for now. // TODO: Add semantic checks for detecting cases where an erronous // (undefined) symbol has been supplied to a declare target clause if (!op) continue; auto devType = declTar.declareTargetDeviceType; if (!deviceCodeFound && devType != mlir::omp::DeclareTargetDeviceType::host) deviceCodeFound = true; markDeclareTarget(op, converter, declTar.declareTargetCaptureClause, devType); } return deviceCodeFound; } void Fortran::lower::genOpenMPRequires(mlir::Operation *mod, const semantics::Symbol *symbol) { using MlirRequires = mlir::omp::ClauseRequires; using SemaRequires = semantics::WithOmpDeclarative::RequiresFlag; if (auto offloadMod = llvm::dyn_cast(mod)) { semantics::WithOmpDeclarative::RequiresFlags semaFlags; if (symbol) { common::visit( [&](const auto &details) { if constexpr (std::is_base_of_v>) { if (details.has_ompRequires()) semaFlags = *details.ompRequires(); } }, symbol->details()); } // Use pre-populated omp.requires module attribute if it was set, so that // the "-fopenmp-force-usm" compiler option is honored. MlirRequires mlirFlags = offloadMod.getRequires(); if (semaFlags.test(SemaRequires::ReverseOffload)) mlirFlags = mlirFlags | MlirRequires::reverse_offload; if (semaFlags.test(SemaRequires::UnifiedAddress)) mlirFlags = mlirFlags | MlirRequires::unified_address; if (semaFlags.test(SemaRequires::UnifiedSharedMemory)) mlirFlags = mlirFlags | MlirRequires::unified_shared_memory; if (semaFlags.test(SemaRequires::DynamicAllocators)) mlirFlags = mlirFlags | MlirRequires::dynamic_allocators; offloadMod.setRequires(mlirFlags); } }