mirror of
https://github.com/llvm/llvm-project.git
synced 2025-04-25 03:56:06 +00:00
[flang] Simplify hlfir.sum total reductions. (#119482)
I am trying to switch to keeping the reduction value in a temporary scalar location so that I can use hlfir::genLoopNest easily. This also allows using omp.loop_nest with worksharing for OpenMP.
This commit is contained in:
parent
af5d3afff5
commit
a00946fc94
@ -366,6 +366,10 @@ struct LoopNest {
|
||||
/// Generate a fir.do_loop nest looping from 1 to extents[i].
|
||||
/// \p isUnordered specifies whether the loops in the loop nest
|
||||
/// are unordered.
|
||||
///
|
||||
/// NOTE: genLoopNestWithReductions() should be used in favor
|
||||
/// of this method, though, it cannot generate OpenMP workshare
|
||||
/// loop constructs currently.
|
||||
LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder,
|
||||
mlir::ValueRange extents, bool isUnordered = false,
|
||||
bool emitWorkshareLoop = false);
|
||||
@ -376,6 +380,37 @@ inline LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder,
|
||||
isUnordered, emitWorkshareLoop);
|
||||
}
|
||||
|
||||
/// The type of a callback that generates the body of a reduction
|
||||
/// loop nest. It takes a location and a builder, as usual.
|
||||
/// In addition, the first set of values are the values of the loops'
|
||||
/// induction variables. The second set of values are the values
|
||||
/// of the reductions on entry to the innermost loop.
|
||||
/// The callback must return the updated values of the reductions.
|
||||
using ReductionLoopBodyGenerator = std::function<llvm::SmallVector<mlir::Value>(
|
||||
mlir::Location, fir::FirOpBuilder &, mlir::ValueRange, mlir::ValueRange)>;
|
||||
|
||||
/// Generate a loop nest loopong from 1 to \p extents[i] and reducing
|
||||
/// a set of values.
|
||||
/// \p isUnordered specifies whether the loops in the loop nest
|
||||
/// are unordered.
|
||||
/// \p reductionInits are the initial values of the reductions
|
||||
/// on entry to the outermost loop.
|
||||
/// \p genBody callback is repsonsible for generating the code
|
||||
/// that updates the reduction values in the innermost loop.
|
||||
///
|
||||
/// NOTE: the implementation of this function may decide
|
||||
/// to perform the reductions on SSA or in memory.
|
||||
/// In the latter case, this function is responsible for
|
||||
/// allocating/loading/storing the reduction variables,
|
||||
/// and making sure they have proper data sharing attributes
|
||||
/// in case any parallel constructs are present around the point
|
||||
/// of the loop nest insertion, or if the function decides
|
||||
/// to use any worksharing loop constructs for the loop nest.
|
||||
llvm::SmallVector<mlir::Value> genLoopNestWithReductions(
|
||||
mlir::Location loc, fir::FirOpBuilder &builder, mlir::ValueRange extents,
|
||||
mlir::ValueRange reductionInits, const ReductionLoopBodyGenerator &genBody,
|
||||
bool isUnordered = false);
|
||||
|
||||
/// Inline the body of an hlfir.elemental at the current insertion point
|
||||
/// given a list of one based indices. This generates the computation
|
||||
/// of one element of the elemental expression. Return the YieldElementOp
|
||||
|
@ -910,6 +910,56 @@ hlfir::LoopNest hlfir::genLoopNest(mlir::Location loc,
|
||||
return loopNest;
|
||||
}
|
||||
|
||||
llvm::SmallVector<mlir::Value> hlfir::genLoopNestWithReductions(
|
||||
mlir::Location loc, fir::FirOpBuilder &builder, mlir::ValueRange extents,
|
||||
mlir::ValueRange reductionInits, const ReductionLoopBodyGenerator &genBody,
|
||||
bool isUnordered) {
|
||||
assert(!extents.empty() && "must have at least one extent");
|
||||
// Build loop nest from column to row.
|
||||
auto one = builder.create<mlir::arith::ConstantIndexOp>(loc, 1);
|
||||
mlir::Type indexType = builder.getIndexType();
|
||||
unsigned dim = extents.size() - 1;
|
||||
fir::DoLoopOp outerLoop = nullptr;
|
||||
fir::DoLoopOp parentLoop = nullptr;
|
||||
llvm::SmallVector<mlir::Value> oneBasedIndices;
|
||||
oneBasedIndices.resize(dim + 1);
|
||||
for (auto extent : llvm::reverse(extents)) {
|
||||
auto ub = builder.createConvert(loc, indexType, extent);
|
||||
|
||||
// The outermost loop takes reductionInits as the initial
|
||||
// values of its iter-args.
|
||||
// A child loop takes its iter-args from the region iter-args
|
||||
// of its parent loop.
|
||||
fir::DoLoopOp doLoop;
|
||||
if (!parentLoop) {
|
||||
doLoop = builder.create<fir::DoLoopOp>(loc, one, ub, one, isUnordered,
|
||||
/*finalCountValue=*/false,
|
||||
reductionInits);
|
||||
} else {
|
||||
doLoop = builder.create<fir::DoLoopOp>(loc, one, ub, one, isUnordered,
|
||||
/*finalCountValue=*/false,
|
||||
parentLoop.getRegionIterArgs());
|
||||
// Return the results of the child loop from its parent loop.
|
||||
builder.create<fir::ResultOp>(loc, doLoop.getResults());
|
||||
}
|
||||
|
||||
builder.setInsertionPointToStart(doLoop.getBody());
|
||||
// Reverse the indices so they are in column-major order.
|
||||
oneBasedIndices[dim--] = doLoop.getInductionVar();
|
||||
if (!outerLoop)
|
||||
outerLoop = doLoop;
|
||||
parentLoop = doLoop;
|
||||
}
|
||||
|
||||
llvm::SmallVector<mlir::Value> reductionValues;
|
||||
reductionValues =
|
||||
genBody(loc, builder, oneBasedIndices, parentLoop.getRegionIterArgs());
|
||||
builder.setInsertionPointToEnd(parentLoop.getBody());
|
||||
builder.create<fir::ResultOp>(loc, reductionValues);
|
||||
builder.setInsertionPointAfter(outerLoop);
|
||||
return outerLoop->getResults();
|
||||
}
|
||||
|
||||
static fir::ExtendedValue translateVariableToExtendedValue(
|
||||
mlir::Location loc, fir::FirOpBuilder &builder, hlfir::Entity variable,
|
||||
bool forceHlfirBase = false, bool contiguousHint = false) {
|
||||
|
@ -106,34 +106,43 @@ public:
|
||||
mlir::PatternRewriter &rewriter) const override {
|
||||
mlir::Location loc = sum.getLoc();
|
||||
fir::FirOpBuilder builder{rewriter, sum.getOperation()};
|
||||
hlfir::ExprType expr = mlir::dyn_cast<hlfir::ExprType>(sum.getType());
|
||||
assert(expr && "expected an expression type for the result of hlfir.sum");
|
||||
mlir::Type elementType = expr.getElementType();
|
||||
mlir::Type elementType = hlfir::getFortranElementType(sum.getType());
|
||||
hlfir::Entity array = hlfir::Entity{sum.getArray()};
|
||||
mlir::Value mask = sum.getMask();
|
||||
mlir::Value dim = sum.getDim();
|
||||
int64_t dimVal = fir::getIntIfConstant(dim).value_or(0);
|
||||
bool isTotalReduction = hlfir::Entity{sum}.getRank() == 0;
|
||||
int64_t dimVal =
|
||||
isTotalReduction ? 0 : fir::getIntIfConstant(dim).value_or(0);
|
||||
mlir::Value resultShape, dimExtent;
|
||||
std::tie(resultShape, dimExtent) =
|
||||
genResultShape(loc, builder, array, dimVal);
|
||||
llvm::SmallVector<mlir::Value> arrayExtents;
|
||||
if (isTotalReduction)
|
||||
arrayExtents = genArrayExtents(loc, builder, array);
|
||||
else
|
||||
std::tie(resultShape, dimExtent) =
|
||||
genResultShapeForPartialReduction(loc, builder, array, dimVal);
|
||||
|
||||
// If the mask is present and is a scalar, then we'd better load its value
|
||||
// outside of the reduction loop making the loop unswitching easier.
|
||||
mlir::Value isPresentPred, maskValue;
|
||||
if (mask) {
|
||||
if (mlir::isa<fir::BaseBoxType>(mask.getType())) {
|
||||
// MASK represented by a box might be dynamically optional,
|
||||
// so we have to check for its presence before accessing it.
|
||||
isPresentPred =
|
||||
builder.create<fir::IsPresentOp>(loc, builder.getI1Type(), mask);
|
||||
}
|
||||
|
||||
if (hlfir::Entity{mask}.isScalar())
|
||||
maskValue = genMaskValue(loc, builder, mask, isPresentPred, {});
|
||||
}
|
||||
|
||||
auto genKernel = [&](mlir::Location loc, fir::FirOpBuilder &builder,
|
||||
mlir::ValueRange inputIndices) -> hlfir::Entity {
|
||||
// Loop over all indices in the DIM dimension, and reduce all values.
|
||||
// We do not need to create the reduction loop always: if we can
|
||||
// slice the input array given the inputIndices, then we can
|
||||
// just apply a new SUM operation (total reduction) to the slice.
|
||||
// For the time being, generate the explicit loop because the slicing
|
||||
// requires generating an elemental operation for the input array
|
||||
// (and the mask, if present).
|
||||
// TODO: produce the slices and new SUM after adding a pattern
|
||||
// for expanding total reduction SUM case.
|
||||
mlir::Type indexType = builder.getIndexType();
|
||||
auto one = builder.createIntegerConstant(loc, indexType, 1);
|
||||
auto ub = builder.createConvert(loc, indexType, dimExtent);
|
||||
// If DIM is not present, do total reduction.
|
||||
|
||||
// Initial value for the reduction.
|
||||
mlir::Value initValue = genInitValue(loc, builder, elementType);
|
||||
mlir::Value reductionInitValue = genInitValue(loc, builder, elementType);
|
||||
|
||||
// The reduction loop may be unordered if FastMathFlags::reassoc
|
||||
// transformations are allowed. The integer reduction is always
|
||||
@ -142,79 +151,83 @@ public:
|
||||
static_cast<bool>(sum.getFastmath() &
|
||||
mlir::arith::FastMathFlags::reassoc);
|
||||
|
||||
// If the mask is present and is a scalar, then we'd better load its value
|
||||
// outside of the reduction loop making the loop unswitching easier.
|
||||
// Maybe it is worth hoisting it from the elemental operation as well.
|
||||
mlir::Value isPresentPred, maskValue;
|
||||
if (mask) {
|
||||
if (mlir::isa<fir::BaseBoxType>(mask.getType())) {
|
||||
// MASK represented by a box might be dynamically optional,
|
||||
// so we have to check for its presence before accessing it.
|
||||
isPresentPred =
|
||||
builder.create<fir::IsPresentOp>(loc, builder.getI1Type(), mask);
|
||||
llvm::SmallVector<mlir::Value> extents;
|
||||
if (isTotalReduction)
|
||||
extents = arrayExtents;
|
||||
else
|
||||
extents.push_back(
|
||||
builder.createConvert(loc, builder.getIndexType(), dimExtent));
|
||||
|
||||
auto genBody = [&](mlir::Location loc, fir::FirOpBuilder &builder,
|
||||
mlir::ValueRange oneBasedIndices,
|
||||
mlir::ValueRange reductionArgs)
|
||||
-> llvm::SmallVector<mlir::Value, 1> {
|
||||
// Generate the reduction loop-nest body.
|
||||
// The initial reduction value in the innermost loop
|
||||
// is passed via reductionArgs[0].
|
||||
llvm::SmallVector<mlir::Value> indices;
|
||||
if (isTotalReduction) {
|
||||
indices = oneBasedIndices;
|
||||
} else {
|
||||
indices = inputIndices;
|
||||
indices.insert(indices.begin() + dimVal - 1, oneBasedIndices[0]);
|
||||
}
|
||||
|
||||
if (hlfir::Entity{mask}.isScalar())
|
||||
maskValue = genMaskValue(loc, builder, mask, isPresentPred, {});
|
||||
}
|
||||
mlir::Value reductionValue = reductionArgs[0];
|
||||
fir::IfOp ifOp;
|
||||
if (mask) {
|
||||
// Make the reduction value update conditional on the value
|
||||
// of the mask.
|
||||
if (!maskValue) {
|
||||
// If the mask is an array, use the elemental and the loop indices
|
||||
// to address the proper mask element.
|
||||
maskValue =
|
||||
genMaskValue(loc, builder, mask, isPresentPred, indices);
|
||||
}
|
||||
mlir::Value isUnmasked = builder.create<fir::ConvertOp>(
|
||||
loc, builder.getI1Type(), maskValue);
|
||||
ifOp = builder.create<fir::IfOp>(loc, elementType, isUnmasked,
|
||||
/*withElseRegion=*/true);
|
||||
// In the 'else' block return the current reduction value.
|
||||
builder.setInsertionPointToStart(&ifOp.getElseRegion().front());
|
||||
builder.create<fir::ResultOp>(loc, reductionValue);
|
||||
|
||||
// NOTE: the outer elemental operation may be lowered into
|
||||
// omp.workshare.loop_wrapper/omp.loop_nest later, so the reduction
|
||||
// loop may appear disjoint from the workshare loop nest.
|
||||
// Moreover, the inner loop is not strictly nested (due to the reduction
|
||||
// starting value initialization), and the above omp dialect operations
|
||||
// cannot produce results.
|
||||
// It is unclear what we should do about it yet.
|
||||
auto doLoop = builder.create<fir::DoLoopOp>(
|
||||
loc, one, ub, one, isUnordered, /*finalCountValue=*/false,
|
||||
mlir::ValueRange{initValue});
|
||||
|
||||
// Address the input array using the reduction loop's IV
|
||||
// for the DIM dimension.
|
||||
mlir::Value iv = doLoop.getInductionVar();
|
||||
llvm::SmallVector<mlir::Value> indices{inputIndices};
|
||||
indices.insert(indices.begin() + dimVal - 1, iv);
|
||||
|
||||
mlir::OpBuilder::InsertionGuard guard(builder);
|
||||
builder.setInsertionPointToStart(doLoop.getBody());
|
||||
mlir::Value reductionValue = doLoop.getRegionIterArgs()[0];
|
||||
fir::IfOp ifOp;
|
||||
if (mask) {
|
||||
// Make the reduction value update conditional on the value
|
||||
// of the mask.
|
||||
if (!maskValue) {
|
||||
// If the mask is an array, use the elemental and the loop indices
|
||||
// to address the proper mask element.
|
||||
maskValue = genMaskValue(loc, builder, mask, isPresentPred, indices);
|
||||
// In the 'then' block do the actual addition.
|
||||
builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
|
||||
}
|
||||
mlir::Value isUnmasked =
|
||||
builder.create<fir::ConvertOp>(loc, builder.getI1Type(), maskValue);
|
||||
ifOp = builder.create<fir::IfOp>(loc, elementType, isUnmasked,
|
||||
/*withElseRegion=*/true);
|
||||
// In the 'else' block return the current reduction value.
|
||||
builder.setInsertionPointToStart(&ifOp.getElseRegion().front());
|
||||
builder.create<fir::ResultOp>(loc, reductionValue);
|
||||
|
||||
// In the 'then' block do the actual addition.
|
||||
builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
|
||||
}
|
||||
hlfir::Entity element =
|
||||
hlfir::getElementAt(loc, builder, array, indices);
|
||||
hlfir::Entity elementValue =
|
||||
hlfir::loadTrivialScalar(loc, builder, element);
|
||||
// NOTE: we can use "Kahan summation" same way as the runtime
|
||||
// (e.g. when fast-math is not allowed), but let's start with
|
||||
// the simple version.
|
||||
reductionValue =
|
||||
genScalarAdd(loc, builder, reductionValue, elementValue);
|
||||
|
||||
hlfir::Entity element = hlfir::getElementAt(loc, builder, array, indices);
|
||||
hlfir::Entity elementValue =
|
||||
hlfir::loadTrivialScalar(loc, builder, element);
|
||||
// NOTE: we can use "Kahan summation" same way as the runtime
|
||||
// (e.g. when fast-math is not allowed), but let's start with
|
||||
// the simple version.
|
||||
reductionValue = genScalarAdd(loc, builder, reductionValue, elementValue);
|
||||
builder.create<fir::ResultOp>(loc, reductionValue);
|
||||
if (ifOp) {
|
||||
builder.create<fir::ResultOp>(loc, reductionValue);
|
||||
builder.setInsertionPointAfter(ifOp);
|
||||
reductionValue = ifOp.getResult(0);
|
||||
}
|
||||
|
||||
if (ifOp) {
|
||||
builder.setInsertionPointAfter(ifOp);
|
||||
builder.create<fir::ResultOp>(loc, ifOp.getResult(0));
|
||||
}
|
||||
return {reductionValue};
|
||||
};
|
||||
|
||||
return hlfir::Entity{doLoop.getResult(0)};
|
||||
llvm::SmallVector<mlir::Value, 1> reductionFinalValues =
|
||||
hlfir::genLoopNestWithReductions(loc, builder, extents,
|
||||
{reductionInitValue}, genBody,
|
||||
isUnordered);
|
||||
return hlfir::Entity{reductionFinalValues[0]};
|
||||
};
|
||||
|
||||
if (isTotalReduction) {
|
||||
hlfir::Entity result = genKernel(loc, builder, mlir::ValueRange{});
|
||||
rewriter.replaceOp(sum, result);
|
||||
return mlir::success();
|
||||
}
|
||||
|
||||
hlfir::ElementalOp elementalOp = hlfir::genElementalOp(
|
||||
loc, builder, elementType, resultShape, {}, genKernel,
|
||||
/*isUnordered=*/true, /*polymorphicMold=*/nullptr,
|
||||
@ -230,20 +243,29 @@ public:
|
||||
}
|
||||
|
||||
private:
|
||||
static llvm::SmallVector<mlir::Value>
|
||||
genArrayExtents(mlir::Location loc, fir::FirOpBuilder &builder,
|
||||
hlfir::Entity array) {
|
||||
mlir::Value inShape = hlfir::genShape(loc, builder, array);
|
||||
llvm::SmallVector<mlir::Value> inExtents =
|
||||
hlfir::getExplicitExtentsFromShape(inShape, builder);
|
||||
if (inShape.getUses().empty())
|
||||
inShape.getDefiningOp()->erase();
|
||||
return inExtents;
|
||||
}
|
||||
|
||||
// Return fir.shape specifying the shape of the result
|
||||
// of a SUM reduction with DIM=dimVal. The second return value
|
||||
// is the extent of the DIM dimension.
|
||||
static std::tuple<mlir::Value, mlir::Value>
|
||||
genResultShape(mlir::Location loc, fir::FirOpBuilder &builder,
|
||||
hlfir::Entity array, int64_t dimVal) {
|
||||
mlir::Value inShape = hlfir::genShape(loc, builder, array);
|
||||
genResultShapeForPartialReduction(mlir::Location loc,
|
||||
fir::FirOpBuilder &builder,
|
||||
hlfir::Entity array, int64_t dimVal) {
|
||||
llvm::SmallVector<mlir::Value> inExtents =
|
||||
hlfir::getExplicitExtentsFromShape(inShape, builder);
|
||||
genArrayExtents(loc, builder, array);
|
||||
assert(dimVal > 0 && dimVal <= static_cast<int64_t>(inExtents.size()) &&
|
||||
"DIM must be present and a positive constant not exceeding "
|
||||
"the array's rank");
|
||||
if (inShape.getUses().empty())
|
||||
inShape.getDefiningOp()->erase();
|
||||
|
||||
mlir::Value dimExtent = inExtents[dimVal - 1];
|
||||
inExtents.erase(inExtents.begin() + dimVal - 1);
|
||||
@ -459,22 +481,22 @@ public:
|
||||
target.addDynamicallyLegalOp<hlfir::SumOp>([](hlfir::SumOp sum) {
|
||||
if (!simplifySum)
|
||||
return true;
|
||||
if (mlir::Value dim = sum.getDim()) {
|
||||
if (auto dimVal = fir::getIntIfConstant(dim)) {
|
||||
if (!fir::isa_trivial(sum.getType())) {
|
||||
// Ignore the case SUM(a, DIM=X), where 'a' is a 1D array.
|
||||
// It is only legal when X is 1, and it should probably be
|
||||
// canonicalized into SUM(a).
|
||||
fir::SequenceType arrayTy = mlir::cast<fir::SequenceType>(
|
||||
hlfir::getFortranElementOrSequenceType(
|
||||
sum.getArray().getType()));
|
||||
if (*dimVal > 0 && *dimVal <= arrayTy.getDimension()) {
|
||||
// Ignore SUMs with illegal DIM values.
|
||||
// They may appear in dead code,
|
||||
// and they do not have to be converted.
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Always inline total reductions.
|
||||
if (hlfir::Entity{sum}.getRank() == 0)
|
||||
return false;
|
||||
mlir::Value dim = sum.getDim();
|
||||
if (!dim)
|
||||
return false;
|
||||
|
||||
if (auto dimVal = fir::getIntIfConstant(dim)) {
|
||||
fir::SequenceType arrayTy = mlir::cast<fir::SequenceType>(
|
||||
hlfir::getFortranElementOrSequenceType(sum.getArray().getType()));
|
||||
if (*dimVal > 0 && *dimVal <= arrayTy.getDimension()) {
|
||||
// Ignore SUMs with illegal DIM values.
|
||||
// They may appear in dead code,
|
||||
// and they do not have to be converted.
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
|
@ -14,9 +14,9 @@ func.func @sum_box_known_extents(%arg0: !fir.box<!fir.array<2x3xi32>>) {
|
||||
// CHECK: %[[VAL_4:.*]] = fir.shape %[[VAL_2]] : (index) -> !fir.shape<1>
|
||||
// CHECK: %[[VAL_5:.*]] = hlfir.elemental %[[VAL_4]] unordered : (!fir.shape<1>) -> !hlfir.expr<2xi32> {
|
||||
// CHECK: ^bb0(%[[VAL_6:.*]]: index):
|
||||
// CHECK: %[[VAL_7:.*]] = arith.constant 1 : index
|
||||
// CHECK: %[[VAL_8:.*]] = arith.constant 0 : i32
|
||||
// CHECK: %[[VAL_9:.*]] = fir.do_loop %[[VAL_10:.*]] = %[[VAL_7]] to %[[VAL_3]] step %[[VAL_7]] unordered iter_args(%[[VAL_11:.*]] = %[[VAL_8]]) -> (i32) {
|
||||
// CHECK: %[[VAL_7:.*]] = arith.constant 0 : i32
|
||||
// CHECK: %[[VAL_8:.*]] = arith.constant 1 : index
|
||||
// CHECK: %[[VAL_9:.*]] = fir.do_loop %[[VAL_10:.*]] = %[[VAL_8]] to %[[VAL_3]] step %[[VAL_8]] unordered iter_args(%[[VAL_11:.*]] = %[[VAL_7]]) -> (i32) {
|
||||
// CHECK: %[[VAL_12:.*]] = arith.constant 0 : index
|
||||
// CHECK: %[[VAL_13:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_12]] : (!fir.box<!fir.array<2x3xi32>>, index) -> (index, index, index)
|
||||
// CHECK: %[[VAL_14:.*]] = arith.constant 1 : index
|
||||
@ -50,9 +50,9 @@ func.func @sum_expr_known_extents(%arg0: !hlfir.expr<2x3xi32>) {
|
||||
// CHECK: %[[VAL_4:.*]] = fir.shape %[[VAL_3]] : (index) -> !fir.shape<1>
|
||||
// CHECK: %[[VAL_5:.*]] = hlfir.elemental %[[VAL_4]] unordered : (!fir.shape<1>) -> !hlfir.expr<3xi32> {
|
||||
// CHECK: ^bb0(%[[VAL_6:.*]]: index):
|
||||
// CHECK: %[[VAL_7:.*]] = arith.constant 1 : index
|
||||
// CHECK: %[[VAL_8:.*]] = arith.constant 0 : i32
|
||||
// CHECK: %[[VAL_9:.*]] = fir.do_loop %[[VAL_10:.*]] = %[[VAL_7]] to %[[VAL_2]] step %[[VAL_7]] unordered iter_args(%[[VAL_11:.*]] = %[[VAL_8]]) -> (i32) {
|
||||
// CHECK: %[[VAL_7:.*]] = arith.constant 0 : i32
|
||||
// CHECK: %[[VAL_8:.*]] = arith.constant 1 : index
|
||||
// CHECK: %[[VAL_9:.*]] = fir.do_loop %[[VAL_10:.*]] = %[[VAL_8]] to %[[VAL_2]] step %[[VAL_8]] unordered iter_args(%[[VAL_11:.*]] = %[[VAL_7]]) -> (i32) {
|
||||
// CHECK: %[[VAL_12:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_10]], %[[VAL_6]] : (!hlfir.expr<2x3xi32>, index, index) -> i32
|
||||
// CHECK: %[[VAL_13:.*]] = arith.addi %[[VAL_11]], %[[VAL_12]] : i32
|
||||
// CHECK: fir.result %[[VAL_13]] : i32
|
||||
@ -77,12 +77,12 @@ func.func @sum_box_unknown_extent1(%arg0: !fir.box<!fir.array<?x3xcomplex<f64>>>
|
||||
// CHECK: %[[VAL_5:.*]] = fir.shape %[[VAL_4]] : (index) -> !fir.shape<1>
|
||||
// CHECK: %[[VAL_6:.*]] = hlfir.elemental %[[VAL_5]] unordered : (!fir.shape<1>) -> !hlfir.expr<3xcomplex<f64>> {
|
||||
// CHECK: ^bb0(%[[VAL_7:.*]]: index):
|
||||
// CHECK: %[[VAL_8:.*]] = arith.constant 1 : index
|
||||
// CHECK: %[[VAL_9:.*]] = arith.constant 0.000000e+00 : f64
|
||||
// CHECK: %[[VAL_10:.*]] = fir.undefined complex<f64>
|
||||
// CHECK: %[[VAL_11:.*]] = fir.insert_value %[[VAL_10]], %[[VAL_9]], [0 : index] : (complex<f64>, f64) -> complex<f64>
|
||||
// CHECK: %[[VAL_12:.*]] = fir.insert_value %[[VAL_11]], %[[VAL_9]], [1 : index] : (complex<f64>, f64) -> complex<f64>
|
||||
// CHECK: %[[VAL_13:.*]] = fir.do_loop %[[VAL_14:.*]] = %[[VAL_8]] to %[[VAL_3]]#1 step %[[VAL_8]] iter_args(%[[VAL_15:.*]] = %[[VAL_12]]) -> (complex<f64>) {
|
||||
// CHECK: %[[VAL_8:.*]] = arith.constant 0.000000e+00 : f64
|
||||
// CHECK: %[[VAL_9:.*]] = fir.undefined complex<f64>
|
||||
// CHECK: %[[VAL_10:.*]] = fir.insert_value %[[VAL_9]], %[[VAL_8]], [0 : index] : (complex<f64>, f64) -> complex<f64>
|
||||
// CHECK: %[[VAL_11:.*]] = fir.insert_value %[[VAL_10]], %[[VAL_8]], [1 : index] : (complex<f64>, f64) -> complex<f64>
|
||||
// CHECK: %[[VAL_12:.*]] = arith.constant 1 : index
|
||||
// CHECK: %[[VAL_13:.*]] = fir.do_loop %[[VAL_14:.*]] = %[[VAL_12]] to %[[VAL_3]]#1 step %[[VAL_12]] iter_args(%[[VAL_15:.*]] = %[[VAL_11]]) -> (complex<f64>) {
|
||||
// CHECK: %[[VAL_16:.*]] = arith.constant 0 : index
|
||||
// CHECK: %[[VAL_17:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_16]] : (!fir.box<!fir.array<?x3xcomplex<f64>>>, index) -> (index, index, index)
|
||||
// CHECK: %[[VAL_18:.*]] = arith.constant 1 : index
|
||||
@ -116,12 +116,12 @@ func.func @sum_box_unknown_extent2(%arg0: !fir.box<!fir.array<?x3xcomplex<f64>>>
|
||||
// CHECK: %[[VAL_5:.*]] = fir.shape %[[VAL_3]]#1 : (index) -> !fir.shape<1>
|
||||
// CHECK: %[[VAL_6:.*]] = hlfir.elemental %[[VAL_5]] unordered : (!fir.shape<1>) -> !hlfir.expr<?xcomplex<f64>> {
|
||||
// CHECK: ^bb0(%[[VAL_7:.*]]: index):
|
||||
// CHECK: %[[VAL_8:.*]] = arith.constant 1 : index
|
||||
// CHECK: %[[VAL_9:.*]] = arith.constant 0.000000e+00 : f64
|
||||
// CHECK: %[[VAL_10:.*]] = fir.undefined complex<f64>
|
||||
// CHECK: %[[VAL_11:.*]] = fir.insert_value %[[VAL_10]], %[[VAL_9]], [0 : index] : (complex<f64>, f64) -> complex<f64>
|
||||
// CHECK: %[[VAL_12:.*]] = fir.insert_value %[[VAL_11]], %[[VAL_9]], [1 : index] : (complex<f64>, f64) -> complex<f64>
|
||||
// CHECK: %[[VAL_13:.*]] = fir.do_loop %[[VAL_14:.*]] = %[[VAL_8]] to %[[VAL_4]] step %[[VAL_8]] iter_args(%[[VAL_15:.*]] = %[[VAL_12]]) -> (complex<f64>) {
|
||||
// CHECK: %[[VAL_8:.*]] = arith.constant 0.000000e+00 : f64
|
||||
// CHECK: %[[VAL_9:.*]] = fir.undefined complex<f64>
|
||||
// CHECK: %[[VAL_10:.*]] = fir.insert_value %[[VAL_9]], %[[VAL_8]], [0 : index] : (complex<f64>, f64) -> complex<f64>
|
||||
// CHECK: %[[VAL_11:.*]] = fir.insert_value %[[VAL_10]], %[[VAL_8]], [1 : index] : (complex<f64>, f64) -> complex<f64>
|
||||
// CHECK: %[[VAL_12:.*]] = arith.constant 1 : index
|
||||
// CHECK: %[[VAL_13:.*]] = fir.do_loop %[[VAL_14:.*]] = %[[VAL_12]] to %[[VAL_4]] step %[[VAL_12]] iter_args(%[[VAL_15:.*]] = %[[VAL_11]]) -> (complex<f64>) {
|
||||
// CHECK: %[[VAL_16:.*]] = arith.constant 0 : index
|
||||
// CHECK: %[[VAL_17:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_16]] : (!fir.box<!fir.array<?x3xcomplex<f64>>>, index) -> (index, index, index)
|
||||
// CHECK: %[[VAL_18:.*]] = arith.constant 1 : index
|
||||
@ -148,7 +148,7 @@ func.func @sum_expr_unknown_extent1(%arg0: !hlfir.expr<?x3xf32>) {
|
||||
return
|
||||
}
|
||||
// CHECK-LABEL: func.func @sum_expr_unknown_extent1(
|
||||
// CHECK-SAME: %[[VAL_0:.*]]: !hlfir.expr<?x3xf32>) {
|
||||
// CHECK-SAME: %[[VAL_0:.*]]: !hlfir.expr<?x3xf32>) {
|
||||
// CHECK: %[[VAL_1:.*]] = arith.constant 1 : i32
|
||||
// CHECK: %[[VAL_2:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr<?x3xf32>) -> !fir.shape<2>
|
||||
// CHECK: %[[VAL_3:.*]] = hlfir.get_extent %[[VAL_2]] {dim = 0 : index} : (!fir.shape<2>) -> index
|
||||
@ -156,9 +156,9 @@ func.func @sum_expr_unknown_extent1(%arg0: !hlfir.expr<?x3xf32>) {
|
||||
// CHECK: %[[VAL_5:.*]] = fir.shape %[[VAL_4]] : (index) -> !fir.shape<1>
|
||||
// CHECK: %[[VAL_6:.*]] = hlfir.elemental %[[VAL_5]] unordered : (!fir.shape<1>) -> !hlfir.expr<3xf32> {
|
||||
// CHECK: ^bb0(%[[VAL_7:.*]]: index):
|
||||
// CHECK: %[[VAL_8:.*]] = arith.constant 1 : index
|
||||
// CHECK: %[[VAL_9:.*]] = arith.constant 0.000000e+00 : f32
|
||||
// CHECK: %[[VAL_10:.*]] = fir.do_loop %[[VAL_11:.*]] = %[[VAL_8]] to %[[VAL_3]] step %[[VAL_8]] iter_args(%[[VAL_12:.*]] = %[[VAL_9]]) -> (f32) {
|
||||
// CHECK: %[[VAL_8:.*]] = arith.constant 0.000000e+00 : f32
|
||||
// CHECK: %[[VAL_9:.*]] = arith.constant 1 : index
|
||||
// CHECK: %[[VAL_10:.*]] = fir.do_loop %[[VAL_11:.*]] = %[[VAL_9]] to %[[VAL_3]] step %[[VAL_9]] iter_args(%[[VAL_12:.*]] = %[[VAL_8]]) -> (f32) {
|
||||
// CHECK: %[[VAL_13:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_11]], %[[VAL_7]] : (!hlfir.expr<?x3xf32>, index, index) -> f32
|
||||
// CHECK: %[[VAL_14:.*]] = arith.addf %[[VAL_12]], %[[VAL_13]] : f32
|
||||
// CHECK: fir.result %[[VAL_14]] : f32
|
||||
@ -174,7 +174,7 @@ func.func @sum_expr_unknown_extent2(%arg0: !hlfir.expr<?x3xf32>) {
|
||||
return
|
||||
}
|
||||
// CHECK-LABEL: func.func @sum_expr_unknown_extent2(
|
||||
// CHECK-SAME: %[[VAL_0:.*]]: !hlfir.expr<?x3xf32>) {
|
||||
// CHECK-SAME: %[[VAL_0:.*]]: !hlfir.expr<?x3xf32>) {
|
||||
// CHECK: %[[VAL_1:.*]] = arith.constant 2 : i32
|
||||
// CHECK: %[[VAL_2:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr<?x3xf32>) -> !fir.shape<2>
|
||||
// CHECK: %[[VAL_3:.*]] = hlfir.get_extent %[[VAL_2]] {dim = 0 : index} : (!fir.shape<2>) -> index
|
||||
@ -182,9 +182,9 @@ func.func @sum_expr_unknown_extent2(%arg0: !hlfir.expr<?x3xf32>) {
|
||||
// CHECK: %[[VAL_5:.*]] = fir.shape %[[VAL_3]] : (index) -> !fir.shape<1>
|
||||
// CHECK: %[[VAL_6:.*]] = hlfir.elemental %[[VAL_5]] unordered : (!fir.shape<1>) -> !hlfir.expr<?xf32> {
|
||||
// CHECK: ^bb0(%[[VAL_7:.*]]: index):
|
||||
// CHECK: %[[VAL_8:.*]] = arith.constant 1 : index
|
||||
// CHECK: %[[VAL_9:.*]] = arith.constant 0.000000e+00 : f32
|
||||
// CHECK: %[[VAL_10:.*]] = fir.do_loop %[[VAL_11:.*]] = %[[VAL_8]] to %[[VAL_4]] step %[[VAL_8]] iter_args(%[[VAL_12:.*]] = %[[VAL_9]]) -> (f32) {
|
||||
// CHECK: %[[VAL_8:.*]] = arith.constant 0.000000e+00 : f32
|
||||
// CHECK: %[[VAL_9:.*]] = arith.constant 1 : index
|
||||
// CHECK: %[[VAL_10:.*]] = fir.do_loop %[[VAL_11:.*]] = %[[VAL_9]] to %[[VAL_4]] step %[[VAL_9]] iter_args(%[[VAL_12:.*]] = %[[VAL_8]]) -> (f32) {
|
||||
// CHECK: %[[VAL_13:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_7]], %[[VAL_11]] : (!hlfir.expr<?x3xf32>, index, index) -> f32
|
||||
// CHECK: %[[VAL_14:.*]] = arith.addf %[[VAL_12]], %[[VAL_13]] : f32
|
||||
// CHECK: fir.result %[[VAL_14]] : f32
|
||||
@ -208,15 +208,15 @@ func.func @sum_scalar_mask(%arg0: !hlfir.expr<?x3xf32>, %mask: !fir.ref<!fir.log
|
||||
// CHECK: %[[VAL_4:.*]] = hlfir.get_extent %[[VAL_3]] {dim = 0 : index} : (!fir.shape<2>) -> index
|
||||
// CHECK: %[[VAL_5:.*]] = arith.constant 3 : index
|
||||
// CHECK: %[[VAL_6:.*]] = fir.shape %[[VAL_5]] : (index) -> !fir.shape<1>
|
||||
// CHECK: %[[VAL_7:.*]] = hlfir.elemental %[[VAL_6]] unordered : (!fir.shape<1>) -> !hlfir.expr<3xf32> {
|
||||
// CHECK: ^bb0(%[[VAL_8:.*]]: index):
|
||||
// CHECK: %[[VAL_9:.*]] = arith.constant 1 : index
|
||||
// CHECK: %[[VAL_7:.*]] = fir.load %[[VAL_1]] : !fir.ref<!fir.logical<1>>
|
||||
// CHECK: %[[VAL_8:.*]] = hlfir.elemental %[[VAL_6]] unordered : (!fir.shape<1>) -> !hlfir.expr<3xf32> {
|
||||
// CHECK: ^bb0(%[[VAL_9:.*]]: index):
|
||||
// CHECK: %[[VAL_10:.*]] = arith.constant 0.000000e+00 : f32
|
||||
// CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_1]] : !fir.ref<!fir.logical<1>>
|
||||
// CHECK: %[[VAL_12:.*]] = fir.do_loop %[[VAL_13:.*]] = %[[VAL_9]] to %[[VAL_4]] step %[[VAL_9]] iter_args(%[[VAL_14:.*]] = %[[VAL_10]]) -> (f32) {
|
||||
// CHECK: %[[VAL_15:.*]] = fir.convert %[[VAL_11]] : (!fir.logical<1>) -> i1
|
||||
// CHECK: %[[VAL_11:.*]] = arith.constant 1 : index
|
||||
// CHECK: %[[VAL_12:.*]] = fir.do_loop %[[VAL_13:.*]] = %[[VAL_11]] to %[[VAL_4]] step %[[VAL_11]] iter_args(%[[VAL_14:.*]] = %[[VAL_10]]) -> (f32) {
|
||||
// CHECK: %[[VAL_15:.*]] = fir.convert %[[VAL_7]] : (!fir.logical<1>) -> i1
|
||||
// CHECK: %[[VAL_16:.*]] = fir.if %[[VAL_15]] -> (f32) {
|
||||
// CHECK: %[[VAL_17:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_13]], %[[VAL_8]] : (!hlfir.expr<?x3xf32>, index, index) -> f32
|
||||
// CHECK: %[[VAL_17:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_13]], %[[VAL_9]] : (!hlfir.expr<?x3xf32>, index, index) -> f32
|
||||
// CHECK: %[[VAL_18:.*]] = arith.addf %[[VAL_14]], %[[VAL_17]] : f32
|
||||
// CHECK: fir.result %[[VAL_18]] : f32
|
||||
// CHECK: } else {
|
||||
@ -243,24 +243,24 @@ func.func @sum_scalar_boxed_mask(%arg0: !hlfir.expr<?x3xf32>, %mask: !fir.box<!f
|
||||
// CHECK: %[[VAL_4:.*]] = hlfir.get_extent %[[VAL_3]] {dim = 0 : index} : (!fir.shape<2>) -> index
|
||||
// CHECK: %[[VAL_5:.*]] = arith.constant 3 : index
|
||||
// CHECK: %[[VAL_6:.*]] = fir.shape %[[VAL_5]] : (index) -> !fir.shape<1>
|
||||
// CHECK: %[[VAL_7:.*]] = hlfir.elemental %[[VAL_6]] unordered : (!fir.shape<1>) -> !hlfir.expr<3xf32> {
|
||||
// CHECK: ^bb0(%[[VAL_8:.*]]: index):
|
||||
// CHECK: %[[VAL_9:.*]] = arith.constant 1 : index
|
||||
// CHECK: %[[VAL_10:.*]] = arith.constant 0.000000e+00 : f32
|
||||
// CHECK: %[[VAL_11:.*]] = fir.is_present %[[VAL_1]] : (!fir.box<!fir.logical<1>>) -> i1
|
||||
// CHECK: %[[VAL_12:.*]] = fir.if %[[VAL_11]] -> (!fir.logical<1>) {
|
||||
// CHECK: %[[VAL_13:.*]] = fir.box_addr %[[VAL_1]] : (!fir.box<!fir.logical<1>>) -> !fir.ref<!fir.logical<1>>
|
||||
// CHECK: %[[VAL_14:.*]] = fir.load %[[VAL_13]] : !fir.ref<!fir.logical<1>>
|
||||
// CHECK: fir.result %[[VAL_14]] : !fir.logical<1>
|
||||
// CHECK: } else {
|
||||
// CHECK: %[[VAL_15:.*]] = arith.constant true
|
||||
// CHECK: %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (i1) -> !fir.logical<1>
|
||||
// CHECK: fir.result %[[VAL_16]] : !fir.logical<1>
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_17:.*]] = fir.do_loop %[[VAL_18:.*]] = %[[VAL_9]] to %[[VAL_4]] step %[[VAL_9]] iter_args(%[[VAL_19:.*]] = %[[VAL_10]]) -> (f32) {
|
||||
// CHECK: %[[VAL_20:.*]] = fir.convert %[[VAL_12]] : (!fir.logical<1>) -> i1
|
||||
// CHECK: %[[VAL_7:.*]] = fir.is_present %[[VAL_1]] : (!fir.box<!fir.logical<1>>) -> i1
|
||||
// CHECK: %[[VAL_8:.*]] = fir.if %[[VAL_7]] -> (!fir.logical<1>) {
|
||||
// CHECK: %[[VAL_9:.*]] = fir.box_addr %[[VAL_1]] : (!fir.box<!fir.logical<1>>) -> !fir.ref<!fir.logical<1>>
|
||||
// CHECK: %[[VAL_10:.*]] = fir.load %[[VAL_9]] : !fir.ref<!fir.logical<1>>
|
||||
// CHECK: fir.result %[[VAL_10]] : !fir.logical<1>
|
||||
// CHECK: } else {
|
||||
// CHECK: %[[VAL_11:.*]] = arith.constant true
|
||||
// CHECK: %[[VAL_12:.*]] = fir.convert %[[VAL_11]] : (i1) -> !fir.logical<1>
|
||||
// CHECK: fir.result %[[VAL_12]] : !fir.logical<1>
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_13:.*]] = hlfir.elemental %[[VAL_6]] unordered : (!fir.shape<1>) -> !hlfir.expr<3xf32> {
|
||||
// CHECK: ^bb0(%[[VAL_14:.*]]: index):
|
||||
// CHECK: %[[VAL_15:.*]] = arith.constant 0.000000e+00 : f32
|
||||
// CHECK: %[[VAL_16:.*]] = arith.constant 1 : index
|
||||
// CHECK: %[[VAL_17:.*]] = fir.do_loop %[[VAL_18:.*]] = %[[VAL_16]] to %[[VAL_4]] step %[[VAL_16]] iter_args(%[[VAL_19:.*]] = %[[VAL_15]]) -> (f32) {
|
||||
// CHECK: %[[VAL_20:.*]] = fir.convert %[[VAL_8]] : (!fir.logical<1>) -> i1
|
||||
// CHECK: %[[VAL_21:.*]] = fir.if %[[VAL_20]] -> (f32) {
|
||||
// CHECK: %[[VAL_22:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_18]], %[[VAL_8]] : (!hlfir.expr<?x3xf32>, index, index) -> f32
|
||||
// CHECK: %[[VAL_22:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_18]], %[[VAL_14]] : (!hlfir.expr<?x3xf32>, index, index) -> f32
|
||||
// CHECK: %[[VAL_23:.*]] = arith.addf %[[VAL_19]], %[[VAL_22]] : f32
|
||||
// CHECK: fir.result %[[VAL_23]] : f32
|
||||
// CHECK: } else {
|
||||
@ -287,20 +287,20 @@ func.func @sum_array_mask(%arg0: !hlfir.expr<?x3xf32>, %mask: !fir.box<!fir.arra
|
||||
// CHECK: %[[VAL_4:.*]] = hlfir.get_extent %[[VAL_3]] {dim = 0 : index} : (!fir.shape<2>) -> index
|
||||
// CHECK: %[[VAL_5:.*]] = arith.constant 3 : index
|
||||
// CHECK: %[[VAL_6:.*]] = fir.shape %[[VAL_4]] : (index) -> !fir.shape<1>
|
||||
// CHECK: %[[VAL_7:.*]] = hlfir.elemental %[[VAL_6]] unordered : (!fir.shape<1>) -> !hlfir.expr<?xf32> {
|
||||
// CHECK: ^bb0(%[[VAL_8:.*]]: index):
|
||||
// CHECK: %[[VAL_9:.*]] = arith.constant 1 : index
|
||||
// CHECK: %[[VAL_7:.*]] = fir.is_present %[[VAL_1]] : (!fir.box<!fir.array<?x3x!fir.logical<1>>>) -> i1
|
||||
// CHECK: %[[VAL_8:.*]] = hlfir.elemental %[[VAL_6]] unordered : (!fir.shape<1>) -> !hlfir.expr<?xf32> {
|
||||
// CHECK: ^bb0(%[[VAL_9:.*]]: index):
|
||||
// CHECK: %[[VAL_10:.*]] = arith.constant 0.000000e+00 : f32
|
||||
// CHECK: %[[VAL_11:.*]] = fir.is_present %[[VAL_1]] : (!fir.box<!fir.array<?x3x!fir.logical<1>>>) -> i1
|
||||
// CHECK: %[[VAL_12:.*]] = fir.do_loop %[[VAL_13:.*]] = %[[VAL_9]] to %[[VAL_5]] step %[[VAL_9]] iter_args(%[[VAL_14:.*]] = %[[VAL_10]]) -> (f32) {
|
||||
// CHECK: %[[VAL_15:.*]] = fir.if %[[VAL_11]] -> (!fir.logical<1>) {
|
||||
// CHECK: %[[VAL_11:.*]] = arith.constant 1 : index
|
||||
// CHECK: %[[VAL_12:.*]] = fir.do_loop %[[VAL_13:.*]] = %[[VAL_11]] to %[[VAL_5]] step %[[VAL_11]] iter_args(%[[VAL_14:.*]] = %[[VAL_10]]) -> (f32) {
|
||||
// CHECK: %[[VAL_15:.*]] = fir.if %[[VAL_7]] -> (!fir.logical<1>) {
|
||||
// CHECK: %[[VAL_16:.*]] = arith.constant 0 : index
|
||||
// CHECK: %[[VAL_17:.*]]:3 = fir.box_dims %[[VAL_1]], %[[VAL_16]] : (!fir.box<!fir.array<?x3x!fir.logical<1>>>, index) -> (index, index, index)
|
||||
// CHECK: %[[VAL_18:.*]] = arith.constant 1 : index
|
||||
// CHECK: %[[VAL_19:.*]]:3 = fir.box_dims %[[VAL_1]], %[[VAL_18]] : (!fir.box<!fir.array<?x3x!fir.logical<1>>>, index) -> (index, index, index)
|
||||
// CHECK: %[[VAL_20:.*]] = arith.constant 1 : index
|
||||
// CHECK: %[[VAL_21:.*]] = arith.subi %[[VAL_17]]#0, %[[VAL_20]] : index
|
||||
// CHECK: %[[VAL_22:.*]] = arith.addi %[[VAL_8]], %[[VAL_21]] : index
|
||||
// CHECK: %[[VAL_22:.*]] = arith.addi %[[VAL_9]], %[[VAL_21]] : index
|
||||
// CHECK: %[[VAL_23:.*]] = arith.subi %[[VAL_19]]#0, %[[VAL_20]] : index
|
||||
// CHECK: %[[VAL_24:.*]] = arith.addi %[[VAL_13]], %[[VAL_23]] : index
|
||||
// CHECK: %[[VAL_25:.*]] = hlfir.designate %[[VAL_1]] (%[[VAL_22]], %[[VAL_24]]) : (!fir.box<!fir.array<?x3x!fir.logical<1>>>, index, index) -> !fir.ref<!fir.logical<1>>
|
||||
@ -313,7 +313,7 @@ func.func @sum_array_mask(%arg0: !hlfir.expr<?x3xf32>, %mask: !fir.box<!fir.arra
|
||||
// CHECK: }
|
||||
// CHECK: %[[VAL_29:.*]] = fir.convert %[[VAL_15]] : (!fir.logical<1>) -> i1
|
||||
// CHECK: %[[VAL_30:.*]] = fir.if %[[VAL_29]] -> (f32) {
|
||||
// CHECK: %[[VAL_31:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_8]], %[[VAL_13]] : (!hlfir.expr<?x3xf32>, index, index) -> f32
|
||||
// CHECK: %[[VAL_31:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_9]], %[[VAL_13]] : (!hlfir.expr<?x3xf32>, index, index) -> f32
|
||||
// CHECK: %[[VAL_32:.*]] = arith.addf %[[VAL_14]], %[[VAL_31]] : f32
|
||||
// CHECK: fir.result %[[VAL_32]] : f32
|
||||
// CHECK: } else {
|
||||
@ -342,9 +342,9 @@ func.func @sum_array_expr_mask(%arg0: !hlfir.expr<?x3xf32>, %mask: !hlfir.expr<?
|
||||
// CHECK: %[[VAL_6:.*]] = fir.shape %[[VAL_4]] : (index) -> !fir.shape<1>
|
||||
// CHECK: %[[VAL_7:.*]] = hlfir.elemental %[[VAL_6]] unordered : (!fir.shape<1>) -> !hlfir.expr<?xf32> {
|
||||
// CHECK: ^bb0(%[[VAL_8:.*]]: index):
|
||||
// CHECK: %[[VAL_9:.*]] = arith.constant 1 : index
|
||||
// CHECK: %[[VAL_10:.*]] = arith.constant 0.000000e+00 : f32
|
||||
// CHECK: %[[VAL_11:.*]] = fir.do_loop %[[VAL_12:.*]] = %[[VAL_9]] to %[[VAL_5]] step %[[VAL_9]] iter_args(%[[VAL_13:.*]] = %[[VAL_10]]) -> (f32) {
|
||||
// CHECK: %[[VAL_9:.*]] = arith.constant 0.000000e+00 : f32
|
||||
// CHECK: %[[VAL_10:.*]] = arith.constant 1 : index
|
||||
// CHECK: %[[VAL_11:.*]] = fir.do_loop %[[VAL_12:.*]] = %[[VAL_10]] to %[[VAL_5]] step %[[VAL_10]] iter_args(%[[VAL_13:.*]] = %[[VAL_9]]) -> (f32) {
|
||||
// CHECK: %[[VAL_14:.*]] = hlfir.apply %[[VAL_1]], %[[VAL_8]], %[[VAL_12]] : (!hlfir.expr<?x3x!fir.logical<1>>, index, index) -> !fir.logical<1>
|
||||
// CHECK: %[[VAL_15:.*]] = fir.convert %[[VAL_14]] : (!fir.logical<1>) -> i1
|
||||
// CHECK: %[[VAL_16:.*]] = fir.if %[[VAL_15]] -> (f32) {
|
||||
@ -375,9 +375,9 @@ func.func @sum_unordered_reduction(%arg0: !hlfir.expr<2x3xf32>) {
|
||||
// CHECK: %[[VAL_4:.*]] = fir.shape %[[VAL_3]] : (index) -> !fir.shape<1>
|
||||
// CHECK: %[[VAL_5:.*]] = hlfir.elemental %[[VAL_4]] unordered : (!fir.shape<1>) -> !hlfir.expr<3xf32> {
|
||||
// CHECK: ^bb0(%[[VAL_6:.*]]: index):
|
||||
// CHECK: %[[VAL_7:.*]] = arith.constant 1 : index
|
||||
// CHECK: %[[VAL_8:.*]] = arith.constant 0.000000e+00 : f32
|
||||
// CHECK: %[[VAL_9:.*]] = fir.do_loop %[[VAL_10:.*]] = %[[VAL_7]] to %[[VAL_2]] step %[[VAL_7]] unordered iter_args(%[[VAL_11:.*]] = %[[VAL_8]]) -> (f32) {
|
||||
// CHECK: %[[VAL_7:.*]] = arith.constant 0.000000e+00 : f32
|
||||
// CHECK: %[[VAL_8:.*]] = arith.constant 1 : index
|
||||
// CHECK: %[[VAL_9:.*]] = fir.do_loop %[[VAL_10:.*]] = %[[VAL_8]] to %[[VAL_2]] step %[[VAL_8]] unordered iter_args(%[[VAL_11:.*]] = %[[VAL_7]]) -> (f32) {
|
||||
// CHECK: %[[VAL_12:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_10]], %[[VAL_6]] : (!hlfir.expr<2x3xf32>, index, index) -> f32
|
||||
// CHECK: %[[VAL_13:.*]] = arith.addf %[[VAL_11]], %[[VAL_12]] fastmath<reassoc> : f32
|
||||
// CHECK: fir.result %[[VAL_13]] : f32
|
||||
@ -387,28 +387,62 @@ func.func @sum_unordered_reduction(%arg0: !hlfir.expr<2x3xf32>) {
|
||||
// CHECK: return
|
||||
// CHECK: }
|
||||
|
||||
// negative: total reduction
|
||||
func.func @sum_total_reduction(%arg0: !fir.box<!fir.array<3xi32>>) {
|
||||
// total 1d reduction
|
||||
func.func @sum_total_1d_reduction(%arg0: !fir.box<!fir.array<3xi32>>) {
|
||||
%cst = arith.constant 1 : i32
|
||||
%res = hlfir.sum %arg0 dim %cst : (!fir.box<!fir.array<3xi32>>, i32) -> i32
|
||||
return
|
||||
}
|
||||
// CHECK-LABEL: func.func @sum_total_reduction(
|
||||
// CHECK-SAME: %[[VAL_0:.*]]: !fir.box<!fir.array<3xi32>>) {
|
||||
// CHECK-LABEL: func.func @sum_total_1d_reduction(
|
||||
// CHECK-SAME: %[[VAL_0:.*]]: !fir.box<!fir.array<3xi32>>) {
|
||||
// CHECK: %[[VAL_1:.*]] = arith.constant 1 : i32
|
||||
// CHECK: %[[VAL_2:.*]] = hlfir.sum %[[VAL_0]] dim %[[VAL_1]] : (!fir.box<!fir.array<3xi32>>, i32) -> i32
|
||||
// CHECK: %[[VAL_2:.*]] = arith.constant 3 : index
|
||||
// CHECK: %[[VAL_3:.*]] = arith.constant 0 : i32
|
||||
// CHECK: %[[VAL_4:.*]] = arith.constant 1 : index
|
||||
// CHECK: %[[VAL_5:.*]] = fir.do_loop %[[VAL_6:.*]] = %[[VAL_4]] to %[[VAL_2]] step %[[VAL_4]] unordered iter_args(%[[VAL_7:.*]] = %[[VAL_3]]) -> (i32) {
|
||||
// CHECK: %[[VAL_8:.*]] = arith.constant 0 : index
|
||||
// CHECK: %[[VAL_9:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_8]] : (!fir.box<!fir.array<3xi32>>, index) -> (index, index, index)
|
||||
// CHECK: %[[VAL_10:.*]] = arith.constant 1 : index
|
||||
// CHECK: %[[VAL_11:.*]] = arith.subi %[[VAL_9]]#0, %[[VAL_10]] : index
|
||||
// CHECK: %[[VAL_12:.*]] = arith.addi %[[VAL_6]], %[[VAL_11]] : index
|
||||
// CHECK: %[[VAL_13:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_12]]) : (!fir.box<!fir.array<3xi32>>, index) -> !fir.ref<i32>
|
||||
// CHECK: %[[VAL_14:.*]] = fir.load %[[VAL_13]] : !fir.ref<i32>
|
||||
// CHECK: %[[VAL_15:.*]] = arith.addi %[[VAL_7]], %[[VAL_14]] : i32
|
||||
// CHECK: fir.result %[[VAL_15]] : i32
|
||||
// CHECK: }
|
||||
// CHECK: return
|
||||
// CHECK: }
|
||||
|
||||
// negative: non-const dim
|
||||
func.func @sum_non_const_dim(%arg0: !fir.box<!fir.array<3xi32>>, %dim: i32) {
|
||||
%res = hlfir.sum %arg0 dim %dim : (!fir.box<!fir.array<3xi32>>, i32) -> i32
|
||||
// total 2d reduction
|
||||
func.func @sum_total_2d_reduction(%arg0: !fir.box<!fir.array<?x3xi32>>) {
|
||||
%res = hlfir.sum %arg0 : (!fir.box<!fir.array<?x3xi32>>) -> i32
|
||||
return
|
||||
}
|
||||
// CHECK-LABEL: func.func @sum_non_const_dim(
|
||||
// CHECK-SAME: %[[VAL_0:.*]]: !fir.box<!fir.array<3xi32>>,
|
||||
// CHECK-SAME: %[[VAL_1:.*]]: i32) {
|
||||
// CHECK: %[[VAL_2:.*]] = hlfir.sum %[[VAL_0]] dim %[[VAL_1]] : (!fir.box<!fir.array<3xi32>>, i32) -> i32
|
||||
// CHECK-LABEL: func.func @sum_total_2d_reduction(
|
||||
// CHECK-SAME: %[[VAL_0:.*]]: !fir.box<!fir.array<?x3xi32>>) {
|
||||
// CHECK: %[[VAL_1:.*]] = arith.constant 0 : index
|
||||
// CHECK: %[[VAL_2:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_1]] : (!fir.box<!fir.array<?x3xi32>>, index) -> (index, index, index)
|
||||
// CHECK: %[[VAL_3:.*]] = arith.constant 3 : index
|
||||
// CHECK: %[[VAL_4:.*]] = arith.constant 0 : i32
|
||||
// CHECK: %[[VAL_5:.*]] = arith.constant 1 : index
|
||||
// CHECK: %[[VAL_6:.*]] = fir.do_loop %[[VAL_7:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_5]] unordered iter_args(%[[VAL_8:.*]] = %[[VAL_4]]) -> (i32) {
|
||||
// CHECK: %[[VAL_9:.*]] = fir.do_loop %[[VAL_10:.*]] = %[[VAL_5]] to %[[VAL_2]]#1 step %[[VAL_5]] unordered iter_args(%[[VAL_11:.*]] = %[[VAL_8]]) -> (i32) {
|
||||
// CHECK: %[[VAL_12:.*]] = arith.constant 0 : index
|
||||
// CHECK: %[[VAL_13:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_12]] : (!fir.box<!fir.array<?x3xi32>>, index) -> (index, index, index)
|
||||
// CHECK: %[[VAL_14:.*]] = arith.constant 1 : index
|
||||
// CHECK: %[[VAL_15:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_14]] : (!fir.box<!fir.array<?x3xi32>>, index) -> (index, index, index)
|
||||
// CHECK: %[[VAL_16:.*]] = arith.constant 1 : index
|
||||
// CHECK: %[[VAL_17:.*]] = arith.subi %[[VAL_13]]#0, %[[VAL_16]] : index
|
||||
// CHECK: %[[VAL_18:.*]] = arith.addi %[[VAL_10]], %[[VAL_17]] : index
|
||||
// CHECK: %[[VAL_19:.*]] = arith.subi %[[VAL_15]]#0, %[[VAL_16]] : index
|
||||
// CHECK: %[[VAL_20:.*]] = arith.addi %[[VAL_7]], %[[VAL_19]] : index
|
||||
// CHECK: %[[VAL_21:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_18]], %[[VAL_20]]) : (!fir.box<!fir.array<?x3xi32>>, index, index) -> !fir.ref<i32>
|
||||
// CHECK: %[[VAL_22:.*]] = fir.load %[[VAL_21]] : !fir.ref<i32>
|
||||
// CHECK: %[[VAL_23:.*]] = arith.addi %[[VAL_11]], %[[VAL_22]] : i32
|
||||
// CHECK: fir.result %[[VAL_23]] : i32
|
||||
// CHECK: }
|
||||
// CHECK: fir.result %[[VAL_9]] : i32
|
||||
// CHECK: }
|
||||
// CHECK: return
|
||||
// CHECK: }
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user