mirror of
https://github.com/llvm/llvm-project.git
synced 2025-05-01 00:16:06 +00:00

This CL is the 1st on the path to simplifying AffineMap composition. This CL uses the now accepted AffineExpr.replaceDimsAndSymbols to implement `AffineExpr::compose(AffineMap)`. Arguably, `simplifyAffineExpr` should be part of IR and not Analysis but this CL does not yet pull the trigger on that. PiperOrigin-RevId: 228265845
447 lines
18 KiB
C++
447 lines
18 KiB
C++
//===- VectorAnalysis.cpp - Analysis for Vectorization --------------------===//
|
|
//
|
|
// Copyright 2019 The MLIR Authors.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
// =============================================================================
|
|
|
|
#include "mlir/Analysis/VectorAnalysis.h"
|
|
#include "mlir/Analysis/AffineAnalysis.h"
|
|
#include "mlir/Analysis/LoopAnalysis.h"
|
|
#include "mlir/IR/Builders.h"
|
|
#include "mlir/IR/BuiltinOps.h"
|
|
#include "mlir/IR/Instructions.h"
|
|
#include "mlir/StandardOps/StandardOps.h"
|
|
#include "mlir/SuperVectorOps/SuperVectorOps.h"
|
|
#include "mlir/Support/Functional.h"
|
|
#include "mlir/Support/STLExtras.h"
|
|
|
|
#include "llvm/ADT/DenseSet.h"
|
|
#include "llvm/ADT/SetVector.h"
|
|
#include "llvm/Support/Debug.h"
|
|
#include "llvm/Support/raw_ostream.h"
|
|
|
|
///
|
|
/// Implements Analysis functions specific to vectors which support
|
|
/// the vectorization and vectorization materialization passes.
|
|
///
|
|
|
|
using namespace mlir;
|
|
|
|
#define DEBUG_TYPE "vector-analysis"
|
|
|
|
using llvm::dbgs;
|
|
using llvm::SetVector;
|
|
|
|
/// Computes, dimension by dimension, the ratio between `superShape` and
/// `subShape`. The two shapes are aligned on their trailing dimensions: both
/// are traversed in reverse and paired up until the shorter one is exhausted.
/// Leading dimensions of `superShape` that have no counterpart in `subShape`
/// are copied into the result unchanged.
///
/// Returns None when `subShape` has more dimensions than `superShape`, or when
/// one of the paired dimensions of `superShape` is not an integer multiple of
/// the matching dimension of `subShape`. Otherwise returns a vector with
/// exactly `superShape.size()` entries, in the same order as `superShape`.
///
/// All sizes are expected to be strictly positive (checked by assertion).
Optional<SmallVector<unsigned, 4>> mlir::shapeRatio(ArrayRef<int> superShape,
                                                    ArrayRef<int> subShape) {
  if (superShape.size() < subShape.size()) {
    return None;
  }

  // Starting from the end, compute the integer divisors.
  // The boolean `divides` is cleared as soon as one integral division is not
  // possible.
  std::vector<unsigned> result;
  result.reserve(superShape.size());
  bool divides = true;
  auto divide = [&divides, &result](int superSize, int subSize) {
    assert(superSize > 0 && "superSize must be > 0");
    assert(subSize > 0 && "subSize must be > 0");
    divides &= (superSize % subSize == 0);
    result.push_back(superSize / subSize);
  };
  functional::zipApply(
      divide, SmallVector<int, 8>{superShape.rbegin(), superShape.rend()},
      SmallVector<int, 8>{subShape.rbegin(), subShape.rend()});

  // If integral division does not occur, return and let the caller decide.
  if (!divides) {
    return None;
  }

  // At this point we computed the ratio (in reverse) for the common
  // size. Fill with the remaining entries from the super-vector shape (still in
  // reverse).
  int commonSize = subShape.size();
  std::copy(superShape.rbegin() + commonSize, superShape.rend(),
            std::back_inserter(result));

  assert(result.size() == superShape.size() &&
         "super to sub shape ratio is not of the same size as the super rank");

  // Reverse again to get it back in the proper order and return.
  return SmallVector<unsigned, 4>{result.rbegin(), result.rend()};
}
|
|
|
|
/// Overload of `shapeRatio` taking vector types: extracts the shape of each
/// type and forwards to the shape-based overload. The two vector types must
/// have the same element type (checked by assertion).
Optional<SmallVector<unsigned, 4>> mlir::shapeRatio(VectorType superVectorType,
                                                    VectorType subVectorType) {
  assert(superVectorType.getElementType() == subVectorType.getElementType() &&
         "vector types must be of the same elemental type");
  auto superShape = superVectorType.getShape();
  auto subShape = subVectorType.getShape();
  return shapeRatio(superShape, subShape);
}
|
|
|
|
/// Constructs a permutation map from memref indices to vector dimension.
|
|
///
|
|
/// The implementation uses the knowledge of the mapping of enclosing loop to
|
|
/// vector dimension. `enclosingLoopToVectorDim` carries this information as a
|
|
/// map with:
|
|
/// - keys representing "vectorized enclosing loops";
|
|
/// - values representing the corresponding vector dimension.
|
|
/// The algorithm traverses "vectorized enclosing loops" and extracts the
|
|
/// at-most-one MemRef index that is invariant along said loop. This index is
|
|
/// guaranteed to be at most one by construction: otherwise the MemRef is not
|
|
/// vectorizable.
|
|
/// If this invariant index is found, it is added to the permutation_map at the
|
|
/// proper vector dimension.
|
|
/// If no index is found to be invariant, 0 is added to the permutation_map and
|
|
/// corresponds to a vector broadcast along that dimension.
|
|
///
|
|
/// Examples can be found in the documentation of `makePermutationMap`, in the
|
|
/// header file.
|
|
static AffineMap makePermutationMap(
|
|
MLIRContext *context,
|
|
llvm::iterator_range<OperationInst::operand_iterator> indices,
|
|
const DenseMap<ForInst *, unsigned> &enclosingLoopToVectorDim) {
|
|
using functional::makePtrDynCaster;
|
|
using functional::map;
|
|
auto unwrappedIndices = map(makePtrDynCaster<Value, Value>(), indices);
|
|
SmallVector<AffineExpr, 4> perm(enclosingLoopToVectorDim.size(),
|
|
getAffineConstantExpr(0, context));
|
|
for (auto kvp : enclosingLoopToVectorDim) {
|
|
assert(kvp.second < perm.size());
|
|
auto invariants = getInvariantAccesses(*kvp.first, unwrappedIndices);
|
|
unsigned numIndices = unwrappedIndices.size();
|
|
unsigned countInvariantIndices = 0;
|
|
for (unsigned dim = 0; dim < numIndices; ++dim) {
|
|
if (!invariants.count(unwrappedIndices[dim])) {
|
|
assert(perm[kvp.second] == getAffineConstantExpr(0, context) &&
|
|
"permutationMap already has an entry along dim");
|
|
perm[kvp.second] = getAffineDimExpr(dim, context);
|
|
} else {
|
|
++countInvariantIndices;
|
|
}
|
|
}
|
|
assert((countInvariantIndices == numIndices ||
|
|
countInvariantIndices == numIndices - 1) &&
|
|
"Vectorization prerequisite violated: at most 1 index may be "
|
|
"invariant wrt a vectorized loop");
|
|
}
|
|
return AffineMap::get(unwrappedIndices.size(), 0, perm, {});
|
|
}
|
|
|
|
/// Implementation detail that walks up the parents and records the ones with
|
|
/// the specified type.
|
|
/// TODO(ntv): could also be implemented as a collect parents followed by a
|
|
/// filter and made available outside this file.
|
|
template <typename T>
|
|
static SetVector<T *> getParentsOfType(Instruction *inst) {
|
|
SetVector<T *> res;
|
|
auto *current = inst;
|
|
while (auto *parent = current->getParentInst()) {
|
|
auto *typedParent = dyn_cast<T>(parent);
|
|
if (typedParent) {
|
|
assert(res.count(typedParent) == 0 && "Already inserted");
|
|
res.insert(typedParent);
|
|
}
|
|
current = parent;
|
|
}
|
|
return res;
|
|
}
|
|
|
|
/// Returns the enclosing ForInst, from closest to farthest.
|
|
static SetVector<ForInst *> getEnclosingforInsts(Instruction *inst) {
|
|
return getParentsOfType<ForInst>(inst);
|
|
}
|
|
|
|
/// Builds the permutation map of the load or store `opInst`.
///
/// `loopToVectorDim` is first restricted to the loops that enclose `opInst`;
/// the restricted map and the indices of the memory operation are then handed
/// to the file-local `makePermutationMap` helper. Anything that is not a
/// LoadOp is `cast<>` to StoreOp.
AffineMap
mlir::makePermutationMap(OperationInst *opInst,
                         const DenseMap<ForInst *, unsigned> &loopToVectorDim) {
  // Keep only the entries whose loop actually encloses `opInst`.
  DenseMap<ForInst *, unsigned> enclosingLoopToVectorDim;
  for (auto *loop : getEnclosingforInsts(opInst)) {
    auto found = loopToVectorDim.find(loop);
    if (found == loopToVectorDim.end())
      continue;
    enclosingLoopToVectorDim.insert(*found);
  }

  auto *context = opInst->getContext();
  if (auto load = opInst->dyn_cast<LoadOp>())
    return ::makePermutationMap(context, load->getIndices(),
                                enclosingLoopToVectorDim);

  auto store = opInst->cast<StoreOp>();
  return ::makePermutationMap(context, store->getIndices(),
                              enclosingLoopToVectorDim);
}
|
|
|
|
bool mlir::matcher::operatesOnSuperVectors(const OperationInst &opInst,
|
|
VectorType subVectorType) {
|
|
// First, extract the vector type and ditinguish between:
|
|
// a. ops that *must* lower a super-vector (i.e. vector_transfer_read,
|
|
// vector_transfer_write); and
|
|
// b. ops that *may* lower a super-vector (all other ops).
|
|
// The ops that *may* lower a super-vector only do so if the super-vector to
|
|
// sub-vector ratio exists. The ops that *must* lower a super-vector are
|
|
// explicitly checked for this property.
|
|
/// TODO(ntv): there should be a single function for all ops to do this so we
|
|
/// do not have to special case. Maybe a trait, or just a method, unclear atm.
|
|
bool mustDivide = false;
|
|
VectorType superVectorType;
|
|
if (auto read = opInst.dyn_cast<VectorTransferReadOp>()) {
|
|
superVectorType = read->getResultType();
|
|
mustDivide = true;
|
|
} else if (auto write = opInst.dyn_cast<VectorTransferWriteOp>()) {
|
|
superVectorType = write->getVectorType();
|
|
mustDivide = true;
|
|
} else if (opInst.getNumResults() == 0) {
|
|
if (!opInst.isa<ReturnOp>()) {
|
|
opInst.emitError("NYI: assuming only return instructions can have 0 "
|
|
" results at this point");
|
|
}
|
|
return false;
|
|
} else if (opInst.getNumResults() == 1) {
|
|
if (auto v = opInst.getResult(0)->getType().dyn_cast<VectorType>()) {
|
|
superVectorType = v;
|
|
} else {
|
|
// Not a vector type.
|
|
return false;
|
|
}
|
|
} else {
|
|
// Not a vector_transfer and has more than 1 result, fail hard for now to
|
|
// wake us up when something changes.
|
|
opInst.emitError("NYI: instruction has more than 1 result");
|
|
return false;
|
|
}
|
|
|
|
// Get the ratio.
|
|
auto ratio = shapeRatio(superVectorType, subVectorType);
|
|
|
|
// Sanity check.
|
|
assert((ratio.hasValue() || !mustDivide) &&
|
|
"vector_transfer instruction in which super-vector size is not an"
|
|
" integer multiple of sub-vector size");
|
|
|
|
// This catches cases that are not strictly necessary to have multiplicity but
|
|
// still aren't divisible by the sub-vector shape.
|
|
// This could be useful information if we wanted to reshape at the level of
|
|
// the vector type (but we would have to look at the compute and distinguish
|
|
// between parallel, reduction and possibly other cases.
|
|
if (!ratio.hasValue()) {
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
namespace {
|
|
|
|
/// A `SingleResultAffineNormalizer` is a helper class that is not visible to
|
|
/// the user and supports renumbering operands of single-result AffineApplyOp.
|
|
/// This operates on the assumption that only single-result unbounded AffineMap
|
|
/// are used for all operands.
|
|
/// This acts as a reindexing map of Value* to positional dims or symbols and
|
|
/// allows simplifications such as:
|
|
///
|
|
/// ```mlir
|
|
/// %1 = affine_apply (d0, d1) -> (d0 - d1) (%0, %0)
|
|
/// ```
|
|
///
|
|
/// into:
|
|
///
|
|
/// ```mlir
|
|
/// %1 = affine_apply () -> (0)
|
|
/// ```
|
|
struct SingleResultAffineNormalizer {
|
|
SingleResultAffineNormalizer(AffineMap map, ArrayRef<Value *> operands);
|
|
|
|
/// Returns the single result, unbounded, AffineMap resulting from
|
|
/// normalization.
|
|
AffineMap getAffineMap() {
|
|
return AffineMap::get(reorderedDims.size(), reorderedSymbols.size(), {expr},
|
|
{});
|
|
}
|
|
|
|
SmallVector<Value *, 8> getOperands() {
|
|
SmallVector<Value *, 8> res(reorderedDims);
|
|
res.append(reorderedSymbols.begin(), reorderedSymbols.end());
|
|
return res;
|
|
}
|
|
|
|
private:
|
|
/// Helper function to insert `v` into the coordinate system of the current
|
|
/// SingleResultAffineNormalizer (i.e. in the proper `xxxValueToPosition` and
|
|
/// the proper `reorderedXXX`).
|
|
/// Returns the AffineDimExpr or AffineSymbolExpr with the correponding
|
|
/// renumbered position.
|
|
template <typename DimOrSymbol> DimOrSymbol renumberOneIndex(Value *v);
|
|
|
|
/// Given an `other` normalizer, this rewrites `other.expr` in the coordinate
|
|
/// system of the current SingleResultAffineNormalizer.
|
|
/// Returns the rewritten AffineExpr.
|
|
AffineExpr renumber(const SingleResultAffineNormalizer &other);
|
|
|
|
/// Given an `app` with single result and unbounded AffineMap, this rewrites
|
|
/// the app's map single result AffineExpr in the coordinate system of the
|
|
/// current SingleResultAffineNormalizer.
|
|
/// Returns the rewritten AffineExpr.
|
|
AffineExpr renumber(AffineApplyOp *app);
|
|
|
|
/// Maps of Value* to position in the `expr`.
|
|
DenseMap<Value *, unsigned> dimValueToPosition;
|
|
DenseMap<Value *, unsigned> symValueToPosition;
|
|
|
|
/// Ordered dims and symbols matching positional dims and symbols in `expr`.
|
|
SmallVector<Value *, 8> reorderedDims;
|
|
SmallVector<Value *, 8> reorderedSymbols;
|
|
|
|
AffineExpr expr;
|
|
};
|
|
|
|
} // namespace
|
|
|
|
template <typename DimOrSymbol>
|
|
static DimOrSymbol make(unsigned position, MLIRContext *context);
|
|
|
|
template <> AffineDimExpr make(unsigned position, MLIRContext *context) {
|
|
return getAffineDimExpr(position, context).cast<AffineDimExpr>();
|
|
}
|
|
|
|
template <> AffineSymbolExpr make(unsigned position, MLIRContext *context) {
|
|
return getAffineSymbolExpr(position, context).cast<AffineSymbolExpr>();
|
|
}
|
|
|
|
template <typename DimOrSymbol>
|
|
DimOrSymbol SingleResultAffineNormalizer::renumberOneIndex(Value *v) {
|
|
static_assert(std::is_same<DimOrSymbol, AffineDimExpr>::value ||
|
|
std::is_same<DimOrSymbol, AffineSymbolExpr>::value,
|
|
"renumber<AffineDimExpr>(...) or renumber<AffineDimExpr>(...) "
|
|
"required");
|
|
DenseMap<Value *, unsigned> &pos =
|
|
std::is_same<DimOrSymbol, AffineSymbolExpr>::value ? symValueToPosition
|
|
: dimValueToPosition;
|
|
DenseMap<Value *, unsigned>::iterator iterPos;
|
|
bool inserted = false;
|
|
std::tie(iterPos, inserted) = pos.insert(std::make_pair(v, pos.size()));
|
|
if (inserted) {
|
|
std::is_same<DimOrSymbol, AffineDimExpr>::value
|
|
? reorderedDims.push_back(v)
|
|
: reorderedSymbols.push_back(v);
|
|
}
|
|
return make<DimOrSymbol>(iterPos->second, v->getFunction()->getContext());
|
|
}
|
|
|
|
/// Rewrites `other.expr` in the coordinate system of this normalizer.
///
/// Every dim and symbol of `other` is registered in this normalizer through
/// `renumberOneIndex`, in the order of `other.reorderedDims` and then
/// `other.reorderedSymbols`. The resulting expressions are stored at the
/// position the value has in `other`, and finally substituted into
/// `other.expr`.
AffineExpr SingleResultAffineNormalizer::renumber(
    const SingleResultAffineNormalizer &other) {
  SmallVector<AffineExpr, 8> dimReplacements;
  SmallVector<AffineExpr, 8> symReplacements;

  for (auto *dimValue : other.reorderedDims) {
    const unsigned otherPos = other.dimValueToPosition.find(dimValue)->second;
    if (otherPos >= dimReplacements.size())
      dimReplacements.resize(otherPos + 1);
    dimReplacements[otherPos] = renumberOneIndex<AffineDimExpr>(dimValue);
  }

  for (auto *symValue : other.reorderedSymbols) {
    const unsigned otherPos = other.symValueToPosition.find(symValue)->second;
    if (otherPos >= symReplacements.size())
      symReplacements.resize(otherPos + 1);
    symReplacements[otherPos] = renumberOneIndex<AffineSymbolExpr>(symValue);
  }

  return other.expr.replaceDimsAndSymbols(dimReplacements, symReplacements);
}
|
|
|
|
/// Rewrites the single result expression of `app`'s map in the coordinate
/// system of this normalizer.
///
/// A separate normalizer is first built from the map and operands of `app`;
/// its expression is then renumbered into this normalizer. `app` must have a
/// single result and a map without range sizes (checked by assertion).
AffineExpr SingleResultAffineNormalizer::renumber(AffineApplyOp *app) {
  // Sanity check, single result AffineApplyOp if one wants to use this.
  assert(app->getNumResults() == 1 && "Not a single result AffineApplyOp");
  auto appMap = app->getAffineMap();
  assert(appMap.getRangeSizes().empty() && "Non-empty range sizes");

  // Gather the operands of this AffineApplyOp as plain Value pointers.
  SmallVector<Value *, 8> appOperands;
  for (auto *operand : app->getOperands())
    appOperands.push_back(static_cast<Value *>(operand));

  // Create the SingleResultAffineNormalizer for the operands of this
  // AffineApplyOp and combine it with the current SingleResultAffineNormalizer.
  SingleResultAffineNormalizer appNormalizer(appMap, appOperands);

  // We know this is a single result AffineMap, so the combination boils down
  // to renumbering its only AffineExpr.
  return renumber(appNormalizer);
}
|
|
|
|
/// Builds the normalized form of applying `map` to `operands`.
///
/// `map` must have a single result, no range sizes, and as many inputs as
/// there are operands (all checked by assertion). Each operand is turned into
/// an AffineExpr in the coordinate system of this normalizer:
///   - an operand defined by an AffineApplyOp contributes that op's renumbered
///     expression;
///   - an operand defined by a ConstantOp contributes a constant expression;
///   - an operand defined by any other instruction is registered as a symbol;
///   - an operand without defining instruction is registered as a dim or as a
///     symbol depending on its position in the inputs of `map`.
/// The single result of `map` is then composed with these expressions and
/// simplified into `expr`.
SingleResultAffineNormalizer::SingleResultAffineNormalizer(
    AffineMap map, ArrayRef<Value *> operands) {
  assert(map.getNumResults() == 1 && "Single-result map expected");
  assert(map.getRangeSizes().empty() && "Unbounded map expected");
  assert(map.getNumInputs() == operands.size() &&
         "number of operands does not match the number of map inputs");

  if (operands.empty()) {
    // Without operands there is nothing to renumber and the map has no inputs:
    // its single result is already expressed over zero dims and zero symbols.
    // Record it so that `expr` is never left default-constructed.
    expr = simplifyAffineExpr(map.getResult(0), /*numDims=*/0,
                              /*numSymbols=*/0);
    return;
  }

  auto *context = operands[0]->getFunction()->getContext();
  SmallVector<AffineExpr, 8> exprs;
  for (auto en : llvm::enumerate(operands)) {
    auto *t = en.value();
    assert(t->getType().isIndex());
    if (auto inst = t->getDefiningInst()) {
      if (auto app = inst->dyn_cast<AffineApplyOp>()) {
        // Sanity check, AffineApplyOp must always be composed by construction
        // and there can only ever be a dependence chain of 1 AffineApply. So we
        // can never get a second AffineApplyOp.
        // This also guarantees we can build another
        // SingleResultAffineNormalizer here that does not recurse a second
        // time.
        for (auto *pred : app->getOperands()) {
          assert((!pred->getDefiningInst() ||
                  !pred->getDefiningInst()->isa<AffineApplyOp>()) &&
                 "AffineApplyOp chain of length > 1");
          (void)pred;
        }
        exprs.push_back(renumber(app));
      } else if (inst->isa<ConstantOp>()) {
        // Constants remain constants.
        auto affineConstant = inst->cast<ConstantIndexOp>();
        exprs.push_back(
            getAffineConstantExpr(affineConstant->getValue(), context));
      } else {
        // DimOp, top of the function symbols are all symbols.
        exprs.push_back(renumberOneIndex<AffineSymbolExpr>(t));
      }
    } else if (en.index() < map.getNumDims()) {
      assert(isa<ForInst>(t) && "ForInst expected for AffineDimExpr");
      exprs.push_back(renumberOneIndex<AffineDimExpr>(t));
    } else {
      assert(!isa<ForInst>(t) && "unexpectd ForInst for a AffineSymbolExpr");
      exprs.push_back(renumberOneIndex<AffineSymbolExpr>(t));
    }
  }
  // One expression per input of `map`, over the renumbered dims and symbols.
  auto exprsMap = AffineMap::get(dimValueToPosition.size(),
                                 symValueToPosition.size(), exprs, {});

  expr = simplifyAffineExpr(map.getResult(0).compose(exprsMap),
                            exprsMap.getNumDims(), exprsMap.getNumSymbols());

  LLVM_DEBUG(map.getResult(0).print(dbgs() << "\nCompose expr: "));
  LLVM_DEBUG(exprsMap.print(dbgs() << "\nWith map: "));
  LLVM_DEBUG(expr.print(dbgs() << "\nResult: "));
}
|
|
|
|
/// Creates, through builder `b` at location `loc`, an AffineApplyOp whose map
/// and operands are the normalized form of `map` applied to `operands`, as
/// computed by a SingleResultAffineNormalizer.
OpPointer<AffineApplyOp>
mlir::makeNormalizedAffineApply(FuncBuilder *b, Location loc, AffineMap map,
                                ArrayRef<Value *> operands) {
  SingleResultAffineNormalizer normalizer(map, operands);
  auto normalizedMap = normalizer.getAffineMap();
  auto normalizedOperands = normalizer.getOperands();
  return b->create<AffineApplyOp>(loc, normalizedMap, normalizedOperands);
}
|