Make vectorization aware of loop semantics

Now that we have a dependence analysis, we can check that loops are indeed parallel and make vectorization correct.

PiperOrigin-RevId: 240682727
parent 21547ace87
commit 4dc7af9da8
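In outline: the pass now computes, once per Function, the set of loops that the dependence analysis proves parallel, and threads that set through pattern construction so the per-loop filter rejects any loop not in it. A condensed sketch of that flow, pieced together from the Vectorize.cpp hunks below (same API spellings as in the diff; this assumes the pass boilerplate and includes of that file and is not a compilable standalone unit):

// Condensed from the diff below (MLIR, March 2019); sketch only.
void Vectorize::runOnFunction() {
  Function &f = getFunction();

  // 1. Record which affine.for ops the dependence analysis proves parallel.
  llvm::DenseSet<Operation *> parallelLoops;
  f.walkPostOrder([&parallelLoops](Operation *op) {
    if (auto loop = op->dyn_cast<AffineForOp>())
      if (isLoopParallel(loop))
        parallelLoops.insert(op);
  });

  // 2. Thread the set through pattern construction; the per-loop filter
  //    built by isVectorizableLoopPtrFactory rejects loops not in the set.
  for (auto &pat : makePatterns(parallelLoops)) {
    // ... match the pattern and vectorize the matched loop nests ...
  }
}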
@@ -85,20 +85,20 @@ bool isAccessInvariant(Value &iv, Value &index);
 llvm::DenseSet<Value *, llvm::DenseMapInfo<Value *>>
 getInvariantAccesses(Value &iv, llvm::ArrayRef<Value *> indices);
 
+using VectorizableLoopFun = std::function<bool(AffineForOp)>;
+
 /// Checks whether the loop is structurally vectorizable; i.e.:
-///   1. the loop has proper dependence semantics (parallel, reduction, etc);
-///   2. no conditionals are nested under the loop;
-///   3. all nested load/stores are to scalar MemRefs.
-/// TODO(ntv): implement dependence semantics
+///   1. no conditionals are nested under the loop;
+///   2. all nested load/stores are to scalar MemRefs.
 /// TODO(ntv): relax the no-conditionals restriction
-bool isVectorizableLoop(AffineForOp loop);
+bool isVectorizableLoopBody(AffineForOp loop);
 
 /// Checks whether the loop is structurally vectorizable and that all the LoadOp
 /// and StoreOp matched have access indexing functions that are are either:
 ///   1. invariant along the loop induction variable created by 'loop';
 ///   2. varying along the 'fastestVaryingDim' memory dimension.
-bool isVectorizableLoopAlongFastestVaryingMemRefDim(AffineForOp loop,
-                                                    unsigned fastestVaryingDim);
+bool isVectorizableLoopBodyAlongFastestVaryingMemRefDim(
+    AffineForOp loop, unsigned fastestVaryingDim);
 
 /// Checks where SSA dominance would be violated if a for inst's body
 /// operations are shifted by the specified shifts. This method checks if a
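The rename from isVectorizableLoop to isVectorizableLoopBody reflects that these predicates now check only the loop body's structure; loop-carried dependence is the caller's concern. A hypothetical call site (not part of the commit) showing how the two declarations above compose:

// Hypothetical usage of the declarations above; `loop` is any AffineForOp.
void sketch(AffineForOp loop) {
  // Structural checks only: no nested conditionals, scalar MemRef accesses.
  bool bodyOk = isVectorizableLoopBody(loop);
  // Additionally require every matched load/store to be invariant along the
  // induction variable or contiguous along memory dimension 0.
  bool dim0Ok = isVectorizableLoopBodyAlongFastestVaryingMemRefDim(
      loop, /*fastestVaryingDim=*/0);
  (void)bodyOk;
  (void)dim0Ok;
}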
@@ -274,20 +274,17 @@ static bool isVectorTransferReadOrWrite(Operation &op) {
   return op.isa<VectorTransferReadOp>() || op.isa<VectorTransferWriteOp>();
 }
 
-using VectorizableInstFun = std::function<bool(AffineForOp, Operation &)>;
+using VectorizableOpFun = std::function<bool(AffineForOp, Operation &)>;
 
-static bool isVectorizableLoopWithCond(AffineForOp loop,
-                                       VectorizableInstFun isVectorizableInst) {
-  auto *forInst = loop.getOperation();
-  if (!matcher::isParallelLoop(*forInst) &&
-      !matcher::isReductionLoop(*forInst)) {
-    return false;
-  }
+static bool
+isVectorizableLoopBodyWithOpCond(AffineForOp loop,
+                                 VectorizableOpFun isVectorizableOp) {
+  auto *forOp = loop.getOperation();
 
   // No vectorization across conditionals for now.
   auto conditionals = matcher::If();
   SmallVector<NestedMatch, 8> conditionalsMatched;
-  conditionals.match(forInst, &conditionalsMatched);
+  conditionals.match(forOp, &conditionalsMatched);
   if (!conditionalsMatched.empty()) {
     return false;
   }
@@ -298,21 +295,21 @@ static bool isVectorizableLoopWithCond(AffineForOp loop,
            !(op.isa<AffineIfOp>() || op.isa<AffineForOp>());
   });
   SmallVector<NestedMatch, 8> regionsMatched;
-  regions.match(forInst, &regionsMatched);
+  regions.match(forOp, &regionsMatched);
   if (!regionsMatched.empty()) {
     return false;
   }
 
   auto vectorTransfers = matcher::Op(isVectorTransferReadOrWrite);
   SmallVector<NestedMatch, 8> vectorTransfersMatched;
-  vectorTransfers.match(forInst, &vectorTransfersMatched);
+  vectorTransfers.match(forOp, &vectorTransfersMatched);
   if (!vectorTransfersMatched.empty()) {
     return false;
   }
 
   auto loadAndStores = matcher::Op(matcher::isLoadOrStore);
   SmallVector<NestedMatch, 8> loadAndStoresMatched;
-  loadAndStores.match(forInst, &loadAndStoresMatched);
+  loadAndStores.match(forOp, &loadAndStoresMatched);
   for (auto ls : loadAndStoresMatched) {
     auto *op = ls.getMatchedOperation();
     auto load = op->dyn_cast<LoadOp>();
@@ -324,16 +321,16 @@ static bool isVectorizableLoopWithCond(AffineForOp loop,
     if (vector) {
       return false;
     }
-    if (!isVectorizableInst(loop, *op)) {
+    if (isVectorizableOp && !isVectorizableOp(loop, *op)) {
       return false;
     }
   }
   return true;
 }
 
-bool mlir::isVectorizableLoopAlongFastestVaryingMemRefDim(
+bool mlir::isVectorizableLoopBodyAlongFastestVaryingMemRefDim(
     AffineForOp loop, unsigned fastestVaryingDim) {
-  VectorizableInstFun fun([fastestVaryingDim](AffineForOp loop, Operation &op) {
+  VectorizableOpFun fun([fastestVaryingDim](AffineForOp loop, Operation &op) {
     auto load = op.dyn_cast<LoadOp>();
     auto store = op.dyn_cast<StoreOp>();
     return load ? isContiguousAccess(*loop.getInductionVar(), load,
@@ -341,14 +338,11 @@ bool mlir::isVectorizableLoopAlongFastestVaryingMemRefDim(
                 : isContiguousAccess(*loop.getInductionVar(), store,
                                      fastestVaryingDim);
   });
-  return isVectorizableLoopWithCond(loop, fun);
+  return isVectorizableLoopBodyWithOpCond(loop, fun);
 }
 
-bool mlir::isVectorizableLoop(AffineForOp loop) {
-  VectorizableInstFun fun(
-      // TODO: implement me
-      [](AffineForOp loop, Operation &op) { return true; });
-  return isVectorizableLoopWithCond(loop, fun);
+bool mlir::isVectorizableLoopBody(AffineForOp loop) {
+  return isVectorizableLoopBodyWithOpCond(loop, nullptr);
 }
 
 /// Checks whether SSA dominance would be violated if a for op's body
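Note how the new isVectorizableLoopBody simply forwards a null callback, and the guard `isVectorizableOp && !isVectorizableOp(loop, *op)` skips the per-op condition when none is supplied. A minimal, self-contained illustration of that optional-callback idiom (generic C++, not MLIR code):

#include <functional>
#include <vector>

using OpCond = std::function<bool(int)>;

// Mirrors the guard above: a null std::function converts to false, so
// passing nullptr means "structural checks only, no per-op condition".
static bool allOpsPass(const std::vector<int> &ops, const OpCond &cond) {
  for (int op : ops)
    if (cond && !cond(op))
      return false;
  return true;
}

int main() {
  std::vector<int> ops{1, 2, 3};
  bool structuralOnly = allOpsPass(ops, nullptr);                  // true
  bool withCond = allOpsPass(ops, [](int op) { return op != 2; }); // false
  return structuralOnly && !withCond ? 0 : 1;
}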
@@ -153,18 +153,6 @@ NestedPattern For(FilterFunctionType filter, ArrayRef<NestedPattern> nested) {
       nested, [=](Operation &op) { return isAffineForOp(op) && filter(op); });
 }
 
-// TODO(ntv): parallel annotation on loops.
-bool isParallelLoop(Operation &op) {
-  auto loop = op.cast<AffineForOp>();
-  return loop || true; // loop->isParallel();
-};
-
-// TODO(ntv): reduction annotation on loops.
-bool isReductionLoop(Operation &op) {
-  auto loop = op.cast<AffineForOp>();
-  return loop || true; // loop->isReduction();
-};
-
 bool isLoadOrStore(Operation &op) {
   return op.isa<LoadOp>() || op.isa<StoreOp>();
 };
@@ -24,6 +24,7 @@
 #include "mlir/Analysis/LoopAnalysis.h"
 #include "mlir/Analysis/NestedMatcher.h"
 #include "mlir/Analysis/SliceAnalysis.h"
+#include "mlir/Analysis/Utils.h"
 #include "mlir/Analysis/VectorAnalysis.h"
 #include "mlir/IR/AffineExpr.h"
 #include "mlir/IR/Builders.h"
@@ -565,7 +566,8 @@ static llvm::cl::list<int> clFastestVaryingPattern(
 
 /// Forward declaration.
 static FilterFunctionType
-isVectorizableLoopPtrFactory(unsigned fastestVaryingMemRefDimension);
+isVectorizableLoopPtrFactory(const llvm::DenseSet<Operation *> &parallelLoops,
+                             unsigned fastestVaryingMemRefDimension);
 
 // Build a bunch of predetermined patterns that will be traversed in order.
 // Due to the recursive nature of NestedPatterns, this captures
@@ -573,77 +575,84 @@ isVectorizableLoopPtrFactory(unsigned fastestVaryingMemRefDimension);
 /// Note that this currently only matches 2 nested loops and will be extended.
 // TODO(ntv): support 3-D loop patterns with a common reduction loop that can
 // be matched to GEMMs.
-static std::vector<NestedPattern> defaultPatterns() {
+static std::vector<NestedPattern>
+defaultPatterns(const llvm::DenseSet<Operation *> &parallelLoops) {
   using matcher::For;
   return std::vector<NestedPattern>{
       // 3-D patterns
-      For(isVectorizableLoopPtrFactory(2),
-          For(isVectorizableLoopPtrFactory(1),
-              For(isVectorizableLoopPtrFactory(0)))),
+      For(isVectorizableLoopPtrFactory(parallelLoops, 2),
+          For(isVectorizableLoopPtrFactory(parallelLoops, 1),
+              For(isVectorizableLoopPtrFactory(parallelLoops, 0)))),
       // for i { for j { A[??f(not i, not j), f(i, not j), f(not i, j)];}}
       // test independently with:
       //   --test-fastest-varying=1 --test-fastest-varying=0
-      For(isVectorizableLoopPtrFactory(1),
-          For(isVectorizableLoopPtrFactory(0))),
+      For(isVectorizableLoopPtrFactory(parallelLoops, 1),
+          For(isVectorizableLoopPtrFactory(parallelLoops, 0))),
       // for i { for j { A[??f(not i, not j), f(i, not j), ?, f(not i, j)];}}
       // test independently with:
       //   --test-fastest-varying=2 --test-fastest-varying=0
-      For(isVectorizableLoopPtrFactory(2),
-          For(isVectorizableLoopPtrFactory(0))),
+      For(isVectorizableLoopPtrFactory(parallelLoops, 2),
+          For(isVectorizableLoopPtrFactory(parallelLoops, 0))),
       // for i { for j { A[??f(not i, not j), f(i, not j), ?, ?, f(not i, j)];}}
       // test independently with:
       //   --test-fastest-varying=3 --test-fastest-varying=0
-      For(isVectorizableLoopPtrFactory(3),
-          For(isVectorizableLoopPtrFactory(0))),
+      For(isVectorizableLoopPtrFactory(parallelLoops, 3),
+          For(isVectorizableLoopPtrFactory(parallelLoops, 0))),
       // for i { for j { A[??f(not i, not j), f(not i, j), f(i, not j)];}}
       // test independently with:
       //   --test-fastest-varying=0 --test-fastest-varying=1
-      For(isVectorizableLoopPtrFactory(0),
-          For(isVectorizableLoopPtrFactory(1))),
+      For(isVectorizableLoopPtrFactory(parallelLoops, 0),
+          For(isVectorizableLoopPtrFactory(parallelLoops, 1))),
       // for i { for j { A[??f(not i, not j), f(not i, j), ?, f(i, not j)];}}
       // test independently with:
       //   --test-fastest-varying=0 --test-fastest-varying=2
-      For(isVectorizableLoopPtrFactory(0),
-          For(isVectorizableLoopPtrFactory(2))),
+      For(isVectorizableLoopPtrFactory(parallelLoops, 0),
+          For(isVectorizableLoopPtrFactory(parallelLoops, 2))),
       // for i { for j { A[??f(not i, not j), f(not i, j), ?, ?, f(i, not j)];}}
       // test independently with:
       //   --test-fastest-varying=0 --test-fastest-varying=3
-      For(isVectorizableLoopPtrFactory(0),
-          For(isVectorizableLoopPtrFactory(3))),
+      For(isVectorizableLoopPtrFactory(parallelLoops, 0),
+          For(isVectorizableLoopPtrFactory(parallelLoops, 3))),
       // for i { A[??f(not i) , f(i)];}
       // test independently with: --test-fastest-varying=0
-      For(isVectorizableLoopPtrFactory(0)),
+      For(isVectorizableLoopPtrFactory(parallelLoops, 0)),
       // for i { A[??f(not i) , f(i), ?];}
       // test independently with: --test-fastest-varying=1
-      For(isVectorizableLoopPtrFactory(1)),
+      For(isVectorizableLoopPtrFactory(parallelLoops, 1)),
       // for i { A[??f(not i) , f(i), ?, ?];}
       // test independently with: --test-fastest-varying=2
-      For(isVectorizableLoopPtrFactory(2)),
+      For(isVectorizableLoopPtrFactory(parallelLoops, 2)),
       // for i { A[??f(not i) , f(i), ?, ?, ?];}
       // test independently with: --test-fastest-varying=3
-      For(isVectorizableLoopPtrFactory(3))};
+      For(isVectorizableLoopPtrFactory(parallelLoops, 3))};
 }
 
 /// Creates a vectorization pattern from the command line arguments.
 /// Up to 3-D patterns are supported.
 /// If the command line argument requests a pattern of higher order, returns an
 /// empty pattern list which will conservatively result in no vectorization.
-static std::vector<NestedPattern> makePatterns() {
+static std::vector<NestedPattern>
+makePatterns(const llvm::DenseSet<Operation *> &parallelLoops) {
   using matcher::For;
   if (clFastestVaryingPattern.empty()) {
-    return defaultPatterns();
+    return defaultPatterns(parallelLoops);
   }
   switch (clFastestVaryingPattern.size()) {
   case 1:
-    return {For(isVectorizableLoopPtrFactory(clFastestVaryingPattern[0]))};
+    return {For(isVectorizableLoopPtrFactory(parallelLoops,
+                                             clFastestVaryingPattern[0]))};
   case 2:
-    return {For(isVectorizableLoopPtrFactory(clFastestVaryingPattern[0]),
-                For(isVectorizableLoopPtrFactory(clFastestVaryingPattern[1])))};
+    return {For(
+        isVectorizableLoopPtrFactory(parallelLoops, clFastestVaryingPattern[0]),
+        For(isVectorizableLoopPtrFactory(parallelLoops,
+                                         clFastestVaryingPattern[1])))};
   case 3:
     return {For(
-        isVectorizableLoopPtrFactory(clFastestVaryingPattern[0]),
-        For(isVectorizableLoopPtrFactory(clFastestVaryingPattern[1]),
-            For(isVectorizableLoopPtrFactory(clFastestVaryingPattern[2]))))};
+        isVectorizableLoopPtrFactory(parallelLoops, clFastestVaryingPattern[0]),
+        For(isVectorizableLoopPtrFactory(parallelLoops,
+                                         clFastestVaryingPattern[1]),
+            For(isVectorizableLoopPtrFactory(parallelLoops,
+                                             clFastestVaryingPattern[2]))))};
   default:
     return std::vector<NestedPattern>();
   }
@@ -905,10 +914,14 @@ static LogicalResult vectorizeAffineForOp(AffineForOp loop, int64_t step,
 /// once we understand better the performance implications and we are confident
 /// we can build a cost model and a search procedure.
 static FilterFunctionType
-isVectorizableLoopPtrFactory(unsigned fastestVaryingMemRefDimension) {
-  return [fastestVaryingMemRefDimension](Operation &forOp) {
+isVectorizableLoopPtrFactory(const llvm::DenseSet<Operation *> &parallelLoops,
+                             unsigned fastestVaryingMemRefDimension) {
+  return [&parallelLoops, fastestVaryingMemRefDimension](Operation &forOp) {
     auto loop = forOp.cast<AffineForOp>();
-    return isVectorizableLoopAlongFastestVaryingMemRefDim(
+    auto parallelIt = parallelLoops.find(loop);
+    if (parallelIt == parallelLoops.end())
+      return false;
+    return isVectorizableLoopBodyAlongFastestVaryingMemRefDim(
         loop, fastestVaryingMemRefDimension);
   };
 }
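One subtlety: the filter returned here captures parallelLoops by reference, so the DenseSet must outlive every NestedPattern built from it; in this commit it is a local of runOnFunction that spans the whole matching loop. A minimal standalone illustration of that lifetime constraint, using a standard container in place of the MLIR types (hypothetical names, not from the commit):

#include <functional>
#include <set>

// Returns a predicate that refers to, but does not copy, `allowed`.
std::function<bool(int)> makeFilter(const std::set<int> &allowed) {
  return [&allowed](int v) { return allowed.count(v) != 0; };
}

int main() {
  std::set<int> allowed{1, 2}; // must outlive the filter...
  auto filter = makeFilter(allowed);
  return filter(1) && !filter(3) ? 0 : 1; // ...as it does here
}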
@@ -1168,7 +1181,7 @@ static LogicalResult vectorizeRootMatch(NestedMatch m,
   // vectorizable. If a pattern is not vectorizable anymore, we just skip it.
   // TODO(ntv): implement a non-greedy profitability analysis that keeps only
   // non-intersecting patterns.
-  if (!isVectorizableLoop(loop)) {
+  if (!isVectorizableLoopBody(loop)) {
     LLVM_DEBUG(dbgs() << "\n[early-vect]+++++ loop is not vectorizable");
     return failure();
   }
@@ -1240,7 +1253,16 @@ void Vectorize::runOnFunction() {
   NestedPatternContext mlContext;
 
   Function &f = getFunction();
-  for (auto &pat : makePatterns()) {
+  llvm::DenseSet<Operation *> parallelLoops;
+  f.walkPostOrder([&parallelLoops](Operation *op) {
+    if (auto loop = op->dyn_cast<AffineForOp>()) {
+      if (isLoopParallel(loop)) {
+        parallelLoops.insert(op);
+      }
+    }
+  });
+
+  for (auto &pat : makePatterns(parallelLoops)) {
     LLVM_DEBUG(dbgs() << "\n******************************************");
     LLVM_DEBUG(dbgs() << "\n******************************************");
     LLVM_DEBUG(dbgs() << "\n[early-vect] new pattern on Function\n");
@@ -11,6 +11,7 @@
 #set0 = (i) : (i >= 0)
 
 // Maps introduced to vectorize fastest varying memory index.
+// CHECK-LABEL: vec1d
 func @vec1d(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
 // CHECK-DAG: [[C0:%[a-z0-9_]+]] = constant 0 : index
 // CHECK-DAG: [[ARG_M:%[0-9]+]] = dim %arg0, 0 : memref<?x?xf32>
@@ -133,6 +134,7 @@ func @vec1d(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
   return
 }
 
+// CHECK-LABEL: vector_add_2d
 func @vector_add_2d(%M : index, %N : index) -> f32 {
   %A = alloc (%M, %N) : memref<?x?xf32, 0>
   %B = alloc (%M, %N) : memref<?x?xf32, 0>
@@ -201,3 +203,17 @@ func @vec_rejected(%A : memref<?x?xf32>, %C : memref<?x?xf32>) {
   }
   return
 }
+
+// This should not vectorize due to the sequential dependence in the loop.
+// CHECK-LABEL: @vec_rejected_sequential
+func @vec_rejected_sequential(%A : memref<?xf32>) {
+  %N = dim %A, 0 : memref<?xf32>
+  affine.for %i = 0 to %N {
+    // CHECK-NOT: vector
+    %a = load %A[%i] : memref<?xf32>
+    // CHECK-NOT: vector
+    %ip1 = affine.apply (d0)->(d0 + 1) (%i)
+    store %a, %A[%ip1] : memref<?xf32>
+  }
+  return
+}
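The new negative test is exactly the case the dependence check must reject: every iteration stores the value loaded from A[%i] into A[%i + 1], a loop-carried read-after-write. A small scalar analogue (hypothetical, not part of the commit) shows how executing all loads up front, as a vector load would, changes the result:

#include <array>
#include <cstdio>

int main() {
  // Sequential execution: iteration i+1 reads what iteration i just wrote,
  // so seq[0] propagates through the whole array -> 1 1 1 1 1.
  std::array<float, 5> seq{1, 2, 3, 4, 5};
  for (int i = 0; i + 1 < 5; ++i)
    seq[i + 1] = seq[i];

  // "Vectorized" execution: all loads happen before any store (as a wide
  // vector load would), so the array merely shifts by one -> 1 1 2 3 4.
  std::array<float, 5> vec{1, 2, 3, 4, 5};
  std::array<float, 5> loads = vec;
  for (int i = 0; i + 1 < 5; ++i)
    vec[i + 1] = loads[i];

  std::printf("seq: %g %g %g %g %g\n", seq[0], seq[1], seq[2], seq[3], seq[4]);
  std::printf("vec: %g %g %g %g %g\n", vec[0], vec[1], vec[2], vec[3], vec[4]);
}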