Clean up build trip count analysis method - avoid mutating IR

- NFC on any pass/utility logic or output.

- Resolve TODO: the method building loop trip count maps was creating and
  deleting affine.apply ops (strictly speaking, transforming the IR from under
  the analysis). Introduce AffineValueMap::difference to do this correctly,
  without the need to create any IR.

- Move AffineApplyNormalizer out of its anonymous namespace so that its
  methods are reusable from AffineStructures.cpp; add a helper method
  'normalize' to it. Fix AffineApplyNormalizer::renumberOneDim (Issue
  tensorflow/mlir#89).

- Trim includes on the files touched.

- Add a test case for a scenario previously not covered.

Signed-off-by: Uday Bondhugula <uday@polymagelabs.com>

Closes tensorflow/mlir#133

COPYBARA_INTEGRATE_REVIEW=https://github.com/tensorflow/mlir/pull/133 from bondhugula:trip-count-build 7fc34d857f7788f98b641792cafad6f5bd50e47b
PiperOrigin-RevId: 269101118
Parent: 2de18fb84d
Commit: 018cfa94d9
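
As a quick orientation for the change described above, here is a minimal sketch of how an analysis might consume the rebuilt trip count map, in the spirit of the existing getConstantTripCount helper. It assumes the post-patch signature shown in the LoopAnalysis.cpp hunk further below and that the declaration lives in mlir/Analysis/LoopAnalysis.h; the helper name is illustrative only.

#include "mlir/Analysis/LoopAnalysis.h"
#include "mlir/Dialect/AffineOps/AffineOps.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/AffineMap.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"

using namespace mlir;

// Illustrative helper (not part of the patch): returns the trip count if the
// rebuilt map folds to a single constant. No affine.apply ops are created or
// erased while answering the query.
static llvm::Optional<uint64_t> getTripCountIfConstant(AffineForOp forOp) {
  AffineMap tripCountMap;
  SmallVector<Value *, 4> tripCountOperands;
  buildTripCountMapAndOperands(forOp, &tripCountMap, &tripCountOperands);
  // A null map signals a bound shape the method does not handle.
  if (!tripCountMap)
    return llvm::None;
  if (tripCountMap.getNumResults() == 1)
    if (auto cst = tripCountMap.getResult(0).dyn_cast<AffineConstantExpr>())
      return static_cast<uint64_t>(cst.getValue());
  return llvm::None;
}
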
@@ -123,7 +123,6 @@ public:
  // Creates an empty AffineValueMap (users should call 'reset' to reset map
  // and operands).
  AffineValueMap() {}
  AffineValueMap(AffineMap map);
  AffineValueMap(AffineMap map, ArrayRef<Value *> operands,
                 ArrayRef<Value *> results = llvm::None);

@@ -136,6 +135,12 @@ public:
  void reset(AffineMap map, ArrayRef<Value *> operands,
             ArrayRef<Value *> results = llvm::None);

  /// Return the value map that is the difference of value maps 'a' and 'b',
  /// represented as an affine map and its operands. The output map + operands
  /// are canonicalized and simplified.
  static void difference(const AffineValueMap &a, const AffineValueMap &b,
                         AffineValueMap *res);

  /// Return true if the idx^th result can be proved to be a multiple of
  /// 'factor', false otherwise.
  inline bool isMultipleOf(unsigned idx, int64_t factor) const;
@@ -150,6 +155,8 @@ public:
  /// Return true if this is an identity map.
  bool isIdentity() const;

  void setResult(unsigned i, AffineExpr e) { map.setResult(i, e); }
  AffineExpr getResult(unsigned i) { return map.getResult(i); }
  inline unsigned getNumOperands() const { return operands.size(); }
  inline unsigned getNumDims() const { return map.getNumDims(); }
  inline unsigned getNumSymbols() const { return map.getNumSymbols(); }
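
The hunks above declare the new AffineValueMap::difference API and the setResult/getResult accessors it relies on. A minimal usage sketch, assuming the two value maps are already populated (for instance from a loop's bound maps and operands); the helper name is illustrative:

#include "mlir/Analysis/AffineStructures.h"
#include "mlir/IR/AffineExpr.h"

using namespace mlir;

// Illustrative helper (not part of the patch): true if every result of a - b
// simplifies to a constant, computed purely at the map level.
static bool differenceIsConstant(const AffineValueMap &a,
                                 const AffineValueMap &b) {
  AffineValueMap diff;
  AffineValueMap::difference(a, b, &diff); // canonicalized/simplified a - b
  for (unsigned i = 0, e = diff.getNumResults(); i < e; ++i)
    if (!diff.getResult(i).isa<AffineConstantExpr>())
      return false;
  return true;
}

The difference of a loop's upper-bound and (splatted) lower-bound value maps, divided by the step, is exactly how the trip count map is rebuilt in LoopAnalysis.cpp further below.
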
@@ -31,6 +31,7 @@

namespace mlir {
class AffineBound;
class AffineDimExpr;
class AffineValueMap;
class AffineTerminatorOp;
class FlatAffineConstraints;
@@ -597,6 +598,77 @@ private:
  friend class AffineForOp;
};

/// An `AffineApplyNormalizer` is a helper class that supports renumbering
/// operands of AffineApplyOp. This acts as a reindexing map of Value* to
/// positional dims or symbols and allows simplifications such as:
///
/// ```mlir
/// %1 = affine.apply (d0, d1) -> (d0 - d1) (%0, %0)
/// ```
///
/// into:
///
/// ```mlir
/// %1 = affine.apply () -> (0)
/// ```
struct AffineApplyNormalizer {
  AffineApplyNormalizer(AffineMap map, ArrayRef<Value *> operands);

  /// Returns the AffineMap resulting from normalization.
  AffineMap getAffineMap() { return affineMap; }

  SmallVector<Value *, 8> getOperands() {
    SmallVector<Value *, 8> res(reorderedDims);
    res.append(concatenatedSymbols.begin(), concatenatedSymbols.end());
    return res;
  }

  unsigned getNumSymbols() { return concatenatedSymbols.size(); }
  unsigned getNumDims() { return reorderedDims.size(); }

  /// Normalizes 'otherMap' and its operands 'otherOperands' to map to this
  /// normalizer's coordinate space.
  void normalize(AffineMap *otherMap, SmallVectorImpl<Value *> *otherOperands);

private:
  /// Helper function to insert `v` into the coordinate system of the current
  /// AffineApplyNormalizer. Returns the AffineDimExpr with the corresponding
  /// renumbered position.
  AffineDimExpr renumberOneDim(Value *v);

  /// Given an `other` normalizer, this rewrites `other.affineMap` in the
  /// coordinate system of the current AffineApplyNormalizer.
  /// Returns the rewritten AffineMap and updates the dims and symbols of
  /// `this`.
  AffineMap renumber(const AffineApplyNormalizer &other);

  /// Maps of Value* to position in `affineMap`.
  DenseMap<Value *, unsigned> dimValueToPosition;

  /// Ordered dims and symbols matching positional dims and symbols in
  /// `affineMap`.
  SmallVector<Value *, 8> reorderedDims;
  SmallVector<Value *, 8> concatenatedSymbols;

  AffineMap affineMap;

  /// Used with RAII to control the depth at which AffineApply are composed
  /// recursively. Only accepts depth 1 for now to allow a behavior where a
  /// newly composed AffineApplyOp does not increase the length of the chain of
  /// AffineApplyOps. Full composition is implemented iteratively on top of
  /// this behavior.
  static unsigned &affineApplyDepth() {
    static thread_local unsigned depth = 0;
    return depth;
  }
  static constexpr unsigned kMaxAffineApplyDepth = 1;

  AffineApplyNormalizer() { affineApplyDepth()++; }

public:
  ~AffineApplyNormalizer() { affineApplyDepth()--; }
};

} // end namespace mlir

#endif
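
With AffineApplyNormalizer and its new normalize method exposed in this header, maps over overlapping SSA values can be combined expression-wise without creating any IR. A minimal sketch of that pattern (aMap/aOperands, bMap/bOperands and the helper name are illustrative; AffineValueMap::difference below follows the same pattern, after first fully composing a's map and operands):

#include "mlir/Dialect/AffineOps/AffineOps.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/AffineMap.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"

using namespace mlir;

// Illustrative helper (not part of the patch): sum of the first results of
// two maps, expressed over a single dim/symbol space.
static AffineMap sumOfFirstResults(AffineMap aMap, ArrayRef<Value *> aOperands,
                                   AffineMap bMap,
                                   SmallVectorImpl<Value *> *bOperands) {
  // Seed the coordinate space with a's dims and symbols.
  AffineApplyNormalizer normalizer(aMap, aOperands);
  // Rewrite bMap over that space; *bOperands becomes normalizer.getOperands().
  normalizer.normalize(&bMap, bOperands);
  AffineExpr sum = normalizer.getAffineMap().getResult(0) + bMap.getResult(0);
  return AffineMap::get(normalizer.getNumDims(), normalizer.getNumSymbols(),
                        {sum});
}
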
@@ -23,11 +23,8 @@
#include "mlir/Dialect/AffineOps/AffineOps.h"
#include "mlir/Dialect/StandardOps/Ops.h"
#include "mlir/IR/AffineExprVisitor.h"
#include "mlir/IR/AffineMap.h"
#include "mlir/IR/IntegerSet.h"
#include "mlir/IR/Operation.h"
#include "mlir/Support/MathExtras.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -228,6 +225,41 @@ void AffineValueMap::reset(AffineMap map, ArrayRef<Value *> operands,
  this->results.assign(results.begin(), results.end());
}

void AffineValueMap::difference(const AffineValueMap &a,
                                const AffineValueMap &b, AffineValueMap *res) {
  assert(a.getNumResults() == b.getNumResults() && "invalid inputs");

  // Fully compose A's map + operands.
  auto aMap = a.getAffineMap();
  SmallVector<Value *, 4> aOperands(a.getOperands().begin(),
                                    a.getOperands().end());
  fullyComposeAffineMapAndOperands(&aMap, &aOperands);

  // Use the affine apply normalizer to get B's map into A's coordinate space.
  AffineApplyNormalizer normalizer(aMap, aOperands);
  SmallVector<Value *, 4> bOperands(b.getOperands().begin(),
                                    b.getOperands().end());
  auto bMap = b.getAffineMap();
  normalizer.normalize(&bMap, &bOperands);

  assert(std::equal(bOperands.begin(), bOperands.end(),
                    normalizer.getOperands().begin()) &&
         "operands are expected to be the same after normalization");

  // Construct the difference expressions.
  SmallVector<AffineExpr, 4> diffExprs;
  diffExprs.reserve(a.getNumResults());
  for (unsigned i = 0, e = bMap.getNumResults(); i < e; ++i)
    diffExprs.push_back(normalizer.getAffineMap().getResult(i) -
                        bMap.getResult(i));

  auto diffMap = AffineMap::get(normalizer.getNumDims(),
                                normalizer.getNumSymbols(), diffExprs);
  canonicalizeMapAndOperands(&diffMap, &bOperands);
  diffMap = simplifyAffineMap(diffMap);
  res->reset(diffMap, bOperands);
}

// Returns true and sets 'indexOfMatch' if 'valueToMatch' is found in
// 'valuesToSearch' beginning at 'indexStart'. Returns false otherwise.
static bool findIndex(Value *valueToMatch, ArrayRef<Value *> valuesToSearch,
@@ -24,14 +24,8 @@
#include "mlir/Analysis/AffineAnalysis.h"
#include "mlir/Analysis/AffineStructures.h"
#include "mlir/Analysis/NestedMatcher.h"
#include "mlir/Analysis/VectorAnalysis.h"
#include "mlir/Dialect/AffineOps/AffineOps.h"
#include "mlir/Dialect/StandardOps/Ops.h"
#include "mlir/Dialect/VectorOps/VectorOps.h"
#include "mlir/IR/AffineMap.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/Operation.h"
#include "mlir/Support/Functional.h"
#include "mlir/Support/MathExtras.h"

#include "llvm/ADT/DenseSet.h"
@@ -49,7 +43,7 @@ using namespace mlir;
// pure analysis method relying on FlatAffineConstraints; the latter will also
// be more powerful (since both inequalities and equalities will be considered).
void mlir::buildTripCountMapAndOperands(
    AffineForOp forOp, AffineMap *map,
    AffineForOp forOp, AffineMap *tripCountMap,
    SmallVectorImpl<Value *> *tripCountOperands) {
  int64_t loopSpan;

@@ -62,48 +56,39 @@ void mlir::buildTripCountMapAndOperands(
    loopSpan = ub - lb;
    if (loopSpan < 0)
      loopSpan = 0;
    *map = b.getConstantAffineMap(ceilDiv(loopSpan, step));
    *tripCountMap = b.getConstantAffineMap(ceilDiv(loopSpan, step));
    tripCountOperands->clear();
    return;
  }
  auto lbMap = forOp.getLowerBoundMap();
  auto ubMap = forOp.getUpperBoundMap();
  if (lbMap.getNumResults() != 1) {
    *map = AffineMap();
    *tripCountMap = AffineMap();
    return;
  }
  SmallVector<Value *, 4> lbOperands(forOp.getLowerBoundOperands());
  SmallVector<Value *, 4> ubOperands(forOp.getUpperBoundOperands());
  auto lb = b.create<AffineApplyOp>(forOp.getLoc(), lbMap, lbOperands);
  SmallVector<Value *, 4> ubs;
  ubs.reserve(ubMap.getNumResults());
  for (auto ubExpr : ubMap.getResults())
    ubs.push_back(b.create<AffineApplyOp>(
        forOp.getLoc(),
        b.getAffineMap(ubMap.getNumDims(), ubMap.getNumSymbols(), {ubExpr}),
        ubOperands));

  tripCountOperands->clear();
  tripCountOperands->reserve(1 + ubs.size());
  tripCountOperands->push_back(lb);
  tripCountOperands->append(ubs.begin(), ubs.end());
  // Difference of each upper bound expression from the single lower bound
  // expression (divided by the step) provides the expressions for the trip
  // count map.
  AffineValueMap ubValueMap(ubMap, ubOperands);

  SmallVector<AffineExpr, 4> tripCountExprs(ubs.size());
  for (unsigned i = 0, e = ubs.size(); i < e; i++)
    tripCountExprs[i] =
        (b.getAffineDimExpr(1 + i) - b.getAffineDimExpr(0)).ceilDiv(step);
  *map = b.getAffineMap(1 + ubs.size(), 0, tripCountExprs);
  SmallVector<AffineExpr, 4> lbSplatExpr(ubValueMap.getNumResults(),
                                         lbMap.getResult(0));
  auto lbMapSplat =
      b.getAffineMap(lbMap.getNumDims(), lbMap.getNumSymbols(), lbSplatExpr);
  AffineValueMap lbSplatValueMap(lbMapSplat, lbOperands);

  fullyComposeAffineMapAndOperands(map, tripCountOperands);
  *map = simplifyAffineMap(*map);
  canonicalizeMapAndOperands(map, tripCountOperands);
  // Remove any affine.apply's that became dead as a result of composition,
  // simplification, and canonicalization above.
  for (auto *v : ubs)
    if (v->use_empty())
      v->getDefiningOp()->erase();
  if (lb.use_empty())
    lb.erase();
  AffineValueMap tripCountValueMap;
  AffineValueMap::difference(ubValueMap, lbSplatValueMap, &tripCountValueMap);
  for (unsigned i = 0, e = tripCountValueMap.getNumResults(); i < e; ++i)
    tripCountValueMap.setResult(i,
                                tripCountValueMap.getResult(i).ceilDiv(step));

  *tripCountMap = tripCountValueMap.getAffineMap();
  tripCountOperands->assign(tripCountValueMap.getOperands().begin(),
                            tripCountValueMap.getOperands().end());
}

/// Returns the trip count of the loop if it's a constant, None otherwise. This
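
As a worked instance of the rebuilt computation above, take the new test added further below, `affine.for %j = 0 to %N step 3`: the upper-bound value map is ()[s0] -> (s0) on [%N], the splatted lower-bound map is () -> (0), their difference canonicalizes to ()[s0] -> (s0), and dividing by the step gives the trip count map ()[s0] -> (s0 ceildiv 3). A small standalone sketch building that final map with plain expression arithmetic (function name illustrative):

#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/AffineMap.h"
#include "mlir/IR/MLIRContext.h"

using namespace mlir;

static AffineMap tripCountMapForStepThreeExample(MLIRContext *ctx) {
  AffineExpr ub = getAffineSymbolExpr(0, ctx);   // upper bound: s0 (i.e. %N)
  AffineExpr lb = getAffineConstantExpr(0, ctx); // lower bound: 0
  // (ub - lb) ceildiv step, i.e. ()[s0] -> (s0 ceildiv 3).
  AffineExpr tripCount = (ub - lb).ceilDiv(3);
  return AffineMap::get(/*dimCount=*/0, /*symbolCount=*/1, {tripCount});
}
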
@@ -17,8 +17,6 @@

#include "mlir/Dialect/AffineOps/AffineOps.h"
#include "mlir/Dialect/StandardOps/Ops.h"
#include "mlir/IR/Block.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/Function.h"
#include "mlir/IR/IntegerSet.h"
#include "mlir/IR/Matchers.h"
@@ -284,73 +282,6 @@ OpFoldResult AffineApplyOp::fold(ArrayRef<Attribute> operands) {
  return result[0];
}

namespace {
/// An `AffineApplyNormalizer` is a helper class that is not visible to the user
/// and supports renumbering operands of AffineApplyOp. This acts as a
/// reindexing map of Value* to positional dims or symbols and allows
/// simplifications such as:
///
/// ```mlir
/// %1 = affine.apply (d0, d1) -> (d0 - d1) (%0, %0)
/// ```
///
/// into:
///
/// ```mlir
/// %1 = affine.apply () -> (0)
/// ```
struct AffineApplyNormalizer {
  AffineApplyNormalizer(AffineMap map, ArrayRef<Value *> operands);

  /// Returns the AffineMap resulting from normalization.
  AffineMap getAffineMap() { return affineMap; }

  SmallVector<Value *, 8> getOperands() {
    SmallVector<Value *, 8> res(reorderedDims);
    res.append(concatenatedSymbols.begin(), concatenatedSymbols.end());
    return res;
  }

private:
  /// Helper function to insert `v` into the coordinate system of the current
  /// AffineApplyNormalizer. Returns the AffineDimExpr with the corresponding
  /// renumbered position.
  AffineDimExpr renumberOneDim(Value *v);

  /// Given an `other` normalizer, this rewrites `other.affineMap` in the
  /// coordinate system of the current AffineApplyNormalizer.
  /// Returns the rewritten AffineMap and updates the dims and symbols of
  /// `this`.
  AffineMap renumber(const AffineApplyNormalizer &other);

  /// Maps of Value* to position in `affineMap`.
  DenseMap<Value *, unsigned> dimValueToPosition;

  /// Ordered dims and symbols matching positional dims and symbols in
  /// `affineMap`.
  SmallVector<Value *, 8> reorderedDims;
  SmallVector<Value *, 8> concatenatedSymbols;

  AffineMap affineMap;

  /// Used with RAII to control the depth at which AffineApply are composed
  /// recursively. Only accepts depth 1 for now to allow a behavior where a
  /// newly composed AffineApplyOp does not increase the length of the chain of
  /// AffineApplyOps. Full composition is implemented iteratively on top of
  /// this behavior.
  static unsigned &affineApplyDepth() {
    static thread_local unsigned depth = 0;
    return depth;
  }
  static constexpr unsigned kMaxAffineApplyDepth = 1;

  AffineApplyNormalizer() { affineApplyDepth()++; }

public:
  ~AffineApplyNormalizer() { affineApplyDepth()--; }
};
} // end anonymous namespace.

AffineDimExpr AffineApplyNormalizer::renumberOneDim(Value *v) {
  DenseMap<Value *, unsigned>::iterator iterPos;
  bool inserted = false;
@@ -383,7 +314,8 @@ AffineMap AffineApplyNormalizer::renumber(const AffineApplyNormalizer &other) {
                             other.concatenatedSymbols.end());
  auto map = other.affineMap;
  return map.replaceDimsAndSymbols(dimRemapping, symRemapping,
                                   dimRemapping.size(), symRemapping.size());
                                   reorderedDims.size(),
                                   concatenatedSymbols.size());
}

// Gather the positions of the operands that are produced by an AffineApplyOp.
@@ -586,6 +518,16 @@ AffineApplyNormalizer::AffineApplyNormalizer(AffineMap map,
  LLVM_DEBUG(dbgs() << "\n");
}

void AffineApplyNormalizer::normalize(AffineMap *otherMap,
                                      SmallVectorImpl<Value *> *otherOperands) {
  AffineApplyNormalizer other(*otherMap, *otherOperands);
  *otherMap = renumber(other);

  otherOperands->reserve(reorderedDims.size() + concatenatedSymbols.size());
  otherOperands->assign(reorderedDims.begin(), reorderedDims.end());
  otherOperands->append(concatenatedSymbols.begin(), concatenatedSymbols.end());
}

/// Implements `map` and `operands` composition and simplification to support
/// `makeComposedAffineApply`. This can be called to achieve the same effects
/// on `map` and `operands` without creating an AffineApplyOp that needs to be
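
The renumber() change above relates to the renumbering fix referenced in the commit message (Issue tensorflow/mlir#89): the result dim/symbol counts passed to replaceDimsAndSymbols must describe the normalizer's combined space, which may be larger than the number of entries being remapped from `other`. A minimal standalone illustration of that requirement (the concrete maps and counts are hypothetical):

#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/AffineMap.h"
#include "mlir/IR/MLIRContext.h"
#include "llvm/ADT/SmallVector.h"

using namespace mlir;

// `other` has one dim (d0) that gets renumbered to position 1 of a combined
// space that already holds two dims. The rewritten map must therefore be
// declared with the combined dim count (2), not other's dim count (1).
static AffineMap renumberIntoTwoDimSpace(MLIRContext *ctx) {
  AffineMap other = AffineMap::get(1, 0, {getAffineDimExpr(0, ctx)});
  SmallVector<AffineExpr, 1> dimRemapping{getAffineDimExpr(1, ctx)};
  return other.replaceDimsAndSymbols(dimRemapping, /*symReplacements=*/{},
                                     /*numResultDims=*/2,
                                     /*numResultSyms=*/0);
}
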
@@ -509,6 +509,28 @@ func @loop_nest_symbolic_bound(%N : index) {
  return
}

// UNROLL-BY-4-LABEL: func @loop_nest_symbolic_bound_with_step
// UNROLL-BY-4-SAME: %[[N:.*]]: index
func @loop_nest_symbolic_bound_with_step(%N : index) {
  // UNROLL-BY-4: affine.for %arg1 = 0 to 100 {
  affine.for %i = 0 to 100 {
    affine.for %j = 0 to %N step 3 {
      %x = "foo"() : () -> i32
    }
    // UNROLL-BY-4: affine.for %{{.*}} = 0 to #map{{[0-9]+}}()[%[[N]]] step 12 {
    // UNROLL-BY-4: "foo"()
    // UNROLL-BY-4-NEXT: "foo"()
    // UNROLL-BY-4-NEXT: "foo"()
    // UNROLL-BY-4-NEXT: "foo"()
    // UNROLL-BY-4-NEXT: }
    // A cleanup loop will be generated here.
    // UNROLL-BY-4-NEXT: affine.for %{{.*}} = #map{{[0-9]+}}()[%[[N]]] to %[[N]] step 3 {
    // UNROLL-BY-4-NEXT: "foo"()
    // UNROLL-BY-4-NEXT: }
  }
  return
}

// UNROLL-BY-4-LABEL: func @loop_nest_symbolic_and_min_upper_bound
func @loop_nest_symbolic_and_min_upper_bound(%M : index, %N : index, %K : index) {
  affine.for %i = %M to min ()[s0, s1] -> (s0, s1, 1024)()[%N, %K] {
@@ -529,8 +551,8 @@ func @loop_nest_symbolic_and_min_upper_bound(%M : index, %N : index, %K : index)

// The trip count here is a multiple of four, but this can be inferred only
// through composition. Check for no cleanup loop.
// UNROLL-BY-4-LABEL: func @loop_nest_non_trivial_multiple_unroll_factor
func @loop_nest_non_trivial_multiple_unroll_factor(%M : index, %N : index) {
// UNROLL-BY-4-LABEL: func @loop_nest_non_trivial_multiple_upper_bound
func @loop_nest_non_trivial_multiple_upper_bound(%M : index, %N : index) {
  %T = affine.apply (d0) -> (4*d0 + 1)(%M)
  %K = affine.apply (d0) -> (d0 - 1) (%T)
  affine.for %i = 0 to min (d0, d1) -> (4 * d0, d1, 1024)(%N, %K) {
@@ -542,8 +564,8 @@ func @loop_nest_non_trivial_multiple_unroll_factor(%M : index, %N : index) {
  // UNROLL-BY-4-NOT: for
  // UNROLL-BY-4: return

// UNROLL-BY-4-LABEL: func @loop_nest_non_trivial_multiple_unroll_factor_2
func @loop_nest_non_trivial_multiple_unroll_factor_2(%M : index, %N : index) {
// UNROLL-BY-4-LABEL: func @loop_nest_non_trivial_multiple_upper_bound_alt
func @loop_nest_non_trivial_multiple_upper_bound_alt(%M : index, %N : index) {
  %K = affine.apply (d0) -> (4*d0) (%M)
  affine.for %i = 0 to min ()[s0, s1] -> (4 * s0, s1, 1024)()[%N, %K] {
    "foo"() : () -> ()