Mercurial > hg > CbC > CbC_llvm
view mlir/lib/Analysis/AffineAnalysis.cpp @ 201:a96fbbdf2d0f
...
author | Shinji KONO <kono@ie.u-ryukyu.ac.jp> |
---|---|
date | Fri, 04 Jun 2021 21:07:06 +0900 |
parents | 0572611fdcc8 |
children | 2e18cbf3894f |
line wrap: on
line source
//===- AffineAnalysis.cpp - Affine structures analysis routines -----------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file implements miscellaneous analysis routines for affine structures // (expressions, maps, sets), and other utilities relying on such analysis. // //===----------------------------------------------------------------------===// #include "mlir/Analysis/AffineAnalysis.h" #include "mlir/Analysis/Utils.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/Dialect/Affine/IR/AffineValueMap.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" #include "mlir/IR/AffineExprVisitor.h" #include "mlir/IR/Function.h" #include "mlir/IR/IntegerSet.h" #include "mlir/Support/MathExtras.h" #include "llvm/ADT/DenseMap.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #define DEBUG_TYPE "affine-analysis" using namespace mlir; using llvm::dbgs; /// Returns the sequence of AffineApplyOp Operations operation in /// 'affineApplyOps', which are reachable via a search starting from 'operands', /// and ending at operands which are not defined by AffineApplyOps. // TODO(andydavis) Add a method to AffineApplyOp which forward substitutes // the AffineApplyOp into any user AffineApplyOps. void mlir::getReachableAffineApplyOps( ArrayRef<Value> operands, SmallVectorImpl<Operation *> &affineApplyOps) { struct State { // The ssa value for this node in the DFS traversal. Value value; // The operand index of 'value' to explore next during DFS traversal. unsigned operandIndex; }; SmallVector<State, 4> worklist; for (auto operand : operands) { worklist.push_back({operand, 0}); } while (!worklist.empty()) { State &state = worklist.back(); auto *opInst = state.value.getDefiningOp(); // Note: getDefiningOp will return nullptr if the operand is not an // Operation (i.e. block argument), which is a terminator for the search. if (!isa_and_nonnull<AffineApplyOp>(opInst)) { worklist.pop_back(); continue; } if (state.operandIndex == 0) { // Pre-Visit: Add 'opInst' to reachable sequence. affineApplyOps.push_back(opInst); } if (state.operandIndex < opInst->getNumOperands()) { // Visit: Add next 'affineApplyOp' operand to worklist. // Get next operand to visit at 'operandIndex'. auto nextOperand = opInst->getOperand(state.operandIndex); // Increment 'operandIndex' in 'state'. ++state.operandIndex; // Add 'nextOperand' to worklist. worklist.push_back({nextOperand, 0}); } else { // Post-visit: done visiting operands AffineApplyOp, pop off stack. worklist.pop_back(); } } } // Builds a system of constraints with dimensional identifiers corresponding to // the loop IVs of the forOps appearing in that order. Any symbols founds in // the bound operands are added as symbols in the system. Returns failure for // the yet unimplemented cases. // TODO(andydavis,bondhugula) Handle non-unit steps through local variables or // stride information in FlatAffineConstraints. (For eg., by using iv - lb % // step = 0 and/or by introducing a method in FlatAffineConstraints // setExprStride(ArrayRef<int64_t> expr, int64_t stride) LogicalResult mlir::getIndexSet(MutableArrayRef<AffineForOp> forOps, FlatAffineConstraints *domain) { SmallVector<Value, 4> indices; extractForInductionVars(forOps, &indices); // Reset while associated Values in 'indices' to the domain. domain->reset(forOps.size(), /*numSymbols=*/0, /*numLocals=*/0, indices); for (auto forOp : forOps) { // Add constraints from forOp's bounds. if (failed(domain->addAffineForOpDomain(forOp))) return failure(); } return success(); } // Computes the iteration domain for 'opInst' and populates 'indexSet', which // encapsulates the constraints involving loops surrounding 'opInst' and // potentially involving any Function symbols. The dimensional identifiers in // 'indexSet' correspond to the loops surrounding 'op' from outermost to // innermost. // TODO(andydavis) Add support to handle IfInsts surrounding 'op'. static LogicalResult getInstIndexSet(Operation *op, FlatAffineConstraints *indexSet) { // TODO(andydavis) Extend this to gather enclosing IfInsts and consider // factoring it out into a utility function. SmallVector<AffineForOp, 4> loops; getLoopIVs(*op, &loops); return getIndexSet(loops, indexSet); } namespace { // ValuePositionMap manages the mapping from Values which represent dimension // and symbol identifiers from 'src' and 'dst' access functions to positions // in new space where some Values are kept separate (using addSrc/DstValue) // and some Values are merged (addSymbolValue). // Position lookups return the absolute position in the new space which // has the following format: // // [src-dim-identifiers] [dst-dim-identifiers] [symbol-identifiers] // // Note: access function non-IV dimension identifiers (that have 'dimension' // positions in the access function position space) are assigned as symbols // in the output position space. Convenience access functions which lookup // an Value in multiple maps are provided (i.e. getSrcDimOrSymPos) to handle // the common case of resolving positions for all access function operands. // // TODO(andydavis) Generalize this: could take a template parameter for // the number of maps (3 in the current case), and lookups could take indices // of maps to check. So getSrcDimOrSymPos would be "getPos(value, {0, 2})". class ValuePositionMap { public: void addSrcValue(Value value) { if (addValueAt(value, &srcDimPosMap, numSrcDims)) ++numSrcDims; } void addDstValue(Value value) { if (addValueAt(value, &dstDimPosMap, numDstDims)) ++numDstDims; } void addSymbolValue(Value value) { if (addValueAt(value, &symbolPosMap, numSymbols)) ++numSymbols; } unsigned getSrcDimOrSymPos(Value value) const { return getDimOrSymPos(value, srcDimPosMap, 0); } unsigned getDstDimOrSymPos(Value value) const { return getDimOrSymPos(value, dstDimPosMap, numSrcDims); } unsigned getSymPos(Value value) const { auto it = symbolPosMap.find(value); assert(it != symbolPosMap.end()); return numSrcDims + numDstDims + it->second; } unsigned getNumSrcDims() const { return numSrcDims; } unsigned getNumDstDims() const { return numDstDims; } unsigned getNumDims() const { return numSrcDims + numDstDims; } unsigned getNumSymbols() const { return numSymbols; } private: bool addValueAt(Value value, DenseMap<Value, unsigned> *posMap, unsigned position) { auto it = posMap->find(value); if (it == posMap->end()) { (*posMap)[value] = position; return true; } return false; } unsigned getDimOrSymPos(Value value, const DenseMap<Value, unsigned> &dimPosMap, unsigned dimPosOffset) const { auto it = dimPosMap.find(value); if (it != dimPosMap.end()) { return dimPosOffset + it->second; } it = symbolPosMap.find(value); assert(it != symbolPosMap.end()); return numSrcDims + numDstDims + it->second; } unsigned numSrcDims = 0; unsigned numDstDims = 0; unsigned numSymbols = 0; DenseMap<Value, unsigned> srcDimPosMap; DenseMap<Value, unsigned> dstDimPosMap; DenseMap<Value, unsigned> symbolPosMap; }; } // namespace // Builds a map from Value to identifier position in a new merged identifier // list, which is the result of merging dim/symbol lists from src/dst // iteration domains, the format of which is as follows: // // [src-dim-identifiers, dst-dim-identifiers, symbol-identifiers, const_term] // // This method populates 'valuePosMap' with mappings from operand Values in // 'srcAccessMap'/'dstAccessMap' (as well as those in 'srcDomain'/'dstDomain') // to the position of these values in the merged list. static void buildDimAndSymbolPositionMaps( const FlatAffineConstraints &srcDomain, const FlatAffineConstraints &dstDomain, const AffineValueMap &srcAccessMap, const AffineValueMap &dstAccessMap, ValuePositionMap *valuePosMap, FlatAffineConstraints *dependenceConstraints) { auto updateValuePosMap = [&](ArrayRef<Value> values, bool isSrc) { for (unsigned i = 0, e = values.size(); i < e; ++i) { auto value = values[i]; if (!isForInductionVar(values[i])) { assert(isValidSymbol(values[i]) && "access operand has to be either a loop IV or a symbol"); valuePosMap->addSymbolValue(value); } else if (isSrc) { valuePosMap->addSrcValue(value); } else { valuePosMap->addDstValue(value); } } }; SmallVector<Value, 4> srcValues, destValues; srcDomain.getIdValues(0, srcDomain.getNumDimAndSymbolIds(), &srcValues); dstDomain.getIdValues(0, dstDomain.getNumDimAndSymbolIds(), &destValues); // Update value position map with identifiers from src iteration domain. updateValuePosMap(srcValues, /*isSrc=*/true); // Update value position map with identifiers from dst iteration domain. updateValuePosMap(destValues, /*isSrc=*/false); // Update value position map with identifiers from src access function. updateValuePosMap(srcAccessMap.getOperands(), /*isSrc=*/true); // Update value position map with identifiers from dst access function. updateValuePosMap(dstAccessMap.getOperands(), /*isSrc=*/false); } // Sets up dependence constraints columns appropriately, in the format: // [src-dim-ids, dst-dim-ids, symbol-ids, local-ids, const_term] static void initDependenceConstraints( const FlatAffineConstraints &srcDomain, const FlatAffineConstraints &dstDomain, const AffineValueMap &srcAccessMap, const AffineValueMap &dstAccessMap, const ValuePositionMap &valuePosMap, FlatAffineConstraints *dependenceConstraints) { // Calculate number of equalities/inequalities and columns required to // initialize FlatAffineConstraints for 'dependenceDomain'. unsigned numIneq = srcDomain.getNumInequalities() + dstDomain.getNumInequalities(); AffineMap srcMap = srcAccessMap.getAffineMap(); assert(srcMap.getNumResults() == dstAccessMap.getAffineMap().getNumResults()); unsigned numEq = srcMap.getNumResults(); unsigned numDims = srcDomain.getNumDimIds() + dstDomain.getNumDimIds(); unsigned numSymbols = valuePosMap.getNumSymbols(); unsigned numLocals = srcDomain.getNumLocalIds() + dstDomain.getNumLocalIds(); unsigned numIds = numDims + numSymbols + numLocals; unsigned numCols = numIds + 1; // Set flat affine constraints sizes and reserving space for constraints. dependenceConstraints->reset(numIneq, numEq, numCols, numDims, numSymbols, numLocals); // Set values corresponding to dependence constraint identifiers. SmallVector<Value, 4> srcLoopIVs, dstLoopIVs; srcDomain.getIdValues(0, srcDomain.getNumDimIds(), &srcLoopIVs); dstDomain.getIdValues(0, dstDomain.getNumDimIds(), &dstLoopIVs); dependenceConstraints->setIdValues(0, srcLoopIVs.size(), srcLoopIVs); dependenceConstraints->setIdValues( srcLoopIVs.size(), srcLoopIVs.size() + dstLoopIVs.size(), dstLoopIVs); // Set values for the symbolic identifier dimensions. auto setSymbolIds = [&](ArrayRef<Value> values) { for (auto value : values) { if (!isForInductionVar(value)) { assert(isValidSymbol(value) && "expected symbol"); dependenceConstraints->setIdValue(valuePosMap.getSymPos(value), value); } } }; setSymbolIds(srcAccessMap.getOperands()); setSymbolIds(dstAccessMap.getOperands()); SmallVector<Value, 8> srcSymbolValues, dstSymbolValues; srcDomain.getIdValues(srcDomain.getNumDimIds(), srcDomain.getNumDimAndSymbolIds(), &srcSymbolValues); dstDomain.getIdValues(dstDomain.getNumDimIds(), dstDomain.getNumDimAndSymbolIds(), &dstSymbolValues); setSymbolIds(srcSymbolValues); setSymbolIds(dstSymbolValues); for (unsigned i = 0, e = dependenceConstraints->getNumDimAndSymbolIds(); i < e; i++) assert(dependenceConstraints->getIds()[i].hasValue()); } // Adds iteration domain constraints from 'srcDomain' and 'dstDomain' into // 'dependenceDomain'. // Uses 'valuePosMap' to determine the position in 'dependenceDomain' to which a // srcDomain/dstDomain Value maps. static void addDomainConstraints(const FlatAffineConstraints &srcDomain, const FlatAffineConstraints &dstDomain, const ValuePositionMap &valuePosMap, FlatAffineConstraints *dependenceDomain) { unsigned depNumDimsAndSymbolIds = dependenceDomain->getNumDimAndSymbolIds(); SmallVector<int64_t, 4> cst(dependenceDomain->getNumCols()); auto addDomain = [&](bool isSrc, bool isEq, unsigned localOffset) { const FlatAffineConstraints &domain = isSrc ? srcDomain : dstDomain; unsigned numCsts = isEq ? domain.getNumEqualities() : domain.getNumInequalities(); unsigned numDimAndSymbolIds = domain.getNumDimAndSymbolIds(); auto at = [&](unsigned i, unsigned j) -> int64_t { return isEq ? domain.atEq(i, j) : domain.atIneq(i, j); }; auto map = [&](unsigned i) -> int64_t { return isSrc ? valuePosMap.getSrcDimOrSymPos(domain.getIdValue(i)) : valuePosMap.getDstDimOrSymPos(domain.getIdValue(i)); }; for (unsigned i = 0; i < numCsts; ++i) { // Zero fill. std::fill(cst.begin(), cst.end(), 0); // Set coefficients for identifiers corresponding to domain. for (unsigned j = 0; j < numDimAndSymbolIds; ++j) cst[map(j)] = at(i, j); // Local terms. for (unsigned j = 0, e = domain.getNumLocalIds(); j < e; j++) cst[depNumDimsAndSymbolIds + localOffset + j] = at(i, numDimAndSymbolIds + j); // Set constant term. cst[cst.size() - 1] = at(i, domain.getNumCols() - 1); // Add constraint. if (isEq) dependenceDomain->addEquality(cst); else dependenceDomain->addInequality(cst); } }; // Add equalities from src domain. addDomain(/*isSrc=*/true, /*isEq=*/true, /*localOffset=*/0); // Add inequalities from src domain. addDomain(/*isSrc=*/true, /*isEq=*/false, /*localOffset=*/0); // Add equalities from dst domain. addDomain(/*isSrc=*/false, /*isEq=*/true, /*localOffset=*/srcDomain.getNumLocalIds()); // Add inequalities from dst domain. addDomain(/*isSrc=*/false, /*isEq=*/false, /*localOffset=*/srcDomain.getNumLocalIds()); } // Adds equality constraints that equate src and dst access functions // represented by 'srcAccessMap' and 'dstAccessMap' for each result. // Requires that 'srcAccessMap' and 'dstAccessMap' have the same results count. // For example, given the following two accesses functions to a 2D memref: // // Source access function: // (a0 * d0 + a1 * s0 + a2, b0 * d0 + b1 * s0 + b2) // // Destination access function: // (c0 * d0 + c1 * s0 + c2, f0 * d0 + f1 * s0 + f2) // // This method constructs the following equality constraints in // 'dependenceDomain', by equating the access functions for each result // (i.e. each memref dim). Notice that 'd0' for the destination access function // is mapped into 'd0' in the equality constraint: // // d0 d1 s0 c // -- -- -- -- // a0 -c0 (a1 - c1) (a1 - c2) = 0 // b0 -f0 (b1 - f1) (b1 - f2) = 0 // // Returns failure if any AffineExpr cannot be flattened (due to it being // semi-affine). Returns success otherwise. static LogicalResult addMemRefAccessConstraints(const AffineValueMap &srcAccessMap, const AffineValueMap &dstAccessMap, const ValuePositionMap &valuePosMap, FlatAffineConstraints *dependenceDomain) { AffineMap srcMap = srcAccessMap.getAffineMap(); AffineMap dstMap = dstAccessMap.getAffineMap(); assert(srcMap.getNumResults() == dstMap.getNumResults()); unsigned numResults = srcMap.getNumResults(); unsigned srcNumIds = srcMap.getNumDims() + srcMap.getNumSymbols(); ArrayRef<Value> srcOperands = srcAccessMap.getOperands(); unsigned dstNumIds = dstMap.getNumDims() + dstMap.getNumSymbols(); ArrayRef<Value> dstOperands = dstAccessMap.getOperands(); std::vector<SmallVector<int64_t, 8>> srcFlatExprs; std::vector<SmallVector<int64_t, 8>> destFlatExprs; FlatAffineConstraints srcLocalVarCst, destLocalVarCst; // Get flattened expressions for the source destination maps. if (failed(getFlattenedAffineExprs(srcMap, &srcFlatExprs, &srcLocalVarCst)) || failed(getFlattenedAffineExprs(dstMap, &destFlatExprs, &destLocalVarCst))) return failure(); unsigned domNumLocalIds = dependenceDomain->getNumLocalIds(); unsigned srcNumLocalIds = srcLocalVarCst.getNumLocalIds(); unsigned dstNumLocalIds = destLocalVarCst.getNumLocalIds(); unsigned numLocalIdsToAdd = srcNumLocalIds + dstNumLocalIds; for (unsigned i = 0; i < numLocalIdsToAdd; i++) { dependenceDomain->addLocalId(dependenceDomain->getNumLocalIds()); } unsigned numDims = dependenceDomain->getNumDimIds(); unsigned numSymbols = dependenceDomain->getNumSymbolIds(); unsigned numSrcLocalIds = srcLocalVarCst.getNumLocalIds(); unsigned newLocalIdOffset = numDims + numSymbols + domNumLocalIds; // Equality to add. SmallVector<int64_t, 8> eq(dependenceDomain->getNumCols()); for (unsigned i = 0; i < numResults; ++i) { // Zero fill. std::fill(eq.begin(), eq.end(), 0); // Flattened AffineExpr for src result 'i'. const auto &srcFlatExpr = srcFlatExprs[i]; // Set identifier coefficients from src access function. for (unsigned j = 0, e = srcOperands.size(); j < e; ++j) eq[valuePosMap.getSrcDimOrSymPos(srcOperands[j])] = srcFlatExpr[j]; // Local terms. for (unsigned j = 0, e = srcNumLocalIds; j < e; j++) eq[newLocalIdOffset + j] = srcFlatExpr[srcNumIds + j]; // Set constant term. eq[eq.size() - 1] = srcFlatExpr[srcFlatExpr.size() - 1]; // Flattened AffineExpr for dest result 'i'. const auto &destFlatExpr = destFlatExprs[i]; // Set identifier coefficients from dst access function. for (unsigned j = 0, e = dstOperands.size(); j < e; ++j) eq[valuePosMap.getDstDimOrSymPos(dstOperands[j])] -= destFlatExpr[j]; // Local terms. for (unsigned j = 0, e = dstNumLocalIds; j < e; j++) eq[newLocalIdOffset + numSrcLocalIds + j] = -destFlatExpr[dstNumIds + j]; // Set constant term. eq[eq.size() - 1] -= destFlatExpr[destFlatExpr.size() - 1]; // Add equality constraint. dependenceDomain->addEquality(eq); } // Add equality constraints for any operands that are defined by constant ops. auto addEqForConstOperands = [&](ArrayRef<Value> operands) { for (unsigned i = 0, e = operands.size(); i < e; ++i) { if (isForInductionVar(operands[i])) continue; auto symbol = operands[i]; assert(isValidSymbol(symbol)); // Check if the symbol is a constant. if (auto cOp = symbol.getDefiningOp<ConstantIndexOp>()) dependenceDomain->setIdToConstant(valuePosMap.getSymPos(symbol), cOp.getValue()); } }; // Add equality constraints for any src symbols defined by constant ops. addEqForConstOperands(srcOperands); // Add equality constraints for any dst symbols defined by constant ops. addEqForConstOperands(dstOperands); // By construction (see flattener), local var constraints will not have any // equalities. assert(srcLocalVarCst.getNumEqualities() == 0 && destLocalVarCst.getNumEqualities() == 0); // Add inequalities from srcLocalVarCst and destLocalVarCst into the // dependence domain. SmallVector<int64_t, 8> ineq(dependenceDomain->getNumCols()); for (unsigned r = 0, e = srcLocalVarCst.getNumInequalities(); r < e; r++) { std::fill(ineq.begin(), ineq.end(), 0); // Set identifier coefficients from src local var constraints. for (unsigned j = 0, e = srcOperands.size(); j < e; ++j) ineq[valuePosMap.getSrcDimOrSymPos(srcOperands[j])] = srcLocalVarCst.atIneq(r, j); // Local terms. for (unsigned j = 0, e = srcNumLocalIds; j < e; j++) ineq[newLocalIdOffset + j] = srcLocalVarCst.atIneq(r, srcNumIds + j); // Set constant term. ineq[ineq.size() - 1] = srcLocalVarCst.atIneq(r, srcLocalVarCst.getNumCols() - 1); dependenceDomain->addInequality(ineq); } for (unsigned r = 0, e = destLocalVarCst.getNumInequalities(); r < e; r++) { std::fill(ineq.begin(), ineq.end(), 0); // Set identifier coefficients from dest local var constraints. for (unsigned j = 0, e = dstOperands.size(); j < e; ++j) ineq[valuePosMap.getDstDimOrSymPos(dstOperands[j])] = destLocalVarCst.atIneq(r, j); // Local terms. for (unsigned j = 0, e = dstNumLocalIds; j < e; j++) ineq[newLocalIdOffset + numSrcLocalIds + j] = destLocalVarCst.atIneq(r, dstNumIds + j); // Set constant term. ineq[ineq.size() - 1] = destLocalVarCst.atIneq(r, destLocalVarCst.getNumCols() - 1); dependenceDomain->addInequality(ineq); } return success(); } // Returns the number of outer loop common to 'src/dstDomain'. // Loops common to 'src/dst' domains are added to 'commonLoops' if non-null. static unsigned getNumCommonLoops(const FlatAffineConstraints &srcDomain, const FlatAffineConstraints &dstDomain, SmallVectorImpl<AffineForOp> *commonLoops = nullptr) { // Find the number of common loops shared by src and dst accesses. unsigned minNumLoops = std::min(srcDomain.getNumDimIds(), dstDomain.getNumDimIds()); unsigned numCommonLoops = 0; for (unsigned i = 0; i < minNumLoops; ++i) { if (!isForInductionVar(srcDomain.getIdValue(i)) || !isForInductionVar(dstDomain.getIdValue(i)) || srcDomain.getIdValue(i) != dstDomain.getIdValue(i)) break; if (commonLoops != nullptr) commonLoops->push_back(getForInductionVarOwner(srcDomain.getIdValue(i))); ++numCommonLoops; } if (commonLoops != nullptr) assert(commonLoops->size() == numCommonLoops); return numCommonLoops; } // Returns Block common to 'srcAccess.opInst' and 'dstAccess.opInst'. static Block *getCommonBlock(const MemRefAccess &srcAccess, const MemRefAccess &dstAccess, const FlatAffineConstraints &srcDomain, unsigned numCommonLoops) { if (numCommonLoops == 0) { auto *block = srcAccess.opInst->getBlock(); while (!llvm::isa<FuncOp>(block->getParentOp())) { block = block->getParentOp()->getBlock(); } return block; } auto commonForValue = srcDomain.getIdValue(numCommonLoops - 1); auto forOp = getForInductionVarOwner(commonForValue); assert(forOp && "commonForValue was not an induction variable"); return forOp.getBody(); } // Returns true if the ancestor operation of 'srcAccess' appears before the // ancestor operation of 'dstAccess' in the common ancestral block. Returns // false otherwise. // Note that because 'srcAccess' or 'dstAccess' may be nested in conditionals, // the function is named 'srcAppearsBeforeDstInCommonBlock'. Note that // 'numCommonLoops' is the number of contiguous surrounding outer loops. static bool srcAppearsBeforeDstInAncestralBlock( const MemRefAccess &srcAccess, const MemRefAccess &dstAccess, const FlatAffineConstraints &srcDomain, unsigned numCommonLoops) { // Get Block common to 'srcAccess.opInst' and 'dstAccess.opInst'. auto *commonBlock = getCommonBlock(srcAccess, dstAccess, srcDomain, numCommonLoops); // Check the dominance relationship between the respective ancestors of the // src and dst in the Block of the innermost among the common loops. auto *srcInst = commonBlock->findAncestorOpInBlock(*srcAccess.opInst); assert(srcInst != nullptr); auto *dstInst = commonBlock->findAncestorOpInBlock(*dstAccess.opInst); assert(dstInst != nullptr); // Determine whether dstInst comes after srcInst. return srcInst->isBeforeInBlock(dstInst); } // Adds ordering constraints to 'dependenceDomain' based on number of loops // common to 'src/dstDomain' and requested 'loopDepth'. // Note that 'loopDepth' cannot exceed the number of common loops plus one. // EX: Given a loop nest of depth 2 with IVs 'i' and 'j': // *) If 'loopDepth == 1' then one constraint is added: i' >= i + 1 // *) If 'loopDepth == 2' then two constraints are added: i == i' and j' > j + 1 // *) If 'loopDepth == 3' then two constraints are added: i == i' and j == j' static void addOrderingConstraints(const FlatAffineConstraints &srcDomain, const FlatAffineConstraints &dstDomain, unsigned loopDepth, FlatAffineConstraints *dependenceDomain) { unsigned numCols = dependenceDomain->getNumCols(); SmallVector<int64_t, 4> eq(numCols); unsigned numSrcDims = srcDomain.getNumDimIds(); unsigned numCommonLoops = getNumCommonLoops(srcDomain, dstDomain); unsigned numCommonLoopConstraints = std::min(numCommonLoops, loopDepth); for (unsigned i = 0; i < numCommonLoopConstraints; ++i) { std::fill(eq.begin(), eq.end(), 0); eq[i] = -1; eq[i + numSrcDims] = 1; if (i == loopDepth - 1) { eq[numCols - 1] = -1; dependenceDomain->addInequality(eq); } else { dependenceDomain->addEquality(eq); } } } // Computes distance and direction vectors in 'dependences', by adding // variables to 'dependenceDomain' which represent the difference of the IVs, // eliminating all other variables, and reading off distance vectors from // equality constraints (if possible), and direction vectors from inequalities. static void computeDirectionVector( const FlatAffineConstraints &srcDomain, const FlatAffineConstraints &dstDomain, unsigned loopDepth, FlatAffineConstraints *dependenceDomain, SmallVector<DependenceComponent, 2> *dependenceComponents) { // Find the number of common loops shared by src and dst accesses. SmallVector<AffineForOp, 4> commonLoops; unsigned numCommonLoops = getNumCommonLoops(srcDomain, dstDomain, &commonLoops); if (numCommonLoops == 0) return; // Compute direction vectors for requested loop depth. unsigned numIdsToEliminate = dependenceDomain->getNumIds(); // Add new variables to 'dependenceDomain' to represent the direction // constraints for each shared loop. for (unsigned j = 0; j < numCommonLoops; ++j) { dependenceDomain->addDimId(j); } // Add equality constraints for each common loop, setting newly introduced // variable at column 'j' to the 'dst' IV minus the 'src IV. SmallVector<int64_t, 4> eq; eq.resize(dependenceDomain->getNumCols()); unsigned numSrcDims = srcDomain.getNumDimIds(); // Constraint variables format: // [num-common-loops][num-src-dim-ids][num-dst-dim-ids][num-symbols][constant] for (unsigned j = 0; j < numCommonLoops; ++j) { std::fill(eq.begin(), eq.end(), 0); eq[j] = 1; eq[j + numCommonLoops] = 1; eq[j + numCommonLoops + numSrcDims] = -1; dependenceDomain->addEquality(eq); } // Eliminate all variables other than the direction variables just added. dependenceDomain->projectOut(numCommonLoops, numIdsToEliminate); // Scan each common loop variable column and set direction vectors based // on eliminated constraint system. dependenceComponents->resize(numCommonLoops); for (unsigned j = 0; j < numCommonLoops; ++j) { (*dependenceComponents)[j].op = commonLoops[j].getOperation(); auto lbConst = dependenceDomain->getConstantLowerBound(j); (*dependenceComponents)[j].lb = lbConst.getValueOr(std::numeric_limits<int64_t>::min()); auto ubConst = dependenceDomain->getConstantUpperBound(j); (*dependenceComponents)[j].ub = ubConst.getValueOr(std::numeric_limits<int64_t>::max()); } } // Populates 'accessMap' with composition of AffineApplyOps reachable from // indices of MemRefAccess. void MemRefAccess::getAccessMap(AffineValueMap *accessMap) const { // Get affine map from AffineLoad/Store. AffineMap map; if (auto loadOp = dyn_cast<AffineReadOpInterface>(opInst)) { map = loadOp.getAffineMap(); } else { auto storeOp = cast<AffineWriteOpInterface>(opInst); map = storeOp.getAffineMap(); } SmallVector<Value, 8> operands(indices.begin(), indices.end()); fullyComposeAffineMapAndOperands(&map, &operands); map = simplifyAffineMap(map); canonicalizeMapAndOperands(&map, &operands); accessMap->reset(map, operands); } // Builds a flat affine constraint system to check if there exists a dependence // between memref accesses 'srcAccess' and 'dstAccess'. // Returns 'NoDependence' if the accesses can be definitively shown not to // access the same element. // Returns 'HasDependence' if the accesses do access the same element. // Returns 'Failure' if an error or unsupported case was encountered. // If a dependence exists, returns in 'dependenceComponents' a direction // vector for the dependence, with a component for each loop IV in loops // common to both accesses (see Dependence in AffineAnalysis.h for details). // // The memref access dependence check is comprised of the following steps: // *) Compute access functions for each access. Access functions are computed // using AffineValueMaps initialized with the indices from an access, then // composed with AffineApplyOps reachable from operands of that access, // until operands of the AffineValueMap are loop IVs or symbols. // *) Build iteration domain constraints for each access. Iteration domain // constraints are pairs of inequality constraints representing the // upper/lower loop bounds for each AffineForOp in the loop nest associated // with each access. // *) Build dimension and symbol position maps for each access, which map // Values from access functions and iteration domains to their position // in the merged constraint system built by this method. // // This method builds a constraint system with the following column format: // // [src-dim-identifiers, dst-dim-identifiers, symbols, constant] // // For example, given the following MLIR code with "source" and "destination" // accesses to the same memref label, and symbols %M, %N, %K: // // affine.for %i0 = 0 to 100 { // affine.for %i1 = 0 to 50 { // %a0 = affine.apply // (d0, d1) -> (d0 * 2 - d1 * 4 + s1, d1 * 3 - s0) (%i0, %i1)[%M, %N] // // Source memref access. // store %v0, %m[%a0#0, %a0#1] : memref<4x4xf32> // } // } // // affine.for %i2 = 0 to 100 { // affine.for %i3 = 0 to 50 { // %a1 = affine.apply // (d0, d1) -> (d0 * 7 + d1 * 9 - s1, d1 * 11 + s0) (%i2, %i3)[%K, %M] // // Destination memref access. // %v1 = load %m[%a1#0, %a1#1] : memref<4x4xf32> // } // } // // The access functions would be the following: // // src: (%i0 * 2 - %i1 * 4 + %N, %i1 * 3 - %M) // dst: (%i2 * 7 + %i3 * 9 - %M, %i3 * 11 - %K) // // The iteration domains for the src/dst accesses would be the following: // // src: 0 <= %i0 <= 100, 0 <= %i1 <= 50 // dst: 0 <= %i2 <= 100, 0 <= %i3 <= 50 // // The symbols by both accesses would be assigned to a canonical position order // which will be used in the dependence constraint system: // // symbol name: %M %N %K // symbol pos: 0 1 2 // // Equality constraints are built by equating each result of src/destination // access functions. For this example, the following two equality constraints // will be added to the dependence constraint system: // // [src_dim0, src_dim1, dst_dim0, dst_dim1, sym0, sym1, sym2, const] // 2 -4 -7 -9 1 1 0 0 = 0 // 0 3 0 -11 -1 0 1 0 = 0 // // Inequality constraints from the iteration domain will be meged into // the dependence constraint system // // [src_dim0, src_dim1, dst_dim0, dst_dim1, sym0, sym1, sym2, const] // 1 0 0 0 0 0 0 0 >= 0 // -1 0 0 0 0 0 0 100 >= 0 // 0 1 0 0 0 0 0 0 >= 0 // 0 -1 0 0 0 0 0 50 >= 0 // 0 0 1 0 0 0 0 0 >= 0 // 0 0 -1 0 0 0 0 100 >= 0 // 0 0 0 1 0 0 0 0 >= 0 // 0 0 0 -1 0 0 0 50 >= 0 // // // TODO(andydavis) Support AffineExprs mod/floordiv/ceildiv. DependenceResult mlir::checkMemrefAccessDependence( const MemRefAccess &srcAccess, const MemRefAccess &dstAccess, unsigned loopDepth, FlatAffineConstraints *dependenceConstraints, SmallVector<DependenceComponent, 2> *dependenceComponents, bool allowRAR) { LLVM_DEBUG(llvm::dbgs() << "Checking for dependence at depth: " << Twine(loopDepth) << " between:\n";); LLVM_DEBUG(srcAccess.opInst->dump();); LLVM_DEBUG(dstAccess.opInst->dump();); // Return 'NoDependence' if these accesses do not access the same memref. if (srcAccess.memref != dstAccess.memref) return DependenceResult::NoDependence; // Return 'NoDependence' if one of these accesses is not an // AffineWriteOpInterface. if (!allowRAR && !isa<AffineWriteOpInterface>(srcAccess.opInst) && !isa<AffineWriteOpInterface>(dstAccess.opInst)) return DependenceResult::NoDependence; // Get composed access function for 'srcAccess'. AffineValueMap srcAccessMap; srcAccess.getAccessMap(&srcAccessMap); // Get composed access function for 'dstAccess'. AffineValueMap dstAccessMap; dstAccess.getAccessMap(&dstAccessMap); // Get iteration domain for the 'srcAccess' operation. FlatAffineConstraints srcDomain; if (failed(getInstIndexSet(srcAccess.opInst, &srcDomain))) return DependenceResult::Failure; // Get iteration domain for 'dstAccess' operation. FlatAffineConstraints dstDomain; if (failed(getInstIndexSet(dstAccess.opInst, &dstDomain))) return DependenceResult::Failure; // Return 'NoDependence' if loopDepth > numCommonLoops and if the ancestor // operation of 'srcAccess' does not properly dominate the ancestor // operation of 'dstAccess' in the same common operation block. // Note: this check is skipped if 'allowRAR' is true, because because RAR // deps can exist irrespective of lexicographic ordering b/w src and dst. unsigned numCommonLoops = getNumCommonLoops(srcDomain, dstDomain); assert(loopDepth <= numCommonLoops + 1); if (!allowRAR && loopDepth > numCommonLoops && !srcAppearsBeforeDstInAncestralBlock(srcAccess, dstAccess, srcDomain, numCommonLoops)) { return DependenceResult::NoDependence; } // Build dim and symbol position maps for each access from access operand // Value to position in merged constraint system. ValuePositionMap valuePosMap; buildDimAndSymbolPositionMaps(srcDomain, dstDomain, srcAccessMap, dstAccessMap, &valuePosMap, dependenceConstraints); initDependenceConstraints(srcDomain, dstDomain, srcAccessMap, dstAccessMap, valuePosMap, dependenceConstraints); assert(valuePosMap.getNumDims() == srcDomain.getNumDimIds() + dstDomain.getNumDimIds()); // Create memref access constraint by equating src/dst access functions. // Note that this check is conservative, and will fail in the future when // local variables for mod/div exprs are supported. if (failed(addMemRefAccessConstraints(srcAccessMap, dstAccessMap, valuePosMap, dependenceConstraints))) return DependenceResult::Failure; // Add 'src' happens before 'dst' ordering constraints. addOrderingConstraints(srcDomain, dstDomain, loopDepth, dependenceConstraints); // Add src and dst domain constraints. addDomainConstraints(srcDomain, dstDomain, valuePosMap, dependenceConstraints); // Return 'NoDependence' if the solution space is empty: no dependence. if (dependenceConstraints->isEmpty()) { return DependenceResult::NoDependence; } // Compute dependence direction vector and return true. if (dependenceComponents != nullptr) { computeDirectionVector(srcDomain, dstDomain, loopDepth, dependenceConstraints, dependenceComponents); } LLVM_DEBUG(llvm::dbgs() << "Dependence polyhedron:\n"); LLVM_DEBUG(dependenceConstraints->dump()); return DependenceResult::HasDependence; } /// Gathers dependence components for dependences between all ops in loop nest /// rooted at 'forOp' at loop depths in range [1, maxLoopDepth]. void mlir::getDependenceComponents( AffineForOp forOp, unsigned maxLoopDepth, std::vector<SmallVector<DependenceComponent, 2>> *depCompsVec) { // Collect all load and store ops in loop nest rooted at 'forOp'. SmallVector<Operation *, 8> loadAndStoreOpInsts; forOp.getOperation()->walk([&](Operation *opInst) { if (isa<AffineReadOpInterface>(opInst) || isa<AffineWriteOpInterface>(opInst)) loadAndStoreOpInsts.push_back(opInst); }); unsigned numOps = loadAndStoreOpInsts.size(); for (unsigned d = 1; d <= maxLoopDepth; ++d) { for (unsigned i = 0; i < numOps; ++i) { auto *srcOpInst = loadAndStoreOpInsts[i]; MemRefAccess srcAccess(srcOpInst); for (unsigned j = 0; j < numOps; ++j) { auto *dstOpInst = loadAndStoreOpInsts[j]; MemRefAccess dstAccess(dstOpInst); FlatAffineConstraints dependenceConstraints; SmallVector<DependenceComponent, 2> depComps; // TODO(andydavis,bondhugula) Explore whether it would be profitable // to pre-compute and store deps instead of repeatedly checking. DependenceResult result = checkMemrefAccessDependence( srcAccess, dstAccess, d, &dependenceConstraints, &depComps); if (hasDependence(result)) depCompsVec->push_back(depComps); } } } }