comparison mlir/lib/Transforms/PipelineDataTransfer.cpp @ 173:0572611fdcc8 llvm10 llvm12
reorganization done
| author | Shinji KONO <kono@ie.u-ryukyu.ac.jp> |
|---|---|
| date | Mon, 25 May 2020 11:55:54 +0900 |
| parents | 1d019706d866 |
| children | 2e18cbf3894f |
| 172:9fbae9c8bf63 | 173:0572611fdcc8 |
|---|---|
8 // | 8 // |
9 // This file implements a pass to pipeline data transfers. | 9 // This file implements a pass to pipeline data transfers. |
10 // | 10 // |
11 //===----------------------------------------------------------------------===// | 11 //===----------------------------------------------------------------------===// |
12 | 12 |
| 13 #include "PassDetail.h" |
13 #include "mlir/Transforms/Passes.h" | 14 #include "mlir/Transforms/Passes.h" |
14 | 15 |
15 #include "mlir/Analysis/AffineAnalysis.h" | 16 #include "mlir/Analysis/AffineAnalysis.h" |
16 #include "mlir/Analysis/LoopAnalysis.h" | 17 #include "mlir/Analysis/LoopAnalysis.h" |
17 #include "mlir/Analysis/Utils.h" | 18 #include "mlir/Analysis/Utils.h" |
18 #include "mlir/Dialect/AffineOps/AffineOps.h" | 19 #include "mlir/Dialect/Affine/IR/AffineOps.h" |
19 #include "mlir/Dialect/StandardOps/Ops.h" | |
20 #include "mlir/IR/Builders.h" | 20 #include "mlir/IR/Builders.h" |
21 #include "mlir/Pass/Pass.h" | |
22 #include "mlir/Transforms/LoopUtils.h" | 21 #include "mlir/Transforms/LoopUtils.h" |
23 #include "mlir/Transforms/Utils.h" | 22 #include "mlir/Transforms/Utils.h" |
24 #include "llvm/ADT/DenseMap.h" | 23 #include "llvm/ADT/DenseMap.h" |
25 #include "llvm/Support/Debug.h" | 24 #include "llvm/Support/Debug.h" |
| 25 |
26 #define DEBUG_TYPE "affine-pipeline-data-transfer" | 26 #define DEBUG_TYPE "affine-pipeline-data-transfer" |
27 | 27 |
28 using namespace mlir; | 28 using namespace mlir; |
29 | 29 |
30 namespace { | 30 namespace { |
31 | 31 struct PipelineDataTransfer |
32 struct PipelineDataTransfer : public FunctionPass<PipelineDataTransfer> { | 32 : public AffinePipelineDataTransferBase<PipelineDataTransfer> { |
33 void runOnFunction() override; | 33 void runOnFunction() override; |
34 void runOnAffineForOp(AffineForOp forOp); | 34 void runOnAffineForOp(AffineForOp forOp); |
35 | 35 |
36 std::vector<AffineForOp> forOps; | 36 std::vector<AffineForOp> forOps; |
37 }; | 37 }; |
38 | 38 |
39 } // end anonymous namespace | 39 } // end anonymous namespace |
40 | 40 |
41 /// Creates a pass to pipeline explicit movement of data across levels of the | 41 /// Creates a pass to pipeline explicit movement of data across levels of the |
42 /// memory hierarchy. | 42 /// memory hierarchy. |
43 std::unique_ptr<OpPassBase<FuncOp>> mlir::createPipelineDataTransferPass() { | 43 std::unique_ptr<OperationPass<FuncOp>> mlir::createPipelineDataTransferPass() { |
44 return std::make_unique<PipelineDataTransfer>(); | 44 return std::make_unique<PipelineDataTransfer>(); |
45 } | 45 } |
46 | 46 |
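Editor's note: the factory's return type changes from `OpPassBase<FuncOp>` to `OperationPass<FuncOp>`, matching the pass-infrastructure reorganization visible throughout this diff (the new `PassDetail.h` include, the `AffinePipelineDataTransferBase` base class, and the static `PassRegistration` deleted at the bottom). A hedged sketch of how the factory would be used, assuming a standard `PassManager` setup of this era; header locations and `addNestedPass` are assumptions, not something this change shows:

```cpp
// Hypothetical usage sketch (not part of this diff): schedule the pass so
// that the OperationPass<FuncOp> created above runs on every function.
#include "mlir/IR/Function.h"        // FuncOp (location as of LLVM 10/11-era MLIR)
#include "mlir/Pass/PassManager.h"
#include "mlir/Transforms/Passes.h"  // declares createPipelineDataTransferPass

void addPipelining(mlir::PassManager &pm) {
  // Nest under the module so the pass runs on each FuncOp;
  // pm.run(module) would then pipeline eligible affine.for loops.
  pm.addNestedPass<mlir::FuncOp>(mlir::createPipelineDataTransferPass());
}
```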
47 // Returns the position of the tag memref operand given a DMA operation. | 47 // Returns the position of the tag memref operand given a DMA operation. |
48 // Temporary utility: will be replaced when DmaStart/DmaFinish abstract op's are | 48 // Temporary utility: will be replaced when DmaStart/DmaFinish abstract op's are |
49 // added. TODO(b/117228571) | 49 // added. TODO(b/117228571) |
50 static unsigned getTagMemRefPos(Operation &dmaInst) { | 50 static unsigned getTagMemRefPos(Operation &dmaOp) { |
51 assert(isa<AffineDmaStartOp>(dmaInst) || isa<AffineDmaWaitOp>(dmaInst)); | 51 assert(isa<AffineDmaStartOp>(dmaOp) || isa<AffineDmaWaitOp>(dmaOp)); |
52 if (auto dmaStartOp = dyn_cast<AffineDmaStartOp>(dmaInst)) { | 52 if (auto dmaStartOp = dyn_cast<AffineDmaStartOp>(dmaOp)) { |
53 return dmaStartOp.getTagMemRefOperandIndex(); | 53 return dmaStartOp.getTagMemRefOperandIndex(); |
54 } | 54 } |
55 // First operand for a dma finish operation. | 55 // First operand for a dma finish operation. |
56 return 0; | 56 return 0; |
57 } | 57 } |
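Editor's note: only the operand position is returned here; the value itself is fetched by the callers, as in the tag double-buffering loop further down. A one-helper sketch using only names from this file:

```cpp
// Minimal sketch: fetch the tag memref of a DMA start/wait op via the
// operand position computed by getTagMemRefPos() above.
mlir::Value getTagMemRef(mlir::Operation &dmaOp) {
  return dmaOp.getOperand(getTagMemRefPos(dmaOp));
}
```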
78 }; | 78 }; |
79 | 79 |
80 auto oldMemRefType = oldMemRef.getType().cast<MemRefType>(); | 80 auto oldMemRefType = oldMemRef.getType().cast<MemRefType>(); |
81 auto newMemRefType = doubleShape(oldMemRefType); | 81 auto newMemRefType = doubleShape(oldMemRefType); |
82 | 82 |
83 // The double buffer is allocated right before 'forInst'. | 83 // The double buffer is allocated right before 'forOp'. |
84 auto *forInst = forOp.getOperation(); | 84 OpBuilder bOuter(forOp); |
85 OpBuilder bOuter(forInst); | |
86 // Put together alloc operands for any dynamic dimensions of the memref. | 85 // Put together alloc operands for any dynamic dimensions of the memref. |
87 SmallVector<Value, 4> allocOperands; | 86 SmallVector<Value, 4> allocOperands; |
88 unsigned dynamicDimCount = 0; | 87 unsigned dynamicDimCount = 0; |
89 for (auto dimSize : oldMemRefType.getShape()) { | 88 for (auto dimSize : oldMemRefType.getShape()) { |
90 if (dimSize == -1) | 89 if (dimSize == -1) |
91 allocOperands.push_back(bOuter.create<DimOp>(forInst->getLoc(), oldMemRef, | 90 allocOperands.push_back( |
92 dynamicDimCount++)); | 91 bOuter.create<DimOp>(forOp.getLoc(), oldMemRef, dynamicDimCount++)); |
93 } | 92 } |
94 | 93 |
95 // Create and place the alloc right before the 'affine.for' operation. | 94 // Create and place the alloc right before the 'affine.for' operation. |
96 Value newMemRef = | 95 Value newMemRef = |
97 bOuter.create<AllocOp>(forInst->getLoc(), newMemRefType, allocOperands); | 96 bOuter.create<AllocOp>(forOp.getLoc(), newMemRefType, allocOperands); |
98 | 97 |
99 // Create 'iv mod 2' value to index the leading dimension. | 98 // Create 'iv mod 2' value to index the leading dimension. |
100 auto d0 = bInner.getAffineDimExpr(0); | 99 auto d0 = bInner.getAffineDimExpr(0); |
101 int64_t step = forOp.getStep(); | 100 int64_t step = forOp.getStep(); |
102 auto modTwoMap = AffineMap::get(/*dimCount=*/1, /*symbolCount=*/0, | 101 auto modTwoMap = |
103 {d0.floorDiv(step) % 2}); | 102 AffineMap::get(/*dimCount=*/1, /*symbolCount=*/0, d0.floorDiv(step) % 2); |
104 auto ivModTwoOp = bInner.create<AffineApplyOp>(forOp.getLoc(), modTwoMap, | 103 auto ivModTwoOp = bInner.create<AffineApplyOp>(forOp.getLoc(), modTwoMap, |
105 forOp.getInductionVar()); | 104 forOp.getInductionVar()); |
106 | 105 |
107 // replaceAllMemRefUsesWith will succeed unless the forOp body has | 106 // replaceAllMemRefUsesWith will succeed unless the forOp body has |
108 // non-dereferencing uses of the memref (dealloc's are fine though). | 107 // non-dereferencing uses of the memref (dealloc's are fine though). |
117 forOp.emitError("memref replacement for double buffering failed")); | 116 forOp.emitError("memref replacement for double buffering failed")); |
118 ivModTwoOp.erase(); | 117 ivModTwoOp.erase(); |
119 return false; | 118 return false; |
120 } | 119 } |
121 // Insert the dealloc op right after the for loop. | 120 // Insert the dealloc op right after the for loop. |
122 bOuter.setInsertionPointAfter(forInst); | 121 bOuter.setInsertionPointAfter(forOp); |
123 bOuter.create<DeallocOp>(forInst->getLoc(), newMemRef); | 122 bOuter.create<DeallocOp>(forOp.getLoc(), newMemRef); |
124 | 123 |
125 return true; | 124 return true; |
126 } | 125 } |
127 | 126 |
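Editor's note: two pieces of arithmetic drive `doubleBuffer()`: `doubleShape()` prepends a leading dimension of 2 (e.g. `memref<256xf32>` becomes `memref<2x256xf32>`), and the affine map `{d0.floorDiv(step) % 2}` picks which half a given iteration addresses. A plain-C++ sketch of the index logic, independent of the MLIR API (the loop bounds are made-up values):

```cpp
// Buffer-selection arithmetic behind the {d0.floorDiv(step) % 2} affine map:
// the normalized iteration number alternates between the two halves of the
// doubled leading dimension, so iteration i fills one half while iteration
// i-1's data is consumed from the other.
#include <cstdint>
#include <cstdio>

int main() {
  const int64_t lb = 0, ub = 12, step = 3; // made-up loop bounds
  for (int64_t iv = lb; iv < ub; iv += step) {
    // For nonnegative iv this matches floorDiv(iv, step) % 2.
    int64_t bufferIndex = (iv / step) % 2;
    std::printf("iv=%lld -> buffer half %lld\n", (long long)iv,
                (long long)bufferIndex);
  }
  return 0;
}
```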
128 /// Returns success if the IR is in a valid state. | 127 /// Returns success if the IR is in a valid state. |
218 if (!escapingUses) | 217 if (!escapingUses) |
219 dmaStartInsts.push_back(&op); | 218 dmaStartInsts.push_back(&op); |
220 } | 219 } |
221 | 220 |
222 // For each start operation, we look for a matching finish operation. | 221 // For each start operation, we look for a matching finish operation. |
223 for (auto *dmaStartInst : dmaStartInsts) { | 222 for (auto *dmaStartOp : dmaStartInsts) { |
224 for (auto *dmaFinishInst : dmaFinishInsts) { | 223 for (auto *dmaFinishOp : dmaFinishInsts) { |
225 if (checkTagMatch(cast<AffineDmaStartOp>(dmaStartInst), | 224 if (checkTagMatch(cast<AffineDmaStartOp>(dmaStartOp), |
226 cast<AffineDmaWaitOp>(dmaFinishInst))) { | 225 cast<AffineDmaWaitOp>(dmaFinishOp))) { |
227 startWaitPairs.push_back({dmaStartInst, dmaFinishInst}); | 226 startWaitPairs.push_back({dmaStartOp, dmaFinishOp}); |
228 break; | 227 break; |
229 } | 228 } |
230 } | 229 } |
231 } | 230 } |
232 } | 231 } |
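Editor's note: `checkTagMatch` itself falls outside the hunks shown here. Conceptually, a wait completes a start only if both ops signal through the same tag buffer; the real predicate also compares the tag index operands pairwise. A simplified, hedged stand-in:

```cpp
// Simplified stand-in for checkTagMatch (the full version, elsewhere in this
// file, additionally verifies that the tag index operands match one by one).
static bool tagMemRefsMatch(mlir::AffineDmaStartOp startOp,
                            mlir::AffineDmaWaitOp waitOp) {
  // A wait can only complete a start that signals through the same tag buffer.
  return startOp.getTagMemRef() == waitOp.getTagMemRef();
}
```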
235 /// 'forOp' is deleted, and a prologue, a new pipelined loop, and epilogue are | 234 /// 'forOp' is deleted, and a prologue, a new pipelined loop, and epilogue are |
236 /// inserted right before where it was. | 235 /// inserted right before where it was. |
237 void PipelineDataTransfer::runOnAffineForOp(AffineForOp forOp) { | 236 void PipelineDataTransfer::runOnAffineForOp(AffineForOp forOp) { |
238 auto mayBeConstTripCount = getConstantTripCount(forOp); | 237 auto mayBeConstTripCount = getConstantTripCount(forOp); |
239 if (!mayBeConstTripCount.hasValue()) { | 238 if (!mayBeConstTripCount.hasValue()) { |
240 LLVM_DEBUG( | 239 LLVM_DEBUG(forOp.emitRemark("won't pipeline due to unknown trip count")); |
241 forOp.emitRemark("won't pipeline due to unknown trip count loop")); | |
242 return; | 240 return; |
243 } | 241 } |
244 | 242 |
245 SmallVector<std::pair<Operation *, Operation *>, 4> startWaitPairs; | 243 SmallVector<std::pair<Operation *, Operation *>, 4> startWaitPairs; |
246 findMatchingStartFinishInsts(forOp, startWaitPairs); | 244 findMatchingStartFinishInsts(forOp, startWaitPairs); |
257 // TODO(bondhugula): check whether double-buffering is even necessary. | 255 // TODO(bondhugula): check whether double-buffering is even necessary. |
258 // TODO(bondhugula): make this work with different layouts: assuming here that | 256 // TODO(bondhugula): make this work with different layouts: assuming here that |
259 // the dimension we are adding here for the double buffering is the outermost | 257 // the dimension we are adding here for the double buffering is the outermost |
260 // dimension. | 258 // dimension. |
261 for (auto &pair : startWaitPairs) { | 259 for (auto &pair : startWaitPairs) { |
262 auto *dmaStartInst = pair.first; | 260 auto *dmaStartOp = pair.first; |
263 Value oldMemRef = dmaStartInst->getOperand( | 261 Value oldMemRef = dmaStartOp->getOperand( |
264 cast<AffineDmaStartOp>(dmaStartInst).getFasterMemPos()); | 262 cast<AffineDmaStartOp>(dmaStartOp).getFasterMemPos()); |
265 if (!doubleBuffer(oldMemRef, forOp)) { | 263 if (!doubleBuffer(oldMemRef, forOp)) { |
266 // Normally, double buffering should not fail because we already checked | 264 // Normally, double buffering should not fail because we already checked |
267 // that there are no uses outside. | 265 // that there are no uses outside. |
268 LLVM_DEBUG(llvm::dbgs() | 266 LLVM_DEBUG(llvm::dbgs() |
269 << "double buffering failed for" << dmaStartInst << "\n";); | 267 << "double buffering failed for" << dmaStartOp << "\n";); |
270 // IR still valid and semantically correct. | 268 // IR still valid and semantically correct. |
271 return; | 269 return; |
272 } | 270 } |
273 // If the old memref has no more uses, remove its 'dead' alloc if it was | 271 // If the old memref has no more uses, remove its 'dead' alloc if it was |
274 // alloc'ed. (note: DMA buffers are rarely function live-in; but a 'dim' | 272 // alloc'ed. (note: DMA buffers are rarely function live-in; but a 'dim' |
275 // operation could have been used on it if it was dynamically shaped in | 273 // operation could have been used on it if it was dynamically shaped in |
276 // order to create the double buffer above.) | 274 // order to create the double buffer above.) |
277 // '-canonicalize' does this in a more general way, but we'll anyway do the | 275 // '-canonicalize' does this in a more general way, but we'll anyway do the |
278 // simple/common case so that the output / test cases looks clear. | 276 // simple/common case so that the output / test cases looks clear. |
279 if (auto *allocInst = oldMemRef.getDefiningOp()) { | 277 if (auto *allocOp = oldMemRef.getDefiningOp()) { |
280 if (oldMemRef.use_empty()) { | 278 if (oldMemRef.use_empty()) { |
281 allocInst->erase(); | 279 allocOp->erase(); |
282 } else if (oldMemRef.hasOneUse()) { | 280 } else if (oldMemRef.hasOneUse()) { |
283 if (auto dealloc = dyn_cast<DeallocOp>(*oldMemRef.user_begin())) { | 281 if (auto dealloc = dyn_cast<DeallocOp>(*oldMemRef.user_begin())) { |
284 dealloc.erase(); | 282 dealloc.erase(); |
285 allocInst->erase(); | 283 allocOp->erase(); |
286 } | 284 } |
287 } | 285 } |
288 } | 286 } |
289 } | 287 } |
290 | 288 |
291 // Double the buffers for tag memrefs. | 289 // Double the buffers for tag memrefs. |
292 for (auto &pair : startWaitPairs) { | 290 for (auto &pair : startWaitPairs) { |
293 auto *dmaFinishInst = pair.second; | 291 auto *dmaFinishOp = pair.second; |
294 Value oldTagMemRef = | 292 Value oldTagMemRef = dmaFinishOp->getOperand(getTagMemRefPos(*dmaFinishOp)); |
295 dmaFinishInst->getOperand(getTagMemRefPos(*dmaFinishInst)); | |
296 if (!doubleBuffer(oldTagMemRef, forOp)) { | 293 if (!doubleBuffer(oldTagMemRef, forOp)) { |
297 LLVM_DEBUG(llvm::dbgs() << "tag double buffering failed\n";); | 294 LLVM_DEBUG(llvm::dbgs() << "tag double buffering failed\n";); |
298 return; | 295 return; |
299 } | 296 } |
300 // If the old tag has no uses or a single dealloc use, remove it. | 297 // If the old tag has no uses or a single dealloc use, remove it. |
301 // (canonicalization handles more complex cases). | 298 // (canonicalization handles more complex cases). |
302 if (auto *tagAllocInst = oldTagMemRef.getDefiningOp()) { | 299 if (auto *tagAllocOp = oldTagMemRef.getDefiningOp()) { |
303 if (oldTagMemRef.use_empty()) { | 300 if (oldTagMemRef.use_empty()) { |
304 tagAllocInst->erase(); | 301 tagAllocOp->erase(); |
305 } else if (oldTagMemRef.hasOneUse()) { | 302 } else if (oldTagMemRef.hasOneUse()) { |
306 if (auto dealloc = dyn_cast<DeallocOp>(*oldTagMemRef.user_begin())) { | 303 if (auto dealloc = dyn_cast<DeallocOp>(*oldTagMemRef.user_begin())) { |
307 dealloc.erase(); | 304 dealloc.erase(); |
308 tagAllocInst->erase(); | 305 tagAllocOp->erase(); |
309 } | 306 } |
310 } | 307 } |
311 } | 308 } |
312 } | 309 } |
313 | 310 |
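Editor's note: the dead-allocation cleanup above runs twice, once for the data memref and once for the tag memref. Factored out, the pattern looks like this (a sketch built only from calls appearing in this diff):

```cpp
// Erase the alloc when the old memref has no users left, or when its sole
// remaining user is the matching dealloc; leave anything else to
// '-canonicalize', as the comments above note.
static void eraseIfDead(mlir::Value oldMemRef) {
  auto *allocOp = oldMemRef.getDefiningOp();
  if (!allocOp)
    return; // e.g. a function argument: nothing to erase
  if (oldMemRef.use_empty()) {
    allocOp->erase();
  } else if (oldMemRef.hasOneUse()) {
    if (auto dealloc =
            llvm::dyn_cast<mlir::DeallocOp>(*oldMemRef.user_begin())) {
      dealloc.erase();
      allocOp->erase();
    }
  }
}
```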
316 findMatchingStartFinishInsts(forOp, startWaitPairs); | 313 findMatchingStartFinishInsts(forOp, startWaitPairs); |
317 | 314 |
318 // Store shift for operation for later lookup for AffineApplyOp's. | 315 // Store shift for operation for later lookup for AffineApplyOp's. |
319 DenseMap<Operation *, unsigned> instShiftMap; | 316 DenseMap<Operation *, unsigned> instShiftMap; |
320 for (auto &pair : startWaitPairs) { | 317 for (auto &pair : startWaitPairs) { |
321 auto *dmaStartInst = pair.first; | 318 auto *dmaStartOp = pair.first; |
322 assert(isa<AffineDmaStartOp>(dmaStartInst)); | 319 assert(isa<AffineDmaStartOp>(dmaStartOp)); |
323 instShiftMap[dmaStartInst] = 0; | 320 instShiftMap[dmaStartOp] = 0; |
324 // Set shifts for DMA start op's affine operand computation slices to 0. | 321 // Set shifts for DMA start op's affine operand computation slices to 0. |
325 SmallVector<AffineApplyOp, 4> sliceOps; | 322 SmallVector<AffineApplyOp, 4> sliceOps; |
326 mlir::createAffineComputationSlice(dmaStartInst, &sliceOps); | 323 mlir::createAffineComputationSlice(dmaStartOp, &sliceOps); |
327 if (!sliceOps.empty()) { | 324 if (!sliceOps.empty()) { |
328 for (auto sliceOp : sliceOps) { | 325 for (auto sliceOp : sliceOps) { |
329 instShiftMap[sliceOp.getOperation()] = 0; | 326 instShiftMap[sliceOp.getOperation()] = 0; |
330 } | 327 } |
331 } else { | 328 } else { |
332 // If a slice wasn't created, the reachable affine.apply op's from its | 329 // If a slice wasn't created, the reachable affine.apply op's from its |
333 // operands are the ones that go with it. | 330 // operands are the ones that go with it. |
334 SmallVector<Operation *, 4> affineApplyInsts; | 331 SmallVector<Operation *, 4> affineApplyInsts; |
335 SmallVector<Value, 4> operands(dmaStartInst->getOperands()); | 332 SmallVector<Value, 4> operands(dmaStartOp->getOperands()); |
336 getReachableAffineApplyOps(operands, affineApplyInsts); | 333 getReachableAffineApplyOps(operands, affineApplyInsts); |
337 for (auto *op : affineApplyInsts) { | 334 for (auto *op : affineApplyInsts) { |
338 instShiftMap[op] = 0; | 335 instShiftMap[op] = 0; |
339 } | 336 } |
340 } | 337 } |
341 } | 338 } |
342 // Everything else (including compute ops and dma finish) are shifted by one. | 339 // Everything else (including compute ops and dma finish) are shifted by one. |
343 for (auto &op : *forOp.getBody()) { | 340 for (auto &op : forOp.getBody()->without_terminator()) |
344 if (instShiftMap.find(&op) == instShiftMap.end()) { | 341 if (instShiftMap.find(&op) == instShiftMap.end()) |
345 instShiftMap[&op] = 1; | 342 instShiftMap[&op] = 1; |
346 } | |
347 } | |
348 | 343 |
349 // Get shifts stored in map. | 344 // Get shifts stored in map. |
350 std::vector<uint64_t> shifts(forOp.getBody()->getOperations().size()); | 345 SmallVector<uint64_t, 8> shifts(forOp.getBody()->getOperations().size()); |
351 unsigned s = 0; | 346 unsigned s = 0; |
352 for (auto &op : *forOp.getBody()) { | 347 for (auto &op : forOp.getBody()->without_terminator()) { |
353 assert(instShiftMap.find(&op) != instShiftMap.end()); | 348 assert(instShiftMap.find(&op) != instShiftMap.end()); |
354 shifts[s++] = instShiftMap[&op]; | 349 shifts[s++] = instShiftMap[&op]; |
355 | 350 |
356 // Tagging operations with shifts for debugging purposes. | 351 // Tagging operations with shifts for debugging purposes. |
357 LLVM_DEBUG({ | 352 LLVM_DEBUG({ |
358 OpBuilder b(&op); | 353 OpBuilder b(&op); |
359 op.setAttr("shift", b.getI64IntegerAttr(shifts[s - 1])); | 354 op.setAttr("shift", b.getI64IntegerAttr(shifts[s - 1])); |
360 }); | 355 }); |
361 } | 356 } |
362 | 357 |
363 if (!isInstwiseShiftValid(forOp, shifts)) { | 358 if (!isOpwiseShiftValid(forOp, shifts)) { |
364 // Violates dependences. | 359 // Violates dependences. |
365 LLVM_DEBUG(llvm::dbgs() << "Shifts invalid - unexpected\n";); | 360 LLVM_DEBUG(llvm::dbgs() << "Shifts invalid - unexpected\n";); |
366 return; | 361 return; |
367 } | 362 } |
368 | 363 |
369 if (failed(instBodySkew(forOp, shifts))) { | 364 if (failed(affineForOpBodySkew(forOp, shifts))) { |
370 LLVM_DEBUG(llvm::dbgs() << "op body skewing failed - unexpected\n";); | 365 LLVM_DEBUG(llvm::dbgs() << "op body skewing failed - unexpected\n";); |
371 return; | 366 return; |
372 } | 367 } |
373 } | 368 } |
374 | |
375 static PassRegistration<PipelineDataTransfer> pass( | |
376 "affine-pipeline-data-transfer", | |
377 "Pipeline non-blocking data transfers between explicitly managed levels of " | |
378 "the memory hierarchy"); |