//===- PipelineDataTransfer.cpp --- Pass for pipelining data movement ---*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a pass to pipeline data transfers.
//
//===----------------------------------------------------------------------===//

#include "PassDetail.h"
#include "mlir/Transforms/Passes.h"

#include "mlir/Analysis/AffineAnalysis.h"
#include "mlir/Analysis/LoopAnalysis.h"
#include "mlir/Analysis/Utils.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/IR/Builders.h"
#include "mlir/Transforms/LoopUtils.h"
#include "mlir/Transforms/Utils.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/Support/Debug.h"

#define DEBUG_TYPE "affine-pipeline-data-transfer"

using namespace mlir;

namespace {
struct PipelineDataTransfer
    : public AffinePipelineDataTransferBase<PipelineDataTransfer> {
  void runOnFunction() override;
  void runOnAffineForOp(AffineForOp forOp);

  std::vector<AffineForOp> forOps;
};
} // end anonymous namespace

/// Creates a pass to pipeline explicit movement of data across levels of the
/// memory hierarchy.
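/// (The pass is typically exercised via its registered command-line flag;
/// given the DEBUG_TYPE above, presumably '-affine-pipeline-data-transfer'.)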
std::unique_ptr<OperationPass<FuncOp>> mlir::createPipelineDataTransferPass() {
  return std::make_unique<PipelineDataTransfer>();
}

// Returns the position of the tag memref operand given a DMA operation.
// Temporary utility: will be replaced when DmaStart/DmaFinish abstract op's are
// added. TODO(b/117228571)
static unsigned getTagMemRefPos(Operation &dmaOp) {
  assert(isa<AffineDmaStartOp>(dmaOp) || isa<AffineDmaWaitOp>(dmaOp));
  if (auto dmaStartOp = dyn_cast<AffineDmaStartOp>(dmaOp)) {
    return dmaStartOp.getTagMemRefOperandIndex();
  }
  // First operand for a dma finish operation.
  return 0;
}

/// Doubles the buffer of the supplied memref on the specified 'affine.for'
/// operation by adding a leading dimension of size two to the memref.
/// Replaces all uses of the old memref by the new one while indexing the newly
/// added dimension by the loop IV of the specified 'affine.for' operation
/// modulo 2. Returns false if such a replacement cannot be performed.
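///
/// Illustrative sketch (types and names are hypothetical): for
///
///   %buf = alloc() : memref<256xf32, 1>
///
/// used inside 'affine.for %i = 0 to %N step 4', the buffer becomes
///
///   %buf_db = alloc() : memref<2x256xf32, 1>
///
/// and each access '%buf[%idx]' is rewritten to
/// '%buf_db[(%i floordiv 4) mod 2, %idx]'.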
static bool doubleBuffer(Value oldMemRef, AffineForOp forOp) {
  auto *forBody = forOp.getBody();
  OpBuilder bInner(forBody, forBody->begin());

  // Doubles the shape with a leading dimension extent of 2.
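  // For example, a hypothetical memref<128x64xf32, 1> would become
  // memref<2x128x64xf32, 1>, with the layout reset to the default identity
  // map via setAffineMaps({}).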
  auto doubleShape = [&](MemRefType oldMemRefType) -> MemRefType {
    // Add the leading dimension in the shape for the double buffer.
    ArrayRef<int64_t> oldShape = oldMemRefType.getShape();
    SmallVector<int64_t, 4> newShape(1 + oldMemRefType.getRank());
    newShape[0] = 2;
    std::copy(oldShape.begin(), oldShape.end(), newShape.begin() + 1);
    return MemRefType::Builder(oldMemRefType)
        .setShape(newShape)
        .setAffineMaps({});
  };

  auto oldMemRefType = oldMemRef.getType().cast<MemRefType>();
  auto newMemRefType = doubleShape(oldMemRefType);

  // The double buffer is allocated right before 'forOp'.
  OpBuilder bOuter(forOp);
  // Put together alloc operands for any dynamic dimensions of the memref.
  SmallVector<Value, 4> allocOperands;
  unsigned dynamicDimCount = 0;
  for (auto dimSize : oldMemRefType.getShape()) {
    if (dimSize == -1)
      allocOperands.push_back(
          bOuter.create<DimOp>(forOp.getLoc(), oldMemRef, dynamicDimCount++));
  }

  // Create and place the alloc right before the 'affine.for' operation.
  Value newMemRef =
      bOuter.create<AllocOp>(forOp.getLoc(), newMemRefType, allocOperands);

  // Create 'iv mod 2' value to index the leading dimension.
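  // Note: the IV advances by 'step' each iteration, so 'iv floordiv step'
  // increases by exactly one per iteration and the mod-2 expression below
  // alternates between 0 and 1 on successive iterations.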
  auto d0 = bInner.getAffineDimExpr(0);
  int64_t step = forOp.getStep();
  auto modTwoMap =
      AffineMap::get(/*dimCount=*/1, /*symbolCount=*/0, d0.floorDiv(step) % 2);
  auto ivModTwoOp = bInner.create<AffineApplyOp>(forOp.getLoc(), modTwoMap,
                                                 forOp.getInductionVar());

  // replaceAllMemRefUsesWith will succeed unless the forOp body has
  // non-dereferencing uses of the memref (dealloc's are fine though).
  if (failed(replaceAllMemRefUsesWith(
          oldMemRef, newMemRef,
          /*extraIndices=*/{ivModTwoOp},
          /*indexRemap=*/AffineMap(),
          /*extraOperands=*/{},
          /*symbolOperands=*/{},
          /*domInstFilter=*/&*forOp.getBody()->begin()))) {
    LLVM_DEBUG(
        forOp.emitError("memref replacement for double buffering failed"));
    ivModTwoOp.erase();
    return false;
  }
  // Insert the dealloc op right after the for loop.
  bOuter.setInsertionPointAfter(forOp);
  bOuter.create<DeallocOp>(forOp.getLoc(), newMemRef);

  return true;
}

/// Pipelines data transfers within all 'affine.for' ops in the function.
void PipelineDataTransfer::runOnFunction() {
  // Do a post order walk so that inner loop DMAs are processed first. This is
  // necessary since 'affine.for' operations nested within would otherwise
  // become invalid (erased) when the outer loop is pipelined (the pipelined
  // one gets deleted and replaced by a prologue, a new steady-state loop and
  // an epilogue).
  forOps.clear();
  getFunction().walk([&](AffineForOp forOp) { forOps.push_back(forOp); });
  for (auto forOp : forOps)
    runOnAffineForOp(forOp);
}

// Check if tags of the dma start op and dma wait op match.
static bool checkTagMatch(AffineDmaStartOp startOp, AffineDmaWaitOp waitOp) {
  if (startOp.getTagMemRef() != waitOp.getTagMemRef())
    return false;
  auto startIndices = startOp.getTagIndices();
  auto waitIndices = waitOp.getTagIndices();
  // Both of these have the same number of indices since they correspond to
  // the same tag memref.
  for (auto it = startIndices.begin(), wIt = waitIndices.begin(),
            e = startIndices.end();
       it != e; ++it, ++wIt) {
    // Keep it simple for now, just checking if indices match.
    // TODO(mlir-team): this would in general need to check if there is no
    // intervening write writing to the same tag location, i.e., memory last
    // write/data flow analysis. This is however sufficient/powerful enough
    // for now since the DMA generation pass or the input for it will always
    // have start/wait with matching tags (same SSA operand indices).
    if (*it != *wIt)
      return false;
  }
  return true;
}

// Identify matching DMA start/finish operations to overlap computation with.
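// Only incoming DMA starts (into the faster memory space) whose buffer is not
// live out of the loop are candidates; each is paired with the first DMA wait
// op whose tag memref and tag indices match (see checkTagMatch above).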
static void findMatchingStartFinishInsts(
    AffineForOp forOp,
    SmallVectorImpl<std::pair<Operation *, Operation *>> &startWaitPairs) {

  // Collect outgoing DMA operations - needed to check for dependences below.
  SmallVector<AffineDmaStartOp, 4> outgoingDmaOps;
  for (auto &op : *forOp.getBody()) {
    auto dmaStartOp = dyn_cast<AffineDmaStartOp>(op);
    if (dmaStartOp && dmaStartOp.isSrcMemorySpaceFaster())
      outgoingDmaOps.push_back(dmaStartOp);
  }

  SmallVector<Operation *, 4> dmaStartInsts, dmaFinishInsts;
  for (auto &op : *forOp.getBody()) {
    // Collect DMA finish operations.
    if (isa<AffineDmaWaitOp>(op)) {
      dmaFinishInsts.push_back(&op);
      continue;
    }
    auto dmaStartOp = dyn_cast<AffineDmaStartOp>(op);
    if (!dmaStartOp)
      continue;

    // Only DMAs incoming into higher memory spaces are pipelined for now.
    // TODO(bondhugula): handle outgoing DMA pipelining.
    if (!dmaStartOp.isDestMemorySpaceFaster())
      continue;

    // Check for dependence with outgoing DMAs. Doing this conservatively.
    // TODO(andydavis,bondhugula): use the dependence analysis to check for
    // dependences between an incoming and outgoing DMA in the same iteration.
    auto it = outgoingDmaOps.begin();
    for (; it != outgoingDmaOps.end(); ++it) {
      if (it->getDstMemRef() == dmaStartOp.getSrcMemRef())
        break;
    }
    if (it != outgoingDmaOps.end())
      continue;

    // We only double buffer if the buffer is not live out of the loop.
    auto memref = dmaStartOp.getOperand(dmaStartOp.getFasterMemPos());
    bool escapingUses = false;
    for (auto *user : memref.getUsers()) {
      // We can double buffer regardless of dealloc's outside the loop.
      if (isa<DeallocOp>(user))
        continue;
      if (!forOp.getBody()->findAncestorOpInBlock(*user)) {
        LLVM_DEBUG(llvm::dbgs()
                   << "can't pipeline: buffer is live out of loop\n";);
        escapingUses = true;
        break;
      }
    }
    if (!escapingUses)
      dmaStartInsts.push_back(&op);
  }

  // For each start operation, we look for a matching finish operation.
  for (auto *dmaStartOp : dmaStartInsts) {
    for (auto *dmaFinishOp : dmaFinishInsts) {
      if (checkTagMatch(cast<AffineDmaStartOp>(dmaStartOp),
                        cast<AffineDmaWaitOp>(dmaFinishOp))) {
        startWaitPairs.push_back({dmaStartOp, dmaFinishOp});
        break;
      }
    }
  }
}

/// Overlap DMA transfers with computation in this loop. If successful,
/// 'forOp' is deleted, and a prologue, a new pipelined loop, and an epilogue
/// are inserted right before where it was.
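///
/// Illustrative sketch (pseudo-IR, not actual pass output): with the DMA
/// start at shift 0 and the rest of the body at shift 1, a loop
///
///   affine.for %i = 0 to %N {
///     dma_start(%i); dma_wait(%i); compute(%i);
///   }
///
/// is skewed into a prologue 'dma_start(0)', a steady-state loop in which
/// 'dma_start(%i + 1)' overlaps 'dma_wait(%i); compute(%i)' on the two halves
/// of the doubled buffer, and an epilogue 'dma_wait(%N - 1); compute(%N - 1)'.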
void PipelineDataTransfer::runOnAffineForOp(AffineForOp forOp) {
  auto mayBeConstTripCount = getConstantTripCount(forOp);
  if (!mayBeConstTripCount.hasValue()) {
    LLVM_DEBUG(forOp.emitRemark("won't pipeline due to unknown trip count"));
    return;
  }

  SmallVector<std::pair<Operation *, Operation *>, 4> startWaitPairs;
  findMatchingStartFinishInsts(forOp, startWaitPairs);

  if (startWaitPairs.empty()) {
    LLVM_DEBUG(forOp.emitRemark("no DMA start/finish pairs"));
    return;
  }

  // Double the buffers for the higher memory space memref's.
  // Identify memref's to replace by scanning through all DMA start
  // operations. A DMA start operation has two memref's - the one from the
  // higher level of memory hierarchy is the one to double buffer.
  // TODO(bondhugula): check whether double-buffering is even necessary.
  // TODO(bondhugula): make this work with different layouts: assuming here
  // that the dimension we are adding for double buffering is the outermost
  // dimension.
  for (auto &pair : startWaitPairs) {
    auto *dmaStartOp = pair.first;
    Value oldMemRef = dmaStartOp->getOperand(
        cast<AffineDmaStartOp>(dmaStartOp).getFasterMemPos());
    if (!doubleBuffer(oldMemRef, forOp)) {
      // Normally, double buffering should not fail because we already checked
      // that there are no uses outside the loop.
      LLVM_DEBUG(llvm::dbgs()
                 << "double buffering failed for " << *dmaStartOp << "\n";);
      // IR still valid and semantically correct.
      return;
    }
    // If the old memref has no more uses, remove its 'dead' alloc if it was
    // alloc'ed. (note: DMA buffers are rarely function live-in; but a 'dim'
    // operation could have been used on it if it was dynamically shaped in
    // order to create the double buffer above.)
    // '-canonicalize' does this in a more general way, but we'll anyway do the
    // simple/common case here so that the output / test cases look clear.
    if (auto *allocOp = oldMemRef.getDefiningOp()) {
      if (oldMemRef.use_empty()) {
        allocOp->erase();
      } else if (oldMemRef.hasOneUse()) {
        if (auto dealloc = dyn_cast<DeallocOp>(*oldMemRef.user_begin())) {
          dealloc.erase();
          allocOp->erase();
        }
      }
    }
  }

  // Double the buffers for tag memrefs.
  for (auto &pair : startWaitPairs) {
    auto *dmaFinishOp = pair.second;
    Value oldTagMemRef = dmaFinishOp->getOperand(getTagMemRefPos(*dmaFinishOp));
    if (!doubleBuffer(oldTagMemRef, forOp)) {
      LLVM_DEBUG(llvm::dbgs() << "tag double buffering failed\n";);
      return;
    }
    // If the old tag has no uses or a single dealloc use, remove it.
    // (canonicalization handles more complex cases).
    if (auto *tagAllocOp = oldTagMemRef.getDefiningOp()) {
      if (oldTagMemRef.use_empty()) {
        tagAllocOp->erase();
      } else if (oldTagMemRef.hasOneUse()) {
        if (auto dealloc = dyn_cast<DeallocOp>(*oldTagMemRef.user_begin())) {
          dealloc.erase();
          tagAllocOp->erase();
        }
      }
    }
  }

  // Double buffering would have invalidated all the old DMA start/wait insts.
  startWaitPairs.clear();
  findMatchingStartFinishInsts(forOp, startWaitPairs);

  // Store the shift for each operation for later lookup; this includes the
  // AffineApplyOp's created below.
  DenseMap<Operation *, unsigned> instShiftMap;
  for (auto &pair : startWaitPairs) {
    auto *dmaStartOp = pair.first;
    assert(isa<AffineDmaStartOp>(dmaStartOp));
    instShiftMap[dmaStartOp] = 0;
    // Set shifts for DMA start op's affine operand computation slices to 0.
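    // createAffineComputationSlice rewrites the DMA's index computation into
    // affine.apply ops whose results are used only by this DMA start, so
    // those ops can share its shift of 0 without affecting other users of
    // the original values.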
    SmallVector<AffineApplyOp, 4> sliceOps;
    mlir::createAffineComputationSlice(dmaStartOp, &sliceOps);
    if (!sliceOps.empty()) {
      for (auto sliceOp : sliceOps) {
        instShiftMap[sliceOp.getOperation()] = 0;
      }
    } else {
      // If a slice wasn't created, the reachable affine.apply op's from its
      // operands are the ones that go with it.
      SmallVector<Operation *, 4> affineApplyInsts;
      SmallVector<Value, 4> operands(dmaStartOp->getOperands());
      getReachableAffineApplyOps(operands, affineApplyInsts);
      for (auto *op : affineApplyInsts) {
        instShiftMap[op] = 0;
      }
    }
  }
  // Everything else (including compute ops and dma finish) is shifted by one.
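  // With shift 0 on the DMA starts (and their index computations) and shift 1
  // on everything else, the body skewing below overlaps iteration i+1's DMA
  // issue with iteration i's wait and compute.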
  for (auto &op : forOp.getBody()->without_terminator())
    if (instShiftMap.find(&op) == instShiftMap.end())
      instShiftMap[&op] = 1;

  // Get the shifts stored in the map.
  SmallVector<uint64_t, 8> shifts(forOp.getBody()->getOperations().size());
  unsigned s = 0;
  for (auto &op : forOp.getBody()->without_terminator()) {
    assert(instShiftMap.find(&op) != instShiftMap.end());
    shifts[s++] = instShiftMap[&op];

    // Tag operations with shifts for debugging purposes.
    LLVM_DEBUG({
      OpBuilder b(&op);
      op.setAttr("shift", b.getI64IntegerAttr(shifts[s - 1]));
    });
  }

  if (!isOpwiseShiftValid(forOp, shifts)) {
    // Violates dependences.
    LLVM_DEBUG(llvm::dbgs() << "Shifts invalid - unexpected\n";);
    return;
  }

  if (failed(affineForOpBodySkew(forOp, shifts))) {
    LLVM_DEBUG(llvm::dbgs() << "op body skewing failed - unexpected\n";);
    return;
  }
}