diff lib/CodeGen/InterleavedAccessPass.cpp @ 121:803732b1fca8

LLVM 5.0
author kono
date Fri, 27 Oct 2017 17:07:41 +0900
parents 1172e4bd9c6f
children
line wrap: on
line diff
--- a/lib/CodeGen/InterleavedAccessPass.cpp	Fri Nov 25 19:14:25 2016 +0900
+++ b/lib/CodeGen/InterleavedAccessPass.cpp	Fri Oct 27 17:07:41 2017 +0900
@@ -1,4 +1,4 @@
-//===--------------------- InterleavedAccessPass.cpp ----------------------===//
+//===- InterleavedAccessPass.cpp ------------------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -42,16 +42,32 @@
 //
 // Similarly, a set of interleaved stores can be transformed into an optimized
 // sequence of shuffles followed by a set of target specific stores for X86.
+//
 //===----------------------------------------------------------------------===//
 
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/Constants.h"
 #include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetSubtargetInfo.h"
+#include <cassert>
+#include <utility>
 
 using namespace llvm;
 
@@ -65,11 +81,10 @@
 namespace {
 
 class InterleavedAccess : public FunctionPass {
-
 public:
   static char ID;
-  InterleavedAccess(const TargetMachine *TM = nullptr)
-      : FunctionPass(ID), DT(nullptr), TM(TM), TLI(nullptr) {
+
+  InterleavedAccess() : FunctionPass(ID) {
     initializeInterleavedAccessPass(*PassRegistry::getPassRegistry());
   }
 
@@ -83,9 +98,8 @@
   }
 
 private:
-  DominatorTree *DT;
-  const TargetMachine *TM;
-  const TargetLowering *TLI;
+  DominatorTree *DT = nullptr;
+  const TargetLowering *TLI = nullptr;
 
   /// The maximum supported interleave factor.
   unsigned MaxFactor;
@@ -105,21 +119,21 @@
   bool tryReplaceExtracts(ArrayRef<ExtractElementInst *> Extracts,
                           ArrayRef<ShuffleVectorInst *> Shuffles);
 };
+
 } // end anonymous namespace.
 
 char InterleavedAccess::ID = 0;
-INITIALIZE_TM_PASS_BEGIN(
-    InterleavedAccess, "interleaved-access",
+
+INITIALIZE_PASS_BEGIN(InterleavedAccess, DEBUG_TYPE,
     "Lower interleaved memory accesses to target specific intrinsics", false,
     false)
 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_TM_PASS_END(
-    InterleavedAccess, "interleaved-access",
+INITIALIZE_PASS_END(InterleavedAccess, DEBUG_TYPE,
     "Lower interleaved memory accesses to target specific intrinsics", false,
     false)
 
-FunctionPass *llvm::createInterleavedAccessPass(const TargetMachine *TM) {
-  return new InterleavedAccess(TM);
+FunctionPass *llvm::createInterleavedAccessPass() {
+  return new InterleavedAccess();
 }
 
 /// \brief Check if the mask is a DE-interleave mask of the given factor
@@ -162,14 +176,19 @@
   return false;
 }
 
-/// \brief Check if the mask is RE-interleave mask for an interleaved store.
-///
-/// I.e. <0, NumSubElts, ... , NumSubElts*(Factor - 1), 1, NumSubElts + 1, ...>
+/// \brief Check if the mask can be used in an interleaved store.
+//
+/// It checks for a more general pattern than the RE-interleave mask.
+/// I.e. <x, y, ... z, x+1, y+1, ...z+1, x+2, y+2, ...z+2, ...>
+/// E.g. For a Factor of 2 (LaneLen=4): <4, 32, 5, 33, 6, 34, 7, 35>
+/// E.g. For a Factor of 3 (LaneLen=4): <4, 32, 16, 5, 33, 17, 6, 34, 18, 7, 35, 19>
+/// E.g. For a Factor of 4 (LaneLen=2): <8, 2, 12, 4, 9, 3, 13, 5>
 ///
-/// E.g. The RE-interleave mask (Factor = 2) could be:
-///     <0, 4, 1, 5, 2, 6, 3, 7>
+/// The particular case of an RE-interleave mask is:
+/// I.e. <0, LaneLen, ... , LaneLen*(Factor - 1), 1, LaneLen + 1, ...>
+/// E.g. For a Factor of 2 (LaneLen=4): <0, 4, 1, 5, 2, 6, 3, 7>
 static bool isReInterleaveMask(ArrayRef<int> Mask, unsigned &Factor,
-                               unsigned MaxFactor) {
+                               unsigned MaxFactor, unsigned OpNumElts) {
   unsigned NumElts = Mask.size();
   if (NumElts < 4)
     return false;
@@ -179,21 +198,75 @@
     if (NumElts % Factor)
       continue;
 
-    unsigned NumSubElts = NumElts / Factor;
-    if (!isPowerOf2_32(NumSubElts))
+    unsigned LaneLen = NumElts / Factor;
+    if (!isPowerOf2_32(LaneLen))
       continue;
 
-    // Check whether each element matchs the RE-interleaved rule. Ignore undef
-    // elements.
-    unsigned i = 0;
-    for (; i < NumElts; i++)
-      if (Mask[i] >= 0 &&
-          static_cast<unsigned>(Mask[i]) !=
-              (i % Factor) * NumSubElts + i / Factor)
+    // Check whether each element matches the general interleaved rule.
+    // Ignore undef elements, as long as the defined elements match the rule.
+    // Outer loop processes all factors (x, y, z in the above example)
+    unsigned I = 0, J;
+    for (; I < Factor; I++) {
+      unsigned SavedLaneValue;
+      unsigned SavedNoUndefs = 0;
+
+      // Inner loop processes consecutive accesses (x, x+1... in the example)
+      for (J = 0; J < LaneLen - 1; J++) {
+        // Lane computes x's position in the Mask
+        unsigned Lane = J * Factor + I;
+        unsigned NextLane = Lane + Factor;
+        int LaneValue = Mask[Lane];
+        int NextLaneValue = Mask[NextLane];
+
+        // If both are defined, values must be sequential
+        if (LaneValue >= 0 && NextLaneValue >= 0 &&
+            LaneValue + 1 != NextLaneValue)
+          break;
+
+        // If the next value is undef, save the current one as reference
+        if (LaneValue >= 0 && NextLaneValue < 0) {
+          SavedLaneValue = LaneValue;
+          SavedNoUndefs = 1;
+        }
+
+        // Undefs are allowed, but defined elements must still be consecutive:
+        // i.e.: x,..., undef,..., x + 2,..., undef,..., undef,..., x + 5, ....
+        // Verify this by storing the last non-undef followed by an undef
+        // Check that following non-undef masks are incremented with the
+        // corresponding distance.
+        if (SavedNoUndefs > 0 && LaneValue < 0) {
+          SavedNoUndefs++;
+          if (NextLaneValue >= 0 &&
+              SavedLaneValue + SavedNoUndefs != (unsigned)NextLaneValue)
+            break;
+        }
+      }
+
+      if (J < LaneLen - 1)
         break;
 
-    // Find a RE-interleaved mask of current factor.
-    if (i == NumElts)
+      int StartMask = 0;
+      if (Mask[I] >= 0) {
+        // Check that the start of the I range (J=0) is greater than 0
+        StartMask = Mask[I];
+      } else if (Mask[(LaneLen - 1) * Factor + I] >= 0) {
+        // StartMask defined by the last value in lane
+        StartMask = Mask[(LaneLen - 1) * Factor + I] - J;
+      } else if (SavedNoUndefs > 0) {
+        // StartMask defined by some non-zero value in the j loop
+        StartMask = SavedLaneValue - (LaneLen - 1 - SavedNoUndefs);
+      }
+      // else StartMask remains set to 0, i.e. all elements are undefs
+
+      if (StartMask < 0)
+        break;
+      // We must stay within the vectors; This case can happen with undefs.
+      if (StartMask + LaneLen > OpNumElts*2)
+        break;
+    }
+
+    // Found an interleaved mask of current factor.
+    if (I == Factor)
       return true;
   }
 
@@ -275,7 +348,6 @@
 bool InterleavedAccess::tryReplaceExtracts(
     ArrayRef<ExtractElementInst *> Extracts,
     ArrayRef<ShuffleVectorInst *> Shuffles) {
-
   // If there aren't any extractelement instructions to modify, there's nothing
   // to do.
   if (Extracts.empty())
@@ -286,7 +358,6 @@
   DenseMap<ExtractElementInst *, std::pair<Value *, int>> ReplacementMap;
 
   for (auto *Extract : Extracts) {
-
     // The vector index that is extracted.
     auto *IndexOperand = cast<ConstantInt>(Extract->getIndexOperand());
     auto Index = IndexOperand->getSExtValue();
@@ -295,7 +366,6 @@
     // extractelement instruction (which uses an interleaved load) to use one
     // of the shufflevector instructions instead of the load.
     for (auto *Shuffle : Shuffles) {
-
       // If the shufflevector instruction doesn't dominate the extract, we
       // can't create a use of it.
       if (!DT->dominates(Shuffle, Extract))
@@ -350,7 +420,8 @@
 
   // Check if the shufflevector is RE-interleave shuffle.
   unsigned Factor;
-  if (!isReInterleaveMask(SVI->getShuffleMask(), Factor, MaxFactor))
+  unsigned OpNumElts = SVI->getOperand(0)->getType()->getVectorNumElements();
+  if (!isReInterleaveMask(SVI->getShuffleMask(), Factor, MaxFactor, OpNumElts))
     return false;
 
   DEBUG(dbgs() << "IA: Found an interleaved store: " << *SI << "\n");
@@ -366,13 +437,15 @@
 }
 
 bool InterleavedAccess::runOnFunction(Function &F) {
-  if (!TM || !LowerInterleavedAccesses)
+  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
+  if (!TPC || !LowerInterleavedAccesses)
     return false;
 
   DEBUG(dbgs() << "*** " << getPassName() << ": " << F.getName() << "\n");
 
   DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
-  TLI = TM->getSubtargetImpl(F)->getTargetLowering();
+  auto &TM = TPC->getTM<TargetMachine>();
+  TLI = TM.getSubtargetImpl(F)->getTargetLowering();
   MaxFactor = TLI->getMaxSupportedInterleaveFactor();
 
   // Holds dead instructions that will be erased later.