diff lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp @ 147:c2174574ed3a

LLVM 10
author Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date Wed, 14 Aug 2019 16:55:33 +0900
parents 3a76565eade5
children
line wrap: on
line diff
--- a/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp	Sat Feb 17 09:57:20 2018 +0900
+++ b/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp	Wed Aug 14 16:55:33 2019 +0900
@@ -1,9 +1,8 @@
 //===-- HexagonISelDAGToDAGHVX.cpp ----------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -120,7 +119,7 @@
     return Color == ColorKind::Red ? ColorKind::Black : ColorKind::Red;
   }
 
-  void dump() const;
+  LLVM_DUMP_METHOD void dump() const;
 
 private:
   ArrayRef<Node> Order;
@@ -259,7 +258,7 @@
     Colors[C] = ColorC;
   }
 
-  // Explicitly assign "None" all all uncolored nodes.
+  // Explicitly assign "None" to all uncolored nodes.
   for (unsigned I = 0; I != Order.size(); ++I)
     if (Colors.count(I) == 0)
       Colors[I] = ColorKind::None;
@@ -267,7 +266,7 @@
   return true;
 }
 
-LLVM_DUMP_METHOD
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 void Coloring::dump() const {
   dbgs() << "{ Order:   {";
   for (unsigned I = 0; I != Order.size(); ++I) {
@@ -309,6 +308,7 @@
     dbgs() << "    " << C.first << " -> " << ColorKindToName(C.second) << "\n";
   dbgs() << "  }\n}\n";
 }
+#endif
 
 namespace {
 // Base class of for reordering networks. They don't strictly need to be
@@ -651,6 +651,7 @@
     IndexBits = 28,
   };
 
+  LLVM_DUMP_METHOD
   void print(raw_ostream &OS, const SelectionDAG &G) const;
 
 private:
@@ -663,7 +664,7 @@
   MVT Ty = MVT::Other;
   std::vector<OpRef> Ops;
 
-  void print(raw_ostream &OS, const SelectionDAG &G) const;
+  LLVM_DUMP_METHOD void print(raw_ostream &OS, const SelectionDAG &G) const;
 };
 
 struct ResultStack {
@@ -699,10 +700,12 @@
 
   BaseType List;
 
+  LLVM_DUMP_METHOD
   void print(raw_ostream &OS, const SelectionDAG &G) const;
 };
 } // namespace
 
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 void OpRef::print(raw_ostream &OS, const SelectionDAG &G) const {
   if (isValue()) {
     OpV.getNode()->print(OS, &G);
@@ -752,6 +755,7 @@
     OS << '\n';
   }
 }
+#endif
 
 namespace {
 struct ShuffleMask {
@@ -776,6 +780,13 @@
     size_t H = Mask.size()/2;
     return ShuffleMask(Mask.take_back(H));
   }
+
+  void print(raw_ostream &OS) const {
+    OS << "MinSrc:" << MinSrc << ", MaxSrc:" << MaxSrc << " {";
+    for (int M : Mask)
+      OS << ' ' << M;
+    OS << " }";
+  }
 };
 } // namespace
 
@@ -813,6 +824,7 @@
 
     void selectShuffle(SDNode *N);
     void selectRor(SDNode *N);
+    void selectVAlign(SDNode *N);
 
   private:
     void materialize(const ResultStack &Results);
@@ -911,36 +923,40 @@
 }
 
 bool HvxSelector::selectVectorConstants(SDNode *N) {
-  // Constant vectors are generated as loads from constant pools or
-  // as VSPLATs of a constant value.
-  // Since they are generated during the selection process, the main
-  // selection algorithm is not aware of them. Select them directly
-  // here.
+  // Constant vectors are generated as loads from constant pools or as
+  // splats of a constant value. Since they are generated during the
+  // selection process, the main selection algorithm is not aware of them.
+  // Select them directly here.
   SmallVector<SDNode*,4> Nodes;
   SetVector<SDNode*> WorkQ;
 
+  // The one-use test for VSPLATW's operand may fail due to dead nodes
+  // left over in the DAG.
+  DAG.RemoveDeadNodes();
+
   // The DAG can change (due to CSE) during selection, so cache all the
   // unselected nodes first to avoid traversing a mutating DAG.
 
   auto IsNodeToSelect = [] (SDNode *N) {
     if (N->isMachineOpcode())
       return false;
-    unsigned Opc = N->getOpcode();
-    if (Opc == HexagonISD::VSPLAT || Opc == HexagonISD::VZERO)
-      return true;
-    if (Opc == ISD::BITCAST) {
-      // Only select bitcasts of VSPLATs.
-      if (N->getOperand(0).getOpcode() == HexagonISD::VSPLAT)
+    switch (N->getOpcode()) {
+      case HexagonISD::VZERO:
+      case HexagonISD::VSPLATW:
         return true;
+      case ISD::LOAD: {
+        SDValue Addr = cast<LoadSDNode>(N)->getBasePtr();
+        unsigned AddrOpc = Addr.getOpcode();
+        if (AddrOpc == HexagonISD::AT_PCREL || AddrOpc == HexagonISD::CP)
+          if (Addr.getOperand(0).getOpcode() == ISD::TargetConstantPool)
+            return true;
+      }
+      break;
     }
-    if (Opc == ISD::LOAD) {
-      SDValue Addr = cast<LoadSDNode>(N)->getBasePtr();
-      unsigned AddrOpc = Addr.getOpcode();
-      if (AddrOpc == HexagonISD::AT_PCREL || AddrOpc == HexagonISD::CP)
-        if (Addr.getOperand(0).getOpcode() == ISD::TargetConstantPool)
-          return true;
-    }
-    return false;
+    // Make sure to select the operand of VSPLATW.
+    bool IsSplatOp = N->hasOneUse() &&
+                     N->use_begin()->getOpcode() == HexagonISD::VSPLATW;
+    return IsSplatOp;
   };
 
   WorkQ.insert(N);
@@ -1042,25 +1058,53 @@
   int VecLen = SM.Mask.size();
   MVT Ty = getSingleVT(MVT::i8);
 
-  if (SM.MaxSrc - SM.MinSrc < int(HwLen)) {
-    if (SM.MaxSrc < int(HwLen)) {
-      memcpy(NewMask.data(), SM.Mask.data(), sizeof(int)*VecLen);
-      return Va;
+  auto IsExtSubvector = [] (ShuffleMask M) {
+    assert(M.MinSrc >= 0 && M.MaxSrc >= 0);
+    for (int I = 0, E = M.Mask.size(); I != E; ++I) {
+      if (M.Mask[I] >= 0 && M.Mask[I]-I != M.MinSrc)
+        return false;
     }
-    if (SM.MinSrc >= int(HwLen)) {
-      for (int I = 0; I != VecLen; ++I) {
-        int M = SM.Mask[I];
-        if (M != -1)
-          M -= HwLen;
-        NewMask[I] = M;
+    return true;
+  };
+
+  if (SM.MaxSrc - SM.MinSrc < int(HwLen)) {
+    if (SM.MinSrc == 0 || SM.MinSrc == int(HwLen) || !IsExtSubvector(SM)) {
+      // If the mask picks elements from only one of the operands, return
+      // that operand, and update the mask to use index 0 to refer to the
+      // first element of that operand.
+      // If the mask extracts a subvector, it will be handled below, so
+      // skip it here.
+      if (SM.MaxSrc < int(HwLen)) {
+        memcpy(NewMask.data(), SM.Mask.data(), sizeof(int)*VecLen);
+        return Va;
       }
-      return Vb;
+      if (SM.MinSrc >= int(HwLen)) {
+        for (int I = 0; I != VecLen; ++I) {
+          int M = SM.Mask[I];
+          if (M != -1)
+            M -= HwLen;
+          NewMask[I] = M;
+        }
+        return Vb;
+      }
+    }
+    int MinSrc = SM.MinSrc;
+    if (SM.MaxSrc < int(HwLen)) {
+      Vb = Va;
+    } else if (SM.MinSrc > int(HwLen)) {
+      Va = Vb;
+      MinSrc = SM.MinSrc - HwLen;
     }
     const SDLoc &dl(Results.InpNode);
-    SDValue S = DAG.getTargetConstant(SM.MinSrc, dl, MVT::i32);
-    if (isUInt<3>(SM.MinSrc)) {
-      Results.push(Hexagon::V6_valignbi, Ty, {Vb, Va, S});
+    if (isUInt<3>(MinSrc) || isUInt<3>(HwLen-MinSrc)) {
+      bool IsRight = isUInt<3>(MinSrc); // Right align.
+      SDValue S = DAG.getTargetConstant(IsRight ? MinSrc : HwLen-MinSrc,
+                                        dl, MVT::i32);
+      unsigned Opc = IsRight ? Hexagon::V6_valignbi
+                             : Hexagon::V6_vlalignbi;
+      Results.push(Opc, Ty, {Vb, Va, S});
     } else {
+      SDValue S = DAG.getTargetConstant(MinSrc, dl, MVT::i32);
       Results.push(Hexagon::A2_tfrsi, MVT::i32, {S});
       unsigned Top = Results.top();
       Results.push(Hexagon::V6_valignb, Ty, {Vb, Va, OpRef::res(Top)});
@@ -1287,6 +1331,32 @@
   return vmuxp(Bytes, L, R, Results);
 }
 
+namespace {
+  struct Deleter : public SelectionDAG::DAGNodeDeletedListener {
+    template <typename T>
+    Deleter(SelectionDAG &D, T &C)
+      : SelectionDAG::DAGNodeDeletedListener(D, [&C] (SDNode *N, SDNode *E) {
+                                                  C.erase(N);
+                                                }) {}
+  };
+
+  template <typename T>
+  struct NullifyingVector : public T {
+    DenseMap<SDNode*, SDNode**> Refs;
+    NullifyingVector(T &&V) : T(V) {
+      for (unsigned i = 0, e = T::size(); i != e; ++i) {
+        SDNode *&N = T::operator[](i);
+        Refs[N] = &N;
+      }
+    }
+    void erase(SDNode *N) {
+      auto F = Refs.find(N);
+      if (F != Refs.end())
+        *F->second = nullptr;
+    }
+  };
+}
+
 bool HvxSelector::scalarizeShuffle(ArrayRef<int> Mask, const SDLoc &dl,
                                    MVT ResTy, SDValue Va, SDValue Vb,
                                    SDNode *N) {
@@ -1297,10 +1367,30 @@
   bool HavePairs = (2*HwLen == VecLen);
   MVT SingleTy = getSingleVT(MVT::i8);
 
+  // The prior attempts to handle this shuffle may have left a bunch of
+  // dead nodes in the DAG (such as constants). These nodes will be added
+  // at the end of DAG's node list, which at that point had already been
+  // sorted topologically. In the main selection loop, the node list is
+  // traversed backwards from the root node, which means that any new
+  // nodes (from the end of the list) will not be visited.
+  // Scalarization will replace the shuffle node with the scalarized
+  // expression, and if that expression reused any if the leftoever (dead)
+  // nodes, these nodes would not be selected (since the "local" selection
+  // only visits nodes that are not in AllNodes).
+  // To avoid this issue, remove all dead nodes from the DAG now.
+  DAG.RemoveDeadNodes();
+  DenseSet<SDNode*> AllNodes;
+  for (SDNode &S : DAG.allnodes())
+    AllNodes.insert(&S);
+
+  Deleter DUA(DAG, AllNodes);
+
   SmallVector<SDValue,128> Ops;
+  LLVMContext &Ctx = *DAG.getContext();
+  MVT LegalTy = Lower.getTypeToTransformTo(Ctx, ElemTy).getSimpleVT();
   for (int I : Mask) {
     if (I < 0) {
-      Ops.push_back(ISel.selectUndef(dl, ElemTy));
+      Ops.push_back(ISel.selectUndef(dl, LegalTy));
       continue;
     }
     SDValue Vec;
@@ -1320,7 +1410,7 @@
       }
     }
     SDValue Idx = DAG.getConstant(M, dl, MVT::i32);
-    SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ElemTy, {Vec, Idx});
+    SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, LegalTy, {Vec, Idx});
     SDValue L = Lower.LowerOperation(Ex, DAG);
     assert(L.getNode());
     Ops.push_back(L);
@@ -1344,32 +1434,55 @@
 
   assert(!N->use_empty());
   ISel.ReplaceNode(N, LV.getNode());
-  DAG.RemoveDeadNodes();
 
-  std::deque<SDNode*> SubNodes;
-  SubNodes.push_back(LV.getNode());
-  for (unsigned I = 0; I != SubNodes.size(); ++I) {
-    for (SDValue Op : SubNodes[I]->ops())
-      SubNodes.push_back(Op.getNode());
+  if (AllNodes.count(LV.getNode())) {
+    DAG.RemoveDeadNodes();
+    return true;
   }
-  while (!SubNodes.empty()) {
-    SDNode *S = SubNodes.front();
-    SubNodes.pop_front();
-    if (S->use_empty())
-      continue;
-    // This isn't great, but users need to be selected before any nodes that
-    // they use. (The reason is to match larger patterns, and avoid nodes that
-    // cannot be matched on their own, e.g. ValueType, TokenFactor, etc.).
-    bool PendingUser = llvm::any_of(S->uses(), [&SubNodes](const SDNode *U) {
-                         return llvm::any_of(SubNodes, [U](const SDNode *T) {
-                           return T == U;
-                         });
-                       });
-    if (PendingUser)
-      SubNodes.push_back(S);
-    else
+
+  // The lowered build-vector node will now need to be selected. It needs
+  // to be done here because this node and its submodes are not included
+  // in the main selection loop.
+  // Implement essentially the same topological ordering algorithm as is
+  // used in SelectionDAGISel.
+
+  SetVector<SDNode*> SubNodes, TmpQ;
+  std::map<SDNode*,unsigned> NumOps;
+
+  SubNodes.insert(LV.getNode());
+  for (unsigned I = 0; I != SubNodes.size(); ++I) {
+    unsigned OpN = 0;
+    SDNode *S = SubNodes[I];
+    for (SDValue Op : S->ops()) {
+      if (AllNodes.count(Op.getNode()))
+        continue;
+      SubNodes.insert(Op.getNode());
+      ++OpN;
+    }
+    NumOps.insert({S, OpN});
+    if (OpN == 0)
+      TmpQ.insert(S);
+  }
+
+  for (unsigned I = 0; I != TmpQ.size(); ++I) {
+    SDNode *S = TmpQ[I];
+    for (SDNode *U : S->uses()) {
+      if (!SubNodes.count(U))
+        continue;
+      auto F = NumOps.find(U);
+      assert(F != NumOps.end());
+      assert(F->second > 0);
+      if (!--F->second)
+        TmpQ.insert(F->first);
+    }
+  }
+  assert(SubNodes.size() == TmpQ.size());
+  NullifyingVector<decltype(TmpQ)::vector_type> Queue(TmpQ.takeVector());
+
+  Deleter DUQ(DAG, Queue);
+  for (SDNode *S : reverse(Queue))
+    if (S != nullptr)
       ISel.Select(S);
-  }
 
   DAG.RemoveDeadNodes();
   return true;
@@ -1928,7 +2041,6 @@
   // If the mask is all -1's, generate "undef".
   if (!UseLeft && !UseRight) {
     ISel.ReplaceNode(N, ISel.selectUndef(SDLoc(SN), ResTy).getNode());
-    DAG.RemoveDeadNode(N);
     return;
   }
 
@@ -1984,6 +2096,15 @@
     NewN = DAG.getMachineNode(Hexagon::V6_vror, dl, Ty, {VecV, RotV});
 
   ISel.ReplaceNode(N, NewN);
+}
+
+void HvxSelector::selectVAlign(SDNode *N) {
+  SDValue Vv = N->getOperand(0);
+  SDValue Vu = N->getOperand(1);
+  SDValue Rt = N->getOperand(2);
+  SDNode *NewN = DAG.getMachineNode(Hexagon::V6_valignb, SDLoc(N),
+                                    N->getValueType(0), {Vv, Vu, Rt});
+  ISel.ReplaceNode(N, NewN);
   DAG.RemoveDeadNode(N);
 }
 
@@ -1995,6 +2116,10 @@
   HvxSelector(*this, *CurDAG).selectRor(N);
 }
 
+void HexagonDAGToDAGISel::SelectHvxVAlign(SDNode *N) {
+  HvxSelector(*this, *CurDAG).selectVAlign(N);
+}
+
 void HexagonDAGToDAGISel::SelectV65GatherPred(SDNode *N) {
   const SDLoc &dl(N);
   SDValue Chain = N->getOperand(0);
@@ -2027,12 +2152,10 @@
   SDValue Ops[] = { Address, Predicate, Base, Modifier, Offset, Chain };
   SDNode *Result = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
 
-  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
-  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
-  cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
+  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
 
-  ReplaceUses(N, Result);
-  CurDAG->RemoveDeadNode(N);
+  ReplaceNode(N, Result);
 }
 
 void HexagonDAGToDAGISel::SelectV65Gather(SDNode *N) {
@@ -2066,12 +2189,10 @@
   SDValue Ops[] = { Address, Base, Modifier, Offset, Chain };
   SDNode *Result = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
 
-  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
-  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
-  cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
+  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
 
-  ReplaceUses(N, Result);
-  CurDAG->RemoveDeadNode(N);
+  ReplaceNode(N, Result);
 }
 
 void HexagonDAGToDAGISel::SelectHVXDualOutput(SDNode *N) {
@@ -2114,5 +2235,3 @@
   ReplaceUses(SDValue(N, 1), SDValue(Result, 1));
   CurDAG->RemoveDeadNode(N);
 }
-
-