Mercurial > hg > CbC > CbC_llvm
diff lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @ 120:1172e4bd9c6f
update 4.0.0
author | mir3636 |
---|---|
date | Fri, 25 Nov 2016 19:14:25 +0900 |
parents | 7d135dc70f03 |
children | 803732b1fca8 |
line wrap: on
line diff
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp Tue Jan 26 22:56:36 2016 +0900 +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp Fri Nov 25 19:14:25 2016 +0900 @@ -106,7 +106,8 @@ SDValue ExpandStore(SDValue Op); SDValue ExpandFNEG(SDValue Op); SDValue ExpandBITREVERSE(SDValue Op); - SDValue ExpandCTLZ_CTTZ_ZERO_UNDEF(SDValue Op); + SDValue ExpandCTLZ(SDValue Op); + SDValue ExpandCTTZ_ZERO_UNDEF(SDValue Op); /// \brief Implements vector promotion. /// @@ -358,12 +359,11 @@ case TargetLowering::Legal: break; case TargetLowering::Custom: { - SDValue Tmp1 = TLI.LowerOperation(Op, DAG); - if (Tmp1.getNode()) { + if (SDValue Tmp1 = TLI.LowerOperation(Op, DAG)) { Result = Tmp1; break; } - // FALL THROUGH + LLVM_FALLTHROUGH; } case TargetLowering::Expand: Result = Expand(Op); @@ -493,21 +493,26 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { - SDLoc dl(Op); LoadSDNode *LD = cast<LoadSDNode>(Op.getNode()); - SDValue Chain = LD->getChain(); - SDValue BasePTR = LD->getBasePtr(); + EVT SrcVT = LD->getMemoryVT(); - ISD::LoadExtType ExtType = LD->getExtensionType(); - - SmallVector<SDValue, 8> Vals; - SmallVector<SDValue, 8> LoadChains; + EVT SrcEltVT = SrcVT.getScalarType(); unsigned NumElem = SrcVT.getVectorNumElements(); - EVT SrcEltVT = SrcVT.getScalarType(); - EVT DstEltVT = Op.getNode()->getValueType(0).getScalarType(); + + SDValue NewChain; + SDValue Value; + if (SrcVT.getVectorNumElements() > 1 && !SrcEltVT.isByteSized()) { + SDLoc dl(Op); - if (SrcVT.getVectorNumElements() > 1 && !SrcEltVT.isByteSized()) { + SmallVector<SDValue, 8> Vals; + SmallVector<SDValue, 8> LoadChains; + + EVT DstEltVT = LD->getValueType(0).getScalarType(); + SDValue Chain = LD->getChain(); + SDValue BasePTR = LD->getBasePtr(); + ISD::LoadExtType ExtType = LD->getExtensionType(); + // When elements in a vector is not byte-addressable, we cannot directly // load each element by advancing pointer, which could only address bytes. // Instead, we load all significant words, mask bits off, and concatenate @@ -531,24 +536,22 @@ unsigned LoadBytes = WideBytes; if (RemainingBytes >= LoadBytes) { - ScalarLoad = DAG.getLoad(WideVT, dl, Chain, BasePTR, - LD->getPointerInfo().getWithOffset(Offset), - LD->isVolatile(), LD->isNonTemporal(), - LD->isInvariant(), - MinAlign(LD->getAlignment(), Offset), - LD->getAAInfo()); + ScalarLoad = + DAG.getLoad(WideVT, dl, Chain, BasePTR, + LD->getPointerInfo().getWithOffset(Offset), + MinAlign(LD->getAlignment(), Offset), + LD->getMemOperand()->getFlags(), LD->getAAInfo()); } else { EVT LoadVT = WideVT; while (RemainingBytes < LoadBytes) { LoadBytes >>= 1; // Reduce the load size by half. LoadVT = EVT::getIntegerVT(*DAG.getContext(), LoadBytes << 3); } - ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, WideVT, Chain, BasePTR, - LD->getPointerInfo().getWithOffset(Offset), - LoadVT, LD->isVolatile(), - LD->isNonTemporal(), LD->isInvariant(), - MinAlign(LD->getAlignment(), Offset), - LD->getAAInfo()); + ScalarLoad = + DAG.getExtLoad(ISD::EXTLOAD, dl, WideVT, Chain, BasePTR, + LD->getPointerInfo().getWithOffset(Offset), LoadVT, + MinAlign(LD->getAlignment(), Offset), + LD->getMemOperand()->getFlags(), LD->getAAInfo()); } RemainingBytes -= LoadBytes; @@ -614,29 +617,17 @@ } Vals.push_back(Lo); } - } else { - unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8; - - for (unsigned Idx=0; Idx<NumElem; Idx++) { - SDValue ScalarLoad = DAG.getExtLoad(ExtType, dl, - Op.getNode()->getValueType(0).getScalarType(), - Chain, BasePTR, LD->getPointerInfo().getWithOffset(Idx * Stride), - SrcVT.getScalarType(), - LD->isVolatile(), LD->isNonTemporal(), LD->isInvariant(), - MinAlign(LD->getAlignment(), Idx * Stride), LD->getAAInfo()); - BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR, - DAG.getConstant(Stride, dl, BasePTR.getValueType())); + NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains); + Value = DAG.getNode(ISD::BUILD_VECTOR, dl, + Op.getNode()->getValueType(0), Vals); + } else { + SDValue Scalarized = TLI.scalarizeVectorLoad(LD, DAG); - Vals.push_back(ScalarLoad.getValue(0)); - LoadChains.push_back(ScalarLoad.getValue(1)); - } + NewChain = Scalarized.getValue(1); + Value = Scalarized.getValue(0); } - SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains); - SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, dl, - Op.getNode()->getValueType(0), Vals); - AddLegalizedOperand(Op.getValue(0), Value); AddLegalizedOperand(Op.getValue(1), NewChain); @@ -644,54 +635,37 @@ } SDValue VectorLegalizer::ExpandStore(SDValue Op) { - SDLoc dl(Op); StoreSDNode *ST = cast<StoreSDNode>(Op.getNode()); - SDValue Chain = ST->getChain(); - SDValue BasePTR = ST->getBasePtr(); - SDValue Value = ST->getValue(); - EVT StVT = ST->getMemoryVT(); - unsigned Alignment = ST->getAlignment(); - bool isVolatile = ST->isVolatile(); - bool isNonTemporal = ST->isNonTemporal(); - AAMDNodes AAInfo = ST->getAAInfo(); - - unsigned NumElem = StVT.getVectorNumElements(); - // The type of the data we want to save - EVT RegVT = Value.getValueType(); - EVT RegSclVT = RegVT.getScalarType(); - // The type of data as saved in memory. + EVT StVT = ST->getMemoryVT(); EVT MemSclVT = StVT.getScalarType(); - - // Cast floats into integers unsigned ScalarSize = MemSclVT.getSizeInBits(); // Round odd types to the next pow of two. - if (!isPowerOf2_32(ScalarSize)) - ScalarSize = NextPowerOf2(ScalarSize); + if (!isPowerOf2_32(ScalarSize)) { + // FIXME: This is completely broken and inconsistent with ExpandLoad + // handling. - // Store Stride in bytes - unsigned Stride = ScalarSize/8; - // Extract each of the elements from the original vector - // and save them into memory individually. - SmallVector<SDValue, 8> Stores; - for (unsigned Idx = 0; Idx < NumElem; Idx++) { - SDValue Ex = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, dl, RegSclVT, Value, - DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + // For sub-byte element sizes, this ends up with 0 stride between elements, + // so the same element just gets re-written to the same location. There seem + // to be tests explicitly testing for this broken behavior though. tests + // for this broken behavior. + + LLVMContext &Ctx = *DAG.getContext(); - // This scalar TruncStore may be illegal, but we legalize it later. - SDValue Store = DAG.getTruncStore(Chain, dl, Ex, BasePTR, - ST->getPointerInfo().getWithOffset(Idx*Stride), MemSclVT, - isVolatile, isNonTemporal, MinAlign(Alignment, Idx*Stride), - AAInfo); + EVT NewMemVT + = EVT::getVectorVT(Ctx, + MemSclVT.getIntegerVT(Ctx, NextPowerOf2(ScalarSize)), + StVT.getVectorNumElements()); - BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR, - DAG.getConstant(Stride, dl, BasePTR.getValueType())); + SDValue NewVectorStore = DAG.getTruncStore( + ST->getChain(), SDLoc(Op), ST->getValue(), ST->getBasePtr(), + ST->getPointerInfo(), NewMemVT, ST->getAlignment(), + ST->getMemOperand()->getFlags(), ST->getAAInfo()); + ST = cast<StoreSDNode>(NewVectorStore.getNode()); + } - Stores.push_back(Store); - } - SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); + SDValue TF = TLI.scalarizeVectorStore(ST, DAG); AddLegalizedOperand(Op, TF); return TF; } @@ -720,9 +694,11 @@ return UnrollVSETCC(Op); case ISD::BITREVERSE: return ExpandBITREVERSE(Op); + case ISD::CTLZ: case ISD::CTLZ_ZERO_UNDEF: + return ExpandCTLZ(Op); case ISD::CTTZ_ZERO_UNDEF: - return ExpandCTLZ_CTTZ_ZERO_UNDEF(Op); + return ExpandCTTZ_ZERO_UNDEF(Op); default: return DAG.UnrollVectorOp(Op.getNode()); } @@ -797,8 +773,8 @@ SDLoc DL(Op); EVT OrigTy = cast<VTSDNode>(Op->getOperand(1))->getVT(); - unsigned BW = VT.getScalarType().getSizeInBits(); - unsigned OrigBW = OrigTy.getScalarType().getSizeInBits(); + unsigned BW = VT.getScalarSizeInBits(); + unsigned OrigBW = OrigTy.getScalarSizeInBits(); SDValue ShiftSz = DAG.getConstant(BW - OrigBW, DL, VT); Op = Op.getOperand(0); @@ -844,8 +820,8 @@ // Now we need sign extend. Do this by shifting the elements. Even if these // aren't legal operations, they have a better chance of being legalized // without full scalarization than the sign extension does. - unsigned EltWidth = VT.getVectorElementType().getSizeInBits(); - unsigned SrcEltWidth = SrcVT.getVectorElementType().getSizeInBits(); + unsigned EltWidth = VT.getScalarSizeInBits(); + unsigned SrcEltWidth = SrcVT.getScalarSizeInBits(); SDValue ShiftAmount = DAG.getConstant(EltWidth - SrcEltWidth, DL, VT); return DAG.getNode(ISD::SRA, DL, VT, DAG.getNode(ISD::SHL, DL, VT, Op, ShiftAmount), @@ -864,10 +840,7 @@ int NumSrcElements = SrcVT.getVectorNumElements(); // Build up a zero vector to blend into this one. - EVT SrcScalarVT = SrcVT.getScalarType(); - SDValue ScalarZero = DAG.getTargetConstant(0, DL, SrcScalarVT); - SmallVector<SDValue, 4> BuildVectorOperands(NumSrcElements, ScalarZero); - SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, DL, SrcVT, BuildVectorOperands); + SDValue Zero = DAG.getConstant(0, DL, SrcVT); // Shuffle the incoming lanes into the correct position, and pull all other // lanes from the zero vector. @@ -885,16 +858,19 @@ DAG.getVectorShuffle(SrcVT, DL, Zero, Src, ShuffleMask)); } +static void createBSWAPShuffleMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) { + int ScalarSizeInBytes = VT.getScalarSizeInBits() / 8; + for (int I = 0, E = VT.getVectorNumElements(); I != E; ++I) + for (int J = ScalarSizeInBytes - 1; J >= 0; --J) + ShuffleMask.push_back((I * ScalarSizeInBytes) + J); +} + SDValue VectorLegalizer::ExpandBSWAP(SDValue Op) { EVT VT = Op.getValueType(); // Generate a byte wise shuffle mask for the BSWAP. SmallVector<int, 16> ShuffleMask; - int ScalarSizeInBytes = VT.getScalarSizeInBits() / 8; - for (int I = 0, E = VT.getVectorNumElements(); I != E; ++I) - for (int J = ScalarSizeInBytes - 1; J >= 0; --J) - ShuffleMask.push_back((I * ScalarSizeInBytes) + J); - + createBSWAPShuffleMask(VT, ShuffleMask); EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, ShuffleMask.size()); // Only emit a shuffle if the mask is legal. @@ -903,8 +879,7 @@ SDLoc DL(Op); Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Op.getOperand(0)); - Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT), - ShuffleMask.data()); + Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT), ShuffleMask); return DAG.getNode(ISD::BITCAST, DL, VT, Op); } @@ -915,12 +890,36 @@ if (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, VT.getScalarType())) return DAG.UnrollVectorOp(Op.getNode()); + // If the vector element width is a whole number of bytes, test if its legal + // to BSWAP shuffle the bytes and then perform the BITREVERSE on the byte + // vector. This greatly reduces the number of bit shifts necessary. + unsigned ScalarSizeInBits = VT.getScalarSizeInBits(); + if (ScalarSizeInBits > 8 && (ScalarSizeInBits % 8) == 0) { + SmallVector<int, 16> BSWAPMask; + createBSWAPShuffleMask(VT, BSWAPMask); + + EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, BSWAPMask.size()); + if (TLI.isShuffleMaskLegal(BSWAPMask, ByteVT) && + (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, ByteVT) || + (TLI.isOperationLegalOrCustom(ISD::SHL, ByteVT) && + TLI.isOperationLegalOrCustom(ISD::SRL, ByteVT) && + TLI.isOperationLegalOrCustomOrPromote(ISD::AND, ByteVT) && + TLI.isOperationLegalOrCustomOrPromote(ISD::OR, ByteVT)))) { + SDLoc DL(Op); + Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Op.getOperand(0)); + Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT), + BSWAPMask); + Op = DAG.getNode(ISD::BITREVERSE, DL, ByteVT, Op); + return DAG.getNode(ISD::BITCAST, DL, VT, Op); + } + } + // If we have the appropriate vector bit operations, it is better to use them // than unrolling and expanding each component. if (!TLI.isOperationLegalOrCustom(ISD::SHL, VT) || !TLI.isOperationLegalOrCustom(ISD::SRL, VT) || - !TLI.isOperationLegalOrCustom(ISD::AND, VT) || - !TLI.isOperationLegalOrCustom(ISD::OR, VT)) + !TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT) || + !TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT)) return DAG.UnrollVectorOp(Op.getNode()); // Let LegalizeDAG handle this later. @@ -955,7 +954,7 @@ // If the mask and the type are different sizes, unroll the vector op. This // can occur when getSetCCResultType returns something that is different in // size from the operand types. For example, v4i8 = select v4i32, v4i8, v4i8. - if (VT.getSizeInBits() != Op1.getValueType().getSizeInBits()) + if (VT.getSizeInBits() != Op1.getValueSizeInBits()) return DAG.UnrollVectorOp(Op.getNode()); // Bitcast the operands to be the same type as the mask. @@ -965,7 +964,7 @@ Op2 = DAG.getNode(ISD::BITCAST, DL, VT, Op2); SDValue AllOnes = DAG.getConstant( - APInt::getAllOnesValue(VT.getScalarType().getSizeInBits()), DL, VT); + APInt::getAllOnesValue(VT.getScalarSizeInBits()), DL, VT); SDValue NotMask = DAG.getNode(ISD::XOR, DL, VT, Mask, AllOnes); Op1 = DAG.getNode(ISD::AND, DL, VT, Op1, Mask); @@ -983,21 +982,20 @@ TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Expand) return DAG.UnrollVectorOp(Op.getNode()); - EVT SVT = VT.getScalarType(); - assert((SVT.getSizeInBits() == 64 || SVT.getSizeInBits() == 32) && - "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide"); + unsigned BW = VT.getScalarSizeInBits(); + assert((BW == 64 || BW == 32) && + "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide"); - unsigned BW = SVT.getSizeInBits(); - SDValue HalfWord = DAG.getConstant(BW/2, DL, VT); + SDValue HalfWord = DAG.getConstant(BW / 2, DL, VT); // Constants to clear the upper part of the word. // Notice that we can also use SHL+SHR, but using a constant is slightly // faster on x86. - uint64_t HWMask = (SVT.getSizeInBits()==64)?0x00000000FFFFFFFF:0x0000FFFF; + uint64_t HWMask = (BW == 64) ? 0x00000000FFFFFFFF : 0x0000FFFF; SDValue HalfWordMask = DAG.getConstant(HWMask, DL, VT); // Two to the power of half-word-size. - SDValue TWOHW = DAG.getConstantFP(1 << (BW/2), DL, Op.getValueType()); + SDValue TWOHW = DAG.getConstantFP(1 << (BW / 2), DL, Op.getValueType()); // Clear upper part of LO, lower HI SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Op.getOperand(0), HalfWord); @@ -1014,7 +1012,6 @@ return DAG.getNode(ISD::FADD, DL, Op.getValueType(), fHI, fLO); } - SDValue VectorLegalizer::ExpandFNEG(SDValue Op) { if (TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) { SDLoc DL(Op); @@ -1026,11 +1023,54 @@ return DAG.UnrollVectorOp(Op.getNode()); } -SDValue VectorLegalizer::ExpandCTLZ_CTTZ_ZERO_UNDEF(SDValue Op) { - // If the non-ZERO_UNDEF version is supported we can let LegalizeDAG handle. - unsigned Opc = Op.getOpcode() == ISD::CTLZ_ZERO_UNDEF ? ISD::CTLZ : ISD::CTTZ; - if (TLI.isOperationLegalOrCustom(Opc, Op.getValueType())) - return Op; +SDValue VectorLegalizer::ExpandCTLZ(SDValue Op) { + EVT VT = Op.getValueType(); + unsigned NumBitsPerElt = VT.getScalarSizeInBits(); + + // If the non-ZERO_UNDEF version is supported we can use that instead. + if (Op.getOpcode() == ISD::CTLZ_ZERO_UNDEF && + TLI.isOperationLegalOrCustom(ISD::CTLZ, VT)) { + SDLoc DL(Op); + return DAG.getNode(ISD::CTLZ, DL, Op.getValueType(), Op.getOperand(0)); + } + + // If CTPOP is available we can lower with a CTPOP based method: + // u16 ctlz(u16 x) { + // x |= (x >> 1); + // x |= (x >> 2); + // x |= (x >> 4); + // x |= (x >> 8); + // return ctpop(~x); + // } + // Ref: "Hacker's Delight" by Henry Warren + if (isPowerOf2_32(NumBitsPerElt) && + TLI.isOperationLegalOrCustom(ISD::CTPOP, VT) && + TLI.isOperationLegalOrCustom(ISD::SRL, VT) && + TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT) && + TLI.isOperationLegalOrCustomOrPromote(ISD::XOR, VT)) { + SDLoc DL(Op); + SDValue Res = Op.getOperand(0); + EVT ShiftTy = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); + + for (unsigned i = 1; i != NumBitsPerElt; i *= 2) + Res = DAG.getNode( + ISD::OR, DL, VT, Res, + DAG.getNode(ISD::SRL, DL, VT, Res, DAG.getConstant(i, DL, ShiftTy))); + + Res = DAG.getNOT(DL, Res, VT); + return DAG.getNode(ISD::CTPOP, DL, VT, Res); + } + + // Otherwise go ahead and unroll. + return DAG.UnrollVectorOp(Op.getNode()); +} + +SDValue VectorLegalizer::ExpandCTTZ_ZERO_UNDEF(SDValue Op) { + // If the non-ZERO_UNDEF version is supported we can use that instead. + if (TLI.isOperationLegalOrCustom(ISD::CTTZ, Op.getValueType())) { + SDLoc DL(Op); + return DAG.getNode(ISD::CTTZ, DL, Op.getValueType(), Op.getOperand(0)); + } // Otherwise go ahead and unroll. return DAG.UnrollVectorOp(Op.getNode());