Mercurial > hg > CbC > CbC_llvm
comparison lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @ 120:1172e4bd9c6f
update 4.0.0
author | mir3636 |
---|---|
date | Fri, 25 Nov 2016 19:14:25 +0900 |
parents | 7d135dc70f03 |
children | 803732b1fca8 |
comparison
equal
deleted
inserted
replaced
101:34baf5011add | 120:1172e4bd9c6f |
---|---|
104 SDValue ExpandSELECT(SDValue Op); | 104 SDValue ExpandSELECT(SDValue Op); |
105 SDValue ExpandLoad(SDValue Op); | 105 SDValue ExpandLoad(SDValue Op); |
106 SDValue ExpandStore(SDValue Op); | 106 SDValue ExpandStore(SDValue Op); |
107 SDValue ExpandFNEG(SDValue Op); | 107 SDValue ExpandFNEG(SDValue Op); |
108 SDValue ExpandBITREVERSE(SDValue Op); | 108 SDValue ExpandBITREVERSE(SDValue Op); |
109 SDValue ExpandCTLZ_CTTZ_ZERO_UNDEF(SDValue Op); | 109 SDValue ExpandCTLZ(SDValue Op); |
110 SDValue ExpandCTTZ_ZERO_UNDEF(SDValue Op); | |
110 | 111 |
111 /// \brief Implements vector promotion. | 112 /// \brief Implements vector promotion. |
112 /// | 113 /// |
113 /// This is essentially just bitcasting the operands to a different type and | 114 /// This is essentially just bitcasting the operands to a different type and |
114 /// bitcasting the result back to the original type. | 115 /// bitcasting the result back to the original type. |
356 Changed = true; | 357 Changed = true; |
357 break; | 358 break; |
358 case TargetLowering::Legal: | 359 case TargetLowering::Legal: |
359 break; | 360 break; |
360 case TargetLowering::Custom: { | 361 case TargetLowering::Custom: { |
361 SDValue Tmp1 = TLI.LowerOperation(Op, DAG); | 362 if (SDValue Tmp1 = TLI.LowerOperation(Op, DAG)) { |
362 if (Tmp1.getNode()) { | |
363 Result = Tmp1; | 363 Result = Tmp1; |
364 break; | 364 break; |
365 } | 365 } |
366 // FALL THROUGH | 366 LLVM_FALLTHROUGH; |
367 } | 367 } |
368 case TargetLowering::Expand: | 368 case TargetLowering::Expand: |
369 Result = Expand(Op); | 369 Result = Expand(Op); |
370 } | 370 } |
371 | 371 |
491 return DAG.getNode(ISD::TRUNCATE, SDLoc(Op), VT, promoted); | 491 return DAG.getNode(ISD::TRUNCATE, SDLoc(Op), VT, promoted); |
492 } | 492 } |
493 | 493 |
494 | 494 |
495 SDValue VectorLegalizer::ExpandLoad(SDValue Op) { | 495 SDValue VectorLegalizer::ExpandLoad(SDValue Op) { |
496 SDLoc dl(Op); | |
497 LoadSDNode *LD = cast<LoadSDNode>(Op.getNode()); | 496 LoadSDNode *LD = cast<LoadSDNode>(Op.getNode()); |
498 SDValue Chain = LD->getChain(); | 497 |
499 SDValue BasePTR = LD->getBasePtr(); | |
500 EVT SrcVT = LD->getMemoryVT(); | 498 EVT SrcVT = LD->getMemoryVT(); |
501 ISD::LoadExtType ExtType = LD->getExtensionType(); | 499 EVT SrcEltVT = SrcVT.getScalarType(); |
502 | |
503 SmallVector<SDValue, 8> Vals; | |
504 SmallVector<SDValue, 8> LoadChains; | |
505 unsigned NumElem = SrcVT.getVectorNumElements(); | 500 unsigned NumElem = SrcVT.getVectorNumElements(); |
506 | 501 |
507 EVT SrcEltVT = SrcVT.getScalarType(); | 502 |
508 EVT DstEltVT = Op.getNode()->getValueType(0).getScalarType(); | 503 SDValue NewChain; |
509 | 504 SDValue Value; |
510 if (SrcVT.getVectorNumElements() > 1 && !SrcEltVT.isByteSized()) { | 505 if (SrcVT.getVectorNumElements() > 1 && !SrcEltVT.isByteSized()) { |
506 SDLoc dl(Op); | |
507 | |
508 SmallVector<SDValue, 8> Vals; | |
509 SmallVector<SDValue, 8> LoadChains; | |
510 | |
511 EVT DstEltVT = LD->getValueType(0).getScalarType(); | |
512 SDValue Chain = LD->getChain(); | |
513 SDValue BasePTR = LD->getBasePtr(); | |
514 ISD::LoadExtType ExtType = LD->getExtensionType(); | |
515 | |
511 // When elements in a vector are not byte-addressable, we cannot directly | 516 // When elements in a vector are not byte-addressable, we cannot directly |
512 // load each element by advancing pointer, which could only address bytes. | 517 // load each element by advancing pointer, which could only address bytes. |
513 // Instead, we load all significant words, mask bits off, and concatenate | 518 // Instead, we load all significant words, mask bits off, and concatenate |
514 // them to form each element. Finally, they are extended to destination | 519 // them to form each element. Finally, they are extended to destination |
515 // scalar type to build the destination vector. | 520 // scalar type to build the destination vector. |
529 while (RemainingBytes > 0) { | 534 while (RemainingBytes > 0) { |
530 SDValue ScalarLoad; | 535 SDValue ScalarLoad; |
531 unsigned LoadBytes = WideBytes; | 536 unsigned LoadBytes = WideBytes; |
532 | 537 |
533 if (RemainingBytes >= LoadBytes) { | 538 if (RemainingBytes >= LoadBytes) { |
534 ScalarLoad = DAG.getLoad(WideVT, dl, Chain, BasePTR, | 539 ScalarLoad = |
535 LD->getPointerInfo().getWithOffset(Offset), | 540 DAG.getLoad(WideVT, dl, Chain, BasePTR, |
536 LD->isVolatile(), LD->isNonTemporal(), | 541 LD->getPointerInfo().getWithOffset(Offset), |
537 LD->isInvariant(), | 542 MinAlign(LD->getAlignment(), Offset), |
538 MinAlign(LD->getAlignment(), Offset), | 543 LD->getMemOperand()->getFlags(), LD->getAAInfo()); |
539 LD->getAAInfo()); | |
540 } else { | 544 } else { |
541 EVT LoadVT = WideVT; | 545 EVT LoadVT = WideVT; |
542 while (RemainingBytes < LoadBytes) { | 546 while (RemainingBytes < LoadBytes) { |
543 LoadBytes >>= 1; // Reduce the load size by half. | 547 LoadBytes >>= 1; // Reduce the load size by half. |
544 LoadVT = EVT::getIntegerVT(*DAG.getContext(), LoadBytes << 3); | 548 LoadVT = EVT::getIntegerVT(*DAG.getContext(), LoadBytes << 3); |
545 } | 549 } |
546 ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, WideVT, Chain, BasePTR, | 550 ScalarLoad = |
547 LD->getPointerInfo().getWithOffset(Offset), | 551 DAG.getExtLoad(ISD::EXTLOAD, dl, WideVT, Chain, BasePTR, |
548 LoadVT, LD->isVolatile(), | 552 LD->getPointerInfo().getWithOffset(Offset), LoadVT, |
549 LD->isNonTemporal(), LD->isInvariant(), | 553 MinAlign(LD->getAlignment(), Offset), |
550 MinAlign(LD->getAlignment(), Offset), | 554 LD->getMemOperand()->getFlags(), LD->getAAInfo()); |
551 LD->getAAInfo()); | |
552 } | 555 } |
553 | 556 |
554 RemainingBytes -= LoadBytes; | 557 RemainingBytes -= LoadBytes; |
555 Offset += LoadBytes; | 558 Offset += LoadBytes; |
556 BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR, | 559 BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR, |
612 Lo = DAG.getSExtOrTrunc(Lo, dl, DstEltVT); | 615 Lo = DAG.getSExtOrTrunc(Lo, dl, DstEltVT); |
613 break; | 616 break; |
614 } | 617 } |
615 Vals.push_back(Lo); | 618 Vals.push_back(Lo); |
616 } | 619 } |
620 | |
621 NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains); | |
622 Value = DAG.getNode(ISD::BUILD_VECTOR, dl, | |
623 Op.getNode()->getValueType(0), Vals); | |
617 } else { | 624 } else { |
618 unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8; | 625 SDValue Scalarized = TLI.scalarizeVectorLoad(LD, DAG); |
619 | 626 |
620 for (unsigned Idx=0; Idx<NumElem; Idx++) { | 627 NewChain = Scalarized.getValue(1); |
621 SDValue ScalarLoad = DAG.getExtLoad(ExtType, dl, | 628 Value = Scalarized.getValue(0); |
622 Op.getNode()->getValueType(0).getScalarType(), | 629 } |
623 Chain, BasePTR, LD->getPointerInfo().getWithOffset(Idx * Stride), | |
624 SrcVT.getScalarType(), | |
625 LD->isVolatile(), LD->isNonTemporal(), LD->isInvariant(), | |
626 MinAlign(LD->getAlignment(), Idx * Stride), LD->getAAInfo()); | |
627 | |
628 BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR, | |
629 DAG.getConstant(Stride, dl, BasePTR.getValueType())); | |
630 | |
631 Vals.push_back(ScalarLoad.getValue(0)); | |
632 LoadChains.push_back(ScalarLoad.getValue(1)); | |
633 } | |
634 } | |
635 | |
636 SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains); | |
637 SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, dl, | |
638 Op.getNode()->getValueType(0), Vals); | |
639 | 630 |
640 AddLegalizedOperand(Op.getValue(0), Value); | 631 AddLegalizedOperand(Op.getValue(0), Value); |
641 AddLegalizedOperand(Op.getValue(1), NewChain); | 632 AddLegalizedOperand(Op.getValue(1), NewChain); |
642 | 633 |
643 return (Op.getResNo() ? NewChain : Value); | 634 return (Op.getResNo() ? NewChain : Value); |
644 } | 635 } |
645 | 636 |
646 SDValue VectorLegalizer::ExpandStore(SDValue Op) { | 637 SDValue VectorLegalizer::ExpandStore(SDValue Op) { |
647 SDLoc dl(Op); | |
648 StoreSDNode *ST = cast<StoreSDNode>(Op.getNode()); | 638 StoreSDNode *ST = cast<StoreSDNode>(Op.getNode()); |
649 SDValue Chain = ST->getChain(); | 639 |
650 SDValue BasePTR = ST->getBasePtr(); | |
651 SDValue Value = ST->getValue(); | |
652 EVT StVT = ST->getMemoryVT(); | 640 EVT StVT = ST->getMemoryVT(); |
653 | |
654 unsigned Alignment = ST->getAlignment(); | |
655 bool isVolatile = ST->isVolatile(); | |
656 bool isNonTemporal = ST->isNonTemporal(); | |
657 AAMDNodes AAInfo = ST->getAAInfo(); | |
658 | |
659 unsigned NumElem = StVT.getVectorNumElements(); | |
660 // The type of the data we want to save | |
661 EVT RegVT = Value.getValueType(); | |
662 EVT RegSclVT = RegVT.getScalarType(); | |
663 // The type of data as saved in memory. | |
664 EVT MemSclVT = StVT.getScalarType(); | 641 EVT MemSclVT = StVT.getScalarType(); |
665 | |
666 // Cast floats into integers | |
667 unsigned ScalarSize = MemSclVT.getSizeInBits(); | 642 unsigned ScalarSize = MemSclVT.getSizeInBits(); |
668 | 643 |
669 // Round odd types to the next pow of two. | 644 // Round odd types to the next pow of two. |
670 if (!isPowerOf2_32(ScalarSize)) | 645 if (!isPowerOf2_32(ScalarSize)) { |
671 ScalarSize = NextPowerOf2(ScalarSize); | 646 // FIXME: This is completely broken and inconsistent with ExpandLoad |
672 | 647 // handling. |
673 // Store Stride in bytes | 648 |
674 unsigned Stride = ScalarSize/8; | 649 // For sub-byte element sizes, this ends up with 0 stride between elements, |
675 // Extract each of the elements from the original vector | 650 // so the same element just gets re-written to the same location. There seem |
676 // and save them into memory individually. | 651 // to be tests that explicitly test |
677 SmallVector<SDValue, 8> Stores; | 652 // for this broken behavior, though. |
678 for (unsigned Idx = 0; Idx < NumElem; Idx++) { | 653 |
679 SDValue Ex = DAG.getNode( | 654 LLVMContext &Ctx = *DAG.getContext(); |
680 ISD::EXTRACT_VECTOR_ELT, dl, RegSclVT, Value, | 655 |
681 DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); | 656 EVT NewMemVT |
682 | 657 = EVT::getVectorVT(Ctx, |
683 // This scalar TruncStore may be illegal, but we legalize it later. | 658 MemSclVT.getIntegerVT(Ctx, NextPowerOf2(ScalarSize)), |
684 SDValue Store = DAG.getTruncStore(Chain, dl, Ex, BasePTR, | 659 StVT.getVectorNumElements()); |
685 ST->getPointerInfo().getWithOffset(Idx*Stride), MemSclVT, | 660 |
686 isVolatile, isNonTemporal, MinAlign(Alignment, Idx*Stride), | 661 SDValue NewVectorStore = DAG.getTruncStore( |
687 AAInfo); | 662 ST->getChain(), SDLoc(Op), ST->getValue(), ST->getBasePtr(), |
688 | 663 ST->getPointerInfo(), NewMemVT, ST->getAlignment(), |
689 BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR, | 664 ST->getMemOperand()->getFlags(), ST->getAAInfo()); |
690 DAG.getConstant(Stride, dl, BasePTR.getValueType())); | 665 ST = cast<StoreSDNode>(NewVectorStore.getNode()); |
691 | 666 } |
692 Stores.push_back(Store); | 667 |
693 } | 668 SDValue TF = TLI.scalarizeVectorStore(ST, DAG); |
694 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); | |
695 AddLegalizedOperand(Op, TF); | 669 AddLegalizedOperand(Op, TF); |
696 return TF; | 670 return TF; |
697 } | 671 } |
698 | 672 |
699 SDValue VectorLegalizer::Expand(SDValue Op) { | 673 SDValue VectorLegalizer::Expand(SDValue Op) { |
718 return ExpandFNEG(Op); | 692 return ExpandFNEG(Op); |
719 case ISD::SETCC: | 693 case ISD::SETCC: |
720 return UnrollVSETCC(Op); | 694 return UnrollVSETCC(Op); |
721 case ISD::BITREVERSE: | 695 case ISD::BITREVERSE: |
722 return ExpandBITREVERSE(Op); | 696 return ExpandBITREVERSE(Op); |
697 case ISD::CTLZ: | |
723 case ISD::CTLZ_ZERO_UNDEF: | 698 case ISD::CTLZ_ZERO_UNDEF: |
699 return ExpandCTLZ(Op); | |
724 case ISD::CTTZ_ZERO_UNDEF: | 700 case ISD::CTTZ_ZERO_UNDEF: |
725 return ExpandCTLZ_CTTZ_ZERO_UNDEF(Op); | 701 return ExpandCTTZ_ZERO_UNDEF(Op); |
726 default: | 702 default: |
727 return DAG.UnrollVectorOp(Op.getNode()); | 703 return DAG.UnrollVectorOp(Op.getNode()); |
728 } | 704 } |
729 } | 705 } |
730 | 706 |
795 return DAG.UnrollVectorOp(Op.getNode()); | 771 return DAG.UnrollVectorOp(Op.getNode()); |
796 | 772 |
797 SDLoc DL(Op); | 773 SDLoc DL(Op); |
798 EVT OrigTy = cast<VTSDNode>(Op->getOperand(1))->getVT(); | 774 EVT OrigTy = cast<VTSDNode>(Op->getOperand(1))->getVT(); |
799 | 775 |
800 unsigned BW = VT.getScalarType().getSizeInBits(); | 776 unsigned BW = VT.getScalarSizeInBits(); |
801 unsigned OrigBW = OrigTy.getScalarType().getSizeInBits(); | 777 unsigned OrigBW = OrigTy.getScalarSizeInBits(); |
802 SDValue ShiftSz = DAG.getConstant(BW - OrigBW, DL, VT); | 778 SDValue ShiftSz = DAG.getConstant(BW - OrigBW, DL, VT); |
803 | 779 |
804 Op = Op.getOperand(0); | 780 Op = Op.getOperand(0); |
805 Op = DAG.getNode(ISD::SHL, DL, VT, Op, ShiftSz); | 781 Op = DAG.getNode(ISD::SHL, DL, VT, Op, ShiftSz); |
806 return DAG.getNode(ISD::SRA, DL, VT, Op, ShiftSz); | 782 return DAG.getNode(ISD::SRA, DL, VT, Op, ShiftSz); |
842 Op = DAG.getAnyExtendVectorInReg(Src, DL, VT); | 818 Op = DAG.getAnyExtendVectorInReg(Src, DL, VT); |
843 | 819 |
844 // Now we need to sign extend. Do this by shifting the elements. Even if these | 820 // Now we need to sign extend. Do this by shifting the elements. Even if these |
845 // aren't legal operations, they have a better chance of being legalized | 821 // aren't legal operations, they have a better chance of being legalized |
846 // without full scalarization than the sign extension does. | 822 // without full scalarization than the sign extension does. |
847 unsigned EltWidth = VT.getVectorElementType().getSizeInBits(); | 823 unsigned EltWidth = VT.getScalarSizeInBits(); |
848 unsigned SrcEltWidth = SrcVT.getVectorElementType().getSizeInBits(); | 824 unsigned SrcEltWidth = SrcVT.getScalarSizeInBits(); |
849 SDValue ShiftAmount = DAG.getConstant(EltWidth - SrcEltWidth, DL, VT); | 825 SDValue ShiftAmount = DAG.getConstant(EltWidth - SrcEltWidth, DL, VT); |
850 return DAG.getNode(ISD::SRA, DL, VT, | 826 return DAG.getNode(ISD::SRA, DL, VT, |
851 DAG.getNode(ISD::SHL, DL, VT, Op, ShiftAmount), | 827 DAG.getNode(ISD::SHL, DL, VT, Op, ShiftAmount), |
852 ShiftAmount); | 828 ShiftAmount); |
853 } | 829 } |
862 SDValue Src = Op.getOperand(0); | 838 SDValue Src = Op.getOperand(0); |
863 EVT SrcVT = Src.getValueType(); | 839 EVT SrcVT = Src.getValueType(); |
864 int NumSrcElements = SrcVT.getVectorNumElements(); | 840 int NumSrcElements = SrcVT.getVectorNumElements(); |
865 | 841 |
866 // Build up a zero vector to blend into this one. | 842 // Build up a zero vector to blend into this one. |
867 EVT SrcScalarVT = SrcVT.getScalarType(); | 843 SDValue Zero = DAG.getConstant(0, DL, SrcVT); |
868 SDValue ScalarZero = DAG.getTargetConstant(0, DL, SrcScalarVT); | |
869 SmallVector<SDValue, 4> BuildVectorOperands(NumSrcElements, ScalarZero); | |
870 SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, DL, SrcVT, BuildVectorOperands); | |
871 | 844 |
872 // Shuffle the incoming lanes into the correct position, and pull all other | 845 // Shuffle the incoming lanes into the correct position, and pull all other |
873 // lanes from the zero vector. | 846 // lanes from the zero vector. |
874 SmallVector<int, 16> ShuffleMask; | 847 SmallVector<int, 16> ShuffleMask; |
875 ShuffleMask.reserve(NumSrcElements); | 848 ShuffleMask.reserve(NumSrcElements); |
883 | 856 |
884 return DAG.getNode(ISD::BITCAST, DL, VT, | 857 return DAG.getNode(ISD::BITCAST, DL, VT, |
885 DAG.getVectorShuffle(SrcVT, DL, Zero, Src, ShuffleMask)); | 858 DAG.getVectorShuffle(SrcVT, DL, Zero, Src, ShuffleMask)); |
886 } | 859 } |
887 | 860 |
888 SDValue VectorLegalizer::ExpandBSWAP(SDValue Op) { | 861 static void createBSWAPShuffleMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) { |
889 EVT VT = Op.getValueType(); | |
890 | |
891 // Generate a byte wise shuffle mask for the BSWAP. | |
892 SmallVector<int, 16> ShuffleMask; | |
893 int ScalarSizeInBytes = VT.getScalarSizeInBits() / 8; | 862 int ScalarSizeInBytes = VT.getScalarSizeInBits() / 8; |
894 for (int I = 0, E = VT.getVectorNumElements(); I != E; ++I) | 863 for (int I = 0, E = VT.getVectorNumElements(); I != E; ++I) |
895 for (int J = ScalarSizeInBytes - 1; J >= 0; --J) | 864 for (int J = ScalarSizeInBytes - 1; J >= 0; --J) |
896 ShuffleMask.push_back((I * ScalarSizeInBytes) + J); | 865 ShuffleMask.push_back((I * ScalarSizeInBytes) + J); |
897 | 866 } |
867 | |
868 SDValue VectorLegalizer::ExpandBSWAP(SDValue Op) { | |
869 EVT VT = Op.getValueType(); | |
870 | |
871 // Generate a byte wise shuffle mask for the BSWAP. | |
872 SmallVector<int, 16> ShuffleMask; | |
873 createBSWAPShuffleMask(VT, ShuffleMask); | |
898 EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, ShuffleMask.size()); | 874 EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, ShuffleMask.size()); |
899 | 875 |
900 // Only emit a shuffle if the mask is legal. | 876 // Only emit a shuffle if the mask is legal. |
901 if (!TLI.isShuffleMaskLegal(ShuffleMask, ByteVT)) | 877 if (!TLI.isShuffleMaskLegal(ShuffleMask, ByteVT)) |
902 return DAG.UnrollVectorOp(Op.getNode()); | 878 return DAG.UnrollVectorOp(Op.getNode()); |
903 | 879 |
904 SDLoc DL(Op); | 880 SDLoc DL(Op); |
905 Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Op.getOperand(0)); | 881 Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Op.getOperand(0)); |
906 Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT), | 882 Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT), ShuffleMask); |
907 ShuffleMask.data()); | |
908 return DAG.getNode(ISD::BITCAST, DL, VT, Op); | 883 return DAG.getNode(ISD::BITCAST, DL, VT, Op); |
909 } | 884 } |
910 | 885 |
911 SDValue VectorLegalizer::ExpandBITREVERSE(SDValue Op) { | 886 SDValue VectorLegalizer::ExpandBITREVERSE(SDValue Op) { |
912 EVT VT = Op.getValueType(); | 887 EVT VT = Op.getValueType(); |
913 | 888 |
914 // If we have the scalar operation, it's probably cheaper to unroll it. | 889 // If we have the scalar operation, it's probably cheaper to unroll it. |
915 if (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, VT.getScalarType())) | 890 if (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, VT.getScalarType())) |
916 return DAG.UnrollVectorOp(Op.getNode()); | 891 return DAG.UnrollVectorOp(Op.getNode()); |
892 | |
893 // If the vector element width is a whole number of bytes, test if it's legal | |
894 // to BSWAP shuffle the bytes and then perform the BITREVERSE on the byte | |
895 // vector. This greatly reduces the number of bit shifts necessary. | |
896 unsigned ScalarSizeInBits = VT.getScalarSizeInBits(); | |
897 if (ScalarSizeInBits > 8 && (ScalarSizeInBits % 8) == 0) { | |
898 SmallVector<int, 16> BSWAPMask; | |
899 createBSWAPShuffleMask(VT, BSWAPMask); | |
900 | |
901 EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, BSWAPMask.size()); | |
902 if (TLI.isShuffleMaskLegal(BSWAPMask, ByteVT) && | |
903 (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, ByteVT) || | |
904 (TLI.isOperationLegalOrCustom(ISD::SHL, ByteVT) && | |
905 TLI.isOperationLegalOrCustom(ISD::SRL, ByteVT) && | |
906 TLI.isOperationLegalOrCustomOrPromote(ISD::AND, ByteVT) && | |
907 TLI.isOperationLegalOrCustomOrPromote(ISD::OR, ByteVT)))) { | |
908 SDLoc DL(Op); | |
909 Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Op.getOperand(0)); | |
910 Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT), | |
911 BSWAPMask); | |
912 Op = DAG.getNode(ISD::BITREVERSE, DL, ByteVT, Op); | |
913 return DAG.getNode(ISD::BITCAST, DL, VT, Op); | |
914 } | |
915 } | |
917 | 916 |
918 // If we have the appropriate vector bit operations, it is better to use them | 917 // If we have the appropriate vector bit operations, it is better to use them |
919 // than unrolling and expanding each component. | 918 // than unrolling and expanding each component. |
920 if (!TLI.isOperationLegalOrCustom(ISD::SHL, VT) || | 919 if (!TLI.isOperationLegalOrCustom(ISD::SHL, VT) || |
921 !TLI.isOperationLegalOrCustom(ISD::SRL, VT) || | 920 !TLI.isOperationLegalOrCustom(ISD::SRL, VT) || |
922 !TLI.isOperationLegalOrCustom(ISD::AND, VT) || | 921 !TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT) || |
923 !TLI.isOperationLegalOrCustom(ISD::OR, VT)) | 922 !TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT)) |
924 return DAG.UnrollVectorOp(Op.getNode()); | 923 return DAG.UnrollVectorOp(Op.getNode()); |
925 | 924 |
926 // Let LegalizeDAG handle this later. | 925 // Let LegalizeDAG handle this later. |
927 return Op; | 926 return Op; |
928 } | 927 } |
953 return DAG.UnrollVectorOp(Op.getNode()); | 952 return DAG.UnrollVectorOp(Op.getNode()); |
954 | 953 |
955 // If the mask and the type are different sizes, unroll the vector op. This | 954 // If the mask and the type are different sizes, unroll the vector op. This |
956 // can occur when getSetCCResultType returns something that is different in | 955 // can occur when getSetCCResultType returns something that is different in |
957 // size from the operand types. For example, v4i8 = select v4i32, v4i8, v4i8. | 956 // size from the operand types. For example, v4i8 = select v4i32, v4i8, v4i8. |
958 if (VT.getSizeInBits() != Op1.getValueType().getSizeInBits()) | 957 if (VT.getSizeInBits() != Op1.getValueSizeInBits()) |
959 return DAG.UnrollVectorOp(Op.getNode()); | 958 return DAG.UnrollVectorOp(Op.getNode()); |
960 | 959 |
961 // Bitcast the operands to be the same type as the mask. | 960 // Bitcast the operands to be the same type as the mask. |
962 // This is needed when we select between FP types because | 961 // This is needed when we select between FP types because |
963 // the mask is a vector of integers. | 962 // the mask is a vector of integers. |
964 Op1 = DAG.getNode(ISD::BITCAST, DL, VT, Op1); | 963 Op1 = DAG.getNode(ISD::BITCAST, DL, VT, Op1); |
965 Op2 = DAG.getNode(ISD::BITCAST, DL, VT, Op2); | 964 Op2 = DAG.getNode(ISD::BITCAST, DL, VT, Op2); |
966 | 965 |
967 SDValue AllOnes = DAG.getConstant( | 966 SDValue AllOnes = DAG.getConstant( |
968 APInt::getAllOnesValue(VT.getScalarType().getSizeInBits()), DL, VT); | 967 APInt::getAllOnesValue(VT.getScalarSizeInBits()), DL, VT); |
969 SDValue NotMask = DAG.getNode(ISD::XOR, DL, VT, Mask, AllOnes); | 968 SDValue NotMask = DAG.getNode(ISD::XOR, DL, VT, Mask, AllOnes); |
970 | 969 |
971 Op1 = DAG.getNode(ISD::AND, DL, VT, Op1, Mask); | 970 Op1 = DAG.getNode(ISD::AND, DL, VT, Op1, Mask); |
972 Op2 = DAG.getNode(ISD::AND, DL, VT, Op2, NotMask); | 971 Op2 = DAG.getNode(ISD::AND, DL, VT, Op2, NotMask); |
973 SDValue Val = DAG.getNode(ISD::OR, DL, VT, Op1, Op2); | 972 SDValue Val = DAG.getNode(ISD::OR, DL, VT, Op1, Op2); |
981 // Make sure that the SINT_TO_FP and SRL instructions are available. | 980 // Make sure that the SINT_TO_FP and SRL instructions are available. |
982 if (TLI.getOperationAction(ISD::SINT_TO_FP, VT) == TargetLowering::Expand || | 981 if (TLI.getOperationAction(ISD::SINT_TO_FP, VT) == TargetLowering::Expand || |
983 TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Expand) | 982 TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Expand) |
984 return DAG.UnrollVectorOp(Op.getNode()); | 983 return DAG.UnrollVectorOp(Op.getNode()); |
985 | 984 |
986 EVT SVT = VT.getScalarType(); | 985 unsigned BW = VT.getScalarSizeInBits(); |
987 assert((SVT.getSizeInBits() == 64 || SVT.getSizeInBits() == 32) && | 986 assert((BW == 64 || BW == 32) && |
988 "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide"); | 987 "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide"); |
989 | 988 |
990 unsigned BW = SVT.getSizeInBits(); | 989 SDValue HalfWord = DAG.getConstant(BW / 2, DL, VT); |
991 SDValue HalfWord = DAG.getConstant(BW/2, DL, VT); | |
992 | 990 |
993 // Constants to clear the upper part of the word. | 991 // Constants to clear the upper part of the word. |
994 // Notice that we can also use SHL+SHR, but using a constant is slightly | 992 // Notice that we can also use SHL+SHR, but using a constant is slightly |
995 // faster on x86. | 993 // faster on x86. |
996 uint64_t HWMask = (SVT.getSizeInBits()==64)?0x00000000FFFFFFFF:0x0000FFFF; | 994 uint64_t HWMask = (BW == 64) ? 0x00000000FFFFFFFF : 0x0000FFFF; |
997 SDValue HalfWordMask = DAG.getConstant(HWMask, DL, VT); | 995 SDValue HalfWordMask = DAG.getConstant(HWMask, DL, VT); |
998 | 996 |
999 // Two to the power of half-word-size. | 997 // Two to the power of half-word-size. |
1000 SDValue TWOHW = DAG.getConstantFP(1 << (BW/2), DL, Op.getValueType()); | 998 SDValue TWOHW = DAG.getConstantFP(1 << (BW / 2), DL, Op.getValueType()); |
1001 | 999 |
1002 // Clear upper part of LO, lower HI | 1000 // Clear upper part of LO, lower HI |
1003 SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Op.getOperand(0), HalfWord); | 1001 SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Op.getOperand(0), HalfWord); |
1004 SDValue LO = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), HalfWordMask); | 1002 SDValue LO = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), HalfWordMask); |
1005 | 1003 |
1011 SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), LO); | 1009 SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), LO); |
1012 | 1010 |
1013 // Add the two halves | 1011 // Add the two halves |
1014 return DAG.getNode(ISD::FADD, DL, Op.getValueType(), fHI, fLO); | 1012 return DAG.getNode(ISD::FADD, DL, Op.getValueType(), fHI, fLO); |
1015 } | 1013 } |
1016 | |
1017 | 1014 |
1018 SDValue VectorLegalizer::ExpandFNEG(SDValue Op) { | 1015 SDValue VectorLegalizer::ExpandFNEG(SDValue Op) { |
1019 if (TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) { | 1016 if (TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) { |
1020 SDLoc DL(Op); | 1017 SDLoc DL(Op); |
1021 SDValue Zero = DAG.getConstantFP(-0.0, DL, Op.getValueType()); | 1018 SDValue Zero = DAG.getConstantFP(-0.0, DL, Op.getValueType()); |
1024 Zero, Op.getOperand(0)); | 1021 Zero, Op.getOperand(0)); |
1025 } | 1022 } |
1026 return DAG.UnrollVectorOp(Op.getNode()); | 1023 return DAG.UnrollVectorOp(Op.getNode()); |
1027 } | 1024 } |
1028 | 1025 |
1029 SDValue VectorLegalizer::ExpandCTLZ_CTTZ_ZERO_UNDEF(SDValue Op) { | 1026 SDValue VectorLegalizer::ExpandCTLZ(SDValue Op) { |
1030 // If the non-ZERO_UNDEF version is supported we can let LegalizeDAG handle. | 1027 EVT VT = Op.getValueType(); |
1031 unsigned Opc = Op.getOpcode() == ISD::CTLZ_ZERO_UNDEF ? ISD::CTLZ : ISD::CTTZ; | 1028 unsigned NumBitsPerElt = VT.getScalarSizeInBits(); |
1032 if (TLI.isOperationLegalOrCustom(Opc, Op.getValueType())) | 1029 |
1033 return Op; | 1030 // If the non-ZERO_UNDEF version is supported we can use that instead. |
1031 if (Op.getOpcode() == ISD::CTLZ_ZERO_UNDEF && | |
1032 TLI.isOperationLegalOrCustom(ISD::CTLZ, VT)) { | |
1033 SDLoc DL(Op); | |
1034 return DAG.getNode(ISD::CTLZ, DL, Op.getValueType(), Op.getOperand(0)); | |
1035 } | |
1036 | |
1037 // If CTPOP is available we can lower with a CTPOP based method: | |
1038 // u16 ctlz(u16 x) { | |
1039 // x |= (x >> 1); | |
1040 // x |= (x >> 2); | |
1041 // x |= (x >> 4); | |
1042 // x |= (x >> 8); | |
1043 // return ctpop(~x); | |
1044 // } | |
1045 // Ref: "Hacker's Delight" by Henry Warren | |
1046 if (isPowerOf2_32(NumBitsPerElt) && | |
1047 TLI.isOperationLegalOrCustom(ISD::CTPOP, VT) && | |
1048 TLI.isOperationLegalOrCustom(ISD::SRL, VT) && | |
1049 TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT) && | |
1050 TLI.isOperationLegalOrCustomOrPromote(ISD::XOR, VT)) { | |
1051 SDLoc DL(Op); | |
1052 SDValue Res = Op.getOperand(0); | |
1053 EVT ShiftTy = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); | |
1054 | |
1055 for (unsigned i = 1; i != NumBitsPerElt; i *= 2) | |
1056 Res = DAG.getNode( | |
1057 ISD::OR, DL, VT, Res, | |
1058 DAG.getNode(ISD::SRL, DL, VT, Res, DAG.getConstant(i, DL, ShiftTy))); | |
1059 | |
1060 Res = DAG.getNOT(DL, Res, VT); | |
1061 return DAG.getNode(ISD::CTPOP, DL, VT, Res); | |
1062 } | |
1063 | |
1064 // Otherwise go ahead and unroll. | |
1065 return DAG.UnrollVectorOp(Op.getNode()); | |
1066 } | |
1067 | |
1068 SDValue VectorLegalizer::ExpandCTTZ_ZERO_UNDEF(SDValue Op) { | |
1069 // If the non-ZERO_UNDEF version is supported we can use that instead. | |
1070 if (TLI.isOperationLegalOrCustom(ISD::CTTZ, Op.getValueType())) { | |
1071 SDLoc DL(Op); | |
1072 return DAG.getNode(ISD::CTTZ, DL, Op.getValueType(), Op.getOperand(0)); | |
1073 } | |
1034 | 1074 |
1035 // Otherwise go ahead and unroll. | 1075 // Otherwise go ahead and unroll. |
1036 return DAG.UnrollVectorOp(Op.getNode()); | 1076 return DAG.UnrollVectorOp(Op.getNode()); |
1037 } | 1077 } |
1038 | 1078 |