comparison lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @ 120:1172e4bd9c6f

update 4.0.0
author mir3636
date Fri, 25 Nov 2016 19:14:25 +0900
parents 7d135dc70f03
children 803732b1fca8
comparison
equal deleted inserted replaced
101:34baf5011add 120:1172e4bd9c6f
104 SDValue ExpandSELECT(SDValue Op); 104 SDValue ExpandSELECT(SDValue Op);
105 SDValue ExpandLoad(SDValue Op); 105 SDValue ExpandLoad(SDValue Op);
106 SDValue ExpandStore(SDValue Op); 106 SDValue ExpandStore(SDValue Op);
107 SDValue ExpandFNEG(SDValue Op); 107 SDValue ExpandFNEG(SDValue Op);
108 SDValue ExpandBITREVERSE(SDValue Op); 108 SDValue ExpandBITREVERSE(SDValue Op);
109 SDValue ExpandCTLZ_CTTZ_ZERO_UNDEF(SDValue Op); 109 SDValue ExpandCTLZ(SDValue Op);
110 SDValue ExpandCTTZ_ZERO_UNDEF(SDValue Op);
110 111
111 /// \brief Implements vector promotion. 112 /// \brief Implements vector promotion.
112 /// 113 ///
113 /// This is essentially just bitcasting the operands to a different type and 114 /// This is essentially just bitcasting the operands to a different type and
114 /// bitcasting the result back to the original type. 115 /// bitcasting the result back to the original type.
356 Changed = true; 357 Changed = true;
357 break; 358 break;
358 case TargetLowering::Legal: 359 case TargetLowering::Legal:
359 break; 360 break;
360 case TargetLowering::Custom: { 361 case TargetLowering::Custom: {
361 SDValue Tmp1 = TLI.LowerOperation(Op, DAG); 362 if (SDValue Tmp1 = TLI.LowerOperation(Op, DAG)) {
362 if (Tmp1.getNode()) {
363 Result = Tmp1; 363 Result = Tmp1;
364 break; 364 break;
365 } 365 }
366 // FALL THROUGH 366 LLVM_FALLTHROUGH;
367 } 367 }
368 case TargetLowering::Expand: 368 case TargetLowering::Expand:
369 Result = Expand(Op); 369 Result = Expand(Op);
370 } 370 }
371 371
491 return DAG.getNode(ISD::TRUNCATE, SDLoc(Op), VT, promoted); 491 return DAG.getNode(ISD::TRUNCATE, SDLoc(Op), VT, promoted);
492 } 492 }
493 493
494 494
495 SDValue VectorLegalizer::ExpandLoad(SDValue Op) { 495 SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
496 SDLoc dl(Op);
497 LoadSDNode *LD = cast<LoadSDNode>(Op.getNode()); 496 LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
498 SDValue Chain = LD->getChain(); 497
499 SDValue BasePTR = LD->getBasePtr();
500 EVT SrcVT = LD->getMemoryVT(); 498 EVT SrcVT = LD->getMemoryVT();
501 ISD::LoadExtType ExtType = LD->getExtensionType(); 499 EVT SrcEltVT = SrcVT.getScalarType();
502
503 SmallVector<SDValue, 8> Vals;
504 SmallVector<SDValue, 8> LoadChains;
505 unsigned NumElem = SrcVT.getVectorNumElements(); 500 unsigned NumElem = SrcVT.getVectorNumElements();
506 501
507 EVT SrcEltVT = SrcVT.getScalarType(); 502
508 EVT DstEltVT = Op.getNode()->getValueType(0).getScalarType(); 503 SDValue NewChain;
509 504 SDValue Value;
510 if (SrcVT.getVectorNumElements() > 1 && !SrcEltVT.isByteSized()) { 505 if (SrcVT.getVectorNumElements() > 1 && !SrcEltVT.isByteSized()) {
506 SDLoc dl(Op);
507
508 SmallVector<SDValue, 8> Vals;
509 SmallVector<SDValue, 8> LoadChains;
510
511 EVT DstEltVT = LD->getValueType(0).getScalarType();
512 SDValue Chain = LD->getChain();
513 SDValue BasePTR = LD->getBasePtr();
514 ISD::LoadExtType ExtType = LD->getExtensionType();
515
511 // When elements in a vector are not byte-addressable, we cannot directly 516 // When elements in a vector are not byte-addressable, we cannot directly
512 // load each element by advancing pointer, which can only address bytes. 517 // load each element by advancing pointer, which can only address bytes.
513 // Instead, we load all significant words, mask bits off, and concatenate 518 // Instead, we load all significant words, mask bits off, and concatenate
514 // them to form each element. Finally, they are extended to destination 519 // them to form each element. Finally, they are extended to destination
515 // scalar type to build the destination vector. 520 // scalar type to build the destination vector.
529 while (RemainingBytes > 0) { 534 while (RemainingBytes > 0) {
530 SDValue ScalarLoad; 535 SDValue ScalarLoad;
531 unsigned LoadBytes = WideBytes; 536 unsigned LoadBytes = WideBytes;
532 537
533 if (RemainingBytes >= LoadBytes) { 538 if (RemainingBytes >= LoadBytes) {
534 ScalarLoad = DAG.getLoad(WideVT, dl, Chain, BasePTR, 539 ScalarLoad =
535 LD->getPointerInfo().getWithOffset(Offset), 540 DAG.getLoad(WideVT, dl, Chain, BasePTR,
536 LD->isVolatile(), LD->isNonTemporal(), 541 LD->getPointerInfo().getWithOffset(Offset),
537 LD->isInvariant(), 542 MinAlign(LD->getAlignment(), Offset),
538 MinAlign(LD->getAlignment(), Offset), 543 LD->getMemOperand()->getFlags(), LD->getAAInfo());
539 LD->getAAInfo());
540 } else { 544 } else {
541 EVT LoadVT = WideVT; 545 EVT LoadVT = WideVT;
542 while (RemainingBytes < LoadBytes) { 546 while (RemainingBytes < LoadBytes) {
543 LoadBytes >>= 1; // Reduce the load size by half. 547 LoadBytes >>= 1; // Reduce the load size by half.
544 LoadVT = EVT::getIntegerVT(*DAG.getContext(), LoadBytes << 3); 548 LoadVT = EVT::getIntegerVT(*DAG.getContext(), LoadBytes << 3);
545 } 549 }
546 ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, WideVT, Chain, BasePTR, 550 ScalarLoad =
547 LD->getPointerInfo().getWithOffset(Offset), 551 DAG.getExtLoad(ISD::EXTLOAD, dl, WideVT, Chain, BasePTR,
548 LoadVT, LD->isVolatile(), 552 LD->getPointerInfo().getWithOffset(Offset), LoadVT,
549 LD->isNonTemporal(), LD->isInvariant(), 553 MinAlign(LD->getAlignment(), Offset),
550 MinAlign(LD->getAlignment(), Offset), 554 LD->getMemOperand()->getFlags(), LD->getAAInfo());
551 LD->getAAInfo());
552 } 555 }
553 556
554 RemainingBytes -= LoadBytes; 557 RemainingBytes -= LoadBytes;
555 Offset += LoadBytes; 558 Offset += LoadBytes;
556 BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR, 559 BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR,
612 Lo = DAG.getSExtOrTrunc(Lo, dl, DstEltVT); 615 Lo = DAG.getSExtOrTrunc(Lo, dl, DstEltVT);
613 break; 616 break;
614 } 617 }
615 Vals.push_back(Lo); 618 Vals.push_back(Lo);
616 } 619 }
620
621 NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
622 Value = DAG.getNode(ISD::BUILD_VECTOR, dl,
623 Op.getNode()->getValueType(0), Vals);
617 } else { 624 } else {
618 unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8; 625 SDValue Scalarized = TLI.scalarizeVectorLoad(LD, DAG);
619 626
620 for (unsigned Idx=0; Idx<NumElem; Idx++) { 627 NewChain = Scalarized.getValue(1);
621 SDValue ScalarLoad = DAG.getExtLoad(ExtType, dl, 628 Value = Scalarized.getValue(0);
622 Op.getNode()->getValueType(0).getScalarType(), 629 }
623 Chain, BasePTR, LD->getPointerInfo().getWithOffset(Idx * Stride),
624 SrcVT.getScalarType(),
625 LD->isVolatile(), LD->isNonTemporal(), LD->isInvariant(),
626 MinAlign(LD->getAlignment(), Idx * Stride), LD->getAAInfo());
627
628 BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR,
629 DAG.getConstant(Stride, dl, BasePTR.getValueType()));
630
631 Vals.push_back(ScalarLoad.getValue(0));
632 LoadChains.push_back(ScalarLoad.getValue(1));
633 }
634 }
635
636 SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
637 SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, dl,
638 Op.getNode()->getValueType(0), Vals);
639 630
640 AddLegalizedOperand(Op.getValue(0), Value); 631 AddLegalizedOperand(Op.getValue(0), Value);
641 AddLegalizedOperand(Op.getValue(1), NewChain); 632 AddLegalizedOperand(Op.getValue(1), NewChain);
642 633
643 return (Op.getResNo() ? NewChain : Value); 634 return (Op.getResNo() ? NewChain : Value);
644 } 635 }
645 636
646 SDValue VectorLegalizer::ExpandStore(SDValue Op) { 637 SDValue VectorLegalizer::ExpandStore(SDValue Op) {
647 SDLoc dl(Op);
648 StoreSDNode *ST = cast<StoreSDNode>(Op.getNode()); 638 StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
649 SDValue Chain = ST->getChain(); 639
650 SDValue BasePTR = ST->getBasePtr();
651 SDValue Value = ST->getValue();
652 EVT StVT = ST->getMemoryVT(); 640 EVT StVT = ST->getMemoryVT();
653
654 unsigned Alignment = ST->getAlignment();
655 bool isVolatile = ST->isVolatile();
656 bool isNonTemporal = ST->isNonTemporal();
657 AAMDNodes AAInfo = ST->getAAInfo();
658
659 unsigned NumElem = StVT.getVectorNumElements();
660 // The type of the data we want to save
661 EVT RegVT = Value.getValueType();
662 EVT RegSclVT = RegVT.getScalarType();
663 // The type of data as saved in memory.
664 EVT MemSclVT = StVT.getScalarType(); 641 EVT MemSclVT = StVT.getScalarType();
665
666 // Cast floats into integers
667 unsigned ScalarSize = MemSclVT.getSizeInBits(); 642 unsigned ScalarSize = MemSclVT.getSizeInBits();
668 643
669 // Round odd types to the next pow of two. 644 // Round odd types to the next pow of two.
670 if (!isPowerOf2_32(ScalarSize)) 645 if (!isPowerOf2_32(ScalarSize)) {
671 ScalarSize = NextPowerOf2(ScalarSize); 646 // FIXME: This is completely broken and inconsistent with ExpandLoad
672 647 // handling.
673 // Store Stride in bytes 648
674 unsigned Stride = ScalarSize/8; 649 // For sub-byte element sizes, this ends up with 0 stride between elements,
675 // Extract each of the elements from the original vector 650 // so the same element just gets re-written to the same location. There seem
676 // and save them into memory individually. 651 // to be tests explicitly testing for this broken
677 SmallVector<SDValue, 8> Stores; 652 // behavior though.
678 for (unsigned Idx = 0; Idx < NumElem; Idx++) { 653
679 SDValue Ex = DAG.getNode( 654 LLVMContext &Ctx = *DAG.getContext();
680 ISD::EXTRACT_VECTOR_ELT, dl, RegSclVT, Value, 655
681 DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); 656 EVT NewMemVT
682 657 = EVT::getVectorVT(Ctx,
683 // This scalar TruncStore may be illegal, but we legalize it later. 658 MemSclVT.getIntegerVT(Ctx, NextPowerOf2(ScalarSize)),
684 SDValue Store = DAG.getTruncStore(Chain, dl, Ex, BasePTR, 659 StVT.getVectorNumElements());
685 ST->getPointerInfo().getWithOffset(Idx*Stride), MemSclVT, 660
686 isVolatile, isNonTemporal, MinAlign(Alignment, Idx*Stride), 661 SDValue NewVectorStore = DAG.getTruncStore(
687 AAInfo); 662 ST->getChain(), SDLoc(Op), ST->getValue(), ST->getBasePtr(),
688 663 ST->getPointerInfo(), NewMemVT, ST->getAlignment(),
689 BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR, 664 ST->getMemOperand()->getFlags(), ST->getAAInfo());
690 DAG.getConstant(Stride, dl, BasePTR.getValueType())); 665 ST = cast<StoreSDNode>(NewVectorStore.getNode());
691 666 }
692 Stores.push_back(Store); 667
693 } 668 SDValue TF = TLI.scalarizeVectorStore(ST, DAG);
694 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
695 AddLegalizedOperand(Op, TF); 669 AddLegalizedOperand(Op, TF);
696 return TF; 670 return TF;
697 } 671 }
698 672
699 SDValue VectorLegalizer::Expand(SDValue Op) { 673 SDValue VectorLegalizer::Expand(SDValue Op) {
718 return ExpandFNEG(Op); 692 return ExpandFNEG(Op);
719 case ISD::SETCC: 693 case ISD::SETCC:
720 return UnrollVSETCC(Op); 694 return UnrollVSETCC(Op);
721 case ISD::BITREVERSE: 695 case ISD::BITREVERSE:
722 return ExpandBITREVERSE(Op); 696 return ExpandBITREVERSE(Op);
697 case ISD::CTLZ:
723 case ISD::CTLZ_ZERO_UNDEF: 698 case ISD::CTLZ_ZERO_UNDEF:
699 return ExpandCTLZ(Op);
724 case ISD::CTTZ_ZERO_UNDEF: 700 case ISD::CTTZ_ZERO_UNDEF:
725 return ExpandCTLZ_CTTZ_ZERO_UNDEF(Op); 701 return ExpandCTTZ_ZERO_UNDEF(Op);
726 default: 702 default:
727 return DAG.UnrollVectorOp(Op.getNode()); 703 return DAG.UnrollVectorOp(Op.getNode());
728 } 704 }
729 } 705 }
730 706
795 return DAG.UnrollVectorOp(Op.getNode()); 771 return DAG.UnrollVectorOp(Op.getNode());
796 772
797 SDLoc DL(Op); 773 SDLoc DL(Op);
798 EVT OrigTy = cast<VTSDNode>(Op->getOperand(1))->getVT(); 774 EVT OrigTy = cast<VTSDNode>(Op->getOperand(1))->getVT();
799 775
800 unsigned BW = VT.getScalarType().getSizeInBits(); 776 unsigned BW = VT.getScalarSizeInBits();
801 unsigned OrigBW = OrigTy.getScalarType().getSizeInBits(); 777 unsigned OrigBW = OrigTy.getScalarSizeInBits();
802 SDValue ShiftSz = DAG.getConstant(BW - OrigBW, DL, VT); 778 SDValue ShiftSz = DAG.getConstant(BW - OrigBW, DL, VT);
803 779
804 Op = Op.getOperand(0); 780 Op = Op.getOperand(0);
805 Op = DAG.getNode(ISD::SHL, DL, VT, Op, ShiftSz); 781 Op = DAG.getNode(ISD::SHL, DL, VT, Op, ShiftSz);
806 return DAG.getNode(ISD::SRA, DL, VT, Op, ShiftSz); 782 return DAG.getNode(ISD::SRA, DL, VT, Op, ShiftSz);
842 Op = DAG.getAnyExtendVectorInReg(Src, DL, VT); 818 Op = DAG.getAnyExtendVectorInReg(Src, DL, VT);
843 819
844 // Now we need sign extend. Do this by shifting the elements. Even if these 820 // Now we need sign extend. Do this by shifting the elements. Even if these
845 // aren't legal operations, they have a better chance of being legalized 821 // aren't legal operations, they have a better chance of being legalized
846 // without full scalarization than the sign extension does. 822 // without full scalarization than the sign extension does.
847 unsigned EltWidth = VT.getVectorElementType().getSizeInBits(); 823 unsigned EltWidth = VT.getScalarSizeInBits();
848 unsigned SrcEltWidth = SrcVT.getVectorElementType().getSizeInBits(); 824 unsigned SrcEltWidth = SrcVT.getScalarSizeInBits();
849 SDValue ShiftAmount = DAG.getConstant(EltWidth - SrcEltWidth, DL, VT); 825 SDValue ShiftAmount = DAG.getConstant(EltWidth - SrcEltWidth, DL, VT);
850 return DAG.getNode(ISD::SRA, DL, VT, 826 return DAG.getNode(ISD::SRA, DL, VT,
851 DAG.getNode(ISD::SHL, DL, VT, Op, ShiftAmount), 827 DAG.getNode(ISD::SHL, DL, VT, Op, ShiftAmount),
852 ShiftAmount); 828 ShiftAmount);
853 } 829 }
862 SDValue Src = Op.getOperand(0); 838 SDValue Src = Op.getOperand(0);
863 EVT SrcVT = Src.getValueType(); 839 EVT SrcVT = Src.getValueType();
864 int NumSrcElements = SrcVT.getVectorNumElements(); 840 int NumSrcElements = SrcVT.getVectorNumElements();
865 841
866 // Build up a zero vector to blend into this one. 842 // Build up a zero vector to blend into this one.
867 EVT SrcScalarVT = SrcVT.getScalarType(); 843 SDValue Zero = DAG.getConstant(0, DL, SrcVT);
868 SDValue ScalarZero = DAG.getTargetConstant(0, DL, SrcScalarVT);
869 SmallVector<SDValue, 4> BuildVectorOperands(NumSrcElements, ScalarZero);
870 SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, DL, SrcVT, BuildVectorOperands);
871 844
872 // Shuffle the incoming lanes into the correct position, and pull all other 845 // Shuffle the incoming lanes into the correct position, and pull all other
873 // lanes from the zero vector. 846 // lanes from the zero vector.
874 SmallVector<int, 16> ShuffleMask; 847 SmallVector<int, 16> ShuffleMask;
875 ShuffleMask.reserve(NumSrcElements); 848 ShuffleMask.reserve(NumSrcElements);
883 856
884 return DAG.getNode(ISD::BITCAST, DL, VT, 857 return DAG.getNode(ISD::BITCAST, DL, VT,
885 DAG.getVectorShuffle(SrcVT, DL, Zero, Src, ShuffleMask)); 858 DAG.getVectorShuffle(SrcVT, DL, Zero, Src, ShuffleMask));
886 } 859 }
887 860
888 SDValue VectorLegalizer::ExpandBSWAP(SDValue Op) { 861 static void createBSWAPShuffleMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) {
889 EVT VT = Op.getValueType();
890
891 // Generate a byte wise shuffle mask for the BSWAP.
892 SmallVector<int, 16> ShuffleMask;
893 int ScalarSizeInBytes = VT.getScalarSizeInBits() / 8; 862 int ScalarSizeInBytes = VT.getScalarSizeInBits() / 8;
894 for (int I = 0, E = VT.getVectorNumElements(); I != E; ++I) 863 for (int I = 0, E = VT.getVectorNumElements(); I != E; ++I)
895 for (int J = ScalarSizeInBytes - 1; J >= 0; --J) 864 for (int J = ScalarSizeInBytes - 1; J >= 0; --J)
896 ShuffleMask.push_back((I * ScalarSizeInBytes) + J); 865 ShuffleMask.push_back((I * ScalarSizeInBytes) + J);
897 866 }
867
868 SDValue VectorLegalizer::ExpandBSWAP(SDValue Op) {
869 EVT VT = Op.getValueType();
870
871 // Generate a byte wise shuffle mask for the BSWAP.
872 SmallVector<int, 16> ShuffleMask;
873 createBSWAPShuffleMask(VT, ShuffleMask);
898 EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, ShuffleMask.size()); 874 EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, ShuffleMask.size());
899 875
900 // Only emit a shuffle if the mask is legal. 876 // Only emit a shuffle if the mask is legal.
901 if (!TLI.isShuffleMaskLegal(ShuffleMask, ByteVT)) 877 if (!TLI.isShuffleMaskLegal(ShuffleMask, ByteVT))
902 return DAG.UnrollVectorOp(Op.getNode()); 878 return DAG.UnrollVectorOp(Op.getNode());
903 879
904 SDLoc DL(Op); 880 SDLoc DL(Op);
905 Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Op.getOperand(0)); 881 Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Op.getOperand(0));
906 Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT), 882 Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT), ShuffleMask);
907 ShuffleMask.data());
908 return DAG.getNode(ISD::BITCAST, DL, VT, Op); 883 return DAG.getNode(ISD::BITCAST, DL, VT, Op);
909 } 884 }
910 885
911 SDValue VectorLegalizer::ExpandBITREVERSE(SDValue Op) { 886 SDValue VectorLegalizer::ExpandBITREVERSE(SDValue Op) {
912 EVT VT = Op.getValueType(); 887 EVT VT = Op.getValueType();
913 888
914 // If we have the scalar operation, it's probably cheaper to unroll it. 889 // If we have the scalar operation, it's probably cheaper to unroll it.
915 if (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, VT.getScalarType())) 890 if (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, VT.getScalarType()))
916 return DAG.UnrollVectorOp(Op.getNode()); 891 return DAG.UnrollVectorOp(Op.getNode());
892
893 // If the vector element width is a whole number of bytes, test if it's legal
894 // to BSWAP shuffle the bytes and then perform the BITREVERSE on the byte
895 // vector. This greatly reduces the number of bit shifts necessary.
896 unsigned ScalarSizeInBits = VT.getScalarSizeInBits();
897 if (ScalarSizeInBits > 8 && (ScalarSizeInBits % 8) == 0) {
898 SmallVector<int, 16> BSWAPMask;
899 createBSWAPShuffleMask(VT, BSWAPMask);
900
901 EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, BSWAPMask.size());
902 if (TLI.isShuffleMaskLegal(BSWAPMask, ByteVT) &&
903 (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, ByteVT) ||
904 (TLI.isOperationLegalOrCustom(ISD::SHL, ByteVT) &&
905 TLI.isOperationLegalOrCustom(ISD::SRL, ByteVT) &&
906 TLI.isOperationLegalOrCustomOrPromote(ISD::AND, ByteVT) &&
907 TLI.isOperationLegalOrCustomOrPromote(ISD::OR, ByteVT)))) {
908 SDLoc DL(Op);
909 Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Op.getOperand(0));
910 Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT),
911 BSWAPMask);
912 Op = DAG.getNode(ISD::BITREVERSE, DL, ByteVT, Op);
913 return DAG.getNode(ISD::BITCAST, DL, VT, Op);
914 }
915 }
917 916
918 // If we have the appropriate vector bit operations, it is better to use them 917 // If we have the appropriate vector bit operations, it is better to use them
919 // than unrolling and expanding each component. 918 // than unrolling and expanding each component.
920 if (!TLI.isOperationLegalOrCustom(ISD::SHL, VT) || 919 if (!TLI.isOperationLegalOrCustom(ISD::SHL, VT) ||
921 !TLI.isOperationLegalOrCustom(ISD::SRL, VT) || 920 !TLI.isOperationLegalOrCustom(ISD::SRL, VT) ||
922 !TLI.isOperationLegalOrCustom(ISD::AND, VT) || 921 !TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT) ||
923 !TLI.isOperationLegalOrCustom(ISD::OR, VT)) 922 !TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT))
924 return DAG.UnrollVectorOp(Op.getNode()); 923 return DAG.UnrollVectorOp(Op.getNode());
925 924
926 // Let LegalizeDAG handle this later. 925 // Let LegalizeDAG handle this later.
927 return Op; 926 return Op;
928 } 927 }
953 return DAG.UnrollVectorOp(Op.getNode()); 952 return DAG.UnrollVectorOp(Op.getNode());
954 953
955 // If the mask and the type are different sizes, unroll the vector op. This 954 // If the mask and the type are different sizes, unroll the vector op. This
956 // can occur when getSetCCResultType returns something that is different in 955 // can occur when getSetCCResultType returns something that is different in
957 // size from the operand types. For example, v4i8 = select v4i32, v4i8, v4i8. 956 // size from the operand types. For example, v4i8 = select v4i32, v4i8, v4i8.
958 if (VT.getSizeInBits() != Op1.getValueType().getSizeInBits()) 957 if (VT.getSizeInBits() != Op1.getValueSizeInBits())
959 return DAG.UnrollVectorOp(Op.getNode()); 958 return DAG.UnrollVectorOp(Op.getNode());
960 959
961 // Bitcast the operands to be the same type as the mask. 960 // Bitcast the operands to be the same type as the mask.
962 // This is needed when we select between FP types because 961 // This is needed when we select between FP types because
963 // the mask is a vector of integers. 962 // the mask is a vector of integers.
964 Op1 = DAG.getNode(ISD::BITCAST, DL, VT, Op1); 963 Op1 = DAG.getNode(ISD::BITCAST, DL, VT, Op1);
965 Op2 = DAG.getNode(ISD::BITCAST, DL, VT, Op2); 964 Op2 = DAG.getNode(ISD::BITCAST, DL, VT, Op2);
966 965
967 SDValue AllOnes = DAG.getConstant( 966 SDValue AllOnes = DAG.getConstant(
968 APInt::getAllOnesValue(VT.getScalarType().getSizeInBits()), DL, VT); 967 APInt::getAllOnesValue(VT.getScalarSizeInBits()), DL, VT);
969 SDValue NotMask = DAG.getNode(ISD::XOR, DL, VT, Mask, AllOnes); 968 SDValue NotMask = DAG.getNode(ISD::XOR, DL, VT, Mask, AllOnes);
970 969
971 Op1 = DAG.getNode(ISD::AND, DL, VT, Op1, Mask); 970 Op1 = DAG.getNode(ISD::AND, DL, VT, Op1, Mask);
972 Op2 = DAG.getNode(ISD::AND, DL, VT, Op2, NotMask); 971 Op2 = DAG.getNode(ISD::AND, DL, VT, Op2, NotMask);
973 SDValue Val = DAG.getNode(ISD::OR, DL, VT, Op1, Op2); 972 SDValue Val = DAG.getNode(ISD::OR, DL, VT, Op1, Op2);
981 // Make sure that the SINT_TO_FP and SRL instructions are available. 980 // Make sure that the SINT_TO_FP and SRL instructions are available.
982 if (TLI.getOperationAction(ISD::SINT_TO_FP, VT) == TargetLowering::Expand || 981 if (TLI.getOperationAction(ISD::SINT_TO_FP, VT) == TargetLowering::Expand ||
983 TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Expand) 982 TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Expand)
984 return DAG.UnrollVectorOp(Op.getNode()); 983 return DAG.UnrollVectorOp(Op.getNode());
985 984
986 EVT SVT = VT.getScalarType(); 985 unsigned BW = VT.getScalarSizeInBits();
987 assert((SVT.getSizeInBits() == 64 || SVT.getSizeInBits() == 32) && 986 assert((BW == 64 || BW == 32) &&
988 "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide"); 987 "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide");
989 988
990 unsigned BW = SVT.getSizeInBits(); 989 SDValue HalfWord = DAG.getConstant(BW / 2, DL, VT);
991 SDValue HalfWord = DAG.getConstant(BW/2, DL, VT);
992 990
993 // Constants to clear the upper part of the word. 991 // Constants to clear the upper part of the word.
994 // Notice that we can also use SHL+SHR, but using a constant is slightly 992 // Notice that we can also use SHL+SHR, but using a constant is slightly
995 // faster on x86. 993 // faster on x86.
996 uint64_t HWMask = (SVT.getSizeInBits()==64)?0x00000000FFFFFFFF:0x0000FFFF; 994 uint64_t HWMask = (BW == 64) ? 0x00000000FFFFFFFF : 0x0000FFFF;
997 SDValue HalfWordMask = DAG.getConstant(HWMask, DL, VT); 995 SDValue HalfWordMask = DAG.getConstant(HWMask, DL, VT);
998 996
999 // Two to the power of half-word-size. 997 // Two to the power of half-word-size.
1000 SDValue TWOHW = DAG.getConstantFP(1 << (BW/2), DL, Op.getValueType()); 998 SDValue TWOHW = DAG.getConstantFP(1 << (BW / 2), DL, Op.getValueType());
1001 999
1002 // Clear upper part of LO, lower HI 1000 // Clear upper part of LO, lower HI
1003 SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Op.getOperand(0), HalfWord); 1001 SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Op.getOperand(0), HalfWord);
1004 SDValue LO = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), HalfWordMask); 1002 SDValue LO = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), HalfWordMask);
1005 1003
1011 SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), LO); 1009 SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), LO);
1012 1010
1013 // Add the two halves 1011 // Add the two halves
1014 return DAG.getNode(ISD::FADD, DL, Op.getValueType(), fHI, fLO); 1012 return DAG.getNode(ISD::FADD, DL, Op.getValueType(), fHI, fLO);
1015 } 1013 }
1016
1017 1014
1018 SDValue VectorLegalizer::ExpandFNEG(SDValue Op) { 1015 SDValue VectorLegalizer::ExpandFNEG(SDValue Op) {
1019 if (TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) { 1016 if (TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) {
1020 SDLoc DL(Op); 1017 SDLoc DL(Op);
1021 SDValue Zero = DAG.getConstantFP(-0.0, DL, Op.getValueType()); 1018 SDValue Zero = DAG.getConstantFP(-0.0, DL, Op.getValueType());
1024 Zero, Op.getOperand(0)); 1021 Zero, Op.getOperand(0));
1025 } 1022 }
1026 return DAG.UnrollVectorOp(Op.getNode()); 1023 return DAG.UnrollVectorOp(Op.getNode());
1027 } 1024 }
1028 1025
1029 SDValue VectorLegalizer::ExpandCTLZ_CTTZ_ZERO_UNDEF(SDValue Op) { 1026 SDValue VectorLegalizer::ExpandCTLZ(SDValue Op) {
1030 // If the non-ZERO_UNDEF version is supported we can let LegalizeDAG handle. 1027 EVT VT = Op.getValueType();
1031 unsigned Opc = Op.getOpcode() == ISD::CTLZ_ZERO_UNDEF ? ISD::CTLZ : ISD::CTTZ; 1028 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
1032 if (TLI.isOperationLegalOrCustom(Opc, Op.getValueType())) 1029
1033 return Op; 1030 // If the non-ZERO_UNDEF version is supported we can use that instead.
1031 if (Op.getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
1032 TLI.isOperationLegalOrCustom(ISD::CTLZ, VT)) {
1033 SDLoc DL(Op);
1034 return DAG.getNode(ISD::CTLZ, DL, Op.getValueType(), Op.getOperand(0));
1035 }
1036
1037 // If CTPOP is available we can lower with a CTPOP based method:
1038 // u16 ctlz(u16 x) {
1039 // x |= (x >> 1);
1040 // x |= (x >> 2);
1041 // x |= (x >> 4);
1042 // x |= (x >> 8);
1043 // return ctpop(~x);
1044 // }
1045 // Ref: "Hacker's Delight" by Henry Warren
1046 if (isPowerOf2_32(NumBitsPerElt) &&
1047 TLI.isOperationLegalOrCustom(ISD::CTPOP, VT) &&
1048 TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
1049 TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT) &&
1050 TLI.isOperationLegalOrCustomOrPromote(ISD::XOR, VT)) {
1051 SDLoc DL(Op);
1052 SDValue Res = Op.getOperand(0);
1053 EVT ShiftTy = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
1054
1055 for (unsigned i = 1; i != NumBitsPerElt; i *= 2)
1056 Res = DAG.getNode(
1057 ISD::OR, DL, VT, Res,
1058 DAG.getNode(ISD::SRL, DL, VT, Res, DAG.getConstant(i, DL, ShiftTy)));
1059
1060 Res = DAG.getNOT(DL, Res, VT);
1061 return DAG.getNode(ISD::CTPOP, DL, VT, Res);
1062 }
1063
1064 // Otherwise go ahead and unroll.
1065 return DAG.UnrollVectorOp(Op.getNode());
1066 }
1067
1068 SDValue VectorLegalizer::ExpandCTTZ_ZERO_UNDEF(SDValue Op) {
1069 // If the non-ZERO_UNDEF version is supported we can use that instead.
1070 if (TLI.isOperationLegalOrCustom(ISD::CTTZ, Op.getValueType())) {
1071 SDLoc DL(Op);
1072 return DAG.getNode(ISD::CTTZ, DL, Op.getValueType(), Op.getOperand(0));
1073 }
1034 1074
1035 // Otherwise go ahead and unroll. 1075 // Otherwise go ahead and unroll.
1036 return DAG.UnrollVectorOp(Op.getNode()); 1076 return DAG.UnrollVectorOp(Op.getNode());
1037 } 1077 }
1038 1078