diff lib/Target/Hexagon/HexagonPatterns.td @ 147:c2174574ed3a
LLVM 10
author:   Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date:     Wed, 14 Aug 2019 16:55:33 +0900
parents:  3a76565eade5
children: (none)
--- a/lib/Target/Hexagon/HexagonPatterns.td	Sat Feb 17 09:57:20 2018 +0900
+++ b/lib/Target/Hexagon/HexagonPatterns.td	Wed Aug 14 16:55:33 2019 +0900
@@ -1,9 +1,8 @@
 //==- HexagonPatterns.td - Target Description for Hexagon -*- tablegen -*-===//
 //
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
@@ -100,6 +99,17 @@
 def HWI16: PatLeaf<(VecPI16 HvxWR:$R)>;
 def HWI32: PatLeaf<(VecPI32 HvxWR:$R)>;
 
+def SDTVecVecIntOp:
+  SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<1,2>,
+                       SDTCisVT<3,i32>]>;
+
+def HexagonVALIGN: SDNode<"HexagonISD::VALIGN", SDTVecVecIntOp>;
+def HexagonVALIGNADDR: SDNode<"HexagonISD::VALIGNADDR", SDTIntUnaryOp>;
+
+def valign: PatFrag<(ops node:$Vt, node:$Vs, node:$Ru),
+                    (HexagonVALIGN node:$Vt, node:$Vs, node:$Ru)>;
+def valignaddr: PatFrag<(ops node:$Addr), (HexagonVALIGNADDR node:$Addr)>;
+
 // Pattern fragments to extract the low and high subregisters from a
 // 64-bit value.
 def LoReg: OutPatFrag<(ops node:$Rs), (EXTRACT_SUBREG (i64 $Rs), isub_lo)>;
@@ -166,6 +176,11 @@
   return CurDAG->getTargetConstant(V-32, SDLoc(N), MVT::i32);
 }]>;
 
+class Subi<int From>: SDNodeXForm<imm,
+  "int32_t V = " # From # " - N->getSExtValue();" #
+  "return CurDAG->getTargetConstant(V, SDLoc(N), MVT::i32);"
+>;
+
 def Log2_32: SDNodeXForm<imm, [{
   uint32_t V = N->getZExtValue();
   return CurDAG->getTargetConstant(Log2_32(V), SDLoc(N), MVT::i32);
 }]>;
@@ -207,6 +222,8 @@
 def I32toI1: OutPatFrag<(ops node:$Rs), (i1 (C2_cmpgtui (i32 $Rs), (i32 0)))>;
 def ToZext64: OutPatFrag<(ops node:$Rs), (i64 (A4_combineir 0, (i32 $Rs)))>;
 def ToSext64: OutPatFrag<(ops node:$Rs), (i64 (A2_sxtw (i32 $Rs)))>;
+def ToAext64: OutPatFrag<(ops node:$Rs),
+  (REG_SEQUENCE DoubleRegs, (i32 (IMPLICIT_DEF)), isub_hi, (i32 $Rs), isub_lo)>;
 
 def Combinew: OutPatFrag<(ops node:$Rs, node:$Rt),
                          (REG_SEQUENCE DoubleRegs, $Rs, isub_hi, $Rt, isub_lo)>;
@@ -235,6 +252,9 @@
 def Zext64: PatFrag<(ops node:$Rs), (i64 (zext node:$Rs))>;
 def Sext64: PatLeaf<(i64 Usxtw:$Rs)>;
 
+def azext: PatFrags<(ops node:$Rs), [(zext node:$Rs), (anyext node:$Rs)]>;
+def asext: PatFrags<(ops node:$Rs), [(sext node:$Rs), (anyext node:$Rs)]>;
+
 def: Pat<(IsOrAdd (i32 AddrFI:$Rs), s32_0ImmPred:$off),
          (PS_fi (i32 AddrFI:$Rs), imm:$off)>;
@@ -246,8 +266,25 @@
 class Not2<PatFrag P>
   : PatFrag<(ops node:$A, node:$B), (P node:$A, (not node:$B))>;
 
+// If there is a constant operand that feeds the and/or instruction,
+// do not generate the compound instructions.
+// It is not always profitable, as some times we end up with a transfer.
+// Check the below example.
+// ra = #65820; rb = lsr(rb, #8); rc ^= and (rb, ra)
+// Instead this is preferable.
+// ra = and (#65820, lsr(ra, #8)); rb = xor(rb, ra)
+class Su_ni1<PatFrag Op>
+  : PatFrag<Op.Operands, !head(Op.Fragments), [{
+      if (hasOneUse(N)){
+        // Check if Op1 is an immediate operand.
+        SDValue Op1 = N->getOperand(1);
+        return !isa<ConstantSDNode>(Op1);
+      }
+      return false;}],
+    Op.OperandTransform>;
+
 class Su<PatFrag Op>
-  : PatFrag<Op.Operands, Op.Fragment, [{ return hasOneUse(N); }],
+  : PatFrag<Op.Operands, !head(Op.Fragments), [{ return hasOneUse(N); }],
             Op.OperandTransform>;
 
 // Main selection macros.
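(A note on the new Subi<From> transform in the hunk above: it builds its C++ selection-time body by TableGen string concatenation, the # operator, rather than a [{ ... }] code block. As a rough standalone sketch of the computation it encodes — the function name here is illustrative, not part of the patch:

    #include <cstdint>

    // The emitted operand becomes From - Imm. For example, Subi<64>
    // turns a shift amount S into 64 - S, the complementary amount that
    // the funnel-shift fragments later in this diff feed to the
    // opposite-direction shift.
    int32_t subiTransform(int32_t From, int64_t Imm) {
      return static_cast<int32_t>(From - Imm);
    }

Su_ni1 then refines Su: besides requiring a single use, it rejects nodes whose second operand is a constant, for the profitability reason spelled out in the comment above.)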
@@ -271,9 +308,9 @@
          (MI RegPred:$Rx, RegPred:$Rs, imm:$I)>;
 
 class AccRRR_pat<InstHexagon MI, PatFrag AccOp, PatFrag Op,
-                 PatFrag RsPred, PatFrag RtPred>
-  : Pat<(AccOp RsPred:$Rx, (Op RsPred:$Rs, RtPred:$Rt)),
-        (MI RsPred:$Rx, RsPred:$Rs, RtPred:$Rt)>;
+                 PatFrag RxPred, PatFrag RsPred, PatFrag RtPred>
+  : Pat<(AccOp RxPred:$Rx, (Op RsPred:$Rs, RtPred:$Rt)),
+        (MI RxPred:$Rx, RsPred:$Rs, RtPred:$Rt)>;
 
 multiclass SelMinMax_pats<PatFrag CmpOp, PatFrag Val,
                           InstHexagon InstA, InstHexagon InstB> {
@@ -289,6 +326,7 @@
 def Sub: pf2<sub>;    def Or:  pf2<or>;     def Srl: pf2<srl>;
 def Mul: pf2<mul>;    def Xor: pf2<xor>;    def Shl: pf2<shl>;
+def Rol: pf2<rotl>;
 
 // --(1) Immediate -------------------------------------------------------
 //
@@ -336,38 +374,34 @@
 // --(2) Type cast -------------------------------------------------------
 //
-let Predicates = [HasV5T] in {
-  def: OpR_R_pat<F2_conv_sf2df, pf1<fpextend>, f64, F32>;
-  def: OpR_R_pat<F2_conv_df2sf, pf1<fpround>, f32, F64>;
-
-  def: OpR_R_pat<F2_conv_w2sf, pf1<sint_to_fp>, f32, I32>;
-  def: OpR_R_pat<F2_conv_d2sf, pf1<sint_to_fp>, f32, I64>;
-  def: OpR_R_pat<F2_conv_w2df, pf1<sint_to_fp>, f64, I32>;
-  def: OpR_R_pat<F2_conv_d2df, pf1<sint_to_fp>, f64, I64>;
-
-  def: OpR_R_pat<F2_conv_uw2sf, pf1<uint_to_fp>, f32, I32>;
-  def: OpR_R_pat<F2_conv_ud2sf, pf1<uint_to_fp>, f32, I64>;
-  def: OpR_R_pat<F2_conv_uw2df, pf1<uint_to_fp>, f64, I32>;
-  def: OpR_R_pat<F2_conv_ud2df, pf1<uint_to_fp>, f64, I64>;
-
-  def: OpR_R_pat<F2_conv_sf2w_chop, pf1<fp_to_sint>, i32, F32>;
-  def: OpR_R_pat<F2_conv_df2w_chop, pf1<fp_to_sint>, i32, F64>;
-  def: OpR_R_pat<F2_conv_sf2d_chop, pf1<fp_to_sint>, i64, F32>;
-  def: OpR_R_pat<F2_conv_df2d_chop, pf1<fp_to_sint>, i64, F64>;
-
-  def: OpR_R_pat<F2_conv_sf2uw_chop, pf1<fp_to_uint>, i32, F32>;
-  def: OpR_R_pat<F2_conv_df2uw_chop, pf1<fp_to_uint>, i32, F64>;
-  def: OpR_R_pat<F2_conv_sf2ud_chop, pf1<fp_to_uint>, i64, F32>;
-  def: OpR_R_pat<F2_conv_df2ud_chop, pf1<fp_to_uint>, i64, F64>;
-}
+def: OpR_R_pat<F2_conv_sf2df, pf1<fpextend>, f64, F32>;
+def: OpR_R_pat<F2_conv_df2sf, pf1<fpround>, f32, F64>;
+
+def: OpR_R_pat<F2_conv_w2sf, pf1<sint_to_fp>, f32, I32>;
+def: OpR_R_pat<F2_conv_d2sf, pf1<sint_to_fp>, f32, I64>;
+def: OpR_R_pat<F2_conv_w2df, pf1<sint_to_fp>, f64, I32>;
+def: OpR_R_pat<F2_conv_d2df, pf1<sint_to_fp>, f64, I64>;
+
+def: OpR_R_pat<F2_conv_uw2sf, pf1<uint_to_fp>, f32, I32>;
+def: OpR_R_pat<F2_conv_ud2sf, pf1<uint_to_fp>, f32, I64>;
+def: OpR_R_pat<F2_conv_uw2df, pf1<uint_to_fp>, f64, I32>;
+def: OpR_R_pat<F2_conv_ud2df, pf1<uint_to_fp>, f64, I64>;
+
+def: OpR_R_pat<F2_conv_sf2w_chop, pf1<fp_to_sint>, i32, F32>;
+def: OpR_R_pat<F2_conv_df2w_chop, pf1<fp_to_sint>, i32, F64>;
+def: OpR_R_pat<F2_conv_sf2d_chop, pf1<fp_to_sint>, i64, F32>;
+def: OpR_R_pat<F2_conv_df2d_chop, pf1<fp_to_sint>, i64, F64>;
+
+def: OpR_R_pat<F2_conv_sf2uw_chop, pf1<fp_to_uint>, i32, F32>;
+def: OpR_R_pat<F2_conv_df2uw_chop, pf1<fp_to_uint>, i32, F64>;
+def: OpR_R_pat<F2_conv_sf2ud_chop, pf1<fp_to_uint>, i64, F32>;
+def: OpR_R_pat<F2_conv_df2ud_chop, pf1<fp_to_uint>, i64, F64>;
 
 // Bitcast is different than [fp|sint|uint]_to_[sint|uint|fp].
-let Predicates = [HasV5T] in {
-  def: Pat<(i32 (bitconvert F32:$v)), (I32:$v)>;
-  def: Pat<(f32 (bitconvert I32:$v)), (F32:$v)>;
-  def: Pat<(i64 (bitconvert F64:$v)), (I64:$v)>;
-  def: Pat<(f64 (bitconvert I64:$v)), (F64:$v)>;
-}
+def: Pat<(i32 (bitconvert F32:$v)), (I32:$v)>;
+def: Pat<(f32 (bitconvert I32:$v)), (F32:$v)>;
+def: Pat<(i64 (bitconvert F64:$v)), (I64:$v)>;
+def: Pat<(f64 (bitconvert I64:$v)), (F64:$v)>;
 
 multiclass Cast_pat<ValueType Ta, ValueType Tb, RegisterClass RC> {
   def: Pat<(Tb (bitconvert (Ta RC:$Rs))), (Tb RC:$Rs)>;
@@ -391,44 +425,48 @@
 def: Pat<(sext_inreg I64:$Rs, i16), (A2_sxtw (A2_sxth (LoReg $Rs)))>;
 def: Pat<(sext_inreg I64:$Rs, i8),  (A2_sxtw (A2_sxtb (LoReg $Rs)))>;
 
-def: Pat<(i64 (sext I1:$Pu)),
-         (Combinew (C2_muxii PredRegs:$Pu, -1, 0),
-                   (C2_muxii PredRegs:$Pu, -1, 0))>;
-
-def: Pat<(i32 (sext I1:$Pu)), (C2_muxii I1:$Pu, -1, 0)>;
-def: Pat<(i32 (zext I1:$Pu)), (C2_muxii I1:$Pu, 1, 0)>;
-def: Pat<(i64 (zext I1:$Pu)), (ToZext64 (C2_muxii I1:$Pu, 1, 0))>;
-def: Pat<(v2i16 (sext V2I1:$Pu)), (S2_vtrunehb (C2_mask V2I1:$Pu))>;
-def: Pat<(v2i32 (sext V2I1:$Pu)), (C2_mask V2I1:$Pu)>;
-def: Pat<(v4i8 (sext V4I1:$Pu)), (S2_vtrunehb (C2_mask V4I1:$Pu))>;
-def: Pat<(v4i16 (sext V4I1:$Pu)), (C2_mask V4I1:$Pu)>;
-def: Pat<(v8i8 (sext V8I1:$Pu)), (C2_mask V8I1:$Pu)>;
-
 def: Pat<(i64 (sext I32:$Rs)), (A2_sxtw I32:$Rs)>;
 def: Pat<(Zext64 I32:$Rs), (ToZext64 $Rs)>;
 def: Pat<(Aext64 I32:$Rs), (ToZext64 $Rs)>;
 
 def: Pat<(i32 (trunc I64:$Rs)), (LoReg $Rs)>;
-def: Pat<(i1 (trunc I64:$Rs)), (C2_tfrrp (LoReg $Rs))>;
+def: Pat<(i1 (trunc I32:$Rs)), (S2_tstbit_i I32:$Rs, 0)>;
+def: Pat<(i1 (trunc I64:$Rs)), (S2_tstbit_i (LoReg $Rs), 0)>;
 
 let AddedComplexity = 20 in {
   def: Pat<(and I32:$Rs, 255), (A2_zxtb I32:$Rs)>;
   def: Pat<(and I32:$Rs, 65535), (A2_zxth I32:$Rs)>;
 }
 
-def: Pat<(i32 (anyext I1:$Pu)), (C2_muxii I1:$Pu, 1, 0)>;
-def: Pat<(i64 (anyext I1:$Pu)), (ToZext64 (C2_muxii I1:$Pu, 1, 0))>;
-
-def: Pat<(v8i8 (zext V8I1:$Pu)), (C2_mask V8I1:$Pu)>;
-def: Pat<(v4i16 (zext V4I1:$Pu)), (C2_mask V4I1:$Pu)>;
-def: Pat<(v2i32 (zext V2I1:$Pu)), (C2_mask V2I1:$Pu)>;
-def: Pat<(v4i8 (zext V4I1:$Pu)), (LoReg (C2_mask V4I1:$Pu))>;
-def: Pat<(v2i16 (zext V2I1:$Pu)), (LoReg (C2_mask V2I1:$Pu))>;
-
-def: Pat<(v4i16 (zext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>;
-def: Pat<(v2i32 (zext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>;
-def: Pat<(v4i16 (anyext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>;
-def: Pat<(v2i32 (anyext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>;
+// Extensions from i1 or vectors of i1.
+def: Pat<(i32 (azext I1:$Pu)), (C2_muxii I1:$Pu, 1, 0)>;
+def: Pat<(i64 (azext I1:$Pu)), (ToZext64 (C2_muxii I1:$Pu, 1, 0))>;
+def: Pat<(i32 (sext I1:$Pu)), (C2_muxii I1:$Pu, -1, 0)>;
+def: Pat<(i64 (sext I1:$Pu)), (Combinew (C2_muxii PredRegs:$Pu, -1, 0),
+                                        (C2_muxii PredRegs:$Pu, -1, 0))>;
+
+def: Pat<(v2i16 (sext V2I1:$Pu)), (S2_vtrunehb (C2_mask V2I1:$Pu))>;
+def: Pat<(v2i32 (sext V2I1:$Pu)), (C2_mask V2I1:$Pu)>;
+def: Pat<(v4i8 (sext V4I1:$Pu)), (S2_vtrunehb (C2_mask V4I1:$Pu))>;
+def: Pat<(v4i16 (sext V4I1:$Pu)), (C2_mask V4I1:$Pu)>;
+def: Pat<(v8i8 (sext V8I1:$Pu)), (C2_mask V8I1:$Pu)>;
+
+def Vsplatpi: OutPatFrag<(ops node:$V),
+                         (Combinew (A2_tfrsi $V), (A2_tfrsi $V))>;
+
+def: Pat<(v2i16 (azext V2I1:$Pu)),
+         (A2_andir (LoReg (C2_mask V2I1:$Pu)), (i32 0x00010001))>;
+def: Pat<(v2i32 (azext V2I1:$Pu)),
+         (A2_andp (C2_mask V2I1:$Pu), (A2_combineii (i32 1), (i32 1)))>;
+def: Pat<(v4i8 (azext V4I1:$Pu)),
+         (A2_andir (LoReg (C2_mask V4I1:$Pu)), (i32 0x01010101))>;
+def: Pat<(v4i16 (azext V4I1:$Pu)),
+         (A2_andp (C2_mask V4I1:$Pu), (Vsplatpi (i32 0x00010001)))>;
+def: Pat<(v8i8 (azext V8I1:$Pu)),
+         (A2_andp (C2_mask V8I1:$Pu), (Vsplatpi (i32 0x01010101)))>;
+
+def: Pat<(v4i16 (azext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>;
+def: Pat<(v2i32 (azext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>;
 
 def: Pat<(v4i16 (sext V4I8:$Rs)), (S2_vsxtbh V4I8:$Rs)>;
 def: Pat<(v2i32 (sext V2I16:$Rs)), (S2_vsxthw V2I16:$Rs)>;
@@ -470,10 +508,10 @@
 }
 
 multiclass BoolAccRRR_pat<InstHexagon MI, PatFrag AccOp, PatFrag Op> {
-  def: AccRRR_pat<MI, AccOp, Op, I1, I1>;
-  def: AccRRR_pat<MI, AccOp, Op, V2I1, V2I1>;
-  def: AccRRR_pat<MI, AccOp, Op, V4I1, V4I1>;
-  def: AccRRR_pat<MI, AccOp, Op, V8I1, V8I1>;
+  def: AccRRR_pat<MI, AccOp, Op, I1, I1, I1>;
+  def: AccRRR_pat<MI, AccOp, Op, V2I1, V2I1, V2I1>;
+  def: AccRRR_pat<MI, AccOp, Op, V4I1, V4I1, V4I1>;
+  def: AccRRR_pat<MI, AccOp, Op, V8I1, V8I1, V8I1>;
 }
 
 defm: BoolOpR_RR_pat<C2_and, And>;
@@ -518,7 +556,7 @@
 // Patfrag to convert the usual comparison patfrags (e.g. setlt) to ones
 // that reverse the order of the operands.
 class RevCmp<PatFrag F>
-  : PatFrag<(ops node:$rhs, node:$lhs), F.Fragment, F.PredicateCode,
+  : PatFrag<(ops node:$rhs, node:$lhs), !head(F.Fragments), F.PredicateCode,
             F.OperandTransform>;
 
 def: OpR_RR_pat<C2_cmpeq, seteq, i1, I32>;
@@ -562,31 +600,29 @@
 def: OpR_RR_pat<A2_vcmpwgtu, setugt, i1, V2I32>;
 def: OpR_RR_pat<A2_vcmpwgtu, setugt, v2i1, V2I32>;
 
-let Predicates = [HasV5T] in {
-  def: OpR_RR_pat<F2_sfcmpeq, seteq, i1, F32>;
-  def: OpR_RR_pat<F2_sfcmpgt, setgt, i1, F32>;
-  def: OpR_RR_pat<F2_sfcmpge, setge, i1, F32>;
-  def: OpR_RR_pat<F2_sfcmpeq, setoeq, i1, F32>;
-  def: OpR_RR_pat<F2_sfcmpgt, setogt, i1, F32>;
-  def: OpR_RR_pat<F2_sfcmpge, setoge, i1, F32>;
-  def: OpR_RR_pat<F2_sfcmpgt, RevCmp<setolt>, i1, F32>;
-  def: OpR_RR_pat<F2_sfcmpge, RevCmp<setole>, i1, F32>;
-  def: OpR_RR_pat<F2_sfcmpgt, RevCmp<setlt>, i1, F32>;
-  def: OpR_RR_pat<F2_sfcmpge, RevCmp<setle>, i1, F32>;
-  def: OpR_RR_pat<F2_sfcmpuo, setuo, i1, F32>;
-
-  def: OpR_RR_pat<F2_dfcmpeq, seteq, i1, F64>;
-  def: OpR_RR_pat<F2_dfcmpgt, setgt, i1, F64>;
-  def: OpR_RR_pat<F2_dfcmpge, setge, i1, F64>;
-  def: OpR_RR_pat<F2_dfcmpeq, setoeq, i1, F64>;
-  def: OpR_RR_pat<F2_dfcmpgt, setogt, i1, F64>;
-  def: OpR_RR_pat<F2_dfcmpge, setoge, i1, F64>;
-  def: OpR_RR_pat<F2_dfcmpgt, RevCmp<setolt>, i1, F64>;
-  def: OpR_RR_pat<F2_dfcmpge, RevCmp<setole>, i1, F64>;
-  def: OpR_RR_pat<F2_dfcmpgt, RevCmp<setlt>, i1, F64>;
-  def: OpR_RR_pat<F2_dfcmpge, RevCmp<setle>, i1, F64>;
-  def: OpR_RR_pat<F2_dfcmpuo, setuo, i1, F64>;
-}
+def: OpR_RR_pat<F2_sfcmpeq, seteq, i1, F32>;
+def: OpR_RR_pat<F2_sfcmpgt, setgt, i1, F32>;
+def: OpR_RR_pat<F2_sfcmpge, setge, i1, F32>;
+def: OpR_RR_pat<F2_sfcmpeq, setoeq, i1, F32>;
+def: OpR_RR_pat<F2_sfcmpgt, setogt, i1, F32>;
+def: OpR_RR_pat<F2_sfcmpge, setoge, i1, F32>;
+def: OpR_RR_pat<F2_sfcmpgt, RevCmp<setolt>, i1, F32>;
+def: OpR_RR_pat<F2_sfcmpge, RevCmp<setole>, i1, F32>;
+def: OpR_RR_pat<F2_sfcmpgt, RevCmp<setlt>, i1, F32>;
+def: OpR_RR_pat<F2_sfcmpge, RevCmp<setle>, i1, F32>;
+def: OpR_RR_pat<F2_sfcmpuo, setuo, i1, F32>;
+
+def: OpR_RR_pat<F2_dfcmpeq, seteq, i1, F64>;
+def: OpR_RR_pat<F2_dfcmpgt, setgt, i1, F64>;
+def: OpR_RR_pat<F2_dfcmpge, setge, i1, F64>;
+def: OpR_RR_pat<F2_dfcmpeq, setoeq, i1, F64>;
+def: OpR_RR_pat<F2_dfcmpgt, setogt, i1, F64>;
+def: OpR_RR_pat<F2_dfcmpge, setoge, i1, F64>;
+def: OpR_RR_pat<F2_dfcmpgt, RevCmp<setolt>, i1, F64>;
+def: OpR_RR_pat<F2_dfcmpge, RevCmp<setole>, i1, F64>;
+def: OpR_RR_pat<F2_dfcmpgt, RevCmp<setlt>, i1, F64>;
+def: OpR_RR_pat<F2_dfcmpge, RevCmp<setle>, i1, F64>;
+def: OpR_RR_pat<F2_dfcmpuo, setuo, i1, F64>;
 
 // Avoid C4_cmpneqi, C4_cmpltei, C4_cmplteui, since they cannot form compounds.
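(A brief aside on RevCmp, used heavily in the hunk above: it rebinds a comparison fragment's operands in reverse order, so a pattern such as RevCmp<setolt> is matched by the greater-than compare with swapped inputs. A minimal C++ model of the identities being relied on; ordered comparisons are false when either input is NaN, which plain C++ comparisons already provide:

    // a < b  <=>  b > a, so RevCmp<setolt> selects F2_sfcmpgt(b, a);
    // a <= b <=>  b >= a, so RevCmp<setole> selects F2_sfcmpge(b, a).
    bool setolt(float a, float b) { return b > a; }
    bool setole(float a, float b) { return b >= a; }
)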
@@ -597,27 +633,40 @@
 def: Pat<(i1 (setule I32:$Rs, anyimm:$u5)),
          (C2_not (C2_cmpgtui I32:$Rs, imm:$u5))>;
 
-def: Pat<(i1 (setne I32:$Rs, I32:$Rt)),
-         (C2_not (C2_cmpeq I32:$Rs, I32:$Rt))>;
-def: Pat<(i1 (setle I32:$Rs, I32:$Rt)),
-         (C2_not (C2_cmpgt I32:$Rs, I32:$Rt))>;
-def: Pat<(i1 (setule I32:$Rs, I32:$Rt)),
-         (C2_not (C2_cmpgtu I32:$Rs, I32:$Rt))>;
-def: Pat<(i1 (setge I32:$Rs, I32:$Rt)),
-         (C2_not (C2_cmpgt I32:$Rt, I32:$Rs))>;
-def: Pat<(i1 (setuge I32:$Rs, I32:$Rt)),
-         (C2_not (C2_cmpgtu I32:$Rt, I32:$Rs))>;
-
-def: Pat<(i1 (setle I64:$Rs, I64:$Rt)),
-         (C2_not (C2_cmpgtp I64:$Rs, I64:$Rt))>;
-def: Pat<(i1 (setne I64:$Rs, I64:$Rt)),
-         (C2_not (C2_cmpeqp I64:$Rs, I64:$Rt))>;
-def: Pat<(i1 (setge I64:$Rs, I64:$Rt)),
-         (C2_not (C2_cmpgtp I64:$Rt, I64:$Rs))>;
-def: Pat<(i1 (setuge I64:$Rs, I64:$Rt)),
-         (C2_not (C2_cmpgtup I64:$Rt, I64:$Rs))>;
-def: Pat<(i1 (setule I64:$Rs, I64:$Rt)),
-         (C2_not (C2_cmpgtup I64:$Rs, I64:$Rt))>;
+class OpmR_RR_pat<PatFrag Output, PatFrag Op, ValueType ResType,
+                  PatFrag RsPred, PatFrag RtPred = RsPred>
+  : Pat<(ResType (Op RsPred:$Rs, RtPred:$Rt)),
+        (Output RsPred:$Rs, RtPred:$Rt)>;
+
+class Outn<InstHexagon MI>
+  : OutPatFrag<(ops node:$Rs, node:$Rt),
+               (C2_not (MI $Rs, $Rt))>;
+
+def: OpmR_RR_pat<Outn<C2_cmpeq>, setne, i1, I32>;
+def: OpmR_RR_pat<Outn<C2_cmpgt>, setle, i1, I32>;
+def: OpmR_RR_pat<Outn<C2_cmpgtu>, setule, i1, I32>;
+def: OpmR_RR_pat<Outn<C2_cmpgt>, RevCmp<setge>, i1, I32>;
+def: OpmR_RR_pat<Outn<C2_cmpgtu>, RevCmp<setuge>, i1, I32>;
+def: OpmR_RR_pat<Outn<C2_cmpeqp>, setne, i1, I64>;
+def: OpmR_RR_pat<Outn<C2_cmpgtp>, setle, i1, I64>;
+def: OpmR_RR_pat<Outn<C2_cmpgtup>, setule, i1, I64>;
+def: OpmR_RR_pat<Outn<C2_cmpgtp>, RevCmp<setge>, i1, I64>;
+def: OpmR_RR_pat<Outn<C2_cmpgtup>, RevCmp<setuge>, i1, I64>;
+def: OpmR_RR_pat<Outn<A2_vcmpbeq>, setne, v8i1, V8I8>;
+def: OpmR_RR_pat<Outn<A4_vcmpbgt>, setle, v8i1, V8I8>;
+def: OpmR_RR_pat<Outn<A2_vcmpbgtu>, setule, v8i1, V8I8>;
+def: OpmR_RR_pat<Outn<A4_vcmpbgt>, RevCmp<setge>, v8i1, V8I8>;
+def: OpmR_RR_pat<Outn<A2_vcmpbgtu>, RevCmp<setuge>, v8i1, V8I8>;
+def: OpmR_RR_pat<Outn<A2_vcmpheq>, setne, v4i1, V4I16>;
+def: OpmR_RR_pat<Outn<A2_vcmphgt>, setle, v4i1, V4I16>;
+def: OpmR_RR_pat<Outn<A2_vcmphgtu>, setule, v4i1, V4I16>;
+def: OpmR_RR_pat<Outn<A2_vcmphgt>, RevCmp<setge>, v4i1, V4I16>;
+def: OpmR_RR_pat<Outn<A2_vcmphgtu>, RevCmp<setuge>, v4i1, V4I16>;
+def: OpmR_RR_pat<Outn<A2_vcmpweq>, setne, v2i1, V2I32>;
+def: OpmR_RR_pat<Outn<A2_vcmpwgt>, setle, v2i1, V2I32>;
+def: OpmR_RR_pat<Outn<A2_vcmpwgtu>, setule, v2i1, V2I32>;
+def: OpmR_RR_pat<Outn<A2_vcmpwgt>, RevCmp<setge>, v2i1, V2I32>;
+def: OpmR_RR_pat<Outn<A2_vcmpwgtu>, RevCmp<setuge>, v2i1, V2I32>;
 
 let AddedComplexity = 100 in {
   def: Pat<(i1 (seteq (and (xor I32:$Rs, I32:$Rt), 255), 0)),
@@ -679,25 +728,10 @@
 def: Pat<(i32 (zext (i1 (setne I32:$Rs, anyimm:$s8)))),
          (A4_rcmpneqi I32:$Rs, imm:$s8)>;
 
-def: Pat<(i1 (setne I1:$Ps, I1:$Pt)),
-         (C2_xor I1:$Ps, I1:$Pt)>;
-
-def: Pat<(i1 (seteq V4I8:$Rs, V4I8:$Rt)),
-         (A2_vcmpbeq (ToZext64 $Rs), (ToZext64 $Rt))>;
-def: Pat<(i1 (setgt V4I8:$Rs, V4I8:$Rt)),
-         (A4_vcmpbgt (ToZext64 $Rs), (ToZext64 $Rt))>;
-def: Pat<(i1 (setugt V4I8:$Rs, V4I8:$Rt)),
-         (A2_vcmpbgtu (ToZext64 $Rs), (ToZext64 $Rt))>;
-
-def: Pat<(i1 (seteq V2I16:$Rs, V2I16:$Rt)),
-         (A2_vcmpheq (ToZext64 $Rs), (ToZext64 $Rt))>;
-def: Pat<(i1 (setgt V2I16:$Rs, V2I16:$Rt)),
-         (A2_vcmphgt (ToZext64 $Rs), (ToZext64 $Rt))>;
-def: Pat<(i1 (setugt V2I16:$Rs, V2I16:$Rt)),
-         (A2_vcmphgtu (ToZext64 $Rs), (ToZext64 $Rt))>;
-
-def: Pat<(v2i1 (setne V2I32:$Rs, V2I32:$Rt)),
-         (C2_not (v2i1 (A2_vcmpbeq V2I32:$Rs, V2I32:$Rt)))>;
+def: Pat<(i1 (seteq I1:$Ps, (i1 -1))), (I1:$Ps)>;
+def: Pat<(i1 (setne I1:$Ps, (i1 -1))), (C2_not I1:$Ps)>;
+def: Pat<(i1 (seteq I1:$Ps, I1:$Pt)), (C2_xor I1:$Ps, (C2_not I1:$Pt))>;
+def: Pat<(i1 (setne I1:$Ps, I1:$Pt)), (C2_xor I1:$Ps, I1:$Pt)>;
 
 // Floating-point comparisons with checks for ordered/unordered status.
@@ -705,47 +739,34 @@
   : OutPatFrag<(ops node:$Rs, node:$Rt),
                (MI1 (MI2 $Rs, $Rt), (MI3 $Rs, $Rt))>;
 
-class OpmR_RR_pat<PatFrag Output, PatFrag Op, ValueType ResType,
-                  PatFrag RsPred, PatFrag RtPred = RsPred>
-  : Pat<(ResType (Op RsPred:$Rs, RtPred:$Rt)),
-        (Output RsPred:$Rs, RtPred:$Rt)>;
-
 class Cmpuf<InstHexagon MI>: T3<C2_or, F2_sfcmpuo, MI>;
 class Cmpud<InstHexagon MI>: T3<C2_or, F2_dfcmpuo, MI>;
 
 class Cmpufn<InstHexagon MI>: T3<C2_orn, F2_sfcmpuo, MI>;
 class Cmpudn<InstHexagon MI>: T3<C2_orn, F2_dfcmpuo, MI>;
 
-let Predicates = [HasV5T] in {
-  def: OpmR_RR_pat<Cmpuf<F2_sfcmpeq>, setueq, i1, F32>;
-  def: OpmR_RR_pat<Cmpuf<F2_sfcmpge>, setuge, i1, F32>;
-  def: OpmR_RR_pat<Cmpuf<F2_sfcmpgt>, setugt, i1, F32>;
-  def: OpmR_RR_pat<Cmpuf<F2_sfcmpge>, RevCmp<setule>, i1, F32>;
-  def: OpmR_RR_pat<Cmpuf<F2_sfcmpgt>, RevCmp<setult>, i1, F32>;
-  def: OpmR_RR_pat<Cmpufn<F2_sfcmpeq>, setune, i1, F32>;
-
-  def: OpmR_RR_pat<Cmpud<F2_dfcmpeq>, setueq, i1, F64>;
-  def: OpmR_RR_pat<Cmpud<F2_dfcmpge>, setuge, i1, F64>;
-  def: OpmR_RR_pat<Cmpud<F2_dfcmpgt>, setugt, i1, F64>;
-  def: OpmR_RR_pat<Cmpud<F2_dfcmpge>, RevCmp<setule>, i1, F64>;
-  def: OpmR_RR_pat<Cmpud<F2_dfcmpgt>, RevCmp<setult>, i1, F64>;
-  def: OpmR_RR_pat<Cmpudn<F2_dfcmpeq>, setune, i1, F64>;
-}
-
-class Outn<InstHexagon MI>
-  : OutPatFrag<(ops node:$Rs, node:$Rt),
-               (C2_not (MI $Rs, $Rt))>;
-
-let Predicates = [HasV5T] in {
-  def: OpmR_RR_pat<Outn<F2_sfcmpeq>, setone, i1, F32>;
-  def: OpmR_RR_pat<Outn<F2_sfcmpeq>, setne, i1, F32>;
-
-  def: OpmR_RR_pat<Outn<F2_dfcmpeq>, setone, i1, F64>;
-  def: OpmR_RR_pat<Outn<F2_dfcmpeq>, setne, i1, F64>;
-
-  def: OpmR_RR_pat<Outn<F2_sfcmpuo>, seto, i1, F32>;
-  def: OpmR_RR_pat<Outn<F2_dfcmpuo>, seto, i1, F64>;
-}
+def: OpmR_RR_pat<Cmpuf<F2_sfcmpeq>, setueq, i1, F32>;
+def: OpmR_RR_pat<Cmpuf<F2_sfcmpge>, setuge, i1, F32>;
+def: OpmR_RR_pat<Cmpuf<F2_sfcmpgt>, setugt, i1, F32>;
+def: OpmR_RR_pat<Cmpuf<F2_sfcmpge>, RevCmp<setule>, i1, F32>;
+def: OpmR_RR_pat<Cmpuf<F2_sfcmpgt>, RevCmp<setult>, i1, F32>;
+def: OpmR_RR_pat<Cmpufn<F2_sfcmpeq>, setune, i1, F32>;
+
+def: OpmR_RR_pat<Cmpud<F2_dfcmpeq>, setueq, i1, F64>;
+def: OpmR_RR_pat<Cmpud<F2_dfcmpge>, setuge, i1, F64>;
+def: OpmR_RR_pat<Cmpud<F2_dfcmpgt>, setugt, i1, F64>;
+def: OpmR_RR_pat<Cmpud<F2_dfcmpge>, RevCmp<setule>, i1, F64>;
+def: OpmR_RR_pat<Cmpud<F2_dfcmpgt>, RevCmp<setult>, i1, F64>;
+def: OpmR_RR_pat<Cmpudn<F2_dfcmpeq>, setune, i1, F64>;
+
+def: OpmR_RR_pat<Outn<F2_sfcmpeq>, setone, i1, F32>;
+def: OpmR_RR_pat<Outn<F2_sfcmpeq>, setne, i1, F32>;
+
+def: OpmR_RR_pat<Outn<F2_dfcmpeq>, setone, i1, F64>;
+def: OpmR_RR_pat<Outn<F2_dfcmpeq>, setne, i1, F64>;
+
+def: OpmR_RR_pat<Outn<F2_sfcmpuo>, seto, i1, F32>;
+def: OpmR_RR_pat<Outn<F2_dfcmpuo>, seto, i1, F64>;
 
 // --(6) Select ----------------------------------------------------------
@@ -775,32 +796,30 @@
          (Combinew (C2_mux I1:$Pu, (HiReg $Rs), (HiReg $Rt)),
                    (C2_mux I1:$Pu, (LoReg $Rs), (LoReg $Rt)))>;
 
-let Predicates = [HasV5T] in {
-  def: Pat<(select I1:$Pu, F32:$Rs, f32ImmPred:$I),
-           (C2_muxir I1:$Pu, F32:$Rs, (ftoi $I))>;
-  def: Pat<(select I1:$Pu, f32ImmPred:$I, F32:$Rt),
-           (C2_muxri I1:$Pu, (ftoi $I), F32:$Rt)>;
-  def: Pat<(select I1:$Pu, F32:$Rs, F32:$Rt),
-           (C2_mux I1:$Pu, F32:$Rs, F32:$Rt)>;
-  def: Pat<(select I1:$Pu, F64:$Rs, F64:$Rt),
-           (Combinew (C2_mux I1:$Pu, (HiReg $Rs), (HiReg $Rt)),
-                     (C2_mux I1:$Pu, (LoReg $Rs), (LoReg $Rt)))>;
-
-  def: Pat<(select (i1 (setult F32:$Ra, F32:$Rb)), F32:$Rs, F32:$Rt),
-           (C2_mux (F2_sfcmpgt F32:$Rb, F32:$Ra), F32:$Rs, F32:$Rt)>;
-  def: Pat<(select (i1 (setult F64:$Ra, F64:$Rb)), F64:$Rs, F64:$Rt),
-           (C2_vmux (F2_dfcmpgt F64:$Rb, F64:$Ra), F64:$Rs, F64:$Rt)>;
-
-  def: Pat<(select (not I1:$Pu), f32ImmPred:$I, F32:$Rs),
-           (C2_muxir I1:$Pu, F32:$Rs, (ftoi $I))>;
-  def: Pat<(select (not I1:$Pu), F32:$Rt, f32ImmPred:$I),
-           (C2_muxri I1:$Pu, (ftoi $I), F32:$Rt)>;
-}
+def: Pat<(select I1:$Pu, F32:$Rs, f32ImmPred:$I),
+         (C2_muxir I1:$Pu, F32:$Rs, (ftoi $I))>;
+def: Pat<(select I1:$Pu, f32ImmPred:$I, F32:$Rt),
+         (C2_muxri I1:$Pu, (ftoi $I), F32:$Rt)>;
+def: Pat<(select I1:$Pu, F32:$Rs, F32:$Rt),
+         (C2_mux I1:$Pu, F32:$Rs, F32:$Rt)>;
+def: Pat<(select I1:$Pu, F64:$Rs, F64:$Rt),
+         (Combinew (C2_mux I1:$Pu, (HiReg $Rs), (HiReg $Rt)),
+                   (C2_mux I1:$Pu, (LoReg $Rs), (LoReg $Rt)))>;
+
+def: Pat<(select (i1 (setult F32:$Ra, F32:$Rb)), F32:$Rs, F32:$Rt),
+         (C2_mux (F2_sfcmpgt F32:$Rb, F32:$Ra), F32:$Rs, F32:$Rt)>;
+def: Pat<(select (i1 (setult F64:$Ra, F64:$Rb)), F64:$Rs, F64:$Rt),
+         (C2_vmux (F2_dfcmpgt F64:$Rb, F64:$Ra), F64:$Rs, F64:$Rt)>;
+
+def: Pat<(select (not I1:$Pu), f32ImmPred:$I, F32:$Rs),
+         (C2_muxir I1:$Pu, F32:$Rs, (ftoi $I))>;
+def: Pat<(select (not I1:$Pu), F32:$Rt, f32ImmPred:$I),
+         (C2_muxri I1:$Pu, (ftoi $I), F32:$Rt)>;
 
 def: Pat<(select I1:$Pu, V4I8:$Rs, V4I8:$Rt),
-         (LoReg (C2_vmux I1:$Pu, (ToZext64 $Rs), (ToZext64 $Rt)))>;
+         (LoReg (C2_vmux I1:$Pu, (ToAext64 $Rs), (ToAext64 $Rt)))>;
 def: Pat<(select I1:$Pu, V2I16:$Rs, V2I16:$Rt),
-         (LoReg (C2_vmux I1:$Pu, (ToZext64 $Rs), (ToZext64 $Rt)))>;
+         (LoReg (C2_vmux I1:$Pu, (ToAext64 $Rs), (ToAext64 $Rt)))>;
 def: Pat<(select I1:$Pu, V2I32:$Rs, V2I32:$Rt),
          (Combinew (C2_mux I1:$Pu, (HiReg $Rs), (HiReg $Rt)),
                    (C2_mux I1:$Pu, (LoReg $Rs), (LoReg $Rt)))>;
@@ -863,7 +882,7 @@
   defm: SelMinMax_pats<setult, I64, A2_minup, A2_maxup>;
 }
 
-let AddedComplexity = 100, Predicates = [HasV5T] in {
+let AddedComplexity = 100 in {
   defm: SelMinMax_pats<setolt, F32, F2_sfmin, F2_sfmax>;
   defm: SelMinMax_pats<setole, F32, F2_sfmin, F2_sfmax>;
   defm: SelMinMax_pats<setogt, F32, F2_sfmax, F2_sfmin>;
@@ -919,7 +938,7 @@
 let AddedComplexity = 10 in
 def: Pat<(v8i8 (HexagonVSPLAT I32:$Rs)), (S6_vsplatrbp I32:$Rs)>,
-     Requires<[HasV62T]>;
+     Requires<[HasV62]>;
 
 def: Pat<(v8i8 (HexagonVSPLAT I32:$Rs)),
          (Combinew (S2_vsplatrb I32:$Rs), (S2_vsplatrb I32:$Rs))>;
@@ -980,11 +999,95 @@
 def: OpR_RR_pat<S2_lsr_r_p, Srl, i64, I64, I32>;
 def: OpR_RR_pat<S2_asl_r_p, Shl, i64, I64, I32>;
 
+// Funnel shifts.
+def IsMul8_U3: PatLeaf<(i32 imm), [{
+  uint64_t V = N->getZExtValue();
+  return V % 8 == 0 && isUInt<3>(V / 8);
+}]>;
+
+def Divu8: SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(N->getZExtValue() / 8, SDLoc(N), MVT::i32);
+}]>;
+
+// Funnel shift-left.
+def FShl32i: OutPatFrag<(ops node:$Rs, node:$Rt, node:$S),
+  (HiReg (S2_asl_i_p (Combinew $Rs, $Rt), $S))>;
+def FShl32r: OutPatFrag<(ops node:$Rs, node:$Rt, node:$Ru),
+  (HiReg (S2_asl_r_p (Combinew $Rs, $Rt), $Ru))>;
+
+def FShl64i: OutPatFrag<(ops node:$Rs, node:$Rt, node:$S),
+  (S2_lsr_i_p_or (S2_asl_i_p $Rt, $S), $Rs, (Subi<64> $S))>;
+def FShl64r: OutPatFrag<(ops node:$Rs, node:$Rt, node:$Ru),
+  (S2_lsr_r_p_or (S2_asl_r_p $Rt, $Ru), $Rs, (A2_subri 64, $Ru))>;
+
+// Combined SDNodeXForm: (Divu8 (Subi<64> $S))
+def Divu64_8: SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant((64 - N->getSExtValue()) / 8,
+                                   SDLoc(N), MVT::i32);
+}]>;
+
+// Special cases:
+let AddedComplexity = 100 in {
+  def: Pat<(fshl I32:$Rs, I32:$Rt, (i32 16)),
+           (A2_combine_hl I32:$Rs, I32:$Rt)>;
+  def: Pat<(fshl I64:$Rs, I64:$Rt, IsMul8_U3:$S),
+           (S2_valignib I64:$Rs, I64:$Rt, (Divu64_8 $S))>;
+}
+
+let Predicates = [HasV60], AddedComplexity = 50 in {
+  def: OpR_RI_pat<S6_rol_i_r, Rol, i32, I32, u5_0ImmPred>;
+  def: OpR_RI_pat<S6_rol_i_p, Rol, i64, I64, u6_0ImmPred>;
+}
+let AddedComplexity = 30 in {
+  def: Pat<(rotl I32:$Rs, u5_0ImmPred:$S), (FShl32i $Rs, $Rs, imm:$S)>;
+  def: Pat<(rotl I64:$Rs, u6_0ImmPred:$S), (FShl64i $Rs, $Rs, imm:$S)>;
+  def: Pat<(fshl I32:$Rs, I32:$Rt, u5_0ImmPred:$S), (FShl32i $Rs, $Rt, imm:$S)>;
+  def: Pat<(fshl I64:$Rs, I64:$Rt, u6_0ImmPred:$S), (FShl64i $Rs, $Rt, imm:$S)>;
+}
+def: Pat<(rotl I32:$Rs, I32:$Rt), (FShl32r $Rs, $Rs, $Rt)>;
+def: Pat<(rotl I64:$Rs, I32:$Rt), (FShl64r $Rs, $Rs, $Rt)>;
+def: Pat<(fshl I32:$Rs, I32:$Rt, I32:$Ru), (FShl32r $Rs, $Rt, $Ru)>;
+def: Pat<(fshl I64:$Rs, I64:$Rt, I32:$Ru), (FShl64r $Rs, $Rt, $Ru)>;
+
+// Funnel shift-right.
+def FShr32i: OutPatFrag<(ops node:$Rs, node:$Rt, node:$S),
+  (LoReg (S2_lsr_i_p (Combinew $Rs, $Rt), $S))>;
+def FShr32r: OutPatFrag<(ops node:$Rs, node:$Rt, node:$Ru),
+  (LoReg (S2_lsr_r_p (Combinew $Rs, $Rt), $Ru))>;
+
+def FShr64i: OutPatFrag<(ops node:$Rs, node:$Rt, node:$S),
+  (S2_asl_i_p_or (S2_lsr_i_p $Rt, $S), $Rs, (Subi<64> $S))>;
+def FShr64r: OutPatFrag<(ops node:$Rs, node:$Rt, node:$Ru),
+  (S2_asl_r_p_or (S2_lsr_r_p $Rt, $Ru), $Rs, (A2_subri 64, $Ru))>;
+
+// Special cases:
+let AddedComplexity = 100 in {
+  def: Pat<(fshr I32:$Rs, I32:$Rt, (i32 16)),
+           (A2_combine_hl I32:$Rs, I32:$Rt)>;
+  def: Pat<(fshr I64:$Rs, I64:$Rt, IsMul8_U3:$S),
+           (S2_valignib I64:$Rs, I64:$Rt, (Divu8 $S))>;
+}
+
+let Predicates = [HasV60], AddedComplexity = 50 in {
+  def: Pat<(rotr I32:$Rs, u5_0ImmPred:$S), (S6_rol_i_r I32:$Rs, (Subi<32> $S))>;
+  def: Pat<(rotr I64:$Rs, u6_0ImmPred:$S), (S6_rol_i_p I64:$Rs, (Subi<64> $S))>;
+}
+let AddedComplexity = 30 in {
+  def: Pat<(rotr I32:$Rs, u5_0ImmPred:$S), (FShr32i $Rs, $Rs, imm:$S)>;
+  def: Pat<(rotr I64:$Rs, u6_0ImmPred:$S), (FShr64i $Rs, $Rs, imm:$S)>;
+  def: Pat<(fshr I32:$Rs, I32:$Rt, u5_0ImmPred:$S), (FShr32i $Rs, $Rt, imm:$S)>;
+  def: Pat<(fshr I64:$Rs, I64:$Rt, u6_0ImmPred:$S), (FShr64i $Rs, $Rt, imm:$S)>;
+}
+def: Pat<(rotr I32:$Rs, I32:$Rt), (FShr32r $Rs, $Rs, $Rt)>;
+def: Pat<(rotr I64:$Rs, I32:$Rt), (FShr64r $Rs, $Rs, $Rt)>;
+def: Pat<(fshr I32:$Rs, I32:$Rt, I32:$Ru), (FShr32r $Rs, $Rt, $Ru)>;
+def: Pat<(fshr I64:$Rs, I64:$Rt, I32:$Ru), (FShr64r $Rs, $Rt, $Ru)>;
+
 def: Pat<(sra (add (sra I32:$Rs, u5_0ImmPred:$u5), 1), (i32 1)),
          (S2_asr_i_r_rnd I32:$Rs, imm:$u5)>;
 def: Pat<(sra (add (sra I64:$Rs, u6_0ImmPred:$u6), 1), (i32 1)),
-         (S2_asr_i_p_rnd I64:$Rs, imm:$u6)>, Requires<[HasV5T]>;
+         (S2_asr_i_p_rnd I64:$Rs, imm:$u6)>;
 
 // Prefer S2_addasl_rrri over S2_asl_i_r_acc.
 let AddedComplexity = 120 in
@@ -1025,41 +1128,55 @@
   def: AccRRI_pat<S2_asl_i_p_and, And, Su<Shl>, I64, u6_0ImmPred>;
   def: AccRRI_pat<S2_asl_i_p_or, Or, Su<Shl>, I64, u6_0ImmPred>;
   def: AccRRI_pat<S2_asl_i_p_xacc, Xor, Su<Shl>, I64, u6_0ImmPred>;
+
+  let Predicates = [HasV60] in {
+    def: AccRRI_pat<S6_rol_i_r_acc, Add, Su<Rol>, I32, u5_0ImmPred>;
+    def: AccRRI_pat<S6_rol_i_r_nac, Sub, Su<Rol>, I32, u5_0ImmPred>;
+    def: AccRRI_pat<S6_rol_i_r_and, And, Su<Rol>, I32, u5_0ImmPred>;
+    def: AccRRI_pat<S6_rol_i_r_or, Or, Su<Rol>, I32, u5_0ImmPred>;
+    def: AccRRI_pat<S6_rol_i_r_xacc, Xor, Su<Rol>, I32, u5_0ImmPred>;
+
+    def: AccRRI_pat<S6_rol_i_p_acc, Add, Su<Rol>, I64, u6_0ImmPred>;
+    def: AccRRI_pat<S6_rol_i_p_nac, Sub, Su<Rol>, I64, u6_0ImmPred>;
+    def: AccRRI_pat<S6_rol_i_p_and, And, Su<Rol>, I64, u6_0ImmPred>;
+    def: AccRRI_pat<S6_rol_i_p_or, Or, Su<Rol>, I64, u6_0ImmPred>;
+    def: AccRRI_pat<S6_rol_i_p_xacc, Xor, Su<Rol>, I64, u6_0ImmPred>;
+  }
 }
 
 let AddedComplexity = 100 in {
-  def: AccRRR_pat<S2_asr_r_r_acc, Add, Su<Sra>, I32, I32>;
-  def: AccRRR_pat<S2_asr_r_r_nac, Sub, Su<Sra>, I32, I32>;
-  def: AccRRR_pat<S2_asr_r_r_and, And, Su<Sra>, I32, I32>;
-  def: AccRRR_pat<S2_asr_r_r_or, Or, Su<Sra>, I32, I32>;
-
-  def: AccRRR_pat<S2_asr_r_p_acc, Add, Su<Sra>, I64, I32>;
-  def: AccRRR_pat<S2_asr_r_p_nac, Sub, Su<Sra>, I64, I32>;
-  def: AccRRR_pat<S2_asr_r_p_and, And, Su<Sra>, I64, I32>;
-  def: AccRRR_pat<S2_asr_r_p_or, Or, Su<Sra>, I64, I32>;
-  def: AccRRR_pat<S2_asr_r_p_xor, Xor, Su<Sra>, I64, I32>;
-
-  def: AccRRR_pat<S2_lsr_r_r_acc, Add, Su<Srl>, I32, I32>;
-  def: AccRRR_pat<S2_lsr_r_r_nac, Sub, Su<Srl>, I32, I32>;
-  def: AccRRR_pat<S2_lsr_r_r_and, And, Su<Srl>, I32, I32>;
-  def: AccRRR_pat<S2_lsr_r_r_or, Or, Su<Srl>, I32, I32>;
-
-  def: AccRRR_pat<S2_lsr_r_p_acc, Add, Su<Srl>, I64, I32>;
-  def: AccRRR_pat<S2_lsr_r_p_nac, Sub, Su<Srl>, I64, I32>;
-  def: AccRRR_pat<S2_lsr_r_p_and, And, Su<Srl>, I64, I32>;
-  def: AccRRR_pat<S2_lsr_r_p_or, Or, Su<Srl>, I64, I32>;
-  def: AccRRR_pat<S2_lsr_r_p_xor, Xor, Su<Srl>, I64, I32>;
-
-  def: AccRRR_pat<S2_asl_r_r_acc, Add, Su<Shl>, I32, I32>;
-  def: AccRRR_pat<S2_asl_r_r_nac, Sub, Su<Shl>, I32, I32>;
-  def: AccRRR_pat<S2_asl_r_r_and, And, Su<Shl>, I32, I32>;
-  def: AccRRR_pat<S2_asl_r_r_or, Or, Su<Shl>, I32, I32>;
-
-  def: AccRRR_pat<S2_asl_r_p_acc, Add, Su<Shl>, I64, I32>;
-  def: AccRRR_pat<S2_asl_r_p_nac, Sub, Su<Shl>, I64, I32>;
-  def: AccRRR_pat<S2_asl_r_p_and, And, Su<Shl>, I64, I32>;
-  def: AccRRR_pat<S2_asl_r_p_or, Or, Su<Shl>, I64, I32>;
-  def: AccRRR_pat<S2_asl_r_p_xor, Xor, Su<Shl>, I64, I32>;
+  def: AccRRR_pat<S2_asr_r_r_acc, Add, Su<Sra>, I32, I32, I32>;
+  def: AccRRR_pat<S2_asr_r_r_nac, Sub, Su<Sra>, I32, I32, I32>;
+  def: AccRRR_pat<S2_asr_r_r_and, And, Su<Sra>, I32, I32, I32>;
+  def: AccRRR_pat<S2_asr_r_r_or, Or, Su<Sra>, I32, I32, I32>;
+
+  def: AccRRR_pat<S2_asr_r_p_acc, Add, Su<Sra>, I64, I64, I32>;
+  def: AccRRR_pat<S2_asr_r_p_nac, Sub, Su<Sra>, I64, I64, I32>;
+  def: AccRRR_pat<S2_asr_r_p_and, And, Su<Sra>, I64, I64, I32>;
+  def: AccRRR_pat<S2_asr_r_p_or, Or, Su<Sra>, I64, I64, I32>;
+  def: AccRRR_pat<S2_asr_r_p_xor, Xor, Su<Sra>, I64, I64, I32>;
+
+  def: AccRRR_pat<S2_lsr_r_r_acc, Add, Su<Srl>, I32, I32, I32>;
+  def: AccRRR_pat<S2_lsr_r_r_nac, Sub, Su<Srl>, I32, I32, I32>;
+  def: AccRRR_pat<S2_lsr_r_r_and, And, Su<Srl>, I32, I32, I32>;
+  def: AccRRR_pat<S2_lsr_r_r_or, Or, Su<Srl>, I32, I32, I32>;
+
+  def: AccRRR_pat<S2_lsr_r_p_acc, Add, Su<Srl>, I64, I64, I32>;
+  def: AccRRR_pat<S2_lsr_r_p_nac, Sub, Su<Srl>, I64, I64, I32>;
+  def: AccRRR_pat<S2_lsr_r_p_and, And, Su<Srl>, I64, I64, I32>;
+  def: AccRRR_pat<S2_lsr_r_p_or, Or, Su<Srl>, I64, I64, I32>;
+  def: AccRRR_pat<S2_lsr_r_p_xor, Xor, Su<Srl>, I64, I64, I32>;
+
+  def: AccRRR_pat<S2_asl_r_r_acc, Add, Su<Shl>, I32, I32, I32>;
+  def: AccRRR_pat<S2_asl_r_r_nac, Sub, Su<Shl>, I32, I32, I32>;
+  def: AccRRR_pat<S2_asl_r_r_and, And, Su<Shl>, I32, I32, I32>;
+  def: AccRRR_pat<S2_asl_r_r_or, Or, Su<Shl>, I32, I32, I32>;
+
+  def: AccRRR_pat<S2_asl_r_p_acc, Add, Su<Shl>, I64, I64, I32>;
+  def: AccRRR_pat<S2_asl_r_p_nac, Sub, Su<Shl>, I64, I64, I32>;
+  def: AccRRR_pat<S2_asl_r_p_and, And, Su<Shl>, I64, I64, I32>;
+  def: AccRRR_pat<S2_asl_r_p_or, Or, Su<Shl>, I64, I64, I32>;
+  def: AccRRR_pat<S2_asl_r_p_xor, Xor, Su<Shl>, I64, I64, I32>;
 }
 
@@ -1092,14 +1209,6 @@
            (Combinew (A2_combine_ll I32:$d, I32:$c),
                      (A2_combine_ll I32:$b, I32:$a))>;
 
-def: Pat<(or (or (shl (or (shl (i32 (extloadi8 (add I32:$b, 3))),
-                               (i32 8)),
-                          (i32 (zextloadi8 (add I32:$b, 2)))),
-                      (i32 16)),
-                 (shl (i32 (zextloadi8 (add I32:$b, 1))), (i32 8))),
-             (zextloadi8 I32:$b)),
-         (A2_swiz (L2_loadri_io IntRegs:$b, 0))>;
-
 let AddedComplexity = 200 in {
   def: Pat<(or (shl I32:$Rt, (i32 16)), (and I32:$Rs, (i32 65535))),
            (A2_combine_ll I32:$Rt, I32:$Rs)>;
@@ -1145,32 +1254,38 @@
 def: Pat<(shl V4I16:$b, (v4i16 (HexagonVSPLAT u4_0ImmPred:$c))),
          (S2_asl_i_vh V4I16:$b, imm:$c)>;
 
+def: Pat<(HexagonVASR V2I16:$Rs, u4_0ImmPred:$S),
+         (LoReg (S2_asr_i_vh (ToAext64 $Rs), imm:$S))>;
+def: Pat<(HexagonVASL V2I16:$Rs, u4_0ImmPred:$S),
+         (LoReg (S2_asl_i_vh (ToAext64 $Rs), imm:$S))>;
+def: Pat<(HexagonVLSR V2I16:$Rs, u4_0ImmPred:$S),
+         (LoReg (S2_lsr_i_vh (ToAext64 $Rs), imm:$S))>;
+def: Pat<(HexagonVASR V2I16:$Rs, I32:$Rt),
+         (LoReg (S2_asr_i_vh (ToAext64 $Rs), I32:$Rt))>;
+def: Pat<(HexagonVASL V2I16:$Rs, I32:$Rt),
+         (LoReg (S2_asl_i_vh (ToAext64 $Rs), I32:$Rt))>;
+def: Pat<(HexagonVLSR V2I16:$Rs, I32:$Rt),
+         (LoReg (S2_lsr_i_vh (ToAext64 $Rs), I32:$Rt))>;
+
 // --(9) Arithmetic/bitwise ----------------------------------------------
 //
 
-def: Pat<(abs I32:$Rs), (A2_abs I32:$Rs)>;
-def: Pat<(not I32:$Rs), (A2_subri -1, I32:$Rs)>;
-def: Pat<(not I64:$Rs), (A2_notp I64:$Rs)>;
-
-let Predicates = [HasV5T] in {
-  def: Pat<(fabs F32:$Rs), (S2_clrbit_i F32:$Rs, 31)>;
-  def: Pat<(fneg F32:$Rs), (S2_togglebit_i F32:$Rs, 31)>;
-
-  def: Pat<(fabs F64:$Rs),
-           (Combinew (S2_clrbit_i (HiReg $Rs), 31),
-                     (i32 (LoReg $Rs)))>;
-  def: Pat<(fneg F64:$Rs),
-           (Combinew (S2_togglebit_i (HiReg $Rs), 31),
-                     (i32 (LoReg $Rs)))>;
-}
-
-let AddedComplexity = 50 in
-def: Pat<(xor (add (sra I32:$Rs, (i32 31)),
-                   I32:$Rs),
-              (sra I32:$Rs, (i32 31))),
-         (A2_abs I32:$Rs)>;
-
+def: Pat<(abs I32:$Rs), (A2_abs I32:$Rs)>;
+def: Pat<(abs I64:$Rs), (A2_absp I64:$Rs)>;
+def: Pat<(not I32:$Rs), (A2_subri -1, I32:$Rs)>;
+def: Pat<(not I64:$Rs), (A2_notp I64:$Rs)>;
+def: Pat<(ineg I64:$Rs), (A2_negp I64:$Rs)>;
+
+def: Pat<(fabs F32:$Rs), (S2_clrbit_i F32:$Rs, 31)>;
+def: Pat<(fneg F32:$Rs), (S2_togglebit_i F32:$Rs, 31)>;
+
+def: Pat<(fabs F64:$Rs),
+         (Combinew (S2_clrbit_i (HiReg $Rs), 31),
+                   (i32 (LoReg $Rs)))>;
+def: Pat<(fneg F64:$Rs),
+         (Combinew (S2_togglebit_i (HiReg $Rs), 31),
+                   (i32 (LoReg $Rs)))>;
 
 def: Pat<(add I32:$Rs, anyimm:$s16), (A2_addi I32:$Rs, imm:$s16)>;
 def: Pat<(or I32:$Rs, anyimm:$s10), (A2_orir I32:$Rs, imm:$s10)>;
@@ -1200,18 +1315,20 @@
 def: OpR_RR_pat<A2_vsubh, Sub, v4i16, V4I16>;
 def: OpR_RR_pat<A2_vsubw, Sub, v2i32, V2I32>;
 
+def: OpR_RR_pat<A2_and, And, v4i8, V4I8>;
+def: OpR_RR_pat<A2_xor, Xor, v4i8, V4I8>;
+def: OpR_RR_pat<A2_or, Or, v4i8, V4I8>;
 def: OpR_RR_pat<A2_and, And, v2i16, V2I16>;
 def: OpR_RR_pat<A2_xor, Xor, v2i16, V2I16>;
 def: OpR_RR_pat<A2_or, Or, v2i16, V2I16>;
-
 def: OpR_RR_pat<A2_andp, And, v8i8, V8I8>;
-def: OpR_RR_pat<A2_andp, And, v4i16, V4I16>;
-def: OpR_RR_pat<A2_andp, And, v2i32, V2I32>;
 def: OpR_RR_pat<A2_orp, Or, v8i8, V8I8>;
+def: OpR_RR_pat<A2_xorp, Xor, v8i8, V8I8>;
+def: OpR_RR_pat<A2_andp, And, v4i16, V4I16>;
 def: OpR_RR_pat<A2_orp, Or, v4i16, V4I16>;
+def: OpR_RR_pat<A2_xorp, Xor, v4i16, V4I16>;
+def: OpR_RR_pat<A2_andp, And, v2i32, V2I32>;
 def: OpR_RR_pat<A2_orp, Or, v2i32, V2I32>;
-def: OpR_RR_pat<A2_xorp, Xor, v8i8, V8I8>;
-def: OpR_RR_pat<A2_xorp, Xor, v4i16, V4I16>;
 def: OpR_RR_pat<A2_xorp, Xor, v2i32, V2I32>;
 
 def: OpR_RR_pat<M2_mpyi, Mul, i32, I32>;
@@ -1234,12 +1351,15 @@
 def: OpR_RR_pat<C2_and, Mul, v4i1, V4I1>;
 def: OpR_RR_pat<C2_and, Mul, v8i1, V8I1>;
 
-let Predicates = [HasV5T] in {
-  def: OpR_RR_pat<F2_sfadd, pf2<fadd>, f32, F32>;
-  def: OpR_RR_pat<F2_sfsub, pf2<fsub>, f32, F32>;
-  def: OpR_RR_pat<F2_sfmpy, pf2<fmul>, f32, F32>;
-  def: OpR_RR_pat<F2_sfmin, pf2<fminnum>, f32, F32>;
-  def: OpR_RR_pat<F2_sfmax, pf2<fmaxnum>, f32, F32>;
+def: OpR_RR_pat<F2_sfadd, pf2<fadd>, f32, F32>;
+def: OpR_RR_pat<F2_sfsub, pf2<fsub>, f32, F32>;
+def: OpR_RR_pat<F2_sfmpy, pf2<fmul>, f32, F32>;
+def: OpR_RR_pat<F2_sfmin, pf2<fminnum>, f32, F32>;
+def: OpR_RR_pat<F2_sfmax, pf2<fmaxnum>, f32, F32>;
+
+let Predicates = [HasV66] in {
+  def: OpR_RR_pat<F2_dfadd, pf2<fadd>, f64, F64>;
+  def: OpR_RR_pat<F2_dfsub, pf2<fsub>, f64, F64>;
 }
 
 // In expressions like a0*b0 + a1*b1 + ..., prefer to generate multiply-add,
@@ -1247,12 +1367,14 @@
 let AddedComplexity = 10 in {
   def: AccRRI_pat<M2_macsip, Add, Su<Mul>, I32, u32_0ImmPred>;
   def: AccRRI_pat<M2_macsin, Sub, Su<Mul>, I32, u32_0ImmPred>;
-  def: AccRRR_pat<M2_maci, Add, Su<Mul>, I32, I32>;
+  def: AccRRR_pat<M2_maci, Add, Su<Mul>, I32, I32, I32>;
+  let Predicates = [HasV66] in
+    def: AccRRR_pat<M2_mnaci, Sub, Su<Mul>, I32, I32, I32>;
 }
 
 def: AccRRI_pat<M2_naccii, Sub, Su<Add>, I32, s32_0ImmPred>;
 def: AccRRI_pat<M2_accii, Add, Su<Add>, I32, s32_0ImmPred>;
-def: AccRRR_pat<M2_acci, Add, Su<Add>, I32, I32>;
+def: AccRRR_pat<M2_acci, Add, Su<Add>, I32, I32, I32>;
 
 // Mulh for vectors
 //
@@ -1320,24 +1442,24 @@
 def: Pat<(add Sext64:$Rs, I64:$Rt),
         (A2_addsp (LoReg Sext64:$Rs), I64:$Rt)>;
 
-def: AccRRR_pat<M4_and_and, And, Su<And>, I32, I32>;
-def: AccRRR_pat<M4_and_or, And, Su<Or>, I32, I32>;
-def: AccRRR_pat<M4_and_xor, And, Su<Xor>, I32, I32>;
-def: AccRRR_pat<M4_or_and, Or, Su<And>, I32, I32>;
-def: AccRRR_pat<M4_or_or, Or, Su<Or>, I32, I32>;
-def: AccRRR_pat<M4_or_xor, Or, Su<Xor>, I32, I32>;
-def: AccRRR_pat<M4_xor_and, Xor, Su<And>, I32, I32>;
-def: AccRRR_pat<M4_xor_or, Xor, Su<Or>, I32, I32>;
-def: AccRRR_pat<M2_xor_xacc, Xor, Su<Xor>, I32, I32>;
-def: AccRRR_pat<M4_xor_xacc, Xor, Su<Xor>, I64, I64>;
+def: AccRRR_pat<M4_and_and, And, Su_ni1<And>, I32, I32, I32>;
+def: AccRRR_pat<M4_and_or, And, Su_ni1<Or>, I32, I32, I32>;
+def: AccRRR_pat<M4_and_xor, And, Su<Xor>, I32, I32, I32>;
+def: AccRRR_pat<M4_or_and, Or, Su_ni1<And>, I32, I32, I32>;
+def: AccRRR_pat<M4_or_or, Or, Su_ni1<Or>, I32, I32, I32>;
+def: AccRRR_pat<M4_or_xor, Or, Su<Xor>, I32, I32, I32>;
+def: AccRRR_pat<M4_xor_and, Xor, Su_ni1<And>, I32, I32, I32>;
+def: AccRRR_pat<M4_xor_or, Xor, Su_ni1<Or>, I32, I32, I32>;
+def: AccRRR_pat<M2_xor_xacc, Xor, Su<Xor>, I32, I32, I32>;
+def: AccRRR_pat<M4_xor_xacc, Xor, Su<Xor>, I64, I64, I64>;
 
 // For dags like (or (and (not _), _), (shl _, _)) where the "or" with
 // one argument matches the patterns below, and with the other argument
 // matches S2_asl_r_r_or, etc, prefer the patterns below.
 let AddedComplexity = 110 in {  // greater than S2_asl_r_r_and/or/xor.
-  def: AccRRR_pat<M4_and_andn, And, Su<Not2<And>>, I32, I32>;
-  def: AccRRR_pat<M4_or_andn, Or, Su<Not2<And>>, I32, I32>;
-  def: AccRRR_pat<M4_xor_andn, Xor, Su<Not2<And>>, I32, I32>;
+  def: AccRRR_pat<M4_and_andn, And, Su<Not2<And>>, I32, I32, I32>;
+  def: AccRRR_pat<M4_or_andn, Or, Su<Not2<And>>, I32, I32, I32>;
+  def: AccRRR_pat<M4_xor_andn, Xor, Su<Not2<And>>, I32, I32, I32>;
 }
 
 // S4_addaddi and S4_subaddi don't have tied operands, so give them
@@ -1473,14 +1595,12 @@
            (M4_mpyrr_addr IntRegs:$Ru, IntRegs:$Ry, IntRegs:$Rs)>;
 
-let Predicates = [HasV5T] in {
-  def: Pat<(fma F32:$Rs, F32:$Rt, F32:$Rx),
-           (F2_sffma F32:$Rx, F32:$Rs, F32:$Rt)>;
-  def: Pat<(fma (fneg F32:$Rs), F32:$Rt, F32:$Rx),
-           (F2_sffms F32:$Rx, F32:$Rs, F32:$Rt)>;
-  def: Pat<(fma F32:$Rs, (fneg F32:$Rt), F32:$Rx),
-           (F2_sffms F32:$Rx, F32:$Rs, F32:$Rt)>;
-}
+def: Pat<(fma F32:$Rs, F32:$Rt, F32:$Rx),
+         (F2_sffma F32:$Rx, F32:$Rs, F32:$Rt)>;
+def: Pat<(fma (fneg F32:$Rs), F32:$Rt, F32:$Rx),
+         (F2_sffms F32:$Rx, F32:$Rs, F32:$Rt)>;
+def: Pat<(fma F32:$Rs, (fneg F32:$Rt), F32:$Rx),
+         (F2_sffms F32:$Rx, F32:$Rs, F32:$Rt)>;
 
 def: Pat<(mul V2I32:$Rs, V2I32:$Rt),
@@ -1491,9 +1611,9 @@
 // Add/subtract two v4i8: Hexagon does not have an insn for this one, so
 // we use the double add v8i8, and use only the low part of the result.
 def: Pat<(add V4I8:$Rs, V4I8:$Rt),
-         (LoReg (A2_vaddub (ToZext64 $Rs), (ToZext64 $Rt)))>;
+         (LoReg (A2_vaddub (ToAext64 $Rs), (ToAext64 $Rt)))>;
 def: Pat<(sub V4I8:$Rs, V4I8:$Rt),
-         (LoReg (A2_vsubub (ToZext64 $Rs), (ToZext64 $Rt)))>;
+         (LoReg (A2_vsubub (ToAext64 $Rs), (ToAext64 $Rt)))>;
 
 // Use M2_vmpy2s_s0 for half-word vector multiply. It multiplies two
 // half-words, and saturates the result to a 32-bit value, except the
@@ -1507,14 +1627,12 @@
 // Multiplies two v4i8 vectors.
 def: Pat<(v4i8 (mul V4I8:$Rs, V4I8:$Rt)),
-         (S2_vtrunehb (M5_vmpybuu V4I8:$Rs, V4I8:$Rt))>,
-     Requires<[HasV5T]>;
+         (S2_vtrunehb (M5_vmpybuu V4I8:$Rs, V4I8:$Rt))>;
 
 // Multiplies two v8i8 vectors.
 def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)),
          (Combinew (S2_vtrunehb (M5_vmpybuu (HiReg $Rs), (HiReg $Rt))),
-                   (S2_vtrunehb (M5_vmpybuu (LoReg $Rs), (LoReg $Rt))))>,
-     Requires<[HasV5T]>;
+                   (S2_vtrunehb (M5_vmpybuu (LoReg $Rs), (LoReg $Rt))))>;
 
 // --(10) Bit ------------------------------------------------------------
@@ -1827,7 +1945,12 @@
   defm: Loadxi_pat<zextloadv2i8, v2i16, anyimm1, L2_loadbzw2_io>;
   defm: Loadxi_pat<zextloadv4i8, v4i16, anyimm2, L2_loadbzw4_io>;
   defm: Loadxi_pat<load, i32, anyimm2, L2_loadri_io>;
+  defm: Loadxi_pat<load, v2i16, anyimm2, L2_loadri_io>;
+  defm: Loadxi_pat<load, v4i8, anyimm2, L2_loadri_io>;
   defm: Loadxi_pat<load, i64, anyimm3, L2_loadrd_io>;
+  defm: Loadxi_pat<load, v2i32, anyimm3, L2_loadrd_io>;
+  defm: Loadxi_pat<load, v4i16, anyimm3, L2_loadrd_io>;
+  defm: Loadxi_pat<load, v8i8, anyimm3, L2_loadrd_io>;
  defm: Loadxi_pat<load, f32, anyimm2, L2_loadri_io>;
   defm: Loadxi_pat<load, f64, anyimm3, L2_loadrd_io>;
   // No sextloadi1.
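(The v4i8 add/sub patterns a few hunks up are worth a gloss: Hexagon has no 32-bit byte-vector add, so the patterns widen both operands into the low half of a 64-bit pair — ToAext64 leaves the high half undefined, which is fine because it is discarded — perform the eight-lane A2_vaddub, and keep only the low word. A sketch of the equivalent lane arithmetic in plain C++, function name mine:

    #include <cstdint>

    // Lane-wise modulo-256 add of two v4i8 values packed in uint32_t,
    // mirroring LoReg(A2_vaddub(ToAext64(a), ToAext64(b))).
    uint32_t addV4I8(uint32_t a, uint32_t b) {
      uint32_t r = 0;
      for (int lane = 0; lane < 4; ++lane) {
        uint32_t s = (((a >> (8 * lane)) & 0xFFu) +
                      ((b >> (8 * lane)) & 0xFFu)) & 0xFFu;
        r |= s << (8 * lane);
      }
      return r;
    }
)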
@@ -1839,10 +1962,10 @@
 }
 
 let AddedComplexity = 30 in {
-  defm: Loadxim_pat<extloadi1, i64, ToZext64, anyimm0, L2_loadrub_io>;
-  defm: Loadxim_pat<extloadi8, i64, ToZext64, anyimm0, L2_loadrub_io>;
-  defm: Loadxim_pat<extloadi16, i64, ToZext64, anyimm1, L2_loadruh_io>;
-  defm: Loadxim_pat<extloadi32, i64, ToZext64, anyimm2, L2_loadri_io>;
+  defm: Loadxim_pat<extloadi1, i64, ToAext64, anyimm0, L2_loadrub_io>;
+  defm: Loadxim_pat<extloadi8, i64, ToAext64, anyimm0, L2_loadrub_io>;
+  defm: Loadxim_pat<extloadi16, i64, ToAext64, anyimm1, L2_loadruh_io>;
+  defm: Loadxim_pat<extloadi32, i64, ToAext64, anyimm2, L2_loadri_io>;
   defm: Loadxim_pat<zextloadi1, i64, ToZext64, anyimm0, L2_loadrub_io>;
   defm: Loadxim_pat<zextloadi8, i64, ToZext64, anyimm0, L2_loadrub_io>;
   defm: Loadxim_pat<zextloadi16, i64, ToZext64, anyimm1, L2_loadruh_io>;
@@ -1865,68 +1988,83 @@
   def: Loadxu_pat<zextloadi16, i32, anyimm1, L4_loadruh_ur>;
   def: Loadxu_pat<zextloadv2i8, v2i16, anyimm1, L4_loadbzw2_ur>;
   def: Loadxu_pat<zextloadv4i8, v4i16, anyimm2, L4_loadbzw4_ur>;
+  def: Loadxu_pat<load, i32, anyimm2, L4_loadri_ur>;
+  def: Loadxu_pat<load, v2i16, anyimm2, L4_loadri_ur>;
+  def: Loadxu_pat<load, v4i8, anyimm2, L4_loadri_ur>;
+  def: Loadxu_pat<load, i64, anyimm3, L4_loadrd_ur>;
+  def: Loadxu_pat<load, v2i32, anyimm3, L4_loadrd_ur>;
+  def: Loadxu_pat<load, v4i16, anyimm3, L4_loadrd_ur>;
+  def: Loadxu_pat<load, v8i8, anyimm3, L4_loadrd_ur>;
   def: Loadxu_pat<load, f32, anyimm2, L4_loadri_ur>;
   def: Loadxu_pat<load, f64, anyimm3, L4_loadrd_ur>;
-  def: Loadxu_pat<load, i32, anyimm2, L4_loadri_ur>;
-  def: Loadxu_pat<load, i64, anyimm3, L4_loadrd_ur>;
 
   def: Loadxum_pat<sextloadi8, i64, anyimm0, ToSext64, L4_loadrb_ur>;
   def: Loadxum_pat<zextloadi8, i64, anyimm0, ToZext64, L4_loadrub_ur>;
-  def: Loadxum_pat<extloadi8, i64, anyimm0, ToZext64, L4_loadrub_ur>;
+  def: Loadxum_pat<extloadi8, i64, anyimm0, ToAext64, L4_loadrub_ur>;
   def: Loadxum_pat<sextloadi16, i64, anyimm1, ToSext64, L4_loadrh_ur>;
   def: Loadxum_pat<zextloadi16, i64, anyimm1, ToZext64, L4_loadruh_ur>;
-  def: Loadxum_pat<extloadi16, i64, anyimm1, ToZext64, L4_loadruh_ur>;
+  def: Loadxum_pat<extloadi16, i64, anyimm1, ToAext64, L4_loadruh_ur>;
   def: Loadxum_pat<sextloadi32, i64, anyimm2, ToSext64, L4_loadri_ur>;
   def: Loadxum_pat<zextloadi32, i64, anyimm2, ToZext64, L4_loadri_ur>;
-  def: Loadxum_pat<extloadi32, i64, anyimm2, ToZext64, L4_loadri_ur>;
+  def: Loadxum_pat<extloadi32, i64, anyimm2, ToAext64, L4_loadri_ur>;
 }
 
 let AddedComplexity = 40 in {
-  def: Loadxr_shl_pat<extloadi8, i32, L4_loadrub_rr>;
-  def: Loadxr_shl_pat<zextloadi8, i32, L4_loadrub_rr>;
-  def: Loadxr_shl_pat<sextloadi8, i32, L4_loadrb_rr>;
-  def: Loadxr_shl_pat<extloadi16, i32, L4_loadruh_rr>;
-  def: Loadxr_shl_pat<zextloadi16, i32, L4_loadruh_rr>;
-  def: Loadxr_shl_pat<sextloadi16, i32, L4_loadrh_rr>;
-  def: Loadxr_shl_pat<load, i32, L4_loadri_rr>;
-  def: Loadxr_shl_pat<load, i64, L4_loadrd_rr>;
-  def: Loadxr_shl_pat<load, f32, L4_loadri_rr>;
-  def: Loadxr_shl_pat<load, f64, L4_loadrd_rr>;
+  def: Loadxr_shl_pat<extloadi8, i32, L4_loadrub_rr>;
+  def: Loadxr_shl_pat<zextloadi8, i32, L4_loadrub_rr>;
+  def: Loadxr_shl_pat<sextloadi8, i32, L4_loadrb_rr>;
+  def: Loadxr_shl_pat<extloadi16, i32, L4_loadruh_rr>;
+  def: Loadxr_shl_pat<zextloadi16, i32, L4_loadruh_rr>;
+  def: Loadxr_shl_pat<sextloadi16, i32, L4_loadrh_rr>;
+  def: Loadxr_shl_pat<load, i32, L4_loadri_rr>;
+  def: Loadxr_shl_pat<load, v2i16, L4_loadri_rr>;
+  def: Loadxr_shl_pat<load, v4i8, L4_loadri_rr>;
+  def: Loadxr_shl_pat<load, i64, L4_loadrd_rr>;
+  def: Loadxr_shl_pat<load, v2i32, L4_loadrd_rr>;
+  def: Loadxr_shl_pat<load, v4i16, L4_loadrd_rr>;
+  def: Loadxr_shl_pat<load, v8i8, L4_loadrd_rr>;
+  def: Loadxr_shl_pat<load, f32, L4_loadri_rr>;
+  def: Loadxr_shl_pat<load, f64, L4_loadrd_rr>;
 }
 
 let AddedComplexity = 20 in {
-  def: Loadxr_add_pat<extloadi8, i32, L4_loadrub_rr>;
-  def: Loadxr_add_pat<zextloadi8, i32, L4_loadrub_rr>;
-  def: Loadxr_add_pat<sextloadi8, i32, L4_loadrb_rr>;
-  def: Loadxr_add_pat<extloadi16, i32, L4_loadruh_rr>;
-  def: Loadxr_add_pat<zextloadi16, i32, L4_loadruh_rr>;
-  def: Loadxr_add_pat<sextloadi16, i32, L4_loadrh_rr>;
-  def: Loadxr_add_pat<load, i32, L4_loadri_rr>;
-  def: Loadxr_add_pat<load, i64, L4_loadrd_rr>;
-  def: Loadxr_add_pat<load, f32, L4_loadri_rr>;
-  def: Loadxr_add_pat<load, f64, L4_loadrd_rr>;
+  def: Loadxr_add_pat<extloadi8, i32, L4_loadrub_rr>;
+  def: Loadxr_add_pat<zextloadi8, i32, L4_loadrub_rr>;
+  def: Loadxr_add_pat<sextloadi8, i32, L4_loadrb_rr>;
+  def: Loadxr_add_pat<extloadi16, i32, L4_loadruh_rr>;
+  def: Loadxr_add_pat<zextloadi16, i32, L4_loadruh_rr>;
+  def: Loadxr_add_pat<sextloadi16, i32, L4_loadrh_rr>;
+  def: Loadxr_add_pat<load, i32, L4_loadri_rr>;
+  def: Loadxr_add_pat<load, v2i16, L4_loadri_rr>;
+  def: Loadxr_add_pat<load, v4i8, L4_loadri_rr>;
+  def: Loadxr_add_pat<load, i64, L4_loadrd_rr>;
+  def: Loadxr_add_pat<load, v2i32, L4_loadrd_rr>;
+  def: Loadxr_add_pat<load, v4i16, L4_loadrd_rr>;
+  def: Loadxr_add_pat<load, v8i8, L4_loadrd_rr>;
+  def: Loadxr_add_pat<load, f32, L4_loadri_rr>;
+  def: Loadxr_add_pat<load, f64, L4_loadrd_rr>;
 }
 
 let AddedComplexity = 40 in {
-  def: Loadxrm_shl_pat<extloadi8, i64, ToZext64, L4_loadrub_rr>;
+  def: Loadxrm_shl_pat<extloadi8, i64, ToAext64, L4_loadrub_rr>;
   def: Loadxrm_shl_pat<zextloadi8, i64, ToZext64, L4_loadrub_rr>;
   def: Loadxrm_shl_pat<sextloadi8, i64, ToSext64, L4_loadrb_rr>;
-  def: Loadxrm_shl_pat<extloadi16, i64, ToZext64, L4_loadruh_rr>;
+  def: Loadxrm_shl_pat<extloadi16, i64, ToAext64, L4_loadruh_rr>;
   def: Loadxrm_shl_pat<zextloadi16, i64, ToZext64, L4_loadruh_rr>;
   def: Loadxrm_shl_pat<sextloadi16, i64, ToSext64, L4_loadrh_rr>;
-  def: Loadxrm_shl_pat<extloadi32, i64, ToZext64, L4_loadri_rr>;
+  def: Loadxrm_shl_pat<extloadi32, i64, ToAext64, L4_loadri_rr>;
   def: Loadxrm_shl_pat<zextloadi32, i64, ToZext64, L4_loadri_rr>;
   def: Loadxrm_shl_pat<sextloadi32, i64, ToSext64, L4_loadri_rr>;
 }
 
 let AddedComplexity = 20 in {
-  def: Loadxrm_add_pat<extloadi8, i64, ToZext64, L4_loadrub_rr>;
+  def: Loadxrm_add_pat<extloadi8, i64, ToAext64, L4_loadrub_rr>;
   def: Loadxrm_add_pat<zextloadi8, i64, ToZext64, L4_loadrub_rr>;
   def: Loadxrm_add_pat<sextloadi8, i64, ToSext64, L4_loadrb_rr>;
-  def: Loadxrm_add_pat<extloadi16, i64, ToZext64, L4_loadruh_rr>;
+  def: Loadxrm_add_pat<extloadi16, i64, ToAext64, L4_loadruh_rr>;
   def: Loadxrm_add_pat<zextloadi16, i64, ToZext64, L4_loadruh_rr>;
   def: Loadxrm_add_pat<sextloadi16, i64, ToSext64, L4_loadrh_rr>;
-  def: Loadxrm_add_pat<extloadi32, i64, ToZext64, L4_loadri_rr>;
+  def: Loadxrm_add_pat<extloadi32, i64, ToAext64, L4_loadri_rr>;
   def: Loadxrm_add_pat<zextloadi32, i64, ToZext64, L4_loadri_rr>;
   def: Loadxrm_add_pat<sextloadi32, i64, ToSext64, L4_loadri_rr>;
 }
@@ -1934,17 +2072,22 @@
 
 // Absolute address
 let AddedComplexity = 60 in {
-  def: Loada_pat<zextloadi1, i32, anyimm0, PS_loadrubabs>;
-  def: Loada_pat<sextloadi8, i32, anyimm0, PS_loadrbabs>;
-  def: Loada_pat<extloadi8, i32, anyimm0, PS_loadrubabs>;
-  def: Loada_pat<zextloadi8, i32, anyimm0, PS_loadrubabs>;
-  def: Loada_pat<sextloadi16, i32, anyimm1, PS_loadrhabs>;
-  def: Loada_pat<extloadi16, i32, anyimm1, PS_loadruhabs>;
-  def: Loada_pat<zextloadi16, i32, anyimm1, PS_loadruhabs>;
-  def: Loada_pat<load, i32, anyimm2, PS_loadriabs>;
-  def: Loada_pat<load, i64, anyimm3, PS_loadrdabs>;
-  def: Loada_pat<load, f32, anyimm2, PS_loadriabs>;
-  def: Loada_pat<load, f64, anyimm3, PS_loadrdabs>;
+  def: Loada_pat<zextloadi1, i32, anyimm0, PS_loadrubabs>;
+  def: Loada_pat<sextloadi8, i32, anyimm0, PS_loadrbabs>;
+  def: Loada_pat<extloadi8, i32, anyimm0, PS_loadrubabs>;
+  def: Loada_pat<zextloadi8, i32, anyimm0, PS_loadrubabs>;
+  def: Loada_pat<sextloadi16, i32, anyimm1, PS_loadrhabs>;
+  def: Loada_pat<extloadi16, i32, anyimm1, PS_loadruhabs>;
+  def: Loada_pat<zextloadi16, i32, anyimm1, PS_loadruhabs>;
+  def: Loada_pat<load, i32, anyimm2, PS_loadriabs>;
+  def: Loada_pat<load, v2i16, anyimm2, PS_loadriabs>;
+  def: Loada_pat<load, v4i8, anyimm2, PS_loadriabs>;
+  def: Loada_pat<load, i64, anyimm3, PS_loadrdabs>;
+  def: Loada_pat<load, v2i32, anyimm3, PS_loadrdabs>;
+  def: Loada_pat<load, v4i16, anyimm3, PS_loadrdabs>;
+  def: Loada_pat<load, v8i8, anyimm3, PS_loadrdabs>;
+  def: Loada_pat<load, f32, anyimm2, PS_loadriabs>;
+  def: Loada_pat<load, f64, anyimm3, PS_loadrdabs>;
 
   def: Loada_pat<atomic_load_8, i32, anyimm0, PS_loadrubabs>;
   def: Loada_pat<atomic_load_16, i32, anyimm1, PS_loadruhabs>;
@@ -1953,13 +2096,13 @@
 }
 
 let AddedComplexity = 30 in {
-  def: Loadam_pat<extloadi8, i64, anyimm0, ToZext64, PS_loadrubabs>;
+  def: Loadam_pat<extloadi8, i64, anyimm0, ToAext64, PS_loadrubabs>;
   def: Loadam_pat<sextloadi8, i64, anyimm0, ToSext64, PS_loadrbabs>;
   def: Loadam_pat<zextloadi8, i64, anyimm0, ToZext64, PS_loadrubabs>;
-  def: Loadam_pat<extloadi16, i64, anyimm1, ToZext64, PS_loadruhabs>;
+  def: Loadam_pat<extloadi16, i64, anyimm1, ToAext64, PS_loadruhabs>;
   def: Loadam_pat<sextloadi16, i64, anyimm1, ToSext64, PS_loadrhabs>;
   def: Loadam_pat<zextloadi16, i64, anyimm1, ToZext64, PS_loadruhabs>;
-  def: Loadam_pat<extloadi32, i64, anyimm2, ToZext64, PS_loadriabs>;
+  def: Loadam_pat<extloadi32, i64, anyimm2, ToAext64, PS_loadriabs>;
   def: Loadam_pat<sextloadi32, i64, anyimm2, ToSext64, PS_loadriabs>;
   def: Loadam_pat<zextloadi32, i64, anyimm2, ToZext64, PS_loadriabs>;
@@ -1970,18 +2113,23 @@
 
 // GP-relative address
 let AddedComplexity = 100 in {
-  def: Loada_pat<extloadi1, i32, addrgp, L2_loadrubgp>;
-  def: Loada_pat<zextloadi1, i32, addrgp, L2_loadrubgp>;
-  def: Loada_pat<extloadi8, i32, addrgp, L2_loadrubgp>;
-  def: Loada_pat<sextloadi8, i32, addrgp, L2_loadrbgp>;
-  def: Loada_pat<zextloadi8, i32, addrgp, L2_loadrubgp>;
-  def: Loada_pat<extloadi16, i32, addrgp, L2_loadruhgp>;
-  def: Loada_pat<sextloadi16, i32, addrgp, L2_loadrhgp>;
-  def: Loada_pat<zextloadi16, i32, addrgp, L2_loadruhgp>;
-  def: Loada_pat<load, i32, addrgp, L2_loadrigp>;
-  def: Loada_pat<load, i64, addrgp, L2_loadrdgp>;
-  def: Loada_pat<load, f32, addrgp, L2_loadrigp>;
-  def: Loada_pat<load, f64, addrgp, L2_loadrdgp>;
+  def: Loada_pat<extloadi1, i32, addrgp, L2_loadrubgp>;
+  def: Loada_pat<zextloadi1, i32, addrgp, L2_loadrubgp>;
+  def: Loada_pat<extloadi8, i32, addrgp, L2_loadrubgp>;
+  def: Loada_pat<sextloadi8, i32, addrgp, L2_loadrbgp>;
+  def: Loada_pat<zextloadi8, i32, addrgp, L2_loadrubgp>;
+  def: Loada_pat<extloadi16, i32, addrgp, L2_loadruhgp>;
+  def: Loada_pat<sextloadi16, i32, addrgp, L2_loadrhgp>;
+  def: Loada_pat<zextloadi16, i32, addrgp, L2_loadruhgp>;
+  def: Loada_pat<load, i32, addrgp, L2_loadrigp>;
+  def: Loada_pat<load, v2i16, addrgp, L2_loadrigp>;
+  def: Loada_pat<load, v4i8, addrgp, L2_loadrigp>;
+  def: Loada_pat<load, i64, addrgp, L2_loadrdgp>;
+  def: Loada_pat<load, v2i32, addrgp, L2_loadrdgp>;
+  def: Loada_pat<load, v4i16, addrgp, L2_loadrdgp>;
+  def: Loada_pat<load, v8i8, addrgp, L2_loadrdgp>;
+  def: Loada_pat<load, f32, addrgp, L2_loadrigp>;
+  def: Loada_pat<load, f64, addrgp, L2_loadrdgp>;
 
   def: Loada_pat<atomic_load_8, i32, addrgp, L2_loadrubgp>;
   def: Loada_pat<atomic_load_16, i32, addrgp, L2_loadruhgp>;
@@ -1990,13 +2138,13 @@
 }
 
 let AddedComplexity = 70 in {
-  def: Loadam_pat<extloadi8, i64, addrgp, ToZext64, L2_loadrubgp>;
+  def: Loadam_pat<extloadi8, i64, addrgp, ToAext64, L2_loadrubgp>;
   def: Loadam_pat<sextloadi8, i64, addrgp, ToSext64, L2_loadrbgp>;
   def: Loadam_pat<zextloadi8, i64, addrgp, ToZext64, L2_loadrubgp>;
-  def: Loadam_pat<extloadi16, i64, addrgp, ToZext64, L2_loadruhgp>;
+  def: Loadam_pat<extloadi16, i64, addrgp, ToAext64, L2_loadruhgp>;
  def: Loadam_pat<sextloadi16, i64, addrgp, ToSext64, L2_loadrhgp>;
   def: Loadam_pat<zextloadi16, i64, addrgp, ToZext64, L2_loadruhgp>;
-  def: Loadam_pat<extloadi32, i64, addrgp, ToZext64, L2_loadrigp>;
+  def: Loadam_pat<extloadi32, i64, addrgp, ToAext64, L2_loadrigp>;
   def: Loadam_pat<sextloadi32, i64, addrgp, ToSext64, L2_loadrigp>;
   def: Loadam_pat<zextloadi32, i64, addrgp, ToZext64, L2_loadrigp>;
@@ -2136,7 +2284,7 @@
 // swapped. This relies on the knowledge that the F.Fragment uses names
 // "ptr" and "val".
 class AtomSt<PatFrag F>
-  : PatFrag<(ops node:$val, node:$ptr), F.Fragment, F.PredicateCode,
+  : PatFrag<(ops node:$val, node:$ptr), !head(F.Fragments), F.PredicateCode,
             F.OperandTransform> {
   let IsAtomic = F.IsAtomic;
   let MemoryVT = F.MemoryVT;
@@ -2252,16 +2400,26 @@
 
 // GP-relative address
 let AddedComplexity = 120 in {
-  def: Storea_pat<truncstorei8, I32, addrgp, S2_storerbgp>;
-  def: Storea_pat<truncstorei16, I32, addrgp, S2_storerhgp>;
-  def: Storea_pat<store, I32, addrgp, S2_storerigp>;
-  def: Storea_pat<store, I64, addrgp, S2_storerdgp>;
-  def: Storea_pat<store, F32, addrgp, S2_storerigp>;
-  def: Storea_pat<store, F64, addrgp, S2_storerdgp>;
-  def: Storea_pat<AtomSt<atomic_store_8>, I32, addrgp, S2_storerbgp>;
-  def: Storea_pat<AtomSt<atomic_store_16>, I32, addrgp, S2_storerhgp>;
-  def: Storea_pat<AtomSt<atomic_store_32>, I32, addrgp, S2_storerigp>;
-  def: Storea_pat<AtomSt<atomic_store_64>, I64, addrgp, S2_storerdgp>;
+  def: Storea_pat<truncstorei8, I32, addrgp, S2_storerbgp>;
+  def: Storea_pat<truncstorei16, I32, addrgp, S2_storerhgp>;
+  def: Storea_pat<store, I32, addrgp, S2_storerigp>;
+  def: Storea_pat<store, V4I8, addrgp, S2_storerigp>;
+  def: Storea_pat<store, V2I16, addrgp, S2_storerigp>;
+  def: Storea_pat<store, I64, addrgp, S2_storerdgp>;
+  def: Storea_pat<store, V8I8, addrgp, S2_storerdgp>;
+  def: Storea_pat<store, V4I16, addrgp, S2_storerdgp>;
+  def: Storea_pat<store, V2I32, addrgp, S2_storerdgp>;
+  def: Storea_pat<store, F32, addrgp, S2_storerigp>;
+  def: Storea_pat<store, F64, addrgp, S2_storerdgp>;
+  def: Storea_pat<AtomSt<atomic_store_8>, I32, addrgp, S2_storerbgp>;
+  def: Storea_pat<AtomSt<atomic_store_16>, I32, addrgp, S2_storerhgp>;
+  def: Storea_pat<AtomSt<atomic_store_32>, I32, addrgp, S2_storerigp>;
+  def: Storea_pat<AtomSt<atomic_store_32>, V4I8, addrgp, S2_storerigp>;
+  def: Storea_pat<AtomSt<atomic_store_32>, V2I16, addrgp, S2_storerigp>;
+  def: Storea_pat<AtomSt<atomic_store_64>, I64, addrgp, S2_storerdgp>;
+  def: Storea_pat<AtomSt<atomic_store_64>, V8I8, addrgp, S2_storerdgp>;
+  def: Storea_pat<AtomSt<atomic_store_64>, V4I16, addrgp, S2_storerdgp>;
+  def: Storea_pat<AtomSt<atomic_store_64>, V2I32, addrgp, S2_storerdgp>;
 
   def: Stoream_pat<truncstorei8, I64, addrgp, LoReg, S2_storerbgp>;
   def: Stoream_pat<truncstorei16, I64, addrgp, LoReg, S2_storerhgp>;
@@ -2271,16 +2429,26 @@
 
 // Absolute address
 let AddedComplexity = 110 in {
-  def: Storea_pat<truncstorei8, I32, anyimm0, PS_storerbabs>;
-  def: Storea_pat<truncstorei16, I32, anyimm1, PS_storerhabs>;
-  def: Storea_pat<store, I32, anyimm2, PS_storeriabs>;
-  def: Storea_pat<store, I64, anyimm3, PS_storerdabs>;
-  def: Storea_pat<store, F32, anyimm2, PS_storeriabs>;
-  def: Storea_pat<store, F64, anyimm3, PS_storerdabs>;
-  def: Storea_pat<AtomSt<atomic_store_8>, I32, anyimm0, PS_storerbabs>;
-  def: Storea_pat<AtomSt<atomic_store_16>, I32, anyimm1, PS_storerhabs>;
-  def: Storea_pat<AtomSt<atomic_store_32>, I32, anyimm2, PS_storeriabs>;
-  def: Storea_pat<AtomSt<atomic_store_64>, I64, anyimm3, PS_storerdabs>;
+  def: Storea_pat<truncstorei8, I32, anyimm0, PS_storerbabs>;
+  def: Storea_pat<truncstorei16, I32, anyimm1, PS_storerhabs>;
+  def: Storea_pat<store, I32, anyimm2, PS_storeriabs>;
+  def: Storea_pat<store, V4I8, anyimm2, PS_storeriabs>;
+  def: Storea_pat<store, V2I16, anyimm2, PS_storeriabs>;
+  def: Storea_pat<store, I64, anyimm3, PS_storerdabs>;
+  def: Storea_pat<store, V8I8, anyimm3, PS_storerdabs>;
+  def: Storea_pat<store, V4I16, anyimm3, PS_storerdabs>;
+  def: Storea_pat<store, V2I32, anyimm3, PS_storerdabs>;
+  def: Storea_pat<store, F32, anyimm2, PS_storeriabs>;
+  def: Storea_pat<store, F64, anyimm3, PS_storerdabs>;
+  def: Storea_pat<AtomSt<atomic_store_8>, I32, anyimm0, PS_storerbabs>;
+  def: Storea_pat<AtomSt<atomic_store_16>, I32, anyimm1, PS_storerhabs>;
+  def: Storea_pat<AtomSt<atomic_store_32>, I32, anyimm2, PS_storeriabs>;
+  def: Storea_pat<AtomSt<atomic_store_32>, V4I8, anyimm2, PS_storeriabs>;
+  def: Storea_pat<AtomSt<atomic_store_32>, V2I16, anyimm2, PS_storeriabs>;
+  def: Storea_pat<AtomSt<atomic_store_64>, I64, anyimm3, PS_storerdabs>;
+  def: Storea_pat<AtomSt<atomic_store_64>, V8I8, anyimm3, PS_storerdabs>;
+  def: Storea_pat<AtomSt<atomic_store_64>, V4I16, anyimm3, PS_storerdabs>;
+  def: Storea_pat<AtomSt<atomic_store_64>, V2I32, anyimm3, PS_storerdabs>;
 
   def: Stoream_pat<truncstorei8, I64, anyimm0, LoReg, PS_storerbabs>;
   def: Stoream_pat<truncstorei16, I64, anyimm1, LoReg, PS_storerhabs>;
@@ -2290,12 +2458,17 @@
 
 // Reg<<S + Imm
 let AddedComplexity = 100 in {
-  def: Storexu_shl_pat<truncstorei8, I32, anyimm0, S4_storerb_ur>;
-  def: Storexu_shl_pat<truncstorei16, I32, anyimm1, S4_storerh_ur>;
-  def: Storexu_shl_pat<store, I32, anyimm2, S4_storeri_ur>;
-  def: Storexu_shl_pat<store, I64, anyimm3, S4_storerd_ur>;
-  def: Storexu_shl_pat<store, F32, anyimm2, S4_storeri_ur>;
-  def: Storexu_shl_pat<store, F64, anyimm3, S4_storerd_ur>;
+  def: Storexu_shl_pat<truncstorei8, I32, anyimm0, S4_storerb_ur>;
+  def: Storexu_shl_pat<truncstorei16, I32, anyimm1, S4_storerh_ur>;
+  def: Storexu_shl_pat<store, I32, anyimm2, S4_storeri_ur>;
+  def: Storexu_shl_pat<store, V4I8, anyimm2, S4_storeri_ur>;
+  def: Storexu_shl_pat<store, V2I16, anyimm2, S4_storeri_ur>;
+  def: Storexu_shl_pat<store, I64, anyimm3, S4_storerd_ur>;
+  def: Storexu_shl_pat<store, V8I8, anyimm3, S4_storerd_ur>;
+  def: Storexu_shl_pat<store, V4I16, anyimm3, S4_storerd_ur>;
+  def: Storexu_shl_pat<store, V2I32, anyimm3, S4_storerd_ur>;
+  def: Storexu_shl_pat<store, F32, anyimm2, S4_storeri_ur>;
+  def: Storexu_shl_pat<store, F64, anyimm3, S4_storerd_ur>;
 
   def: Pat<(store I1:$Pu, (add (shl I32:$Rs, u2_0ImmPred:$u2), anyimm:$A)),
@@ -2252,16 +2400,26 @@
 // GP-relative address
 let AddedComplexity = 120 in {
-  def: Storea_pat<truncstorei8, I32, addrgp, S2_storerbgp>;
-  def: Storea_pat<truncstorei16, I32, addrgp, S2_storerhgp>;
-  def: Storea_pat<store, I32, addrgp, S2_storerigp>;
-  def: Storea_pat<store, I64, addrgp, S2_storerdgp>;
-  def: Storea_pat<store, F32, addrgp, S2_storerigp>;
-  def: Storea_pat<store, F64, addrgp, S2_storerdgp>;
-  def: Storea_pat<AtomSt<atomic_store_8>, I32, addrgp, S2_storerbgp>;
-  def: Storea_pat<AtomSt<atomic_store_16>, I32, addrgp, S2_storerhgp>;
-  def: Storea_pat<AtomSt<atomic_store_32>, I32, addrgp, S2_storerigp>;
-  def: Storea_pat<AtomSt<atomic_store_64>, I64, addrgp, S2_storerdgp>;
+  def: Storea_pat<truncstorei8, I32, addrgp, S2_storerbgp>;
+  def: Storea_pat<truncstorei16, I32, addrgp, S2_storerhgp>;
+  def: Storea_pat<store, I32, addrgp, S2_storerigp>;
+  def: Storea_pat<store, V4I8, addrgp, S2_storerigp>;
+  def: Storea_pat<store, V2I16, addrgp, S2_storerigp>;
+  def: Storea_pat<store, I64, addrgp, S2_storerdgp>;
+  def: Storea_pat<store, V8I8, addrgp, S2_storerdgp>;
+  def: Storea_pat<store, V4I16, addrgp, S2_storerdgp>;
+  def: Storea_pat<store, V2I32, addrgp, S2_storerdgp>;
+  def: Storea_pat<store, F32, addrgp, S2_storerigp>;
+  def: Storea_pat<store, F64, addrgp, S2_storerdgp>;
+  def: Storea_pat<AtomSt<atomic_store_8>, I32, addrgp, S2_storerbgp>;
+  def: Storea_pat<AtomSt<atomic_store_16>, I32, addrgp, S2_storerhgp>;
+  def: Storea_pat<AtomSt<atomic_store_32>, I32, addrgp, S2_storerigp>;
+  def: Storea_pat<AtomSt<atomic_store_32>, V4I8, addrgp, S2_storerigp>;
+  def: Storea_pat<AtomSt<atomic_store_32>, V2I16, addrgp, S2_storerigp>;
+  def: Storea_pat<AtomSt<atomic_store_64>, I64, addrgp, S2_storerdgp>;
+  def: Storea_pat<AtomSt<atomic_store_64>, V8I8, addrgp, S2_storerdgp>;
+  def: Storea_pat<AtomSt<atomic_store_64>, V4I16, addrgp, S2_storerdgp>;
+  def: Storea_pat<AtomSt<atomic_store_64>, V2I32, addrgp, S2_storerdgp>;

   def: Stoream_pat<truncstorei8, I64, addrgp, LoReg, S2_storerbgp>;
   def: Stoream_pat<truncstorei16, I64, addrgp, LoReg, S2_storerhgp>;
@@ -2271,16 +2429,26 @@
 // Absolute address
 let AddedComplexity = 110 in {
-  def: Storea_pat<truncstorei8, I32, anyimm0, PS_storerbabs>;
-  def: Storea_pat<truncstorei16, I32, anyimm1, PS_storerhabs>;
-  def: Storea_pat<store, I32, anyimm2, PS_storeriabs>;
-  def: Storea_pat<store, I64, anyimm3, PS_storerdabs>;
-  def: Storea_pat<store, F32, anyimm2, PS_storeriabs>;
-  def: Storea_pat<store, F64, anyimm3, PS_storerdabs>;
-  def: Storea_pat<AtomSt<atomic_store_8>, I32, anyimm0, PS_storerbabs>;
-  def: Storea_pat<AtomSt<atomic_store_16>, I32, anyimm1, PS_storerhabs>;
-  def: Storea_pat<AtomSt<atomic_store_32>, I32, anyimm2, PS_storeriabs>;
-  def: Storea_pat<AtomSt<atomic_store_64>, I64, anyimm3, PS_storerdabs>;
+  def: Storea_pat<truncstorei8, I32, anyimm0, PS_storerbabs>;
+  def: Storea_pat<truncstorei16, I32, anyimm1, PS_storerhabs>;
+  def: Storea_pat<store, I32, anyimm2, PS_storeriabs>;
+  def: Storea_pat<store, V4I8, anyimm2, PS_storeriabs>;
+  def: Storea_pat<store, V2I16, anyimm2, PS_storeriabs>;
+  def: Storea_pat<store, I64, anyimm3, PS_storerdabs>;
+  def: Storea_pat<store, V8I8, anyimm3, PS_storerdabs>;
+  def: Storea_pat<store, V4I16, anyimm3, PS_storerdabs>;
+  def: Storea_pat<store, V2I32, anyimm3, PS_storerdabs>;
+  def: Storea_pat<store, F32, anyimm2, PS_storeriabs>;
+  def: Storea_pat<store, F64, anyimm3, PS_storerdabs>;
+  def: Storea_pat<AtomSt<atomic_store_8>, I32, anyimm0, PS_storerbabs>;
+  def: Storea_pat<AtomSt<atomic_store_16>, I32, anyimm1, PS_storerhabs>;
+  def: Storea_pat<AtomSt<atomic_store_32>, I32, anyimm2, PS_storeriabs>;
+  def: Storea_pat<AtomSt<atomic_store_32>, V4I8, anyimm2, PS_storeriabs>;
+  def: Storea_pat<AtomSt<atomic_store_32>, V2I16, anyimm2, PS_storeriabs>;
+  def: Storea_pat<AtomSt<atomic_store_64>, I64, anyimm3, PS_storerdabs>;
+  def: Storea_pat<AtomSt<atomic_store_64>, V8I8, anyimm3, PS_storerdabs>;
+  def: Storea_pat<AtomSt<atomic_store_64>, V4I16, anyimm3, PS_storerdabs>;
+  def: Storea_pat<AtomSt<atomic_store_64>, V2I32, anyimm3, PS_storerdabs>;

   def: Stoream_pat<truncstorei8, I64, anyimm0, LoReg, PS_storerbabs>;
   def: Stoream_pat<truncstorei16, I64, anyimm1, LoReg, PS_storerhabs>;
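For context: the added V4I8/V2I16 and V8I8/V4I16/V2I32 entries in these store blocks introduce no new instructions; a short vector that fits in a 32- or 64-bit register is stored with the same word/doubleword forms as i32/i64. A minimal sketch using the GCC/Clang vector extension (the typedef name is illustrative):

    /* Sketch: a 4 x i8 vector occupies one 32-bit register, so the store
     * can use the plain word-store form, roughly "memw(r0+#0) = r1". */
    typedef signed char v4i8 __attribute__((vector_size(4)));

    void store_pixels(v4i8 *dst, v4i8 px) {
        *dst = px;
    }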
@@ -2290,12 +2458,17 @@
 // Reg<<S + Imm
 let AddedComplexity = 100 in {
-  def: Storexu_shl_pat<truncstorei8, I32, anyimm0, S4_storerb_ur>;
-  def: Storexu_shl_pat<truncstorei16, I32, anyimm1, S4_storerh_ur>;
-  def: Storexu_shl_pat<store, I32, anyimm2, S4_storeri_ur>;
-  def: Storexu_shl_pat<store, I64, anyimm3, S4_storerd_ur>;
-  def: Storexu_shl_pat<store, F32, anyimm2, S4_storeri_ur>;
-  def: Storexu_shl_pat<store, F64, anyimm3, S4_storerd_ur>;
+  def: Storexu_shl_pat<truncstorei8, I32, anyimm0, S4_storerb_ur>;
+  def: Storexu_shl_pat<truncstorei16, I32, anyimm1, S4_storerh_ur>;
+  def: Storexu_shl_pat<store, I32, anyimm2, S4_storeri_ur>;
+  def: Storexu_shl_pat<store, V4I8, anyimm2, S4_storeri_ur>;
+  def: Storexu_shl_pat<store, V2I16, anyimm2, S4_storeri_ur>;
+  def: Storexu_shl_pat<store, I64, anyimm3, S4_storerd_ur>;
+  def: Storexu_shl_pat<store, V8I8, anyimm3, S4_storerd_ur>;
+  def: Storexu_shl_pat<store, V4I16, anyimm3, S4_storerd_ur>;
+  def: Storexu_shl_pat<store, V2I32, anyimm3, S4_storerd_ur>;
+  def: Storexu_shl_pat<store, F32, anyimm2, S4_storeri_ur>;
+  def: Storexu_shl_pat<store, F64, anyimm3, S4_storerd_ur>;

   def: Pat<(store I1:$Pu, (add (shl I32:$Rs, u2_0ImmPred:$u2), anyimm:$A)),
            (S4_storerb_ur IntRegs:$Rs, imm:$u2, imm:$A, (I1toI32 I1:$Pu))>;
@@ -2303,12 +2476,17 @@
 // Reg<<S + Reg
 let AddedComplexity = 90 in {
-  def: Storexr_shl_pat<truncstorei8, I32, S4_storerb_rr>;
-  def: Storexr_shl_pat<truncstorei16, I32, S4_storerh_rr>;
-  def: Storexr_shl_pat<store, I32, S4_storeri_rr>;
-  def: Storexr_shl_pat<store, I64, S4_storerd_rr>;
-  def: Storexr_shl_pat<store, F32, S4_storeri_rr>;
-  def: Storexr_shl_pat<store, F64, S4_storerd_rr>;
+  def: Storexr_shl_pat<truncstorei8, I32, S4_storerb_rr>;
+  def: Storexr_shl_pat<truncstorei16, I32, S4_storerh_rr>;
+  def: Storexr_shl_pat<store, I32, S4_storeri_rr>;
+  def: Storexr_shl_pat<store, V4I8, S4_storeri_rr>;
+  def: Storexr_shl_pat<store, V2I16, S4_storeri_rr>;
+  def: Storexr_shl_pat<store, I64, S4_storerd_rr>;
+  def: Storexr_shl_pat<store, V8I8, S4_storerd_rr>;
+  def: Storexr_shl_pat<store, V4I16, S4_storerd_rr>;
+  def: Storexr_shl_pat<store, V2I32, S4_storerd_rr>;
+  def: Storexr_shl_pat<store, F32, S4_storeri_rr>;
+  def: Storexr_shl_pat<store, F64, S4_storerd_rr>;

   def: Pat<(store I1:$Pu, (add (shl I32:$Rs, u2_0ImmPred:$u2), I32:$Rt)),
            (S4_storerb_ur IntRegs:$Rt, IntRegs:$Rs, imm:$u2, (I1toI32 I1:$Pu))>;
@@ -2360,20 +2538,30 @@
 // Fi+Imm, Fi, store-register
 let AddedComplexity = 60 in {
-  defm: Storexi_fi_add_pat<truncstorei8, I32, anyimm, S2_storerb_io>;
-  defm: Storexi_fi_add_pat<truncstorei16, I32, anyimm, S2_storerh_io>;
-  defm: Storexi_fi_add_pat<store, I32, anyimm, S2_storeri_io>;
-  defm: Storexi_fi_add_pat<store, I64, anyimm, S2_storerd_io>;
-  defm: Storexi_fi_add_pat<store, F32, anyimm, S2_storeri_io>;
-  defm: Storexi_fi_add_pat<store, F64, anyimm, S2_storerd_io>;
+  defm: Storexi_fi_add_pat<truncstorei8, I32, anyimm, S2_storerb_io>;
+  defm: Storexi_fi_add_pat<truncstorei16, I32, anyimm, S2_storerh_io>;
+  defm: Storexi_fi_add_pat<store, I32, anyimm, S2_storeri_io>;
+  defm: Storexi_fi_add_pat<store, V4I8, anyimm, S2_storeri_io>;
+  defm: Storexi_fi_add_pat<store, V2I16, anyimm, S2_storeri_io>;
+  defm: Storexi_fi_add_pat<store, I64, anyimm, S2_storerd_io>;
+  defm: Storexi_fi_add_pat<store, V8I8, anyimm, S2_storerd_io>;
+  defm: Storexi_fi_add_pat<store, V4I16, anyimm, S2_storerd_io>;
+  defm: Storexi_fi_add_pat<store, V2I32, anyimm, S2_storerd_io>;
+  defm: Storexi_fi_add_pat<store, F32, anyimm, S2_storeri_io>;
+  defm: Storexi_fi_add_pat<store, F64, anyimm, S2_storerd_io>;
   defm: Storexim_fi_add_pat<store, I1, anyimm, I1toI32, S2_storerb_io>;

-  def: Storexi_fi_pat<truncstorei8, I32, S2_storerb_io>;
-  def: Storexi_fi_pat<truncstorei16, I32, S2_storerh_io>;
-  def: Storexi_fi_pat<store, I32, S2_storeri_io>;
-  def: Storexi_fi_pat<store, I64, S2_storerd_io>;
-  def: Storexi_fi_pat<store, F32, S2_storeri_io>;
-  def: Storexi_fi_pat<store, F64, S2_storerd_io>;
+  def: Storexi_fi_pat<truncstorei8, I32, S2_storerb_io>;
+  def: Storexi_fi_pat<truncstorei16, I32, S2_storerh_io>;
+  def: Storexi_fi_pat<store, I32, S2_storeri_io>;
+  def: Storexi_fi_pat<store, V4I8, S2_storeri_io>;
+  def: Storexi_fi_pat<store, V2I16, S2_storeri_io>;
+  def: Storexi_fi_pat<store, I64, S2_storerd_io>;
+  def: Storexi_fi_pat<store, V8I8, S2_storerd_io>;
+  def: Storexi_fi_pat<store, V4I16, S2_storerd_io>;
+  def: Storexi_fi_pat<store, V2I32, S2_storerd_io>;
+  def: Storexi_fi_pat<store, F32, S2_storeri_io>;
+  def: Storexi_fi_pat<store, F64, S2_storerd_io>;
   def: Storexim_fi_pat<store, I1, I1toI32, S2_storerb_io>;
 }
@@ -2398,32 +2586,47 @@
 // Reg+Imm, store-register
 let AddedComplexity = 40 in {
-  defm: Storexi_pat<truncstorei8, I32, anyimm0, S2_storerb_io>;
-  defm: Storexi_pat<truncstorei16, I32, anyimm1, S2_storerh_io>;
-  defm: Storexi_pat<store, I32, anyimm2, S2_storeri_io>;
-  defm: Storexi_pat<store, I64, anyimm3, S2_storerd_io>;
-  defm: Storexi_pat<store, F32, anyimm2, S2_storeri_io>;
-  defm: Storexi_pat<store, F64, anyimm3, S2_storerd_io>;
+  defm: Storexi_pat<truncstorei8, I32, anyimm0, S2_storerb_io>;
+  defm: Storexi_pat<truncstorei16, I32, anyimm1, S2_storerh_io>;
+  defm: Storexi_pat<store, I32, anyimm2, S2_storeri_io>;
+  defm: Storexi_pat<store, V4I8, anyimm2, S2_storeri_io>;
+  defm: Storexi_pat<store, V2I16, anyimm2, S2_storeri_io>;
+  defm: Storexi_pat<store, I64, anyimm3, S2_storerd_io>;
+  defm: Storexi_pat<store, V8I8, anyimm3, S2_storerd_io>;
+  defm: Storexi_pat<store, V4I16, anyimm3, S2_storerd_io>;
+  defm: Storexi_pat<store, V2I32, anyimm3, S2_storerd_io>;
+  defm: Storexi_pat<store, F32, anyimm2, S2_storeri_io>;
+  defm: Storexi_pat<store, F64, anyimm3, S2_storerd_io>;

   defm: Storexim_pat<truncstorei8, I64, anyimm0, LoReg, S2_storerb_io>;
   defm: Storexim_pat<truncstorei16, I64, anyimm1, LoReg, S2_storerh_io>;
   defm: Storexim_pat<truncstorei32, I64, anyimm2, LoReg, S2_storeri_io>;
   defm: Storexim_pat<store, I1, anyimm0, I1toI32, S2_storerb_io>;

-  defm: Storexi_pat<AtomSt<atomic_store_8>, I32, anyimm0, S2_storerb_io>;
-  defm: Storexi_pat<AtomSt<atomic_store_16>, I32, anyimm1, S2_storerh_io>;
-  defm: Storexi_pat<AtomSt<atomic_store_32>, I32, anyimm2, S2_storeri_io>;
-  defm: Storexi_pat<AtomSt<atomic_store_64>, I64, anyimm3, S2_storerd_io>;
+  defm: Storexi_pat<AtomSt<atomic_store_8>, I32, anyimm0, S2_storerb_io>;
+  defm: Storexi_pat<AtomSt<atomic_store_16>, I32, anyimm1, S2_storerh_io>;
+  defm: Storexi_pat<AtomSt<atomic_store_32>, I32, anyimm2, S2_storeri_io>;
+  defm: Storexi_pat<AtomSt<atomic_store_32>, V4I8, anyimm2, S2_storeri_io>;
+  defm: Storexi_pat<AtomSt<atomic_store_32>, V2I16, anyimm2, S2_storeri_io>;
+  defm: Storexi_pat<AtomSt<atomic_store_64>, I64, anyimm3, S2_storerd_io>;
+  defm: Storexi_pat<AtomSt<atomic_store_64>, V8I8, anyimm3, S2_storerd_io>;
+  defm: Storexi_pat<AtomSt<atomic_store_64>, V4I16, anyimm3, S2_storerd_io>;
+  defm: Storexi_pat<AtomSt<atomic_store_64>, V2I32, anyimm3, S2_storerd_io>;
 }

 // Reg+Reg
 let AddedComplexity = 30 in {
-  def: Storexr_add_pat<truncstorei8, I32, S4_storerb_rr>;
-  def: Storexr_add_pat<truncstorei16, I32, S4_storerh_rr>;
-  def: Storexr_add_pat<store, I32, S4_storeri_rr>;
-  def: Storexr_add_pat<store, I64, S4_storerd_rr>;
-  def: Storexr_add_pat<store, F32, S4_storeri_rr>;
-  def: Storexr_add_pat<store, F64, S4_storerd_rr>;
+  def: Storexr_add_pat<truncstorei8, I32, S4_storerb_rr>;
+  def: Storexr_add_pat<truncstorei16, I32, S4_storerh_rr>;
+  def: Storexr_add_pat<store, I32, S4_storeri_rr>;
+  def: Storexr_add_pat<store, V4I8, S4_storeri_rr>;
+  def: Storexr_add_pat<store, V2I16, S4_storeri_rr>;
+  def: Storexr_add_pat<store, I64, S4_storerd_rr>;
+  def: Storexr_add_pat<store, V8I8, S4_storerd_rr>;
+  def: Storexr_add_pat<store, V4I16, S4_storerd_rr>;
+  def: Storexr_add_pat<store, V2I32, S4_storerd_rr>;
+  def: Storexr_add_pat<store, F32, S4_storeri_rr>;
+  def: Storexr_add_pat<store, F64, S4_storerd_rr>;

   def: Pat<(store I1:$Pu, (add I32:$Rs, I32:$Rt)),
            (S4_storerb_rr IntRegs:$Rs, IntRegs:$Rt, 0, (I1toI32 I1:$Pu))>;
@@ -2442,22 +2645,32 @@
 // Reg, store-register
 let AddedComplexity = 10 in {
-  def: Storexi_base_pat<truncstorei8, I32, S2_storerb_io>;
-  def: Storexi_base_pat<truncstorei16, I32, S2_storerh_io>;
-  def: Storexi_base_pat<store, I32, S2_storeri_io>;
-  def: Storexi_base_pat<store, I64, S2_storerd_io>;
-  def: Storexi_base_pat<store, F32, S2_storeri_io>;
-  def: Storexi_base_pat<store, F64, S2_storerd_io>;
+  def: Storexi_base_pat<truncstorei8, I32, S2_storerb_io>;
+  def: Storexi_base_pat<truncstorei16, I32, S2_storerh_io>;
+  def: Storexi_base_pat<store, I32, S2_storeri_io>;
+  def: Storexi_base_pat<store, V4I8, S2_storeri_io>;
+  def: Storexi_base_pat<store, V2I16, S2_storeri_io>;
+  def: Storexi_base_pat<store, I64, S2_storerd_io>;
+  def: Storexi_base_pat<store, V8I8, S2_storerd_io>;
+  def: Storexi_base_pat<store, V4I16, S2_storerd_io>;
+  def: Storexi_base_pat<store, V2I32, S2_storerd_io>;
+  def: Storexi_base_pat<store, F32, S2_storeri_io>;
+  def: Storexi_base_pat<store, F64, S2_storerd_io>;

   def: Storexim_base_pat<truncstorei8, I64, LoReg, S2_storerb_io>;
   def: Storexim_base_pat<truncstorei16, I64, LoReg, S2_storerh_io>;
   def: Storexim_base_pat<truncstorei32, I64, LoReg, S2_storeri_io>;
   def: Storexim_base_pat<store, I1, I1toI32, S2_storerb_io>;

-  def: Storexi_base_pat<AtomSt<atomic_store_8>, I32, S2_storerb_io>;
-  def: Storexi_base_pat<AtomSt<atomic_store_16>, I32, S2_storerh_io>;
-  def: Storexi_base_pat<AtomSt<atomic_store_32>, I32, S2_storeri_io>;
-  def: Storexi_base_pat<AtomSt<atomic_store_64>, I64, S2_storerd_io>;
+  def: Storexi_base_pat<AtomSt<atomic_store_8>, I32, S2_storerb_io>;
+  def: Storexi_base_pat<AtomSt<atomic_store_16>, I32, S2_storerh_io>;
+  def: Storexi_base_pat<AtomSt<atomic_store_32>, I32, S2_storeri_io>;
+  def: Storexi_base_pat<AtomSt<atomic_store_32>, V4I8, S2_storeri_io>;
+  def: Storexi_base_pat<AtomSt<atomic_store_32>, V2I16, S2_storeri_io>;
+  def: Storexi_base_pat<AtomSt<atomic_store_64>, I64, S2_storerd_io>;
+  def: Storexi_base_pat<AtomSt<atomic_store_64>, V8I8, S2_storerd_io>;
+  def: Storexi_base_pat<AtomSt<atomic_store_64>, V4I16, S2_storerd_io>;
+  def: Storexi_base_pat<AtomSt<atomic_store_64>, V2I32, S2_storerd_io>;
 }
@@ -2541,8 +2754,10 @@
 multiclass Memopxr_pat<PatFrag Load, PatFrag Store, PatFrag ImmPred,
                        SDNode Oper, InstHexagon MI> {
-  defm: Memopxr_base_pat <Load, Store, Oper, MI>;
-  defm: Memopxr_add_pat <Load, Store, ImmPred, Oper, MI>;
+  let Predicates = [UseMEMOPS] in {
+    defm: Memopxr_base_pat <Load, Store, Oper, MI>;
+    defm: Memopxr_add_pat <Load, Store, ImmPred, Oper, MI>;
+  }
 }

 let AddedComplexity = 200 in {
@@ -2640,8 +2855,10 @@
 multiclass Memopxi_pat<PatFrag Load, PatFrag Store, PatFrag ImmPred,
                        SDNode Oper, PatFrag Arg, SDNodeXForm ArgMod,
                        InstHexagon MI> {
-  defm: Memopxi_base_pat <Load, Store, Oper, Arg, ArgMod, MI>;
-  defm: Memopxi_add_pat <Load, Store, ImmPred, Oper, Arg, ArgMod, MI>;
+  let Predicates = [UseMEMOPS] in {
+    defm: Memopxi_base_pat <Load, Store, Oper, Arg, ArgMod, MI>;
+    defm: Memopxi_add_pat <Load, Store, ImmPred, Oper, Arg, ArgMod, MI>;
+  }
 }

 let AddedComplexity = 220 in {
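For context: wrapping the Memopxr/Memopxi expansions in `let Predicates = [UseMEMOPS]` makes every load-op-store ("memop") pattern conditional on the memops subtarget feature rather than unconditional. When the feature is on, a read-modify-write of a single memory location can fold into one instruction. A minimal C sketch; whether the fold fires depends on the enabled features and the addressing mode:

    /* Sketch: with memops enabled this can select to one instruction,
     * roughly "memw(r0+#0) += #1", instead of a load/add/store sequence. */
    void bump(int *counter) {
        *counter += 1;
    }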
@@ -2800,6 +3017,8 @@
            (J2_jumpf I1:$Pu, bb:$dst)>;
 def: Pat<(brcond (i1 (setne I1:$Pu, -1)), bb:$dst),
          (J2_jumpf I1:$Pu, bb:$dst)>;
+def: Pat<(brcond (i1 (seteq I1:$Pu, 0)), bb:$dst),
+         (J2_jumpf I1:$Pu, bb:$dst)>;
 def: Pat<(brcond (i1 (setne I1:$Pu, 0)), bb:$dst),
          (J2_jumpt I1:$Pu, bb:$dst)>;
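For context: the new seteq pattern is the dual of the existing setne one. Comparing an i1 predicate against 0 for equality is simply a branch on the predicate being false, so it can select J2_jumpf directly instead of first inverting the predicate. A minimal C sketch:

    /* Sketch: the negated test can select the jump-if-false form
     * directly, roughly "if (!p0) jump ...". */
    #include <stdbool.h>

    void run_unless(bool flag, void (*work)(void)) {
        if (!flag)
            work();
    }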
@@ -2862,9 +3081,26 @@
 def HexagonBARRIER: SDNode<"HexagonISD::BARRIER", SDTNone, [SDNPHasChain]>;
 def: Pat<(HexagonBARRIER), (Y2_barrier)>;

+def: Pat<(trap), (PS_crash)>;
+
 // Read cycle counter.
 def SDTInt64Leaf: SDTypeProfile<1, 0, [SDTCisVT<0, i64>]>;
 def HexagonREADCYCLE: SDNode<"HexagonISD::READCYCLE", SDTInt64Leaf,
   [SDNPHasChain]>;

 def: Pat<(HexagonREADCYCLE), (A4_tfrcpp UPCYCLE)>;
+
+// The declared return value of the store-locked intrinsics is i32, but
+// the instructions actually define i1. To avoid register copies from
+// IntRegs to PredRegs and back, fold the entire pattern that checks the
+// result against true/false.
+let AddedComplexity = 100 in {
+  def: Pat<(i1 (setne (int_hexagon_S2_storew_locked I32:$Rs, I32:$Rt), 0)),
+           (S2_storew_locked I32:$Rs, I32:$Rt)>;
+  def: Pat<(i1 (seteq (int_hexagon_S2_storew_locked I32:$Rs, I32:$Rt), 0)),
+           (C2_not (S2_storew_locked I32:$Rs, I32:$Rt))>;
+  def: Pat<(i1 (setne (int_hexagon_S4_stored_locked I32:$Rs, I64:$Rt), 0)),
+           (S4_stored_locked I32:$Rs, I64:$Rt)>;
+  def: Pat<(i1 (seteq (int_hexagon_S4_stored_locked I32:$Rs, I64:$Rt), 0)),
+           (C2_not (S4_stored_locked I32:$Rs, I64:$Rt))>;
+}
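For context: the AddedComplexity = 100 block exists because frontends see the store-locked intrinsics as returning i32 while the machine instructions define a predicate (i1); matching the whole setne/seteq-against-zero expression keeps the result in PredRegs so it can feed a branch with no cross-register-class copies. A minimal C sketch of a caller; __builtin_HEXAGON_S2_storew_locked is assumed to be the corresponding Clang builtin and its exact signature should be checked against the toolchain headers:

    /* Sketch: the builtin's result feeds the branch directly, letting the
     * selector use the instruction's predicate result without copying it
     * through an integer register. */
    static int store_conditional(volatile int *addr, int value) {
        /* Assumed builtin; nonzero is taken to mean success here. */
        if (__builtin_HEXAGON_S2_storew_locked((void *)addr, value))
            return 1;   /* store succeeded */
        return 0;       /* reservation lost; the caller retries */
    }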