comparison lib/Target/Hexagon/HexagonPatterns.td @ 148:63bd29f05246

merged
author Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date Wed, 14 Aug 2019 19:46:37 +0900
parents c2174574ed3a
children
146:3fc4d5c3e21e 148:63bd29f05246
1 //==- HexagonPatterns.td - Target Description for Hexagon -*- tablegen -*-===// 1 //==- HexagonPatterns.td - Target Description for Hexagon -*- tablegen -*-===//
2 // 2 //
3 // The LLVM Compiler Infrastructure 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // 4 // See https://llvm.org/LICENSE.txt for license information.
5 // This file is distributed under the University of Illinois Open Source 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 // License. See LICENSE.TXT for details.
7 // 6 //
8 //===----------------------------------------------------------------------===// 7 //===----------------------------------------------------------------------===//
9 8
10 // Table of contents: 9 // Table of contents:
11 // (0) Definitions 10 // (0) Definitions
98 97
99 def HWI8: PatLeaf<(VecPI8 HvxWR:$R)>; 98 def HWI8: PatLeaf<(VecPI8 HvxWR:$R)>;
100 def HWI16: PatLeaf<(VecPI16 HvxWR:$R)>; 99 def HWI16: PatLeaf<(VecPI16 HvxWR:$R)>;
101 def HWI32: PatLeaf<(VecPI32 HvxWR:$R)>; 100 def HWI32: PatLeaf<(VecPI32 HvxWR:$R)>;
102 101
102 def SDTVecVecIntOp:
103 SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<1,2>,
104 SDTCisVT<3,i32>]>;
105
106 def HexagonVALIGN: SDNode<"HexagonISD::VALIGN", SDTVecVecIntOp>;
107 def HexagonVALIGNADDR: SDNode<"HexagonISD::VALIGNADDR", SDTIntUnaryOp>;
108
109 def valign: PatFrag<(ops node:$Vt, node:$Vs, node:$Ru),
110 (HexagonVALIGN node:$Vt, node:$Vs, node:$Ru)>;
111 def valignaddr: PatFrag<(ops node:$Addr), (HexagonVALIGNADDR node:$Addr)>;
112
103 // Pattern fragments to extract the low and high subregisters from a 113 // Pattern fragments to extract the low and high subregisters from a
104 // 64-bit value. 114 // 64-bit value.
105 def LoReg: OutPatFrag<(ops node:$Rs), (EXTRACT_SUBREG (i64 $Rs), isub_lo)>; 115 def LoReg: OutPatFrag<(ops node:$Rs), (EXTRACT_SUBREG (i64 $Rs), isub_lo)>;
106 def HiReg: OutPatFrag<(ops node:$Rs), (EXTRACT_SUBREG (i64 $Rs), isub_hi)>; 116 def HiReg: OutPatFrag<(ops node:$Rs), (EXTRACT_SUBREG (i64 $Rs), isub_hi)>;
107 117
163 def UDEC32: SDNodeXForm<imm, [{ 173 def UDEC32: SDNodeXForm<imm, [{
164 uint32_t V = N->getZExtValue(); 174 uint32_t V = N->getZExtValue();
165 assert(V >= 32); 175 assert(V >= 32);
166 return CurDAG->getTargetConstant(V-32, SDLoc(N), MVT::i32); 176 return CurDAG->getTargetConstant(V-32, SDLoc(N), MVT::i32);
167 }]>; 177 }]>;
178
179 class Subi<int From>: SDNodeXForm<imm,
180 "int32_t V = " # From # " - N->getSExtValue();" #
181 "return CurDAG->getTargetConstant(V, SDLoc(N), MVT::i32);"
182 >;
168 183
169 def Log2_32: SDNodeXForm<imm, [{ 184 def Log2_32: SDNodeXForm<imm, [{
170 uint32_t V = N->getZExtValue(); 185 uint32_t V = N->getZExtValue();
171 return CurDAG->getTargetConstant(Log2_32(V), SDLoc(N), MVT::i32); 186 return CurDAG->getTargetConstant(Log2_32(V), SDLoc(N), MVT::i32);
172 }]>; 187 }]>;
205 // Helpers for type promotions/contractions. 220 // Helpers for type promotions/contractions.
206 def I1toI32: OutPatFrag<(ops node:$Rs), (C2_muxii (i1 $Rs), 1, 0)>; 221 def I1toI32: OutPatFrag<(ops node:$Rs), (C2_muxii (i1 $Rs), 1, 0)>;
207 def I32toI1: OutPatFrag<(ops node:$Rs), (i1 (C2_cmpgtui (i32 $Rs), (i32 0)))>; 222 def I32toI1: OutPatFrag<(ops node:$Rs), (i1 (C2_cmpgtui (i32 $Rs), (i32 0)))>;
208 def ToZext64: OutPatFrag<(ops node:$Rs), (i64 (A4_combineir 0, (i32 $Rs)))>; 223 def ToZext64: OutPatFrag<(ops node:$Rs), (i64 (A4_combineir 0, (i32 $Rs)))>;
209 def ToSext64: OutPatFrag<(ops node:$Rs), (i64 (A2_sxtw (i32 $Rs)))>; 224 def ToSext64: OutPatFrag<(ops node:$Rs), (i64 (A2_sxtw (i32 $Rs)))>;
225 def ToAext64: OutPatFrag<(ops node:$Rs),
226 (REG_SEQUENCE DoubleRegs, (i32 (IMPLICIT_DEF)), isub_hi, (i32 $Rs), isub_lo)>;
210 227
211 def Combinew: OutPatFrag<(ops node:$Rs, node:$Rt), 228 def Combinew: OutPatFrag<(ops node:$Rs, node:$Rt),
212 (REG_SEQUENCE DoubleRegs, $Rs, isub_hi, $Rt, isub_lo)>; 229 (REG_SEQUENCE DoubleRegs, $Rs, isub_hi, $Rt, isub_lo)>;
213 230
214 def addrga: PatLeaf<(i32 AddrGA:$Addr)>; 231 def addrga: PatLeaf<(i32 AddrGA:$Addr)>;
233 250
234 def Aext64: PatFrag<(ops node:$Rs), (i64 (anyext node:$Rs))>; 251 def Aext64: PatFrag<(ops node:$Rs), (i64 (anyext node:$Rs))>;
235 def Zext64: PatFrag<(ops node:$Rs), (i64 (zext node:$Rs))>; 252 def Zext64: PatFrag<(ops node:$Rs), (i64 (zext node:$Rs))>;
236 def Sext64: PatLeaf<(i64 Usxtw:$Rs)>; 253 def Sext64: PatLeaf<(i64 Usxtw:$Rs)>;
237 254
255 def azext: PatFrags<(ops node:$Rs), [(zext node:$Rs), (anyext node:$Rs)]>;
256 def asext: PatFrags<(ops node:$Rs), [(sext node:$Rs), (anyext node:$Rs)]>;
257
238 def: Pat<(IsOrAdd (i32 AddrFI:$Rs), s32_0ImmPred:$off), 258 def: Pat<(IsOrAdd (i32 AddrFI:$Rs), s32_0ImmPred:$off),
239 (PS_fi (i32 AddrFI:$Rs), imm:$off)>; 259 (PS_fi (i32 AddrFI:$Rs), imm:$off)>;
240 260
241 261
242 // Converters from unary/binary SDNode to PatFrag. 262 // Converters from unary/binary SDNode to PatFrag.
244 class pf2<SDNode Op> : PatFrag<(ops node:$a, node:$b), (Op node:$a, node:$b)>; 264 class pf2<SDNode Op> : PatFrag<(ops node:$a, node:$b), (Op node:$a, node:$b)>;
245 265
246 class Not2<PatFrag P> 266 class Not2<PatFrag P>
247 : PatFrag<(ops node:$A, node:$B), (P node:$A, (not node:$B))>; 267 : PatFrag<(ops node:$A, node:$B), (P node:$A, (not node:$B))>;
248 268
269 // If there is a constant operand that feeds the and/or instruction,
270 // do not generate the compound instructions.
271 // It is not always profitable, as sometimes we end up with a transfer.
272 // Check the example below.
273 // ra = #65820; rb = lsr(rb, #8); rc ^= and (rb, ra)
274 // Instead this is preferable.
275 // ra = and (#65820, lsr(ra, #8)); rb = xor(rb, ra)
276 class Su_ni1<PatFrag Op>
277 : PatFrag<Op.Operands, !head(Op.Fragments), [{
278 if (hasOneUse(N)){
279 // Check if Op1 is an immediate operand.
280 SDValue Op1 = N->getOperand(1);
281 return !isa<ConstantSDNode>(Op1);
282 }
283 return false;}],
284 Op.OperandTransform>;
285
249 class Su<PatFrag Op> 286 class Su<PatFrag Op>
250 : PatFrag<Op.Operands, Op.Fragment, [{ return hasOneUse(N); }], 287 : PatFrag<Op.Operands, !head(Op.Fragments), [{ return hasOneUse(N); }],
251 Op.OperandTransform>; 288 Op.OperandTransform>;
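The Su and Su_ni1 wrappers gate the accumulating "compound" selections further down (for example M4_or_and, which computes Rx |= and(Rs,Rt) in a single instruction). A minimal C sketch of the shape being fused, assuming the single-use condition holds and, for Su_ni1, that the inner and/or has no plain constant operand (names here are illustrative, not LLVM APIs):

    #include <stdint.h>

    /* The inner 'and' feeds exactly one user, so folding it into a compound
       accumulate of the form Rx |= and(Rs,Rt) saves an instruction. */
    static uint32_t or_and(uint32_t x, uint32_t s, uint32_t t) {
      uint32_t a = s & t;   /* single use of 'a' below */
      return x | a;
    }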
252 289
253 // Main selection macros. 290 // Main selection macros.
254 291
255 class OpR_R_pat<InstHexagon MI, PatFrag Op, ValueType ResVT, PatFrag RegPred> 292 class OpR_R_pat<InstHexagon MI, PatFrag Op, ValueType ResVT, PatFrag RegPred>
269 PatFrag RegPred, PatFrag ImmPred> 306 PatFrag RegPred, PatFrag ImmPred>
270 : Pat<(AccOp RegPred:$Rx, (Op RegPred:$Rs, ImmPred:$I)), 307 : Pat<(AccOp RegPred:$Rx, (Op RegPred:$Rs, ImmPred:$I)),
271 (MI RegPred:$Rx, RegPred:$Rs, imm:$I)>; 308 (MI RegPred:$Rx, RegPred:$Rs, imm:$I)>;
272 309
273 class AccRRR_pat<InstHexagon MI, PatFrag AccOp, PatFrag Op, 310 class AccRRR_pat<InstHexagon MI, PatFrag AccOp, PatFrag Op,
274 PatFrag RsPred, PatFrag RtPred> 311 PatFrag RxPred, PatFrag RsPred, PatFrag RtPred>
275 : Pat<(AccOp RsPred:$Rx, (Op RsPred:$Rs, RtPred:$Rt)), 312 : Pat<(AccOp RxPred:$Rx, (Op RsPred:$Rs, RtPred:$Rt)),
276 (MI RsPred:$Rx, RsPred:$Rs, RtPred:$Rt)>; 313 (MI RxPred:$Rx, RsPred:$Rs, RtPred:$Rt)>;
277 314
278 multiclass SelMinMax_pats<PatFrag CmpOp, PatFrag Val, 315 multiclass SelMinMax_pats<PatFrag CmpOp, PatFrag Val,
279 InstHexagon InstA, InstHexagon InstB> { 316 InstHexagon InstA, InstHexagon InstB> {
280 def: Pat<(select (i1 (CmpOp Val:$A, Val:$B)), Val:$A, Val:$B), 317 def: Pat<(select (i1 (CmpOp Val:$A, Val:$B)), Val:$A, Val:$B),
281 (InstA Val:$A, Val:$B)>; 318 (InstA Val:$A, Val:$B)>;
287 // Frags for commonly used SDNodes. 324 // Frags for commonly used SDNodes.
288 def Add: pf2<add>; def And: pf2<and>; def Sra: pf2<sra>; 325 def Add: pf2<add>; def And: pf2<and>; def Sra: pf2<sra>;
289 def Sub: pf2<sub>; def Or: pf2<or>; def Srl: pf2<srl>; 326 def Sub: pf2<sub>; def Or: pf2<or>; def Srl: pf2<srl>;
290 def Mul: pf2<mul>; def Xor: pf2<xor>; def Shl: pf2<shl>; 327 def Mul: pf2<mul>; def Xor: pf2<xor>; def Shl: pf2<shl>;
291 328
329 def Rol: pf2<rotl>;
292 330
293 // --(1) Immediate ------------------------------------------------------- 331 // --(1) Immediate -------------------------------------------------------
294 // 332 //
295 333
296 def SDTHexagonCONST32 334 def SDTHexagonCONST32
334 def ToI32: OutPatFrag<(ops node:$V), (A2_tfrsi $V)>; 372 def ToI32: OutPatFrag<(ops node:$V), (A2_tfrsi $V)>;
335 373
336 // --(2) Type cast ------------------------------------------------------- 374 // --(2) Type cast -------------------------------------------------------
337 // 375 //
338 376
339 let Predicates = [HasV5T] in { 377 def: OpR_R_pat<F2_conv_sf2df, pf1<fpextend>, f64, F32>;
340 def: OpR_R_pat<F2_conv_sf2df, pf1<fpextend>, f64, F32>; 378 def: OpR_R_pat<F2_conv_df2sf, pf1<fpround>, f32, F64>;
341 def: OpR_R_pat<F2_conv_df2sf, pf1<fpround>, f32, F64>; 379
342 380 def: OpR_R_pat<F2_conv_w2sf, pf1<sint_to_fp>, f32, I32>;
343 def: OpR_R_pat<F2_conv_w2sf, pf1<sint_to_fp>, f32, I32>; 381 def: OpR_R_pat<F2_conv_d2sf, pf1<sint_to_fp>, f32, I64>;
344 def: OpR_R_pat<F2_conv_d2sf, pf1<sint_to_fp>, f32, I64>; 382 def: OpR_R_pat<F2_conv_w2df, pf1<sint_to_fp>, f64, I32>;
345 def: OpR_R_pat<F2_conv_w2df, pf1<sint_to_fp>, f64, I32>; 383 def: OpR_R_pat<F2_conv_d2df, pf1<sint_to_fp>, f64, I64>;
346 def: OpR_R_pat<F2_conv_d2df, pf1<sint_to_fp>, f64, I64>; 384
347 385 def: OpR_R_pat<F2_conv_uw2sf, pf1<uint_to_fp>, f32, I32>;
348 def: OpR_R_pat<F2_conv_uw2sf, pf1<uint_to_fp>, f32, I32>; 386 def: OpR_R_pat<F2_conv_ud2sf, pf1<uint_to_fp>, f32, I64>;
349 def: OpR_R_pat<F2_conv_ud2sf, pf1<uint_to_fp>, f32, I64>; 387 def: OpR_R_pat<F2_conv_uw2df, pf1<uint_to_fp>, f64, I32>;
350 def: OpR_R_pat<F2_conv_uw2df, pf1<uint_to_fp>, f64, I32>; 388 def: OpR_R_pat<F2_conv_ud2df, pf1<uint_to_fp>, f64, I64>;
351 def: OpR_R_pat<F2_conv_ud2df, pf1<uint_to_fp>, f64, I64>; 389
352 390 def: OpR_R_pat<F2_conv_sf2w_chop, pf1<fp_to_sint>, i32, F32>;
353 def: OpR_R_pat<F2_conv_sf2w_chop, pf1<fp_to_sint>, i32, F32>; 391 def: OpR_R_pat<F2_conv_df2w_chop, pf1<fp_to_sint>, i32, F64>;
354 def: OpR_R_pat<F2_conv_df2w_chop, pf1<fp_to_sint>, i32, F64>; 392 def: OpR_R_pat<F2_conv_sf2d_chop, pf1<fp_to_sint>, i64, F32>;
355 def: OpR_R_pat<F2_conv_sf2d_chop, pf1<fp_to_sint>, i64, F32>; 393 def: OpR_R_pat<F2_conv_df2d_chop, pf1<fp_to_sint>, i64, F64>;
356 def: OpR_R_pat<F2_conv_df2d_chop, pf1<fp_to_sint>, i64, F64>; 394
357 395 def: OpR_R_pat<F2_conv_sf2uw_chop, pf1<fp_to_uint>, i32, F32>;
358 def: OpR_R_pat<F2_conv_sf2uw_chop, pf1<fp_to_uint>, i32, F32>; 396 def: OpR_R_pat<F2_conv_df2uw_chop, pf1<fp_to_uint>, i32, F64>;
359 def: OpR_R_pat<F2_conv_df2uw_chop, pf1<fp_to_uint>, i32, F64>; 397 def: OpR_R_pat<F2_conv_sf2ud_chop, pf1<fp_to_uint>, i64, F32>;
360 def: OpR_R_pat<F2_conv_sf2ud_chop, pf1<fp_to_uint>, i64, F32>; 398 def: OpR_R_pat<F2_conv_df2ud_chop, pf1<fp_to_uint>, i64, F64>;
361 def: OpR_R_pat<F2_conv_df2ud_chop, pf1<fp_to_uint>, i64, F64>;
362 }
363 399
364 // Bitcast is different than [fp|sint|uint]_to_[sint|uint|fp]. 400 // Bitcast is different than [fp|sint|uint]_to_[sint|uint|fp].
365 let Predicates = [HasV5T] in { 401 def: Pat<(i32 (bitconvert F32:$v)), (I32:$v)>;
366 def: Pat<(i32 (bitconvert F32:$v)), (I32:$v)>; 402 def: Pat<(f32 (bitconvert I32:$v)), (F32:$v)>;
367 def: Pat<(f32 (bitconvert I32:$v)), (F32:$v)>; 403 def: Pat<(i64 (bitconvert F64:$v)), (I64:$v)>;
368 def: Pat<(i64 (bitconvert F64:$v)), (I64:$v)>; 404 def: Pat<(f64 (bitconvert I64:$v)), (F64:$v)>;
369 def: Pat<(f64 (bitconvert I64:$v)), (F64:$v)>;
370 }
371 405
372 multiclass Cast_pat<ValueType Ta, ValueType Tb, RegisterClass RC> { 406 multiclass Cast_pat<ValueType Ta, ValueType Tb, RegisterClass RC> {
373 def: Pat<(Tb (bitconvert (Ta RC:$Rs))), (Tb RC:$Rs)>; 407 def: Pat<(Tb (bitconvert (Ta RC:$Rs))), (Tb RC:$Rs)>;
374 def: Pat<(Ta (bitconvert (Tb RC:$Rs))), (Ta RC:$Rs)>; 408 def: Pat<(Ta (bitconvert (Tb RC:$Rs))), (Ta RC:$Rs)>;
375 } 409 }
389 def: Pat<(sext_inreg I32:$Rs, i16), (A2_sxth I32:$Rs)>; 423 def: Pat<(sext_inreg I32:$Rs, i16), (A2_sxth I32:$Rs)>;
390 def: Pat<(sext_inreg I64:$Rs, i32), (A2_sxtw (LoReg $Rs))>; 424 def: Pat<(sext_inreg I64:$Rs, i32), (A2_sxtw (LoReg $Rs))>;
391 def: Pat<(sext_inreg I64:$Rs, i16), (A2_sxtw (A2_sxth (LoReg $Rs)))>; 425 def: Pat<(sext_inreg I64:$Rs, i16), (A2_sxtw (A2_sxth (LoReg $Rs)))>;
392 def: Pat<(sext_inreg I64:$Rs, i8), (A2_sxtw (A2_sxtb (LoReg $Rs)))>; 426 def: Pat<(sext_inreg I64:$Rs, i8), (A2_sxtw (A2_sxtb (LoReg $Rs)))>;
393 427
394 def: Pat<(i64 (sext I1:$Pu)), 428 def: Pat<(i64 (sext I32:$Rs)), (A2_sxtw I32:$Rs)>;
395 (Combinew (C2_muxii PredRegs:$Pu, -1, 0), 429 def: Pat<(Zext64 I32:$Rs), (ToZext64 $Rs)>;
396 (C2_muxii PredRegs:$Pu, -1, 0))>; 430 def: Pat<(Aext64 I32:$Rs), (ToZext64 $Rs)>;
397 431
398 def: Pat<(i32 (sext I1:$Pu)), (C2_muxii I1:$Pu, -1, 0)>; 432 def: Pat<(i32 (trunc I64:$Rs)), (LoReg $Rs)>;
399 def: Pat<(i32 (zext I1:$Pu)), (C2_muxii I1:$Pu, 1, 0)>; 433 def: Pat<(i1 (trunc I32:$Rs)), (S2_tstbit_i I32:$Rs, 0)>;
400 def: Pat<(i64 (zext I1:$Pu)), (ToZext64 (C2_muxii I1:$Pu, 1, 0))>; 434 def: Pat<(i1 (trunc I64:$Rs)), (S2_tstbit_i (LoReg $Rs), 0)>;
435
436 let AddedComplexity = 20 in {
437 def: Pat<(and I32:$Rs, 255), (A2_zxtb I32:$Rs)>;
438 def: Pat<(and I32:$Rs, 65535), (A2_zxth I32:$Rs)>;
439 }
440
441 // Extensions from i1 or vectors of i1.
442 def: Pat<(i32 (azext I1:$Pu)), (C2_muxii I1:$Pu, 1, 0)>;
443 def: Pat<(i64 (azext I1:$Pu)), (ToZext64 (C2_muxii I1:$Pu, 1, 0))>;
444 def: Pat<(i32 (sext I1:$Pu)), (C2_muxii I1:$Pu, -1, 0)>;
445 def: Pat<(i64 (sext I1:$Pu)), (Combinew (C2_muxii PredRegs:$Pu, -1, 0),
446 (C2_muxii PredRegs:$Pu, -1, 0))>;
447
401 def: Pat<(v2i16 (sext V2I1:$Pu)), (S2_vtrunehb (C2_mask V2I1:$Pu))>; 448 def: Pat<(v2i16 (sext V2I1:$Pu)), (S2_vtrunehb (C2_mask V2I1:$Pu))>;
402 def: Pat<(v2i32 (sext V2I1:$Pu)), (C2_mask V2I1:$Pu)>; 449 def: Pat<(v2i32 (sext V2I1:$Pu)), (C2_mask V2I1:$Pu)>;
403 def: Pat<(v4i8 (sext V4I1:$Pu)), (S2_vtrunehb (C2_mask V4I1:$Pu))>; 450 def: Pat<(v4i8 (sext V4I1:$Pu)), (S2_vtrunehb (C2_mask V4I1:$Pu))>;
404 def: Pat<(v4i16 (sext V4I1:$Pu)), (C2_mask V4I1:$Pu)>; 451 def: Pat<(v4i16 (sext V4I1:$Pu)), (C2_mask V4I1:$Pu)>;
405 def: Pat<(v8i8 (sext V8I1:$Pu)), (C2_mask V8I1:$Pu)>; 452 def: Pat<(v8i8 (sext V8I1:$Pu)), (C2_mask V8I1:$Pu)>;
406 453
407 def: Pat<(i64 (sext I32:$Rs)), (A2_sxtw I32:$Rs)>; 454 def Vsplatpi: OutPatFrag<(ops node:$V),
408 def: Pat<(Zext64 I32:$Rs), (ToZext64 $Rs)>; 455 (Combinew (A2_tfrsi $V), (A2_tfrsi $V))>;
409 def: Pat<(Aext64 I32:$Rs), (ToZext64 $Rs)>; 456
410 457 def: Pat<(v2i16 (azext V2I1:$Pu)),
411 def: Pat<(i32 (trunc I64:$Rs)), (LoReg $Rs)>; 458 (A2_andir (LoReg (C2_mask V2I1:$Pu)), (i32 0x00010001))>;
412 def: Pat<(i1 (trunc I64:$Rs)), (C2_tfrrp (LoReg $Rs))>; 459 def: Pat<(v2i32 (azext V2I1:$Pu)),
413 460 (A2_andp (C2_mask V2I1:$Pu), (A2_combineii (i32 1), (i32 1)))>;
414 let AddedComplexity = 20 in { 461 def: Pat<(v4i8 (azext V4I1:$Pu)),
415 def: Pat<(and I32:$Rs, 255), (A2_zxtb I32:$Rs)>; 462 (A2_andir (LoReg (C2_mask V4I1:$Pu)), (i32 0x01010101))>;
416 def: Pat<(and I32:$Rs, 65535), (A2_zxth I32:$Rs)>; 463 def: Pat<(v4i16 (azext V4I1:$Pu)),
417 } 464 (A2_andp (C2_mask V4I1:$Pu), (Vsplatpi (i32 0x00010001)))>;
418 465 def: Pat<(v8i8 (azext V8I1:$Pu)),
419 def: Pat<(i32 (anyext I1:$Pu)), (C2_muxii I1:$Pu, 1, 0)>; 466 (A2_andp (C2_mask V8I1:$Pu), (Vsplatpi (i32 0x01010101)))>;
420 def: Pat<(i64 (anyext I1:$Pu)), (ToZext64 (C2_muxii I1:$Pu, 1, 0))>; 467
421 468 def: Pat<(v4i16 (azext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>;
422 def: Pat<(v8i8 (zext V8I1:$Pu)), (C2_mask V8I1:$Pu)>; 469 def: Pat<(v2i32 (azext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>;
423 def: Pat<(v4i16 (zext V4I1:$Pu)), (C2_mask V4I1:$Pu)>;
424 def: Pat<(v2i32 (zext V2I1:$Pu)), (C2_mask V2I1:$Pu)>;
425 def: Pat<(v4i8 (zext V4I1:$Pu)), (LoReg (C2_mask V4I1:$Pu))>;
426 def: Pat<(v2i16 (zext V2I1:$Pu)), (LoReg (C2_mask V2I1:$Pu))>;
427
428 def: Pat<(v4i16 (zext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>;
429 def: Pat<(v2i32 (zext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>;
430 def: Pat<(v4i16 (anyext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>;
431 def: Pat<(v2i32 (anyext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>;
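For reference, the azext-of-predicate-vector patterns added on the right expand each i1 lane to all-ones with C2_mask and then AND with a per-lane splat of 1. A C sketch of the v2i16 case, with illustrative names:

    #include <stdint.h>

    /* C2_mask produces 0xFFFF per true lane; AND with 0x00010001 leaves
       the 0/1 zero-extended value in each 16-bit lane. */
    static uint32_t zext_v2i1_to_v2i16(int lane0, int lane1) {
      uint32_t mask = (lane0 ? 0x0000FFFFu : 0u) | (lane1 ? 0xFFFF0000u : 0u);
      return mask & 0x00010001u;
    }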
432 def: Pat<(v4i16 (sext V4I8:$Rs)), (S2_vsxtbh V4I8:$Rs)>; 470 def: Pat<(v4i16 (sext V4I8:$Rs)), (S2_vsxtbh V4I8:$Rs)>;
433 def: Pat<(v2i32 (sext V2I16:$Rs)), (S2_vsxthw V2I16:$Rs)>; 471 def: Pat<(v2i32 (sext V2I16:$Rs)), (S2_vsxthw V2I16:$Rs)>;
434 472
435 def: Pat<(v2i32 (sext_inreg V2I32:$Rs, v2i8)), 473 def: Pat<(v2i32 (sext_inreg V2I32:$Rs, v2i8)),
436 (Combinew (A2_sxtb (HiReg $Rs)), (A2_sxtb (LoReg $Rs)))>; 474 (Combinew (A2_sxtb (HiReg $Rs)), (A2_sxtb (LoReg $Rs)))>;
468 def: OpR_RR_pat<MI, Op, v4i1, V4I1>; 506 def: OpR_RR_pat<MI, Op, v4i1, V4I1>;
469 def: OpR_RR_pat<MI, Op, v8i1, V8I1>; 507 def: OpR_RR_pat<MI, Op, v8i1, V8I1>;
470 } 508 }
471 509
472 multiclass BoolAccRRR_pat<InstHexagon MI, PatFrag AccOp, PatFrag Op> { 510 multiclass BoolAccRRR_pat<InstHexagon MI, PatFrag AccOp, PatFrag Op> {
473 def: AccRRR_pat<MI, AccOp, Op, I1, I1>; 511 def: AccRRR_pat<MI, AccOp, Op, I1, I1, I1>;
474 def: AccRRR_pat<MI, AccOp, Op, V2I1, V2I1>; 512 def: AccRRR_pat<MI, AccOp, Op, V2I1, V2I1, V2I1>;
475 def: AccRRR_pat<MI, AccOp, Op, V4I1, V4I1>; 513 def: AccRRR_pat<MI, AccOp, Op, V4I1, V4I1, V4I1>;
476 def: AccRRR_pat<MI, AccOp, Op, V8I1, V8I1>; 514 def: AccRRR_pat<MI, AccOp, Op, V8I1, V8I1, V8I1>;
477 } 515 }
478 516
479 defm: BoolOpR_RR_pat<C2_and, And>; 517 defm: BoolOpR_RR_pat<C2_and, And>;
480 defm: BoolOpR_RR_pat<C2_or, Or>; 518 defm: BoolOpR_RR_pat<C2_or, Or>;
481 defm: BoolOpR_RR_pat<C2_xor, Xor>; 519 defm: BoolOpR_RR_pat<C2_xor, Xor>;
516 (C2_not (C2_cmpgtui I32:$Rs, (UDEC1 imm:$u9)))>; 554 (C2_not (C2_cmpgtui I32:$Rs, (UDEC1 imm:$u9)))>;
517 555
518 // Patfrag to convert the usual comparison patfrags (e.g. setlt) to ones 556 // Patfrag to convert the usual comparison patfrags (e.g. setlt) to ones
519 // that reverse the order of the operands. 557 // that reverse the order of the operands.
520 class RevCmp<PatFrag F> 558 class RevCmp<PatFrag F>
521 : PatFrag<(ops node:$rhs, node:$lhs), F.Fragment, F.PredicateCode, 559 : PatFrag<(ops node:$rhs, node:$lhs), !head(F.Fragments), F.PredicateCode,
522 F.OperandTransform>; 560 F.OperandTransform>;
523 561
524 def: OpR_RR_pat<C2_cmpeq, seteq, i1, I32>; 562 def: OpR_RR_pat<C2_cmpeq, seteq, i1, I32>;
525 def: OpR_RR_pat<C2_cmpgt, setgt, i1, I32>; 563 def: OpR_RR_pat<C2_cmpgt, setgt, i1, I32>;
526 def: OpR_RR_pat<C2_cmpgtu, setugt, i1, I32>; 564 def: OpR_RR_pat<C2_cmpgtu, setugt, i1, I32>;
560 def: OpR_RR_pat<A2_vcmpwgtu, RevCmp<setult>, i1, V2I32>; 598 def: OpR_RR_pat<A2_vcmpwgtu, RevCmp<setult>, i1, V2I32>;
561 def: OpR_RR_pat<A2_vcmpwgtu, RevCmp<setult>, v2i1, V2I32>; 599 def: OpR_RR_pat<A2_vcmpwgtu, RevCmp<setult>, v2i1, V2I32>;
562 def: OpR_RR_pat<A2_vcmpwgtu, setugt, i1, V2I32>; 600 def: OpR_RR_pat<A2_vcmpwgtu, setugt, i1, V2I32>;
563 def: OpR_RR_pat<A2_vcmpwgtu, setugt, v2i1, V2I32>; 601 def: OpR_RR_pat<A2_vcmpwgtu, setugt, v2i1, V2I32>;
564 602
565 let Predicates = [HasV5T] in { 603 def: OpR_RR_pat<F2_sfcmpeq, seteq, i1, F32>;
566 def: OpR_RR_pat<F2_sfcmpeq, seteq, i1, F32>; 604 def: OpR_RR_pat<F2_sfcmpgt, setgt, i1, F32>;
567 def: OpR_RR_pat<F2_sfcmpgt, setgt, i1, F32>; 605 def: OpR_RR_pat<F2_sfcmpge, setge, i1, F32>;
568 def: OpR_RR_pat<F2_sfcmpge, setge, i1, F32>; 606 def: OpR_RR_pat<F2_sfcmpeq, setoeq, i1, F32>;
569 def: OpR_RR_pat<F2_sfcmpeq, setoeq, i1, F32>; 607 def: OpR_RR_pat<F2_sfcmpgt, setogt, i1, F32>;
570 def: OpR_RR_pat<F2_sfcmpgt, setogt, i1, F32>; 608 def: OpR_RR_pat<F2_sfcmpge, setoge, i1, F32>;
571 def: OpR_RR_pat<F2_sfcmpge, setoge, i1, F32>; 609 def: OpR_RR_pat<F2_sfcmpgt, RevCmp<setolt>, i1, F32>;
572 def: OpR_RR_pat<F2_sfcmpgt, RevCmp<setolt>, i1, F32>; 610 def: OpR_RR_pat<F2_sfcmpge, RevCmp<setole>, i1, F32>;
573 def: OpR_RR_pat<F2_sfcmpge, RevCmp<setole>, i1, F32>; 611 def: OpR_RR_pat<F2_sfcmpgt, RevCmp<setlt>, i1, F32>;
574 def: OpR_RR_pat<F2_sfcmpgt, RevCmp<setlt>, i1, F32>; 612 def: OpR_RR_pat<F2_sfcmpge, RevCmp<setle>, i1, F32>;
575 def: OpR_RR_pat<F2_sfcmpge, RevCmp<setle>, i1, F32>; 613 def: OpR_RR_pat<F2_sfcmpuo, setuo, i1, F32>;
576 def: OpR_RR_pat<F2_sfcmpuo, setuo, i1, F32>; 614
577 615 def: OpR_RR_pat<F2_dfcmpeq, seteq, i1, F64>;
578 def: OpR_RR_pat<F2_dfcmpeq, seteq, i1, F64>; 616 def: OpR_RR_pat<F2_dfcmpgt, setgt, i1, F64>;
579 def: OpR_RR_pat<F2_dfcmpgt, setgt, i1, F64>; 617 def: OpR_RR_pat<F2_dfcmpge, setge, i1, F64>;
580 def: OpR_RR_pat<F2_dfcmpge, setge, i1, F64>; 618 def: OpR_RR_pat<F2_dfcmpeq, setoeq, i1, F64>;
581 def: OpR_RR_pat<F2_dfcmpeq, setoeq, i1, F64>; 619 def: OpR_RR_pat<F2_dfcmpgt, setogt, i1, F64>;
582 def: OpR_RR_pat<F2_dfcmpgt, setogt, i1, F64>; 620 def: OpR_RR_pat<F2_dfcmpge, setoge, i1, F64>;
583 def: OpR_RR_pat<F2_dfcmpge, setoge, i1, F64>; 621 def: OpR_RR_pat<F2_dfcmpgt, RevCmp<setolt>, i1, F64>;
584 def: OpR_RR_pat<F2_dfcmpgt, RevCmp<setolt>, i1, F64>; 622 def: OpR_RR_pat<F2_dfcmpge, RevCmp<setole>, i1, F64>;
585 def: OpR_RR_pat<F2_dfcmpge, RevCmp<setole>, i1, F64>; 623 def: OpR_RR_pat<F2_dfcmpgt, RevCmp<setlt>, i1, F64>;
586 def: OpR_RR_pat<F2_dfcmpgt, RevCmp<setlt>, i1, F64>; 624 def: OpR_RR_pat<F2_dfcmpge, RevCmp<setle>, i1, F64>;
587 def: OpR_RR_pat<F2_dfcmpge, RevCmp<setle>, i1, F64>; 625 def: OpR_RR_pat<F2_dfcmpuo, setuo, i1, F64>;
588 def: OpR_RR_pat<F2_dfcmpuo, setuo, i1, F64>;
589 }
590 626
591 // Avoid C4_cmpneqi, C4_cmpltei, C4_cmplteui, since they cannot form compounds. 627 // Avoid C4_cmpneqi, C4_cmpltei, C4_cmplteui, since they cannot form compounds.
592 628
593 def: Pat<(i1 (setne I32:$Rs, anyimm:$u5)), 629 def: Pat<(i1 (setne I32:$Rs, anyimm:$u5)),
594 (C2_not (C2_cmpeqi I32:$Rs, imm:$u5))>; 630 (C2_not (C2_cmpeqi I32:$Rs, imm:$u5))>;
595 def: Pat<(i1 (setle I32:$Rs, anyimm:$u5)), 631 def: Pat<(i1 (setle I32:$Rs, anyimm:$u5)),
596 (C2_not (C2_cmpgti I32:$Rs, imm:$u5))>; 632 (C2_not (C2_cmpgti I32:$Rs, imm:$u5))>;
597 def: Pat<(i1 (setule I32:$Rs, anyimm:$u5)), 633 def: Pat<(i1 (setule I32:$Rs, anyimm:$u5)),
598 (C2_not (C2_cmpgtui I32:$Rs, imm:$u5))>; 634 (C2_not (C2_cmpgtui I32:$Rs, imm:$u5))>;
599 635
600 def: Pat<(i1 (setne I32:$Rs, I32:$Rt)), 636 class OpmR_RR_pat<PatFrag Output, PatFrag Op, ValueType ResType,
601 (C2_not (C2_cmpeq I32:$Rs, I32:$Rt))>; 637 PatFrag RsPred, PatFrag RtPred = RsPred>
602 def: Pat<(i1 (setle I32:$Rs, I32:$Rt)), 638 : Pat<(ResType (Op RsPred:$Rs, RtPred:$Rt)),
603 (C2_not (C2_cmpgt I32:$Rs, I32:$Rt))>; 639 (Output RsPred:$Rs, RtPred:$Rt)>;
604 def: Pat<(i1 (setule I32:$Rs, I32:$Rt)), 640
605 (C2_not (C2_cmpgtu I32:$Rs, I32:$Rt))>; 641 class Outn<InstHexagon MI>
606 def: Pat<(i1 (setge I32:$Rs, I32:$Rt)), 642 : OutPatFrag<(ops node:$Rs, node:$Rt),
607 (C2_not (C2_cmpgt I32:$Rt, I32:$Rs))>; 643 (C2_not (MI $Rs, $Rt))>;
608 def: Pat<(i1 (setuge I32:$Rs, I32:$Rt)), 644
609 (C2_not (C2_cmpgtu I32:$Rt, I32:$Rs))>; 645 def: OpmR_RR_pat<Outn<C2_cmpeq>, setne, i1, I32>;
610 646 def: OpmR_RR_pat<Outn<C2_cmpgt>, setle, i1, I32>;
611 def: Pat<(i1 (setle I64:$Rs, I64:$Rt)), 647 def: OpmR_RR_pat<Outn<C2_cmpgtu>, setule, i1, I32>;
612 (C2_not (C2_cmpgtp I64:$Rs, I64:$Rt))>; 648 def: OpmR_RR_pat<Outn<C2_cmpgt>, RevCmp<setge>, i1, I32>;
613 def: Pat<(i1 (setne I64:$Rs, I64:$Rt)), 649 def: OpmR_RR_pat<Outn<C2_cmpgtu>, RevCmp<setuge>, i1, I32>;
614 (C2_not (C2_cmpeqp I64:$Rs, I64:$Rt))>; 650 def: OpmR_RR_pat<Outn<C2_cmpeqp>, setne, i1, I64>;
615 def: Pat<(i1 (setge I64:$Rs, I64:$Rt)), 651 def: OpmR_RR_pat<Outn<C2_cmpgtp>, setle, i1, I64>;
616 (C2_not (C2_cmpgtp I64:$Rt, I64:$Rs))>; 652 def: OpmR_RR_pat<Outn<C2_cmpgtup>, setule, i1, I64>;
617 def: Pat<(i1 (setuge I64:$Rs, I64:$Rt)), 653 def: OpmR_RR_pat<Outn<C2_cmpgtp>, RevCmp<setge>, i1, I64>;
618 (C2_not (C2_cmpgtup I64:$Rt, I64:$Rs))>; 654 def: OpmR_RR_pat<Outn<C2_cmpgtup>, RevCmp<setuge>, i1, I64>;
619 def: Pat<(i1 (setule I64:$Rs, I64:$Rt)), 655 def: OpmR_RR_pat<Outn<A2_vcmpbeq>, setne, v8i1, V8I8>;
620 (C2_not (C2_cmpgtup I64:$Rs, I64:$Rt))>; 656 def: OpmR_RR_pat<Outn<A4_vcmpbgt>, setle, v8i1, V8I8>;
657 def: OpmR_RR_pat<Outn<A2_vcmpbgtu>, setule, v8i1, V8I8>;
658 def: OpmR_RR_pat<Outn<A4_vcmpbgt>, RevCmp<setge>, v8i1, V8I8>;
659 def: OpmR_RR_pat<Outn<A2_vcmpbgtu>, RevCmp<setuge>, v8i1, V8I8>;
660 def: OpmR_RR_pat<Outn<A2_vcmpheq>, setne, v4i1, V4I16>;
661 def: OpmR_RR_pat<Outn<A2_vcmphgt>, setle, v4i1, V4I16>;
662 def: OpmR_RR_pat<Outn<A2_vcmphgtu>, setule, v4i1, V4I16>;
663 def: OpmR_RR_pat<Outn<A2_vcmphgt>, RevCmp<setge>, v4i1, V4I16>;
664 def: OpmR_RR_pat<Outn<A2_vcmphgtu>, RevCmp<setuge>, v4i1, V4I16>;
665 def: OpmR_RR_pat<Outn<A2_vcmpweq>, setne, v2i1, V2I32>;
666 def: OpmR_RR_pat<Outn<A2_vcmpwgt>, setle, v2i1, V2I32>;
667 def: OpmR_RR_pat<Outn<A2_vcmpwgtu>, setule, v2i1, V2I32>;
668 def: OpmR_RR_pat<Outn<A2_vcmpwgt>, RevCmp<setge>, v2i1, V2I32>;
669 def: OpmR_RR_pat<Outn<A2_vcmpwgtu>, RevCmp<setuge>, v2i1, V2I32>;
621 670
622 let AddedComplexity = 100 in { 671 let AddedComplexity = 100 in {
623 def: Pat<(i1 (seteq (and (xor I32:$Rs, I32:$Rt), 255), 0)), 672 def: Pat<(i1 (seteq (and (xor I32:$Rs, I32:$Rt), 255), 0)),
624 (A4_cmpbeq IntRegs:$Rs, IntRegs:$Rt)>; 673 (A4_cmpbeq IntRegs:$Rs, IntRegs:$Rt)>;
625 def: Pat<(i1 (setne (and (xor I32:$Rs, I32:$Rt), 255), 0)), 674 def: Pat<(i1 (setne (and (xor I32:$Rs, I32:$Rt), 255), 0)),
677 def: Pat<(i32 (zext (i1 (seteq I32:$Rs, anyimm:$s8)))), 726 def: Pat<(i32 (zext (i1 (seteq I32:$Rs, anyimm:$s8)))),
678 (A4_rcmpeqi I32:$Rs, imm:$s8)>; 727 (A4_rcmpeqi I32:$Rs, imm:$s8)>;
679 def: Pat<(i32 (zext (i1 (setne I32:$Rs, anyimm:$s8)))), 728 def: Pat<(i32 (zext (i1 (setne I32:$Rs, anyimm:$s8)))),
680 (A4_rcmpneqi I32:$Rs, imm:$s8)>; 729 (A4_rcmpneqi I32:$Rs, imm:$s8)>;
681 730
682 def: Pat<(i1 (setne I1:$Ps, I1:$Pt)), 731 def: Pat<(i1 (seteq I1:$Ps, (i1 -1))), (I1:$Ps)>;
683 (C2_xor I1:$Ps, I1:$Pt)>; 732 def: Pat<(i1 (setne I1:$Ps, (i1 -1))), (C2_not I1:$Ps)>;
684 733 def: Pat<(i1 (seteq I1:$Ps, I1:$Pt)), (C2_xor I1:$Ps, (C2_not I1:$Pt))>;
685 def: Pat<(i1 (seteq V4I8:$Rs, V4I8:$Rt)), 734 def: Pat<(i1 (setne I1:$Ps, I1:$Pt)), (C2_xor I1:$Ps, I1:$Pt)>;
686 (A2_vcmpbeq (ToZext64 $Rs), (ToZext64 $Rt))>;
687 def: Pat<(i1 (setgt V4I8:$Rs, V4I8:$Rt)),
688 (A4_vcmpbgt (ToZext64 $Rs), (ToZext64 $Rt))>;
689 def: Pat<(i1 (setugt V4I8:$Rs, V4I8:$Rt)),
690 (A2_vcmpbgtu (ToZext64 $Rs), (ToZext64 $Rt))>;
691
692 def: Pat<(i1 (seteq V2I16:$Rs, V2I16:$Rt)),
693 (A2_vcmpheq (ToZext64 $Rs), (ToZext64 $Rt))>;
694 def: Pat<(i1 (setgt V2I16:$Rs, V2I16:$Rt)),
695 (A2_vcmphgt (ToZext64 $Rs), (ToZext64 $Rt))>;
696 def: Pat<(i1 (setugt V2I16:$Rs, V2I16:$Rt)),
697 (A2_vcmphgtu (ToZext64 $Rs), (ToZext64 $Rt))>;
698
699 def: Pat<(v2i1 (setne V2I32:$Rs, V2I32:$Rt)),
700 (C2_not (v2i1 (A2_vcmpbeq V2I32:$Rs, V2I32:$Rt)))>;
701 735
702 // Floating-point comparisons with checks for ordered/unordered status. 736 // Floating-point comparisons with checks for ordered/unordered status.
703 737
704 class T3<InstHexagon MI1, InstHexagon MI2, InstHexagon MI3> 738 class T3<InstHexagon MI1, InstHexagon MI2, InstHexagon MI3>
705 : OutPatFrag<(ops node:$Rs, node:$Rt), 739 : OutPatFrag<(ops node:$Rs, node:$Rt),
706 (MI1 (MI2 $Rs, $Rt), (MI3 $Rs, $Rt))>; 740 (MI1 (MI2 $Rs, $Rt), (MI3 $Rs, $Rt))>;
707 741
708 class OpmR_RR_pat<PatFrag Output, PatFrag Op, ValueType ResType,
709 PatFrag RsPred, PatFrag RtPred = RsPred>
710 : Pat<(ResType (Op RsPred:$Rs, RtPred:$Rt)),
711 (Output RsPred:$Rs, RtPred:$Rt)>;
712
713 class Cmpuf<InstHexagon MI>: T3<C2_or, F2_sfcmpuo, MI>; 742 class Cmpuf<InstHexagon MI>: T3<C2_or, F2_sfcmpuo, MI>;
714 class Cmpud<InstHexagon MI>: T3<C2_or, F2_dfcmpuo, MI>; 743 class Cmpud<InstHexagon MI>: T3<C2_or, F2_dfcmpuo, MI>;
715 744
716 class Cmpufn<InstHexagon MI>: T3<C2_orn, F2_sfcmpuo, MI>; 745 class Cmpufn<InstHexagon MI>: T3<C2_orn, F2_sfcmpuo, MI>;
717 class Cmpudn<InstHexagon MI>: T3<C2_orn, F2_dfcmpuo, MI>; 746 class Cmpudn<InstHexagon MI>: T3<C2_orn, F2_dfcmpuo, MI>;
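The Cmpuf/Cmpufn (and Cmpud/Cmpudn) helpers OR an explicit unordered check into an ordered compare, so the unordered comparison codes can be built from the ordered compare instructions. Reference semantics in C (function names are illustrative):

    #include <math.h>

    /* setueq: Cmpuf<F2_sfcmpeq> = C2_or(F2_sfcmpuo, F2_sfcmpeq). */
    static int setueq_f32(float a, float b) {
      return isunordered(a, b) || (a == b);
    }

    /* setune: Cmpufn<F2_sfcmpeq> uses or-with-complement (C2_orn),
       i.e. unordered OR NOT(ordered-equal). */
    static int setune_f32(float a, float b) {
      return isunordered(a, b) || !(a == b);
    }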
718 747
719 let Predicates = [HasV5T] in { 748 def: OpmR_RR_pat<Cmpuf<F2_sfcmpeq>, setueq, i1, F32>;
720 def: OpmR_RR_pat<Cmpuf<F2_sfcmpeq>, setueq, i1, F32>; 749 def: OpmR_RR_pat<Cmpuf<F2_sfcmpge>, setuge, i1, F32>;
721 def: OpmR_RR_pat<Cmpuf<F2_sfcmpge>, setuge, i1, F32>; 750 def: OpmR_RR_pat<Cmpuf<F2_sfcmpgt>, setugt, i1, F32>;
722 def: OpmR_RR_pat<Cmpuf<F2_sfcmpgt>, setugt, i1, F32>; 751 def: OpmR_RR_pat<Cmpuf<F2_sfcmpge>, RevCmp<setule>, i1, F32>;
723 def: OpmR_RR_pat<Cmpuf<F2_sfcmpge>, RevCmp<setule>, i1, F32>; 752 def: OpmR_RR_pat<Cmpuf<F2_sfcmpgt>, RevCmp<setult>, i1, F32>;
724 def: OpmR_RR_pat<Cmpuf<F2_sfcmpgt>, RevCmp<setult>, i1, F32>; 753 def: OpmR_RR_pat<Cmpufn<F2_sfcmpeq>, setune, i1, F32>;
725 def: OpmR_RR_pat<Cmpufn<F2_sfcmpeq>, setune, i1, F32>; 754
726 755 def: OpmR_RR_pat<Cmpud<F2_dfcmpeq>, setueq, i1, F64>;
727 def: OpmR_RR_pat<Cmpud<F2_dfcmpeq>, setueq, i1, F64>; 756 def: OpmR_RR_pat<Cmpud<F2_dfcmpge>, setuge, i1, F64>;
728 def: OpmR_RR_pat<Cmpud<F2_dfcmpge>, setuge, i1, F64>; 757 def: OpmR_RR_pat<Cmpud<F2_dfcmpgt>, setugt, i1, F64>;
729 def: OpmR_RR_pat<Cmpud<F2_dfcmpgt>, setugt, i1, F64>; 758 def: OpmR_RR_pat<Cmpud<F2_dfcmpge>, RevCmp<setule>, i1, F64>;
730 def: OpmR_RR_pat<Cmpud<F2_dfcmpge>, RevCmp<setule>, i1, F64>; 759 def: OpmR_RR_pat<Cmpud<F2_dfcmpgt>, RevCmp<setult>, i1, F64>;
731 def: OpmR_RR_pat<Cmpud<F2_dfcmpgt>, RevCmp<setult>, i1, F64>; 760 def: OpmR_RR_pat<Cmpudn<F2_dfcmpeq>, setune, i1, F64>;
732 def: OpmR_RR_pat<Cmpudn<F2_dfcmpeq>, setune, i1, F64>; 761
733 } 762 def: OpmR_RR_pat<Outn<F2_sfcmpeq>, setone, i1, F32>;
734 763 def: OpmR_RR_pat<Outn<F2_sfcmpeq>, setne, i1, F32>;
735 class Outn<InstHexagon MI> 764
736 : OutPatFrag<(ops node:$Rs, node:$Rt), 765 def: OpmR_RR_pat<Outn<F2_dfcmpeq>, setone, i1, F64>;
737 (C2_not (MI $Rs, $Rt))>; 766 def: OpmR_RR_pat<Outn<F2_dfcmpeq>, setne, i1, F64>;
738 767
739 let Predicates = [HasV5T] in { 768 def: OpmR_RR_pat<Outn<F2_sfcmpuo>, seto, i1, F32>;
740 def: OpmR_RR_pat<Outn<F2_sfcmpeq>, setone, i1, F32>; 769 def: OpmR_RR_pat<Outn<F2_dfcmpuo>, seto, i1, F64>;
741 def: OpmR_RR_pat<Outn<F2_sfcmpeq>, setne, i1, F32>;
742
743 def: OpmR_RR_pat<Outn<F2_dfcmpeq>, setone, i1, F64>;
744 def: OpmR_RR_pat<Outn<F2_dfcmpeq>, setne, i1, F64>;
745
746 def: OpmR_RR_pat<Outn<F2_sfcmpuo>, seto, i1, F32>;
747 def: OpmR_RR_pat<Outn<F2_dfcmpuo>, seto, i1, F64>;
748 }
749 770
750 771
751 // --(6) Select ---------------------------------------------------------- 772 // --(6) Select ----------------------------------------------------------
752 // 773 //
753 774
773 // Hexagon does not support 64-bit MUXes; so emulate with combines. 794 // Hexagon does not support 64-bit MUXes; so emulate with combines.
774 def: Pat<(select I1:$Pu, I64:$Rs, I64:$Rt), 795 def: Pat<(select I1:$Pu, I64:$Rs, I64:$Rt),
775 (Combinew (C2_mux I1:$Pu, (HiReg $Rs), (HiReg $Rt)), 796 (Combinew (C2_mux I1:$Pu, (HiReg $Rs), (HiReg $Rt)),
776 (C2_mux I1:$Pu, (LoReg $Rs), (LoReg $Rt)))>; 797 (C2_mux I1:$Pu, (LoReg $Rs), (LoReg $Rt)))>;
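A C sketch of the emulation above: the predicate selects the high and low 32-bit halves with two muxes, and Combinew reassembles the 64-bit result (names are illustrative):

    #include <stdint.h>

    static uint64_t select64(int pred, uint64_t a, uint64_t b) {
      uint32_t hi = pred ? (uint32_t)(a >> 32) : (uint32_t)(b >> 32); /* C2_mux on HiReg */
      uint32_t lo = pred ? (uint32_t)a         : (uint32_t)b;         /* C2_mux on LoReg */
      return ((uint64_t)hi << 32) | lo;                               /* Combinew */
    }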
777 798
778 let Predicates = [HasV5T] in { 799 def: Pat<(select I1:$Pu, F32:$Rs, f32ImmPred:$I),
779 def: Pat<(select I1:$Pu, F32:$Rs, f32ImmPred:$I), 800 (C2_muxir I1:$Pu, F32:$Rs, (ftoi $I))>;
780 (C2_muxir I1:$Pu, F32:$Rs, (ftoi $I))>; 801 def: Pat<(select I1:$Pu, f32ImmPred:$I, F32:$Rt),
781 def: Pat<(select I1:$Pu, f32ImmPred:$I, F32:$Rt), 802 (C2_muxri I1:$Pu, (ftoi $I), F32:$Rt)>;
782 (C2_muxri I1:$Pu, (ftoi $I), F32:$Rt)>; 803 def: Pat<(select I1:$Pu, F32:$Rs, F32:$Rt),
783 def: Pat<(select I1:$Pu, F32:$Rs, F32:$Rt), 804 (C2_mux I1:$Pu, F32:$Rs, F32:$Rt)>;
784 (C2_mux I1:$Pu, F32:$Rs, F32:$Rt)>; 805 def: Pat<(select I1:$Pu, F64:$Rs, F64:$Rt),
785 def: Pat<(select I1:$Pu, F64:$Rs, F64:$Rt), 806 (Combinew (C2_mux I1:$Pu, (HiReg $Rs), (HiReg $Rt)),
786 (Combinew (C2_mux I1:$Pu, (HiReg $Rs), (HiReg $Rt)), 807 (C2_mux I1:$Pu, (LoReg $Rs), (LoReg $Rt)))>;
787 (C2_mux I1:$Pu, (LoReg $Rs), (LoReg $Rt)))>; 808
788 809 def: Pat<(select (i1 (setult F32:$Ra, F32:$Rb)), F32:$Rs, F32:$Rt),
789 def: Pat<(select (i1 (setult F32:$Ra, F32:$Rb)), F32:$Rs, F32:$Rt), 810 (C2_mux (F2_sfcmpgt F32:$Rb, F32:$Ra), F32:$Rs, F32:$Rt)>;
790 (C2_mux (F2_sfcmpgt F32:$Rb, F32:$Ra), F32:$Rs, F32:$Rt)>; 811 def: Pat<(select (i1 (setult F64:$Ra, F64:$Rb)), F64:$Rs, F64:$Rt),
791 def: Pat<(select (i1 (setult F64:$Ra, F64:$Rb)), F64:$Rs, F64:$Rt), 812 (C2_vmux (F2_dfcmpgt F64:$Rb, F64:$Ra), F64:$Rs, F64:$Rt)>;
792 (C2_vmux (F2_dfcmpgt F64:$Rb, F64:$Ra), F64:$Rs, F64:$Rt)>; 813
793 814 def: Pat<(select (not I1:$Pu), f32ImmPred:$I, F32:$Rs),
794 def: Pat<(select (not I1:$Pu), f32ImmPred:$I, F32:$Rs), 815 (C2_muxir I1:$Pu, F32:$Rs, (ftoi $I))>;
795 (C2_muxir I1:$Pu, F32:$Rs, (ftoi $I))>; 816 def: Pat<(select (not I1:$Pu), F32:$Rt, f32ImmPred:$I),
796 def: Pat<(select (not I1:$Pu), F32:$Rt, f32ImmPred:$I), 817 (C2_muxri I1:$Pu, (ftoi $I), F32:$Rt)>;
797 (C2_muxri I1:$Pu, (ftoi $I), F32:$Rt)>;
798 }
799 818
800 def: Pat<(select I1:$Pu, V4I8:$Rs, V4I8:$Rt), 819 def: Pat<(select I1:$Pu, V4I8:$Rs, V4I8:$Rt),
801 (LoReg (C2_vmux I1:$Pu, (ToZext64 $Rs), (ToZext64 $Rt)))>; 820 (LoReg (C2_vmux I1:$Pu, (ToAext64 $Rs), (ToAext64 $Rt)))>;
802 def: Pat<(select I1:$Pu, V2I16:$Rs, V2I16:$Rt), 821 def: Pat<(select I1:$Pu, V2I16:$Rs, V2I16:$Rt),
803 (LoReg (C2_vmux I1:$Pu, (ToZext64 $Rs), (ToZext64 $Rt)))>; 822 (LoReg (C2_vmux I1:$Pu, (ToAext64 $Rs), (ToAext64 $Rt)))>;
804 def: Pat<(select I1:$Pu, V2I32:$Rs, V2I32:$Rt), 823 def: Pat<(select I1:$Pu, V2I32:$Rs, V2I32:$Rt),
805 (Combinew (C2_mux I1:$Pu, (HiReg $Rs), (HiReg $Rt)), 824 (Combinew (C2_mux I1:$Pu, (HiReg $Rs), (HiReg $Rt)),
806 (C2_mux I1:$Pu, (LoReg $Rs), (LoReg $Rt)))>; 825 (C2_mux I1:$Pu, (LoReg $Rs), (LoReg $Rt)))>;
807 826
808 def: Pat<(vselect V8I1:$Pu, V8I8:$Rs, V8I8:$Rt), 827 def: Pat<(vselect V8I1:$Pu, V8I8:$Rs, V8I8:$Rt),
861 defm: SelMinMax_pats<setugt, I64, A2_maxup, A2_minup>; 880 defm: SelMinMax_pats<setugt, I64, A2_maxup, A2_minup>;
862 defm: SelMinMax_pats<setule, I64, A2_minup, A2_maxup>; 881 defm: SelMinMax_pats<setule, I64, A2_minup, A2_maxup>;
863 defm: SelMinMax_pats<setult, I64, A2_minup, A2_maxup>; 882 defm: SelMinMax_pats<setult, I64, A2_minup, A2_maxup>;
864 } 883 }
865 884
866 let AddedComplexity = 100, Predicates = [HasV5T] in { 885 let AddedComplexity = 100 in {
867 defm: SelMinMax_pats<setolt, F32, F2_sfmin, F2_sfmax>; 886 defm: SelMinMax_pats<setolt, F32, F2_sfmin, F2_sfmax>;
868 defm: SelMinMax_pats<setole, F32, F2_sfmin, F2_sfmax>; 887 defm: SelMinMax_pats<setole, F32, F2_sfmin, F2_sfmax>;
869 defm: SelMinMax_pats<setogt, F32, F2_sfmax, F2_sfmin>; 888 defm: SelMinMax_pats<setogt, F32, F2_sfmax, F2_sfmin>;
870 defm: SelMinMax_pats<setoge, F32, F2_sfmax, F2_sfmin>; 889 defm: SelMinMax_pats<setoge, F32, F2_sfmax, F2_sfmin>;
871 } 890 }
917 (A2_combineii imm:$s8, imm:$s8)>; 936 (A2_combineii imm:$s8, imm:$s8)>;
918 def: Pat<(v2i32 (HexagonVSPLAT I32:$Rs)), (Combinew I32:$Rs, I32:$Rs)>; 937 def: Pat<(v2i32 (HexagonVSPLAT I32:$Rs)), (Combinew I32:$Rs, I32:$Rs)>;
919 938
920 let AddedComplexity = 10 in 939 let AddedComplexity = 10 in
921 def: Pat<(v8i8 (HexagonVSPLAT I32:$Rs)), (S6_vsplatrbp I32:$Rs)>, 940 def: Pat<(v8i8 (HexagonVSPLAT I32:$Rs)), (S6_vsplatrbp I32:$Rs)>,
922 Requires<[HasV62T]>; 941 Requires<[HasV62]>;
923 def: Pat<(v8i8 (HexagonVSPLAT I32:$Rs)), 942 def: Pat<(v8i8 (HexagonVSPLAT I32:$Rs)),
924 (Combinew (S2_vsplatrb I32:$Rs), (S2_vsplatrb I32:$Rs))>; 943 (Combinew (S2_vsplatrb I32:$Rs), (S2_vsplatrb I32:$Rs))>;
925 944
926 945
927 // --(8) Shift/permute --------------------------------------------------- 946 // --(8) Shift/permute ---------------------------------------------------
978 def: OpR_RR_pat<S2_asl_r_r, Shl, i32, I32, I32>; 997 def: OpR_RR_pat<S2_asl_r_r, Shl, i32, I32, I32>;
979 def: OpR_RR_pat<S2_asr_r_p, Sra, i64, I64, I32>; 998 def: OpR_RR_pat<S2_asr_r_p, Sra, i64, I64, I32>;
980 def: OpR_RR_pat<S2_lsr_r_p, Srl, i64, I64, I32>; 999 def: OpR_RR_pat<S2_lsr_r_p, Srl, i64, I64, I32>;
981 def: OpR_RR_pat<S2_asl_r_p, Shl, i64, I64, I32>; 1000 def: OpR_RR_pat<S2_asl_r_p, Shl, i64, I64, I32>;
982 1001
1002 // Funnel shifts.
1003 def IsMul8_U3: PatLeaf<(i32 imm), [{
1004 uint64_t V = N->getZExtValue();
1005 return V % 8 == 0 && isUInt<3>(V / 8);
1006 }]>;
1007
1008 def Divu8: SDNodeXForm<imm, [{
1009 return CurDAG->getTargetConstant(N->getZExtValue() / 8, SDLoc(N), MVT::i32);
1010 }]>;
1011
1012 // Funnel shift-left.
1013 def FShl32i: OutPatFrag<(ops node:$Rs, node:$Rt, node:$S),
1014 (HiReg (S2_asl_i_p (Combinew $Rs, $Rt), $S))>;
1015 def FShl32r: OutPatFrag<(ops node:$Rs, node:$Rt, node:$Ru),
1016 (HiReg (S2_asl_r_p (Combinew $Rs, $Rt), $Ru))>;
1017
1018 def FShl64i: OutPatFrag<(ops node:$Rs, node:$Rt, node:$S),
1019 (S2_lsr_i_p_or (S2_asl_i_p $Rt, $S), $Rs, (Subi<64> $S))>;
1020 def FShl64r: OutPatFrag<(ops node:$Rs, node:$Rt, node:$Ru),
1021 (S2_lsr_r_p_or (S2_asl_r_p $Rt, $Ru), $Rs, (A2_subri 64, $Ru))>;
1022
1023 // Combined SDNodeXForm: (Divu8 (Subi<64> $S))
1024 def Divu64_8: SDNodeXForm<imm, [{
1025 return CurDAG->getTargetConstant((64 - N->getSExtValue()) / 8,
1026 SDLoc(N), MVT::i32);
1027 }]>;
1028
1029 // Special cases:
1030 let AddedComplexity = 100 in {
1031 def: Pat<(fshl I32:$Rs, I32:$Rt, (i32 16)),
1032 (A2_combine_hl I32:$Rs, I32:$Rt)>;
1033 def: Pat<(fshl I64:$Rs, I64:$Rt, IsMul8_U3:$S),
1034 (S2_valignib I64:$Rs, I64:$Rt, (Divu64_8 $S))>;
1035 }
1036
1037 let Predicates = [HasV60], AddedComplexity = 50 in {
1038 def: OpR_RI_pat<S6_rol_i_r, Rol, i32, I32, u5_0ImmPred>;
1039 def: OpR_RI_pat<S6_rol_i_p, Rol, i64, I64, u6_0ImmPred>;
1040 }
1041 let AddedComplexity = 30 in {
1042 def: Pat<(rotl I32:$Rs, u5_0ImmPred:$S), (FShl32i $Rs, $Rs, imm:$S)>;
1043 def: Pat<(rotl I64:$Rs, u6_0ImmPred:$S), (FShl64i $Rs, $Rs, imm:$S)>;
1044 def: Pat<(fshl I32:$Rs, I32:$Rt, u5_0ImmPred:$S), (FShl32i $Rs, $Rt, imm:$S)>;
1045 def: Pat<(fshl I64:$Rs, I64:$Rt, u6_0ImmPred:$S), (FShl64i $Rs, $Rt, imm:$S)>;
1046 }
1047 def: Pat<(rotl I32:$Rs, I32:$Rt), (FShl32r $Rs, $Rs, $Rt)>;
1048 def: Pat<(rotl I64:$Rs, I32:$Rt), (FShl64r $Rs, $Rs, $Rt)>;
1049 def: Pat<(fshl I32:$Rs, I32:$Rt, I32:$Ru), (FShl32r $Rs, $Rt, $Ru)>;
1050 def: Pat<(fshl I64:$Rs, I64:$Rt, I32:$Ru), (FShl64r $Rs, $Rt, $Ru)>;
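For reference, the 32-bit funnel shift-left that FShl32i/FShl32r implement: the two operands are concatenated into a register pair with the first operand in the high word, the pair is shifted left, and the high word of the result is kept; a rotate is the special case where both inputs are the same register. A C sketch with illustrative names (shift amount assumed in range):

    #include <stdint.h>

    /* Reference: fshl(a, b, s) = high 32 bits of ((a:b) << s). */
    static uint32_t fshl32_ref(uint32_t a, uint32_t b, unsigned s) {
      return s ? (a << s) | (b >> (32 - s)) : a;
    }

    /* The FShl32i shape: Combinew($Rs,$Rt) forms the pair, S2_asl_i_p shifts
       it, and HiReg extracts the upper word. */
    static uint32_t fshl32_via_pair(uint32_t a, uint32_t b, unsigned s) {
      uint64_t pair = ((uint64_t)a << 32) | b;
      return (uint32_t)((pair << s) >> 32);
    }

    /* rotl(x, s) == fshl(x, x, s), which is why the rotate patterns above
       reuse the same fragments with a repeated operand. */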
1051
1052 // Funnel shift-right.
1053 def FShr32i: OutPatFrag<(ops node:$Rs, node:$Rt, node:$S),
1054 (LoReg (S2_lsr_i_p (Combinew $Rs, $Rt), $S))>;
1055 def FShr32r: OutPatFrag<(ops node:$Rs, node:$Rt, node:$Ru),
1056 (LoReg (S2_lsr_r_p (Combinew $Rs, $Rt), $Ru))>;
1057
1058 def FShr64i: OutPatFrag<(ops node:$Rs, node:$Rt, node:$S),
1059 (S2_asl_i_p_or (S2_lsr_i_p $Rt, $S), $Rs, (Subi<64> $S))>;
1060 def FShr64r: OutPatFrag<(ops node:$Rs, node:$Rt, node:$Ru),
1061 (S2_asl_r_p_or (S2_lsr_r_p $Rt, $Ru), $Rs, (A2_subri 64, $Ru))>;
1062
1063 // Special cases:
1064 let AddedComplexity = 100 in {
1065 def: Pat<(fshr I32:$Rs, I32:$Rt, (i32 16)),
1066 (A2_combine_hl I32:$Rs, I32:$Rt)>;
1067 def: Pat<(fshr I64:$Rs, I64:$Rt, IsMul8_U3:$S),
1068 (S2_valignib I64:$Rs, I64:$Rt, (Divu8 $S))>;
1069 }
1070
1071 let Predicates = [HasV60], AddedComplexity = 50 in {
1072 def: Pat<(rotr I32:$Rs, u5_0ImmPred:$S), (S6_rol_i_r I32:$Rs, (Subi<32> $S))>;
1073 def: Pat<(rotr I64:$Rs, u6_0ImmPred:$S), (S6_rol_i_p I64:$Rs, (Subi<64> $S))>;
1074 }
1075 let AddedComplexity = 30 in {
1076 def: Pat<(rotr I32:$Rs, u5_0ImmPred:$S), (FShr32i $Rs, $Rs, imm:$S)>;
1077 def: Pat<(rotr I64:$Rs, u6_0ImmPred:$S), (FShr64i $Rs, $Rs, imm:$S)>;
1078 def: Pat<(fshr I32:$Rs, I32:$Rt, u5_0ImmPred:$S), (FShr32i $Rs, $Rt, imm:$S)>;
1079 def: Pat<(fshr I64:$Rs, I64:$Rt, u6_0ImmPred:$S), (FShr64i $Rs, $Rt, imm:$S)>;
1080 }
1081 def: Pat<(rotr I32:$Rs, I32:$Rt), (FShr32r $Rs, $Rs, $Rt)>;
1082 def: Pat<(rotr I64:$Rs, I32:$Rt), (FShr64r $Rs, $Rs, $Rt)>;
1083 def: Pat<(fshr I32:$Rs, I32:$Rt, I32:$Ru), (FShr32r $Rs, $Rt, $Ru)>;
1084 def: Pat<(fshr I64:$Rs, I64:$Rt, I32:$Ru), (FShr64r $Rs, $Rt, $Ru)>;
1085
983 1086
984 def: Pat<(sra (add (sra I32:$Rs, u5_0ImmPred:$u5), 1), (i32 1)), 1087 def: Pat<(sra (add (sra I32:$Rs, u5_0ImmPred:$u5), 1), (i32 1)),
985 (S2_asr_i_r_rnd I32:$Rs, imm:$u5)>; 1088 (S2_asr_i_r_rnd I32:$Rs, imm:$u5)>;
986 def: Pat<(sra (add (sra I64:$Rs, u6_0ImmPred:$u6), 1), (i32 1)), 1089 def: Pat<(sra (add (sra I64:$Rs, u6_0ImmPred:$u6), 1), (i32 1)),
987 (S2_asr_i_p_rnd I64:$Rs, imm:$u6)>, Requires<[HasV5T]>; 1090 (S2_asr_i_p_rnd I64:$Rs, imm:$u6)>;
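The two patterns above match the "shift, add one, shift once more" rounding idiom and select the :rnd shift forms. A reference in C for the 32-bit case (assumes arithmetic >> on signed values, as on Hexagon; the add is widened only to keep the sketch free of overflow):

    #include <stdint.h>

    /* Matched DAG: ((x >> n) + 1) >> 1, i.e. an arithmetic shift right with
       rounding, selected as S2_asr_i_r_rnd. */
    static int32_t asr_rnd_ref(int32_t x, unsigned n) {
      return (int32_t)((((int64_t)(x >> n)) + 1) >> 1);
    }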
988 1091
989 // Prefer S2_addasl_rrri over S2_asl_i_r_acc. 1092 // Prefer S2_addasl_rrri over S2_asl_i_r_acc.
990 let AddedComplexity = 120 in 1093 let AddedComplexity = 120 in
991 def: Pat<(add I32:$Rt, (shl I32:$Rs, u3_0ImmPred:$u3)), 1094 def: Pat<(add I32:$Rt, (shl I32:$Rs, u3_0ImmPred:$u3)),
992 (S2_addasl_rrri IntRegs:$Rt, IntRegs:$Rs, imm:$u3)>; 1095 (S2_addasl_rrri IntRegs:$Rt, IntRegs:$Rs, imm:$u3)>;
1023 def: AccRRI_pat<S2_asl_i_p_acc, Add, Su<Shl>, I64, u6_0ImmPred>; 1126 def: AccRRI_pat<S2_asl_i_p_acc, Add, Su<Shl>, I64, u6_0ImmPred>;
1024 def: AccRRI_pat<S2_asl_i_p_nac, Sub, Su<Shl>, I64, u6_0ImmPred>; 1127 def: AccRRI_pat<S2_asl_i_p_nac, Sub, Su<Shl>, I64, u6_0ImmPred>;
1025 def: AccRRI_pat<S2_asl_i_p_and, And, Su<Shl>, I64, u6_0ImmPred>; 1128 def: AccRRI_pat<S2_asl_i_p_and, And, Su<Shl>, I64, u6_0ImmPred>;
1026 def: AccRRI_pat<S2_asl_i_p_or, Or, Su<Shl>, I64, u6_0ImmPred>; 1129 def: AccRRI_pat<S2_asl_i_p_or, Or, Su<Shl>, I64, u6_0ImmPred>;
1027 def: AccRRI_pat<S2_asl_i_p_xacc, Xor, Su<Shl>, I64, u6_0ImmPred>; 1130 def: AccRRI_pat<S2_asl_i_p_xacc, Xor, Su<Shl>, I64, u6_0ImmPred>;
1131
1132 let Predicates = [HasV60] in {
1133 def: AccRRI_pat<S6_rol_i_r_acc, Add, Su<Rol>, I32, u5_0ImmPred>;
1134 def: AccRRI_pat<S6_rol_i_r_nac, Sub, Su<Rol>, I32, u5_0ImmPred>;
1135 def: AccRRI_pat<S6_rol_i_r_and, And, Su<Rol>, I32, u5_0ImmPred>;
1136 def: AccRRI_pat<S6_rol_i_r_or, Or, Su<Rol>, I32, u5_0ImmPred>;
1137 def: AccRRI_pat<S6_rol_i_r_xacc, Xor, Su<Rol>, I32, u5_0ImmPred>;
1138
1139 def: AccRRI_pat<S6_rol_i_p_acc, Add, Su<Rol>, I64, u6_0ImmPred>;
1140 def: AccRRI_pat<S6_rol_i_p_nac, Sub, Su<Rol>, I64, u6_0ImmPred>;
1141 def: AccRRI_pat<S6_rol_i_p_and, And, Su<Rol>, I64, u6_0ImmPred>;
1142 def: AccRRI_pat<S6_rol_i_p_or, Or, Su<Rol>, I64, u6_0ImmPred>;
1143 def: AccRRI_pat<S6_rol_i_p_xacc, Xor, Su<Rol>, I64, u6_0ImmPred>;
1144 }
1028 } 1145 }
1029 1146
1030 let AddedComplexity = 100 in { 1147 let AddedComplexity = 100 in {
1031 def: AccRRR_pat<S2_asr_r_r_acc, Add, Su<Sra>, I32, I32>; 1148 def: AccRRR_pat<S2_asr_r_r_acc, Add, Su<Sra>, I32, I32, I32>;
1032 def: AccRRR_pat<S2_asr_r_r_nac, Sub, Su<Sra>, I32, I32>; 1149 def: AccRRR_pat<S2_asr_r_r_nac, Sub, Su<Sra>, I32, I32, I32>;
1033 def: AccRRR_pat<S2_asr_r_r_and, And, Su<Sra>, I32, I32>; 1150 def: AccRRR_pat<S2_asr_r_r_and, And, Su<Sra>, I32, I32, I32>;
1034 def: AccRRR_pat<S2_asr_r_r_or, Or, Su<Sra>, I32, I32>; 1151 def: AccRRR_pat<S2_asr_r_r_or, Or, Su<Sra>, I32, I32, I32>;
1035 1152
1036 def: AccRRR_pat<S2_asr_r_p_acc, Add, Su<Sra>, I64, I32>; 1153 def: AccRRR_pat<S2_asr_r_p_acc, Add, Su<Sra>, I64, I64, I32>;
1037 def: AccRRR_pat<S2_asr_r_p_nac, Sub, Su<Sra>, I64, I32>; 1154 def: AccRRR_pat<S2_asr_r_p_nac, Sub, Su<Sra>, I64, I64, I32>;
1038 def: AccRRR_pat<S2_asr_r_p_and, And, Su<Sra>, I64, I32>; 1155 def: AccRRR_pat<S2_asr_r_p_and, And, Su<Sra>, I64, I64, I32>;
1039 def: AccRRR_pat<S2_asr_r_p_or, Or, Su<Sra>, I64, I32>; 1156 def: AccRRR_pat<S2_asr_r_p_or, Or, Su<Sra>, I64, I64, I32>;
1040 def: AccRRR_pat<S2_asr_r_p_xor, Xor, Su<Sra>, I64, I32>; 1157 def: AccRRR_pat<S2_asr_r_p_xor, Xor, Su<Sra>, I64, I64, I32>;
1041 1158
1042 def: AccRRR_pat<S2_lsr_r_r_acc, Add, Su<Srl>, I32, I32>; 1159 def: AccRRR_pat<S2_lsr_r_r_acc, Add, Su<Srl>, I32, I32, I32>;
1043 def: AccRRR_pat<S2_lsr_r_r_nac, Sub, Su<Srl>, I32, I32>; 1160 def: AccRRR_pat<S2_lsr_r_r_nac, Sub, Su<Srl>, I32, I32, I32>;
1044 def: AccRRR_pat<S2_lsr_r_r_and, And, Su<Srl>, I32, I32>; 1161 def: AccRRR_pat<S2_lsr_r_r_and, And, Su<Srl>, I32, I32, I32>;
1045 def: AccRRR_pat<S2_lsr_r_r_or, Or, Su<Srl>, I32, I32>; 1162 def: AccRRR_pat<S2_lsr_r_r_or, Or, Su<Srl>, I32, I32, I32>;
1046 1163
1047 def: AccRRR_pat<S2_lsr_r_p_acc, Add, Su<Srl>, I64, I32>; 1164 def: AccRRR_pat<S2_lsr_r_p_acc, Add, Su<Srl>, I64, I64, I32>;
1048 def: AccRRR_pat<S2_lsr_r_p_nac, Sub, Su<Srl>, I64, I32>; 1165 def: AccRRR_pat<S2_lsr_r_p_nac, Sub, Su<Srl>, I64, I64, I32>;
1049 def: AccRRR_pat<S2_lsr_r_p_and, And, Su<Srl>, I64, I32>; 1166 def: AccRRR_pat<S2_lsr_r_p_and, And, Su<Srl>, I64, I64, I32>;
1050 def: AccRRR_pat<S2_lsr_r_p_or, Or, Su<Srl>, I64, I32>; 1167 def: AccRRR_pat<S2_lsr_r_p_or, Or, Su<Srl>, I64, I64, I32>;
1051 def: AccRRR_pat<S2_lsr_r_p_xor, Xor, Su<Srl>, I64, I32>; 1168 def: AccRRR_pat<S2_lsr_r_p_xor, Xor, Su<Srl>, I64, I64, I32>;
1052 1169
1053 def: AccRRR_pat<S2_asl_r_r_acc, Add, Su<Shl>, I32, I32>; 1170 def: AccRRR_pat<S2_asl_r_r_acc, Add, Su<Shl>, I32, I32, I32>;
1054 def: AccRRR_pat<S2_asl_r_r_nac, Sub, Su<Shl>, I32, I32>; 1171 def: AccRRR_pat<S2_asl_r_r_nac, Sub, Su<Shl>, I32, I32, I32>;
1055 def: AccRRR_pat<S2_asl_r_r_and, And, Su<Shl>, I32, I32>; 1172 def: AccRRR_pat<S2_asl_r_r_and, And, Su<Shl>, I32, I32, I32>;
1056 def: AccRRR_pat<S2_asl_r_r_or, Or, Su<Shl>, I32, I32>; 1173 def: AccRRR_pat<S2_asl_r_r_or, Or, Su<Shl>, I32, I32, I32>;
1057 1174
1058 def: AccRRR_pat<S2_asl_r_p_acc, Add, Su<Shl>, I64, I32>; 1175 def: AccRRR_pat<S2_asl_r_p_acc, Add, Su<Shl>, I64, I64, I32>;
1059 def: AccRRR_pat<S2_asl_r_p_nac, Sub, Su<Shl>, I64, I32>; 1176 def: AccRRR_pat<S2_asl_r_p_nac, Sub, Su<Shl>, I64, I64, I32>;
1060 def: AccRRR_pat<S2_asl_r_p_and, And, Su<Shl>, I64, I32>; 1177 def: AccRRR_pat<S2_asl_r_p_and, And, Su<Shl>, I64, I64, I32>;
1061 def: AccRRR_pat<S2_asl_r_p_or, Or, Su<Shl>, I64, I32>; 1178 def: AccRRR_pat<S2_asl_r_p_or, Or, Su<Shl>, I64, I64, I32>;
1062 def: AccRRR_pat<S2_asl_r_p_xor, Xor, Su<Shl>, I64, I32>; 1179 def: AccRRR_pat<S2_asl_r_p_xor, Xor, Su<Shl>, I64, I64, I32>;
1063 } 1180 }
1064 1181
1065 1182
1066 class OpshIRI_pat<InstHexagon MI, PatFrag Op, PatFrag ShOp, 1183 class OpshIRI_pat<InstHexagon MI, PatFrag Op, PatFrag ShOp,
1067 PatFrag RegPred, PatFrag ImmPred> 1184 PatFrag RegPred, PatFrag ImmPred>
1089 (Zext64 (and I32:$a, (i32 65535)))), 1206 (Zext64 (and I32:$a, (i32 65535)))),
1090 (shl (Aext64 (and I32:$c, (i32 65535))), (i32 32))), 1207 (shl (Aext64 (and I32:$c, (i32 65535))), (i32 32))),
1091 (shl (Aext64 I32:$d), (i32 48))), 1208 (shl (Aext64 I32:$d), (i32 48))),
1092 (Combinew (A2_combine_ll I32:$d, I32:$c), 1209 (Combinew (A2_combine_ll I32:$d, I32:$c),
1093 (A2_combine_ll I32:$b, I32:$a))>; 1210 (A2_combine_ll I32:$b, I32:$a))>;
1094
1095 def: Pat<(or (or (shl (or (shl (i32 (extloadi8 (add I32:$b, 3))),
1096 (i32 8)),
1097 (i32 (zextloadi8 (add I32:$b, 2)))),
1098 (i32 16)),
1099 (shl (i32 (zextloadi8 (add I32:$b, 1))), (i32 8))),
1100 (zextloadi8 I32:$b)),
1101 (A2_swiz (L2_loadri_io IntRegs:$b, 0))>;
1102 1211
1103 let AddedComplexity = 200 in { 1212 let AddedComplexity = 200 in {
1104 def: Pat<(or (shl I32:$Rt, (i32 16)), (and I32:$Rs, (i32 65535))), 1213 def: Pat<(or (shl I32:$Rt, (i32 16)), (and I32:$Rs, (i32 65535))),
1105 (A2_combine_ll I32:$Rt, I32:$Rs)>; 1214 (A2_combine_ll I32:$Rt, I32:$Rs)>;
1106 def: Pat<(or (shl I32:$Rt, (i32 16)), (srl I32:$Rs, (i32 16))), 1215 def: Pat<(or (shl I32:$Rt, (i32 16)), (srl I32:$Rs, (i32 16))),
1143 def: Pat<(srl V4I16:$b, (v4i16 (HexagonVSPLAT u4_0ImmPred:$c))), 1252 def: Pat<(srl V4I16:$b, (v4i16 (HexagonVSPLAT u4_0ImmPred:$c))),
1144 (S2_lsr_i_vh V4I16:$b, imm:$c)>; 1253 (S2_lsr_i_vh V4I16:$b, imm:$c)>;
1145 def: Pat<(shl V4I16:$b, (v4i16 (HexagonVSPLAT u4_0ImmPred:$c))), 1254 def: Pat<(shl V4I16:$b, (v4i16 (HexagonVSPLAT u4_0ImmPred:$c))),
1146 (S2_asl_i_vh V4I16:$b, imm:$c)>; 1255 (S2_asl_i_vh V4I16:$b, imm:$c)>;
1147 1256
1257 def: Pat<(HexagonVASR V2I16:$Rs, u4_0ImmPred:$S),
1258 (LoReg (S2_asr_i_vh (ToAext64 $Rs), imm:$S))>;
1259 def: Pat<(HexagonVASL V2I16:$Rs, u4_0ImmPred:$S),
1260 (LoReg (S2_asl_i_vh (ToAext64 $Rs), imm:$S))>;
1261 def: Pat<(HexagonVLSR V2I16:$Rs, u4_0ImmPred:$S),
1262 (LoReg (S2_lsr_i_vh (ToAext64 $Rs), imm:$S))>;
1263 def: Pat<(HexagonVASR V2I16:$Rs, I32:$Rt),
1264 (LoReg (S2_asr_i_vh (ToAext64 $Rs), I32:$Rt))>;
1265 def: Pat<(HexagonVASL V2I16:$Rs, I32:$Rt),
1266 (LoReg (S2_asl_i_vh (ToAext64 $Rs), I32:$Rt))>;
1267 def: Pat<(HexagonVLSR V2I16:$Rs, I32:$Rt),
1268 (LoReg (S2_lsr_i_vh (ToAext64 $Rs), I32:$Rt))>;
1269
1148 1270
1149 // --(9) Arithmetic/bitwise ---------------------------------------------- 1271 // --(9) Arithmetic/bitwise ----------------------------------------------
1150 // 1272 //
1151 1273
1152 def: Pat<(abs I32:$Rs), (A2_abs I32:$Rs)>; 1274 def: Pat<(abs I32:$Rs), (A2_abs I32:$Rs)>;
1153 def: Pat<(not I32:$Rs), (A2_subri -1, I32:$Rs)>; 1275 def: Pat<(abs I64:$Rs), (A2_absp I64:$Rs)>;
1154 def: Pat<(not I64:$Rs), (A2_notp I64:$Rs)>; 1276 def: Pat<(not I32:$Rs), (A2_subri -1, I32:$Rs)>;
1155 1277 def: Pat<(not I64:$Rs), (A2_notp I64:$Rs)>;
1156 let Predicates = [HasV5T] in { 1278 def: Pat<(ineg I64:$Rs), (A2_negp I64:$Rs)>;
1157 def: Pat<(fabs F32:$Rs), (S2_clrbit_i F32:$Rs, 31)>; 1279
1158 def: Pat<(fneg F32:$Rs), (S2_togglebit_i F32:$Rs, 31)>; 1280 def: Pat<(fabs F32:$Rs), (S2_clrbit_i F32:$Rs, 31)>;
1159 1281 def: Pat<(fneg F32:$Rs), (S2_togglebit_i F32:$Rs, 31)>;
1160 def: Pat<(fabs F64:$Rs), 1282
1161 (Combinew (S2_clrbit_i (HiReg $Rs), 31), 1283 def: Pat<(fabs F64:$Rs),
1162 (i32 (LoReg $Rs)))>; 1284 (Combinew (S2_clrbit_i (HiReg $Rs), 31),
1163 def: Pat<(fneg F64:$Rs), 1285 (i32 (LoReg $Rs)))>;
1164 (Combinew (S2_togglebit_i (HiReg $Rs), 31), 1286 def: Pat<(fneg F64:$Rs),
1165 (i32 (LoReg $Rs)))>; 1287 (Combinew (S2_togglebit_i (HiReg $Rs), 31),
1166 } 1288 (i32 (LoReg $Rs)))>;
1167
1168 let AddedComplexity = 50 in
1169 def: Pat<(xor (add (sra I32:$Rs, (i32 31)),
1170 I32:$Rs),
1171 (sra I32:$Rs, (i32 31))),
1172 (A2_abs I32:$Rs)>;
1173
1174 1289
1175 def: Pat<(add I32:$Rs, anyimm:$s16), (A2_addi I32:$Rs, imm:$s16)>; 1290 def: Pat<(add I32:$Rs, anyimm:$s16), (A2_addi I32:$Rs, imm:$s16)>;
1176 def: Pat<(or I32:$Rs, anyimm:$s10), (A2_orir I32:$Rs, imm:$s10)>; 1291 def: Pat<(or I32:$Rs, anyimm:$s10), (A2_orir I32:$Rs, imm:$s10)>;
1177 def: Pat<(and I32:$Rs, anyimm:$s10), (A2_andir I32:$Rs, imm:$s10)>; 1292 def: Pat<(and I32:$Rs, anyimm:$s10), (A2_andir I32:$Rs, imm:$s10)>;
1178 def: Pat<(sub anyimm:$s10, I32:$Rs), (A2_subri imm:$s10, I32:$Rs)>; 1293 def: Pat<(sub anyimm:$s10, I32:$Rs), (A2_subri imm:$s10, I32:$Rs)>;
1198 def: OpR_RR_pat<A2_vaddw, Add, v2i32, V2I32>; 1313 def: OpR_RR_pat<A2_vaddw, Add, v2i32, V2I32>;
1199 def: OpR_RR_pat<A2_vsubub, Sub, v8i8, V8I8>; 1314 def: OpR_RR_pat<A2_vsubub, Sub, v8i8, V8I8>;
1200 def: OpR_RR_pat<A2_vsubh, Sub, v4i16, V4I16>; 1315 def: OpR_RR_pat<A2_vsubh, Sub, v4i16, V4I16>;
1201 def: OpR_RR_pat<A2_vsubw, Sub, v2i32, V2I32>; 1316 def: OpR_RR_pat<A2_vsubw, Sub, v2i32, V2I32>;
1202 1317
1318 def: OpR_RR_pat<A2_and, And, v4i8, V4I8>;
1319 def: OpR_RR_pat<A2_xor, Xor, v4i8, V4I8>;
1320 def: OpR_RR_pat<A2_or, Or, v4i8, V4I8>;
1203 def: OpR_RR_pat<A2_and, And, v2i16, V2I16>; 1321 def: OpR_RR_pat<A2_and, And, v2i16, V2I16>;
1204 def: OpR_RR_pat<A2_xor, Xor, v2i16, V2I16>; 1322 def: OpR_RR_pat<A2_xor, Xor, v2i16, V2I16>;
1205 def: OpR_RR_pat<A2_or, Or, v2i16, V2I16>; 1323 def: OpR_RR_pat<A2_or, Or, v2i16, V2I16>;
1206
1207 def: OpR_RR_pat<A2_andp, And, v8i8, V8I8>; 1324 def: OpR_RR_pat<A2_andp, And, v8i8, V8I8>;
1325 def: OpR_RR_pat<A2_orp, Or, v8i8, V8I8>;
1326 def: OpR_RR_pat<A2_xorp, Xor, v8i8, V8I8>;
1208 def: OpR_RR_pat<A2_andp, And, v4i16, V4I16>; 1327 def: OpR_RR_pat<A2_andp, And, v4i16, V4I16>;
1328 def: OpR_RR_pat<A2_orp, Or, v4i16, V4I16>;
1329 def: OpR_RR_pat<A2_xorp, Xor, v4i16, V4I16>;
1209 def: OpR_RR_pat<A2_andp, And, v2i32, V2I32>; 1330 def: OpR_RR_pat<A2_andp, And, v2i32, V2I32>;
1210 def: OpR_RR_pat<A2_orp, Or, v8i8, V8I8>;
1211 def: OpR_RR_pat<A2_orp, Or, v4i16, V4I16>;
1212 def: OpR_RR_pat<A2_orp, Or, v2i32, V2I32>; 1331 def: OpR_RR_pat<A2_orp, Or, v2i32, V2I32>;
1213 def: OpR_RR_pat<A2_xorp, Xor, v8i8, V8I8>;
1214 def: OpR_RR_pat<A2_xorp, Xor, v4i16, V4I16>;
1215 def: OpR_RR_pat<A2_xorp, Xor, v2i32, V2I32>; 1332 def: OpR_RR_pat<A2_xorp, Xor, v2i32, V2I32>;
1216 1333
1217 def: OpR_RR_pat<M2_mpyi, Mul, i32, I32>; 1334 def: OpR_RR_pat<M2_mpyi, Mul, i32, I32>;
1218 def: OpR_RR_pat<M2_mpy_up, pf2<mulhs>, i32, I32>; 1335 def: OpR_RR_pat<M2_mpy_up, pf2<mulhs>, i32, I32>;
1219 def: OpR_RR_pat<M2_mpyu_up, pf2<mulhu>, i32, I32>; 1336 def: OpR_RR_pat<M2_mpyu_up, pf2<mulhu>, i32, I32>;
1232 def: OpR_RR_pat<C2_and, Mul, i1, I1>; 1349 def: OpR_RR_pat<C2_and, Mul, i1, I1>;
1233 def: OpR_RR_pat<C2_and, Mul, v2i1, V2I1>; 1350 def: OpR_RR_pat<C2_and, Mul, v2i1, V2I1>;
1234 def: OpR_RR_pat<C2_and, Mul, v4i1, V4I1>; 1351 def: OpR_RR_pat<C2_and, Mul, v4i1, V4I1>;
1235 def: OpR_RR_pat<C2_and, Mul, v8i1, V8I1>; 1352 def: OpR_RR_pat<C2_and, Mul, v8i1, V8I1>;
1236 1353
1237 let Predicates = [HasV5T] in { 1354 def: OpR_RR_pat<F2_sfadd, pf2<fadd>, f32, F32>;
1238 def: OpR_RR_pat<F2_sfadd, pf2<fadd>, f32, F32>; 1355 def: OpR_RR_pat<F2_sfsub, pf2<fsub>, f32, F32>;
1239 def: OpR_RR_pat<F2_sfsub, pf2<fsub>, f32, F32>; 1356 def: OpR_RR_pat<F2_sfmpy, pf2<fmul>, f32, F32>;
1240 def: OpR_RR_pat<F2_sfmpy, pf2<fmul>, f32, F32>; 1357 def: OpR_RR_pat<F2_sfmin, pf2<fminnum>, f32, F32>;
1241 def: OpR_RR_pat<F2_sfmin, pf2<fminnum>, f32, F32>; 1358 def: OpR_RR_pat<F2_sfmax, pf2<fmaxnum>, f32, F32>;
1242 def: OpR_RR_pat<F2_sfmax, pf2<fmaxnum>, f32, F32>; 1359
1360 let Predicates = [HasV66] in {
1361 def: OpR_RR_pat<F2_dfadd, pf2<fadd>, f64, F64>;
1362 def: OpR_RR_pat<F2_dfsub, pf2<fsub>, f64, F64>;
1243 } 1363 }
1244 1364
1245 // In expressions like a0*b0 + a1*b1 + ..., prefer to generate multiply-add, 1365 // In expressions like a0*b0 + a1*b1 + ..., prefer to generate multiply-add,
1246 // over add-add with individual multiplies as inputs. 1366 // over add-add with individual multiplies as inputs.
1247 let AddedComplexity = 10 in { 1367 let AddedComplexity = 10 in {
1248 def: AccRRI_pat<M2_macsip, Add, Su<Mul>, I32, u32_0ImmPred>; 1368 def: AccRRI_pat<M2_macsip, Add, Su<Mul>, I32, u32_0ImmPred>;
1249 def: AccRRI_pat<M2_macsin, Sub, Su<Mul>, I32, u32_0ImmPred>; 1369 def: AccRRI_pat<M2_macsin, Sub, Su<Mul>, I32, u32_0ImmPred>;
1250 def: AccRRR_pat<M2_maci, Add, Su<Mul>, I32, I32>; 1370 def: AccRRR_pat<M2_maci, Add, Su<Mul>, I32, I32, I32>;
1371 let Predicates = [HasV66] in
1372 def: AccRRR_pat<M2_mnaci, Sub, Su<Mul>, I32, I32, I32>;
1251 } 1373 }
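An example of the expression shape the comment above refers to, where the complexity bump makes each accumulation step select a multiply-accumulate (e.g. M2_maci) instead of a separate multiply feeding an add (illustrative C):

    #include <stdint.h>

    static int32_t dot4(const int32_t a[4], const int32_t b[4]) {
      int32_t acc = 0;
      for (int i = 0; i < 4; ++i)
        acc += a[i] * b[i];   /* add(acc, mul(a,b)) -> multiply-accumulate */
      return acc;
    }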
1252 1374
1253 def: AccRRI_pat<M2_naccii, Sub, Su<Add>, I32, s32_0ImmPred>; 1375 def: AccRRI_pat<M2_naccii, Sub, Su<Add>, I32, s32_0ImmPred>;
1254 def: AccRRI_pat<M2_accii, Add, Su<Add>, I32, s32_0ImmPred>; 1376 def: AccRRI_pat<M2_accii, Add, Su<Add>, I32, s32_0ImmPred>;
1255 def: AccRRR_pat<M2_acci, Add, Su<Add>, I32, I32>; 1377 def: AccRRR_pat<M2_acci, Add, Su<Add>, I32, I32, I32>;
1256 1378
1257 // Mulh for vectors 1379 // Mulh for vectors
1258 // 1380 //
1259 def: Pat<(v2i32 (mulhu V2I32:$Rss, V2I32:$Rtt)), 1381 def: Pat<(v2i32 (mulhu V2I32:$Rss, V2I32:$Rtt)),
1260 (Combinew (M2_mpyu_up (HiReg $Rss), (HiReg $Rtt)), 1382 (Combinew (M2_mpyu_up (HiReg $Rss), (HiReg $Rtt)),
1318 (M2_mpysin I32:$Rs, (NegImm8 imm:$n8))>; 1440 (M2_mpysin I32:$Rs, (NegImm8 imm:$n8))>;
1319 1441
1320 def: Pat<(add Sext64:$Rs, I64:$Rt), 1442 def: Pat<(add Sext64:$Rs, I64:$Rt),
1321 (A2_addsp (LoReg Sext64:$Rs), I64:$Rt)>; 1443 (A2_addsp (LoReg Sext64:$Rs), I64:$Rt)>;
1322 1444
1323 def: AccRRR_pat<M4_and_and, And, Su<And>, I32, I32>; 1445 def: AccRRR_pat<M4_and_and, And, Su_ni1<And>, I32, I32, I32>;
1324 def: AccRRR_pat<M4_and_or, And, Su<Or>, I32, I32>; 1446 def: AccRRR_pat<M4_and_or, And, Su_ni1<Or>, I32, I32, I32>;
1325 def: AccRRR_pat<M4_and_xor, And, Su<Xor>, I32, I32>; 1447 def: AccRRR_pat<M4_and_xor, And, Su<Xor>, I32, I32, I32>;
1326 def: AccRRR_pat<M4_or_and, Or, Su<And>, I32, I32>; 1448 def: AccRRR_pat<M4_or_and, Or, Su_ni1<And>, I32, I32, I32>;
1327 def: AccRRR_pat<M4_or_or, Or, Su<Or>, I32, I32>; 1449 def: AccRRR_pat<M4_or_or, Or, Su_ni1<Or>, I32, I32, I32>;
1328 def: AccRRR_pat<M4_or_xor, Or, Su<Xor>, I32, I32>; 1450 def: AccRRR_pat<M4_or_xor, Or, Su<Xor>, I32, I32, I32>;
1329 def: AccRRR_pat<M4_xor_and, Xor, Su<And>, I32, I32>; 1451 def: AccRRR_pat<M4_xor_and, Xor, Su_ni1<And>, I32, I32, I32>;
1330 def: AccRRR_pat<M4_xor_or, Xor, Su<Or>, I32, I32>; 1452 def: AccRRR_pat<M4_xor_or, Xor, Su_ni1<Or>, I32, I32, I32>;
1331 def: AccRRR_pat<M2_xor_xacc, Xor, Su<Xor>, I32, I32>; 1453 def: AccRRR_pat<M2_xor_xacc, Xor, Su<Xor>, I32, I32, I32>;
1332 def: AccRRR_pat<M4_xor_xacc, Xor, Su<Xor>, I64, I64>; 1454 def: AccRRR_pat<M4_xor_xacc, Xor, Su<Xor>, I64, I64, I64>;
1333 1455
1334 // For dags like (or (and (not _), _), (shl _, _)) where the "or" with 1456 // For dags like (or (and (not _), _), (shl _, _)) where the "or" with
1335 // one argument matches the patterns below, and with the other argument 1457 // one argument matches the patterns below, and with the other argument
1336 // matches S2_asl_r_r_or, etc, prefer the patterns below. 1458 // matches S2_asl_r_r_or, etc, prefer the patterns below.
1337 let AddedComplexity = 110 in { // greater than S2_asl_r_r_and/or/xor. 1459 let AddedComplexity = 110 in { // greater than S2_asl_r_r_and/or/xor.
1338 def: AccRRR_pat<M4_and_andn, And, Su<Not2<And>>, I32, I32>; 1460 def: AccRRR_pat<M4_and_andn, And, Su<Not2<And>>, I32, I32, I32>;
1339 def: AccRRR_pat<M4_or_andn, Or, Su<Not2<And>>, I32, I32>; 1461 def: AccRRR_pat<M4_or_andn, Or, Su<Not2<And>>, I32, I32, I32>;
1340 def: AccRRR_pat<M4_xor_andn, Xor, Su<Not2<And>>, I32, I32>; 1462 def: AccRRR_pat<M4_xor_andn, Xor, Su<Not2<And>>, I32, I32, I32>;
1341 } 1463 }
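For illustration (not taken from this file), the DAG shape named in the comment above corresponds to C source such as the following; whether the compound and-not accumulate instructions are actually chosen still depends on the surrounding DAG and the AddedComplexity ordering.

    // Hypothetical sketch: r |= ~a & b has the DAG form (or r, (and (not a), b)),
    // which is the shape matched by the M4_*_andn accumulate patterns above.
    unsigned or_andn(unsigned r, unsigned a, unsigned b) {
      r |= ~a & b;
      return r;
    }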
1342 1464
1343 // S4_addaddi and S4_subaddi don't have tied operands, so give them 1465 // S4_addaddi and S4_subaddi don't have tied operands, so give them
1344 // a bit of preference. 1466 // a bit of preference.
1345 let AddedComplexity = 30 in { 1467 let AddedComplexity = 30 in {
1471 (M4_mpyri_addr IntRegs:$Ru, IntRegs:$Rs, imm:$u6)>; 1593 (M4_mpyri_addr IntRegs:$Ru, IntRegs:$Rs, imm:$u6)>;
1472 def: Pat<(add I32:$Ru, (Su<Mul> I32:$Ry, I32:$Rs)), 1594 def: Pat<(add I32:$Ru, (Su<Mul> I32:$Ry, I32:$Rs)),
1473 (M4_mpyrr_addr IntRegs:$Ru, IntRegs:$Ry, IntRegs:$Rs)>; 1595 (M4_mpyrr_addr IntRegs:$Ru, IntRegs:$Ry, IntRegs:$Rs)>;
1474 1596
1475 1597
1476 let Predicates = [HasV5T] in { 1598 def: Pat<(fma F32:$Rs, F32:$Rt, F32:$Rx),
1477 def: Pat<(fma F32:$Rs, F32:$Rt, F32:$Rx), 1599 (F2_sffma F32:$Rx, F32:$Rs, F32:$Rt)>;
1478 (F2_sffma F32:$Rx, F32:$Rs, F32:$Rt)>; 1600 def: Pat<(fma (fneg F32:$Rs), F32:$Rt, F32:$Rx),
1479 def: Pat<(fma (fneg F32:$Rs), F32:$Rt, F32:$Rx), 1601 (F2_sffms F32:$Rx, F32:$Rs, F32:$Rt)>;
1480 (F2_sffms F32:$Rx, F32:$Rs, F32:$Rt)>; 1602 def: Pat<(fma F32:$Rs, (fneg F32:$Rt), F32:$Rx),
1481 def: Pat<(fma F32:$Rs, (fneg F32:$Rt), F32:$Rx), 1603 (F2_sffms F32:$Rx, F32:$Rs, F32:$Rt)>;
1482 (F2_sffms F32:$Rx, F32:$Rs, F32:$Rt)>;
1483 }
1484 1604
1485 1605
1486 def: Pat<(mul V2I32:$Rs, V2I32:$Rt), 1606 def: Pat<(mul V2I32:$Rs, V2I32:$Rt),
1487 (PS_vmulw V2I32:$Rs, V2I32:$Rt)>; 1607 (PS_vmulw V2I32:$Rs, V2I32:$Rt)>;
1488 def: Pat<(add V2I32:$Rx, (mul V2I32:$Rs, V2I32:$Rt)), 1608 def: Pat<(add V2I32:$Rx, (mul V2I32:$Rs, V2I32:$Rt)),
1489 (PS_vmulw_acc V2I32:$Rx, V2I32:$Rs, V2I32:$Rt)>; 1609 (PS_vmulw_acc V2I32:$Rx, V2I32:$Rs, V2I32:$Rt)>;
1490 1610
1491 // Add/subtract two v4i8: Hexagon has no instruction for this, so we use 1611 // Add/subtract two v4i8: Hexagon has no instruction for this, so we use
1492 // the double-register v8i8 add/subtract and keep only the low half. 1612 // the double-register v8i8 add/subtract and keep only the low half.
1493 def: Pat<(add V4I8:$Rs, V4I8:$Rt), 1613 def: Pat<(add V4I8:$Rs, V4I8:$Rt),
1494 (LoReg (A2_vaddub (ToZext64 $Rs), (ToZext64 $Rt)))>; 1614 (LoReg (A2_vaddub (ToAext64 $Rs), (ToAext64 $Rt)))>;
1495 def: Pat<(sub V4I8:$Rs, V4I8:$Rt), 1615 def: Pat<(sub V4I8:$Rs, V4I8:$Rt),
1496 (LoReg (A2_vsubub (ToZext64 $Rs), (ToZext64 $Rt)))>; 1616 (LoReg (A2_vsubub (ToAext64 $Rs), (ToAext64 $Rt)))>;
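A short C sketch of the v4i8 case handled above, using the GCC/Clang vector_size extension; the typedef and function name are illustrative assumptions, and the lowering comment simply restates what the patterns do.

    // Hypothetical example: a 4 x i8 vector add. Per the comment above, the
    // operands are widened to 64 bits, the v8i8 byte add is used, and only
    // the low 32-bit half of the result is kept.
    typedef signed char v4i8_t __attribute__((vector_size(4)));

    v4i8_t add_v4i8(v4i8_t a, v4i8_t b) {
      return a + b;
    }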
1497 1617
1498 // Use M2_vmpy2s_s0 for half-word vector multiply. It multiplies two 1618 // Use M2_vmpy2s_s0 for half-word vector multiply. It multiplies two
1499 // half-words, and saturates the result to a 32-bit value, except the 1619 // half-words, and saturates the result to a 32-bit value, except the
1500 // saturation never happens (it can only occur with scaling). 1620 // saturation never happens (it can only occur with scaling).
1501 def: Pat<(v2i16 (mul V2I16:$Rs, V2I16:$Rt)), 1621 def: Pat<(v2i16 (mul V2I16:$Rs, V2I16:$Rt)),
1505 (S2_vtrunewh (M2_vmpy2s_s0 (HiReg $Rs), (HiReg $Rt)), 1625 (S2_vtrunewh (M2_vmpy2s_s0 (HiReg $Rs), (HiReg $Rt)),
1506 (M2_vmpy2s_s0 (LoReg $Rs), (LoReg $Rt)))>; 1626 (M2_vmpy2s_s0 (LoReg $Rs), (LoReg $Rt)))>;
1507 1627
1508 // Multiplies two v4i8 vectors. 1628 // Multiplies two v4i8 vectors.
1509 def: Pat<(v4i8 (mul V4I8:$Rs, V4I8:$Rt)), 1629 def: Pat<(v4i8 (mul V4I8:$Rs, V4I8:$Rt)),
1510 (S2_vtrunehb (M5_vmpybuu V4I8:$Rs, V4I8:$Rt))>, 1630 (S2_vtrunehb (M5_vmpybuu V4I8:$Rs, V4I8:$Rt))>;
1511 Requires<[HasV5T]>;
1512 1631
1513 // Multiplies two v8i8 vectors. 1632 // Multiplies two v8i8 vectors.
1514 def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)), 1633 def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)),
1515 (Combinew (S2_vtrunehb (M5_vmpybuu (HiReg $Rs), (HiReg $Rt))), 1634 (Combinew (S2_vtrunehb (M5_vmpybuu (HiReg $Rs), (HiReg $Rt))),
1516 (S2_vtrunehb (M5_vmpybuu (LoReg $Rs), (LoReg $Rt))))>, 1635 (S2_vtrunehb (M5_vmpybuu (LoReg $Rs), (LoReg $Rt))))>;
1517 Requires<[HasV5T]>;
1518 1636
1519 1637
1520 // --(10) Bit ------------------------------------------------------------ 1638 // --(10) Bit ------------------------------------------------------------
1521 // 1639 //
1522 1640
1825 defm: Loadxi_pat<zextloadi8, i32, anyimm0, L2_loadrub_io>; 1943 defm: Loadxi_pat<zextloadi8, i32, anyimm0, L2_loadrub_io>;
1826 defm: Loadxi_pat<zextloadi16, i32, anyimm1, L2_loadruh_io>; 1944 defm: Loadxi_pat<zextloadi16, i32, anyimm1, L2_loadruh_io>;
1827 defm: Loadxi_pat<zextloadv2i8, v2i16, anyimm1, L2_loadbzw2_io>; 1945 defm: Loadxi_pat<zextloadv2i8, v2i16, anyimm1, L2_loadbzw2_io>;
1828 defm: Loadxi_pat<zextloadv4i8, v4i16, anyimm2, L2_loadbzw4_io>; 1946 defm: Loadxi_pat<zextloadv4i8, v4i16, anyimm2, L2_loadbzw4_io>;
1829 defm: Loadxi_pat<load, i32, anyimm2, L2_loadri_io>; 1947 defm: Loadxi_pat<load, i32, anyimm2, L2_loadri_io>;
1948 defm: Loadxi_pat<load, v2i16, anyimm2, L2_loadri_io>;
1949 defm: Loadxi_pat<load, v4i8, anyimm2, L2_loadri_io>;
1830 defm: Loadxi_pat<load, i64, anyimm3, L2_loadrd_io>; 1950 defm: Loadxi_pat<load, i64, anyimm3, L2_loadrd_io>;
1951 defm: Loadxi_pat<load, v2i32, anyimm3, L2_loadrd_io>;
1952 defm: Loadxi_pat<load, v4i16, anyimm3, L2_loadrd_io>;
1953 defm: Loadxi_pat<load, v8i8, anyimm3, L2_loadrd_io>;
1831 defm: Loadxi_pat<load, f32, anyimm2, L2_loadri_io>; 1954 defm: Loadxi_pat<load, f32, anyimm2, L2_loadri_io>;
1832 defm: Loadxi_pat<load, f64, anyimm3, L2_loadrd_io>; 1955 defm: Loadxi_pat<load, f64, anyimm3, L2_loadrd_io>;
1833 // No sextloadi1. 1956 // No sextloadi1.
1834 1957
1835 defm: Loadxi_pat<atomic_load_8 , i32, anyimm0, L2_loadrub_io>; 1958 defm: Loadxi_pat<atomic_load_8 , i32, anyimm0, L2_loadrub_io>;
1837 defm: Loadxi_pat<atomic_load_32, i32, anyimm2, L2_loadri_io>; 1960 defm: Loadxi_pat<atomic_load_32, i32, anyimm2, L2_loadri_io>;
1838 defm: Loadxi_pat<atomic_load_64, i64, anyimm3, L2_loadrd_io>; 1961 defm: Loadxi_pat<atomic_load_64, i64, anyimm3, L2_loadrd_io>;
1839 } 1962 }
1840 1963
1841 let AddedComplexity = 30 in { 1964 let AddedComplexity = 30 in {
1842 defm: Loadxim_pat<extloadi1, i64, ToZext64, anyimm0, L2_loadrub_io>; 1965 defm: Loadxim_pat<extloadi1, i64, ToAext64, anyimm0, L2_loadrub_io>;
1843 defm: Loadxim_pat<extloadi8, i64, ToZext64, anyimm0, L2_loadrub_io>; 1966 defm: Loadxim_pat<extloadi8, i64, ToAext64, anyimm0, L2_loadrub_io>;
1844 defm: Loadxim_pat<extloadi16, i64, ToZext64, anyimm1, L2_loadruh_io>; 1967 defm: Loadxim_pat<extloadi16, i64, ToAext64, anyimm1, L2_loadruh_io>;
1845 defm: Loadxim_pat<extloadi32, i64, ToZext64, anyimm2, L2_loadri_io>; 1968 defm: Loadxim_pat<extloadi32, i64, ToAext64, anyimm2, L2_loadri_io>;
1846 defm: Loadxim_pat<zextloadi1, i64, ToZext64, anyimm0, L2_loadrub_io>; 1969 defm: Loadxim_pat<zextloadi1, i64, ToZext64, anyimm0, L2_loadrub_io>;
1847 defm: Loadxim_pat<zextloadi8, i64, ToZext64, anyimm0, L2_loadrub_io>; 1970 defm: Loadxim_pat<zextloadi8, i64, ToZext64, anyimm0, L2_loadrub_io>;
1848 defm: Loadxim_pat<zextloadi16, i64, ToZext64, anyimm1, L2_loadruh_io>; 1971 defm: Loadxim_pat<zextloadi16, i64, ToZext64, anyimm1, L2_loadruh_io>;
1849 defm: Loadxim_pat<zextloadi32, i64, ToZext64, anyimm2, L2_loadri_io>; 1972 defm: Loadxim_pat<zextloadi32, i64, ToZext64, anyimm2, L2_loadri_io>;
1850 defm: Loadxim_pat<sextloadi8, i64, ToSext64, anyimm0, L2_loadrb_io>; 1973 defm: Loadxim_pat<sextloadi8, i64, ToSext64, anyimm0, L2_loadrb_io>;
1863 def: Loadxu_pat<sextloadv4i8, v4i16, anyimm2, L4_loadbzw4_ur>; 1986 def: Loadxu_pat<sextloadv4i8, v4i16, anyimm2, L4_loadbzw4_ur>;
1864 def: Loadxu_pat<zextloadi8, i32, anyimm0, L4_loadrub_ur>; 1987 def: Loadxu_pat<zextloadi8, i32, anyimm0, L4_loadrub_ur>;
1865 def: Loadxu_pat<zextloadi16, i32, anyimm1, L4_loadruh_ur>; 1988 def: Loadxu_pat<zextloadi16, i32, anyimm1, L4_loadruh_ur>;
1866 def: Loadxu_pat<zextloadv2i8, v2i16, anyimm1, L4_loadbzw2_ur>; 1989 def: Loadxu_pat<zextloadv2i8, v2i16, anyimm1, L4_loadbzw2_ur>;
1867 def: Loadxu_pat<zextloadv4i8, v4i16, anyimm2, L4_loadbzw4_ur>; 1990 def: Loadxu_pat<zextloadv4i8, v4i16, anyimm2, L4_loadbzw4_ur>;
1991 def: Loadxu_pat<load, i32, anyimm2, L4_loadri_ur>;
1992 def: Loadxu_pat<load, v2i16, anyimm2, L4_loadri_ur>;
1993 def: Loadxu_pat<load, v4i8, anyimm2, L4_loadri_ur>;
1994 def: Loadxu_pat<load, i64, anyimm3, L4_loadrd_ur>;
1995 def: Loadxu_pat<load, v2i32, anyimm3, L4_loadrd_ur>;
1996 def: Loadxu_pat<load, v4i16, anyimm3, L4_loadrd_ur>;
1997 def: Loadxu_pat<load, v8i8, anyimm3, L4_loadrd_ur>;
1868 def: Loadxu_pat<load, f32, anyimm2, L4_loadri_ur>; 1998 def: Loadxu_pat<load, f32, anyimm2, L4_loadri_ur>;
1869 def: Loadxu_pat<load, f64, anyimm3, L4_loadrd_ur>; 1999 def: Loadxu_pat<load, f64, anyimm3, L4_loadrd_ur>;
1870 def: Loadxu_pat<load, i32, anyimm2, L4_loadri_ur>;
1871 def: Loadxu_pat<load, i64, anyimm3, L4_loadrd_ur>;
1872 2000
1873 def: Loadxum_pat<sextloadi8, i64, anyimm0, ToSext64, L4_loadrb_ur>; 2001 def: Loadxum_pat<sextloadi8, i64, anyimm0, ToSext64, L4_loadrb_ur>;
1874 def: Loadxum_pat<zextloadi8, i64, anyimm0, ToZext64, L4_loadrub_ur>; 2002 def: Loadxum_pat<zextloadi8, i64, anyimm0, ToZext64, L4_loadrub_ur>;
1875 def: Loadxum_pat<extloadi8, i64, anyimm0, ToZext64, L4_loadrub_ur>; 2003 def: Loadxum_pat<extloadi8, i64, anyimm0, ToAext64, L4_loadrub_ur>;
1876 def: Loadxum_pat<sextloadi16, i64, anyimm1, ToSext64, L4_loadrh_ur>; 2004 def: Loadxum_pat<sextloadi16, i64, anyimm1, ToSext64, L4_loadrh_ur>;
1877 def: Loadxum_pat<zextloadi16, i64, anyimm1, ToZext64, L4_loadruh_ur>; 2005 def: Loadxum_pat<zextloadi16, i64, anyimm1, ToZext64, L4_loadruh_ur>;
1878 def: Loadxum_pat<extloadi16, i64, anyimm1, ToZext64, L4_loadruh_ur>; 2006 def: Loadxum_pat<extloadi16, i64, anyimm1, ToAext64, L4_loadruh_ur>;
1879 def: Loadxum_pat<sextloadi32, i64, anyimm2, ToSext64, L4_loadri_ur>; 2007 def: Loadxum_pat<sextloadi32, i64, anyimm2, ToSext64, L4_loadri_ur>;
1880 def: Loadxum_pat<zextloadi32, i64, anyimm2, ToZext64, L4_loadri_ur>; 2008 def: Loadxum_pat<zextloadi32, i64, anyimm2, ToZext64, L4_loadri_ur>;
1881 def: Loadxum_pat<extloadi32, i64, anyimm2, ToZext64, L4_loadri_ur>; 2009 def: Loadxum_pat<extloadi32, i64, anyimm2, ToAext64, L4_loadri_ur>;
1882 } 2010 }
1883 2011
1884 let AddedComplexity = 40 in { 2012 let AddedComplexity = 40 in {
1885 def: Loadxr_shl_pat<extloadi8, i32, L4_loadrub_rr>; 2013 def: Loadxr_shl_pat<extloadi8, i32, L4_loadrub_rr>;
1886 def: Loadxr_shl_pat<zextloadi8, i32, L4_loadrub_rr>; 2014 def: Loadxr_shl_pat<zextloadi8, i32, L4_loadrub_rr>;
1887 def: Loadxr_shl_pat<sextloadi8, i32, L4_loadrb_rr>; 2015 def: Loadxr_shl_pat<sextloadi8, i32, L4_loadrb_rr>;
1888 def: Loadxr_shl_pat<extloadi16, i32, L4_loadruh_rr>; 2016 def: Loadxr_shl_pat<extloadi16, i32, L4_loadruh_rr>;
1889 def: Loadxr_shl_pat<zextloadi16, i32, L4_loadruh_rr>; 2017 def: Loadxr_shl_pat<zextloadi16, i32, L4_loadruh_rr>;
1890 def: Loadxr_shl_pat<sextloadi16, i32, L4_loadrh_rr>; 2018 def: Loadxr_shl_pat<sextloadi16, i32, L4_loadrh_rr>;
1891 def: Loadxr_shl_pat<load, i32, L4_loadri_rr>; 2019 def: Loadxr_shl_pat<load, i32, L4_loadri_rr>;
1892 def: Loadxr_shl_pat<load, i64, L4_loadrd_rr>; 2020 def: Loadxr_shl_pat<load, v2i16, L4_loadri_rr>;
1893 def: Loadxr_shl_pat<load, f32, L4_loadri_rr>; 2021 def: Loadxr_shl_pat<load, v4i8, L4_loadri_rr>;
1894 def: Loadxr_shl_pat<load, f64, L4_loadrd_rr>; 2022 def: Loadxr_shl_pat<load, i64, L4_loadrd_rr>;
2023 def: Loadxr_shl_pat<load, v2i32, L4_loadrd_rr>;
2024 def: Loadxr_shl_pat<load, v4i16, L4_loadrd_rr>;
2025 def: Loadxr_shl_pat<load, v8i8, L4_loadrd_rr>;
2026 def: Loadxr_shl_pat<load, f32, L4_loadri_rr>;
2027 def: Loadxr_shl_pat<load, f64, L4_loadrd_rr>;
1895 } 2028 }
1896 2029
1897 let AddedComplexity = 20 in { 2030 let AddedComplexity = 20 in {
1898 def: Loadxr_add_pat<extloadi8, i32, L4_loadrub_rr>; 2031 def: Loadxr_add_pat<extloadi8, i32, L4_loadrub_rr>;
1899 def: Loadxr_add_pat<zextloadi8, i32, L4_loadrub_rr>; 2032 def: Loadxr_add_pat<zextloadi8, i32, L4_loadrub_rr>;
1900 def: Loadxr_add_pat<sextloadi8, i32, L4_loadrb_rr>; 2033 def: Loadxr_add_pat<sextloadi8, i32, L4_loadrb_rr>;
1901 def: Loadxr_add_pat<extloadi16, i32, L4_loadruh_rr>; 2034 def: Loadxr_add_pat<extloadi16, i32, L4_loadruh_rr>;
1902 def: Loadxr_add_pat<zextloadi16, i32, L4_loadruh_rr>; 2035 def: Loadxr_add_pat<zextloadi16, i32, L4_loadruh_rr>;
1903 def: Loadxr_add_pat<sextloadi16, i32, L4_loadrh_rr>; 2036 def: Loadxr_add_pat<sextloadi16, i32, L4_loadrh_rr>;
1904 def: Loadxr_add_pat<load, i32, L4_loadri_rr>; 2037 def: Loadxr_add_pat<load, i32, L4_loadri_rr>;
1905 def: Loadxr_add_pat<load, i64, L4_loadrd_rr>; 2038 def: Loadxr_add_pat<load, v2i16, L4_loadri_rr>;
1906 def: Loadxr_add_pat<load, f32, L4_loadri_rr>; 2039 def: Loadxr_add_pat<load, v4i8, L4_loadri_rr>;
1907 def: Loadxr_add_pat<load, f64, L4_loadrd_rr>; 2040 def: Loadxr_add_pat<load, i64, L4_loadrd_rr>;
2041 def: Loadxr_add_pat<load, v2i32, L4_loadrd_rr>;
2042 def: Loadxr_add_pat<load, v4i16, L4_loadrd_rr>;
2043 def: Loadxr_add_pat<load, v8i8, L4_loadrd_rr>;
2044 def: Loadxr_add_pat<load, f32, L4_loadri_rr>;
2045 def: Loadxr_add_pat<load, f64, L4_loadrd_rr>;
1908 } 2046 }
1909 2047
1910 let AddedComplexity = 40 in { 2048 let AddedComplexity = 40 in {
1911 def: Loadxrm_shl_pat<extloadi8, i64, ToZext64, L4_loadrub_rr>; 2049 def: Loadxrm_shl_pat<extloadi8, i64, ToAext64, L4_loadrub_rr>;
1912 def: Loadxrm_shl_pat<zextloadi8, i64, ToZext64, L4_loadrub_rr>; 2050 def: Loadxrm_shl_pat<zextloadi8, i64, ToZext64, L4_loadrub_rr>;
1913 def: Loadxrm_shl_pat<sextloadi8, i64, ToSext64, L4_loadrb_rr>; 2051 def: Loadxrm_shl_pat<sextloadi8, i64, ToSext64, L4_loadrb_rr>;
1914 def: Loadxrm_shl_pat<extloadi16, i64, ToZext64, L4_loadruh_rr>; 2052 def: Loadxrm_shl_pat<extloadi16, i64, ToAext64, L4_loadruh_rr>;
1915 def: Loadxrm_shl_pat<zextloadi16, i64, ToZext64, L4_loadruh_rr>; 2053 def: Loadxrm_shl_pat<zextloadi16, i64, ToZext64, L4_loadruh_rr>;
1916 def: Loadxrm_shl_pat<sextloadi16, i64, ToSext64, L4_loadrh_rr>; 2054 def: Loadxrm_shl_pat<sextloadi16, i64, ToSext64, L4_loadrh_rr>;
1917 def: Loadxrm_shl_pat<extloadi32, i64, ToZext64, L4_loadri_rr>; 2055 def: Loadxrm_shl_pat<extloadi32, i64, ToAext64, L4_loadri_rr>;
1918 def: Loadxrm_shl_pat<zextloadi32, i64, ToZext64, L4_loadri_rr>; 2056 def: Loadxrm_shl_pat<zextloadi32, i64, ToZext64, L4_loadri_rr>;
1919 def: Loadxrm_shl_pat<sextloadi32, i64, ToSext64, L4_loadri_rr>; 2057 def: Loadxrm_shl_pat<sextloadi32, i64, ToSext64, L4_loadri_rr>;
1920 } 2058 }
1921 2059
1922 let AddedComplexity = 20 in { 2060 let AddedComplexity = 20 in {
1923 def: Loadxrm_add_pat<extloadi8, i64, ToZext64, L4_loadrub_rr>; 2061 def: Loadxrm_add_pat<extloadi8, i64, ToAext64, L4_loadrub_rr>;
1924 def: Loadxrm_add_pat<zextloadi8, i64, ToZext64, L4_loadrub_rr>; 2062 def: Loadxrm_add_pat<zextloadi8, i64, ToZext64, L4_loadrub_rr>;
1925 def: Loadxrm_add_pat<sextloadi8, i64, ToSext64, L4_loadrb_rr>; 2063 def: Loadxrm_add_pat<sextloadi8, i64, ToSext64, L4_loadrb_rr>;
1926 def: Loadxrm_add_pat<extloadi16, i64, ToZext64, L4_loadruh_rr>; 2064 def: Loadxrm_add_pat<extloadi16, i64, ToAext64, L4_loadruh_rr>;
1927 def: Loadxrm_add_pat<zextloadi16, i64, ToZext64, L4_loadruh_rr>; 2065 def: Loadxrm_add_pat<zextloadi16, i64, ToZext64, L4_loadruh_rr>;
1928 def: Loadxrm_add_pat<sextloadi16, i64, ToSext64, L4_loadrh_rr>; 2066 def: Loadxrm_add_pat<sextloadi16, i64, ToSext64, L4_loadrh_rr>;
1929 def: Loadxrm_add_pat<extloadi32, i64, ToZext64, L4_loadri_rr>; 2067 def: Loadxrm_add_pat<extloadi32, i64, ToAext64, L4_loadri_rr>;
1930 def: Loadxrm_add_pat<zextloadi32, i64, ToZext64, L4_loadri_rr>; 2068 def: Loadxrm_add_pat<zextloadi32, i64, ToZext64, L4_loadri_rr>;
1931 def: Loadxrm_add_pat<sextloadi32, i64, ToSext64, L4_loadri_rr>; 2069 def: Loadxrm_add_pat<sextloadi32, i64, ToSext64, L4_loadri_rr>;
1932 } 2070 }
1933 2071
1934 // Absolute address 2072 // Absolute address
1935 2073
1936 let AddedComplexity = 60 in { 2074 let AddedComplexity = 60 in {
1937 def: Loada_pat<zextloadi1, i32, anyimm0, PS_loadrubabs>; 2075 def: Loada_pat<zextloadi1, i32, anyimm0, PS_loadrubabs>;
1938 def: Loada_pat<sextloadi8, i32, anyimm0, PS_loadrbabs>; 2076 def: Loada_pat<sextloadi8, i32, anyimm0, PS_loadrbabs>;
1939 def: Loada_pat<extloadi8, i32, anyimm0, PS_loadrubabs>; 2077 def: Loada_pat<extloadi8, i32, anyimm0, PS_loadrubabs>;
1940 def: Loada_pat<zextloadi8, i32, anyimm0, PS_loadrubabs>; 2078 def: Loada_pat<zextloadi8, i32, anyimm0, PS_loadrubabs>;
1941 def: Loada_pat<sextloadi16, i32, anyimm1, PS_loadrhabs>; 2079 def: Loada_pat<sextloadi16, i32, anyimm1, PS_loadrhabs>;
1942 def: Loada_pat<extloadi16, i32, anyimm1, PS_loadruhabs>; 2080 def: Loada_pat<extloadi16, i32, anyimm1, PS_loadruhabs>;
1943 def: Loada_pat<zextloadi16, i32, anyimm1, PS_loadruhabs>; 2081 def: Loada_pat<zextloadi16, i32, anyimm1, PS_loadruhabs>;
1944 def: Loada_pat<load, i32, anyimm2, PS_loadriabs>; 2082 def: Loada_pat<load, i32, anyimm2, PS_loadriabs>;
1945 def: Loada_pat<load, i64, anyimm3, PS_loadrdabs>; 2083 def: Loada_pat<load, v2i16, anyimm2, PS_loadriabs>;
1946 def: Loada_pat<load, f32, anyimm2, PS_loadriabs>; 2084 def: Loada_pat<load, v4i8, anyimm2, PS_loadriabs>;
1947 def: Loada_pat<load, f64, anyimm3, PS_loadrdabs>; 2085 def: Loada_pat<load, i64, anyimm3, PS_loadrdabs>;
2086 def: Loada_pat<load, v2i32, anyimm3, PS_loadrdabs>;
2087 def: Loada_pat<load, v4i16, anyimm3, PS_loadrdabs>;
2088 def: Loada_pat<load, v8i8, anyimm3, PS_loadrdabs>;
2089 def: Loada_pat<load, f32, anyimm2, PS_loadriabs>;
2090 def: Loada_pat<load, f64, anyimm3, PS_loadrdabs>;
1948 2091
1949 def: Loada_pat<atomic_load_8, i32, anyimm0, PS_loadrubabs>; 2092 def: Loada_pat<atomic_load_8, i32, anyimm0, PS_loadrubabs>;
1950 def: Loada_pat<atomic_load_16, i32, anyimm1, PS_loadruhabs>; 2093 def: Loada_pat<atomic_load_16, i32, anyimm1, PS_loadruhabs>;
1951 def: Loada_pat<atomic_load_32, i32, anyimm2, PS_loadriabs>; 2094 def: Loada_pat<atomic_load_32, i32, anyimm2, PS_loadriabs>;
1952 def: Loada_pat<atomic_load_64, i64, anyimm3, PS_loadrdabs>; 2095 def: Loada_pat<atomic_load_64, i64, anyimm3, PS_loadrdabs>;
1953 } 2096 }
1954 2097
1955 let AddedComplexity = 30 in { 2098 let AddedComplexity = 30 in {
1956 def: Loadam_pat<extloadi8, i64, anyimm0, ToZext64, PS_loadrubabs>; 2099 def: Loadam_pat<extloadi8, i64, anyimm0, ToAext64, PS_loadrubabs>;
1957 def: Loadam_pat<sextloadi8, i64, anyimm0, ToSext64, PS_loadrbabs>; 2100 def: Loadam_pat<sextloadi8, i64, anyimm0, ToSext64, PS_loadrbabs>;
1958 def: Loadam_pat<zextloadi8, i64, anyimm0, ToZext64, PS_loadrubabs>; 2101 def: Loadam_pat<zextloadi8, i64, anyimm0, ToZext64, PS_loadrubabs>;
1959 def: Loadam_pat<extloadi16, i64, anyimm1, ToZext64, PS_loadruhabs>; 2102 def: Loadam_pat<extloadi16, i64, anyimm1, ToAext64, PS_loadruhabs>;
1960 def: Loadam_pat<sextloadi16, i64, anyimm1, ToSext64, PS_loadrhabs>; 2103 def: Loadam_pat<sextloadi16, i64, anyimm1, ToSext64, PS_loadrhabs>;
1961 def: Loadam_pat<zextloadi16, i64, anyimm1, ToZext64, PS_loadruhabs>; 2104 def: Loadam_pat<zextloadi16, i64, anyimm1, ToZext64, PS_loadruhabs>;
1962 def: Loadam_pat<extloadi32, i64, anyimm2, ToZext64, PS_loadriabs>; 2105 def: Loadam_pat<extloadi32, i64, anyimm2, ToAext64, PS_loadriabs>;
1963 def: Loadam_pat<sextloadi32, i64, anyimm2, ToSext64, PS_loadriabs>; 2106 def: Loadam_pat<sextloadi32, i64, anyimm2, ToSext64, PS_loadriabs>;
1964 def: Loadam_pat<zextloadi32, i64, anyimm2, ToZext64, PS_loadriabs>; 2107 def: Loadam_pat<zextloadi32, i64, anyimm2, ToZext64, PS_loadriabs>;
1965 2108
1966 def: Loadam_pat<load, i1, anyimm0, I32toI1, PS_loadrubabs>; 2109 def: Loadam_pat<load, i1, anyimm0, I32toI1, PS_loadrubabs>;
1967 def: Loadam_pat<zextloadi1, i64, anyimm0, ToZext64, PS_loadrubabs>; 2110 def: Loadam_pat<zextloadi1, i64, anyimm0, ToZext64, PS_loadrubabs>;
1968 } 2111 }
1969 2112
1970 // GP-relative address 2113 // GP-relative address
1971 2114
1972 let AddedComplexity = 100 in { 2115 let AddedComplexity = 100 in {
1973 def: Loada_pat<extloadi1, i32, addrgp, L2_loadrubgp>; 2116 def: Loada_pat<extloadi1, i32, addrgp, L2_loadrubgp>;
1974 def: Loada_pat<zextloadi1, i32, addrgp, L2_loadrubgp>; 2117 def: Loada_pat<zextloadi1, i32, addrgp, L2_loadrubgp>;
1975 def: Loada_pat<extloadi8, i32, addrgp, L2_loadrubgp>; 2118 def: Loada_pat<extloadi8, i32, addrgp, L2_loadrubgp>;
1976 def: Loada_pat<sextloadi8, i32, addrgp, L2_loadrbgp>; 2119 def: Loada_pat<sextloadi8, i32, addrgp, L2_loadrbgp>;
1977 def: Loada_pat<zextloadi8, i32, addrgp, L2_loadrubgp>; 2120 def: Loada_pat<zextloadi8, i32, addrgp, L2_loadrubgp>;
1978 def: Loada_pat<extloadi16, i32, addrgp, L2_loadruhgp>; 2121 def: Loada_pat<extloadi16, i32, addrgp, L2_loadruhgp>;
1979 def: Loada_pat<sextloadi16, i32, addrgp, L2_loadrhgp>; 2122 def: Loada_pat<sextloadi16, i32, addrgp, L2_loadrhgp>;
1980 def: Loada_pat<zextloadi16, i32, addrgp, L2_loadruhgp>; 2123 def: Loada_pat<zextloadi16, i32, addrgp, L2_loadruhgp>;
1981 def: Loada_pat<load, i32, addrgp, L2_loadrigp>; 2124 def: Loada_pat<load, i32, addrgp, L2_loadrigp>;
1982 def: Loada_pat<load, i64, addrgp, L2_loadrdgp>; 2125 def: Loada_pat<load, v2i16, addrgp, L2_loadrigp>;
1983 def: Loada_pat<load, f32, addrgp, L2_loadrigp>; 2126 def: Loada_pat<load, v4i8, addrgp, L2_loadrigp>;
1984 def: Loada_pat<load, f64, addrgp, L2_loadrdgp>; 2127 def: Loada_pat<load, i64, addrgp, L2_loadrdgp>;
2128 def: Loada_pat<load, v2i32, addrgp, L2_loadrdgp>;
2129 def: Loada_pat<load, v4i16, addrgp, L2_loadrdgp>;
2130 def: Loada_pat<load, v8i8, addrgp, L2_loadrdgp>;
2131 def: Loada_pat<load, f32, addrgp, L2_loadrigp>;
2132 def: Loada_pat<load, f64, addrgp, L2_loadrdgp>;
1985 2133
1986 def: Loada_pat<atomic_load_8, i32, addrgp, L2_loadrubgp>; 2134 def: Loada_pat<atomic_load_8, i32, addrgp, L2_loadrubgp>;
1987 def: Loada_pat<atomic_load_16, i32, addrgp, L2_loadruhgp>; 2135 def: Loada_pat<atomic_load_16, i32, addrgp, L2_loadruhgp>;
1988 def: Loada_pat<atomic_load_32, i32, addrgp, L2_loadrigp>; 2136 def: Loada_pat<atomic_load_32, i32, addrgp, L2_loadrigp>;
1989 def: Loada_pat<atomic_load_64, i64, addrgp, L2_loadrdgp>; 2137 def: Loada_pat<atomic_load_64, i64, addrgp, L2_loadrdgp>;
1990 } 2138 }
1991 2139
1992 let AddedComplexity = 70 in { 2140 let AddedComplexity = 70 in {
1993 def: Loadam_pat<extloadi8, i64, addrgp, ToZext64, L2_loadrubgp>; 2141 def: Loadam_pat<extloadi8, i64, addrgp, ToAext64, L2_loadrubgp>;
1994 def: Loadam_pat<sextloadi8, i64, addrgp, ToSext64, L2_loadrbgp>; 2142 def: Loadam_pat<sextloadi8, i64, addrgp, ToSext64, L2_loadrbgp>;
1995 def: Loadam_pat<zextloadi8, i64, addrgp, ToZext64, L2_loadrubgp>; 2143 def: Loadam_pat<zextloadi8, i64, addrgp, ToZext64, L2_loadrubgp>;
1996 def: Loadam_pat<extloadi16, i64, addrgp, ToZext64, L2_loadruhgp>; 2144 def: Loadam_pat<extloadi16, i64, addrgp, ToAext64, L2_loadruhgp>;
1997 def: Loadam_pat<sextloadi16, i64, addrgp, ToSext64, L2_loadrhgp>; 2145 def: Loadam_pat<sextloadi16, i64, addrgp, ToSext64, L2_loadrhgp>;
1998 def: Loadam_pat<zextloadi16, i64, addrgp, ToZext64, L2_loadruhgp>; 2146 def: Loadam_pat<zextloadi16, i64, addrgp, ToZext64, L2_loadruhgp>;
1999 def: Loadam_pat<extloadi32, i64, addrgp, ToZext64, L2_loadrigp>; 2147 def: Loadam_pat<extloadi32, i64, addrgp, ToAext64, L2_loadrigp>;
2000 def: Loadam_pat<sextloadi32, i64, addrgp, ToSext64, L2_loadrigp>; 2148 def: Loadam_pat<sextloadi32, i64, addrgp, ToSext64, L2_loadrigp>;
2001 def: Loadam_pat<zextloadi32, i64, addrgp, ToZext64, L2_loadrigp>; 2149 def: Loadam_pat<zextloadi32, i64, addrgp, ToZext64, L2_loadrigp>;
2002 2150
2003 def: Loadam_pat<load, i1, addrgp, I32toI1, L2_loadrubgp>; 2151 def: Loadam_pat<load, i1, addrgp, I32toI1, L2_loadrubgp>;
2004 def: Loadam_pat<zextloadi1, i64, addrgp, ToZext64, L2_loadrubgp>; 2152 def: Loadam_pat<zextloadi1, i64, addrgp, ToZext64, L2_loadrubgp>;
2134 // Atomic stores also have two operands, but they are reversed: address, value. 2282 // Atomic stores also have two operands, but they are reversed: address, value.
2135 // To use atomic stores with the patterns, they need to have their operands 2283 // To use atomic stores with the patterns, they need to have their operands
2136 // swapped. This relies on the knowledge that the F.Fragment uses names 2284 // swapped. This relies on the knowledge that the F.Fragment uses names
2137 // "ptr" and "val". 2285 // "ptr" and "val".
2138 class AtomSt<PatFrag F> 2286 class AtomSt<PatFrag F>
2139 : PatFrag<(ops node:$val, node:$ptr), F.Fragment, F.PredicateCode, 2287 : PatFrag<(ops node:$val, node:$ptr), !head(F.Fragments), F.PredicateCode,
2140 F.OperandTransform> { 2288 F.OperandTransform> {
2141 let IsAtomic = F.IsAtomic; 2289 let IsAtomic = F.IsAtomic;
2142 let MemoryVT = F.MemoryVT; 2290 let MemoryVT = F.MemoryVT;
2143 } 2291 }
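For context, a C11 atomic store such as the one below is the kind of node the AtomSt-wrapped patterns handle; the wrapper only renames the fragment's operands so the ordinary (value, address) store patterns can reuse it. The example is illustrative, not taken from this file.

    // Hypothetical example: a relaxed 32-bit atomic store. At the DAG level the
    // atomic store node carries (address, value); AtomSt re-exposes the same
    // fragment as (value, address) so the regular store patterns apply.
    #include <stdatomic.h>

    void store_relaxed(_Atomic int *p, int v) {
      atomic_store_explicit(p, v, memory_order_relaxed);
    }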
2144 2292
2250 def: Storexi_abs_pat<store, anyimm, S4_storeiri_io>; 2398 def: Storexi_abs_pat<store, anyimm, S4_storeiri_io>;
2251 } 2399 }
2252 2400
2253 // GP-relative address 2401 // GP-relative address
2254 let AddedComplexity = 120 in { 2402 let AddedComplexity = 120 in {
2255 def: Storea_pat<truncstorei8, I32, addrgp, S2_storerbgp>; 2403 def: Storea_pat<truncstorei8, I32, addrgp, S2_storerbgp>;
2256 def: Storea_pat<truncstorei16, I32, addrgp, S2_storerhgp>; 2404 def: Storea_pat<truncstorei16, I32, addrgp, S2_storerhgp>;
2257 def: Storea_pat<store, I32, addrgp, S2_storerigp>; 2405 def: Storea_pat<store, I32, addrgp, S2_storerigp>;
2258 def: Storea_pat<store, I64, addrgp, S2_storerdgp>; 2406 def: Storea_pat<store, V4I8, addrgp, S2_storerigp>;
2259 def: Storea_pat<store, F32, addrgp, S2_storerigp>; 2407 def: Storea_pat<store, V2I16, addrgp, S2_storerigp>;
2260 def: Storea_pat<store, F64, addrgp, S2_storerdgp>; 2408 def: Storea_pat<store, I64, addrgp, S2_storerdgp>;
2261 def: Storea_pat<AtomSt<atomic_store_8>, I32, addrgp, S2_storerbgp>; 2409 def: Storea_pat<store, V8I8, addrgp, S2_storerdgp>;
2262 def: Storea_pat<AtomSt<atomic_store_16>, I32, addrgp, S2_storerhgp>; 2410 def: Storea_pat<store, V4I16, addrgp, S2_storerdgp>;
2263 def: Storea_pat<AtomSt<atomic_store_32>, I32, addrgp, S2_storerigp>; 2411 def: Storea_pat<store, V2I32, addrgp, S2_storerdgp>;
2264 def: Storea_pat<AtomSt<atomic_store_64>, I64, addrgp, S2_storerdgp>; 2412 def: Storea_pat<store, F32, addrgp, S2_storerigp>;
2413 def: Storea_pat<store, F64, addrgp, S2_storerdgp>;
2414 def: Storea_pat<AtomSt<atomic_store_8>, I32, addrgp, S2_storerbgp>;
2415 def: Storea_pat<AtomSt<atomic_store_16>, I32, addrgp, S2_storerhgp>;
2416 def: Storea_pat<AtomSt<atomic_store_32>, I32, addrgp, S2_storerigp>;
2417 def: Storea_pat<AtomSt<atomic_store_32>, V4I8, addrgp, S2_storerigp>;
2418 def: Storea_pat<AtomSt<atomic_store_32>, V2I16, addrgp, S2_storerigp>;
2419 def: Storea_pat<AtomSt<atomic_store_64>, I64, addrgp, S2_storerdgp>;
2420 def: Storea_pat<AtomSt<atomic_store_64>, V8I8, addrgp, S2_storerdgp>;
2421 def: Storea_pat<AtomSt<atomic_store_64>, V4I16, addrgp, S2_storerdgp>;
2422 def: Storea_pat<AtomSt<atomic_store_64>, V2I32, addrgp, S2_storerdgp>;
2265 2423
2266 def: Stoream_pat<truncstorei8, I64, addrgp, LoReg, S2_storerbgp>; 2424 def: Stoream_pat<truncstorei8, I64, addrgp, LoReg, S2_storerbgp>;
2267 def: Stoream_pat<truncstorei16, I64, addrgp, LoReg, S2_storerhgp>; 2425 def: Stoream_pat<truncstorei16, I64, addrgp, LoReg, S2_storerhgp>;
2268 def: Stoream_pat<truncstorei32, I64, addrgp, LoReg, S2_storerigp>; 2426 def: Stoream_pat<truncstorei32, I64, addrgp, LoReg, S2_storerigp>;
2269 def: Stoream_pat<store, I1, addrgp, I1toI32, S2_storerbgp>; 2427 def: Stoream_pat<store, I1, addrgp, I1toI32, S2_storerbgp>;
2270 } 2428 }
2271 2429
2272 // Absolute address 2430 // Absolute address
2273 let AddedComplexity = 110 in { 2431 let AddedComplexity = 110 in {
2274 def: Storea_pat<truncstorei8, I32, anyimm0, PS_storerbabs>; 2432 def: Storea_pat<truncstorei8, I32, anyimm0, PS_storerbabs>;
2275 def: Storea_pat<truncstorei16, I32, anyimm1, PS_storerhabs>; 2433 def: Storea_pat<truncstorei16, I32, anyimm1, PS_storerhabs>;
2276 def: Storea_pat<store, I32, anyimm2, PS_storeriabs>; 2434 def: Storea_pat<store, I32, anyimm2, PS_storeriabs>;
2277 def: Storea_pat<store, I64, anyimm3, PS_storerdabs>; 2435 def: Storea_pat<store, V4I8, anyimm2, PS_storeriabs>;
2278 def: Storea_pat<store, F32, anyimm2, PS_storeriabs>; 2436 def: Storea_pat<store, V2I16, anyimm2, PS_storeriabs>;
2279 def: Storea_pat<store, F64, anyimm3, PS_storerdabs>; 2437 def: Storea_pat<store, I64, anyimm3, PS_storerdabs>;
2280 def: Storea_pat<AtomSt<atomic_store_8>, I32, anyimm0, PS_storerbabs>; 2438 def: Storea_pat<store, V8I8, anyimm3, PS_storerdabs>;
2281 def: Storea_pat<AtomSt<atomic_store_16>, I32, anyimm1, PS_storerhabs>; 2439 def: Storea_pat<store, V4I16, anyimm3, PS_storerdabs>;
2282 def: Storea_pat<AtomSt<atomic_store_32>, I32, anyimm2, PS_storeriabs>; 2440 def: Storea_pat<store, V2I32, anyimm3, PS_storerdabs>;
2283 def: Storea_pat<AtomSt<atomic_store_64>, I64, anyimm3, PS_storerdabs>; 2441 def: Storea_pat<store, F32, anyimm2, PS_storeriabs>;
2442 def: Storea_pat<store, F64, anyimm3, PS_storerdabs>;
2443 def: Storea_pat<AtomSt<atomic_store_8>, I32, anyimm0, PS_storerbabs>;
2444 def: Storea_pat<AtomSt<atomic_store_16>, I32, anyimm1, PS_storerhabs>;
2445 def: Storea_pat<AtomSt<atomic_store_32>, I32, anyimm2, PS_storeriabs>;
2446 def: Storea_pat<AtomSt<atomic_store_32>, V4I8, anyimm2, PS_storeriabs>;
2447 def: Storea_pat<AtomSt<atomic_store_32>, V2I16, anyimm2, PS_storeriabs>;
2448 def: Storea_pat<AtomSt<atomic_store_64>, I64, anyimm3, PS_storerdabs>;
2449 def: Storea_pat<AtomSt<atomic_store_64>, V8I8, anyimm3, PS_storerdabs>;
2450 def: Storea_pat<AtomSt<atomic_store_64>, V4I16, anyimm3, PS_storerdabs>;
2451 def: Storea_pat<AtomSt<atomic_store_64>, V2I32, anyimm3, PS_storerdabs>;
2284 2452
2285 def: Stoream_pat<truncstorei8, I64, anyimm0, LoReg, PS_storerbabs>; 2453 def: Stoream_pat<truncstorei8, I64, anyimm0, LoReg, PS_storerbabs>;
2286 def: Stoream_pat<truncstorei16, I64, anyimm1, LoReg, PS_storerhabs>; 2454 def: Stoream_pat<truncstorei16, I64, anyimm1, LoReg, PS_storerhabs>;
2287 def: Stoream_pat<truncstorei32, I64, anyimm2, LoReg, PS_storeriabs>; 2455 def: Stoream_pat<truncstorei32, I64, anyimm2, LoReg, PS_storeriabs>;
2288 def: Stoream_pat<store, I1, anyimm0, I1toI32, PS_storerbabs>; 2456 def: Stoream_pat<store, I1, anyimm0, I1toI32, PS_storerbabs>;
2289 } 2457 }
2290 2458
2291 // Reg<<S + Imm 2459 // Reg<<S + Imm
2292 let AddedComplexity = 100 in { 2460 let AddedComplexity = 100 in {
2293 def: Storexu_shl_pat<truncstorei8, I32, anyimm0, S4_storerb_ur>; 2461 def: Storexu_shl_pat<truncstorei8, I32, anyimm0, S4_storerb_ur>;
2294 def: Storexu_shl_pat<truncstorei16, I32, anyimm1, S4_storerh_ur>; 2462 def: Storexu_shl_pat<truncstorei16, I32, anyimm1, S4_storerh_ur>;
2295 def: Storexu_shl_pat<store, I32, anyimm2, S4_storeri_ur>; 2463 def: Storexu_shl_pat<store, I32, anyimm2, S4_storeri_ur>;
2296 def: Storexu_shl_pat<store, I64, anyimm3, S4_storerd_ur>; 2464 def: Storexu_shl_pat<store, V4I8, anyimm2, S4_storeri_ur>;
2297 def: Storexu_shl_pat<store, F32, anyimm2, S4_storeri_ur>; 2465 def: Storexu_shl_pat<store, V2I16, anyimm2, S4_storeri_ur>;
2298 def: Storexu_shl_pat<store, F64, anyimm3, S4_storerd_ur>; 2466 def: Storexu_shl_pat<store, I64, anyimm3, S4_storerd_ur>;
2467 def: Storexu_shl_pat<store, V8I8, anyimm3, S4_storerd_ur>;
2468 def: Storexu_shl_pat<store, V4I16, anyimm3, S4_storerd_ur>;
2469 def: Storexu_shl_pat<store, V2I32, anyimm3, S4_storerd_ur>;
2470 def: Storexu_shl_pat<store, F32, anyimm2, S4_storeri_ur>;
2471 def: Storexu_shl_pat<store, F64, anyimm3, S4_storerd_ur>;
2299 2472
2300 def: Pat<(store I1:$Pu, (add (shl I32:$Rs, u2_0ImmPred:$u2), anyimm:$A)), 2473 def: Pat<(store I1:$Pu, (add (shl I32:$Rs, u2_0ImmPred:$u2), anyimm:$A)),
2301 (S4_storerb_ur IntRegs:$Rs, imm:$u2, imm:$A, (I1toI32 I1:$Pu))>; 2474 (S4_storerb_ur IntRegs:$Rs, imm:$u2, imm:$A, (I1toI32 I1:$Pu))>;
2302 } 2475 }
2303 2476
2304 // Reg<<S + Reg 2477 // Reg<<S + Reg
2305 let AddedComplexity = 90 in { 2478 let AddedComplexity = 90 in {
2306 def: Storexr_shl_pat<truncstorei8, I32, S4_storerb_rr>; 2479 def: Storexr_shl_pat<truncstorei8, I32, S4_storerb_rr>;
2307 def: Storexr_shl_pat<truncstorei16, I32, S4_storerh_rr>; 2480 def: Storexr_shl_pat<truncstorei16, I32, S4_storerh_rr>;
2308 def: Storexr_shl_pat<store, I32, S4_storeri_rr>; 2481 def: Storexr_shl_pat<store, I32, S4_storeri_rr>;
2309 def: Storexr_shl_pat<store, I64, S4_storerd_rr>; 2482 def: Storexr_shl_pat<store, V4I8, S4_storeri_rr>;
2310 def: Storexr_shl_pat<store, F32, S4_storeri_rr>; 2483 def: Storexr_shl_pat<store, V2I16, S4_storeri_rr>;
2311 def: Storexr_shl_pat<store, F64, S4_storerd_rr>; 2484 def: Storexr_shl_pat<store, I64, S4_storerd_rr>;
2485 def: Storexr_shl_pat<store, V8I8, S4_storerd_rr>;
2486 def: Storexr_shl_pat<store, V4I16, S4_storerd_rr>;
2487 def: Storexr_shl_pat<store, V2I32, S4_storerd_rr>;
2488 def: Storexr_shl_pat<store, F32, S4_storeri_rr>;
2489 def: Storexr_shl_pat<store, F64, S4_storerd_rr>;
2312 2490
2313 def: Pat<(store I1:$Pu, (add (shl I32:$Rs, u2_0ImmPred:$u2), I32:$Rt)), 2491 def: Pat<(store I1:$Pu, (add (shl I32:$Rs, u2_0ImmPred:$u2), I32:$Rt)),
2314 (S4_storerb_ur IntRegs:$Rt, IntRegs:$Rs, imm:$u2, (I1toI32 I1:$Pu))>; 2492 (S4_storerb_ur IntRegs:$Rt, IntRegs:$Rs, imm:$u2, (I1toI32 I1:$Pu))>;
2315 } 2493 }
2316 2494
2358 def: Storexim_fi_pat<LS_<store>, anyimm, ToI32, S2_storeri_io>; 2536 def: Storexim_fi_pat<LS_<store>, anyimm, ToI32, S2_storeri_io>;
2359 } 2537 }
2360 2538
2361 // Fi+Imm, Fi, store-register 2539 // Fi+Imm, Fi, store-register
2362 let AddedComplexity = 60 in { 2540 let AddedComplexity = 60 in {
2363 defm: Storexi_fi_add_pat<truncstorei8, I32, anyimm, S2_storerb_io>; 2541 defm: Storexi_fi_add_pat<truncstorei8, I32, anyimm, S2_storerb_io>;
2364 defm: Storexi_fi_add_pat<truncstorei16, I32, anyimm, S2_storerh_io>; 2542 defm: Storexi_fi_add_pat<truncstorei16, I32, anyimm, S2_storerh_io>;
2365 defm: Storexi_fi_add_pat<store, I32, anyimm, S2_storeri_io>; 2543 defm: Storexi_fi_add_pat<store, I32, anyimm, S2_storeri_io>;
2366 defm: Storexi_fi_add_pat<store, I64, anyimm, S2_storerd_io>; 2544 defm: Storexi_fi_add_pat<store, V4I8, anyimm, S2_storeri_io>;
2367 defm: Storexi_fi_add_pat<store, F32, anyimm, S2_storeri_io>; 2545 defm: Storexi_fi_add_pat<store, V2I16, anyimm, S2_storeri_io>;
2368 defm: Storexi_fi_add_pat<store, F64, anyimm, S2_storerd_io>; 2546 defm: Storexi_fi_add_pat<store, I64, anyimm, S2_storerd_io>;
2547 defm: Storexi_fi_add_pat<store, V8I8, anyimm, S2_storerd_io>;
2548 defm: Storexi_fi_add_pat<store, V4I16, anyimm, S2_storerd_io>;
2549 defm: Storexi_fi_add_pat<store, V2I32, anyimm, S2_storerd_io>;
2550 defm: Storexi_fi_add_pat<store, F32, anyimm, S2_storeri_io>;
2551 defm: Storexi_fi_add_pat<store, F64, anyimm, S2_storerd_io>;
2369 defm: Storexim_fi_add_pat<store, I1, anyimm, I1toI32, S2_storerb_io>; 2552 defm: Storexim_fi_add_pat<store, I1, anyimm, I1toI32, S2_storerb_io>;
2370 2553
2371 def: Storexi_fi_pat<truncstorei8, I32, S2_storerb_io>; 2554 def: Storexi_fi_pat<truncstorei8, I32, S2_storerb_io>;
2372 def: Storexi_fi_pat<truncstorei16, I32, S2_storerh_io>; 2555 def: Storexi_fi_pat<truncstorei16, I32, S2_storerh_io>;
2373 def: Storexi_fi_pat<store, I32, S2_storeri_io>; 2556 def: Storexi_fi_pat<store, I32, S2_storeri_io>;
2374 def: Storexi_fi_pat<store, I64, S2_storerd_io>; 2557 def: Storexi_fi_pat<store, V4I8, S2_storeri_io>;
2375 def: Storexi_fi_pat<store, F32, S2_storeri_io>; 2558 def: Storexi_fi_pat<store, V2I16, S2_storeri_io>;
2376 def: Storexi_fi_pat<store, F64, S2_storerd_io>; 2559 def: Storexi_fi_pat<store, I64, S2_storerd_io>;
2560 def: Storexi_fi_pat<store, V8I8, S2_storerd_io>;
2561 def: Storexi_fi_pat<store, V4I16, S2_storerd_io>;
2562 def: Storexi_fi_pat<store, V2I32, S2_storerd_io>;
2563 def: Storexi_fi_pat<store, F32, S2_storeri_io>;
2564 def: Storexi_fi_pat<store, F64, S2_storerd_io>;
2377 def: Storexim_fi_pat<store, I1, I1toI32, S2_storerb_io>; 2565 def: Storexim_fi_pat<store, I1, I1toI32, S2_storerb_io>;
2378 } 2566 }
2379 2567
2380 2568
2381 multiclass IMRA_<PatFrag S, PatFrag V, PatFrag O, PatFrag M, InstHexagon I> { 2569 multiclass IMRA_<PatFrag S, PatFrag V, PatFrag O, PatFrag M, InstHexagon I> {
2396 defm: IRA_<store, anyimm, u6_2ImmPred, S4_storeiri_io>; 2584 defm: IRA_<store, anyimm, u6_2ImmPred, S4_storeiri_io>;
2397 } 2585 }
2398 2586
2399 // Reg+Imm, store-register 2587 // Reg+Imm, store-register
2400 let AddedComplexity = 40 in { 2588 let AddedComplexity = 40 in {
2401 defm: Storexi_pat<truncstorei8, I32, anyimm0, S2_storerb_io>; 2589 defm: Storexi_pat<truncstorei8, I32, anyimm0, S2_storerb_io>;
2402 defm: Storexi_pat<truncstorei16, I32, anyimm1, S2_storerh_io>; 2590 defm: Storexi_pat<truncstorei16, I32, anyimm1, S2_storerh_io>;
2403 defm: Storexi_pat<store, I32, anyimm2, S2_storeri_io>; 2591 defm: Storexi_pat<store, I32, anyimm2, S2_storeri_io>;
2404 defm: Storexi_pat<store, I64, anyimm3, S2_storerd_io>; 2592 defm: Storexi_pat<store, V4I8, anyimm2, S2_storeri_io>;
2405 defm: Storexi_pat<store, F32, anyimm2, S2_storeri_io>; 2593 defm: Storexi_pat<store, V2I16, anyimm2, S2_storeri_io>;
2406 defm: Storexi_pat<store, F64, anyimm3, S2_storerd_io>; 2594 defm: Storexi_pat<store, I64, anyimm3, S2_storerd_io>;
2595 defm: Storexi_pat<store, V8I8, anyimm3, S2_storerd_io>;
2596 defm: Storexi_pat<store, V4I16, anyimm3, S2_storerd_io>;
2597 defm: Storexi_pat<store, V2I32, anyimm3, S2_storerd_io>;
2598 defm: Storexi_pat<store, F32, anyimm2, S2_storeri_io>;
2599 defm: Storexi_pat<store, F64, anyimm3, S2_storerd_io>;
2407 2600
2408 defm: Storexim_pat<truncstorei8, I64, anyimm0, LoReg, S2_storerb_io>; 2601 defm: Storexim_pat<truncstorei8, I64, anyimm0, LoReg, S2_storerb_io>;
2409 defm: Storexim_pat<truncstorei16, I64, anyimm1, LoReg, S2_storerh_io>; 2602 defm: Storexim_pat<truncstorei16, I64, anyimm1, LoReg, S2_storerh_io>;
2410 defm: Storexim_pat<truncstorei32, I64, anyimm2, LoReg, S2_storeri_io>; 2603 defm: Storexim_pat<truncstorei32, I64, anyimm2, LoReg, S2_storeri_io>;
2411 defm: Storexim_pat<store, I1, anyimm0, I1toI32, S2_storerb_io>; 2604 defm: Storexim_pat<store, I1, anyimm0, I1toI32, S2_storerb_io>;
2412 2605
2413 defm: Storexi_pat<AtomSt<atomic_store_8>, I32, anyimm0, S2_storerb_io>; 2606 defm: Storexi_pat<AtomSt<atomic_store_8>, I32, anyimm0, S2_storerb_io>;
2414 defm: Storexi_pat<AtomSt<atomic_store_16>, I32, anyimm1, S2_storerh_io>; 2607 defm: Storexi_pat<AtomSt<atomic_store_16>, I32, anyimm1, S2_storerh_io>;
2415 defm: Storexi_pat<AtomSt<atomic_store_32>, I32, anyimm2, S2_storeri_io>; 2608 defm: Storexi_pat<AtomSt<atomic_store_32>, I32, anyimm2, S2_storeri_io>;
2416 defm: Storexi_pat<AtomSt<atomic_store_64>, I64, anyimm3, S2_storerd_io>; 2609 defm: Storexi_pat<AtomSt<atomic_store_32>, V4I8, anyimm2, S2_storeri_io>;
2610 defm: Storexi_pat<AtomSt<atomic_store_32>, V2I16, anyimm2, S2_storeri_io>;
2611 defm: Storexi_pat<AtomSt<atomic_store_64>, I64, anyimm3, S2_storerd_io>;
2612 defm: Storexi_pat<AtomSt<atomic_store_64>, V8I8, anyimm3, S2_storerd_io>;
2613 defm: Storexi_pat<AtomSt<atomic_store_64>, V4I16, anyimm3, S2_storerd_io>;
2614 defm: Storexi_pat<AtomSt<atomic_store_64>, V2I32, anyimm3, S2_storerd_io>;
2417 } 2615 }
2418 2616
2419 // Reg+Reg 2617 // Reg+Reg
2420 let AddedComplexity = 30 in { 2618 let AddedComplexity = 30 in {
2421 def: Storexr_add_pat<truncstorei8, I32, S4_storerb_rr>; 2619 def: Storexr_add_pat<truncstorei8, I32, S4_storerb_rr>;
2422 def: Storexr_add_pat<truncstorei16, I32, S4_storerh_rr>; 2620 def: Storexr_add_pat<truncstorei16, I32, S4_storerh_rr>;
2423 def: Storexr_add_pat<store, I32, S4_storeri_rr>; 2621 def: Storexr_add_pat<store, I32, S4_storeri_rr>;
2424 def: Storexr_add_pat<store, I64, S4_storerd_rr>; 2622 def: Storexr_add_pat<store, V4I8, S4_storeri_rr>;
2425 def: Storexr_add_pat<store, F32, S4_storeri_rr>; 2623 def: Storexr_add_pat<store, V2I16, S4_storeri_rr>;
2426 def: Storexr_add_pat<store, F64, S4_storerd_rr>; 2624 def: Storexr_add_pat<store, I64, S4_storerd_rr>;
2625 def: Storexr_add_pat<store, V8I8, S4_storerd_rr>;
2626 def: Storexr_add_pat<store, V4I16, S4_storerd_rr>;
2627 def: Storexr_add_pat<store, V2I32, S4_storerd_rr>;
2628 def: Storexr_add_pat<store, F32, S4_storeri_rr>;
2629 def: Storexr_add_pat<store, F64, S4_storerd_rr>;
2427 2630
2428 def: Pat<(store I1:$Pu, (add I32:$Rs, I32:$Rt)), 2631 def: Pat<(store I1:$Pu, (add I32:$Rs, I32:$Rt)),
2429 (S4_storerb_rr IntRegs:$Rs, IntRegs:$Rt, 0, (I1toI32 I1:$Pu))>; 2632 (S4_storerb_rr IntRegs:$Rs, IntRegs:$Rt, 0, (I1toI32 I1:$Pu))>;
2430 } 2633 }
2431 2634
2440 def: Storexi_base_pat<store, anyimm, S4_storeiri_io>; 2643 def: Storexi_base_pat<store, anyimm, S4_storeiri_io>;
2441 } 2644 }
2442 2645
2443 // Reg, store-register 2646 // Reg, store-register
2444 let AddedComplexity = 10 in { 2647 let AddedComplexity = 10 in {
2445 def: Storexi_base_pat<truncstorei8, I32, S2_storerb_io>; 2648 def: Storexi_base_pat<truncstorei8, I32, S2_storerb_io>;
2446 def: Storexi_base_pat<truncstorei16, I32, S2_storerh_io>; 2649 def: Storexi_base_pat<truncstorei16, I32, S2_storerh_io>;
2447 def: Storexi_base_pat<store, I32, S2_storeri_io>; 2650 def: Storexi_base_pat<store, I32, S2_storeri_io>;
2448 def: Storexi_base_pat<store, I64, S2_storerd_io>; 2651 def: Storexi_base_pat<store, V4I8, S2_storeri_io>;
2449 def: Storexi_base_pat<store, F32, S2_storeri_io>; 2652 def: Storexi_base_pat<store, V2I16, S2_storeri_io>;
2450 def: Storexi_base_pat<store, F64, S2_storerd_io>; 2653 def: Storexi_base_pat<store, I64, S2_storerd_io>;
2654 def: Storexi_base_pat<store, V8I8, S2_storerd_io>;
2655 def: Storexi_base_pat<store, V4I16, S2_storerd_io>;
2656 def: Storexi_base_pat<store, V2I32, S2_storerd_io>;
2657 def: Storexi_base_pat<store, F32, S2_storeri_io>;
2658 def: Storexi_base_pat<store, F64, S2_storerd_io>;
2451 2659
2452 def: Storexim_base_pat<truncstorei8, I64, LoReg, S2_storerb_io>; 2660 def: Storexim_base_pat<truncstorei8, I64, LoReg, S2_storerb_io>;
2453 def: Storexim_base_pat<truncstorei16, I64, LoReg, S2_storerh_io>; 2661 def: Storexim_base_pat<truncstorei16, I64, LoReg, S2_storerh_io>;
2454 def: Storexim_base_pat<truncstorei32, I64, LoReg, S2_storeri_io>; 2662 def: Storexim_base_pat<truncstorei32, I64, LoReg, S2_storeri_io>;
2455 def: Storexim_base_pat<store, I1, I1toI32, S2_storerb_io>; 2663 def: Storexim_base_pat<store, I1, I1toI32, S2_storerb_io>;
2456 2664
2457 def: Storexi_base_pat<AtomSt<atomic_store_8>, I32, S2_storerb_io>; 2665 def: Storexi_base_pat<AtomSt<atomic_store_8>, I32, S2_storerb_io>;
2458 def: Storexi_base_pat<AtomSt<atomic_store_16>, I32, S2_storerh_io>; 2666 def: Storexi_base_pat<AtomSt<atomic_store_16>, I32, S2_storerh_io>;
2459 def: Storexi_base_pat<AtomSt<atomic_store_32>, I32, S2_storeri_io>; 2667 def: Storexi_base_pat<AtomSt<atomic_store_32>, I32, S2_storeri_io>;
2460 def: Storexi_base_pat<AtomSt<atomic_store_64>, I64, S2_storerd_io>; 2668 def: Storexi_base_pat<AtomSt<atomic_store_32>, V4I8, S2_storeri_io>;
2669 def: Storexi_base_pat<AtomSt<atomic_store_32>, V2I16, S2_storeri_io>;
2670 def: Storexi_base_pat<AtomSt<atomic_store_64>, I64, S2_storerd_io>;
2671 def: Storexi_base_pat<AtomSt<atomic_store_64>, V8I8, S2_storerd_io>;
2672 def: Storexi_base_pat<AtomSt<atomic_store_64>, V4I16, S2_storerd_io>;
2673 def: Storexi_base_pat<AtomSt<atomic_store_64>, V2I32, S2_storerd_io>;
2461 } 2674 }
2462 2675
2463 2676
2464 // --(14) Memop ---------------------------------------------------------- 2677 // --(14) Memop ----------------------------------------------------------
2465 // 2678 //
2539 (MI AddrFI:$Rs, imm:$Off, I32:$A)>; 2752 (MI AddrFI:$Rs, imm:$Off, I32:$A)>;
2540 } 2753 }
2541 2754
2542 multiclass Memopxr_pat<PatFrag Load, PatFrag Store, PatFrag ImmPred, 2755 multiclass Memopxr_pat<PatFrag Load, PatFrag Store, PatFrag ImmPred,
2543 SDNode Oper, InstHexagon MI> { 2756 SDNode Oper, InstHexagon MI> {
2544 defm: Memopxr_base_pat <Load, Store, Oper, MI>; 2757 let Predicates = [UseMEMOPS] in {
2545 defm: Memopxr_add_pat <Load, Store, ImmPred, Oper, MI>; 2758 defm: Memopxr_base_pat <Load, Store, Oper, MI>;
2759 defm: Memopxr_add_pat <Load, Store, ImmPred, Oper, MI>;
2760 }
2546 } 2761 }
2547 2762
2548 let AddedComplexity = 200 in { 2763 let AddedComplexity = 200 in {
2549 // add reg 2764 // add reg
2550 defm: Memopxr_pat<extloadi8, truncstorei8, u6_0ImmPred, add, 2765 defm: Memopxr_pat<extloadi8, truncstorei8, u6_0ImmPred, add,
2638 } 2853 }
2639 2854
2640 multiclass Memopxi_pat<PatFrag Load, PatFrag Store, PatFrag ImmPred, 2855 multiclass Memopxi_pat<PatFrag Load, PatFrag Store, PatFrag ImmPred,
2641 SDNode Oper, PatFrag Arg, SDNodeXForm ArgMod, 2856 SDNode Oper, PatFrag Arg, SDNodeXForm ArgMod,
2642 InstHexagon MI> { 2857 InstHexagon MI> {
2643 defm: Memopxi_base_pat <Load, Store, Oper, Arg, ArgMod, MI>; 2858 let Predicates = [UseMEMOPS] in {
2644 defm: Memopxi_add_pat <Load, Store, ImmPred, Oper, Arg, ArgMod, MI>; 2859 defm: Memopxi_base_pat <Load, Store, Oper, Arg, ArgMod, MI>;
2860 defm: Memopxi_add_pat <Load, Store, ImmPred, Oper, Arg, ArgMod, MI>;
2861 }
2645 } 2862 }
2646 2863
2647 let AddedComplexity = 220 in { 2864 let AddedComplexity = 220 in {
2648 // add imm 2865 // add imm
2649 defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, add, u5_0ImmPred, 2866 defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, add, u5_0ImmPred,
2798 (J2_jumpt I1:$Pu, bb:$dst)>; 3015 (J2_jumpt I1:$Pu, bb:$dst)>;
2799 def: Pat<(brcond (not I1:$Pu), bb:$dst), 3016 def: Pat<(brcond (not I1:$Pu), bb:$dst),
2800 (J2_jumpf I1:$Pu, bb:$dst)>; 3017 (J2_jumpf I1:$Pu, bb:$dst)>;
2801 def: Pat<(brcond (i1 (setne I1:$Pu, -1)), bb:$dst), 3018 def: Pat<(brcond (i1 (setne I1:$Pu, -1)), bb:$dst),
2802 (J2_jumpf I1:$Pu, bb:$dst)>; 3019 (J2_jumpf I1:$Pu, bb:$dst)>;
3020 def: Pat<(brcond (i1 (seteq I1:$Pu, 0)), bb:$dst),
3021 (J2_jumpf I1:$Pu, bb:$dst)>;
2803 def: Pat<(brcond (i1 (setne I1:$Pu, 0)), bb:$dst), 3022 def: Pat<(brcond (i1 (setne I1:$Pu, 0)), bb:$dst),
2804 (J2_jumpt I1:$Pu, bb:$dst)>; 3023 (J2_jumpt I1:$Pu, bb:$dst)>;
2805 3024
2806 3025
2807 // --(17) Misc ----------------------------------------------------------- 3026 // --(17) Misc -----------------------------------------------------------
2860 (PS_alloca IntRegs:$Rs, imm:$A)>; 3079 (PS_alloca IntRegs:$Rs, imm:$A)>;
2861 3080
2862 def HexagonBARRIER: SDNode<"HexagonISD::BARRIER", SDTNone, [SDNPHasChain]>; 3081 def HexagonBARRIER: SDNode<"HexagonISD::BARRIER", SDTNone, [SDNPHasChain]>;
2863 def: Pat<(HexagonBARRIER), (Y2_barrier)>; 3082 def: Pat<(HexagonBARRIER), (Y2_barrier)>;
2864 3083
3084 def: Pat<(trap), (PS_crash)>;
3085
2865 // Read cycle counter. 3086 // Read cycle counter.
2866 def SDTInt64Leaf: SDTypeProfile<1, 0, [SDTCisVT<0, i64>]>; 3087 def SDTInt64Leaf: SDTypeProfile<1, 0, [SDTCisVT<0, i64>]>;
2867 def HexagonREADCYCLE: SDNode<"HexagonISD::READCYCLE", SDTInt64Leaf, 3088 def HexagonREADCYCLE: SDNode<"HexagonISD::READCYCLE", SDTInt64Leaf,
2868 [SDNPHasChain]>; 3089 [SDNPHasChain]>;
2869 3090
2870 def: Pat<(HexagonREADCYCLE), (A4_tfrcpp UPCYCLE)>; 3091 def: Pat<(HexagonREADCYCLE), (A4_tfrcpp UPCYCLE)>;
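As an aside, and as an assumption about the front end rather than something stated in this file, Clang's __builtin_readcyclecounter() is the usual source-level construct expected to reach the READCYCLE pattern above (a UPCYCLE register read).

    // Hypothetical usage: read the cycle counter.
    unsigned long long cycles_now(void) {
      return __builtin_readcyclecounter();
    }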
3092
3093 // The declared return value of the store-locked intrinsics is i32, but
3094 // the instructions actually define i1. To avoid register copies from
3095 // IntRegs to PredRegs and back, fold the entire pattern checking the
3096 // result against true/false.
3097 let AddedComplexity = 100 in {
3098 def: Pat<(i1 (setne (int_hexagon_S2_storew_locked I32:$Rs, I32:$Rt), 0)),
3099 (S2_storew_locked I32:$Rs, I32:$Rt)>;
3100 def: Pat<(i1 (seteq (int_hexagon_S2_storew_locked I32:$Rs, I32:$Rt), 0)),
3101 (C2_not (S2_storew_locked I32:$Rs, I32:$Rt))>;
3102 def: Pat<(i1 (setne (int_hexagon_S4_stored_locked I32:$Rs, I64:$Rt), 0)),
3103 (S4_stored_locked I32:$Rs, I64:$Rt)>;
3104 def: Pat<(i1 (seteq (int_hexagon_S4_stored_locked I32:$Rs, I64:$Rt), 0)),
3105 (C2_not (S4_stored_locked I32:$Rs, I64:$Rt))>;
3106 }
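A hedged C sketch of the folding described above; the builtin spelling and signature follow the usual __builtin_HEXAGON_* convention and are assumptions, not taken from this file.

    // Hypothetical example: the intrinsic nominally returns i32, but comparing
    // the result against zero lets the patterns above use the instruction's i1
    // predicate result directly, avoiding IntRegs <-> PredRegs copies.
    int try_store(int *p, int v) {
      // Assumed builtin spelling/signature for int_hexagon_S2_storew_locked.
      return __builtin_HEXAGON_S2_storew_locked(p, v) != 0;
    }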