comparison lib/Target/Hexagon/HexagonPatterns.td @ 148:63bd29f05246
merged
author | Shinji KONO <kono@ie.u-ryukyu.ac.jp> |
date | Wed, 14 Aug 2019 19:46:37 +0900 |
parents | c2174574ed3a |
children |
146:3fc4d5c3e21e | 148:63bd29f05246 |
---|---|
1 //==- HexagonPatterns.td - Target Description for Hexagon -*- tablegen -*-===// | 1 //==- HexagonPatterns.td - Target Description for Hexagon -*- tablegen -*-===// |
2 // | 2 // |
3 // The LLVM Compiler Infrastructure | 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 // | 4 // See https://llvm.org/LICENSE.txt for license information. |
5 // This file is distributed under the University of Illinois Open Source | 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 // License. See LICENSE.TXT for details. | |
7 // | 6 // |
8 //===----------------------------------------------------------------------===// | 7 //===----------------------------------------------------------------------===// |
9 | 8 |
10 // Table of contents: | 9 // Table of contents: |
11 // (0) Definitions | 10 // (0) Definitions |
98 | 97 |
99 def HWI8: PatLeaf<(VecPI8 HvxWR:$R)>; | 98 def HWI8: PatLeaf<(VecPI8 HvxWR:$R)>; |
100 def HWI16: PatLeaf<(VecPI16 HvxWR:$R)>; | 99 def HWI16: PatLeaf<(VecPI16 HvxWR:$R)>; |
101 def HWI32: PatLeaf<(VecPI32 HvxWR:$R)>; | 100 def HWI32: PatLeaf<(VecPI32 HvxWR:$R)>; |
102 | 101 |
102 def SDTVecVecIntOp: | |
103 SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<1,2>, | |
104 SDTCisVT<3,i32>]>; | |
105 | |
106 def HexagonVALIGN: SDNode<"HexagonISD::VALIGN", SDTVecVecIntOp>; | |
107 def HexagonVALIGNADDR: SDNode<"HexagonISD::VALIGNADDR", SDTIntUnaryOp>; | |
108 | |
109 def valign: PatFrag<(ops node:$Vt, node:$Vs, node:$Ru), | |
110 (HexagonVALIGN node:$Vt, node:$Vs, node:$Ru)>; | |
111 def valignaddr: PatFrag<(ops node:$Addr), (HexagonVALIGNADDR node:$Addr)>; | |
112 | |
103 // Pattern fragments to extract the low and high subregisters from a | 113 // Pattern fragments to extract the low and high subregisters from a |
104 // 64-bit value. | 114 // 64-bit value. |
105 def LoReg: OutPatFrag<(ops node:$Rs), (EXTRACT_SUBREG (i64 $Rs), isub_lo)>; | 115 def LoReg: OutPatFrag<(ops node:$Rs), (EXTRACT_SUBREG (i64 $Rs), isub_lo)>; |
106 def HiReg: OutPatFrag<(ops node:$Rs), (EXTRACT_SUBREG (i64 $Rs), isub_hi)>; | 116 def HiReg: OutPatFrag<(ops node:$Rs), (EXTRACT_SUBREG (i64 $Rs), isub_hi)>; |
107 | 117 |
163 def UDEC32: SDNodeXForm<imm, [{ | 173 def UDEC32: SDNodeXForm<imm, [{ |
164 uint32_t V = N->getZExtValue(); | 174 uint32_t V = N->getZExtValue(); |
165 assert(V >= 32); | 175 assert(V >= 32); |
166 return CurDAG->getTargetConstant(V-32, SDLoc(N), MVT::i32); | 176 return CurDAG->getTargetConstant(V-32, SDLoc(N), MVT::i32); |
167 }]>; | 177 }]>; |
178 | |
179 class Subi<int From>: SDNodeXForm<imm, | |
180 "int32_t V = " # From # " - N->getSExtValue();" # | |
181 "return CurDAG->getTargetConstant(V, SDLoc(N), MVT::i32);" | |
182 >; | |
168 | 183 |
169 def Log2_32: SDNodeXForm<imm, [{ | 184 def Log2_32: SDNodeXForm<imm, [{ |
170 uint32_t V = N->getZExtValue(); | 185 uint32_t V = N->getZExtValue(); |
171 return CurDAG->getTargetConstant(Log2_32(V), SDLoc(N), MVT::i32); | 186 return CurDAG->getTargetConstant(Log2_32(V), SDLoc(N), MVT::i32); |
172 }]>; | 187 }]>; |
205 // Helpers for type promotions/contractions. | 220 // Helpers for type promotions/contractions. |
206 def I1toI32: OutPatFrag<(ops node:$Rs), (C2_muxii (i1 $Rs), 1, 0)>; | 221 def I1toI32: OutPatFrag<(ops node:$Rs), (C2_muxii (i1 $Rs), 1, 0)>; |
207 def I32toI1: OutPatFrag<(ops node:$Rs), (i1 (C2_cmpgtui (i32 $Rs), (i32 0)))>; | 222 def I32toI1: OutPatFrag<(ops node:$Rs), (i1 (C2_cmpgtui (i32 $Rs), (i32 0)))>; |
208 def ToZext64: OutPatFrag<(ops node:$Rs), (i64 (A4_combineir 0, (i32 $Rs)))>; | 223 def ToZext64: OutPatFrag<(ops node:$Rs), (i64 (A4_combineir 0, (i32 $Rs)))>; |
209 def ToSext64: OutPatFrag<(ops node:$Rs), (i64 (A2_sxtw (i32 $Rs)))>; | 224 def ToSext64: OutPatFrag<(ops node:$Rs), (i64 (A2_sxtw (i32 $Rs)))>; |
225 def ToAext64: OutPatFrag<(ops node:$Rs), | |
226 (REG_SEQUENCE DoubleRegs, (i32 (IMPLICIT_DEF)), isub_hi, (i32 $Rs), isub_lo)>; | |
210 | 227 |
211 def Combinew: OutPatFrag<(ops node:$Rs, node:$Rt), | 228 def Combinew: OutPatFrag<(ops node:$Rs, node:$Rt), |
212 (REG_SEQUENCE DoubleRegs, $Rs, isub_hi, $Rt, isub_lo)>; | 229 (REG_SEQUENCE DoubleRegs, $Rs, isub_hi, $Rt, isub_lo)>; |
213 | 230 |
214 def addrga: PatLeaf<(i32 AddrGA:$Addr)>; | 231 def addrga: PatLeaf<(i32 AddrGA:$Addr)>; |
233 | 250 |
234 def Aext64: PatFrag<(ops node:$Rs), (i64 (anyext node:$Rs))>; | 251 def Aext64: PatFrag<(ops node:$Rs), (i64 (anyext node:$Rs))>; |
235 def Zext64: PatFrag<(ops node:$Rs), (i64 (zext node:$Rs))>; | 252 def Zext64: PatFrag<(ops node:$Rs), (i64 (zext node:$Rs))>; |
236 def Sext64: PatLeaf<(i64 Usxtw:$Rs)>; | 253 def Sext64: PatLeaf<(i64 Usxtw:$Rs)>; |
237 | 254 |
255 def azext: PatFrags<(ops node:$Rs), [(zext node:$Rs), (anyext node:$Rs)]>; | |
256 def asext: PatFrags<(ops node:$Rs), [(sext node:$Rs), (anyext node:$Rs)]>; | |
257 | |
238 def: Pat<(IsOrAdd (i32 AddrFI:$Rs), s32_0ImmPred:$off), | 258 def: Pat<(IsOrAdd (i32 AddrFI:$Rs), s32_0ImmPred:$off), |
239 (PS_fi (i32 AddrFI:$Rs), imm:$off)>; | 259 (PS_fi (i32 AddrFI:$Rs), imm:$off)>; |
240 | 260 |
241 | 261 |
242 // Converters from unary/binary SDNode to PatFrag. | 262 // Converters from unary/binary SDNode to PatFrag. |
244 class pf2<SDNode Op> : PatFrag<(ops node:$a, node:$b), (Op node:$a, node:$b)>; | 264 class pf2<SDNode Op> : PatFrag<(ops node:$a, node:$b), (Op node:$a, node:$b)>; |
245 | 265 |
246 class Not2<PatFrag P> | 266 class Not2<PatFrag P> |
247 : PatFrag<(ops node:$A, node:$B), (P node:$A, (not node:$B))>; | 267 : PatFrag<(ops node:$A, node:$B), (P node:$A, (not node:$B))>; |
248 | 268 |
269 // If there is a constant operand that feeds the and/or instruction, | |
270 // do not generate the compound instructions. | |
271 // It is not always profitable, as some times we end up with a transfer. | |
272 // Check the below example. | |
273 // ra = #65820; rb = lsr(rb, #8); rc ^= and (rb, ra) | |
274 // Instead this is preferable. | |
275 // ra = and (#65820, lsr(ra, #8)); rb = xor(rb, ra) | |
276 class Su_ni1<PatFrag Op> | |
277 : PatFrag<Op.Operands, !head(Op.Fragments), [{ | |
278 if (hasOneUse(N)){ | |
279 // Check if Op1 is an immediate operand. | |
280 SDValue Op1 = N->getOperand(1); | |
281 return !isa<ConstantSDNode>(Op1); | |
282 } | |
283 return false;}], | |
284 Op.OperandTransform>; | |
285 | |
249 class Su<PatFrag Op> | 286 class Su<PatFrag Op> |
250 : PatFrag<Op.Operands, Op.Fragment, [{ return hasOneUse(N); }], | 287 : PatFrag<Op.Operands, !head(Op.Fragments), [{ return hasOneUse(N); }], |
251 Op.OperandTransform>; | 288 Op.OperandTransform>; |
252 | 289 |
253 // Main selection macros. | 290 // Main selection macros. |
254 | 291 |
255 class OpR_R_pat<InstHexagon MI, PatFrag Op, ValueType ResVT, PatFrag RegPred> | 292 class OpR_R_pat<InstHexagon MI, PatFrag Op, ValueType ResVT, PatFrag RegPred> |
269 PatFrag RegPred, PatFrag ImmPred> | 306 PatFrag RegPred, PatFrag ImmPred> |
270 : Pat<(AccOp RegPred:$Rx, (Op RegPred:$Rs, ImmPred:$I)), | 307 : Pat<(AccOp RegPred:$Rx, (Op RegPred:$Rs, ImmPred:$I)), |
271 (MI RegPred:$Rx, RegPred:$Rs, imm:$I)>; | 308 (MI RegPred:$Rx, RegPred:$Rs, imm:$I)>; |
272 | 309 |
273 class AccRRR_pat<InstHexagon MI, PatFrag AccOp, PatFrag Op, | 310 class AccRRR_pat<InstHexagon MI, PatFrag AccOp, PatFrag Op, |
274 PatFrag RsPred, PatFrag RtPred> | 311 PatFrag RxPred, PatFrag RsPred, PatFrag RtPred> |
275 : Pat<(AccOp RsPred:$Rx, (Op RsPred:$Rs, RtPred:$Rt)), | 312 : Pat<(AccOp RxPred:$Rx, (Op RsPred:$Rs, RtPred:$Rt)), |
276 (MI RsPred:$Rx, RsPred:$Rs, RtPred:$Rt)>; | 313 (MI RxPred:$Rx, RsPred:$Rs, RtPred:$Rt)>; |
277 | 314 |
278 multiclass SelMinMax_pats<PatFrag CmpOp, PatFrag Val, | 315 multiclass SelMinMax_pats<PatFrag CmpOp, PatFrag Val, |
279 InstHexagon InstA, InstHexagon InstB> { | 316 InstHexagon InstA, InstHexagon InstB> { |
280 def: Pat<(select (i1 (CmpOp Val:$A, Val:$B)), Val:$A, Val:$B), | 317 def: Pat<(select (i1 (CmpOp Val:$A, Val:$B)), Val:$A, Val:$B), |
281 (InstA Val:$A, Val:$B)>; | 318 (InstA Val:$A, Val:$B)>; |
287 // Frags for commonly used SDNodes. | 324 // Frags for commonly used SDNodes. |
288 def Add: pf2<add>; def And: pf2<and>; def Sra: pf2<sra>; | 325 def Add: pf2<add>; def And: pf2<and>; def Sra: pf2<sra>; |
289 def Sub: pf2<sub>; def Or: pf2<or>; def Srl: pf2<srl>; | 326 def Sub: pf2<sub>; def Or: pf2<or>; def Srl: pf2<srl>; |
290 def Mul: pf2<mul>; def Xor: pf2<xor>; def Shl: pf2<shl>; | 327 def Mul: pf2<mul>; def Xor: pf2<xor>; def Shl: pf2<shl>; |
291 | 328 |
329 def Rol: pf2<rotl>; | |
292 | 330 |
293 // --(1) Immediate ------------------------------------------------------- | 331 // --(1) Immediate ------------------------------------------------------- |
294 // | 332 // |
295 | 333 |
296 def SDTHexagonCONST32 | 334 def SDTHexagonCONST32 |
334 def ToI32: OutPatFrag<(ops node:$V), (A2_tfrsi $V)>; | 372 def ToI32: OutPatFrag<(ops node:$V), (A2_tfrsi $V)>; |
335 | 373 |
336 // --(2) Type cast ------------------------------------------------------- | 374 // --(2) Type cast ------------------------------------------------------- |
337 // | 375 // |
338 | 376 |
339 let Predicates = [HasV5T] in { | 377 def: OpR_R_pat<F2_conv_sf2df, pf1<fpextend>, f64, F32>; |
340 def: OpR_R_pat<F2_conv_sf2df, pf1<fpextend>, f64, F32>; | 378 def: OpR_R_pat<F2_conv_df2sf, pf1<fpround>, f32, F64>; |
341 def: OpR_R_pat<F2_conv_df2sf, pf1<fpround>, f32, F64>; | 379 |
342 | 380 def: OpR_R_pat<F2_conv_w2sf, pf1<sint_to_fp>, f32, I32>; |
343 def: OpR_R_pat<F2_conv_w2sf, pf1<sint_to_fp>, f32, I32>; | 381 def: OpR_R_pat<F2_conv_d2sf, pf1<sint_to_fp>, f32, I64>; |
344 def: OpR_R_pat<F2_conv_d2sf, pf1<sint_to_fp>, f32, I64>; | 382 def: OpR_R_pat<F2_conv_w2df, pf1<sint_to_fp>, f64, I32>; |
345 def: OpR_R_pat<F2_conv_w2df, pf1<sint_to_fp>, f64, I32>; | 383 def: OpR_R_pat<F2_conv_d2df, pf1<sint_to_fp>, f64, I64>; |
346 def: OpR_R_pat<F2_conv_d2df, pf1<sint_to_fp>, f64, I64>; | 384 |
347 | 385 def: OpR_R_pat<F2_conv_uw2sf, pf1<uint_to_fp>, f32, I32>; |
348 def: OpR_R_pat<F2_conv_uw2sf, pf1<uint_to_fp>, f32, I32>; | 386 def: OpR_R_pat<F2_conv_ud2sf, pf1<uint_to_fp>, f32, I64>; |
349 def: OpR_R_pat<F2_conv_ud2sf, pf1<uint_to_fp>, f32, I64>; | 387 def: OpR_R_pat<F2_conv_uw2df, pf1<uint_to_fp>, f64, I32>; |
350 def: OpR_R_pat<F2_conv_uw2df, pf1<uint_to_fp>, f64, I32>; | 388 def: OpR_R_pat<F2_conv_ud2df, pf1<uint_to_fp>, f64, I64>; |
351 def: OpR_R_pat<F2_conv_ud2df, pf1<uint_to_fp>, f64, I64>; | 389 |
352 | 390 def: OpR_R_pat<F2_conv_sf2w_chop, pf1<fp_to_sint>, i32, F32>; |
353 def: OpR_R_pat<F2_conv_sf2w_chop, pf1<fp_to_sint>, i32, F32>; | 391 def: OpR_R_pat<F2_conv_df2w_chop, pf1<fp_to_sint>, i32, F64>; |
354 def: OpR_R_pat<F2_conv_df2w_chop, pf1<fp_to_sint>, i32, F64>; | 392 def: OpR_R_pat<F2_conv_sf2d_chop, pf1<fp_to_sint>, i64, F32>; |
355 def: OpR_R_pat<F2_conv_sf2d_chop, pf1<fp_to_sint>, i64, F32>; | 393 def: OpR_R_pat<F2_conv_df2d_chop, pf1<fp_to_sint>, i64, F64>; |
356 def: OpR_R_pat<F2_conv_df2d_chop, pf1<fp_to_sint>, i64, F64>; | 394 |
357 | 395 def: OpR_R_pat<F2_conv_sf2uw_chop, pf1<fp_to_uint>, i32, F32>; |
358 def: OpR_R_pat<F2_conv_sf2uw_chop, pf1<fp_to_uint>, i32, F32>; | 396 def: OpR_R_pat<F2_conv_df2uw_chop, pf1<fp_to_uint>, i32, F64>; |
359 def: OpR_R_pat<F2_conv_df2uw_chop, pf1<fp_to_uint>, i32, F64>; | 397 def: OpR_R_pat<F2_conv_sf2ud_chop, pf1<fp_to_uint>, i64, F32>; |
360 def: OpR_R_pat<F2_conv_sf2ud_chop, pf1<fp_to_uint>, i64, F32>; | 398 def: OpR_R_pat<F2_conv_df2ud_chop, pf1<fp_to_uint>, i64, F64>; |
361 def: OpR_R_pat<F2_conv_df2ud_chop, pf1<fp_to_uint>, i64, F64>; | |
362 } | |
363 | 399 |
364 // Bitcast is different than [fp|sint|uint]_to_[sint|uint|fp]. | 400 // Bitcast is different than [fp|sint|uint]_to_[sint|uint|fp]. |
365 let Predicates = [HasV5T] in { | 401 def: Pat<(i32 (bitconvert F32:$v)), (I32:$v)>; |
366 def: Pat<(i32 (bitconvert F32:$v)), (I32:$v)>; | 402 def: Pat<(f32 (bitconvert I32:$v)), (F32:$v)>; |
367 def: Pat<(f32 (bitconvert I32:$v)), (F32:$v)>; | 403 def: Pat<(i64 (bitconvert F64:$v)), (I64:$v)>; |
368 def: Pat<(i64 (bitconvert F64:$v)), (I64:$v)>; | 404 def: Pat<(f64 (bitconvert I64:$v)), (F64:$v)>; |
369 def: Pat<(f64 (bitconvert I64:$v)), (F64:$v)>; | |
370 } | |
371 | 405 |
372 multiclass Cast_pat<ValueType Ta, ValueType Tb, RegisterClass RC> { | 406 multiclass Cast_pat<ValueType Ta, ValueType Tb, RegisterClass RC> { |
373 def: Pat<(Tb (bitconvert (Ta RC:$Rs))), (Tb RC:$Rs)>; | 407 def: Pat<(Tb (bitconvert (Ta RC:$Rs))), (Tb RC:$Rs)>; |
374 def: Pat<(Ta (bitconvert (Tb RC:$Rs))), (Ta RC:$Rs)>; | 408 def: Pat<(Ta (bitconvert (Tb RC:$Rs))), (Ta RC:$Rs)>; |
375 } | 409 } |
389 def: Pat<(sext_inreg I32:$Rs, i16), (A2_sxth I32:$Rs)>; | 423 def: Pat<(sext_inreg I32:$Rs, i16), (A2_sxth I32:$Rs)>; |
390 def: Pat<(sext_inreg I64:$Rs, i32), (A2_sxtw (LoReg $Rs))>; | 424 def: Pat<(sext_inreg I64:$Rs, i32), (A2_sxtw (LoReg $Rs))>; |
391 def: Pat<(sext_inreg I64:$Rs, i16), (A2_sxtw (A2_sxth (LoReg $Rs)))>; | 425 def: Pat<(sext_inreg I64:$Rs, i16), (A2_sxtw (A2_sxth (LoReg $Rs)))>; |
392 def: Pat<(sext_inreg I64:$Rs, i8), (A2_sxtw (A2_sxtb (LoReg $Rs)))>; | 426 def: Pat<(sext_inreg I64:$Rs, i8), (A2_sxtw (A2_sxtb (LoReg $Rs)))>; |
393 | 427 |
394 def: Pat<(i64 (sext I1:$Pu)), | 428 def: Pat<(i64 (sext I32:$Rs)), (A2_sxtw I32:$Rs)>; |
395 (Combinew (C2_muxii PredRegs:$Pu, -1, 0), | 429 def: Pat<(Zext64 I32:$Rs), (ToZext64 $Rs)>; |
396 (C2_muxii PredRegs:$Pu, -1, 0))>; | 430 def: Pat<(Aext64 I32:$Rs), (ToZext64 $Rs)>; |
397 | 431 |
398 def: Pat<(i32 (sext I1:$Pu)), (C2_muxii I1:$Pu, -1, 0)>; | 432 def: Pat<(i32 (trunc I64:$Rs)), (LoReg $Rs)>; |
399 def: Pat<(i32 (zext I1:$Pu)), (C2_muxii I1:$Pu, 1, 0)>; | 433 def: Pat<(i1 (trunc I32:$Rs)), (S2_tstbit_i I32:$Rs, 0)>; |
400 def: Pat<(i64 (zext I1:$Pu)), (ToZext64 (C2_muxii I1:$Pu, 1, 0))>; | 434 def: Pat<(i1 (trunc I64:$Rs)), (S2_tstbit_i (LoReg $Rs), 0)>; |
435 | |
436 let AddedComplexity = 20 in { | |
437 def: Pat<(and I32:$Rs, 255), (A2_zxtb I32:$Rs)>; | |
438 def: Pat<(and I32:$Rs, 65535), (A2_zxth I32:$Rs)>; | |
439 } | |
440 | |
441 // Extensions from i1 or vectors of i1. | |
442 def: Pat<(i32 (azext I1:$Pu)), (C2_muxii I1:$Pu, 1, 0)>; | |
443 def: Pat<(i64 (azext I1:$Pu)), (ToZext64 (C2_muxii I1:$Pu, 1, 0))>; | |
444 def: Pat<(i32 (sext I1:$Pu)), (C2_muxii I1:$Pu, -1, 0)>; | |
445 def: Pat<(i64 (sext I1:$Pu)), (Combinew (C2_muxii PredRegs:$Pu, -1, 0), | |
446 (C2_muxii PredRegs:$Pu, -1, 0))>; | |
447 | |
401 def: Pat<(v2i16 (sext V2I1:$Pu)), (S2_vtrunehb (C2_mask V2I1:$Pu))>; | 448 def: Pat<(v2i16 (sext V2I1:$Pu)), (S2_vtrunehb (C2_mask V2I1:$Pu))>; |
402 def: Pat<(v2i32 (sext V2I1:$Pu)), (C2_mask V2I1:$Pu)>; | 449 def: Pat<(v2i32 (sext V2I1:$Pu)), (C2_mask V2I1:$Pu)>; |
403 def: Pat<(v4i8 (sext V4I1:$Pu)), (S2_vtrunehb (C2_mask V4I1:$Pu))>; | 450 def: Pat<(v4i8 (sext V4I1:$Pu)), (S2_vtrunehb (C2_mask V4I1:$Pu))>; |
404 def: Pat<(v4i16 (sext V4I1:$Pu)), (C2_mask V4I1:$Pu)>; | 451 def: Pat<(v4i16 (sext V4I1:$Pu)), (C2_mask V4I1:$Pu)>; |
405 def: Pat<(v8i8 (sext V8I1:$Pu)), (C2_mask V8I1:$Pu)>; | 452 def: Pat<(v8i8 (sext V8I1:$Pu)), (C2_mask V8I1:$Pu)>; |
406 | 453 |
407 def: Pat<(i64 (sext I32:$Rs)), (A2_sxtw I32:$Rs)>; | 454 def Vsplatpi: OutPatFrag<(ops node:$V), |
408 def: Pat<(Zext64 I32:$Rs), (ToZext64 $Rs)>; | 455 (Combinew (A2_tfrsi $V), (A2_tfrsi $V))>; |
409 def: Pat<(Aext64 I32:$Rs), (ToZext64 $Rs)>; | 456 |
410 | 457 def: Pat<(v2i16 (azext V2I1:$Pu)), |
411 def: Pat<(i32 (trunc I64:$Rs)), (LoReg $Rs)>; | 458 (A2_andir (LoReg (C2_mask V2I1:$Pu)), (i32 0x00010001))>; |
412 def: Pat<(i1 (trunc I64:$Rs)), (C2_tfrrp (LoReg $Rs))>; | 459 def: Pat<(v2i32 (azext V2I1:$Pu)), |
413 | 460 (A2_andp (C2_mask V2I1:$Pu), (A2_combineii (i32 1), (i32 1)))>; |
414 let AddedComplexity = 20 in { | 461 def: Pat<(v4i8 (azext V4I1:$Pu)), |
415 def: Pat<(and I32:$Rs, 255), (A2_zxtb I32:$Rs)>; | 462 (A2_andir (LoReg (C2_mask V4I1:$Pu)), (i32 0x01010101))>; |
416 def: Pat<(and I32:$Rs, 65535), (A2_zxth I32:$Rs)>; | 463 def: Pat<(v4i16 (azext V4I1:$Pu)), |
417 } | 464 (A2_andp (C2_mask V4I1:$Pu), (Vsplatpi (i32 0x00010001)))>; |
418 | 465 def: Pat<(v8i8 (azext V8I1:$Pu)), |
419 def: Pat<(i32 (anyext I1:$Pu)), (C2_muxii I1:$Pu, 1, 0)>; | 466 (A2_andp (C2_mask V8I1:$Pu), (Vsplatpi (i32 0x01010101)))>; |
420 def: Pat<(i64 (anyext I1:$Pu)), (ToZext64 (C2_muxii I1:$Pu, 1, 0))>; | 467 |
421 | 468 def: Pat<(v4i16 (azext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>; |
422 def: Pat<(v8i8 (zext V8I1:$Pu)), (C2_mask V8I1:$Pu)>; | 469 def: Pat<(v2i32 (azext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>; |
423 def: Pat<(v4i16 (zext V4I1:$Pu)), (C2_mask V4I1:$Pu)>; | |
424 def: Pat<(v2i32 (zext V2I1:$Pu)), (C2_mask V2I1:$Pu)>; | |
425 def: Pat<(v4i8 (zext V4I1:$Pu)), (LoReg (C2_mask V4I1:$Pu))>; | |
426 def: Pat<(v2i16 (zext V2I1:$Pu)), (LoReg (C2_mask V2I1:$Pu))>; | |
427 | |
428 def: Pat<(v4i16 (zext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>; | |
429 def: Pat<(v2i32 (zext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>; | |
430 def: Pat<(v4i16 (anyext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>; | |
431 def: Pat<(v2i32 (anyext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>; | |
432 def: Pat<(v4i16 (sext V4I8:$Rs)), (S2_vsxtbh V4I8:$Rs)>; | 470 def: Pat<(v4i16 (sext V4I8:$Rs)), (S2_vsxtbh V4I8:$Rs)>; |
433 def: Pat<(v2i32 (sext V2I16:$Rs)), (S2_vsxthw V2I16:$Rs)>; | 471 def: Pat<(v2i32 (sext V2I16:$Rs)), (S2_vsxthw V2I16:$Rs)>; |
434 | 472 |
435 def: Pat<(v2i32 (sext_inreg V2I32:$Rs, v2i8)), | 473 def: Pat<(v2i32 (sext_inreg V2I32:$Rs, v2i8)), |
436 (Combinew (A2_sxtb (HiReg $Rs)), (A2_sxtb (LoReg $Rs)))>; | 474 (Combinew (A2_sxtb (HiReg $Rs)), (A2_sxtb (LoReg $Rs)))>; |
468 def: OpR_RR_pat<MI, Op, v4i1, V4I1>; | 506 def: OpR_RR_pat<MI, Op, v4i1, V4I1>; |
469 def: OpR_RR_pat<MI, Op, v8i1, V8I1>; | 507 def: OpR_RR_pat<MI, Op, v8i1, V8I1>; |
470 } | 508 } |
471 | 509 |
472 multiclass BoolAccRRR_pat<InstHexagon MI, PatFrag AccOp, PatFrag Op> { | 510 multiclass BoolAccRRR_pat<InstHexagon MI, PatFrag AccOp, PatFrag Op> { |
473 def: AccRRR_pat<MI, AccOp, Op, I1, I1>; | 511 def: AccRRR_pat<MI, AccOp, Op, I1, I1, I1>; |
474 def: AccRRR_pat<MI, AccOp, Op, V2I1, V2I1>; | 512 def: AccRRR_pat<MI, AccOp, Op, V2I1, V2I1, V2I1>; |
475 def: AccRRR_pat<MI, AccOp, Op, V4I1, V4I1>; | 513 def: AccRRR_pat<MI, AccOp, Op, V4I1, V4I1, V4I1>; |
476 def: AccRRR_pat<MI, AccOp, Op, V8I1, V8I1>; | 514 def: AccRRR_pat<MI, AccOp, Op, V8I1, V8I1, V8I1>; |
477 } | 515 } |
478 | 516 |
479 defm: BoolOpR_RR_pat<C2_and, And>; | 517 defm: BoolOpR_RR_pat<C2_and, And>; |
480 defm: BoolOpR_RR_pat<C2_or, Or>; | 518 defm: BoolOpR_RR_pat<C2_or, Or>; |
481 defm: BoolOpR_RR_pat<C2_xor, Xor>; | 519 defm: BoolOpR_RR_pat<C2_xor, Xor>; |
516 (C2_not (C2_cmpgtui I32:$Rs, (UDEC1 imm:$u9)))>; | 554 (C2_not (C2_cmpgtui I32:$Rs, (UDEC1 imm:$u9)))>; |
517 | 555 |
518 // Patfrag to convert the usual comparison patfrags (e.g. setlt) to ones | 556 // Patfrag to convert the usual comparison patfrags (e.g. setlt) to ones |
519 // that reverse the order of the operands. | 557 // that reverse the order of the operands. |
520 class RevCmp<PatFrag F> | 558 class RevCmp<PatFrag F> |
521 : PatFrag<(ops node:$rhs, node:$lhs), F.Fragment, F.PredicateCode, | 559 : PatFrag<(ops node:$rhs, node:$lhs), !head(F.Fragments), F.PredicateCode, |
522 F.OperandTransform>; | 560 F.OperandTransform>; |
523 | 561 |
524 def: OpR_RR_pat<C2_cmpeq, seteq, i1, I32>; | 562 def: OpR_RR_pat<C2_cmpeq, seteq, i1, I32>; |
525 def: OpR_RR_pat<C2_cmpgt, setgt, i1, I32>; | 563 def: OpR_RR_pat<C2_cmpgt, setgt, i1, I32>; |
526 def: OpR_RR_pat<C2_cmpgtu, setugt, i1, I32>; | 564 def: OpR_RR_pat<C2_cmpgtu, setugt, i1, I32>; |
560 def: OpR_RR_pat<A2_vcmpwgtu, RevCmp<setult>, i1, V2I32>; | 598 def: OpR_RR_pat<A2_vcmpwgtu, RevCmp<setult>, i1, V2I32>; |
561 def: OpR_RR_pat<A2_vcmpwgtu, RevCmp<setult>, v2i1, V2I32>; | 599 def: OpR_RR_pat<A2_vcmpwgtu, RevCmp<setult>, v2i1, V2I32>; |
562 def: OpR_RR_pat<A2_vcmpwgtu, setugt, i1, V2I32>; | 600 def: OpR_RR_pat<A2_vcmpwgtu, setugt, i1, V2I32>; |
563 def: OpR_RR_pat<A2_vcmpwgtu, setugt, v2i1, V2I32>; | 601 def: OpR_RR_pat<A2_vcmpwgtu, setugt, v2i1, V2I32>; |
564 | 602 |
565 let Predicates = [HasV5T] in { | 603 def: OpR_RR_pat<F2_sfcmpeq, seteq, i1, F32>; |
566 def: OpR_RR_pat<F2_sfcmpeq, seteq, i1, F32>; | 604 def: OpR_RR_pat<F2_sfcmpgt, setgt, i1, F32>; |
567 def: OpR_RR_pat<F2_sfcmpgt, setgt, i1, F32>; | 605 def: OpR_RR_pat<F2_sfcmpge, setge, i1, F32>; |
568 def: OpR_RR_pat<F2_sfcmpge, setge, i1, F32>; | 606 def: OpR_RR_pat<F2_sfcmpeq, setoeq, i1, F32>; |
569 def: OpR_RR_pat<F2_sfcmpeq, setoeq, i1, F32>; | 607 def: OpR_RR_pat<F2_sfcmpgt, setogt, i1, F32>; |
570 def: OpR_RR_pat<F2_sfcmpgt, setogt, i1, F32>; | 608 def: OpR_RR_pat<F2_sfcmpge, setoge, i1, F32>; |
571 def: OpR_RR_pat<F2_sfcmpge, setoge, i1, F32>; | 609 def: OpR_RR_pat<F2_sfcmpgt, RevCmp<setolt>, i1, F32>; |
572 def: OpR_RR_pat<F2_sfcmpgt, RevCmp<setolt>, i1, F32>; | 610 def: OpR_RR_pat<F2_sfcmpge, RevCmp<setole>, i1, F32>; |
573 def: OpR_RR_pat<F2_sfcmpge, RevCmp<setole>, i1, F32>; | 611 def: OpR_RR_pat<F2_sfcmpgt, RevCmp<setlt>, i1, F32>; |
574 def: OpR_RR_pat<F2_sfcmpgt, RevCmp<setlt>, i1, F32>; | 612 def: OpR_RR_pat<F2_sfcmpge, RevCmp<setle>, i1, F32>; |
575 def: OpR_RR_pat<F2_sfcmpge, RevCmp<setle>, i1, F32>; | 613 def: OpR_RR_pat<F2_sfcmpuo, setuo, i1, F32>; |
576 def: OpR_RR_pat<F2_sfcmpuo, setuo, i1, F32>; | 614 |
577 | 615 def: OpR_RR_pat<F2_dfcmpeq, seteq, i1, F64>; |
578 def: OpR_RR_pat<F2_dfcmpeq, seteq, i1, F64>; | 616 def: OpR_RR_pat<F2_dfcmpgt, setgt, i1, F64>; |
579 def: OpR_RR_pat<F2_dfcmpgt, setgt, i1, F64>; | 617 def: OpR_RR_pat<F2_dfcmpge, setge, i1, F64>; |
580 def: OpR_RR_pat<F2_dfcmpge, setge, i1, F64>; | 618 def: OpR_RR_pat<F2_dfcmpeq, setoeq, i1, F64>; |
581 def: OpR_RR_pat<F2_dfcmpeq, setoeq, i1, F64>; | 619 def: OpR_RR_pat<F2_dfcmpgt, setogt, i1, F64>; |
582 def: OpR_RR_pat<F2_dfcmpgt, setogt, i1, F64>; | 620 def: OpR_RR_pat<F2_dfcmpge, setoge, i1, F64>; |
583 def: OpR_RR_pat<F2_dfcmpge, setoge, i1, F64>; | 621 def: OpR_RR_pat<F2_dfcmpgt, RevCmp<setolt>, i1, F64>; |
584 def: OpR_RR_pat<F2_dfcmpgt, RevCmp<setolt>, i1, F64>; | 622 def: OpR_RR_pat<F2_dfcmpge, RevCmp<setole>, i1, F64>; |
585 def: OpR_RR_pat<F2_dfcmpge, RevCmp<setole>, i1, F64>; | 623 def: OpR_RR_pat<F2_dfcmpgt, RevCmp<setlt>, i1, F64>; |
586 def: OpR_RR_pat<F2_dfcmpgt, RevCmp<setlt>, i1, F64>; | 624 def: OpR_RR_pat<F2_dfcmpge, RevCmp<setle>, i1, F64>; |
587 def: OpR_RR_pat<F2_dfcmpge, RevCmp<setle>, i1, F64>; | 625 def: OpR_RR_pat<F2_dfcmpuo, setuo, i1, F64>; |
588 def: OpR_RR_pat<F2_dfcmpuo, setuo, i1, F64>; | |
589 } | |
590 | 626 |
591 // Avoid C4_cmpneqi, C4_cmpltei, C4_cmplteui, since they cannot form compounds. | 627 // Avoid C4_cmpneqi, C4_cmpltei, C4_cmplteui, since they cannot form compounds. |
592 | 628 |
593 def: Pat<(i1 (setne I32:$Rs, anyimm:$u5)), | 629 def: Pat<(i1 (setne I32:$Rs, anyimm:$u5)), |
594 (C2_not (C2_cmpeqi I32:$Rs, imm:$u5))>; | 630 (C2_not (C2_cmpeqi I32:$Rs, imm:$u5))>; |
595 def: Pat<(i1 (setle I32:$Rs, anyimm:$u5)), | 631 def: Pat<(i1 (setle I32:$Rs, anyimm:$u5)), |
596 (C2_not (C2_cmpgti I32:$Rs, imm:$u5))>; | 632 (C2_not (C2_cmpgti I32:$Rs, imm:$u5))>; |
597 def: Pat<(i1 (setule I32:$Rs, anyimm:$u5)), | 633 def: Pat<(i1 (setule I32:$Rs, anyimm:$u5)), |
598 (C2_not (C2_cmpgtui I32:$Rs, imm:$u5))>; | 634 (C2_not (C2_cmpgtui I32:$Rs, imm:$u5))>; |
599 | 635 |
600 def: Pat<(i1 (setne I32:$Rs, I32:$Rt)), | 636 class OpmR_RR_pat<PatFrag Output, PatFrag Op, ValueType ResType, |
601 (C2_not (C2_cmpeq I32:$Rs, I32:$Rt))>; | 637 PatFrag RsPred, PatFrag RtPred = RsPred> |
602 def: Pat<(i1 (setle I32:$Rs, I32:$Rt)), | 638 : Pat<(ResType (Op RsPred:$Rs, RtPred:$Rt)), |
603 (C2_not (C2_cmpgt I32:$Rs, I32:$Rt))>; | 639 (Output RsPred:$Rs, RtPred:$Rt)>; |
604 def: Pat<(i1 (setule I32:$Rs, I32:$Rt)), | 640 |
605 (C2_not (C2_cmpgtu I32:$Rs, I32:$Rt))>; | 641 class Outn<InstHexagon MI> |
606 def: Pat<(i1 (setge I32:$Rs, I32:$Rt)), | 642 : OutPatFrag<(ops node:$Rs, node:$Rt), |
607 (C2_not (C2_cmpgt I32:$Rt, I32:$Rs))>; | 643 (C2_not (MI $Rs, $Rt))>; |
608 def: Pat<(i1 (setuge I32:$Rs, I32:$Rt)), | 644 |
609 (C2_not (C2_cmpgtu I32:$Rt, I32:$Rs))>; | 645 def: OpmR_RR_pat<Outn<C2_cmpeq>, setne, i1, I32>; |
610 | 646 def: OpmR_RR_pat<Outn<C2_cmpgt>, setle, i1, I32>; |
611 def: Pat<(i1 (setle I64:$Rs, I64:$Rt)), | 647 def: OpmR_RR_pat<Outn<C2_cmpgtu>, setule, i1, I32>; |
612 (C2_not (C2_cmpgtp I64:$Rs, I64:$Rt))>; | 648 def: OpmR_RR_pat<Outn<C2_cmpgt>, RevCmp<setge>, i1, I32>; |
613 def: Pat<(i1 (setne I64:$Rs, I64:$Rt)), | 649 def: OpmR_RR_pat<Outn<C2_cmpgtu>, RevCmp<setuge>, i1, I32>; |
614 (C2_not (C2_cmpeqp I64:$Rs, I64:$Rt))>; | 650 def: OpmR_RR_pat<Outn<C2_cmpeqp>, setne, i1, I64>; |
615 def: Pat<(i1 (setge I64:$Rs, I64:$Rt)), | 651 def: OpmR_RR_pat<Outn<C2_cmpgtp>, setle, i1, I64>; |
616 (C2_not (C2_cmpgtp I64:$Rt, I64:$Rs))>; | 652 def: OpmR_RR_pat<Outn<C2_cmpgtup>, setule, i1, I64>; |
617 def: Pat<(i1 (setuge I64:$Rs, I64:$Rt)), | 653 def: OpmR_RR_pat<Outn<C2_cmpgtp>, RevCmp<setge>, i1, I64>; |
618 (C2_not (C2_cmpgtup I64:$Rt, I64:$Rs))>; | 654 def: OpmR_RR_pat<Outn<C2_cmpgtup>, RevCmp<setuge>, i1, I64>; |
619 def: Pat<(i1 (setule I64:$Rs, I64:$Rt)), | 655 def: OpmR_RR_pat<Outn<A2_vcmpbeq>, setne, v8i1, V8I8>; |
620 (C2_not (C2_cmpgtup I64:$Rs, I64:$Rt))>; | 656 def: OpmR_RR_pat<Outn<A4_vcmpbgt>, setle, v8i1, V8I8>; |
657 def: OpmR_RR_pat<Outn<A2_vcmpbgtu>, setule, v8i1, V8I8>; | |
658 def: OpmR_RR_pat<Outn<A4_vcmpbgt>, RevCmp<setge>, v8i1, V8I8>; | |
659 def: OpmR_RR_pat<Outn<A2_vcmpbgtu>, RevCmp<setuge>, v8i1, V8I8>; | |
660 def: OpmR_RR_pat<Outn<A2_vcmpheq>, setne, v4i1, V4I16>; | |
661 def: OpmR_RR_pat<Outn<A2_vcmphgt>, setle, v4i1, V4I16>; | |
662 def: OpmR_RR_pat<Outn<A2_vcmphgtu>, setule, v4i1, V4I16>; | |
663 def: OpmR_RR_pat<Outn<A2_vcmphgt>, RevCmp<setge>, v4i1, V4I16>; | |
664 def: OpmR_RR_pat<Outn<A2_vcmphgtu>, RevCmp<setuge>, v4i1, V4I16>; | |
665 def: OpmR_RR_pat<Outn<A2_vcmpweq>, setne, v2i1, V2I32>; | |
666 def: OpmR_RR_pat<Outn<A2_vcmpwgt>, setle, v2i1, V2I32>; | |
667 def: OpmR_RR_pat<Outn<A2_vcmpwgtu>, setule, v2i1, V2I32>; | |
668 def: OpmR_RR_pat<Outn<A2_vcmpwgt>, RevCmp<setge>, v2i1, V2I32>; | |
669 def: OpmR_RR_pat<Outn<A2_vcmpwgtu>, RevCmp<setuge>, v2i1, V2I32>; | |
621 | 670 |
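The Outn/RevCmp combinations above rely on ordinary integer-compare identities; a minimal C++ sketch of those identities (illustrative only, not part of the patch):

    #include <cstdint>
    // The missing predicates are produced by negating an existing compare,
    // optionally with the operands swapped (RevCmp).
    constexpr bool le_(int32_t a, int32_t b) { return !(a > b); }   // Outn<C2_cmpgt>
    constexpr bool ge_(int32_t a, int32_t b) { return !(b > a); }   // Outn<C2_cmpgt> + RevCmp
    constexpr bool ne_(int32_t a, int32_t b) { return !(a == b); }  // Outn<C2_cmpeq>
    static_assert(le_(3, 3) && ge_(4, 3) && ne_(4, 3), "");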
622 let AddedComplexity = 100 in { | 671 let AddedComplexity = 100 in { |
623 def: Pat<(i1 (seteq (and (xor I32:$Rs, I32:$Rt), 255), 0)), | 672 def: Pat<(i1 (seteq (and (xor I32:$Rs, I32:$Rt), 255), 0)), |
624 (A4_cmpbeq IntRegs:$Rs, IntRegs:$Rt)>; | 673 (A4_cmpbeq IntRegs:$Rs, IntRegs:$Rt)>; |
625 def: Pat<(i1 (setne (and (xor I32:$Rs, I32:$Rt), 255), 0)), | 674 def: Pat<(i1 (setne (and (xor I32:$Rs, I32:$Rt), 255), 0)), |
677 def: Pat<(i32 (zext (i1 (seteq I32:$Rs, anyimm:$s8)))), | 726 def: Pat<(i32 (zext (i1 (seteq I32:$Rs, anyimm:$s8)))), |
678 (A4_rcmpeqi I32:$Rs, imm:$s8)>; | 727 (A4_rcmpeqi I32:$Rs, imm:$s8)>; |
679 def: Pat<(i32 (zext (i1 (setne I32:$Rs, anyimm:$s8)))), | 728 def: Pat<(i32 (zext (i1 (setne I32:$Rs, anyimm:$s8)))), |
680 (A4_rcmpneqi I32:$Rs, imm:$s8)>; | 729 (A4_rcmpneqi I32:$Rs, imm:$s8)>; |
681 | 730 |
682 def: Pat<(i1 (setne I1:$Ps, I1:$Pt)), | 731 def: Pat<(i1 (seteq I1:$Ps, (i1 -1))), (I1:$Ps)>; |
683 (C2_xor I1:$Ps, I1:$Pt)>; | 732 def: Pat<(i1 (setne I1:$Ps, (i1 -1))), (C2_not I1:$Ps)>; |
684 | 733 def: Pat<(i1 (seteq I1:$Ps, I1:$Pt)), (C2_xor I1:$Ps, (C2_not I1:$Pt))>; |
685 def: Pat<(i1 (seteq V4I8:$Rs, V4I8:$Rt)), | 734 def: Pat<(i1 (setne I1:$Ps, I1:$Pt)), (C2_xor I1:$Ps, I1:$Pt)>; |
686 (A2_vcmpbeq (ToZext64 $Rs), (ToZext64 $Rt))>; | |
687 def: Pat<(i1 (setgt V4I8:$Rs, V4I8:$Rt)), | |
688 (A4_vcmpbgt (ToZext64 $Rs), (ToZext64 $Rt))>; | |
689 def: Pat<(i1 (setugt V4I8:$Rs, V4I8:$Rt)), | |
690 (A2_vcmpbgtu (ToZext64 $Rs), (ToZext64 $Rt))>; | |
691 | |
692 def: Pat<(i1 (seteq V2I16:$Rs, V2I16:$Rt)), | |
693 (A2_vcmpheq (ToZext64 $Rs), (ToZext64 $Rt))>; | |
694 def: Pat<(i1 (setgt V2I16:$Rs, V2I16:$Rt)), | |
695 (A2_vcmphgt (ToZext64 $Rs), (ToZext64 $Rt))>; | |
696 def: Pat<(i1 (setugt V2I16:$Rs, V2I16:$Rt)), | |
697 (A2_vcmphgtu (ToZext64 $Rs), (ToZext64 $Rt))>; | |
698 | |
699 def: Pat<(v2i1 (setne V2I32:$Rs, V2I32:$Rt)), | |
700 (C2_not (v2i1 (A2_vcmpbeq V2I32:$Rs, V2I32:$Rt)))>; | |
701 | 735 |
702 // Floating-point comparisons with checks for ordered/unordered status. | 736 // Floating-point comparisons with checks for ordered/unordered status. |
703 | 737 |
704 class T3<InstHexagon MI1, InstHexagon MI2, InstHexagon MI3> | 738 class T3<InstHexagon MI1, InstHexagon MI2, InstHexagon MI3> |
705 : OutPatFrag<(ops node:$Rs, node:$Rt), | 739 : OutPatFrag<(ops node:$Rs, node:$Rt), |
706 (MI1 (MI2 $Rs, $Rt), (MI3 $Rs, $Rt))>; | 740 (MI1 (MI2 $Rs, $Rt), (MI3 $Rs, $Rt))>; |
707 | 741 |
708 class OpmR_RR_pat<PatFrag Output, PatFrag Op, ValueType ResType, | |
709 PatFrag RsPred, PatFrag RtPred = RsPred> | |
710 : Pat<(ResType (Op RsPred:$Rs, RtPred:$Rt)), | |
711 (Output RsPred:$Rs, RtPred:$Rt)>; | |
712 | |
713 class Cmpuf<InstHexagon MI>: T3<C2_or, F2_sfcmpuo, MI>; | 742 class Cmpuf<InstHexagon MI>: T3<C2_or, F2_sfcmpuo, MI>; |
714 class Cmpud<InstHexagon MI>: T3<C2_or, F2_dfcmpuo, MI>; | 743 class Cmpud<InstHexagon MI>: T3<C2_or, F2_dfcmpuo, MI>; |
715 | 744 |
716 class Cmpufn<InstHexagon MI>: T3<C2_orn, F2_sfcmpuo, MI>; | 745 class Cmpufn<InstHexagon MI>: T3<C2_orn, F2_sfcmpuo, MI>; |
717 class Cmpudn<InstHexagon MI>: T3<C2_orn, F2_dfcmpuo, MI>; | 746 class Cmpudn<InstHexagon MI>: T3<C2_orn, F2_dfcmpuo, MI>; |
718 | 747 |
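Cmpuf/Cmpud OR an ordered compare with an explicit unordered check; a small C++ sketch of the predicates they form (illustrative only):

    #include <cmath>
    // setueq = unordered-or-equal, setune = unordered-or-not-equal
    inline bool ueq(float a, float b) { return std::isunordered(a, b) ||  (a == b); } // Cmpuf<F2_sfcmpeq>
    inline bool une(float a, float b) { return std::isunordered(a, b) || !(a == b); } // Cmpufn<F2_sfcmpeq>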
719 let Predicates = [HasV5T] in { | 748 def: OpmR_RR_pat<Cmpuf<F2_sfcmpeq>, setueq, i1, F32>; |
720 def: OpmR_RR_pat<Cmpuf<F2_sfcmpeq>, setueq, i1, F32>; | 749 def: OpmR_RR_pat<Cmpuf<F2_sfcmpge>, setuge, i1, F32>; |
721 def: OpmR_RR_pat<Cmpuf<F2_sfcmpge>, setuge, i1, F32>; | 750 def: OpmR_RR_pat<Cmpuf<F2_sfcmpgt>, setugt, i1, F32>; |
722 def: OpmR_RR_pat<Cmpuf<F2_sfcmpgt>, setugt, i1, F32>; | 751 def: OpmR_RR_pat<Cmpuf<F2_sfcmpge>, RevCmp<setule>, i1, F32>; |
723 def: OpmR_RR_pat<Cmpuf<F2_sfcmpge>, RevCmp<setule>, i1, F32>; | 752 def: OpmR_RR_pat<Cmpuf<F2_sfcmpgt>, RevCmp<setult>, i1, F32>; |
724 def: OpmR_RR_pat<Cmpuf<F2_sfcmpgt>, RevCmp<setult>, i1, F32>; | 753 def: OpmR_RR_pat<Cmpufn<F2_sfcmpeq>, setune, i1, F32>; |
725 def: OpmR_RR_pat<Cmpufn<F2_sfcmpeq>, setune, i1, F32>; | 754 |
726 | 755 def: OpmR_RR_pat<Cmpud<F2_dfcmpeq>, setueq, i1, F64>; |
727 def: OpmR_RR_pat<Cmpud<F2_dfcmpeq>, setueq, i1, F64>; | 756 def: OpmR_RR_pat<Cmpud<F2_dfcmpge>, setuge, i1, F64>; |
728 def: OpmR_RR_pat<Cmpud<F2_dfcmpge>, setuge, i1, F64>; | 757 def: OpmR_RR_pat<Cmpud<F2_dfcmpgt>, setugt, i1, F64>; |
729 def: OpmR_RR_pat<Cmpud<F2_dfcmpgt>, setugt, i1, F64>; | 758 def: OpmR_RR_pat<Cmpud<F2_dfcmpge>, RevCmp<setule>, i1, F64>; |
730 def: OpmR_RR_pat<Cmpud<F2_dfcmpge>, RevCmp<setule>, i1, F64>; | 759 def: OpmR_RR_pat<Cmpud<F2_dfcmpgt>, RevCmp<setult>, i1, F64>; |
731 def: OpmR_RR_pat<Cmpud<F2_dfcmpgt>, RevCmp<setult>, i1, F64>; | 760 def: OpmR_RR_pat<Cmpudn<F2_dfcmpeq>, setune, i1, F64>; |
732 def: OpmR_RR_pat<Cmpudn<F2_dfcmpeq>, setune, i1, F64>; | 761 |
733 } | 762 def: OpmR_RR_pat<Outn<F2_sfcmpeq>, setone, i1, F32>; |
734 | 763 def: OpmR_RR_pat<Outn<F2_sfcmpeq>, setne, i1, F32>; |
735 class Outn<InstHexagon MI> | 764 |
736 : OutPatFrag<(ops node:$Rs, node:$Rt), | 765 def: OpmR_RR_pat<Outn<F2_dfcmpeq>, setone, i1, F64>; |
737 (C2_not (MI $Rs, $Rt))>; | 766 def: OpmR_RR_pat<Outn<F2_dfcmpeq>, setne, i1, F64>; |
738 | 767 |
739 let Predicates = [HasV5T] in { | 768 def: OpmR_RR_pat<Outn<F2_sfcmpuo>, seto, i1, F32>; |
740 def: OpmR_RR_pat<Outn<F2_sfcmpeq>, setone, i1, F32>; | 769 def: OpmR_RR_pat<Outn<F2_dfcmpuo>, seto, i1, F64>; |
741 def: OpmR_RR_pat<Outn<F2_sfcmpeq>, setne, i1, F32>; | |
742 | |
743 def: OpmR_RR_pat<Outn<F2_dfcmpeq>, setone, i1, F64>; | |
744 def: OpmR_RR_pat<Outn<F2_dfcmpeq>, setne, i1, F64>; | |
745 | |
746 def: OpmR_RR_pat<Outn<F2_sfcmpuo>, seto, i1, F32>; | |
747 def: OpmR_RR_pat<Outn<F2_dfcmpuo>, seto, i1, F64>; | |
748 } | |
749 | 770 |
750 | 771 |
751 // --(6) Select ---------------------------------------------------------- | 772 // --(6) Select ---------------------------------------------------------- |
752 // | 773 // |
753 | 774 |
773 // Hexagon does not support 64-bit MUXes; so emulate with combines. | 794 // Hexagon does not support 64-bit MUXes; so emulate with combines. |
774 def: Pat<(select I1:$Pu, I64:$Rs, I64:$Rt), | 795 def: Pat<(select I1:$Pu, I64:$Rs, I64:$Rt), |
775 (Combinew (C2_mux I1:$Pu, (HiReg $Rs), (HiReg $Rt)), | 796 (Combinew (C2_mux I1:$Pu, (HiReg $Rs), (HiReg $Rt)), |
776 (C2_mux I1:$Pu, (LoReg $Rs), (LoReg $Rt)))>; | 797 (C2_mux I1:$Pu, (LoReg $Rs), (LoReg $Rt)))>; |
777 | 798 |
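A C++ sketch of the 64-bit mux emulation encoded by the pattern above (illustrative only):

    #include <cstdint>
    inline uint64_t mux64(bool p, uint64_t s, uint64_t t) {
      uint32_t hi = p ? uint32_t(s >> 32) : uint32_t(t >> 32);  // C2_mux on HiReg
      uint32_t lo = p ? uint32_t(s)       : uint32_t(t);        // C2_mux on LoReg
      return (uint64_t(hi) << 32) | lo;                         // Combinew
    }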
778 let Predicates = [HasV5T] in { | 799 def: Pat<(select I1:$Pu, F32:$Rs, f32ImmPred:$I), |
779 def: Pat<(select I1:$Pu, F32:$Rs, f32ImmPred:$I), | 800 (C2_muxir I1:$Pu, F32:$Rs, (ftoi $I))>; |
780 (C2_muxir I1:$Pu, F32:$Rs, (ftoi $I))>; | 801 def: Pat<(select I1:$Pu, f32ImmPred:$I, F32:$Rt), |
781 def: Pat<(select I1:$Pu, f32ImmPred:$I, F32:$Rt), | 802 (C2_muxri I1:$Pu, (ftoi $I), F32:$Rt)>; |
782 (C2_muxri I1:$Pu, (ftoi $I), F32:$Rt)>; | 803 def: Pat<(select I1:$Pu, F32:$Rs, F32:$Rt), |
783 def: Pat<(select I1:$Pu, F32:$Rs, F32:$Rt), | 804 (C2_mux I1:$Pu, F32:$Rs, F32:$Rt)>; |
784 (C2_mux I1:$Pu, F32:$Rs, F32:$Rt)>; | 805 def: Pat<(select I1:$Pu, F64:$Rs, F64:$Rt), |
785 def: Pat<(select I1:$Pu, F64:$Rs, F64:$Rt), | 806 (Combinew (C2_mux I1:$Pu, (HiReg $Rs), (HiReg $Rt)), |
786 (Combinew (C2_mux I1:$Pu, (HiReg $Rs), (HiReg $Rt)), | 807 (C2_mux I1:$Pu, (LoReg $Rs), (LoReg $Rt)))>; |
787 (C2_mux I1:$Pu, (LoReg $Rs), (LoReg $Rt)))>; | 808 |
788 | 809 def: Pat<(select (i1 (setult F32:$Ra, F32:$Rb)), F32:$Rs, F32:$Rt), |
789 def: Pat<(select (i1 (setult F32:$Ra, F32:$Rb)), F32:$Rs, F32:$Rt), | 810 (C2_mux (F2_sfcmpgt F32:$Rb, F32:$Ra), F32:$Rs, F32:$Rt)>; |
790 (C2_mux (F2_sfcmpgt F32:$Rb, F32:$Ra), F32:$Rs, F32:$Rt)>; | 811 def: Pat<(select (i1 (setult F64:$Ra, F64:$Rb)), F64:$Rs, F64:$Rt), |
791 def: Pat<(select (i1 (setult F64:$Ra, F64:$Rb)), F64:$Rs, F64:$Rt), | 812 (C2_vmux (F2_dfcmpgt F64:$Rb, F64:$Ra), F64:$Rs, F64:$Rt)>; |
792 (C2_vmux (F2_dfcmpgt F64:$Rb, F64:$Ra), F64:$Rs, F64:$Rt)>; | 813 |
793 | 814 def: Pat<(select (not I1:$Pu), f32ImmPred:$I, F32:$Rs), |
794 def: Pat<(select (not I1:$Pu), f32ImmPred:$I, F32:$Rs), | 815 (C2_muxir I1:$Pu, F32:$Rs, (ftoi $I))>; |
795 (C2_muxir I1:$Pu, F32:$Rs, (ftoi $I))>; | 816 def: Pat<(select (not I1:$Pu), F32:$Rt, f32ImmPred:$I), |
796 def: Pat<(select (not I1:$Pu), F32:$Rt, f32ImmPred:$I), | 817 (C2_muxri I1:$Pu, (ftoi $I), F32:$Rt)>; |
797 (C2_muxri I1:$Pu, (ftoi $I), F32:$Rt)>; | |
798 } | |
799 | 818 |
800 def: Pat<(select I1:$Pu, V4I8:$Rs, V4I8:$Rt), | 819 def: Pat<(select I1:$Pu, V4I8:$Rs, V4I8:$Rt), |
801 (LoReg (C2_vmux I1:$Pu, (ToZext64 $Rs), (ToZext64 $Rt)))>; | 820 (LoReg (C2_vmux I1:$Pu, (ToAext64 $Rs), (ToAext64 $Rt)))>; |
802 def: Pat<(select I1:$Pu, V2I16:$Rs, V2I16:$Rt), | 821 def: Pat<(select I1:$Pu, V2I16:$Rs, V2I16:$Rt), |
803 (LoReg (C2_vmux I1:$Pu, (ToZext64 $Rs), (ToZext64 $Rt)))>; | 822 (LoReg (C2_vmux I1:$Pu, (ToAext64 $Rs), (ToAext64 $Rt)))>; |
804 def: Pat<(select I1:$Pu, V2I32:$Rs, V2I32:$Rt), | 823 def: Pat<(select I1:$Pu, V2I32:$Rs, V2I32:$Rt), |
805 (Combinew (C2_mux I1:$Pu, (HiReg $Rs), (HiReg $Rt)), | 824 (Combinew (C2_mux I1:$Pu, (HiReg $Rs), (HiReg $Rt)), |
806 (C2_mux I1:$Pu, (LoReg $Rs), (LoReg $Rt)))>; | 825 (C2_mux I1:$Pu, (LoReg $Rs), (LoReg $Rt)))>; |
807 | 826 |
808 def: Pat<(vselect V8I1:$Pu, V8I8:$Rs, V8I8:$Rt), | 827 def: Pat<(vselect V8I1:$Pu, V8I8:$Rs, V8I8:$Rt), |
861 defm: SelMinMax_pats<setugt, I64, A2_maxup, A2_minup>; | 880 defm: SelMinMax_pats<setugt, I64, A2_maxup, A2_minup>; |
862 defm: SelMinMax_pats<setule, I64, A2_minup, A2_maxup>; | 881 defm: SelMinMax_pats<setule, I64, A2_minup, A2_maxup>; |
863 defm: SelMinMax_pats<setult, I64, A2_minup, A2_maxup>; | 882 defm: SelMinMax_pats<setult, I64, A2_minup, A2_maxup>; |
864 } | 883 } |
865 | 884 |
866 let AddedComplexity = 100, Predicates = [HasV5T] in { | 885 let AddedComplexity = 100 in { |
867 defm: SelMinMax_pats<setolt, F32, F2_sfmin, F2_sfmax>; | 886 defm: SelMinMax_pats<setolt, F32, F2_sfmin, F2_sfmax>; |
868 defm: SelMinMax_pats<setole, F32, F2_sfmin, F2_sfmax>; | 887 defm: SelMinMax_pats<setole, F32, F2_sfmin, F2_sfmax>; |
869 defm: SelMinMax_pats<setogt, F32, F2_sfmax, F2_sfmin>; | 888 defm: SelMinMax_pats<setogt, F32, F2_sfmax, F2_sfmin>; |
870 defm: SelMinMax_pats<setoge, F32, F2_sfmax, F2_sfmin>; | 889 defm: SelMinMax_pats<setoge, F32, F2_sfmax, F2_sfmin>; |
871 } | 890 } |
917 (A2_combineii imm:$s8, imm:$s8)>; | 936 (A2_combineii imm:$s8, imm:$s8)>; |
918 def: Pat<(v2i32 (HexagonVSPLAT I32:$Rs)), (Combinew I32:$Rs, I32:$Rs)>; | 937 def: Pat<(v2i32 (HexagonVSPLAT I32:$Rs)), (Combinew I32:$Rs, I32:$Rs)>; |
919 | 938 |
920 let AddedComplexity = 10 in | 939 let AddedComplexity = 10 in |
921 def: Pat<(v8i8 (HexagonVSPLAT I32:$Rs)), (S6_vsplatrbp I32:$Rs)>, | 940 def: Pat<(v8i8 (HexagonVSPLAT I32:$Rs)), (S6_vsplatrbp I32:$Rs)>, |
922 Requires<[HasV62T]>; | 941 Requires<[HasV62]>; |
923 def: Pat<(v8i8 (HexagonVSPLAT I32:$Rs)), | 942 def: Pat<(v8i8 (HexagonVSPLAT I32:$Rs)), |
924 (Combinew (S2_vsplatrb I32:$Rs), (S2_vsplatrb I32:$Rs))>; | 943 (Combinew (S2_vsplatrb I32:$Rs), (S2_vsplatrb I32:$Rs))>; |
925 | 944 |
926 | 945 |
927 // --(8) Shift/permute --------------------------------------------------- | 946 // --(8) Shift/permute --------------------------------------------------- |
978 def: OpR_RR_pat<S2_asl_r_r, Shl, i32, I32, I32>; | 997 def: OpR_RR_pat<S2_asl_r_r, Shl, i32, I32, I32>; |
979 def: OpR_RR_pat<S2_asr_r_p, Sra, i64, I64, I32>; | 998 def: OpR_RR_pat<S2_asr_r_p, Sra, i64, I64, I32>; |
980 def: OpR_RR_pat<S2_lsr_r_p, Srl, i64, I64, I32>; | 999 def: OpR_RR_pat<S2_lsr_r_p, Srl, i64, I64, I32>; |
981 def: OpR_RR_pat<S2_asl_r_p, Shl, i64, I64, I32>; | 1000 def: OpR_RR_pat<S2_asl_r_p, Shl, i64, I64, I32>; |
982 | 1001 |
1002 // Funnel shifts. | |
1003 def IsMul8_U3: PatLeaf<(i32 imm), [{ | |
1004 uint64_t V = N->getZExtValue(); | |
1005 return V % 8 == 0 && isUInt<3>(V / 8); | |
1006 }]>; | |
1007 | |
1008 def Divu8: SDNodeXForm<imm, [{ | |
1009 return CurDAG->getTargetConstant(N->getZExtValue() / 8, SDLoc(N), MVT::i32); | |
1010 }]>; | |
1011 | |
1012 // Funnel shift-left. | |
1013 def FShl32i: OutPatFrag<(ops node:$Rs, node:$Rt, node:$S), | |
1014 (HiReg (S2_asl_i_p (Combinew $Rs, $Rt), $S))>; | |
1015 def FShl32r: OutPatFrag<(ops node:$Rs, node:$Rt, node:$Ru), | |
1016 (HiReg (S2_asl_r_p (Combinew $Rs, $Rt), $Ru))>; | |
1017 | |
1018 def FShl64i: OutPatFrag<(ops node:$Rs, node:$Rt, node:$S), | |
1019 (S2_lsr_i_p_or (S2_asl_i_p $Rt, $S), $Rs, (Subi<64> $S))>; | |
1020 def FShl64r: OutPatFrag<(ops node:$Rs, node:$Rt, node:$Ru), | |
1021 (S2_lsr_r_p_or (S2_asl_r_p $Rt, $Ru), $Rs, (A2_subri 64, $Ru))>; | |
1022 | |
1023 // Combined SDNodeXForm: (Divu8 (Subi<64> $S)) | |
1024 def Divu64_8: SDNodeXForm<imm, [{ | |
1025 return CurDAG->getTargetConstant((64 - N->getSExtValue()) / 8, | |
1026 SDLoc(N), MVT::i32); | |
1027 }]>; | |
1028 | |
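The fshl/rotl fragments here follow the generic LLVM funnel-shift definition; a reference C++ sketch (illustrative only, test values chosen arbitrarily):

    #include <cstdint>
    constexpr uint32_t fshl32(uint32_t a, uint32_t b, unsigned s) {
      s %= 32;                                   // shift amount is taken modulo the bit width
      return s == 0 ? a : (a << s) | (b >> (32 - s));
    }
    constexpr uint32_t rotl32(uint32_t x, unsigned s) { return fshl32(x, x, s); } // rotl = fshl(x, x, s)
    static_assert(fshl32(0x12345678u, 0x9abcdef0u, 16) == 0x56789abcu, "");
    static_assert(rotl32(0x80000001u, 1) == 0x00000003u, "");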
1029 // Special cases: | |
1030 let AddedComplexity = 100 in { | |
1031 def: Pat<(fshl I32:$Rs, I32:$Rt, (i32 16)), | |
1032 (A2_combine_hl I32:$Rs, I32:$Rt)>; | |
1033 def: Pat<(fshl I64:$Rs, I64:$Rt, IsMul8_U3:$S), | |
1034 (S2_valignib I64:$Rs, I64:$Rt, (Divu64_8 $S))>; | |
1035 } | |
1036 | |
1037 let Predicates = [HasV60], AddedComplexity = 50 in { | |
1038 def: OpR_RI_pat<S6_rol_i_r, Rol, i32, I32, u5_0ImmPred>; | |
1039 def: OpR_RI_pat<S6_rol_i_p, Rol, i64, I64, u6_0ImmPred>; | |
1040 } | |
1041 let AddedComplexity = 30 in { | |
1042 def: Pat<(rotl I32:$Rs, u5_0ImmPred:$S), (FShl32i $Rs, $Rs, imm:$S)>; | |
1043 def: Pat<(rotl I64:$Rs, u6_0ImmPred:$S), (FShl64i $Rs, $Rs, imm:$S)>; | |
1044 def: Pat<(fshl I32:$Rs, I32:$Rt, u5_0ImmPred:$S), (FShl32i $Rs, $Rt, imm:$S)>; | |
1045 def: Pat<(fshl I64:$Rs, I64:$Rt, u6_0ImmPred:$S), (FShl64i $Rs, $Rt, imm:$S)>; | |
1046 } | |
1047 def: Pat<(rotl I32:$Rs, I32:$Rt), (FShl32r $Rs, $Rs, $Rt)>; | |
1048 def: Pat<(rotl I64:$Rs, I32:$Rt), (FShl64r $Rs, $Rs, $Rt)>; | |
1049 def: Pat<(fshl I32:$Rs, I32:$Rt, I32:$Ru), (FShl32r $Rs, $Rt, $Ru)>; | |
1050 def: Pat<(fshl I64:$Rs, I64:$Rt, I32:$Ru), (FShl64r $Rs, $Rt, $Ru)>; | |
1051 | |
1052 // Funnel shift-right. | |
1053 def FShr32i: OutPatFrag<(ops node:$Rs, node:$Rt, node:$S), | |
1054 (LoReg (S2_lsr_i_p (Combinew $Rs, $Rt), $S))>; | |
1055 def FShr32r: OutPatFrag<(ops node:$Rs, node:$Rt, node:$Ru), | |
1056 (LoReg (S2_lsr_r_p (Combinew $Rs, $Rt), $Ru))>; | |
1057 | |
1058 def FShr64i: OutPatFrag<(ops node:$Rs, node:$Rt, node:$S), | |
1059 (S2_asl_i_p_or (S2_lsr_i_p $Rt, $S), $Rs, (Subi<64> $S))>; | |
1060 def FShr64r: OutPatFrag<(ops node:$Rs, node:$Rt, node:$Ru), | |
1061 (S2_asl_r_p_or (S2_lsr_r_p $Rt, $Ru), $Rs, (A2_subri 64, $Ru))>; | |
1062 | |
1063 // Special cases: | |
1064 let AddedComplexity = 100 in { | |
1065 def: Pat<(fshr I32:$Rs, I32:$Rt, (i32 16)), | |
1066 (A2_combine_hl I32:$Rs, I32:$Rt)>; | |
1067 def: Pat<(fshr I64:$Rs, I64:$Rt, IsMul8_U3:$S), | |
1068 (S2_valignib I64:$Rs, I64:$Rt, (Divu8 $S))>; | |
1069 } | |
1070 | |
1071 let Predicates = [HasV60], AddedComplexity = 50 in { | |
1072 def: Pat<(rotr I32:$Rs, u5_0ImmPred:$S), (S6_rol_i_r I32:$Rs, (Subi<32> $S))>; | |
1073 def: Pat<(rotr I64:$Rs, u6_0ImmPred:$S), (S6_rol_i_p I64:$Rs, (Subi<64> $S))>; | |
1074 } | |
1075 let AddedComplexity = 30 in { | |
1076 def: Pat<(rotr I32:$Rs, u5_0ImmPred:$S), (FShr32i $Rs, $Rs, imm:$S)>; | |
1077 def: Pat<(rotr I64:$Rs, u6_0ImmPred:$S), (FShr64i $Rs, $Rs, imm:$S)>; | |
1078 def: Pat<(fshr I32:$Rs, I32:$Rt, u5_0ImmPred:$S), (FShr32i $Rs, $Rt, imm:$S)>; | |
1079 def: Pat<(fshr I64:$Rs, I64:$Rt, u6_0ImmPred:$S), (FShr64i $Rs, $Rt, imm:$S)>; | |
1080 } | |
1081 def: Pat<(rotr I32:$Rs, I32:$Rt), (FShr32r $Rs, $Rs, $Rt)>; | |
1082 def: Pat<(rotr I64:$Rs, I32:$Rt), (FShr64r $Rs, $Rs, $Rt)>; | |
1083 def: Pat<(fshr I32:$Rs, I32:$Rt, I32:$Ru), (FShr32r $Rs, $Rt, $Ru)>; | |
1084 def: Pat<(fshr I64:$Rs, I64:$Rt, I32:$Ru), (FShr64r $Rs, $Rt, $Ru)>; | |
1085 | |
983 | 1086 |
984 def: Pat<(sra (add (sra I32:$Rs, u5_0ImmPred:$u5), 1), (i32 1)), | 1087 def: Pat<(sra (add (sra I32:$Rs, u5_0ImmPred:$u5), 1), (i32 1)), |
985 (S2_asr_i_r_rnd I32:$Rs, imm:$u5)>; | 1088 (S2_asr_i_r_rnd I32:$Rs, imm:$u5)>; |
986 def: Pat<(sra (add (sra I64:$Rs, u6_0ImmPred:$u6), 1), (i32 1)), | 1089 def: Pat<(sra (add (sra I64:$Rs, u6_0ImmPred:$u6), 1), (i32 1)), |
987 (S2_asr_i_p_rnd I64:$Rs, imm:$u6)>, Requires<[HasV5T]>; | 1090 (S2_asr_i_p_rnd I64:$Rs, imm:$u6)>; |
988 | 1091 |
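The pattern above recognizes a rounding arithmetic shift written as shift, add-one, shift; a C++ sketch of the idiom (illustrative only):

    #include <cstdint>
    constexpr int32_t asr_rnd(int32_t x, unsigned n) {
      return ((x >> n) + 1) >> 1;   // selected as S2_asr_i_r_rnd with immediate n
    }
    static_assert(asr_rnd(7, 1) == 2, "");  // 7/4 = 1.75 -> 2
    static_assert(asr_rnd(5, 1) == 1, "");  // 5/4 = 1.25 -> 1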
989 // Prefer S2_addasl_rrri over S2_asl_i_r_acc. | 1092 // Prefer S2_addasl_rrri over S2_asl_i_r_acc. |
990 let AddedComplexity = 120 in | 1093 let AddedComplexity = 120 in |
991 def: Pat<(add I32:$Rt, (shl I32:$Rs, u3_0ImmPred:$u3)), | 1094 def: Pat<(add I32:$Rt, (shl I32:$Rs, u3_0ImmPred:$u3)), |
992 (S2_addasl_rrri IntRegs:$Rt, IntRegs:$Rs, imm:$u3)>; | 1095 (S2_addasl_rrri IntRegs:$Rt, IntRegs:$Rs, imm:$u3)>; |
1023 def: AccRRI_pat<S2_asl_i_p_acc, Add, Su<Shl>, I64, u6_0ImmPred>; | 1126 def: AccRRI_pat<S2_asl_i_p_acc, Add, Su<Shl>, I64, u6_0ImmPred>; |
1024 def: AccRRI_pat<S2_asl_i_p_nac, Sub, Su<Shl>, I64, u6_0ImmPred>; | 1127 def: AccRRI_pat<S2_asl_i_p_nac, Sub, Su<Shl>, I64, u6_0ImmPred>; |
1025 def: AccRRI_pat<S2_asl_i_p_and, And, Su<Shl>, I64, u6_0ImmPred>; | 1128 def: AccRRI_pat<S2_asl_i_p_and, And, Su<Shl>, I64, u6_0ImmPred>; |
1026 def: AccRRI_pat<S2_asl_i_p_or, Or, Su<Shl>, I64, u6_0ImmPred>; | 1129 def: AccRRI_pat<S2_asl_i_p_or, Or, Su<Shl>, I64, u6_0ImmPred>; |
1027 def: AccRRI_pat<S2_asl_i_p_xacc, Xor, Su<Shl>, I64, u6_0ImmPred>; | 1130 def: AccRRI_pat<S2_asl_i_p_xacc, Xor, Su<Shl>, I64, u6_0ImmPred>; |
1131 | |
1132 let Predicates = [HasV60] in { | |
1133 def: AccRRI_pat<S6_rol_i_r_acc, Add, Su<Rol>, I32, u5_0ImmPred>; | |
1134 def: AccRRI_pat<S6_rol_i_r_nac, Sub, Su<Rol>, I32, u5_0ImmPred>; | |
1135 def: AccRRI_pat<S6_rol_i_r_and, And, Su<Rol>, I32, u5_0ImmPred>; | |
1136 def: AccRRI_pat<S6_rol_i_r_or, Or, Su<Rol>, I32, u5_0ImmPred>; | |
1137 def: AccRRI_pat<S6_rol_i_r_xacc, Xor, Su<Rol>, I32, u5_0ImmPred>; | |
1138 | |
1139 def: AccRRI_pat<S6_rol_i_p_acc, Add, Su<Rol>, I64, u6_0ImmPred>; | |
1140 def: AccRRI_pat<S6_rol_i_p_nac, Sub, Su<Rol>, I64, u6_0ImmPred>; | |
1141 def: AccRRI_pat<S6_rol_i_p_and, And, Su<Rol>, I64, u6_0ImmPred>; | |
1142 def: AccRRI_pat<S6_rol_i_p_or, Or, Su<Rol>, I64, u6_0ImmPred>; | |
1143 def: AccRRI_pat<S6_rol_i_p_xacc, Xor, Su<Rol>, I64, u6_0ImmPred>; | |
1144 } | |
1028 } | 1145 } |
1029 | 1146 |
1030 let AddedComplexity = 100 in { | 1147 let AddedComplexity = 100 in { |
1031 def: AccRRR_pat<S2_asr_r_r_acc, Add, Su<Sra>, I32, I32>; | 1148 def: AccRRR_pat<S2_asr_r_r_acc, Add, Su<Sra>, I32, I32, I32>; |
1032 def: AccRRR_pat<S2_asr_r_r_nac, Sub, Su<Sra>, I32, I32>; | 1149 def: AccRRR_pat<S2_asr_r_r_nac, Sub, Su<Sra>, I32, I32, I32>; |
1033 def: AccRRR_pat<S2_asr_r_r_and, And, Su<Sra>, I32, I32>; | 1150 def: AccRRR_pat<S2_asr_r_r_and, And, Su<Sra>, I32, I32, I32>; |
1034 def: AccRRR_pat<S2_asr_r_r_or, Or, Su<Sra>, I32, I32>; | 1151 def: AccRRR_pat<S2_asr_r_r_or, Or, Su<Sra>, I32, I32, I32>; |
1035 | 1152 |
1036 def: AccRRR_pat<S2_asr_r_p_acc, Add, Su<Sra>, I64, I32>; | 1153 def: AccRRR_pat<S2_asr_r_p_acc, Add, Su<Sra>, I64, I64, I32>; |
1037 def: AccRRR_pat<S2_asr_r_p_nac, Sub, Su<Sra>, I64, I32>; | 1154 def: AccRRR_pat<S2_asr_r_p_nac, Sub, Su<Sra>, I64, I64, I32>; |
1038 def: AccRRR_pat<S2_asr_r_p_and, And, Su<Sra>, I64, I32>; | 1155 def: AccRRR_pat<S2_asr_r_p_and, And, Su<Sra>, I64, I64, I32>; |
1039 def: AccRRR_pat<S2_asr_r_p_or, Or, Su<Sra>, I64, I32>; | 1156 def: AccRRR_pat<S2_asr_r_p_or, Or, Su<Sra>, I64, I64, I32>; |
1040 def: AccRRR_pat<S2_asr_r_p_xor, Xor, Su<Sra>, I64, I32>; | 1157 def: AccRRR_pat<S2_asr_r_p_xor, Xor, Su<Sra>, I64, I64, I32>; |
1041 | 1158 |
1042 def: AccRRR_pat<S2_lsr_r_r_acc, Add, Su<Srl>, I32, I32>; | 1159 def: AccRRR_pat<S2_lsr_r_r_acc, Add, Su<Srl>, I32, I32, I32>; |
1043 def: AccRRR_pat<S2_lsr_r_r_nac, Sub, Su<Srl>, I32, I32>; | 1160 def: AccRRR_pat<S2_lsr_r_r_nac, Sub, Su<Srl>, I32, I32, I32>; |
1044 def: AccRRR_pat<S2_lsr_r_r_and, And, Su<Srl>, I32, I32>; | 1161 def: AccRRR_pat<S2_lsr_r_r_and, And, Su<Srl>, I32, I32, I32>; |
1045 def: AccRRR_pat<S2_lsr_r_r_or, Or, Su<Srl>, I32, I32>; | 1162 def: AccRRR_pat<S2_lsr_r_r_or, Or, Su<Srl>, I32, I32, I32>; |
1046 | 1163 |
1047 def: AccRRR_pat<S2_lsr_r_p_acc, Add, Su<Srl>, I64, I32>; | 1164 def: AccRRR_pat<S2_lsr_r_p_acc, Add, Su<Srl>, I64, I64, I32>; |
1048 def: AccRRR_pat<S2_lsr_r_p_nac, Sub, Su<Srl>, I64, I32>; | 1165 def: AccRRR_pat<S2_lsr_r_p_nac, Sub, Su<Srl>, I64, I64, I32>; |
1049 def: AccRRR_pat<S2_lsr_r_p_and, And, Su<Srl>, I64, I32>; | 1166 def: AccRRR_pat<S2_lsr_r_p_and, And, Su<Srl>, I64, I64, I32>; |
1050 def: AccRRR_pat<S2_lsr_r_p_or, Or, Su<Srl>, I64, I32>; | 1167 def: AccRRR_pat<S2_lsr_r_p_or, Or, Su<Srl>, I64, I64, I32>; |
1051 def: AccRRR_pat<S2_lsr_r_p_xor, Xor, Su<Srl>, I64, I32>; | 1168 def: AccRRR_pat<S2_lsr_r_p_xor, Xor, Su<Srl>, I64, I64, I32>; |
1052 | 1169 |
1053 def: AccRRR_pat<S2_asl_r_r_acc, Add, Su<Shl>, I32, I32>; | 1170 def: AccRRR_pat<S2_asl_r_r_acc, Add, Su<Shl>, I32, I32, I32>; |
1054 def: AccRRR_pat<S2_asl_r_r_nac, Sub, Su<Shl>, I32, I32>; | 1171 def: AccRRR_pat<S2_asl_r_r_nac, Sub, Su<Shl>, I32, I32, I32>; |
1055 def: AccRRR_pat<S2_asl_r_r_and, And, Su<Shl>, I32, I32>; | 1172 def: AccRRR_pat<S2_asl_r_r_and, And, Su<Shl>, I32, I32, I32>; |
1056 def: AccRRR_pat<S2_asl_r_r_or, Or, Su<Shl>, I32, I32>; | 1173 def: AccRRR_pat<S2_asl_r_r_or, Or, Su<Shl>, I32, I32, I32>; |
1057 | 1174 |
1058 def: AccRRR_pat<S2_asl_r_p_acc, Add, Su<Shl>, I64, I32>; | 1175 def: AccRRR_pat<S2_asl_r_p_acc, Add, Su<Shl>, I64, I64, I32>; |
1059 def: AccRRR_pat<S2_asl_r_p_nac, Sub, Su<Shl>, I64, I32>; | 1176 def: AccRRR_pat<S2_asl_r_p_nac, Sub, Su<Shl>, I64, I64, I32>; |
1060 def: AccRRR_pat<S2_asl_r_p_and, And, Su<Shl>, I64, I32>; | 1177 def: AccRRR_pat<S2_asl_r_p_and, And, Su<Shl>, I64, I64, I32>; |
1061 def: AccRRR_pat<S2_asl_r_p_or, Or, Su<Shl>, I64, I32>; | 1178 def: AccRRR_pat<S2_asl_r_p_or, Or, Su<Shl>, I64, I64, I32>; |
1062 def: AccRRR_pat<S2_asl_r_p_xor, Xor, Su<Shl>, I64, I32>; | 1179 def: AccRRR_pat<S2_asl_r_p_xor, Xor, Su<Shl>, I64, I64, I32>; |
1063 } | 1180 } |
1064 | 1181 |
1065 | 1182 |
1066 class OpshIRI_pat<InstHexagon MI, PatFrag Op, PatFrag ShOp, | 1183 class OpshIRI_pat<InstHexagon MI, PatFrag Op, PatFrag ShOp, |
1067 PatFrag RegPred, PatFrag ImmPred> | 1184 PatFrag RegPred, PatFrag ImmPred> |
1089 (Zext64 (and I32:$a, (i32 65535)))), | 1206 (Zext64 (and I32:$a, (i32 65535)))), |
1090 (shl (Aext64 (and I32:$c, (i32 65535))), (i32 32))), | 1207 (shl (Aext64 (and I32:$c, (i32 65535))), (i32 32))), |
1091 (shl (Aext64 I32:$d), (i32 48))), | 1208 (shl (Aext64 I32:$d), (i32 48))), |
1092 (Combinew (A2_combine_ll I32:$d, I32:$c), | 1209 (Combinew (A2_combine_ll I32:$d, I32:$c), |
1093 (A2_combine_ll I32:$b, I32:$a))>; | 1210 (A2_combine_ll I32:$b, I32:$a))>; |
1094 | |
1095 def: Pat<(or (or (shl (or (shl (i32 (extloadi8 (add I32:$b, 3))), | |
1096 (i32 8)), | |
1097 (i32 (zextloadi8 (add I32:$b, 2)))), | |
1098 (i32 16)), | |
1099 (shl (i32 (zextloadi8 (add I32:$b, 1))), (i32 8))), | |
1100 (zextloadi8 I32:$b)), | |
1101 (A2_swiz (L2_loadri_io IntRegs:$b, 0))>; | |
1102 | 1211 |
1103 let AddedComplexity = 200 in { | 1212 let AddedComplexity = 200 in { |
1104 def: Pat<(or (shl I32:$Rt, (i32 16)), (and I32:$Rs, (i32 65535))), | 1213 def: Pat<(or (shl I32:$Rt, (i32 16)), (and I32:$Rs, (i32 65535))), |
1105 (A2_combine_ll I32:$Rt, I32:$Rs)>; | 1214 (A2_combine_ll I32:$Rt, I32:$Rs)>; |
1106 def: Pat<(or (shl I32:$Rt, (i32 16)), (srl I32:$Rs, (i32 16))), | 1215 def: Pat<(or (shl I32:$Rt, (i32 16)), (srl I32:$Rs, (i32 16))), |
1143 def: Pat<(srl V4I16:$b, (v4i16 (HexagonVSPLAT u4_0ImmPred:$c))), | 1252 def: Pat<(srl V4I16:$b, (v4i16 (HexagonVSPLAT u4_0ImmPred:$c))), |
1144 (S2_lsr_i_vh V4I16:$b, imm:$c)>; | 1253 (S2_lsr_i_vh V4I16:$b, imm:$c)>; |
1145 def: Pat<(shl V4I16:$b, (v4i16 (HexagonVSPLAT u4_0ImmPred:$c))), | 1254 def: Pat<(shl V4I16:$b, (v4i16 (HexagonVSPLAT u4_0ImmPred:$c))), |
1146 (S2_asl_i_vh V4I16:$b, imm:$c)>; | 1255 (S2_asl_i_vh V4I16:$b, imm:$c)>; |
1147 | 1256 |
1257 def: Pat<(HexagonVASR V2I16:$Rs, u4_0ImmPred:$S), | |
1258 (LoReg (S2_asr_i_vh (ToAext64 $Rs), imm:$S))>; | |
1259 def: Pat<(HexagonVASL V2I16:$Rs, u4_0ImmPred:$S), | |
1260 (LoReg (S2_asl_i_vh (ToAext64 $Rs), imm:$S))>; | |
1261 def: Pat<(HexagonVLSR V2I16:$Rs, u4_0ImmPred:$S), | |
1262 (LoReg (S2_lsr_i_vh (ToAext64 $Rs), imm:$S))>; | |
1263 def: Pat<(HexagonVASR V2I16:$Rs, I32:$Rt), | |
1264 (LoReg (S2_asr_i_vh (ToAext64 $Rs), I32:$Rt))>; | |
1265 def: Pat<(HexagonVASL V2I16:$Rs, I32:$Rt), | |
1266 (LoReg (S2_asl_i_vh (ToAext64 $Rs), I32:$Rt))>; | |
1267 def: Pat<(HexagonVLSR V2I16:$Rs, I32:$Rt), | |
1268 (LoReg (S2_lsr_i_vh (ToAext64 $Rs), I32:$Rt))>; | |
1269 | |
1148 | 1270 |
1149 // --(9) Arithmetic/bitwise ---------------------------------------------- | 1271 // --(9) Arithmetic/bitwise ---------------------------------------------- |
1150 // | 1272 // |
1151 | 1273 |
1152 def: Pat<(abs I32:$Rs), (A2_abs I32:$Rs)>; | 1274 def: Pat<(abs I32:$Rs), (A2_abs I32:$Rs)>; |
1153 def: Pat<(not I32:$Rs), (A2_subri -1, I32:$Rs)>; | 1275 def: Pat<(abs I64:$Rs), (A2_absp I64:$Rs)>; |
1154 def: Pat<(not I64:$Rs), (A2_notp I64:$Rs)>; | 1276 def: Pat<(not I32:$Rs), (A2_subri -1, I32:$Rs)>; |
1155 | 1277 def: Pat<(not I64:$Rs), (A2_notp I64:$Rs)>; |
1156 let Predicates = [HasV5T] in { | 1278 def: Pat<(ineg I64:$Rs), (A2_negp I64:$Rs)>; |
1157 def: Pat<(fabs F32:$Rs), (S2_clrbit_i F32:$Rs, 31)>; | 1279 |
1158 def: Pat<(fneg F32:$Rs), (S2_togglebit_i F32:$Rs, 31)>; | 1280 def: Pat<(fabs F32:$Rs), (S2_clrbit_i F32:$Rs, 31)>; |
1159 | 1281 def: Pat<(fneg F32:$Rs), (S2_togglebit_i F32:$Rs, 31)>; |
1160 def: Pat<(fabs F64:$Rs), | 1282 |
1161 (Combinew (S2_clrbit_i (HiReg $Rs), 31), | 1283 def: Pat<(fabs F64:$Rs), |
1162 (i32 (LoReg $Rs)))>; | 1284 (Combinew (S2_clrbit_i (HiReg $Rs), 31), |
1163 def: Pat<(fneg F64:$Rs), | 1285 (i32 (LoReg $Rs)))>; |
1164 (Combinew (S2_togglebit_i (HiReg $Rs), 31), | 1286 def: Pat<(fneg F64:$Rs), |
1165 (i32 (LoReg $Rs)))>; | 1287 (Combinew (S2_togglebit_i (HiReg $Rs), 31), |
1166 } | 1288 (i32 (LoReg $Rs)))>; |
1167 | |
1168 let AddedComplexity = 50 in | |
1169 def: Pat<(xor (add (sra I32:$Rs, (i32 31)), | |
1170 I32:$Rs), | |
1171 (sra I32:$Rs, (i32 31))), | |
1172 (A2_abs I32:$Rs)>; | |
1173 | |
1174 | 1289 |
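The fabs/fneg patterns above manipulate the IEEE-754 sign bit directly; a C++ sketch of the binary32 case (illustrative only):

    #include <cstdint>
    #include <cstring>
    inline float fabs_bits(float x) {             // S2_clrbit_i ..., 31
      uint32_t u; std::memcpy(&u, &x, sizeof u);
      u &= 0x7fffffffu;
      std::memcpy(&x, &u, sizeof u);
      return x;
    }
    inline float fneg_bits(float x) {             // S2_togglebit_i ..., 31
      uint32_t u; std::memcpy(&u, &x, sizeof u);
      u ^= 0x80000000u;
      std::memcpy(&x, &u, sizeof u);
      return x;
    }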
1175 def: Pat<(add I32:$Rs, anyimm:$s16), (A2_addi I32:$Rs, imm:$s16)>; | 1290 def: Pat<(add I32:$Rs, anyimm:$s16), (A2_addi I32:$Rs, imm:$s16)>; |
1176 def: Pat<(or I32:$Rs, anyimm:$s10), (A2_orir I32:$Rs, imm:$s10)>; | 1291 def: Pat<(or I32:$Rs, anyimm:$s10), (A2_orir I32:$Rs, imm:$s10)>; |
1177 def: Pat<(and I32:$Rs, anyimm:$s10), (A2_andir I32:$Rs, imm:$s10)>; | 1292 def: Pat<(and I32:$Rs, anyimm:$s10), (A2_andir I32:$Rs, imm:$s10)>; |
1178 def: Pat<(sub anyimm:$s10, I32:$Rs), (A2_subri imm:$s10, I32:$Rs)>; | 1293 def: Pat<(sub anyimm:$s10, I32:$Rs), (A2_subri imm:$s10, I32:$Rs)>; |
1198 def: OpR_RR_pat<A2_vaddw, Add, v2i32, V2I32>; | 1313 def: OpR_RR_pat<A2_vaddw, Add, v2i32, V2I32>; |
1199 def: OpR_RR_pat<A2_vsubub, Sub, v8i8, V8I8>; | 1314 def: OpR_RR_pat<A2_vsubub, Sub, v8i8, V8I8>; |
1200 def: OpR_RR_pat<A2_vsubh, Sub, v4i16, V4I16>; | 1315 def: OpR_RR_pat<A2_vsubh, Sub, v4i16, V4I16>; |
1201 def: OpR_RR_pat<A2_vsubw, Sub, v2i32, V2I32>; | 1316 def: OpR_RR_pat<A2_vsubw, Sub, v2i32, V2I32>; |
1202 | 1317 |
1318 def: OpR_RR_pat<A2_and, And, v4i8, V4I8>; | |
1319 def: OpR_RR_pat<A2_xor, Xor, v4i8, V4I8>; | |
1320 def: OpR_RR_pat<A2_or, Or, v4i8, V4I8>; | |
1203 def: OpR_RR_pat<A2_and, And, v2i16, V2I16>; | 1321 def: OpR_RR_pat<A2_and, And, v2i16, V2I16>; |
1204 def: OpR_RR_pat<A2_xor, Xor, v2i16, V2I16>; | 1322 def: OpR_RR_pat<A2_xor, Xor, v2i16, V2I16>; |
1205 def: OpR_RR_pat<A2_or, Or, v2i16, V2I16>; | 1323 def: OpR_RR_pat<A2_or, Or, v2i16, V2I16>; |
1206 | |
1207 def: OpR_RR_pat<A2_andp, And, v8i8, V8I8>; | 1324 def: OpR_RR_pat<A2_andp, And, v8i8, V8I8>; |
1325 def: OpR_RR_pat<A2_orp, Or, v8i8, V8I8>; | |
1326 def: OpR_RR_pat<A2_xorp, Xor, v8i8, V8I8>; | |
1208 def: OpR_RR_pat<A2_andp, And, v4i16, V4I16>; | 1327 def: OpR_RR_pat<A2_andp, And, v4i16, V4I16>; |
1328 def: OpR_RR_pat<A2_orp, Or, v4i16, V4I16>; | |
1329 def: OpR_RR_pat<A2_xorp, Xor, v4i16, V4I16>; | |
1209 def: OpR_RR_pat<A2_andp, And, v2i32, V2I32>; | 1330 def: OpR_RR_pat<A2_andp, And, v2i32, V2I32>; |
1210 def: OpR_RR_pat<A2_orp, Or, v8i8, V8I8>; | |
1211 def: OpR_RR_pat<A2_orp, Or, v4i16, V4I16>; | |
1212 def: OpR_RR_pat<A2_orp, Or, v2i32, V2I32>; | 1331 def: OpR_RR_pat<A2_orp, Or, v2i32, V2I32>; |
1213 def: OpR_RR_pat<A2_xorp, Xor, v8i8, V8I8>; | |
1214 def: OpR_RR_pat<A2_xorp, Xor, v4i16, V4I16>; | |
1215 def: OpR_RR_pat<A2_xorp, Xor, v2i32, V2I32>; | 1332 def: OpR_RR_pat<A2_xorp, Xor, v2i32, V2I32>; |
1216 | 1333 |
1217 def: OpR_RR_pat<M2_mpyi, Mul, i32, I32>; | 1334 def: OpR_RR_pat<M2_mpyi, Mul, i32, I32>; |
1218 def: OpR_RR_pat<M2_mpy_up, pf2<mulhs>, i32, I32>; | 1335 def: OpR_RR_pat<M2_mpy_up, pf2<mulhs>, i32, I32>; |
1219 def: OpR_RR_pat<M2_mpyu_up, pf2<mulhu>, i32, I32>; | 1336 def: OpR_RR_pat<M2_mpyu_up, pf2<mulhu>, i32, I32>; |
1232 def: OpR_RR_pat<C2_and, Mul, i1, I1>; | 1349 def: OpR_RR_pat<C2_and, Mul, i1, I1>; |
1233 def: OpR_RR_pat<C2_and, Mul, v2i1, V2I1>; | 1350 def: OpR_RR_pat<C2_and, Mul, v2i1, V2I1>; |
1234 def: OpR_RR_pat<C2_and, Mul, v4i1, V4I1>; | 1351 def: OpR_RR_pat<C2_and, Mul, v4i1, V4I1>; |
1235 def: OpR_RR_pat<C2_and, Mul, v8i1, V8I1>; | 1352 def: OpR_RR_pat<C2_and, Mul, v8i1, V8I1>; |
1236 | 1353 |
1237 let Predicates = [HasV5T] in { | 1354 def: OpR_RR_pat<F2_sfadd, pf2<fadd>, f32, F32>; |
1238 def: OpR_RR_pat<F2_sfadd, pf2<fadd>, f32, F32>; | 1355 def: OpR_RR_pat<F2_sfsub, pf2<fsub>, f32, F32>; |
1239 def: OpR_RR_pat<F2_sfsub, pf2<fsub>, f32, F32>; | 1356 def: OpR_RR_pat<F2_sfmpy, pf2<fmul>, f32, F32>; |
1240 def: OpR_RR_pat<F2_sfmpy, pf2<fmul>, f32, F32>; | 1357 def: OpR_RR_pat<F2_sfmin, pf2<fminnum>, f32, F32>; |
1241 def: OpR_RR_pat<F2_sfmin, pf2<fminnum>, f32, F32>; | 1358 def: OpR_RR_pat<F2_sfmax, pf2<fmaxnum>, f32, F32>; |
1242 def: OpR_RR_pat<F2_sfmax, pf2<fmaxnum>, f32, F32>; | 1359 |
1360 let Predicates = [HasV66] in { | |
1361 def: OpR_RR_pat<F2_dfadd, pf2<fadd>, f64, F64>; | |
1362 def: OpR_RR_pat<F2_dfsub, pf2<fsub>, f64, F64>; | |
1243 } | 1363 } |
1244 | 1364 |
1245 // In expressions like a0*b0 + a1*b1 + ..., prefer to generate multiply-add, | 1365 // In expressions like a0*b0 + a1*b1 + ..., prefer to generate multiply-add, |
1246 // over add-add with individual multiplies as inputs. | 1366 // over add-add with individual multiplies as inputs. |
1247 let AddedComplexity = 10 in { | 1367 let AddedComplexity = 10 in { |
1248 def: AccRRI_pat<M2_macsip, Add, Su<Mul>, I32, u32_0ImmPred>; | 1368 def: AccRRI_pat<M2_macsip, Add, Su<Mul>, I32, u32_0ImmPred>; |
1249 def: AccRRI_pat<M2_macsin, Sub, Su<Mul>, I32, u32_0ImmPred>; | 1369 def: AccRRI_pat<M2_macsin, Sub, Su<Mul>, I32, u32_0ImmPred>; |
1250 def: AccRRR_pat<M2_maci, Add, Su<Mul>, I32, I32>; | 1370 def: AccRRR_pat<M2_maci, Add, Su<Mul>, I32, I32, I32>; |
1371 let Predicates = [HasV66] in | |
1372 def: AccRRR_pat<M2_mnaci, Sub, Su<Mul>, I32, I32, I32>; | |
1251 } | 1373 } |
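As a quick C-level illustration of the shape this complexity bump is about (a sketch, not part of the patch):

    int dot2(int a0, int a1, int b0, int b1) {
      // a0*b0 + a1*b1: with AddedComplexity = 10 the add-of-mul shape is
      // meant to select M2_maci-style multiply-accumulates rather than
      // separate multiplies followed by adds.
      return a0*b0 + a1*b1;
    }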
1252 | 1374 |
1253 def: AccRRI_pat<M2_naccii, Sub, Su<Add>, I32, s32_0ImmPred>; | 1375 def: AccRRI_pat<M2_naccii, Sub, Su<Add>, I32, s32_0ImmPred>; |
1254 def: AccRRI_pat<M2_accii, Add, Su<Add>, I32, s32_0ImmPred>; | 1376 def: AccRRI_pat<M2_accii, Add, Su<Add>, I32, s32_0ImmPred>; |
1255 def: AccRRR_pat<M2_acci, Add, Su<Add>, I32, I32>; | 1377 def: AccRRR_pat<M2_acci, Add, Su<Add>, I32, I32, I32>; |
1256 | 1378 |
1257 // Mulh for vectors | 1379 // Mulh for vectors |
1258 // | 1380 // |
1259 def: Pat<(v2i32 (mulhu V2I32:$Rss, V2I32:$Rtt)), | 1381 def: Pat<(v2i32 (mulhu V2I32:$Rss, V2I32:$Rtt)), |
1260 (Combinew (M2_mpyu_up (HiReg $Rss), (HiReg $Rtt)), | 1382 (Combinew (M2_mpyu_up (HiReg $Rss), (HiReg $Rtt)), |
1318 (M2_mpysin I32:$Rs, (NegImm8 imm:$n8))>; | 1440 (M2_mpysin I32:$Rs, (NegImm8 imm:$n8))>; |
1319 | 1441 |
1320 def: Pat<(add Sext64:$Rs, I64:$Rt), | 1442 def: Pat<(add Sext64:$Rs, I64:$Rt), |
1321 (A2_addsp (LoReg Sext64:$Rs), I64:$Rt)>; | 1443 (A2_addsp (LoReg Sext64:$Rs), I64:$Rt)>; |
1322 | 1444 |
1323 def: AccRRR_pat<M4_and_and, And, Su<And>, I32, I32>; | 1445 def: AccRRR_pat<M4_and_and, And, Su_ni1<And>, I32, I32, I32>; |
1324 def: AccRRR_pat<M4_and_or, And, Su<Or>, I32, I32>; | 1446 def: AccRRR_pat<M4_and_or, And, Su_ni1<Or>, I32, I32, I32>; |
1325 def: AccRRR_pat<M4_and_xor, And, Su<Xor>, I32, I32>; | 1447 def: AccRRR_pat<M4_and_xor, And, Su<Xor>, I32, I32, I32>; |
1326 def: AccRRR_pat<M4_or_and, Or, Su<And>, I32, I32>; | 1448 def: AccRRR_pat<M4_or_and, Or, Su_ni1<And>, I32, I32, I32>; |
1327 def: AccRRR_pat<M4_or_or, Or, Su<Or>, I32, I32>; | 1449 def: AccRRR_pat<M4_or_or, Or, Su_ni1<Or>, I32, I32, I32>; |
1328 def: AccRRR_pat<M4_or_xor, Or, Su<Xor>, I32, I32>; | 1450 def: AccRRR_pat<M4_or_xor, Or, Su<Xor>, I32, I32, I32>; |
1329 def: AccRRR_pat<M4_xor_and, Xor, Su<And>, I32, I32>; | 1451 def: AccRRR_pat<M4_xor_and, Xor, Su_ni1<And>, I32, I32, I32>; |
1330 def: AccRRR_pat<M4_xor_or, Xor, Su<Or>, I32, I32>; | 1452 def: AccRRR_pat<M4_xor_or, Xor, Su_ni1<Or>, I32, I32, I32>; |
1331 def: AccRRR_pat<M2_xor_xacc, Xor, Su<Xor>, I32, I32>; | 1453 def: AccRRR_pat<M2_xor_xacc, Xor, Su<Xor>, I32, I32, I32>; |
1332 def: AccRRR_pat<M4_xor_xacc, Xor, Su<Xor>, I64, I64>; | 1454 def: AccRRR_pat<M4_xor_xacc, Xor, Su<Xor>, I64, I64, I64>; |
1333 | 1455 |
1334 // For dags like (or (and (not _), _), (shl _, _)) where the "or" with | 1456 // For dags like (or (and (not _), _), (shl _, _)) where the "or" with |
1335 // one argument matches the patterns below, and with the other argument | 1457 // one argument matches the patterns below, and with the other argument |
1336 // matches S2_asl_r_r_or, etc., prefer the patterns below. | 1458 // matches S2_asl_r_r_or, etc., prefer the patterns below.
1337 let AddedComplexity = 110 in { // greater than S2_asl_r_r_and/or/xor. | 1459 let AddedComplexity = 110 in { // greater than S2_asl_r_r_and/or/xor. |
1338 def: AccRRR_pat<M4_and_andn, And, Su<Not2<And>>, I32, I32>; | 1460 def: AccRRR_pat<M4_and_andn, And, Su<Not2<And>>, I32, I32, I32>; |
1339 def: AccRRR_pat<M4_or_andn, Or, Su<Not2<And>>, I32, I32>; | 1461 def: AccRRR_pat<M4_or_andn, Or, Su<Not2<And>>, I32, I32, I32>; |
1340 def: AccRRR_pat<M4_xor_andn, Xor, Su<Not2<And>>, I32, I32>; | 1462 def: AccRRR_pat<M4_xor_andn, Xor, Su<Not2<And>>, I32, I32, I32>; |
1341 } | 1463 } |
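A C sketch of the compound logical shape these and-andn patterns prefer (illustrative only):

    unsigned and_andn(unsigned x, unsigned y, unsigned z) {
      // x & (z & ~y): the raised AddedComplexity makes the single
      // M4_and_andn-style match win when the S2_asl_r_r_* alternatives
      // could also apply to the surrounding dag.
      return x & (z & ~y);
    }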
1342 | 1464 |
1343 // S4_addaddi and S4_subaddi don't have tied operands, so give them | 1465 // S4_addaddi and S4_subaddi don't have tied operands, so give them |
1344 // a bit of preference. | 1466 // a bit of preference. |
1345 let AddedComplexity = 30 in { | 1467 let AddedComplexity = 30 in { |
1471 (M4_mpyri_addr IntRegs:$Ru, IntRegs:$Rs, imm:$u6)>; | 1593 (M4_mpyri_addr IntRegs:$Ru, IntRegs:$Rs, imm:$u6)>; |
1472 def: Pat<(add I32:$Ru, (Su<Mul> I32:$Ry, I32:$Rs)), | 1594 def: Pat<(add I32:$Ru, (Su<Mul> I32:$Ry, I32:$Rs)), |
1473 (M4_mpyrr_addr IntRegs:$Ru, IntRegs:$Ry, IntRegs:$Rs)>; | 1595 (M4_mpyrr_addr IntRegs:$Ru, IntRegs:$Ry, IntRegs:$Rs)>; |
1474 | 1596 |
1475 | 1597 |
1476 let Predicates = [HasV5T] in { | 1598 def: Pat<(fma F32:$Rs, F32:$Rt, F32:$Rx), |
1477 def: Pat<(fma F32:$Rs, F32:$Rt, F32:$Rx), | 1599 (F2_sffma F32:$Rx, F32:$Rs, F32:$Rt)>; |
1478 (F2_sffma F32:$Rx, F32:$Rs, F32:$Rt)>; | 1600 def: Pat<(fma (fneg F32:$Rs), F32:$Rt, F32:$Rx), |
1479 def: Pat<(fma (fneg F32:$Rs), F32:$Rt, F32:$Rx), | 1601 (F2_sffms F32:$Rx, F32:$Rs, F32:$Rt)>; |
1480 (F2_sffms F32:$Rx, F32:$Rs, F32:$Rt)>; | 1602 def: Pat<(fma F32:$Rs, (fneg F32:$Rt), F32:$Rx), |
1481 def: Pat<(fma F32:$Rs, (fneg F32:$Rt), F32:$Rx), | 1603 (F2_sffms F32:$Rx, F32:$Rs, F32:$Rt)>; |
1482 (F2_sffms F32:$Rx, F32:$Rs, F32:$Rt)>; | |
1483 } | |
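In C these fused patterns correspond to fma calls; a minimal sketch (using clang's __builtin_fmaf, which lowers to llvm.fma.f32), noting that the accumulator ends up as the first instruction operand:

    float mac(float a, float b, float acc) {
      // llvm.fma.f32 selects F2_sffma with acc in the accumulator slot;
      // negating a or b instead selects the F2_sffms forms above.
      return __builtin_fmaf(a, b, acc);
    }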
1484 | 1604 |
1485 | 1605 |
1486 def: Pat<(mul V2I32:$Rs, V2I32:$Rt), | 1606 def: Pat<(mul V2I32:$Rs, V2I32:$Rt), |
1487 (PS_vmulw V2I32:$Rs, V2I32:$Rt)>; | 1607 (PS_vmulw V2I32:$Rs, V2I32:$Rt)>; |
1488 def: Pat<(add V2I32:$Rx, (mul V2I32:$Rs, V2I32:$Rt)), | 1608 def: Pat<(add V2I32:$Rx, (mul V2I32:$Rs, V2I32:$Rt)), |
1489 (PS_vmulw_acc V2I32:$Rx, V2I32:$Rs, V2I32:$Rt)>; | 1609 (PS_vmulw_acc V2I32:$Rx, V2I32:$Rs, V2I32:$Rt)>; |
1490 | 1610 |
1491 // Add/subtract two v4i8: Hexagon does not have an insn for this one, so | 1611 // Add/subtract two v4i8: Hexagon does not have an insn for this one, so |
1492 // we use the double-register v8i8 add and keep only the low part of the result. | 1612 // we use the double-register v8i8 add and keep only the low part of the result.
1493 def: Pat<(add V4I8:$Rs, V4I8:$Rt), | 1613 def: Pat<(add V4I8:$Rs, V4I8:$Rt), |
1494 (LoReg (A2_vaddub (ToZext64 $Rs), (ToZext64 $Rt)))>; | 1614 (LoReg (A2_vaddub (ToAext64 $Rs), (ToAext64 $Rt)))>; |
1495 def: Pat<(sub V4I8:$Rs, V4I8:$Rt), | 1615 def: Pat<(sub V4I8:$Rs, V4I8:$Rt), |
1496 (LoReg (A2_vsubub (ToZext64 $Rs), (ToZext64 $Rt)))>; | 1616 (LoReg (A2_vsubub (ToAext64 $Rs), (ToAext64 $Rt)))>; |
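A scalar C analogy of the widen-then-truncate trick used here; the real patterns do the same per lane on v8i8:

    unsigned char add_via_wide(unsigned char a, unsigned char b) {
      // Do the arithmetic in a wider register and keep only the low part;
      // the high half is never observed, which is why an any-extend of the
      // inputs (the ToAext64 on the new side) is sufficient.
      unsigned long long wide = (unsigned long long)a + (unsigned long long)b;
      return (unsigned char)wide;
    }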
1497 | 1617 |
1498 // Use M2_vmpy2s_s0 for half-word vector multiply. It multiplies two | 1618 // Use M2_vmpy2s_s0 for half-word vector multiply. It multiplies two |
1499 // half-words and saturates the result to a 32-bit value, but the | 1619 // half-words and saturates the result to a 32-bit value, but the
1500 // saturation never actually happens here (it can only occur with scaling). | 1620 // saturation never actually happens here (it can only occur with scaling).
1501 def: Pat<(v2i16 (mul V2I16:$Rs, V2I16:$Rt)), | 1621 def: Pat<(v2i16 (mul V2I16:$Rs, V2I16:$Rt)), |
1505 (S2_vtrunewh (M2_vmpy2s_s0 (HiReg $Rs), (HiReg $Rt)), | 1625 (S2_vtrunewh (M2_vmpy2s_s0 (HiReg $Rs), (HiReg $Rt)), |
1506 (M2_vmpy2s_s0 (LoReg $Rs), (LoReg $Rt)))>; | 1626 (M2_vmpy2s_s0 (LoReg $Rs), (LoReg $Rt)))>; |
1507 | 1627 |
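Per lane, the halfword multiplies above behave like this C sketch: the product is formed in 32 bits and only the low halfword is kept, so the saturation mentioned in the comment cannot fire.

    short mul_lane(short a, short b) {
      int product = (int)a * (int)b;  // full 32-bit product (M2_vmpy2s_s0)
      return (short)product;          // keep the low halfword (S2_vtrunewh)
    }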
1508 // Multiplies two v4i8 vectors. | 1628 // Multiplies two v4i8 vectors. |
1509 def: Pat<(v4i8 (mul V4I8:$Rs, V4I8:$Rt)), | 1629 def: Pat<(v4i8 (mul V4I8:$Rs, V4I8:$Rt)), |
1510 (S2_vtrunehb (M5_vmpybuu V4I8:$Rs, V4I8:$Rt))>, | 1630 (S2_vtrunehb (M5_vmpybuu V4I8:$Rs, V4I8:$Rt))>; |
1511 Requires<[HasV5T]>; | |
1512 | 1631 |
1513 // Multiplies two v8i8 vectors. | 1632 // Multiplies two v8i8 vectors. |
1514 def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)), | 1633 def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)), |
1515 (Combinew (S2_vtrunehb (M5_vmpybuu (HiReg $Rs), (HiReg $Rt))), | 1634 (Combinew (S2_vtrunehb (M5_vmpybuu (HiReg $Rs), (HiReg $Rt))), |
1516 (S2_vtrunehb (M5_vmpybuu (LoReg $Rs), (LoReg $Rt))))>, | 1635 (S2_vtrunehb (M5_vmpybuu (LoReg $Rs), (LoReg $Rt))))>; |
1517 Requires<[HasV5T]>; | |
1518 | 1636 |
1519 | 1637 |
1520 // --(10) Bit ------------------------------------------------------------ | 1638 // --(10) Bit ------------------------------------------------------------ |
1521 // | 1639 // |
1522 | 1640 |
1825 defm: Loadxi_pat<zextloadi8, i32, anyimm0, L2_loadrub_io>; | 1943 defm: Loadxi_pat<zextloadi8, i32, anyimm0, L2_loadrub_io>; |
1826 defm: Loadxi_pat<zextloadi16, i32, anyimm1, L2_loadruh_io>; | 1944 defm: Loadxi_pat<zextloadi16, i32, anyimm1, L2_loadruh_io>; |
1827 defm: Loadxi_pat<zextloadv2i8, v2i16, anyimm1, L2_loadbzw2_io>; | 1945 defm: Loadxi_pat<zextloadv2i8, v2i16, anyimm1, L2_loadbzw2_io>; |
1828 defm: Loadxi_pat<zextloadv4i8, v4i16, anyimm2, L2_loadbzw4_io>; | 1946 defm: Loadxi_pat<zextloadv4i8, v4i16, anyimm2, L2_loadbzw4_io>; |
1829 defm: Loadxi_pat<load, i32, anyimm2, L2_loadri_io>; | 1947 defm: Loadxi_pat<load, i32, anyimm2, L2_loadri_io>; |
1948 defm: Loadxi_pat<load, v2i16, anyimm2, L2_loadri_io>; | |
1949 defm: Loadxi_pat<load, v4i8, anyimm2, L2_loadri_io>; | |
1830 defm: Loadxi_pat<load, i64, anyimm3, L2_loadrd_io>; | 1950 defm: Loadxi_pat<load, i64, anyimm3, L2_loadrd_io>; |
1951 defm: Loadxi_pat<load, v2i32, anyimm3, L2_loadrd_io>; | |
1952 defm: Loadxi_pat<load, v4i16, anyimm3, L2_loadrd_io>; | |
1953 defm: Loadxi_pat<load, v8i8, anyimm3, L2_loadrd_io>; | |
1831 defm: Loadxi_pat<load, f32, anyimm2, L2_loadri_io>; | 1954 defm: Loadxi_pat<load, f32, anyimm2, L2_loadri_io>; |
1832 defm: Loadxi_pat<load, f64, anyimm3, L2_loadrd_io>; | 1955 defm: Loadxi_pat<load, f64, anyimm3, L2_loadrd_io>; |
1833 // No sextloadi1. | 1956 // No sextloadi1. |
1834 | 1957 |
1835 defm: Loadxi_pat<atomic_load_8 , i32, anyimm0, L2_loadrub_io>; | 1958 defm: Loadxi_pat<atomic_load_8 , i32, anyimm0, L2_loadrub_io>; |
1837 defm: Loadxi_pat<atomic_load_32, i32, anyimm2, L2_loadri_io>; | 1960 defm: Loadxi_pat<atomic_load_32, i32, anyimm2, L2_loadri_io>; |
1838 defm: Loadxi_pat<atomic_load_64, i64, anyimm3, L2_loadrd_io>; | 1961 defm: Loadxi_pat<atomic_load_64, i64, anyimm3, L2_loadrd_io>; |
1839 } | 1962 } |
1840 | 1963 |
1841 let AddedComplexity = 30 in { | 1964 let AddedComplexity = 30 in { |
1842 defm: Loadxim_pat<extloadi1, i64, ToZext64, anyimm0, L2_loadrub_io>; | 1965 defm: Loadxim_pat<extloadi1, i64, ToAext64, anyimm0, L2_loadrub_io>; |
1843 defm: Loadxim_pat<extloadi8, i64, ToZext64, anyimm0, L2_loadrub_io>; | 1966 defm: Loadxim_pat<extloadi8, i64, ToAext64, anyimm0, L2_loadrub_io>; |
1844 defm: Loadxim_pat<extloadi16, i64, ToZext64, anyimm1, L2_loadruh_io>; | 1967 defm: Loadxim_pat<extloadi16, i64, ToAext64, anyimm1, L2_loadruh_io>; |
1845 defm: Loadxim_pat<extloadi32, i64, ToZext64, anyimm2, L2_loadri_io>; | 1968 defm: Loadxim_pat<extloadi32, i64, ToAext64, anyimm2, L2_loadri_io>; |
1846 defm: Loadxim_pat<zextloadi1, i64, ToZext64, anyimm0, L2_loadrub_io>; | 1969 defm: Loadxim_pat<zextloadi1, i64, ToZext64, anyimm0, L2_loadrub_io>; |
1847 defm: Loadxim_pat<zextloadi8, i64, ToZext64, anyimm0, L2_loadrub_io>; | 1970 defm: Loadxim_pat<zextloadi8, i64, ToZext64, anyimm0, L2_loadrub_io>; |
1848 defm: Loadxim_pat<zextloadi16, i64, ToZext64, anyimm1, L2_loadruh_io>; | 1971 defm: Loadxim_pat<zextloadi16, i64, ToZext64, anyimm1, L2_loadruh_io>; |
1849 defm: Loadxim_pat<zextloadi32, i64, ToZext64, anyimm2, L2_loadri_io>; | 1972 defm: Loadxim_pat<zextloadi32, i64, ToZext64, anyimm2, L2_loadri_io>; |
1850 defm: Loadxim_pat<sextloadi8, i64, ToSext64, anyimm0, L2_loadrb_io>; | 1973 defm: Loadxim_pat<sextloadi8, i64, ToSext64, anyimm0, L2_loadrb_io>; |
1863 def: Loadxu_pat<sextloadv4i8, v4i16, anyimm2, L4_loadbzw4_ur>; | 1986 def: Loadxu_pat<sextloadv4i8, v4i16, anyimm2, L4_loadbzw4_ur>; |
1864 def: Loadxu_pat<zextloadi8, i32, anyimm0, L4_loadrub_ur>; | 1987 def: Loadxu_pat<zextloadi8, i32, anyimm0, L4_loadrub_ur>; |
1865 def: Loadxu_pat<zextloadi16, i32, anyimm1, L4_loadruh_ur>; | 1988 def: Loadxu_pat<zextloadi16, i32, anyimm1, L4_loadruh_ur>; |
1866 def: Loadxu_pat<zextloadv2i8, v2i16, anyimm1, L4_loadbzw2_ur>; | 1989 def: Loadxu_pat<zextloadv2i8, v2i16, anyimm1, L4_loadbzw2_ur>; |
1867 def: Loadxu_pat<zextloadv4i8, v4i16, anyimm2, L4_loadbzw4_ur>; | 1990 def: Loadxu_pat<zextloadv4i8, v4i16, anyimm2, L4_loadbzw4_ur>; |
1991 def: Loadxu_pat<load, i32, anyimm2, L4_loadri_ur>; | |
1992 def: Loadxu_pat<load, v2i16, anyimm2, L4_loadri_ur>; | |
1993 def: Loadxu_pat<load, v4i8, anyimm2, L4_loadri_ur>; | |
1994 def: Loadxu_pat<load, i64, anyimm3, L4_loadrd_ur>; | |
1995 def: Loadxu_pat<load, v2i32, anyimm3, L4_loadrd_ur>; | |
1996 def: Loadxu_pat<load, v4i16, anyimm3, L4_loadrd_ur>; | |
1997 def: Loadxu_pat<load, v8i8, anyimm3, L4_loadrd_ur>; | |
1868 def: Loadxu_pat<load, f32, anyimm2, L4_loadri_ur>; | 1998 def: Loadxu_pat<load, f32, anyimm2, L4_loadri_ur>; |
1869 def: Loadxu_pat<load, f64, anyimm3, L4_loadrd_ur>; | 1999 def: Loadxu_pat<load, f64, anyimm3, L4_loadrd_ur>; |
1870 def: Loadxu_pat<load, i32, anyimm2, L4_loadri_ur>; | |
1871 def: Loadxu_pat<load, i64, anyimm3, L4_loadrd_ur>; | |
1872 | 2000 |
1873 def: Loadxum_pat<sextloadi8, i64, anyimm0, ToSext64, L4_loadrb_ur>; | 2001 def: Loadxum_pat<sextloadi8, i64, anyimm0, ToSext64, L4_loadrb_ur>; |
1874 def: Loadxum_pat<zextloadi8, i64, anyimm0, ToZext64, L4_loadrub_ur>; | 2002 def: Loadxum_pat<zextloadi8, i64, anyimm0, ToZext64, L4_loadrub_ur>; |
1875 def: Loadxum_pat<extloadi8, i64, anyimm0, ToZext64, L4_loadrub_ur>; | 2003 def: Loadxum_pat<extloadi8, i64, anyimm0, ToAext64, L4_loadrub_ur>; |
1876 def: Loadxum_pat<sextloadi16, i64, anyimm1, ToSext64, L4_loadrh_ur>; | 2004 def: Loadxum_pat<sextloadi16, i64, anyimm1, ToSext64, L4_loadrh_ur>; |
1877 def: Loadxum_pat<zextloadi16, i64, anyimm1, ToZext64, L4_loadruh_ur>; | 2005 def: Loadxum_pat<zextloadi16, i64, anyimm1, ToZext64, L4_loadruh_ur>; |
1878 def: Loadxum_pat<extloadi16, i64, anyimm1, ToZext64, L4_loadruh_ur>; | 2006 def: Loadxum_pat<extloadi16, i64, anyimm1, ToAext64, L4_loadruh_ur>; |
1879 def: Loadxum_pat<sextloadi32, i64, anyimm2, ToSext64, L4_loadri_ur>; | 2007 def: Loadxum_pat<sextloadi32, i64, anyimm2, ToSext64, L4_loadri_ur>; |
1880 def: Loadxum_pat<zextloadi32, i64, anyimm2, ToZext64, L4_loadri_ur>; | 2008 def: Loadxum_pat<zextloadi32, i64, anyimm2, ToZext64, L4_loadri_ur>; |
1881 def: Loadxum_pat<extloadi32, i64, anyimm2, ToZext64, L4_loadri_ur>; | 2009 def: Loadxum_pat<extloadi32, i64, anyimm2, ToAext64, L4_loadri_ur>; |
1882 } | 2010 } |
1883 | 2011 |
1884 let AddedComplexity = 40 in { | 2012 let AddedComplexity = 40 in { |
1885 def: Loadxr_shl_pat<extloadi8, i32, L4_loadrub_rr>; | 2013 def: Loadxr_shl_pat<extloadi8, i32, L4_loadrub_rr>; |
1886 def: Loadxr_shl_pat<zextloadi8, i32, L4_loadrub_rr>; | 2014 def: Loadxr_shl_pat<zextloadi8, i32, L4_loadrub_rr>; |
1887 def: Loadxr_shl_pat<sextloadi8, i32, L4_loadrb_rr>; | 2015 def: Loadxr_shl_pat<sextloadi8, i32, L4_loadrb_rr>; |
1888 def: Loadxr_shl_pat<extloadi16, i32, L4_loadruh_rr>; | 2016 def: Loadxr_shl_pat<extloadi16, i32, L4_loadruh_rr>; |
1889 def: Loadxr_shl_pat<zextloadi16, i32, L4_loadruh_rr>; | 2017 def: Loadxr_shl_pat<zextloadi16, i32, L4_loadruh_rr>; |
1890 def: Loadxr_shl_pat<sextloadi16, i32, L4_loadrh_rr>; | 2018 def: Loadxr_shl_pat<sextloadi16, i32, L4_loadrh_rr>; |
1891 def: Loadxr_shl_pat<load, i32, L4_loadri_rr>; | 2019 def: Loadxr_shl_pat<load, i32, L4_loadri_rr>; |
1892 def: Loadxr_shl_pat<load, i64, L4_loadrd_rr>; | 2020 def: Loadxr_shl_pat<load, v2i16, L4_loadri_rr>; |
1893 def: Loadxr_shl_pat<load, f32, L4_loadri_rr>; | 2021 def: Loadxr_shl_pat<load, v4i8, L4_loadri_rr>; |
1894 def: Loadxr_shl_pat<load, f64, L4_loadrd_rr>; | 2022 def: Loadxr_shl_pat<load, i64, L4_loadrd_rr>; |
2023 def: Loadxr_shl_pat<load, v2i32, L4_loadrd_rr>; | |
2024 def: Loadxr_shl_pat<load, v4i16, L4_loadrd_rr>; | |
2025 def: Loadxr_shl_pat<load, v8i8, L4_loadrd_rr>; | |
2026 def: Loadxr_shl_pat<load, f32, L4_loadri_rr>; | |
2027 def: Loadxr_shl_pat<load, f64, L4_loadrd_rr>; | |
1895 } | 2028 } |
1896 | 2029 |
1897 let AddedComplexity = 20 in { | 2030 let AddedComplexity = 20 in { |
1898 def: Loadxr_add_pat<extloadi8, i32, L4_loadrub_rr>; | 2031 def: Loadxr_add_pat<extloadi8, i32, L4_loadrub_rr>; |
1899 def: Loadxr_add_pat<zextloadi8, i32, L4_loadrub_rr>; | 2032 def: Loadxr_add_pat<zextloadi8, i32, L4_loadrub_rr>; |
1900 def: Loadxr_add_pat<sextloadi8, i32, L4_loadrb_rr>; | 2033 def: Loadxr_add_pat<sextloadi8, i32, L4_loadrb_rr>; |
1901 def: Loadxr_add_pat<extloadi16, i32, L4_loadruh_rr>; | 2034 def: Loadxr_add_pat<extloadi16, i32, L4_loadruh_rr>; |
1902 def: Loadxr_add_pat<zextloadi16, i32, L4_loadruh_rr>; | 2035 def: Loadxr_add_pat<zextloadi16, i32, L4_loadruh_rr>; |
1903 def: Loadxr_add_pat<sextloadi16, i32, L4_loadrh_rr>; | 2036 def: Loadxr_add_pat<sextloadi16, i32, L4_loadrh_rr>; |
1904 def: Loadxr_add_pat<load, i32, L4_loadri_rr>; | 2037 def: Loadxr_add_pat<load, i32, L4_loadri_rr>; |
1905 def: Loadxr_add_pat<load, i64, L4_loadrd_rr>; | 2038 def: Loadxr_add_pat<load, v2i16, L4_loadri_rr>; |
1906 def: Loadxr_add_pat<load, f32, L4_loadri_rr>; | 2039 def: Loadxr_add_pat<load, v4i8, L4_loadri_rr>; |
1907 def: Loadxr_add_pat<load, f64, L4_loadrd_rr>; | 2040 def: Loadxr_add_pat<load, i64, L4_loadrd_rr>; |
2041 def: Loadxr_add_pat<load, v2i32, L4_loadrd_rr>; | |
2042 def: Loadxr_add_pat<load, v4i16, L4_loadrd_rr>; | |
2043 def: Loadxr_add_pat<load, v8i8, L4_loadrd_rr>; | |
2044 def: Loadxr_add_pat<load, f32, L4_loadri_rr>; | |
2045 def: Loadxr_add_pat<load, f64, L4_loadrd_rr>; | |
1908 } | 2046 } |
1909 | 2047 |
1910 let AddedComplexity = 40 in { | 2048 let AddedComplexity = 40 in { |
1911 def: Loadxrm_shl_pat<extloadi8, i64, ToZext64, L4_loadrub_rr>; | 2049 def: Loadxrm_shl_pat<extloadi8, i64, ToAext64, L4_loadrub_rr>; |
1912 def: Loadxrm_shl_pat<zextloadi8, i64, ToZext64, L4_loadrub_rr>; | 2050 def: Loadxrm_shl_pat<zextloadi8, i64, ToZext64, L4_loadrub_rr>; |
1913 def: Loadxrm_shl_pat<sextloadi8, i64, ToSext64, L4_loadrb_rr>; | 2051 def: Loadxrm_shl_pat<sextloadi8, i64, ToSext64, L4_loadrb_rr>; |
1914 def: Loadxrm_shl_pat<extloadi16, i64, ToZext64, L4_loadruh_rr>; | 2052 def: Loadxrm_shl_pat<extloadi16, i64, ToAext64, L4_loadruh_rr>; |
1915 def: Loadxrm_shl_pat<zextloadi16, i64, ToZext64, L4_loadruh_rr>; | 2053 def: Loadxrm_shl_pat<zextloadi16, i64, ToZext64, L4_loadruh_rr>; |
1916 def: Loadxrm_shl_pat<sextloadi16, i64, ToSext64, L4_loadrh_rr>; | 2054 def: Loadxrm_shl_pat<sextloadi16, i64, ToSext64, L4_loadrh_rr>; |
1917 def: Loadxrm_shl_pat<extloadi32, i64, ToZext64, L4_loadri_rr>; | 2055 def: Loadxrm_shl_pat<extloadi32, i64, ToAext64, L4_loadri_rr>; |
1918 def: Loadxrm_shl_pat<zextloadi32, i64, ToZext64, L4_loadri_rr>; | 2056 def: Loadxrm_shl_pat<zextloadi32, i64, ToZext64, L4_loadri_rr>; |
1919 def: Loadxrm_shl_pat<sextloadi32, i64, ToSext64, L4_loadri_rr>; | 2057 def: Loadxrm_shl_pat<sextloadi32, i64, ToSext64, L4_loadri_rr>; |
1920 } | 2058 } |
1921 | 2059 |
1922 let AddedComplexity = 20 in { | 2060 let AddedComplexity = 20 in { |
1923 def: Loadxrm_add_pat<extloadi8, i64, ToZext64, L4_loadrub_rr>; | 2061 def: Loadxrm_add_pat<extloadi8, i64, ToAext64, L4_loadrub_rr>; |
1924 def: Loadxrm_add_pat<zextloadi8, i64, ToZext64, L4_loadrub_rr>; | 2062 def: Loadxrm_add_pat<zextloadi8, i64, ToZext64, L4_loadrub_rr>; |
1925 def: Loadxrm_add_pat<sextloadi8, i64, ToSext64, L4_loadrb_rr>; | 2063 def: Loadxrm_add_pat<sextloadi8, i64, ToSext64, L4_loadrb_rr>; |
1926 def: Loadxrm_add_pat<extloadi16, i64, ToZext64, L4_loadruh_rr>; | 2064 def: Loadxrm_add_pat<extloadi16, i64, ToAext64, L4_loadruh_rr>; |
1927 def: Loadxrm_add_pat<zextloadi16, i64, ToZext64, L4_loadruh_rr>; | 2065 def: Loadxrm_add_pat<zextloadi16, i64, ToZext64, L4_loadruh_rr>; |
1928 def: Loadxrm_add_pat<sextloadi16, i64, ToSext64, L4_loadrh_rr>; | 2066 def: Loadxrm_add_pat<sextloadi16, i64, ToSext64, L4_loadrh_rr>; |
1929 def: Loadxrm_add_pat<extloadi32, i64, ToZext64, L4_loadri_rr>; | 2067 def: Loadxrm_add_pat<extloadi32, i64, ToAext64, L4_loadri_rr>; |
1930 def: Loadxrm_add_pat<zextloadi32, i64, ToZext64, L4_loadri_rr>; | 2068 def: Loadxrm_add_pat<zextloadi32, i64, ToZext64, L4_loadri_rr>; |
1931 def: Loadxrm_add_pat<sextloadi32, i64, ToSext64, L4_loadri_rr>; | 2069 def: Loadxrm_add_pat<sextloadi32, i64, ToSext64, L4_loadri_rr>; |
1932 } | 2070 } |
1933 | 2071 |
1934 // Absolute address | 2072 // Absolute address |
1935 | 2073 |
1936 let AddedComplexity = 60 in { | 2074 let AddedComplexity = 60 in { |
1937 def: Loada_pat<zextloadi1, i32, anyimm0, PS_loadrubabs>; | 2075 def: Loada_pat<zextloadi1, i32, anyimm0, PS_loadrubabs>; |
1938 def: Loada_pat<sextloadi8, i32, anyimm0, PS_loadrbabs>; | 2076 def: Loada_pat<sextloadi8, i32, anyimm0, PS_loadrbabs>; |
1939 def: Loada_pat<extloadi8, i32, anyimm0, PS_loadrubabs>; | 2077 def: Loada_pat<extloadi8, i32, anyimm0, PS_loadrubabs>; |
1940 def: Loada_pat<zextloadi8, i32, anyimm0, PS_loadrubabs>; | 2078 def: Loada_pat<zextloadi8, i32, anyimm0, PS_loadrubabs>; |
1941 def: Loada_pat<sextloadi16, i32, anyimm1, PS_loadrhabs>; | 2079 def: Loada_pat<sextloadi16, i32, anyimm1, PS_loadrhabs>; |
1942 def: Loada_pat<extloadi16, i32, anyimm1, PS_loadruhabs>; | 2080 def: Loada_pat<extloadi16, i32, anyimm1, PS_loadruhabs>; |
1943 def: Loada_pat<zextloadi16, i32, anyimm1, PS_loadruhabs>; | 2081 def: Loada_pat<zextloadi16, i32, anyimm1, PS_loadruhabs>; |
1944 def: Loada_pat<load, i32, anyimm2, PS_loadriabs>; | 2082 def: Loada_pat<load, i32, anyimm2, PS_loadriabs>; |
1945 def: Loada_pat<load, i64, anyimm3, PS_loadrdabs>; | 2083 def: Loada_pat<load, v2i16, anyimm2, PS_loadriabs>; |
1946 def: Loada_pat<load, f32, anyimm2, PS_loadriabs>; | 2084 def: Loada_pat<load, v4i8, anyimm2, PS_loadriabs>; |
1947 def: Loada_pat<load, f64, anyimm3, PS_loadrdabs>; | 2085 def: Loada_pat<load, i64, anyimm3, PS_loadrdabs>; |
2086 def: Loada_pat<load, v2i32, anyimm3, PS_loadrdabs>; | |
2087 def: Loada_pat<load, v4i16, anyimm3, PS_loadrdabs>; | |
2088 def: Loada_pat<load, v8i8, anyimm3, PS_loadrdabs>; | |
2089 def: Loada_pat<load, f32, anyimm2, PS_loadriabs>; | |
2090 def: Loada_pat<load, f64, anyimm3, PS_loadrdabs>; | |
1948 | 2091 |
1949 def: Loada_pat<atomic_load_8, i32, anyimm0, PS_loadrubabs>; | 2092 def: Loada_pat<atomic_load_8, i32, anyimm0, PS_loadrubabs>; |
1950 def: Loada_pat<atomic_load_16, i32, anyimm1, PS_loadruhabs>; | 2093 def: Loada_pat<atomic_load_16, i32, anyimm1, PS_loadruhabs>; |
1951 def: Loada_pat<atomic_load_32, i32, anyimm2, PS_loadriabs>; | 2094 def: Loada_pat<atomic_load_32, i32, anyimm2, PS_loadriabs>; |
1952 def: Loada_pat<atomic_load_64, i64, anyimm3, PS_loadrdabs>; | 2095 def: Loada_pat<atomic_load_64, i64, anyimm3, PS_loadrdabs>; |
1953 } | 2096 } |
1954 | 2097 |
1955 let AddedComplexity = 30 in { | 2098 let AddedComplexity = 30 in { |
1956 def: Loadam_pat<extloadi8, i64, anyimm0, ToZext64, PS_loadrubabs>; | 2099 def: Loadam_pat<extloadi8, i64, anyimm0, ToAext64, PS_loadrubabs>; |
1957 def: Loadam_pat<sextloadi8, i64, anyimm0, ToSext64, PS_loadrbabs>; | 2100 def: Loadam_pat<sextloadi8, i64, anyimm0, ToSext64, PS_loadrbabs>; |
1958 def: Loadam_pat<zextloadi8, i64, anyimm0, ToZext64, PS_loadrubabs>; | 2101 def: Loadam_pat<zextloadi8, i64, anyimm0, ToZext64, PS_loadrubabs>; |
1959 def: Loadam_pat<extloadi16, i64, anyimm1, ToZext64, PS_loadruhabs>; | 2102 def: Loadam_pat<extloadi16, i64, anyimm1, ToAext64, PS_loadruhabs>; |
1960 def: Loadam_pat<sextloadi16, i64, anyimm1, ToSext64, PS_loadrhabs>; | 2103 def: Loadam_pat<sextloadi16, i64, anyimm1, ToSext64, PS_loadrhabs>; |
1961 def: Loadam_pat<zextloadi16, i64, anyimm1, ToZext64, PS_loadruhabs>; | 2104 def: Loadam_pat<zextloadi16, i64, anyimm1, ToZext64, PS_loadruhabs>; |
1962 def: Loadam_pat<extloadi32, i64, anyimm2, ToZext64, PS_loadriabs>; | 2105 def: Loadam_pat<extloadi32, i64, anyimm2, ToAext64, PS_loadriabs>; |
1963 def: Loadam_pat<sextloadi32, i64, anyimm2, ToSext64, PS_loadriabs>; | 2106 def: Loadam_pat<sextloadi32, i64, anyimm2, ToSext64, PS_loadriabs>; |
1964 def: Loadam_pat<zextloadi32, i64, anyimm2, ToZext64, PS_loadriabs>; | 2107 def: Loadam_pat<zextloadi32, i64, anyimm2, ToZext64, PS_loadriabs>; |
1965 | 2108 |
1966 def: Loadam_pat<load, i1, anyimm0, I32toI1, PS_loadrubabs>; | 2109 def: Loadam_pat<load, i1, anyimm0, I32toI1, PS_loadrubabs>; |
1967 def: Loadam_pat<zextloadi1, i64, anyimm0, ToZext64, PS_loadrubabs>; | 2110 def: Loadam_pat<zextloadi1, i64, anyimm0, ToZext64, PS_loadrubabs>; |
1968 } | 2111 } |
1969 | 2112 |
1970 // GP-relative address | 2113 // GP-relative address |
1971 | 2114 |
1972 let AddedComplexity = 100 in { | 2115 let AddedComplexity = 100 in { |
1973 def: Loada_pat<extloadi1, i32, addrgp, L2_loadrubgp>; | 2116 def: Loada_pat<extloadi1, i32, addrgp, L2_loadrubgp>; |
1974 def: Loada_pat<zextloadi1, i32, addrgp, L2_loadrubgp>; | 2117 def: Loada_pat<zextloadi1, i32, addrgp, L2_loadrubgp>; |
1975 def: Loada_pat<extloadi8, i32, addrgp, L2_loadrubgp>; | 2118 def: Loada_pat<extloadi8, i32, addrgp, L2_loadrubgp>; |
1976 def: Loada_pat<sextloadi8, i32, addrgp, L2_loadrbgp>; | 2119 def: Loada_pat<sextloadi8, i32, addrgp, L2_loadrbgp>; |
1977 def: Loada_pat<zextloadi8, i32, addrgp, L2_loadrubgp>; | 2120 def: Loada_pat<zextloadi8, i32, addrgp, L2_loadrubgp>; |
1978 def: Loada_pat<extloadi16, i32, addrgp, L2_loadruhgp>; | 2121 def: Loada_pat<extloadi16, i32, addrgp, L2_loadruhgp>; |
1979 def: Loada_pat<sextloadi16, i32, addrgp, L2_loadrhgp>; | 2122 def: Loada_pat<sextloadi16, i32, addrgp, L2_loadrhgp>; |
1980 def: Loada_pat<zextloadi16, i32, addrgp, L2_loadruhgp>; | 2123 def: Loada_pat<zextloadi16, i32, addrgp, L2_loadruhgp>; |
1981 def: Loada_pat<load, i32, addrgp, L2_loadrigp>; | 2124 def: Loada_pat<load, i32, addrgp, L2_loadrigp>; |
1982 def: Loada_pat<load, i64, addrgp, L2_loadrdgp>; | 2125 def: Loada_pat<load, v2i16, addrgp, L2_loadrigp>; |
1983 def: Loada_pat<load, f32, addrgp, L2_loadrigp>; | 2126 def: Loada_pat<load, v4i8, addrgp, L2_loadrigp>; |
1984 def: Loada_pat<load, f64, addrgp, L2_loadrdgp>; | 2127 def: Loada_pat<load, i64, addrgp, L2_loadrdgp>; |
2128 def: Loada_pat<load, v2i32, addrgp, L2_loadrdgp>; | |
2129 def: Loada_pat<load, v4i16, addrgp, L2_loadrdgp>; | |
2130 def: Loada_pat<load, v8i8, addrgp, L2_loadrdgp>; | |
2131 def: Loada_pat<load, f32, addrgp, L2_loadrigp>; | |
2132 def: Loada_pat<load, f64, addrgp, L2_loadrdgp>; | |
1985 | 2133 |
1986 def: Loada_pat<atomic_load_8, i32, addrgp, L2_loadrubgp>; | 2134 def: Loada_pat<atomic_load_8, i32, addrgp, L2_loadrubgp>; |
1987 def: Loada_pat<atomic_load_16, i32, addrgp, L2_loadruhgp>; | 2135 def: Loada_pat<atomic_load_16, i32, addrgp, L2_loadruhgp>; |
1988 def: Loada_pat<atomic_load_32, i32, addrgp, L2_loadrigp>; | 2136 def: Loada_pat<atomic_load_32, i32, addrgp, L2_loadrigp>; |
1989 def: Loada_pat<atomic_load_64, i64, addrgp, L2_loadrdgp>; | 2137 def: Loada_pat<atomic_load_64, i64, addrgp, L2_loadrdgp>; |
1990 } | 2138 } |
1991 | 2139 |
1992 let AddedComplexity = 70 in { | 2140 let AddedComplexity = 70 in { |
1993 def: Loadam_pat<extloadi8, i64, addrgp, ToZext64, L2_loadrubgp>; | 2141 def: Loadam_pat<extloadi8, i64, addrgp, ToAext64, L2_loadrubgp>; |
1994 def: Loadam_pat<sextloadi8, i64, addrgp, ToSext64, L2_loadrbgp>; | 2142 def: Loadam_pat<sextloadi8, i64, addrgp, ToSext64, L2_loadrbgp>; |
1995 def: Loadam_pat<zextloadi8, i64, addrgp, ToZext64, L2_loadrubgp>; | 2143 def: Loadam_pat<zextloadi8, i64, addrgp, ToZext64, L2_loadrubgp>; |
1996 def: Loadam_pat<extloadi16, i64, addrgp, ToZext64, L2_loadruhgp>; | 2144 def: Loadam_pat<extloadi16, i64, addrgp, ToAext64, L2_loadruhgp>; |
1997 def: Loadam_pat<sextloadi16, i64, addrgp, ToSext64, L2_loadrhgp>; | 2145 def: Loadam_pat<sextloadi16, i64, addrgp, ToSext64, L2_loadrhgp>; |
1998 def: Loadam_pat<zextloadi16, i64, addrgp, ToZext64, L2_loadruhgp>; | 2146 def: Loadam_pat<zextloadi16, i64, addrgp, ToZext64, L2_loadruhgp>; |
1999 def: Loadam_pat<extloadi32, i64, addrgp, ToZext64, L2_loadrigp>; | 2147 def: Loadam_pat<extloadi32, i64, addrgp, ToAext64, L2_loadrigp>; |
2000 def: Loadam_pat<sextloadi32, i64, addrgp, ToSext64, L2_loadrigp>; | 2148 def: Loadam_pat<sextloadi32, i64, addrgp, ToSext64, L2_loadrigp>; |
2001 def: Loadam_pat<zextloadi32, i64, addrgp, ToZext64, L2_loadrigp>; | 2149 def: Loadam_pat<zextloadi32, i64, addrgp, ToZext64, L2_loadrigp>; |
2002 | 2150 |
2003 def: Loadam_pat<load, i1, addrgp, I32toI1, L2_loadrubgp>; | 2151 def: Loadam_pat<load, i1, addrgp, I32toI1, L2_loadrubgp>; |
2004 def: Loadam_pat<zextloadi1, i64, addrgp, ToZext64, L2_loadrubgp>; | 2152 def: Loadam_pat<zextloadi1, i64, addrgp, ToZext64, L2_loadrubgp>; |
2134 // Atomic stores also have two, but they are reversed: address, value. | 2282 // Atomic stores also have two, but they are reversed: address, value. |
2135 // To use atomic stores with the patterns, they need to have their operands | 2283 // To use atomic stores with the patterns, they need to have their operands |
2136 // swapped. This relies on the knowledge that the F.Fragment uses names | 2284 // swapped. This relies on the knowledge that the F.Fragment uses names |
2137 // "ptr" and "val". | 2285 // "ptr" and "val". |
2138 class AtomSt<PatFrag F> | 2286 class AtomSt<PatFrag F> |
2139 : PatFrag<(ops node:$val, node:$ptr), F.Fragment, F.PredicateCode, | 2287 : PatFrag<(ops node:$val, node:$ptr), !head(F.Fragments), F.PredicateCode, |
2140 F.OperandTransform> { | 2288 F.OperandTransform> { |
2141 let IsAtomic = F.IsAtomic; | 2289 let IsAtomic = F.IsAtomic; |
2142 let MemoryVT = F.MemoryVT; | 2290 let MemoryVT = F.MemoryVT; |
2143 } | 2291 } |
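For reference, a minimal C11 use that reaches these AtomSt-wrapped store patterns; once the operands are back in value-then-address order, a relaxed atomic store selects the same store instructions as a plain one.

    #include <stdatomic.h>

    void publish(_Atomic int *p, int v) {
      // Becomes an atomic_store_32 node; AtomSt re-declares its operands as
      // (val, ptr) so the regular Storea_pat/Storexi_pat templates below can
      // emit S2_storerigp / S2_storeri_io for it as well.
      atomic_store_explicit(p, v, memory_order_relaxed);
    }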
2144 | 2292 |
2250 def: Storexi_abs_pat<store, anyimm, S4_storeiri_io>; | 2398 def: Storexi_abs_pat<store, anyimm, S4_storeiri_io>; |
2251 } | 2399 } |
2252 | 2400 |
2253 // GP-relative address | 2401 // GP-relative address |
2254 let AddedComplexity = 120 in { | 2402 let AddedComplexity = 120 in { |
2255 def: Storea_pat<truncstorei8, I32, addrgp, S2_storerbgp>; | 2403 def: Storea_pat<truncstorei8, I32, addrgp, S2_storerbgp>; |
2256 def: Storea_pat<truncstorei16, I32, addrgp, S2_storerhgp>; | 2404 def: Storea_pat<truncstorei16, I32, addrgp, S2_storerhgp>; |
2257 def: Storea_pat<store, I32, addrgp, S2_storerigp>; | 2405 def: Storea_pat<store, I32, addrgp, S2_storerigp>; |
2258 def: Storea_pat<store, I64, addrgp, S2_storerdgp>; | 2406 def: Storea_pat<store, V4I8, addrgp, S2_storerigp>; |
2259 def: Storea_pat<store, F32, addrgp, S2_storerigp>; | 2407 def: Storea_pat<store, V2I16, addrgp, S2_storerigp>; |
2260 def: Storea_pat<store, F64, addrgp, S2_storerdgp>; | 2408 def: Storea_pat<store, I64, addrgp, S2_storerdgp>; |
2261 def: Storea_pat<AtomSt<atomic_store_8>, I32, addrgp, S2_storerbgp>; | 2409 def: Storea_pat<store, V8I8, addrgp, S2_storerdgp>; |
2262 def: Storea_pat<AtomSt<atomic_store_16>, I32, addrgp, S2_storerhgp>; | 2410 def: Storea_pat<store, V4I16, addrgp, S2_storerdgp>; |
2263 def: Storea_pat<AtomSt<atomic_store_32>, I32, addrgp, S2_storerigp>; | 2411 def: Storea_pat<store, V2I32, addrgp, S2_storerdgp>; |
2264 def: Storea_pat<AtomSt<atomic_store_64>, I64, addrgp, S2_storerdgp>; | 2412 def: Storea_pat<store, F32, addrgp, S2_storerigp>; |
2413 def: Storea_pat<store, F64, addrgp, S2_storerdgp>; | |
2414 def: Storea_pat<AtomSt<atomic_store_8>, I32, addrgp, S2_storerbgp>; | |
2415 def: Storea_pat<AtomSt<atomic_store_16>, I32, addrgp, S2_storerhgp>; | |
2416 def: Storea_pat<AtomSt<atomic_store_32>, I32, addrgp, S2_storerigp>; | |
2417 def: Storea_pat<AtomSt<atomic_store_32>, V4I8, addrgp, S2_storerigp>; | |
2418 def: Storea_pat<AtomSt<atomic_store_32>, V2I16, addrgp, S2_storerigp>; | |
2419 def: Storea_pat<AtomSt<atomic_store_64>, I64, addrgp, S2_storerdgp>; | |
2420 def: Storea_pat<AtomSt<atomic_store_64>, V8I8, addrgp, S2_storerdgp>; | |
2421 def: Storea_pat<AtomSt<atomic_store_64>, V4I16, addrgp, S2_storerdgp>; | |
2422 def: Storea_pat<AtomSt<atomic_store_64>, V2I32, addrgp, S2_storerdgp>; | |
2265 | 2423 |
2266 def: Stoream_pat<truncstorei8, I64, addrgp, LoReg, S2_storerbgp>; | 2424 def: Stoream_pat<truncstorei8, I64, addrgp, LoReg, S2_storerbgp>; |
2267 def: Stoream_pat<truncstorei16, I64, addrgp, LoReg, S2_storerhgp>; | 2425 def: Stoream_pat<truncstorei16, I64, addrgp, LoReg, S2_storerhgp>; |
2268 def: Stoream_pat<truncstorei32, I64, addrgp, LoReg, S2_storerigp>; | 2426 def: Stoream_pat<truncstorei32, I64, addrgp, LoReg, S2_storerigp>; |
2269 def: Stoream_pat<store, I1, addrgp, I1toI32, S2_storerbgp>; | 2427 def: Stoream_pat<store, I1, addrgp, I1toI32, S2_storerbgp>; |
2270 } | 2428 } |
2271 | 2429 |
2272 // Absolute address | 2430 // Absolute address |
2273 let AddedComplexity = 110 in { | 2431 let AddedComplexity = 110 in { |
2274 def: Storea_pat<truncstorei8, I32, anyimm0, PS_storerbabs>; | 2432 def: Storea_pat<truncstorei8, I32, anyimm0, PS_storerbabs>; |
2275 def: Storea_pat<truncstorei16, I32, anyimm1, PS_storerhabs>; | 2433 def: Storea_pat<truncstorei16, I32, anyimm1, PS_storerhabs>; |
2276 def: Storea_pat<store, I32, anyimm2, PS_storeriabs>; | 2434 def: Storea_pat<store, I32, anyimm2, PS_storeriabs>; |
2277 def: Storea_pat<store, I64, anyimm3, PS_storerdabs>; | 2435 def: Storea_pat<store, V4I8, anyimm2, PS_storeriabs>; |
2278 def: Storea_pat<store, F32, anyimm2, PS_storeriabs>; | 2436 def: Storea_pat<store, V2I16, anyimm2, PS_storeriabs>; |
2279 def: Storea_pat<store, F64, anyimm3, PS_storerdabs>; | 2437 def: Storea_pat<store, I64, anyimm3, PS_storerdabs>; |
2280 def: Storea_pat<AtomSt<atomic_store_8>, I32, anyimm0, PS_storerbabs>; | 2438 def: Storea_pat<store, V8I8, anyimm3, PS_storerdabs>; |
2281 def: Storea_pat<AtomSt<atomic_store_16>, I32, anyimm1, PS_storerhabs>; | 2439 def: Storea_pat<store, V4I16, anyimm3, PS_storerdabs>; |
2282 def: Storea_pat<AtomSt<atomic_store_32>, I32, anyimm2, PS_storeriabs>; | 2440 def: Storea_pat<store, V2I32, anyimm3, PS_storerdabs>; |
2283 def: Storea_pat<AtomSt<atomic_store_64>, I64, anyimm3, PS_storerdabs>; | 2441 def: Storea_pat<store, F32, anyimm2, PS_storeriabs>; |
2442 def: Storea_pat<store, F64, anyimm3, PS_storerdabs>; | |
2443 def: Storea_pat<AtomSt<atomic_store_8>, I32, anyimm0, PS_storerbabs>; | |
2444 def: Storea_pat<AtomSt<atomic_store_16>, I32, anyimm1, PS_storerhabs>; | |
2445 def: Storea_pat<AtomSt<atomic_store_32>, I32, anyimm2, PS_storeriabs>; | |
2446 def: Storea_pat<AtomSt<atomic_store_32>, V4I8, anyimm2, PS_storeriabs>; | |
2447 def: Storea_pat<AtomSt<atomic_store_32>, V2I16, anyimm2, PS_storeriabs>; | |
2448 def: Storea_pat<AtomSt<atomic_store_64>, I64, anyimm3, PS_storerdabs>; | |
2449 def: Storea_pat<AtomSt<atomic_store_64>, V8I8, anyimm3, PS_storerdabs>; | |
2450 def: Storea_pat<AtomSt<atomic_store_64>, V4I16, anyimm3, PS_storerdabs>; | |
2451 def: Storea_pat<AtomSt<atomic_store_64>, V2I32, anyimm3, PS_storerdabs>; | |
2284 | 2452 |
2285 def: Stoream_pat<truncstorei8, I64, anyimm0, LoReg, PS_storerbabs>; | 2453 def: Stoream_pat<truncstorei8, I64, anyimm0, LoReg, PS_storerbabs>; |
2286 def: Stoream_pat<truncstorei16, I64, anyimm1, LoReg, PS_storerhabs>; | 2454 def: Stoream_pat<truncstorei16, I64, anyimm1, LoReg, PS_storerhabs>; |
2287 def: Stoream_pat<truncstorei32, I64, anyimm2, LoReg, PS_storeriabs>; | 2455 def: Stoream_pat<truncstorei32, I64, anyimm2, LoReg, PS_storeriabs>; |
2288 def: Stoream_pat<store, I1, anyimm0, I1toI32, PS_storerbabs>; | 2456 def: Stoream_pat<store, I1, anyimm0, I1toI32, PS_storerbabs>; |
2289 } | 2457 } |
2290 | 2458 |
2291 // Reg<<S + Imm | 2459 // Reg<<S + Imm |
2292 let AddedComplexity = 100 in { | 2460 let AddedComplexity = 100 in { |
2293 def: Storexu_shl_pat<truncstorei8, I32, anyimm0, S4_storerb_ur>; | 2461 def: Storexu_shl_pat<truncstorei8, I32, anyimm0, S4_storerb_ur>; |
2294 def: Storexu_shl_pat<truncstorei16, I32, anyimm1, S4_storerh_ur>; | 2462 def: Storexu_shl_pat<truncstorei16, I32, anyimm1, S4_storerh_ur>; |
2295 def: Storexu_shl_pat<store, I32, anyimm2, S4_storeri_ur>; | 2463 def: Storexu_shl_pat<store, I32, anyimm2, S4_storeri_ur>; |
2296 def: Storexu_shl_pat<store, I64, anyimm3, S4_storerd_ur>; | 2464 def: Storexu_shl_pat<store, V4I8, anyimm2, S4_storeri_ur>; |
2297 def: Storexu_shl_pat<store, F32, anyimm2, S4_storeri_ur>; | 2465 def: Storexu_shl_pat<store, V2I16, anyimm2, S4_storeri_ur>; |
2298 def: Storexu_shl_pat<store, F64, anyimm3, S4_storerd_ur>; | 2466 def: Storexu_shl_pat<store, I64, anyimm3, S4_storerd_ur>; |
2467 def: Storexu_shl_pat<store, V8I8, anyimm3, S4_storerd_ur>; | |
2468 def: Storexu_shl_pat<store, V4I16, anyimm3, S4_storerd_ur>; | |
2469 def: Storexu_shl_pat<store, V2I32, anyimm3, S4_storerd_ur>; | |
2470 def: Storexu_shl_pat<store, F32, anyimm2, S4_storeri_ur>; | |
2471 def: Storexu_shl_pat<store, F64, anyimm3, S4_storerd_ur>; | |
2299 | 2472 |
2300 def: Pat<(store I1:$Pu, (add (shl I32:$Rs, u2_0ImmPred:$u2), anyimm:$A)), | 2473 def: Pat<(store I1:$Pu, (add (shl I32:$Rs, u2_0ImmPred:$u2), anyimm:$A)), |
2301 (S4_storerb_ur IntRegs:$Rs, imm:$u2, imm:$A, (I1toI32 I1:$Pu))>; | 2474 (S4_storerb_ur IntRegs:$Rs, imm:$u2, imm:$A, (I1toI32 I1:$Pu))>; |
2302 } | 2475 } |
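The "Reg<<S + Imm" form corresponds to C code like the following sketch, where the store address is a fixed base plus a scaled register index:

    extern int table[256];

    void set_entry(int i, int v) {
      // Address is &table + (i << 2): the (add (shl Rs, #u2), anyimm) shape
      // above, eligible for the S4_storeri_ur-style stores.
      table[i] = v;
    }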
2303 | 2476 |
2304 // Reg<<S + Reg | 2477 // Reg<<S + Reg |
2305 let AddedComplexity = 90 in { | 2478 let AddedComplexity = 90 in { |
2306 def: Storexr_shl_pat<truncstorei8, I32, S4_storerb_rr>; | 2479 def: Storexr_shl_pat<truncstorei8, I32, S4_storerb_rr>; |
2307 def: Storexr_shl_pat<truncstorei16, I32, S4_storerh_rr>; | 2480 def: Storexr_shl_pat<truncstorei16, I32, S4_storerh_rr>; |
2308 def: Storexr_shl_pat<store, I32, S4_storeri_rr>; | 2481 def: Storexr_shl_pat<store, I32, S4_storeri_rr>; |
2309 def: Storexr_shl_pat<store, I64, S4_storerd_rr>; | 2482 def: Storexr_shl_pat<store, V4I8, S4_storeri_rr>; |
2310 def: Storexr_shl_pat<store, F32, S4_storeri_rr>; | 2483 def: Storexr_shl_pat<store, V2I16, S4_storeri_rr>; |
2311 def: Storexr_shl_pat<store, F64, S4_storerd_rr>; | 2484 def: Storexr_shl_pat<store, I64, S4_storerd_rr>; |
2485 def: Storexr_shl_pat<store, V8I8, S4_storerd_rr>; | |
2486 def: Storexr_shl_pat<store, V4I16, S4_storerd_rr>; | |
2487 def: Storexr_shl_pat<store, V2I32, S4_storerd_rr>; | |
2488 def: Storexr_shl_pat<store, F32, S4_storeri_rr>; | |
2489 def: Storexr_shl_pat<store, F64, S4_storerd_rr>; | |
2312 | 2490 |
2313 def: Pat<(store I1:$Pu, (add (shl I32:$Rs, u2_0ImmPred:$u2), I32:$Rt)), | 2491 def: Pat<(store I1:$Pu, (add (shl I32:$Rs, u2_0ImmPred:$u2), I32:$Rt)), |
2314 (S4_storerb_ur IntRegs:$Rt, IntRegs:$Rs, imm:$u2, (I1toI32 I1:$Pu))>; | 2492 (S4_storerb_ur IntRegs:$Rt, IntRegs:$Rs, imm:$u2, (I1toI32 I1:$Pu))>; |
2315 } | 2493 } |
2316 | 2494 |
2358 def: Storexim_fi_pat<LS_<store>, anyimm, ToI32, S2_storeri_io>; | 2536 def: Storexim_fi_pat<LS_<store>, anyimm, ToI32, S2_storeri_io>; |
2359 } | 2537 } |
2360 | 2538 |
2361 // Fi+Imm, Fi, store-register | 2539 // Fi+Imm, Fi, store-register |
2362 let AddedComplexity = 60 in { | 2540 let AddedComplexity = 60 in { |
2363 defm: Storexi_fi_add_pat<truncstorei8, I32, anyimm, S2_storerb_io>; | 2541 defm: Storexi_fi_add_pat<truncstorei8, I32, anyimm, S2_storerb_io>; |
2364 defm: Storexi_fi_add_pat<truncstorei16, I32, anyimm, S2_storerh_io>; | 2542 defm: Storexi_fi_add_pat<truncstorei16, I32, anyimm, S2_storerh_io>; |
2365 defm: Storexi_fi_add_pat<store, I32, anyimm, S2_storeri_io>; | 2543 defm: Storexi_fi_add_pat<store, I32, anyimm, S2_storeri_io>; |
2366 defm: Storexi_fi_add_pat<store, I64, anyimm, S2_storerd_io>; | 2544 defm: Storexi_fi_add_pat<store, V4I8, anyimm, S2_storeri_io>; |
2367 defm: Storexi_fi_add_pat<store, F32, anyimm, S2_storeri_io>; | 2545 defm: Storexi_fi_add_pat<store, V2I16, anyimm, S2_storeri_io>; |
2368 defm: Storexi_fi_add_pat<store, F64, anyimm, S2_storerd_io>; | 2546 defm: Storexi_fi_add_pat<store, I64, anyimm, S2_storerd_io>; |
2547 defm: Storexi_fi_add_pat<store, V8I8, anyimm, S2_storerd_io>; | |
2548 defm: Storexi_fi_add_pat<store, V4I16, anyimm, S2_storerd_io>; | |
2549 defm: Storexi_fi_add_pat<store, V2I32, anyimm, S2_storerd_io>; | |
2550 defm: Storexi_fi_add_pat<store, F32, anyimm, S2_storeri_io>; | |
2551 defm: Storexi_fi_add_pat<store, F64, anyimm, S2_storerd_io>; | |
2369 defm: Storexim_fi_add_pat<store, I1, anyimm, I1toI32, S2_storerb_io>; | 2552 defm: Storexim_fi_add_pat<store, I1, anyimm, I1toI32, S2_storerb_io>; |
2370 | 2553 |
2371 def: Storexi_fi_pat<truncstorei8, I32, S2_storerb_io>; | 2554 def: Storexi_fi_pat<truncstorei8, I32, S2_storerb_io>; |
2372 def: Storexi_fi_pat<truncstorei16, I32, S2_storerh_io>; | 2555 def: Storexi_fi_pat<truncstorei16, I32, S2_storerh_io>; |
2373 def: Storexi_fi_pat<store, I32, S2_storeri_io>; | 2556 def: Storexi_fi_pat<store, I32, S2_storeri_io>; |
2374 def: Storexi_fi_pat<store, I64, S2_storerd_io>; | 2557 def: Storexi_fi_pat<store, V4I8, S2_storeri_io>; |
2375 def: Storexi_fi_pat<store, F32, S2_storeri_io>; | 2558 def: Storexi_fi_pat<store, V2I16, S2_storeri_io>; |
2376 def: Storexi_fi_pat<store, F64, S2_storerd_io>; | 2559 def: Storexi_fi_pat<store, I64, S2_storerd_io>; |
2560 def: Storexi_fi_pat<store, V8I8, S2_storerd_io>; | |
2561 def: Storexi_fi_pat<store, V4I16, S2_storerd_io>; | |
2562 def: Storexi_fi_pat<store, V2I32, S2_storerd_io>; | |
2563 def: Storexi_fi_pat<store, F32, S2_storeri_io>; | |
2564 def: Storexi_fi_pat<store, F64, S2_storerd_io>; | |
2377 def: Storexim_fi_pat<store, I1, I1toI32, S2_storerb_io>; | 2565 def: Storexim_fi_pat<store, I1, I1toI32, S2_storerb_io>; |
2378 } | 2566 } |
2379 | 2567 |
2380 | 2568 |
2381 multiclass IMRA_<PatFrag S, PatFrag V, PatFrag O, PatFrag M, InstHexagon I> { | 2569 multiclass IMRA_<PatFrag S, PatFrag V, PatFrag O, PatFrag M, InstHexagon I> { |
2396 defm: IRA_<store, anyimm, u6_2ImmPred, S4_storeiri_io>; | 2584 defm: IRA_<store, anyimm, u6_2ImmPred, S4_storeiri_io>; |
2397 } | 2585 } |
2398 | 2586 |
2399 // Reg+Imm, store-register | 2587 // Reg+Imm, store-register |
2400 let AddedComplexity = 40 in { | 2588 let AddedComplexity = 40 in { |
2401 defm: Storexi_pat<truncstorei8, I32, anyimm0, S2_storerb_io>; | 2589 defm: Storexi_pat<truncstorei8, I32, anyimm0, S2_storerb_io>; |
2402 defm: Storexi_pat<truncstorei16, I32, anyimm1, S2_storerh_io>; | 2590 defm: Storexi_pat<truncstorei16, I32, anyimm1, S2_storerh_io>; |
2403 defm: Storexi_pat<store, I32, anyimm2, S2_storeri_io>; | 2591 defm: Storexi_pat<store, I32, anyimm2, S2_storeri_io>; |
2404 defm: Storexi_pat<store, I64, anyimm3, S2_storerd_io>; | 2592 defm: Storexi_pat<store, V4I8, anyimm2, S2_storeri_io>; |
2405 defm: Storexi_pat<store, F32, anyimm2, S2_storeri_io>; | 2593 defm: Storexi_pat<store, V2I16, anyimm2, S2_storeri_io>; |
2406 defm: Storexi_pat<store, F64, anyimm3, S2_storerd_io>; | 2594 defm: Storexi_pat<store, I64, anyimm3, S2_storerd_io>; |
2595 defm: Storexi_pat<store, V8I8, anyimm3, S2_storerd_io>; | |
2596 defm: Storexi_pat<store, V4I16, anyimm3, S2_storerd_io>; | |
2597 defm: Storexi_pat<store, V2I32, anyimm3, S2_storerd_io>; | |
2598 defm: Storexi_pat<store, F32, anyimm2, S2_storeri_io>; | |
2599 defm: Storexi_pat<store, F64, anyimm3, S2_storerd_io>; | |
2407 | 2600 |
2408 defm: Storexim_pat<truncstorei8, I64, anyimm0, LoReg, S2_storerb_io>; | 2601 defm: Storexim_pat<truncstorei8, I64, anyimm0, LoReg, S2_storerb_io>; |
2409 defm: Storexim_pat<truncstorei16, I64, anyimm1, LoReg, S2_storerh_io>; | 2602 defm: Storexim_pat<truncstorei16, I64, anyimm1, LoReg, S2_storerh_io>; |
2410 defm: Storexim_pat<truncstorei32, I64, anyimm2, LoReg, S2_storeri_io>; | 2603 defm: Storexim_pat<truncstorei32, I64, anyimm2, LoReg, S2_storeri_io>; |
2411 defm: Storexim_pat<store, I1, anyimm0, I1toI32, S2_storerb_io>; | 2604 defm: Storexim_pat<store, I1, anyimm0, I1toI32, S2_storerb_io>; |
2412 | 2605 |
2413 defm: Storexi_pat<AtomSt<atomic_store_8>, I32, anyimm0, S2_storerb_io>; | 2606 defm: Storexi_pat<AtomSt<atomic_store_8>, I32, anyimm0, S2_storerb_io>; |
2414 defm: Storexi_pat<AtomSt<atomic_store_16>, I32, anyimm1, S2_storerh_io>; | 2607 defm: Storexi_pat<AtomSt<atomic_store_16>, I32, anyimm1, S2_storerh_io>; |
2415 defm: Storexi_pat<AtomSt<atomic_store_32>, I32, anyimm2, S2_storeri_io>; | 2608 defm: Storexi_pat<AtomSt<atomic_store_32>, I32, anyimm2, S2_storeri_io>; |
2416 defm: Storexi_pat<AtomSt<atomic_store_64>, I64, anyimm3, S2_storerd_io>; | 2609 defm: Storexi_pat<AtomSt<atomic_store_32>, V4I8, anyimm2, S2_storeri_io>; |
2610 defm: Storexi_pat<AtomSt<atomic_store_32>, V2I16, anyimm2, S2_storeri_io>; | |
2611 defm: Storexi_pat<AtomSt<atomic_store_64>, I64, anyimm3, S2_storerd_io>; | |
2612 defm: Storexi_pat<AtomSt<atomic_store_64>, V8I8, anyimm3, S2_storerd_io>; | |
2613 defm: Storexi_pat<AtomSt<atomic_store_64>, V4I16, anyimm3, S2_storerd_io>; | |
2614 defm: Storexi_pat<AtomSt<atomic_store_64>, V2I32, anyimm3, S2_storerd_io>; | |
2417 } | 2615 } |
2418 | 2616 |
2419 // Reg+Reg | 2617 // Reg+Reg |
2420 let AddedComplexity = 30 in { | 2618 let AddedComplexity = 30 in { |
2421 def: Storexr_add_pat<truncstorei8, I32, S4_storerb_rr>; | 2619 def: Storexr_add_pat<truncstorei8, I32, S4_storerb_rr>; |
2422 def: Storexr_add_pat<truncstorei16, I32, S4_storerh_rr>; | 2620 def: Storexr_add_pat<truncstorei16, I32, S4_storerh_rr>; |
2423 def: Storexr_add_pat<store, I32, S4_storeri_rr>; | 2621 def: Storexr_add_pat<store, I32, S4_storeri_rr>; |
2424 def: Storexr_add_pat<store, I64, S4_storerd_rr>; | 2622 def: Storexr_add_pat<store, V4I8, S4_storeri_rr>; |
2425 def: Storexr_add_pat<store, F32, S4_storeri_rr>; | 2623 def: Storexr_add_pat<store, V2I16, S4_storeri_rr>; |
2426 def: Storexr_add_pat<store, F64, S4_storerd_rr>; | 2624 def: Storexr_add_pat<store, I64, S4_storerd_rr>; |
2625 def: Storexr_add_pat<store, V8I8, S4_storerd_rr>; | |
2626 def: Storexr_add_pat<store, V4I16, S4_storerd_rr>; | |
2627 def: Storexr_add_pat<store, V2I32, S4_storerd_rr>; | |
2628 def: Storexr_add_pat<store, F32, S4_storeri_rr>; | |
2629 def: Storexr_add_pat<store, F64, S4_storerd_rr>; | |
2427 | 2630 |
2428 def: Pat<(store I1:$Pu, (add I32:$Rs, I32:$Rt)), | 2631 def: Pat<(store I1:$Pu, (add I32:$Rs, I32:$Rt)), |
2429 (S4_storerb_rr IntRegs:$Rs, IntRegs:$Rt, 0, (I1toI32 I1:$Pu))>; | 2632 (S4_storerb_rr IntRegs:$Rs, IntRegs:$Rt, 0, (I1toI32 I1:$Pu))>; |
2430 } | 2633 } |
2431 | 2634 |
2440 def: Storexi_base_pat<store, anyimm, S4_storeiri_io>; | 2643 def: Storexi_base_pat<store, anyimm, S4_storeiri_io>; |
2441 } | 2644 } |
2442 | 2645 |
2443 // Reg, store-register | 2646 // Reg, store-register |
2444 let AddedComplexity = 10 in { | 2647 let AddedComplexity = 10 in { |
2445 def: Storexi_base_pat<truncstorei8, I32, S2_storerb_io>; | 2648 def: Storexi_base_pat<truncstorei8, I32, S2_storerb_io>; |
2446 def: Storexi_base_pat<truncstorei16, I32, S2_storerh_io>; | 2649 def: Storexi_base_pat<truncstorei16, I32, S2_storerh_io>; |
2447 def: Storexi_base_pat<store, I32, S2_storeri_io>; | 2650 def: Storexi_base_pat<store, I32, S2_storeri_io>; |
2448 def: Storexi_base_pat<store, I64, S2_storerd_io>; | 2651 def: Storexi_base_pat<store, V4I8, S2_storeri_io>; |
2449 def: Storexi_base_pat<store, F32, S2_storeri_io>; | 2652 def: Storexi_base_pat<store, V2I16, S2_storeri_io>; |
2450 def: Storexi_base_pat<store, F64, S2_storerd_io>; | 2653 def: Storexi_base_pat<store, I64, S2_storerd_io>; |
2654 def: Storexi_base_pat<store, V8I8, S2_storerd_io>; | |
2655 def: Storexi_base_pat<store, V4I16, S2_storerd_io>; | |
2656 def: Storexi_base_pat<store, V2I32, S2_storerd_io>; | |
2657 def: Storexi_base_pat<store, F32, S2_storeri_io>; | |
2658 def: Storexi_base_pat<store, F64, S2_storerd_io>; | |
2451 | 2659 |
2452 def: Storexim_base_pat<truncstorei8, I64, LoReg, S2_storerb_io>; | 2660 def: Storexim_base_pat<truncstorei8, I64, LoReg, S2_storerb_io>; |
2453 def: Storexim_base_pat<truncstorei16, I64, LoReg, S2_storerh_io>; | 2661 def: Storexim_base_pat<truncstorei16, I64, LoReg, S2_storerh_io>; |
2454 def: Storexim_base_pat<truncstorei32, I64, LoReg, S2_storeri_io>; | 2662 def: Storexim_base_pat<truncstorei32, I64, LoReg, S2_storeri_io>; |
2455 def: Storexim_base_pat<store, I1, I1toI32, S2_storerb_io>; | 2663 def: Storexim_base_pat<store, I1, I1toI32, S2_storerb_io>; |
2456 | 2664 |
2457 def: Storexi_base_pat<AtomSt<atomic_store_8>, I32, S2_storerb_io>; | 2665 def: Storexi_base_pat<AtomSt<atomic_store_8>, I32, S2_storerb_io>; |
2458 def: Storexi_base_pat<AtomSt<atomic_store_16>, I32, S2_storerh_io>; | 2666 def: Storexi_base_pat<AtomSt<atomic_store_16>, I32, S2_storerh_io>; |
2459 def: Storexi_base_pat<AtomSt<atomic_store_32>, I32, S2_storeri_io>; | 2667 def: Storexi_base_pat<AtomSt<atomic_store_32>, I32, S2_storeri_io>; |
2460 def: Storexi_base_pat<AtomSt<atomic_store_64>, I64, S2_storerd_io>; | 2668 def: Storexi_base_pat<AtomSt<atomic_store_32>, V4I8, S2_storeri_io>; |
2669 def: Storexi_base_pat<AtomSt<atomic_store_32>, V2I16, S2_storeri_io>; | |
2670 def: Storexi_base_pat<AtomSt<atomic_store_64>, I64, S2_storerd_io>; | |
2671 def: Storexi_base_pat<AtomSt<atomic_store_64>, V8I8, S2_storerd_io>; | |
2672 def: Storexi_base_pat<AtomSt<atomic_store_64>, V4I16, S2_storerd_io>; | |
2673 def: Storexi_base_pat<AtomSt<atomic_store_64>, V2I32, S2_storerd_io>; | |
2461 } | 2674 } |
2462 | 2675 |
2463 | 2676 |
2464 // --(14) Memop ---------------------------------------------------------- | 2677 // --(14) Memop ---------------------------------------------------------- |
2465 // | 2678 // |
2539 (MI AddrFI:$Rs, imm:$Off, I32:$A)>; | 2752 (MI AddrFI:$Rs, imm:$Off, I32:$A)>; |
2540 } | 2753 } |
2541 | 2754 |
2542 multiclass Memopxr_pat<PatFrag Load, PatFrag Store, PatFrag ImmPred, | 2755 multiclass Memopxr_pat<PatFrag Load, PatFrag Store, PatFrag ImmPred, |
2543 SDNode Oper, InstHexagon MI> { | 2756 SDNode Oper, InstHexagon MI> { |
2544 defm: Memopxr_base_pat <Load, Store, Oper, MI>; | 2757 let Predicates = [UseMEMOPS] in { |
2545 defm: Memopxr_add_pat <Load, Store, ImmPred, Oper, MI>; | 2758 defm: Memopxr_base_pat <Load, Store, Oper, MI>; |
2759 defm: Memopxr_add_pat <Load, Store, ImmPred, Oper, MI>; | |
2760 } | |
2546 } | 2761 } |
2547 | 2762 |
2548 let AddedComplexity = 200 in { | 2763 let AddedComplexity = 200 in { |
2549 // add reg | 2764 // add reg |
2550 defm: Memopxr_pat<extloadi8, truncstorei8, u6_0ImmPred, add, | 2765 defm: Memopxr_pat<extloadi8, truncstorei8, u6_0ImmPred, add, |
2638 } | 2853 } |
2639 | 2854 |
2640 multiclass Memopxi_pat<PatFrag Load, PatFrag Store, PatFrag ImmPred, | 2855 multiclass Memopxi_pat<PatFrag Load, PatFrag Store, PatFrag ImmPred, |
2641 SDNode Oper, PatFrag Arg, SDNodeXForm ArgMod, | 2856 SDNode Oper, PatFrag Arg, SDNodeXForm ArgMod, |
2642 InstHexagon MI> { | 2857 InstHexagon MI> { |
2643 defm: Memopxi_base_pat <Load, Store, Oper, Arg, ArgMod, MI>; | 2858 let Predicates = [UseMEMOPS] in { |
2644 defm: Memopxi_add_pat <Load, Store, ImmPred, Oper, Arg, ArgMod, MI>; | 2859 defm: Memopxi_base_pat <Load, Store, Oper, Arg, ArgMod, MI>; |
2860 defm: Memopxi_add_pat <Load, Store, ImmPred, Oper, Arg, ArgMod, MI>; | |
2861 } | |
2645 } | 2862 } |
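Source-level shape targeted by the memop multiclasses (a sketch; the UseMEMOPS predicate added above gates all of them): a small load-modify-store on one address.

    void bump(unsigned char *p) {
      // extloadi8 + add of a small immediate + truncstorei8 to the same
      // address: a candidate for a single byte memop when memops are enabled.
      *p += 3;
    }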
2646 | 2863 |
2647 let AddedComplexity = 220 in { | 2864 let AddedComplexity = 220 in { |
2648 // add imm | 2865 // add imm |
2649 defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, add, u5_0ImmPred, | 2866 defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, add, u5_0ImmPred, |
2798 (J2_jumpt I1:$Pu, bb:$dst)>; | 3015 (J2_jumpt I1:$Pu, bb:$dst)>; |
2799 def: Pat<(brcond (not I1:$Pu), bb:$dst), | 3016 def: Pat<(brcond (not I1:$Pu), bb:$dst), |
2800 (J2_jumpf I1:$Pu, bb:$dst)>; | 3017 (J2_jumpf I1:$Pu, bb:$dst)>; |
2801 def: Pat<(brcond (i1 (setne I1:$Pu, -1)), bb:$dst), | 3018 def: Pat<(brcond (i1 (setne I1:$Pu, -1)), bb:$dst), |
2802 (J2_jumpf I1:$Pu, bb:$dst)>; | 3019 (J2_jumpf I1:$Pu, bb:$dst)>; |
3020 def: Pat<(brcond (i1 (seteq I1:$Pu, 0)), bb:$dst), | |
3021 (J2_jumpf I1:$Pu, bb:$dst)>; | |
2803 def: Pat<(brcond (i1 (setne I1:$Pu, 0)), bb:$dst), | 3022 def: Pat<(brcond (i1 (setne I1:$Pu, 0)), bb:$dst), |
2804 (J2_jumpt I1:$Pu, bb:$dst)>; | 3023 (J2_jumpt I1:$Pu, bb:$dst)>; |
2805 | 3024 |
2806 | 3025 |
2807 // --(17) Misc ----------------------------------------------------------- | 3026 // --(17) Misc ----------------------------------------------------------- |
2860 (PS_alloca IntRegs:$Rs, imm:$A)>; | 3079 (PS_alloca IntRegs:$Rs, imm:$A)>; |
2861 | 3080 |
2862 def HexagonBARRIER: SDNode<"HexagonISD::BARRIER", SDTNone, [SDNPHasChain]>; | 3081 def HexagonBARRIER: SDNode<"HexagonISD::BARRIER", SDTNone, [SDNPHasChain]>; |
2863 def: Pat<(HexagonBARRIER), (Y2_barrier)>; | 3082 def: Pat<(HexagonBARRIER), (Y2_barrier)>; |
2864 | 3083 |
3084 def: Pat<(trap), (PS_crash)>; | |
3085 | |
2865 // Read cycle counter. | 3086 // Read cycle counter. |
2866 def SDTInt64Leaf: SDTypeProfile<1, 0, [SDTCisVT<0, i64>]>; | 3087 def SDTInt64Leaf: SDTypeProfile<1, 0, [SDTCisVT<0, i64>]>; |
2867 def HexagonREADCYCLE: SDNode<"HexagonISD::READCYCLE", SDTInt64Leaf, | 3088 def HexagonREADCYCLE: SDNode<"HexagonISD::READCYCLE", SDTInt64Leaf, |
2868 [SDNPHasChain]>; | 3089 [SDNPHasChain]>; |
2869 | 3090 |
2870 def: Pat<(HexagonREADCYCLE), (A4_tfrcpp UPCYCLE)>; | 3091 def: Pat<(HexagonREADCYCLE), (A4_tfrcpp UPCYCLE)>; |
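In C this is reached through clang's __builtin_readcyclecounter(), which produces the llvm.readcyclecounter intrinsic:

    unsigned long long cycles(void) {
      // Lowered to HexagonISD::READCYCLE and then to a read of the UPCYCLE
      // register pair via A4_tfrcpp.
      return __builtin_readcyclecounter();
    }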
3092 | |
3093 // The declared return value of the store-locked intrinsics is i32, but | |
3094 // the instructions actually define i1. To avoid register copies from | |
3095 // IntRegs to PredRegs and back, fold the entire pattern checking the | |
3096 // result against true/false. | |
3097 let AddedComplexity = 100 in { | |
3098 def: Pat<(i1 (setne (int_hexagon_S2_storew_locked I32:$Rs, I32:$Rt), 0)), | |
3099 (S2_storew_locked I32:$Rs, I32:$Rt)>; | |
3100 def: Pat<(i1 (seteq (int_hexagon_S2_storew_locked I32:$Rs, I32:$Rt), 0)), | |
3101 (C2_not (S2_storew_locked I32:$Rs, I32:$Rt))>; | |
3102 def: Pat<(i1 (setne (int_hexagon_S4_stored_locked I32:$Rs, I64:$Rt), 0)), | |
3103 (S4_stored_locked I32:$Rs, I64:$Rt)>; | |
3104 def: Pat<(i1 (seteq (int_hexagon_S4_stored_locked I32:$Rs, I64:$Rt), 0)), | |
3105 (C2_not (S4_stored_locked I32:$Rs, I64:$Rt))>; | |
3106 } |
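A hedged C illustration of this folding, assuming the usual __builtin_HEXAGON_S2_storew_locked builtin name and a (pointer, value) -> int signature for it:

    int try_store(int *p, int v) {
      // The != 0 test folds into the i1 result of S2_storew_locked, so the
      // predicate never has to round-trip through an IntRegs register.
      return __builtin_HEXAGON_S2_storew_locked(p, v) != 0;
    }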