100
|
1 ; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+sse4.2,+aes,+pclmul < %s | FileCheck %s
|
83
|
2
|
|
3 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
|
4 target triple = "x86_64-unknown-unknown"
|
|
5
|
|
6 ; Stack reload folding tests.
|
|
7 ;
|
|
8 ; By including a nop call with sideeffects we can force a partial register spill of the
|
|
9 ; relevant registers and check that the reload is correctly folded into the instruction.
|
|
10
|
|
11 define <2 x i64> @stack_fold_aesdec(<2 x i64> %a0, <2 x i64> %a1) {
|
|
12 ;CHECK-LABEL: stack_fold_aesdec
|
|
13 ;CHECK: aesdec {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
14 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
15 %2 = call <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64> %a0, <2 x i64> %a1)
|
|
16 ret <2 x i64> %2
|
|
17 }
|
|
18 declare <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64>, <2 x i64>) nounwind readnone
|
|
19
|
|
20 define <2 x i64> @stack_fold_aesdeclast(<2 x i64> %a0, <2 x i64> %a1) {
|
|
21 ;CHECK-LABEL: stack_fold_aesdeclast
|
|
22 ;CHECK: aesdeclast {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
23 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
24 %2 = call <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64> %a0, <2 x i64> %a1)
|
|
25 ret <2 x i64> %2
|
|
26 }
|
|
27 declare <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64>, <2 x i64>) nounwind readnone
|
|
28
|
|
29 define <2 x i64> @stack_fold_aesenc(<2 x i64> %a0, <2 x i64> %a1) {
|
|
30 ;CHECK-LABEL: stack_fold_aesenc
|
|
31 ;CHECK: aesenc {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
32 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
33 %2 = call <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64> %a0, <2 x i64> %a1)
|
|
34 ret <2 x i64> %2
|
|
35 }
|
|
36 declare <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64>, <2 x i64>) nounwind readnone
|
|
37
|
|
38 define <2 x i64> @stack_fold_aesenclast(<2 x i64> %a0, <2 x i64> %a1) {
|
|
39 ;CHECK-LABEL: stack_fold_aesenclast
|
|
40 ;CHECK: aesenclast {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
41 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
42 %2 = call <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64> %a0, <2 x i64> %a1)
|
|
43 ret <2 x i64> %2
|
|
44 }
|
|
45 declare <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64>, <2 x i64>) nounwind readnone
|
|
46
|
|
47 define <2 x i64> @stack_fold_aesimc(<2 x i64> %a0) {
|
|
48 ;CHECK-LABEL: stack_fold_aesimc
|
|
49 ;CHECK: aesimc {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
50 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
51 %2 = call <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64> %a0)
|
|
52 ret <2 x i64> %2
|
|
53 }
|
|
54 declare <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64>) nounwind readnone
|
|
55
|
|
56 define <2 x i64> @stack_fold_aeskeygenassist(<2 x i64> %a0) {
|
|
57 ;CHECK-LABEL: stack_fold_aeskeygenassist
|
|
58 ;CHECK: aeskeygenassist $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
59 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
60 %2 = call <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64> %a0, i8 7)
|
|
61 ret <2 x i64> %2
|
|
62 }
|
|
63 declare <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64>, i8) nounwind readnone
|
|
64
|
95
|
65 ;TODO stack_fold_crc32_32_8
|
|
66 declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind
|
|
67
|
|
68 ;TODO stack_fold_crc32_32_16
|
|
69 declare i32 @llvm.x86.sse42.crc32.32.16(i32, i16) nounwind
|
|
70
|
|
71 define i32 @stack_fold_crc32_32_32(i32 %a0, i32 %a1) {
|
|
72 ;CHECK-LABEL: stack_fold_crc32_32_32
|
|
73 ;CHECK: crc32l {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Folded Reload
|
|
74 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
|
|
75 %2 = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a0, i32 %a1)
|
|
76 ret i32 %2
|
|
77 }
|
|
78 declare i32 @llvm.x86.sse42.crc32.32.32(i32, i32) nounwind
|
|
79
|
|
80 ;TODO stack_fold_crc32_64_8
|
|
81 declare i64 @llvm.x86.sse42.crc32.64.8(i64, i8) nounwind
|
|
82
|
|
83 define i64 @stack_fold_crc32_64_64(i64 %a0, i64 %a1) {
|
|
84 ;CHECK-LABEL: stack_fold_crc32_64_64
|
|
85 ;CHECK: crc32q {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 8-byte Folded Reload
|
|
86 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
|
|
87 %2 = call i64 @llvm.x86.sse42.crc32.64.64(i64 %a0, i64 %a1)
|
|
88 ret i64 %2
|
|
89 }
|
|
90 declare i64 @llvm.x86.sse42.crc32.64.64(i64, i64) nounwind
|
|
91
|
83
|
92 define <4 x i32> @stack_fold_movd_load(i32 %a0) {
|
|
93 ;CHECK-LABEL: stack_fold_movd_load
|
|
94 ;CHECK: movd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
|
|
95 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
|
|
96 %2 = insertelement <4 x i32> zeroinitializer, i32 %a0, i32 0
|
|
97 ; add forces execution domain
|
|
98 %3 = add <4 x i32> %2, <i32 1, i32 1, i32 1, i32 1>
|
|
99 ret <4 x i32> %3
|
|
100 }
|
|
101
|
|
102 define i32 @stack_fold_movd_store(<4 x i32> %a0) {
|
|
103 ;CHECK-LABEL: stack_fold_movd_store
|
|
104 ;CHECK: movd {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 4-byte Folded Spill
|
|
105 ; add forces execution domain
|
|
106 %1 = add <4 x i32> %a0, <i32 1, i32 1, i32 1, i32 1>
|
|
107 %2 = extractelement <4 x i32> %1, i32 0
|
|
108 %3 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
|
|
109 ret i32 %2
|
|
110 }
|
|
111
|
|
112 define <2 x i64> @stack_fold_movq_load(<2 x i64> %a0) {
|
|
113 ;CHECK-LABEL: stack_fold_movq_load
|
|
114 ;CHECK: movq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
115 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
116 %2 = shufflevector <2 x i64> %a0, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 2>
|
95
|
117 ; add forces execution domain
|
|
118 %3 = add <2 x i64> %2, <i64 1, i64 1>
|
|
119 ret <2 x i64> %3
|
83
|
120 }
|
|
121
|
|
122 define i64 @stack_fold_movq_store(<2 x i64> %a0) {
|
|
123 ;CHECK-LABEL: stack_fold_movq_store
|
|
124 ;CHECK: movq {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 8-byte Folded Spill
|
95
|
125 ; add forces execution domain
|
|
126 %1 = add <2 x i64> %a0, <i64 1, i64 1>
|
|
127 %2 = extractelement <2 x i64> %1, i32 0
|
|
128 %3 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
|
|
129 ret i64 %2
|
83
|
130 }
|
|
131
|
|
132 define <8 x i16> @stack_fold_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) {
|
|
133 ;CHECK-LABEL: stack_fold_mpsadbw
|
|
134 ;CHECK: mpsadbw $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
135 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
136 %2 = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i8 7)
|
|
137 ret <8 x i16> %2
|
|
138 }
|
|
139 declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i8) nounwind readnone
|
|
140
|
|
141 define <16 x i8> @stack_fold_pabsb(<16 x i8> %a0) {
|
|
142 ;CHECK-LABEL: stack_fold_pabsb
|
|
143 ;CHECK: pabsb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
144 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
145 %2 = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %a0)
|
|
146 ret <16 x i8> %2
|
|
147 }
|
|
148 declare <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8>) nounwind readnone
|
|
149
|
|
150 define <4 x i32> @stack_fold_pabsd(<4 x i32> %a0) {
|
|
151 ;CHECK-LABEL: stack_fold_pabsd
|
|
152 ;CHECK: pabsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
153 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
154 %2 = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> %a0)
|
|
155 ret <4 x i32> %2
|
|
156 }
|
|
157 declare <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32>) nounwind readnone
|
|
158
|
|
159 define <8 x i16> @stack_fold_pabsw(<8 x i16> %a0) {
|
|
160 ;CHECK-LABEL: stack_fold_pabsw
|
|
161 ;CHECK: pabsw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
162 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
163 %2 = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %a0)
|
|
164 ret <8 x i16> %2
|
|
165 }
|
|
166 declare <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16>) nounwind readnone
|
|
167
|
|
168 define <8 x i16> @stack_fold_packssdw(<4 x i32> %a0, <4 x i32> %a1) {
|
|
169 ;CHECK-LABEL: stack_fold_packssdw
|
|
170 ;CHECK: packssdw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
171 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
172 %2 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a0, <4 x i32> %a1)
|
|
173 ret <8 x i16> %2
|
|
174 }
|
|
175 declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone
|
|
176
|
|
177 define <16 x i8> @stack_fold_packsswb(<8 x i16> %a0, <8 x i16> %a1) {
|
|
178 ;CHECK-LABEL: stack_fold_packsswb
|
|
179 ;CHECK: packsswb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
180 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
181 %2 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a0, <8 x i16> %a1)
|
|
182 ret <16 x i8> %2
|
|
183 }
|
|
184 declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone
|
|
185
|
|
186 define <8 x i16> @stack_fold_packusdw(<4 x i32> %a0, <4 x i32> %a1) {
|
|
187 ;CHECK-LABEL: stack_fold_packusdw
|
|
188 ;CHECK: packusdw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
189 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
190 %2 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a0, <4 x i32> %a1)
|
|
191 ret <8 x i16> %2
|
|
192 }
|
|
193 declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) nounwind readnone
|
|
194
|
|
195 define <16 x i8> @stack_fold_packuswb(<8 x i16> %a0, <8 x i16> %a1) {
|
|
196 ;CHECK-LABEL: stack_fold_packuswb
|
|
197 ;CHECK: packuswb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
198 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
199 %2 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a0, <8 x i16> %a1)
|
|
200 ret <16 x i8> %2
|
|
201 }
|
|
202 declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone
|
|
203
|
|
204 define <16 x i8> @stack_fold_paddb(<16 x i8> %a0, <16 x i8> %a1) {
|
|
205 ;CHECK-LABEL: stack_fold_paddb
|
|
206 ;CHECK: paddb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
207 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
208 %2 = add <16 x i8> %a0, %a1
|
|
209 ret <16 x i8> %2
|
|
210 }
|
|
211
|
|
212 define <4 x i32> @stack_fold_paddd(<4 x i32> %a0, <4 x i32> %a1) {
|
|
213 ;CHECK-LABEL: stack_fold_paddd
|
|
214 ;CHECK: paddd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
215 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
216 %2 = add <4 x i32> %a0, %a1
|
|
217 ret <4 x i32> %2
|
|
218 }
|
|
219
|
|
220 define <2 x i64> @stack_fold_paddq(<2 x i64> %a0, <2 x i64> %a1) {
|
|
221 ;CHECK-LABEL: stack_fold_paddq
|
|
222 ;CHECK: paddq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
223 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
224 %2 = add <2 x i64> %a0, %a1
|
|
225 ret <2 x i64> %2
|
|
226 }
|
|
227
|
|
228 define <16 x i8> @stack_fold_paddsb(<16 x i8> %a0, <16 x i8> %a1) {
|
|
229 ;CHECK-LABEL: stack_fold_paddsb
|
|
230 ;CHECK: paddsb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
231 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
232 %2 = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a0, <16 x i8> %a1)
|
|
233 ret <16 x i8> %2
|
|
234 }
|
|
235 declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone
|
|
236
|
|
237 define <8 x i16> @stack_fold_paddsw(<8 x i16> %a0, <8 x i16> %a1) {
|
|
238 ;CHECK-LABEL: stack_fold_paddsw
|
|
239 ;CHECK: paddsw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
240 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
241 %2 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a0, <8 x i16> %a1)
|
|
242 ret <8 x i16> %2
|
|
243 }
|
|
244 declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone
|
|
245
|
|
246 define <16 x i8> @stack_fold_paddusb(<16 x i8> %a0, <16 x i8> %a1) {
|
|
247 ;CHECK-LABEL: stack_fold_paddusb
|
|
248 ;CHECK: paddusb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
249 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
250 %2 = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %a0, <16 x i8> %a1)
|
|
251 ret <16 x i8> %2
|
|
252 }
|
|
253 declare <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8>, <16 x i8>) nounwind readnone
|
|
254
|
|
255 define <8 x i16> @stack_fold_paddusw(<8 x i16> %a0, <8 x i16> %a1) {
|
|
256 ;CHECK-LABEL: stack_fold_paddusw
|
|
257 ;CHECK: paddusw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
258 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
259 %2 = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %a0, <8 x i16> %a1)
|
|
260 ret <8 x i16> %2
|
|
261 }
|
|
262 declare <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnone
|
|
263
|
|
264 define <8 x i16> @stack_fold_paddw(<8 x i16> %a0, <8 x i16> %a1) {
|
|
265 ;CHECK-LABEL: stack_fold_paddw
|
|
266 ;CHECK: paddw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
267 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
268 %2 = add <8 x i16> %a0, %a1
|
|
269 ret <8 x i16> %2
|
|
270 }
|
|
271
|
|
272 define <16 x i8> @stack_fold_palignr(<16 x i8> %a0, <16 x i8> %a1) {
|
|
273 ;CHECK-LABEL: stack_fold_palignr
|
|
274 ;CHECK: palignr $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
275 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
276 %2 = shufflevector <16 x i8> %a1, <16 x i8> %a0, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>
|
|
277 ret <16 x i8> %2
|
|
278 }
|
|
279
|
|
280 define <16 x i8> @stack_fold_pand(<16 x i8> %a0, <16 x i8> %a1) {
|
|
281 ;CHECK-LABEL: stack_fold_pand
|
|
282 ;CHECK: pand {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
283 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
284 %2 = and <16 x i8> %a0, %a1
|
|
285 ; add forces execution domain
|
|
286 %3 = add <16 x i8> %2, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
|
|
287 ret <16 x i8> %3
|
|
288 }
|
|
289
|
|
290 define <16 x i8> @stack_fold_pandn(<16 x i8> %a0, <16 x i8> %a1) {
|
|
291 ;CHECK-LABEL: stack_fold_pandn
|
|
292 ;CHECK: pandn {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
293 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
294 %2 = xor <16 x i8> %a0, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
|
|
295 %3 = and <16 x i8> %2, %a1
|
|
296 ; add forces execution domain
|
|
297 %4 = add <16 x i8> %3, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
|
|
298 ret <16 x i8> %4
|
|
299 }
|
|
300
|
|
301 define <16 x i8> @stack_fold_pavgb(<16 x i8> %a0, <16 x i8> %a1) {
|
|
302 ;CHECK-LABEL: stack_fold_pavgb
|
|
303 ;CHECK: pavgb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
304 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
305 %2 = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %a0, <16 x i8> %a1)
|
|
306 ret <16 x i8> %2
|
|
307 }
|
|
308 declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>) nounwind readnone
|
|
309
|
|
310 define <8 x i16> @stack_fold_pavgw(<8 x i16> %a0, <8 x i16> %a1) {
|
|
311 ;CHECK-LABEL: stack_fold_pavgw
|
|
312 ;CHECK: pavgw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
313 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
314 %2 = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %a0, <8 x i16> %a1)
|
|
315 ret <8 x i16> %2
|
|
316 }
|
|
317 declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone
|
|
318
|
|
319 define <16 x i8> @stack_fold_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %c) {
|
|
320 ;CHECK-LABEL: stack_fold_pblendvb
|
|
321 ;CHECK: pblendvb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
322 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
323 %2 = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a1, <16 x i8> %c, <16 x i8> %a0)
|
|
324 ret <16 x i8> %2
|
|
325 }
|
|
326 declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
|
|
327
|
|
328 define <8 x i16> @stack_fold_pblendw(<8 x i16> %a0, <8 x i16> %a1) {
|
|
329 ;CHECK-LABEL: stack_fold_pblendw
|
|
330 ;CHECK: pblendw $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
331 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
332 %2 = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i8 7)
|
|
333 ret <8 x i16> %2
|
|
334 }
|
|
335 declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i8) nounwind readnone
|
|
336
|
|
337 define <2 x i64> @stack_fold_pclmulqdq(<2 x i64> %a0, <2 x i64> %a1) {
|
|
338 ;CHECK-LABEL: stack_fold_pclmulqdq
|
|
339 ;CHECK: pclmulqdq $0, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
340 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
341 %2 = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %a0, <2 x i64> %a1, i8 0)
|
|
342 ret <2 x i64> %2
|
|
343 }
|
|
344 declare <2 x i64> @llvm.x86.pclmulqdq(<2 x i64>, <2 x i64>, i8) nounwind readnone
|
|
345
|
|
346 define <16 x i8> @stack_fold_pcmpeqb(<16 x i8> %a0, <16 x i8> %a1) {
|
|
347 ;CHECK-LABEL: stack_fold_pcmpeqb
|
|
348 ;CHECK: pcmpeqb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
349 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
350 %2 = icmp eq <16 x i8> %a0, %a1
|
|
351 %3 = sext <16 x i1> %2 to <16 x i8>
|
|
352 ret <16 x i8> %3
|
|
353 }
|
|
354
|
|
355 define <4 x i32> @stack_fold_pcmpeqd(<4 x i32> %a0, <4 x i32> %a1) {
|
|
356 ;CHECK-LABEL: stack_fold_pcmpeqd
|
|
357 ;CHECK: pcmpeqd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
358 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
359 %2 = icmp eq <4 x i32> %a0, %a1
|
|
360 %3 = sext <4 x i1> %2 to <4 x i32>
|
|
361 ret <4 x i32> %3
|
|
362 }
|
|
363
|
|
364 define <2 x i64> @stack_fold_pcmpeqq(<2 x i64> %a0, <2 x i64> %a1) {
|
|
365 ;CHECK-LABEL: stack_fold_pcmpeqq
|
|
366 ;CHECK: pcmpeqq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
367 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
368 %2 = icmp eq <2 x i64> %a0, %a1
|
|
369 %3 = sext <2 x i1> %2 to <2 x i64>
|
|
370 ret <2 x i64> %3
|
|
371 }
|
|
372
|
|
373 define <8 x i16> @stack_fold_pcmpeqw(<8 x i16> %a0, <8 x i16> %a1) {
|
|
374 ;CHECK-LABEL: stack_fold_pcmpeqw
|
|
375 ;CHECK: pcmpeqw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
376 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
377 %2 = icmp eq <8 x i16> %a0, %a1
|
|
378 %3 = sext <8 x i1> %2 to <8 x i16>
|
|
379 ret <8 x i16> %3
|
|
380 }
|
|
381
|
|
382 define i32 @stack_fold_pcmpestri(<16 x i8> %a0, <16 x i8> %a1) {
|
|
383 ;CHECK-LABEL: stack_fold_pcmpestri
|
|
384 ;CHECK: pcmpestri $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
385 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{rax},~{flags}"()
|
|
386 %2 = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %a0, i32 7, <16 x i8> %a1, i32 7, i8 7)
|
|
387 ret i32 %2
|
|
388 }
|
|
389 declare i32 @llvm.x86.sse42.pcmpestri128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
|
|
390
|
|
391 define <16 x i8> @stack_fold_pcmpestrm(<16 x i8> %a0, <16 x i8> %a1) {
|
|
392 ;CHECK-LABEL: stack_fold_pcmpestrm
|
|
393 ;CHECK: pcmpestrm $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
394 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{rax},~{flags}"()
|
|
395 %2 = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %a0, i32 7, <16 x i8> %a1, i32 7, i8 7)
|
|
396 ret <16 x i8> %2
|
|
397 }
|
|
398 declare <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
|
|
399
|
|
400 define <16 x i8> @stack_fold_pcmpgtb(<16 x i8> %a0, <16 x i8> %a1) {
|
|
401 ;CHECK-LABEL: stack_fold_pcmpgtb
|
|
402 ;CHECK: pcmpgtb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
403 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
404 %2 = icmp sgt <16 x i8> %a0, %a1
|
|
405 %3 = sext <16 x i1> %2 to <16 x i8>
|
|
406 ret <16 x i8> %3
|
|
407 }
|
|
408
|
|
409 define <4 x i32> @stack_fold_pcmpgtd(<4 x i32> %a0, <4 x i32> %a1) {
|
|
410 ;CHECK-LABEL: stack_fold_pcmpgtd
|
|
411 ;CHECK: pcmpgtd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
412 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
413 %2 = icmp sgt <4 x i32> %a0, %a1
|
|
414 %3 = sext <4 x i1> %2 to <4 x i32>
|
|
415 ret <4 x i32> %3
|
|
416 }
|
|
417
|
|
418 define <2 x i64> @stack_fold_pcmpgtq(<2 x i64> %a0, <2 x i64> %a1) {
|
|
419 ;CHECK-LABEL: stack_fold_pcmpgtq
|
|
420 ;CHECK: pcmpgtq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
421 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
422 %2 = icmp sgt <2 x i64> %a0, %a1
|
|
423 %3 = sext <2 x i1> %2 to <2 x i64>
|
|
424 ret <2 x i64> %3
|
|
425 }
|
|
426
|
|
427 define <8 x i16> @stack_fold_pcmpgtw(<8 x i16> %a0, <8 x i16> %a1) {
|
|
428 ;CHECK-LABEL: stack_fold_pcmpgtw
|
|
429 ;CHECK: pcmpgtw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
430 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
431 %2 = icmp sgt <8 x i16> %a0, %a1
|
|
432 %3 = sext <8 x i1> %2 to <8 x i16>
|
|
433 ret <8 x i16> %3
|
|
434 }
|
|
435
|
|
436 define i32 @stack_fold_pcmpistri(<16 x i8> %a0, <16 x i8> %a1) {
|
|
437 ;CHECK-LABEL: stack_fold_pcmpistri
|
|
438 ;CHECK: pcmpistri $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
439 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
440 %2 = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %a0, <16 x i8> %a1, i8 7)
|
|
441 ret i32 %2
|
|
442 }
|
|
443 declare i32 @llvm.x86.sse42.pcmpistri128(<16 x i8>, <16 x i8>, i8) nounwind readnone
|
|
444
|
|
445 define <16 x i8> @stack_fold_pcmpistrm(<16 x i8> %a0, <16 x i8> %a1) {
|
|
446 ;CHECK-LABEL: stack_fold_pcmpistrm
|
|
447 ;CHECK: pcmpistrm $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
448 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
449 %2 = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1, i8 7)
|
|
450 ret <16 x i8> %2
|
|
451 }
|
|
452 declare <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8>, <16 x i8>, i8) nounwind readnone
|
|
453
|
|
454 ; TODO stack_fold_pextrb
|
|
455
|
|
456 define i32 @stack_fold_pextrd(<4 x i32> %a0) {
|
|
457 ;CHECK-LABEL: stack_fold_pextrd
|
|
458 ;CHECK: pextrd $1, {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 4-byte Folded Spill
|
|
459 ;CHECK: movl {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Reload
|
|
460 %1 = extractelement <4 x i32> %a0, i32 1
|
|
461 %2 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
|
|
462 ret i32 %1
|
|
463 }
|
|
464
|
|
465 define i64 @stack_fold_pextrq(<2 x i64> %a0) {
|
|
466 ;CHECK-LABEL: stack_fold_pextrq
|
|
467 ;CHECK: pextrq $1, {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 8-byte Folded Spill
|
|
468 ;CHECK: movq {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 8-byte Reload
|
|
469 %1 = extractelement <2 x i64> %a0, i32 1
|
|
470 %2 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
|
|
471 ret i64 %1
|
|
472 }
|
|
473
|
|
474 ; TODO stack_fold_pextrw
|
|
475
|
|
476 define <4 x i32> @stack_fold_phaddd(<4 x i32> %a0, <4 x i32> %a1) {
|
|
477 ;CHECK-LABEL: stack_fold_phaddd
|
|
478 ;CHECK: phaddd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
479 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
480 %2 = call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %a0, <4 x i32> %a1)
|
|
481 ret <4 x i32> %2
|
|
482 }
|
|
483 declare <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32>, <4 x i32>) nounwind readnone
|
|
484
|
|
485 define <8 x i16> @stack_fold_phaddsw(<8 x i16> %a0, <8 x i16> %a1) {
|
|
486 ;CHECK-LABEL: stack_fold_phaddsw
|
|
487 ;CHECK: phaddsw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
488 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
489 %2 = call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> %a0, <8 x i16> %a1)
|
|
490 ret <8 x i16> %2
|
|
491 }
|
|
492 declare <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
|
|
493
|
|
494 define <8 x i16> @stack_fold_phaddw(<8 x i16> %a0, <8 x i16> %a1) {
|
|
495 ;CHECK-LABEL: stack_fold_phaddw
|
|
496 ;CHECK: phaddw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
497 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
498 %2 = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %a0, <8 x i16> %a1)
|
|
499 ret <8 x i16> %2
|
|
500 }
|
|
501 declare <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16>, <8 x i16>) nounwind readnone
|
|
502
|
|
503 define <8 x i16> @stack_fold_phminposuw(<8 x i16> %a0) {
|
|
504 ;CHECK-LABEL: stack_fold_phminposuw
|
|
505 ;CHECK: phminposuw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
506 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
507 %2 = call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %a0)
|
|
508 ret <8 x i16> %2
|
|
509 }
|
|
510 declare <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16>) nounwind readnone
|
|
511
|
|
512 define <4 x i32> @stack_fold_phsubd(<4 x i32> %a0, <4 x i32> %a1) {
|
|
513 ;CHECK-LABEL: stack_fold_phsubd
|
|
514 ;CHECK: phsubd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
515 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
516 %2 = call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %a0, <4 x i32> %a1)
|
|
517 ret <4 x i32> %2
|
|
518 }
|
|
519 declare <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32>, <4 x i32>) nounwind readnone
|
|
520
|
|
521 define <8 x i16> @stack_fold_phsubsw(<8 x i16> %a0, <8 x i16> %a1) {
|
|
522 ;CHECK-LABEL: stack_fold_phsubsw
|
|
523 ;CHECK: phsubsw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
524 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
525 %2 = call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %a0, <8 x i16> %a1)
|
|
526 ret <8 x i16> %2
|
|
527 }
|
|
528 declare <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
|
|
529
|
|
530 define <8 x i16> @stack_fold_phsubw(<8 x i16> %a0, <8 x i16> %a1) {
|
|
531 ;CHECK-LABEL: stack_fold_phsubw
|
|
532 ;CHECK: phsubw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
533 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
534 %2 = call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %a0, <8 x i16> %a1)
|
|
535 ret <8 x i16> %2
|
|
536 }
|
|
537 declare <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16>, <8 x i16>) nounwind readnone
|
|
538
|
|
539 define <16 x i8> @stack_fold_pinsrb(<16 x i8> %a0, i8 %a1) {
|
|
540 ;CHECK-LABEL: stack_fold_pinsrb
|
|
541 ;CHECK: pinsrb $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
|
|
542 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
|
|
543 %2 = insertelement <16 x i8> %a0, i8 %a1, i32 1
|
|
544 ret <16 x i8> %2
|
|
545 }
|
|
546
|
|
547 define <4 x i32> @stack_fold_pinsrd(<4 x i32> %a0, i32 %a1) {
|
|
548 ;CHECK-LABEL: stack_fold_pinsrd
|
|
549 ;CHECK: pinsrd $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
|
|
550 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
|
|
551 %2 = insertelement <4 x i32> %a0, i32 %a1, i32 1
|
|
552 ret <4 x i32> %2
|
|
553 }
|
|
554
|
|
555 define <2 x i64> @stack_fold_pinsrq(<2 x i64> %a0, i64 %a1) {
|
|
556 ;CHECK-LABEL: stack_fold_pinsrq
|
|
557 ;CHECK: pinsrq $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
|
|
558 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
|
|
559 %2 = insertelement <2 x i64> %a0, i64 %a1, i32 1
|
|
560 ret <2 x i64> %2
|
|
561 }
|
|
562
|
|
563 define <8 x i16> @stack_fold_pinsrw(<8 x i16> %a0, i16 %a1) {
|
|
564 ;CHECK-LABEL: stack_fold_pinsrw
|
|
565 ;CHECK: pinsrw $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
|
|
566 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
|
|
567 %2 = insertelement <8 x i16> %a0, i16 %a1, i32 1
|
|
568 ret <8 x i16> %2
|
|
569 }
|
|
570
|
|
571 define <8 x i16> @stack_fold_pmaddubsw(<16 x i8> %a0, <16 x i8> %a1) {
|
|
572 ;CHECK-LABEL: stack_fold_pmaddubsw
|
|
573 ;CHECK: pmaddubsw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
574 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
575 %2 = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %a0, <16 x i8> %a1)
|
|
576 ret <8 x i16> %2
|
|
577 }
|
|
578 declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>) nounwind readnone
|
|
579
|
|
580 define <4 x i32> @stack_fold_pmaddwd(<8 x i16> %a0, <8 x i16> %a1) {
|
|
581 ;CHECK-LABEL: stack_fold_pmaddwd
|
|
582 ;CHECK: pmaddwd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
583 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
584 %2 = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> %a1)
|
|
585 ret <4 x i32> %2
|
|
586 }
|
|
587 declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone
|
|
588
|
|
589 define <16 x i8> @stack_fold_pmaxsb(<16 x i8> %a0, <16 x i8> %a1) {
|
|
590 ;CHECK-LABEL: stack_fold_pmaxsb
|
|
591 ;CHECK: pmaxsb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
592 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
593 %2 = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %a0, <16 x i8> %a1)
|
|
594 ret <16 x i8> %2
|
|
595 }
|
|
596 declare <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8>, <16 x i8>) nounwind readnone
|
|
597
|
|
598 define <4 x i32> @stack_fold_pmaxsd(<4 x i32> %a0, <4 x i32> %a1) {
|
|
599 ;CHECK-LABEL: stack_fold_pmaxsd
|
|
600 ;CHECK: pmaxsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
601 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
602 %2 = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %a0, <4 x i32> %a1)
|
|
603 ret <4 x i32> %2
|
|
604 }
|
|
605 declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone
|
|
606
|
|
607 define <8 x i16> @stack_fold_pmaxsw(<8 x i16> %a0, <8 x i16> %a1) {
|
|
608 ;CHECK-LABEL: stack_fold_pmaxsw
|
|
609 ;CHECK: pmaxsw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
610 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
611 %2 = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %a0, <8 x i16> %a1)
|
|
612 ret <8 x i16> %2
|
|
613 }
|
|
614 declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone
|
|
615
|
|
616 define <16 x i8> @stack_fold_pmaxub(<16 x i8> %a0, <16 x i8> %a1) {
|
|
617 ;CHECK-LABEL: stack_fold_pmaxub
|
|
618 ;CHECK: pmaxub {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
619 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
620 %2 = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %a0, <16 x i8> %a1)
|
|
621 ret <16 x i8> %2
|
|
622 }
|
|
623 declare <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8>, <16 x i8>) nounwind readnone
|
|
624
|
|
625 define <4 x i32> @stack_fold_pmaxud(<4 x i32> %a0, <4 x i32> %a1) {
|
|
626 ;CHECK-LABEL: stack_fold_pmaxud
|
|
627 ;CHECK: pmaxud {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
628 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
629 %2 = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %a0, <4 x i32> %a1)
|
|
630 ret <4 x i32> %2
|
|
631 }
|
|
632 declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone
|
|
633
|
|
634 define <8 x i16> @stack_fold_pmaxuw(<8 x i16> %a0, <8 x i16> %a1) {
|
|
635 ;CHECK-LABEL: stack_fold_pmaxuw
|
|
636 ;CHECK: pmaxuw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
637 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
638 %2 = call <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16> %a0, <8 x i16> %a1)
|
|
639 ret <8 x i16> %2
|
|
640 }
|
|
641 declare <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16>, <8 x i16>) nounwind readnone
|
|
642
|
|
643 define <16 x i8> @stack_fold_pminsb(<16 x i8> %a0, <16 x i8> %a1) {
|
|
644 ;CHECK-LABEL: stack_fold_pminsb
|
|
645 ;CHECK: pminsb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
646 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
647 %2 = call <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8> %a0, <16 x i8> %a1)
|
|
648 ret <16 x i8> %2
|
|
649 }
|
|
650 declare <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8>, <16 x i8>) nounwind readnone
|
|
651
|
|
652 define <4 x i32> @stack_fold_pminsd(<4 x i32> %a0, <4 x i32> %a1) {
|
|
653 ;CHECK-LABEL: stack_fold_pminsd
|
|
654 ;CHECK: pminsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
655 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
656 %2 = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %a0, <4 x i32> %a1)
|
|
657 ret <4 x i32> %2
|
|
658 }
|
|
659 declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone
|
|
660
|
|
661 define <8 x i16> @stack_fold_pminsw(<8 x i16> %a0, <8 x i16> %a1) {
|
|
662 ;CHECK-LABEL: stack_fold_pminsw
|
|
663 ;CHECK: pminsw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
664 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
665 %2 = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %a0, <8 x i16> %a1)
|
|
666 ret <8 x i16> %2
|
|
667 }
|
|
668 declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone
|
|
669
|
|
670 define <16 x i8> @stack_fold_pminub(<16 x i8> %a0, <16 x i8> %a1) {
|
|
671 ;CHECK-LABEL: stack_fold_pminub
|
|
672 ;CHECK: pminub {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
673 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
674 %2 = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %a0, <16 x i8> %a1)
|
|
675 ret <16 x i8> %2
|
|
676 }
|
|
677 declare <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8>, <16 x i8>) nounwind readnone
|
|
678
|
|
679 define <4 x i32> @stack_fold_pminud(<4 x i32> %a0, <4 x i32> %a1) {
|
|
680 ;CHECK-LABEL: stack_fold_pminud
|
|
681 ;CHECK: pminud {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
682 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
683 %2 = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %a0, <4 x i32> %a1)
|
|
684 ret <4 x i32> %2
|
|
685 }
|
|
686 declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone
|
|
687
|
|
688 define <8 x i16> @stack_fold_pminuw(<8 x i16> %a0, <8 x i16> %a1) {
|
|
689 ;CHECK-LABEL: stack_fold_pminuw
|
|
690 ;CHECK: pminuw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
691 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
692 %2 = call <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16> %a0, <8 x i16> %a1)
|
|
693 ret <8 x i16> %2
|
|
694 }
|
|
695 declare <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16>, <8 x i16>) nounwind readnone
|
|
696
|
|
697 define <4 x i32> @stack_fold_pmovsxbd(<16 x i8> %a0) {
|
|
698 ;CHECK-LABEL: stack_fold_pmovsxbd
|
|
699 ;CHECK: pmovsxbd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
700 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
95
|
701 %2 = shufflevector <16 x i8> %a0, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
702 %3 = sext <4 x i8> %2 to <4 x i32>
|
|
703 ret <4 x i32> %3
|
83
|
704 }
|
|
705
|
|
706 define <2 x i64> @stack_fold_pmovsxbq(<16 x i8> %a0) {
|
|
707 ;CHECK-LABEL: stack_fold_pmovsxbq
|
|
708 ;CHECK: pmovsxbq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
709 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
95
|
710 %2 = shufflevector <16 x i8> %a0, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
|
|
711 %3 = sext <2 x i8> %2 to <2 x i64>
|
|
712 ret <2 x i64> %3
|
83
|
713 }
|
|
714
|
|
715 define <8 x i16> @stack_fold_pmovsxbw(<16 x i8> %a0) {
|
|
716 ;CHECK-LABEL: stack_fold_pmovsxbw
|
|
717 ;CHECK: pmovsxbw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
718 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
95
|
719 %2 = shufflevector <16 x i8> %a0, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
|
720 %3 = sext <8 x i8> %2 to <8 x i16>
|
|
721 ret <8 x i16> %3
|
83
|
722 }
|
|
723
|
|
724 define <2 x i64> @stack_fold_pmovsxdq(<4 x i32> %a0) {
|
|
725 ;CHECK-LABEL: stack_fold_pmovsxdq
|
|
726 ;CHECK: pmovsxdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
727 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
95
|
728 %2 = shufflevector <4 x i32> %a0, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
|
|
729 %3 = sext <2 x i32> %2 to <2 x i64>
|
|
730 ret <2 x i64> %3
|
83
|
731 }
|
|
732
|
|
733 define <4 x i32> @stack_fold_pmovsxwd(<8 x i16> %a0) {
|
|
734 ;CHECK-LABEL: stack_fold_pmovsxwd
|
|
735 ;CHECK: pmovsxwd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
736 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
95
|
737 %2 = shufflevector <8 x i16> %a0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
738 %3 = sext <4 x i16> %2 to <4 x i32>
|
|
739 ret <4 x i32> %3
|
83
|
740 }
|
|
741
|
|
742 define <2 x i64> @stack_fold_pmovsxwq(<8 x i16> %a0) {
|
|
743 ;CHECK-LABEL: stack_fold_pmovsxwq
|
|
744 ;CHECK: pmovsxwq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
745 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
95
|
746 %2 = shufflevector <8 x i16> %a0, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
|
|
747 %3 = sext <2 x i16> %2 to <2 x i64>
|
|
748 ret <2 x i64> %3
|
83
|
749 }
|
|
750
|
|
751 define <4 x i32> @stack_fold_pmovzxbd(<16 x i8> %a0) {
|
|
752 ;CHECK-LABEL: stack_fold_pmovzxbd
|
|
753 ;CHECK: pmovzxbd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
754 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
95
|
755 %2 = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 17, i32 18, i32 1, i32 19, i32 20, i32 21, i32 2, i32 22, i32 23, i32 24, i32 3, i32 25, i32 26, i32 27>
|
|
756 %3 = bitcast <16 x i8> %2 to <4 x i32>
|
|
757 ret <4 x i32> %3
|
83
|
758 }
|
|
759
|
|
760 define <2 x i64> @stack_fold_pmovzxbq(<16 x i8> %a0) {
|
|
761 ;CHECK-LABEL: stack_fold_pmovzxbq
|
|
762 ;CHECK: pmovzxbq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
763 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
95
|
764 %2 = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 1, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28>
|
|
765 %3 = bitcast <16 x i8> %2 to <2 x i64>
|
|
766 ret <2 x i64> %3
|
83
|
767 }
|
|
768
|
|
769 define <8 x i16> @stack_fold_pmovzxbw(<16 x i8> %a0) {
|
|
770 ;CHECK-LABEL: stack_fold_pmovzxbw
|
|
771 ;CHECK: pmovzxbw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
772 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
95
|
773 %2 = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
|
|
774 %3 = bitcast <16 x i8> %2 to <8 x i16>
|
|
775 ret <8 x i16> %3
|
83
|
776 }
|
|
777
|
|
778 define <2 x i64> @stack_fold_pmovzxdq(<4 x i32> %a0) {
|
|
779 ;CHECK-LABEL: stack_fold_pmovzxdq
|
|
780 ;CHECK: pmovzxdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
781 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
95
|
782 %2 = shufflevector <4 x i32> %a0, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
|
|
783 %3 = bitcast <4 x i32> %2 to <2 x i64>
|
|
784 ret <2 x i64> %3
|
83
|
785 }
|
|
786
|
|
787 define <4 x i32> @stack_fold_pmovzxwd(<8 x i16> %a0) {
|
|
788 ;CHECK-LABEL: stack_fold_pmovzxwd
|
|
789 ;CHECK: pmovzxwd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
790 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
95
|
791 %2 = shufflevector <8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
|
|
792 %3 = bitcast <8 x i16> %2 to <4 x i32>
|
|
793 ret <4 x i32> %3
|
83
|
794 }
|
|
795
|
|
796 define <2 x i64> @stack_fold_pmovzxwq(<8 x i16> %a0) {
|
|
797 ;CHECK-LABEL: stack_fold_pmovzxwq
|
|
798 ;CHECK: pmovzxwq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
799 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
95
|
800 %2 = shufflevector <8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 9, i32 10, i32 1, i32 11, i32 12, i32 13>
|
|
801 %3 = bitcast <8 x i16> %2 to <2 x i64>
|
|
802 ret <2 x i64> %3
|
83
|
803 }
|
|
804
|
|
805 define <2 x i64> @stack_fold_pmuldq(<4 x i32> %a0, <4 x i32> %a1) {
|
|
806 ;CHECK-LABEL: stack_fold_pmuldq
|
|
807 ;CHECK: pmuldq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
808 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
809 %2 = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %a0, <4 x i32> %a1)
|
|
810 ret <2 x i64> %2
|
|
811 }
|
|
812 declare <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32>, <4 x i32>) nounwind readnone
|
|
813
|
|
814 define <8 x i16> @stack_fold_pmulhrsw(<8 x i16> %a0, <8 x i16> %a1) {
|
|
815 ;CHECK-LABEL: stack_fold_pmulhrsw
|
|
816 ;CHECK: pmulhrsw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
817 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
818 %2 = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %a0, <8 x i16> %a1)
|
|
819 ret <8 x i16> %2
|
|
820 }
|
|
821 declare <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
|
|
822
|
|
823 define <8 x i16> @stack_fold_pmulhuw(<8 x i16> %a0, <8 x i16> %a1) {
|
|
824 ;CHECK-LABEL: stack_fold_pmulhuw
|
|
825 ;CHECK: pmulhuw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
826 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
827 %2 = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> %a1)
|
|
828 ret <8 x i16> %2
|
|
829 }
|
|
830 declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone
|
|
831
|
|
832 define <8 x i16> @stack_fold_pmulhw(<8 x i16> %a0, <8 x i16> %a1) {
|
|
833 ;CHECK-LABEL: stack_fold_pmulhw
|
|
834 ;CHECK: pmulhw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
835 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
836 %2 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %a0, <8 x i16> %a1)
|
|
837 ret <8 x i16> %2
|
|
838 }
|
|
839 declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone
|
|
840
|
|
841 define <4 x i32> @stack_fold_pmulld(<4 x i32> %a0, <4 x i32> %a1) {
|
|
842 ;CHECK-LABEL: stack_fold_pmulld
|
|
843 ;CHECK: pmulld {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
844 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
845 %2 = mul <4 x i32> %a0, %a1
|
|
846 ret <4 x i32> %2
|
|
847 }
|
|
848
|
|
849 define <8 x i16> @stack_fold_pmullw(<8 x i16> %a0, <8 x i16> %a1) {
|
|
850 ;CHECK-LABEL: stack_fold_pmullw
|
|
851 ;CHECK: pmullw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
852 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
853 %2 = mul <8 x i16> %a0, %a1
|
|
854 ret <8 x i16> %2
|
|
855 }
|
|
856
|
|
857 define <2 x i64> @stack_fold_pmuludq(<4 x i32> %a0, <4 x i32> %a1) {
|
|
858 ;CHECK-LABEL: stack_fold_pmuludq
|
|
859 ;CHECK: pmuludq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
860 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
861 %2 = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %a0, <4 x i32> %a1)
|
|
862 ret <2 x i64> %2
|
|
863 }
|
|
864 declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone
|
|
865
|
|
866 define <16 x i8> @stack_fold_por(<16 x i8> %a0, <16 x i8> %a1) {
|
|
867 ;CHECK-LABEL: stack_fold_por
|
|
868 ;CHECK: por {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
869 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
870 %2 = or <16 x i8> %a0, %a1
|
|
871 ; add forces execution domain
|
|
872 %3 = add <16 x i8> %2, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
|
|
873 ret <16 x i8> %3
|
|
874 }
|
|
875
|
|
876 define <2 x i64> @stack_fold_psadbw(<16 x i8> %a0, <16 x i8> %a1) {
|
|
877 ;CHECK-LABEL: stack_fold_psadbw
|
|
878 ;CHECK: psadbw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
879 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
880 %2 = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %a0, <16 x i8> %a1)
|
|
881 ret <2 x i64> %2
|
|
882 }
|
|
883 declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone
|
|
884
|
|
885 define <16 x i8> @stack_fold_pshufb(<16 x i8> %a0, <16 x i8> %a1) {
|
|
886 ;CHECK-LABEL: stack_fold_pshufb
|
|
887 ;CHECK: pshufb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
888 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
889 %2 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> %a1)
|
|
890 ret <16 x i8> %2
|
|
891 }
|
|
892 declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>) nounwind readnone
|
|
893
|
|
894 define <4 x i32> @stack_fold_pshufd(<4 x i32> %a0) {
|
|
895 ;CHECK-LABEL: stack_fold_pshufd
|
|
896 ;CHECK: pshufd $27, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
897 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
898 %2 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
|
|
899 ret <4 x i32> %2
|
|
900 }
|
|
901
|
|
902 define <8 x i16> @stack_fold_pshufhw(<8 x i16> %a0) {
|
|
903 ;CHECK-LABEL: stack_fold_pshufhw
|
|
904 ;CHECK: pshufhw $11, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
905 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
906 %2 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 6, i32 4, i32 4>
|
|
907 ret <8 x i16> %2
|
|
908 }
|
|
909
|
|
910 define <8 x i16> @stack_fold_pshuflw(<8 x i16> %a0) {
|
|
911 ;CHECK-LABEL: stack_fold_pshuflw
|
|
912 ;CHECK: pshuflw $27, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
913 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
914 %2 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
|
|
915 ret <8 x i16> %2
|
|
916 }
|
|
917
|
|
918 define <16 x i8> @stack_fold_psignb(<16 x i8> %a0, <16 x i8> %a1) {
|
|
919 ;CHECK-LABEL: stack_fold_psignb
|
|
920 ;CHECK: psignb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
921 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
922 %2 = call <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8> %a0, <16 x i8> %a1)
|
|
923 ret <16 x i8> %2
|
|
924 }
|
|
925 declare <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8>, <16 x i8>) nounwind readnone
|
|
926
|
|
927 define <4 x i32> @stack_fold_psignd(<4 x i32> %a0, <4 x i32> %a1) {
|
|
928 ;CHECK-LABEL: stack_fold_psignd
|
|
929 ;CHECK: psignd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
930 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
931 %2 = call <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32> %a0, <4 x i32> %a1)
|
|
932 ret <4 x i32> %2
|
|
933 }
|
|
934 declare <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32>, <4 x i32>) nounwind readnone
|
|
935
|
|
936 define <8 x i16> @stack_fold_psignw(<8 x i16> %a0, <8 x i16> %a1) {
|
|
937 ;CHECK-LABEL: stack_fold_psignw
|
|
938 ;CHECK: psignw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
939 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
940 %2 = call <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16> %a0, <8 x i16> %a1)
|
|
941 ret <8 x i16> %2
|
|
942 }
|
|
943 declare <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16>, <8 x i16>) nounwind readnone
|
|
944
|
|
945 define <4 x i32> @stack_fold_pslld(<4 x i32> %a0, <4 x i32> %a1) {
|
|
946 ;CHECK-LABEL: stack_fold_pslld
|
|
947 ;CHECK: pslld {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
948 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
949 %2 = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %a0, <4 x i32> %a1)
|
|
950 ret <4 x i32> %2
|
|
951 }
|
|
952 declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone
|
|
953
|
|
954 define <2 x i64> @stack_fold_psllq(<2 x i64> %a0, <2 x i64> %a1) {
|
|
955 ;CHECK-LABEL: stack_fold_psllq
|
|
956 ;CHECK: psllq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
957 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
958 %2 = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1)
|
|
959 ret <2 x i64> %2
|
|
960 }
|
|
961 declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone
|
|
962
|
|
963 define <8 x i16> @stack_fold_psllw(<8 x i16> %a0, <8 x i16> %a1) {
|
|
964 ;CHECK-LABEL: stack_fold_psllw
|
|
965 ;CHECK: psllw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
966 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
967 %2 = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %a0, <8 x i16> %a1)
|
|
968 ret <8 x i16> %2
|
|
969 }
|
|
970 declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone
|
|
971
|
|
972 define <4 x i32> @stack_fold_psrad(<4 x i32> %a0, <4 x i32> %a1) {
|
|
973 ;CHECK-LABEL: stack_fold_psrad
|
|
974 ;CHECK: psrad {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
975 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
976 %2 = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %a0, <4 x i32> %a1)
|
|
977 ret <4 x i32> %2
|
|
978 }
|
|
979 declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone
|
|
980
|
|
981 define <8 x i16> @stack_fold_psraw(<8 x i16> %a0, <8 x i16> %a1) {
|
|
982 ;CHECK-LABEL: stack_fold_psraw
|
|
983 ;CHECK: psraw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
984 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
985 %2 = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %a0, <8 x i16> %a1)
|
|
986 ret <8 x i16> %2
|
|
987 }
|
|
988 declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone
|
|
989
|
|
990 define <4 x i32> @stack_fold_psrld(<4 x i32> %a0, <4 x i32> %a1) {
|
|
991 ;CHECK-LABEL: stack_fold_psrld
|
|
992 ;CHECK: psrld {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
993 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
994 %2 = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %a0, <4 x i32> %a1)
|
|
995 ret <4 x i32> %2
|
|
996 }
|
|
997 declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone
|
|
998
|
|
999 define <2 x i64> @stack_fold_psrlq(<2 x i64> %a0, <2 x i64> %a1) {
|
|
1000 ;CHECK-LABEL: stack_fold_psrlq
|
|
1001 ;CHECK: psrlq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
1002 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
1003 %2 = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1)
|
|
1004 ret <2 x i64> %2
|
|
1005 }
|
|
1006 declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone
|
|
1007
|
|
1008 define <8 x i16> @stack_fold_psrlw(<8 x i16> %a0, <8 x i16> %a1) {
|
|
1009 ;CHECK-LABEL: stack_fold_psrlw
|
|
1010 ;CHECK: psrlw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
1011 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
1012 %2 = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %a0, <8 x i16> %a1)
|
|
1013 ret <8 x i16> %2
|
|
1014 }
|
|
1015 declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone
|
|
1016
|
|
1017 define <16 x i8> @stack_fold_psubb(<16 x i8> %a0, <16 x i8> %a1) {
|
|
1018 ;CHECK-LABEL: stack_fold_psubb
|
|
1019 ;CHECK: psubb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
1020 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
1021 %2 = sub <16 x i8> %a0, %a1
|
|
1022 ret <16 x i8> %2
|
|
1023 }
|
|
1024
|
|
1025 define <4 x i32> @stack_fold_psubd(<4 x i32> %a0, <4 x i32> %a1) {
|
|
1026 ;CHECK-LABEL: stack_fold_psubd
|
|
1027 ;CHECK: psubd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
1028 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
1029 %2 = sub <4 x i32> %a0, %a1
|
|
1030 ret <4 x i32> %2
|
|
1031 }
|
|
1032
|
|
1033 define <2 x i64> @stack_fold_psubq(<2 x i64> %a0, <2 x i64> %a1) {
|
|
1034 ;CHECK-LABEL: stack_fold_psubq
|
|
1035 ;CHECK: psubq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
1036 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
1037 %2 = sub <2 x i64> %a0, %a1
|
|
1038 ret <2 x i64> %2
|
|
1039 }
|
|
1040
|
|
1041 define <16 x i8> @stack_fold_psubsb(<16 x i8> %a0, <16 x i8> %a1) {
|
|
1042 ;CHECK-LABEL: stack_fold_psubsb
|
|
1043 ;CHECK: psubsb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
1044 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
1045 %2 = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a0, <16 x i8> %a1)
|
|
1046 ret <16 x i8> %2
|
|
1047 }
|
|
1048 declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) nounwind readnone
|
|
1049
|
|
1050 define <8 x i16> @stack_fold_psubsw(<8 x i16> %a0, <8 x i16> %a1) {
|
|
1051 ;CHECK-LABEL: stack_fold_psubsw
|
|
1052 ;CHECK: psubsw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
1053 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
1054 %2 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a0, <8 x i16> %a1)
|
|
1055 ret <8 x i16> %2
|
|
1056 }
|
|
1057 declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone
|
|
1058
|
|
1059 define <16 x i8> @stack_fold_psubusb(<16 x i8> %a0, <16 x i8> %a1) {
|
|
1060 ;CHECK-LABEL: stack_fold_psubusb
|
|
1061 ;CHECK: psubusb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
1062 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
1063 %2 = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %a0, <16 x i8> %a1)
|
|
1064 ret <16 x i8> %2
|
|
1065 }
|
|
1066 declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnone
|
|
1067
|
|
1068 define <8 x i16> @stack_fold_psubusw(<8 x i16> %a0, <8 x i16> %a1) {
|
|
1069 ;CHECK-LABEL: stack_fold_psubusw
|
|
1070 ;CHECK: psubusw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
1071 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
1072 %2 = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %a0, <8 x i16> %a1)
|
|
1073 ret <8 x i16> %2
|
|
1074 }
|
|
1075 declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnone
|
|
1076
|
|
1077 define <8 x i16> @stack_fold_psubw(<8 x i16> %a0, <8 x i16> %a1) {
|
|
1078 ;CHECK-LABEL: stack_fold_psubw
|
|
1079 ;CHECK: psubw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
1080 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
1081 %2 = sub <8 x i16> %a0, %a1
|
|
1082 ret <8 x i16> %2
|
|
1083 }
|
|
1084
|
|
1085 define i32 @stack_fold_ptest(<2 x i64> %a0, <2 x i64> %a1) {
|
|
1086 ;CHECK-LABEL: stack_fold_ptest
|
|
1087 ;CHECK: ptest {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
1088 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
1089 %2 = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %a0, <2 x i64> %a1)
|
|
1090 ret i32 %2
|
|
1091 }
|
|
1092 declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone
|
|
1093
|
|
1094 define <16 x i8> @stack_fold_punpckhbw(<16 x i8> %a0, <16 x i8> %a1) {
|
|
1095 ;CHECK-LABEL: stack_fold_punpckhbw
|
|
1096 ;CHECK: punpckhbw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
1097 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
1098 %2 = shufflevector <16 x i8> %a0, <16 x i8> %a1, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
|
|
1099 ret <16 x i8> %2
|
|
1100 }
|
|
1101
|
|
1102 define <4 x i32> @stack_fold_punpckhdq(<4 x i32> %a0, <4 x i32> %a1) {
|
|
1103 ;CHECK-LABEL: stack_fold_punpckhdq
|
|
1104 ;CHECK: punpckhdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
1105 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
1106 %2 = shufflevector <4 x i32> %a0, <4 x i32> %a1, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
|
|
1107 ; add forces execution domain
|
|
1108 %3 = add <4 x i32> %2, <i32 1, i32 1, i32 1, i32 1>
|
|
1109 ret <4 x i32> %3
|
|
1110 }
|
|
1111
|
|
1112 define <2 x i64> @stack_fold_punpckhqdq(<2 x i64> %a0, <2 x i64> %a1) {
|
|
1113 ;CHECK-LABEL: stack_fold_punpckhqdq
|
|
1114 ;CHECK: punpckhqdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
1115 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
1116 %2 = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32> <i32 1, i32 3>
|
|
1117 ; add forces execution domain
|
|
1118 %3 = add <2 x i64> %2, <i64 1, i64 1>
|
|
1119 ret <2 x i64> %3
|
|
1120 }
|
|
1121
|
|
1122 define <8 x i16> @stack_fold_punpckhwd(<8 x i16> %a0, <8 x i16> %a1) {
|
|
1123 ;CHECK-LABEL: stack_fold_punpckhwd
|
|
1124 ;CHECK: punpckhwd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
1125 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
1126 %2 = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
|
|
1127 ret <8 x i16> %2
|
|
1128 }
|
|
1129
|
|
1130 define <16 x i8> @stack_fold_punpcklbw(<16 x i8> %a0, <16 x i8> %a1) {
|
|
1131 ;CHECK-LABEL: stack_fold_punpcklbw
|
|
1132 ;CHECK: punpcklbw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
1133 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
1134 %2 = shufflevector <16 x i8> %a0, <16 x i8> %a1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
|
|
1135 ret <16 x i8> %2
|
|
1136 }
|
|
1137
|
|
1138 define <4 x i32> @stack_fold_punpckldq(<4 x i32> %a0, <4 x i32> %a1) {
|
|
1139 ;CHECK-LABEL: stack_fold_punpckldq
|
|
1140 ;CHECK: punpckldq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
1141 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
1142 %2 = shufflevector <4 x i32> %a0, <4 x i32> %a1, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
|
|
1143 ; add forces execution domain
|
|
1144 %3 = add <4 x i32> %2, <i32 1, i32 1, i32 1, i32 1>
|
|
1145 ret <4 x i32> %3
|
|
1146 }
|
|
1147
|
|
1148 define <2 x i64> @stack_fold_punpcklqdq(<2 x i64> %a0, <2 x i64> %a1) {
|
|
1149 ;CHECK-LABEL: stack_fold_punpcklqdq
|
|
1150 ;CHECK: punpcklqdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
1151 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
1152 %2 = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32> <i32 0, i32 2>
|
|
1153 ; add forces execution domain
|
|
1154 %3 = add <2 x i64> %2, <i64 1, i64 1>
|
|
1155 ret <2 x i64> %3
|
|
1156 }
|
|
1157
|
|
1158 define <8 x i16> @stack_fold_punpcklwd(<8 x i16> %a0, <8 x i16> %a1) {
|
|
1159 ;CHECK-LABEL: stack_fold_punpcklwd
|
|
1160 ;CHECK: punpcklwd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
1161 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
1162 %2 = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
|
|
1163 ret <8 x i16> %2
|
|
1164 }
|
|
1165
|
|
1166 define <16 x i8> @stack_fold_pxor(<16 x i8> %a0, <16 x i8> %a1) {
|
|
1167 ;CHECK-LABEL: stack_fold_pxor
|
|
1168 ;CHECK: pxor {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
|
1169 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
|
1170 %2 = xor <16 x i8> %a0, %a1
|
|
1171 ; add forces execution domain
|
|
1172 %3 = add <16 x i8> %2, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
|
|
1173 ret <16 x i8> %3
|
|
1174 }
|