comparison: test/CodeGen/X86/vector-tzcnt-256.ll @ 95:afa8332a0e37 (LLVM 3.8) vs. 84:f3e34b893a5f

author:   Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
date:     Tue, 13 Oct 2015 17:48:58 +0900
children: 7d135dc70f03
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2

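; 256-bit vector CTTZ tests. With no native vector tzcnt instruction, the
; expected codegen uses the identity cttz(x) = popcnt((x & -x) - 1):
; negate, AND, subtract one, then a vpshufb nibble-LUT popcount.
; AVX1 splits the ymm input into two xmm halves; AVX2 operates on the
; full 256-bit register.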
define <4 x i64> @testv4i64(<4 x i64> %in) nounwind {
; AVX1-LABEL: testv4i64:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpsubq %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpsubq %xmm0, %xmm2, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [1,1]
; AVX1-NEXT: vpsubq %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT: vpand %xmm4, %xmm1, %xmm5
; AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT: vpshufb %xmm5, %xmm6, %xmm5
; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
; AVX1-NEXT: vpand %xmm4, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm6, %xmm1
; AVX1-NEXT: vpaddb %xmm5, %xmm1, %xmm1
; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpsubq %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpand %xmm4, %xmm0, %xmm3
; AVX1-NEXT: vpshufb %xmm3, %xmm6, %xmm3
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT: vpand %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vpshufb %xmm0, %xmm6, %xmm0
; AVX1-NEXT: vpaddb %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpsadbw %xmm0, %xmm2, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: testv4i64:
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpsubq %ymm0, %ymm1, %ymm2
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2
; AVX2-NEXT: vpsubq %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm3
; AVX2-NEXT: vmovdqa {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT: vpshufb %ymm3, %ymm4, %ymm3
; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpshufb %ymm0, %ymm4, %ymm0
; AVX2-NEXT: vpaddb %ymm3, %ymm0, %ymm0
; AVX2-NEXT: vpsadbw %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq
  %out = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %in, i1 0)
  ret <4 x i64> %out
}

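; The *u variants pass i1 true (the result is undefined for a zero input);
; the expected codegen here is identical to the defined-at-zero form.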
define <4 x i64> @testv4i64u(<4 x i64> %in) nounwind {
; AVX1-LABEL: testv4i64u:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpsubq %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpsubq %xmm0, %xmm2, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [1,1]
; AVX1-NEXT: vpsubq %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT: vpand %xmm4, %xmm1, %xmm5
; AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT: vpshufb %xmm5, %xmm6, %xmm5
; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
; AVX1-NEXT: vpand %xmm4, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm6, %xmm1
; AVX1-NEXT: vpaddb %xmm5, %xmm1, %xmm1
; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpsubq %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpand %xmm4, %xmm0, %xmm3
; AVX1-NEXT: vpshufb %xmm3, %xmm6, %xmm3
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT: vpand %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vpshufb %xmm0, %xmm6, %xmm0
; AVX1-NEXT: vpaddb %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpsadbw %xmm0, %xmm2, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: testv4i64u:
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpsubq %ymm0, %ymm1, %ymm2
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2
; AVX2-NEXT: vpsubq %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm3
; AVX2-NEXT: vmovdqa {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT: vpshufb %ymm3, %ymm4, %ymm3
; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpshufb %ymm0, %ymm4, %ymm0
; AVX2-NEXT: vpaddb %ymm3, %ymm0, %ymm0
; AVX2-NEXT: vpsadbw %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq
  %out = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %in, i1 -1)
  ret <4 x i64> %out
}

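; For i32 elements the per-byte counts are widened back to dwords with
; vpunpckldq/vpunpckhdq against zero, summed with vpsadbw, and repacked
; with vpackuswb.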
define <8 x i32> @testv8i32(<8 x i32> %in) nounwind {
; AVX1-LABEL: testv8i32:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpsubd %xmm2, %xmm1, %xmm2
; AVX1-NEXT: vpsubd %xmm0, %xmm1, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [1,1,1,1]
; AVX1-NEXT: vpsubd %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT: vpand %xmm4, %xmm2, %xmm5
; AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT: vpshufb %xmm5, %xmm6, %xmm5
; AVX1-NEXT: vpsrlw $4, %xmm2, %xmm2
; AVX1-NEXT: vpand %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpshufb %xmm2, %xmm6, %xmm2
; AVX1-NEXT: vpaddb %xmm5, %xmm2, %xmm2
; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; AVX1-NEXT: vpsadbw %xmm5, %xmm1, %xmm5
; AVX1-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; AVX1-NEXT: vpsadbw %xmm2, %xmm1, %xmm2
; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2
; AVX1-NEXT: vpsubd %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpand %xmm4, %xmm0, %xmm3
; AVX1-NEXT: vpshufb %xmm3, %xmm6, %xmm3
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT: vpand %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vpshufb %xmm0, %xmm6, %xmm0
; AVX1-NEXT: vpaddb %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm3 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT: vpsadbw %xmm3, %xmm1, %xmm3
; AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX1-NEXT: vpsadbw %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vpackuswb %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: testv8i32:
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpsubd %ymm0, %ymm1, %ymm2
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %ymm2
; AVX2-NEXT: vpsubd %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm3
; AVX2-NEXT: vmovdqa {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT: vpshufb %ymm3, %ymm4, %ymm3
; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpshufb %ymm0, %ymm4, %ymm0
; AVX2-NEXT: vpaddb %ymm3, %ymm0, %ymm0
; AVX2-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; AVX2-NEXT: vpsadbw %ymm2, %ymm1, %ymm2
; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; AVX2-NEXT: vpsadbw %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
; AVX2-NEXT: retq
  %out = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %in, i1 0)
  ret <8 x i32> %out
}

define <8 x i32> @testv8i32u(<8 x i32> %in) nounwind {
; AVX1-LABEL: testv8i32u:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpsubd %xmm2, %xmm1, %xmm2
; AVX1-NEXT: vpsubd %xmm0, %xmm1, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [1,1,1,1]
; AVX1-NEXT: vpsubd %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT: vpand %xmm4, %xmm2, %xmm5
; AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT: vpshufb %xmm5, %xmm6, %xmm5
; AVX1-NEXT: vpsrlw $4, %xmm2, %xmm2
; AVX1-NEXT: vpand %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpshufb %xmm2, %xmm6, %xmm2
; AVX1-NEXT: vpaddb %xmm5, %xmm2, %xmm2
; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; AVX1-NEXT: vpsadbw %xmm5, %xmm1, %xmm5
; AVX1-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; AVX1-NEXT: vpsadbw %xmm2, %xmm1, %xmm2
; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2
; AVX1-NEXT: vpsubd %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpand %xmm4, %xmm0, %xmm3
; AVX1-NEXT: vpshufb %xmm3, %xmm6, %xmm3
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT: vpand %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vpshufb %xmm0, %xmm6, %xmm0
; AVX1-NEXT: vpaddb %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm3 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT: vpsadbw %xmm3, %xmm1, %xmm3
; AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX1-NEXT: vpsadbw %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vpackuswb %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: testv8i32u:
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpsubd %ymm0, %ymm1, %ymm2
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %ymm2
; AVX2-NEXT: vpsubd %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm3
; AVX2-NEXT: vmovdqa {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT: vpshufb %ymm3, %ymm4, %ymm3
; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpshufb %ymm0, %ymm4, %ymm0
; AVX2-NEXT: vpaddb %ymm3, %ymm0, %ymm0
; AVX2-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; AVX2-NEXT: vpsadbw %ymm2, %ymm1, %ymm2
; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; AVX2-NEXT: vpsadbw %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
; AVX2-NEXT: retq
  %out = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %in, i1 -1)
  ret <8 x i32> %out
}

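; For i16 elements the per-byte counts are merged into per-word counts
; with a vpsllw $8 / vpaddb / vpsrlw $8 shift-and-add sequence.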
define <16 x i16> @testv16i16(<16 x i16> %in) nounwind {
; AVX1-LABEL: testv16i16:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpsubw %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpsubw %xmm0, %xmm2, %xmm2
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,1,1,1,1,1,1,1]
; AVX1-NEXT: vpsubw %xmm1, %xmm0, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm4
; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT: vpshufb %xmm4, %xmm5, %xmm4
; AVX1-NEXT: vpsrlw $4, %xmm2, %xmm2
; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpshufb %xmm2, %xmm5, %xmm2
; AVX1-NEXT: vpaddb %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpsllw $8, %xmm2, %xmm4
; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2
; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpsubw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm5, %xmm1
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpshufb %xmm0, %xmm5, %xmm0
; AVX1-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1
; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: testv16i16:
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpsubw %ymm0, %ymm1, %ymm1
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsubw {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2
; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT: vpshufb %ymm2, %ymm3, %ymm2
; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufb %ymm0, %ymm3, %ymm0
; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpsllw $8, %ymm0, %ymm1
; AVX2-NEXT: vpaddb %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0
; AVX2-NEXT: retq
  %out = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %in, i1 0)
  ret <16 x i16> %out
}

define <16 x i16> @testv16i16u(<16 x i16> %in) nounwind {
; AVX1-LABEL: testv16i16u:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpsubw %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpsubw %xmm0, %xmm2, %xmm2
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,1,1,1,1,1,1,1]
; AVX1-NEXT: vpsubw %xmm1, %xmm0, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm4
; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT: vpshufb %xmm4, %xmm5, %xmm4
; AVX1-NEXT: vpsrlw $4, %xmm2, %xmm2
; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpshufb %xmm2, %xmm5, %xmm2
; AVX1-NEXT: vpaddb %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpsllw $8, %xmm2, %xmm4
; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2
; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpsubw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm5, %xmm1
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpshufb %xmm0, %xmm5, %xmm0
; AVX1-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1
; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: testv16i16u:
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpsubw %ymm0, %ymm1, %ymm1
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsubw {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2
; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT: vpshufb %ymm2, %ymm3, %ymm2
; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufb %ymm0, %ymm3, %ymm0
; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpsllw $8, %ymm0, %ymm1
; AVX2-NEXT: vpaddb %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0
; AVX2-NEXT: retq
  %out = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %in, i1 -1)
  ret <16 x i16> %out
}

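; For i8 elements the nibble-LUT popcount already gives the per-byte
; result, so no horizontal reduction is needed.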
define <32 x i8> @testv32i8(<32 x i8> %in) nounwind {
; AVX1-LABEL: testv32i8:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpsubb %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpsubb %xmm0, %xmm2, %xmm2
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX1-NEXT: vpsubb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm4
; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT: vpshufb %xmm4, %xmm5, %xmm4
; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm5, %xmm1
; AVX1-NEXT: vpaddb %xmm4, %xmm1, %xmm1
; AVX1-NEXT: vpsubb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm2
; AVX1-NEXT: vpshufb %xmm2, %xmm5, %xmm2
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpshufb %xmm0, %xmm5, %xmm0
; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: testv32i8:
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpsubb %ymm0, %ymm1, %ymm1
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsubb {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2
; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT: vpshufb %ymm2, %ymm3, %ymm2
; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufb %ymm0, %ymm3, %ymm0
; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0
; AVX2-NEXT: retq
  %out = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %in, i1 0)
  ret <32 x i8> %out
}

define <32 x i8> @testv32i8u(<32 x i8> %in) nounwind {
; AVX1-LABEL: testv32i8u:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpsubb %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpsubb %xmm0, %xmm2, %xmm2
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX1-NEXT: vpsubb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm4
; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX1-NEXT: vpshufb %xmm4, %xmm5, %xmm4
; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm5, %xmm1
; AVX1-NEXT: vpaddb %xmm4, %xmm1, %xmm1
; AVX1-NEXT: vpsubb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm2
; AVX1-NEXT: vpshufb %xmm2, %xmm5, %xmm2
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpshufb %xmm0, %xmm5, %xmm0
; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: testv32i8u:
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpsubb %ymm0, %ymm1, %ymm1
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsubb {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2
; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX2-NEXT: vpshufb %ymm2, %ymm3, %ymm2
; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufb %ymm0, %ymm3, %ymm0
; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0
; AVX2-NEXT: retq
  %out = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %in, i1 -1)
  ret <32 x i8> %out
}

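; The foldv* tests check that cttz of constant vectors is constant folded
; and materialized as a single constant-pool load.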
define <4 x i64> @foldv4i64() nounwind {
; ALL-LABEL: foldv4i64:
; ALL: # BB#0:
; ALL-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,64,0]
; ALL-NEXT: retq
  %out = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> <i64 256, i64 -1, i64 0, i64 255>, i1 0)
  ret <4 x i64> %out
}

define <4 x i64> @foldv4i64u() nounwind {
; ALL-LABEL: foldv4i64u:
; ALL: # BB#0:
; ALL-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,64,0]
; ALL-NEXT: retq
  %out = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> <i64 256, i64 -1, i64 0, i64 255>, i1 -1)
  ret <4 x i64> %out
}

define <8 x i32> @foldv8i32() nounwind {
; ALL-LABEL: foldv8i32:
; ALL: # BB#0:
; ALL-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,32,0,16,0,3,3]
; ALL-NEXT: retq
  %out = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> <i32 256, i32 -1, i32 0, i32 255, i32 -65536, i32 7, i32 24, i32 88>, i1 0)
  ret <8 x i32> %out
}

define <8 x i32> @foldv8i32u() nounwind {
; ALL-LABEL: foldv8i32u:
; ALL: # BB#0:
; ALL-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,32,0,16,0,3,3]
; ALL-NEXT: retq
  %out = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> <i32 256, i32 -1, i32 0, i32 255, i32 -65536, i32 7, i32 24, i32 88>, i1 -1)
  ret <8 x i32> %out
}

define <16 x i16> @foldv16i16() nounwind {
; ALL-LABEL: foldv16i16:
; ALL: # BB#0:
; ALL-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,16,0,16,0,3,3,1,1,0,1,2,3,4,5]
; ALL-NEXT: retq
  %out = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> <i16 256, i16 -1, i16 0, i16 255, i16 -65536, i16 7, i16 24, i16 88, i16 -2, i16 254, i16 1, i16 2, i16 4, i16 8, i16 16, i16 32>, i1 0)
  ret <16 x i16> %out
}

define <16 x i16> @foldv16i16u() nounwind {
; ALL-LABEL: foldv16i16u:
; ALL: # BB#0:
; ALL-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,16,0,16,0,3,3,1,1,0,1,2,3,4,5]
; ALL-NEXT: retq
  %out = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> <i16 256, i16 -1, i16 0, i16 255, i16 -65536, i16 7, i16 24, i16 88, i16 -2, i16 254, i16 1, i16 2, i16 4, i16 8, i16 16, i16 32>, i1 -1)
  ret <16 x i16> %out
}

define <32 x i8> @foldv32i8() nounwind {
; ALL-LABEL: foldv32i8:
; ALL: # BB#0:
; ALL-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5,6,7,8,8,7,6,5,4,3,2,1,0,0,0,0,0]
; ALL-NEXT: retq
  %out = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> <i8 256, i8 -1, i8 0, i8 255, i8 -65536, i8 7, i8 24, i8 88, i8 -2, i8 254, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 256, i8 -256, i8 -128, i8 -64, i8 -32, i8 -16, i8 -8, i8 -4, i8 -2, i8 -1, i8 3, i8 5, i8 7, i8 127>, i1 0)
  ret <32 x i8> %out
}

define <32 x i8> @foldv32i8u() nounwind {
; ALL-LABEL: foldv32i8u:
; ALL: # BB#0:
; ALL-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5,6,7,8,8,7,6,5,4,3,2,1,0,0,0,0,0]
; ALL-NEXT: retq
  %out = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> <i8 256, i8 -1, i8 0, i8 255, i8 -65536, i8 7, i8 24, i8 88, i8 -2, i8 254, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 256, i8 -256, i8 -128, i8 -64, i8 -32, i8 -16, i8 -8, i8 -4, i8 -2, i8 -1, i8 3, i8 5, i8 7, i8 127>, i1 -1)
  ret <32 x i8> %out
}

declare <4 x i64> @llvm.cttz.v4i64(<4 x i64>, i1)
declare <8 x i32> @llvm.cttz.v8i32(<8 x i32>, i1)
declare <16 x i16> @llvm.cttz.v16i16(<16 x i16>, i1)
declare <32 x i8> @llvm.cttz.v32i8(<32 x i8>, i1)