; test/CodeGen/X86/vector-popcnt-128.ll @ 95:afa8332a0e37 (LLVM 3.8, CbC_llvm)
; Author: Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
; Date:   Tue, 13 Oct 2015 17:48:58 +0900
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
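
; This file checks how llc lowers the llvm.ctpop.* (population count)
; intrinsics on 128-bit vectors at each feature level named in the RUN
; lines. None of these levels has a native vector popcount instruction,
; so the backend falls back to bitwise "SWAR" arithmetic (SSE2/SSE3) or
; to an in-register pshufb nibble lookup table (SSSE3 and later).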
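
; For v2i64, SSE2/SSE3 use the classic bit-twiddling popcount: subtract the
; odd bits (x - ((x >> 1) & 0x55...)), then sum adjacent 2-bit and 4-bit
; fields with the 0x33... and 0x0f... masks. The 0x33... mask is visible as
; the 3689348814741910323 (0x3333333333333333) constant; the 0x55... and
; 0x0f... masks are loaded from memory behind the {{.*}}(%rip) regex.
; A final psadbw against zero horizontally adds the per-byte counts within
; each 64-bit lane. SSSE3/SSE41/AVX instead split each byte into nibbles
; and use pshufb to index the 16-entry count table [0,1,1,2,...,4], again
; finishing with psadbw.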
define <2 x i64> @testv2i64(<2 x i64> %in) nounwind {
; SSE2-LABEL: testv2i64:
; SSE2: # BB#0:
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrlq $1, %xmm1
; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
; SSE2-NEXT: psubq %xmm1, %xmm0
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [3689348814741910323,3689348814741910323]
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: pand %xmm1, %xmm2
; SSE2-NEXT: psrlq $2, %xmm0
; SSE2-NEXT: pand %xmm1, %xmm0
; SSE2-NEXT: paddq %xmm2, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrlq $4, %xmm1
; SSE2-NEXT: paddq %xmm0, %xmm1
; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
; SSE2-NEXT: pxor %xmm0, %xmm0
; SSE2-NEXT: psadbw %xmm0, %xmm1
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: testv2i64:
; SSE3: # BB#0:
; SSE3-NEXT: movdqa %xmm0, %xmm1
; SSE3-NEXT: psrlq $1, %xmm1
; SSE3-NEXT: pand {{.*}}(%rip), %xmm1
; SSE3-NEXT: psubq %xmm1, %xmm0
; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [3689348814741910323,3689348814741910323]
; SSE3-NEXT: movdqa %xmm0, %xmm2
; SSE3-NEXT: pand %xmm1, %xmm2
; SSE3-NEXT: psrlq $2, %xmm0
; SSE3-NEXT: pand %xmm1, %xmm0
; SSE3-NEXT: paddq %xmm2, %xmm0
; SSE3-NEXT: movdqa %xmm0, %xmm1
; SSE3-NEXT: psrlq $4, %xmm1
; SSE3-NEXT: paddq %xmm0, %xmm1
; SSE3-NEXT: pand {{.*}}(%rip), %xmm1
; SSE3-NEXT: pxor %xmm0, %xmm0
; SSE3-NEXT: psadbw %xmm0, %xmm1
; SSE3-NEXT: movdqa %xmm1, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: testv2i64:
; SSSE3: # BB#0:
; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT: movdqa %xmm0, %xmm2
; SSSE3-NEXT: pand %xmm1, %xmm2
; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT: movdqa %xmm3, %xmm4
; SSSE3-NEXT: pshufb %xmm2, %xmm4
; SSSE3-NEXT: psrlw $4, %xmm0
; SSSE3-NEXT: pand %xmm1, %xmm0
; SSSE3-NEXT: pshufb %xmm0, %xmm3
; SSSE3-NEXT: paddb %xmm4, %xmm3
; SSSE3-NEXT: pxor %xmm0, %xmm0
; SSSE3-NEXT: psadbw %xmm3, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: testv2i64:
; SSE41: # BB#0:
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: pand %xmm1, %xmm2
; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT: movdqa %xmm3, %xmm4
; SSE41-NEXT: pshufb %xmm2, %xmm4
; SSE41-NEXT: psrlw $4, %xmm0
; SSE41-NEXT: pand %xmm1, %xmm0
; SSE41-NEXT: pshufb %xmm0, %xmm3
; SSE41-NEXT: paddb %xmm4, %xmm3
; SSE41-NEXT: pxor %xmm0, %xmm0
; SSE41-NEXT: psadbw %xmm3, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: testv2i64:
; AVX: # BB#0:
; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm2
; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX-NEXT: vpshufb %xmm2, %xmm3, %xmm2
; AVX-NEXT: vpsrlw $4, %xmm0, %xmm0
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpshufb %xmm0, %xmm3, %xmm0
; AVX-NEXT: vpaddb %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpsadbw %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %out = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %in)
  ret <2 x i64> %out
}
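
; v4i32 follows the same two strategies, but psadbw only produces per-qword
; sums, so the per-byte counts are first interleaved with zero
; (punpckhdq/punpckldq) to give each 32-bit element its own 64-bit lane,
; reduced with two psadbw ops, and merged back together with packuswb.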
define <4 x i32> @testv4i32(<4 x i32> %in) nounwind {
; SSE2-LABEL: testv4i32:
; SSE2: # BB#0:
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrld $1, %xmm1
; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
; SSE2-NEXT: psubd %xmm1, %xmm0
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [858993459,858993459,858993459,858993459]
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: pand %xmm1, %xmm2
; SSE2-NEXT: psrld $2, %xmm0
; SSE2-NEXT: pand %xmm1, %xmm0
; SSE2-NEXT: paddd %xmm2, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrld $4, %xmm1
; SSE2-NEXT: paddd %xmm0, %xmm1
; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
; SSE2-NEXT: pxor %xmm0, %xmm0
; SSE2-NEXT: movdqa %xmm1, %xmm2
; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-NEXT: psadbw %xmm0, %xmm2
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT: psadbw %xmm0, %xmm1
; SSE2-NEXT: packuswb %xmm2, %xmm1
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: testv4i32:
; SSE3: # BB#0:
; SSE3-NEXT: movdqa %xmm0, %xmm1
; SSE3-NEXT: psrld $1, %xmm1
; SSE3-NEXT: pand {{.*}}(%rip), %xmm1
; SSE3-NEXT: psubd %xmm1, %xmm0
; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [858993459,858993459,858993459,858993459]
; SSE3-NEXT: movdqa %xmm0, %xmm2
; SSE3-NEXT: pand %xmm1, %xmm2
; SSE3-NEXT: psrld $2, %xmm0
; SSE3-NEXT: pand %xmm1, %xmm0
; SSE3-NEXT: paddd %xmm2, %xmm0
; SSE3-NEXT: movdqa %xmm0, %xmm1
; SSE3-NEXT: psrld $4, %xmm1
; SSE3-NEXT: paddd %xmm0, %xmm1
; SSE3-NEXT: pand {{.*}}(%rip), %xmm1
; SSE3-NEXT: pxor %xmm0, %xmm0
; SSE3-NEXT: movdqa %xmm1, %xmm2
; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE3-NEXT: psadbw %xmm0, %xmm2
; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE3-NEXT: psadbw %xmm0, %xmm1
; SSE3-NEXT: packuswb %xmm2, %xmm1
; SSE3-NEXT: movdqa %xmm1, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: testv4i32:
; SSSE3: # BB#0:
; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT: movdqa %xmm0, %xmm3
; SSSE3-NEXT: pand %xmm2, %xmm3
; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT: movdqa %xmm1, %xmm4
; SSSE3-NEXT: pshufb %xmm3, %xmm4
; SSSE3-NEXT: psrlw $4, %xmm0
; SSSE3-NEXT: pand %xmm2, %xmm0
; SSSE3-NEXT: pshufb %xmm0, %xmm1
; SSSE3-NEXT: paddb %xmm4, %xmm1
; SSSE3-NEXT: pxor %xmm0, %xmm0
; SSSE3-NEXT: movdqa %xmm1, %xmm2
; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSSE3-NEXT: psadbw %xmm0, %xmm2
; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSSE3-NEXT: psadbw %xmm0, %xmm1
; SSSE3-NEXT: packuswb %xmm2, %xmm1
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: testv4i32:
; SSE41: # BB#0:
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT: movdqa %xmm0, %xmm3
; SSE41-NEXT: pand %xmm2, %xmm3
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT: movdqa %xmm1, %xmm4
; SSE41-NEXT: pshufb %xmm3, %xmm4
; SSE41-NEXT: psrlw $4, %xmm0
; SSE41-NEXT: pand %xmm2, %xmm0
; SSE41-NEXT: pshufb %xmm0, %xmm1
; SSE41-NEXT: paddb %xmm4, %xmm1
; SSE41-NEXT: pxor %xmm0, %xmm0
; SSE41-NEXT: movdqa %xmm1, %xmm2
; SSE41-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE41-NEXT: psadbw %xmm0, %xmm2
; SSE41-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE41-NEXT: psadbw %xmm0, %xmm1
; SSE41-NEXT: packuswb %xmm2, %xmm1
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: testv4i32:
; AVX: # BB#0:
; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm2
; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX-NEXT: vpshufb %xmm2, %xmm3, %xmm2
; AVX-NEXT: vpsrlw $4, %xmm0, %xmm0
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpshufb %xmm0, %xmm3, %xmm0
; AVX-NEXT: vpaddb %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX-NEXT: vpsadbw %xmm2, %xmm1, %xmm2
; AVX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX-NEXT: vpsadbw %xmm0, %xmm1, %xmm0
; AVX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
; AVX-NEXT: retq
  %out = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %in)
  ret <4 x i32> %out
}
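
; For v8i16 the per-byte counts are merged into per-word counts without
; psadbw: shift the byte counts left by 8, add them to the original, and
; shift the 16-bit sums back down (psllw $8 / paddb / psrlw $8).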
define <8 x i16> @testv8i16(<8 x i16> %in) nounwind {
; SSE2-LABEL: testv8i16:
; SSE2: # BB#0:
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrlw $1, %xmm1
; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
; SSE2-NEXT: psubw %xmm1, %xmm0
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [13107,13107,13107,13107,13107,13107,13107,13107]
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: pand %xmm1, %xmm2
; SSE2-NEXT: psrlw $2, %xmm0
; SSE2-NEXT: pand %xmm1, %xmm0
; SSE2-NEXT: paddw %xmm2, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrlw $4, %xmm1
; SSE2-NEXT: paddw %xmm0, %xmm1
; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: psllw $8, %xmm0
; SSE2-NEXT: paddb %xmm1, %xmm0
; SSE2-NEXT: psrlw $8, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: testv8i16:
; SSE3: # BB#0:
; SSE3-NEXT: movdqa %xmm0, %xmm1
; SSE3-NEXT: psrlw $1, %xmm1
; SSE3-NEXT: pand {{.*}}(%rip), %xmm1
; SSE3-NEXT: psubw %xmm1, %xmm0
; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [13107,13107,13107,13107,13107,13107,13107,13107]
; SSE3-NEXT: movdqa %xmm0, %xmm2
; SSE3-NEXT: pand %xmm1, %xmm2
; SSE3-NEXT: psrlw $2, %xmm0
; SSE3-NEXT: pand %xmm1, %xmm0
; SSE3-NEXT: paddw %xmm2, %xmm0
; SSE3-NEXT: movdqa %xmm0, %xmm1
; SSE3-NEXT: psrlw $4, %xmm1
; SSE3-NEXT: paddw %xmm0, %xmm1
; SSE3-NEXT: pand {{.*}}(%rip), %xmm1
; SSE3-NEXT: movdqa %xmm1, %xmm0
; SSE3-NEXT: psllw $8, %xmm0
; SSE3-NEXT: paddb %xmm1, %xmm0
; SSE3-NEXT: psrlw $8, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: testv8i16:
; SSSE3: # BB#0:
; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT: movdqa %xmm0, %xmm2
; SSSE3-NEXT: pand %xmm1, %xmm2
; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT: movdqa %xmm3, %xmm4
; SSSE3-NEXT: pshufb %xmm2, %xmm4
; SSSE3-NEXT: psrlw $4, %xmm0
; SSSE3-NEXT: pand %xmm1, %xmm0
; SSSE3-NEXT: pshufb %xmm0, %xmm3
; SSSE3-NEXT: paddb %xmm4, %xmm3
; SSSE3-NEXT: movdqa %xmm3, %xmm0
; SSSE3-NEXT: psllw $8, %xmm0
; SSSE3-NEXT: paddb %xmm3, %xmm0
; SSSE3-NEXT: psrlw $8, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: testv8i16:
; SSE41: # BB#0:
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: pand %xmm1, %xmm2
; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT: movdqa %xmm3, %xmm4
; SSE41-NEXT: pshufb %xmm2, %xmm4
; SSE41-NEXT: psrlw $4, %xmm0
; SSE41-NEXT: pand %xmm1, %xmm0
; SSE41-NEXT: pshufb %xmm0, %xmm3
; SSE41-NEXT: paddb %xmm4, %xmm3
; SSE41-NEXT: movdqa %xmm3, %xmm0
; SSE41-NEXT: psllw $8, %xmm0
; SSE41-NEXT: paddb %xmm3, %xmm0
; SSE41-NEXT: psrlw $8, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: testv8i16:
; AVX: # BB#0:
; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm2
; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX-NEXT: vpshufb %xmm2, %xmm3, %xmm2
; AVX-NEXT: vpsrlw $4, %xmm0, %xmm0
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpshufb %xmm0, %xmm3, %xmm0
; AVX-NEXT: vpaddb %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpsllw $8, %xmm0, %xmm1
; AVX-NEXT: vpaddb %xmm0, %xmm1, %xmm0
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm0
; AVX-NEXT: retq
  %out = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %in)
  ret <8 x i16> %out
}
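
; v16i8 needs no horizontal reduction at all: the bit-twiddling sequence
; (or the pshufb nibble lookup) already leaves the popcount of each byte
; in place, so the lowering stops once the per-byte counts are formed.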
define <16 x i8> @testv16i8(<16 x i8> %in) nounwind {
; SSE2-LABEL: testv16i8:
; SSE2: # BB#0:
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrlw $1, %xmm1
; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
; SSE2-NEXT: psubb %xmm1, %xmm0
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: pand %xmm1, %xmm2
; SSE2-NEXT: psrlw $2, %xmm0
; SSE2-NEXT: pand %xmm1, %xmm0
; SSE2-NEXT: paddb %xmm2, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrlw $4, %xmm1
; SSE2-NEXT: paddb %xmm0, %xmm1
; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: testv16i8:
; SSE3: # BB#0:
; SSE3-NEXT: movdqa %xmm0, %xmm1
; SSE3-NEXT: psrlw $1, %xmm1
; SSE3-NEXT: pand {{.*}}(%rip), %xmm1
; SSE3-NEXT: psubb %xmm1, %xmm0
; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT: movdqa %xmm0, %xmm2
; SSE3-NEXT: pand %xmm1, %xmm2
; SSE3-NEXT: psrlw $2, %xmm0
; SSE3-NEXT: pand %xmm1, %xmm0
; SSE3-NEXT: paddb %xmm2, %xmm0
; SSE3-NEXT: movdqa %xmm0, %xmm1
; SSE3-NEXT: psrlw $4, %xmm1
; SSE3-NEXT: paddb %xmm0, %xmm1
; SSE3-NEXT: pand {{.*}}(%rip), %xmm1
; SSE3-NEXT: movdqa %xmm1, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: testv16i8:
; SSSE3: # BB#0:
; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSSE3-NEXT: movdqa %xmm0, %xmm3
; SSSE3-NEXT: pand %xmm2, %xmm3
; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSSE3-NEXT: movdqa %xmm1, %xmm4
; SSSE3-NEXT: pshufb %xmm3, %xmm4
; SSSE3-NEXT: psrlw $4, %xmm0
; SSSE3-NEXT: pand %xmm2, %xmm0
; SSSE3-NEXT: pshufb %xmm0, %xmm1
; SSSE3-NEXT: paddb %xmm4, %xmm1
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: testv16i8:
; SSE41: # BB#0:
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE41-NEXT: movdqa %xmm0, %xmm3
; SSE41-NEXT: pand %xmm2, %xmm3
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; SSE41-NEXT: movdqa %xmm1, %xmm4
; SSE41-NEXT: pshufb %xmm3, %xmm4
; SSE41-NEXT: psrlw $4, %xmm0
; SSE41-NEXT: pand %xmm2, %xmm0
; SSE41-NEXT: pshufb %xmm0, %xmm1
; SSE41-NEXT: paddb %xmm4, %xmm1
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: testv16i8:
; AVX: # BB#0:
; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm2
; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX-NEXT: vpshufb %xmm2, %xmm3, %xmm2
; AVX-NEXT: vpsrlw $4, %xmm0, %xmm0
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpshufb %xmm0, %xmm3, %xmm0
; AVX-NEXT: vpaddb %xmm2, %xmm0, %xmm0
; AVX-NEXT: retq
  %out = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %in)
  ret <16 x i8> %out
}
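
; The remaining tests check that ctpop of constant vectors is folded to an
; immediate load at compile time: e.g. ctpop(<i64 256, i64 -1>) yields
; <i64 1, i64 64>, since 256 has one set bit and -1 has all 64 set.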
define <2 x i64> @foldv2i64() nounwind {
; SSE-LABEL: foldv2i64:
; SSE: # BB#0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,64]
; SSE-NEXT: retq
;
; AVX-LABEL: foldv2i64:
; AVX: # BB#0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [1,64]
; AVX-NEXT: retq
  %out = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> <i64 256, i64 -1>)
  ret <2 x i64> %out
}

define <4 x i32> @foldv4i32() nounwind {
; SSE-LABEL: foldv4i32:
; SSE: # BB#0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,32,0,8]
; SSE-NEXT: retq
;
; AVX-LABEL: foldv4i32:
; AVX: # BB#0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [1,32,0,8]
; AVX-NEXT: retq
  %out = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> <i32 256, i32 -1, i32 0, i32 255>)
  ret <4 x i32> %out
}

define <8 x i16> @foldv8i16() nounwind {
; SSE-LABEL: foldv8i16:
; SSE: # BB#0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,16,0,8,0,3,2,3]
; SSE-NEXT: retq
;
; AVX-LABEL: foldv8i16:
; AVX: # BB#0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [1,16,0,8,0,3,2,3]
; AVX-NEXT: retq
  %out = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> <i16 256, i16 -1, i16 0, i16 255, i16 -65536, i16 7, i16 24, i16 88>)
  ret <8 x i16> %out
}

define <16 x i8> @foldv16i8() nounwind {
; SSE-LABEL: foldv16i8:
; SSE: # BB#0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [0,8,0,8,0,3,2,3,7,7,1,1,1,1,1,1]
; SSE-NEXT: retq
;
; AVX-LABEL: foldv16i8:
; AVX: # BB#0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [0,8,0,8,0,3,2,3,7,7,1,1,1,1,1,1]
; AVX-NEXT: retq
  %out = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> <i8 256, i8 -1, i8 0, i8 255, i8 -65536, i8 7, i8 24, i8 88, i8 -2, i8 254, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32>)
  ret <16 x i8> %out
}

declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>)
declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>)
declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16>)
declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>)