Mercurial > hg > CbC > CbC_llvm
comparison test/CodeGen/X86/pseudo_cmov_lower.ll @ 95:afa8332a0e37 LLVM3.8
LLVM 3.8
author | Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp> |
---|---|
date | Tue, 13 Oct 2015 17:48:58 +0900 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
84:f3e34b893a5f | 95:afa8332a0e37 |
---|---|
1 ; RUN: llc < %s -mtriple=i386-linux-gnu -o - | FileCheck %s | |
2 | |
3 ; This test checks that only a single js gets generated in the final code | |
4 ; for lowering the CMOV pseudos that get created for this IR. | |
5 ; CHECK-LABEL: foo1: | |
6 ; CHECK: js | |
7 ; CHECK-NOT: js | |
8 define i32 @foo1(i32 %v1, i32 %v2, i32 %v3) nounwind { | |
9 entry: | |
10 %cmp = icmp slt i32 %v1, 0 | |
11 %v2.v3 = select i1 %cmp, i32 %v2, i32 %v3 | |
12 %v1.v2 = select i1 %cmp, i32 %v1, i32 %v2 | |
13 %sub = sub i32 %v1.v2, %v2.v3 | |
14 ret i32 %sub | |
15 } | |
16 | |
17 ; This test checks that only a single js gets generated in the final code | |
18 ; for lowering the CMOV pseudos that get created for this IR. This makes | |
19 ; sure the code for the lowering for opposite conditions gets tested. | |
20 ; CHECK-LABEL: foo11: | |
21 ; CHECK: js | |
22 ; CHECK-NOT: js | |
23 ; CHECK-NOT: jns | |
24 define i32 @foo11(i32 %v1, i32 %v2, i32 %v3) nounwind { | |
25 entry: | |
26 %cmp1 = icmp slt i32 %v1, 0 | |
27 %v2.v3 = select i1 %cmp1, i32 %v2, i32 %v3 | |
28 %cmp2 = icmp sge i32 %v1, 0 | |
29 %v1.v2 = select i1 %cmp2, i32 %v1, i32 %v2 | |
30 %sub = sub i32 %v1.v2, %v2.v3 | |
31 ret i32 %sub | |
32 } | |
33 | |
34 ; This test checks that only a single js gets generated in the final code | |
35 ; for lowering the CMOV pseudos that get created for this IR. | |
36 ; CHECK-LABEL: foo2: | |
37 ; CHECK: js | |
38 ; CHECK-NOT: js | |
39 define i32 @foo2(i8 %v1, i8 %v2, i8 %v3) nounwind { | |
40 entry: | |
41 %cmp = icmp slt i8 %v1, 0 | |
42 %v2.v3 = select i1 %cmp, i8 %v2, i8 %v3 | |
43 %v1.v2 = select i1 %cmp, i8 %v1, i8 %v2 | |
44 %t1 = sext i8 %v2.v3 to i32 | |
45 %t2 = sext i8 %v1.v2 to i32 | |
46 %sub = sub i32 %t1, %t2 | |
47 ret i32 %sub | |
48 } | |
49 | |
50 ; This test checks that only a single js gets generated in the final code | |
51 ; for lowering the CMOV pseudos that get created for this IR. | |
52 ; CHECK-LABEL: foo3: | |
53 ; CHECK: js | |
54 ; CHECK-NOT: js | |
55 define i32 @foo3(i16 %v1, i16 %v2, i16 %v3) nounwind { | |
56 entry: | |
57 %cmp = icmp slt i16 %v1, 0 | |
58 %v2.v3 = select i1 %cmp, i16 %v2, i16 %v3 | |
59 %v1.v2 = select i1 %cmp, i16 %v1, i16 %v2 | |
60 %t1 = sext i16 %v2.v3 to i32 | |
61 %t2 = sext i16 %v1.v2 to i32 | |
62 %sub = sub i32 %t1, %t2 | |
63 ret i32 %sub | |
64 } | |
65 | |
66 ; This test checks that only a single js gets generated in the final code | |
67 ; for lowering the CMOV pseudos that get created for this IR. | |
68 ; CHECK-LABEL: foo4: | |
69 ; CHECK: js | |
70 ; CHECK-NOT: js | |
71 define float @foo4(i32 %v1, float %v2, float %v3, float %v4) nounwind { | |
72 entry: | |
73 %cmp = icmp slt i32 %v1, 0 | |
74 %t1 = select i1 %cmp, float %v2, float %v3 | |
75 %t2 = select i1 %cmp, float %v3, float %v4 | |
76 %sub = fsub float %t1, %t2 | |
77 ret float %sub | |
78 } | |
79 | |
80 ; This test checks that only a single je gets generated in the final code | |
81 ; for lowering the CMOV pseudos that get created for this IR. | |
82 ; CHECK-LABEL: foo5: | |
83 ; CHECK: je | |
84 ; CHECK-NOT: je | |
85 define double @foo5(i32 %v1, double %v2, double %v3, double %v4) nounwind { | |
86 entry: | |
87 %cmp = icmp eq i32 %v1, 0 | |
88 %t1 = select i1 %cmp, double %v2, double %v3 | |
89 %t2 = select i1 %cmp, double %v3, double %v4 | |
90 %sub = fsub double %t1, %t2 | |
91 ret double %sub | |
92 } | |
93 | |
94 ; This test checks that only a single je gets generated in the final code | |
95 ; for lowering the CMOV pseudos that get created for this IR. | |
96 ; CHECK-LABEL: foo6: | |
97 ; CHECK: je | |
98 ; CHECK-NOT: je | |
99 define <4 x float> @foo6(i32 %v1, <4 x float> %v2, <4 x float> %v3, <4 x float> %v4) nounwind { | |
100 entry: | |
101 %cmp = icmp eq i32 %v1, 0 | |
102 %t1 = select i1 %cmp, <4 x float> %v2, <4 x float> %v3 | |
103 %t2 = select i1 %cmp, <4 x float> %v3, <4 x float> %v4 | |
104 %sub = fsub <4 x float> %t1, %t2 | |
105 ret <4 x float> %sub | |
106 } | |
107 | |
108 ; This test checks that only a single je gets generated in the final code | |
109 ; for lowering the CMOV pseudos that get created for this IR. | |
110 ; CHECK-LABEL: foo7: | |
111 ; CHECK: je | |
112 ; CHECK-NOT: je | |
113 define <2 x double> @foo7(i32 %v1, <2 x double> %v2, <2 x double> %v3, <2 x double> %v4) nounwind { | |
114 entry: | |
115 %cmp = icmp eq i32 %v1, 0 | |
116 %t1 = select i1 %cmp, <2 x double> %v2, <2 x double> %v3 | |
117 %t2 = select i1 %cmp, <2 x double> %v3, <2 x double> %v4 | |
118 %sub = fsub <2 x double> %t1, %t2 | |
119 ret <2 x double> %sub | |
120 } | |
121 | |
122 ; This test checks that only a single ja gets generated in the final code | |
123 ; for lowering the CMOV pseudos that get created for this IR. This combines | |
124 ; all the supported types together into one long string of selects based | |
125 ; on the same condition. | |
126 ; CHECK-LABEL: foo8: | |
127 ; CHECK: ja | |
128 ; CHECK-NOT: ja | |
129 define void @foo8(i32 %v1, | |
130 i8 %v2, i8 %v3, | |
131 i16 %v12, i16 %v13, | |
132 i32 %v22, i32 %v23, | |
133 float %v32, float %v33, | |
134 double %v42, double %v43, | |
135 <4 x float> %v52, <4 x float> %v53, | |
136 <2 x double> %v62, <2 x double> %v63, | |
137 <8 x float> %v72, <8 x float> %v73, | |
138 <4 x double> %v82, <4 x double> %v83, | |
139 <16 x float> %v92, <16 x float> %v93, | |
140 <8 x double> %v102, <8 x double> %v103, | |
141 i8 * %dst) nounwind { | |
142 entry: | |
143 %add.ptr11 = getelementptr inbounds i8, i8* %dst, i32 2 | |
144 %a11 = bitcast i8* %add.ptr11 to i16* | |
145 | |
146 %add.ptr21 = getelementptr inbounds i8, i8* %dst, i32 4 | |
147 %a21 = bitcast i8* %add.ptr21 to i32* | |
148 | |
149 %add.ptr31 = getelementptr inbounds i8, i8* %dst, i32 8 | |
150 %a31 = bitcast i8* %add.ptr31 to float* | |
151 | |
152 %add.ptr41 = getelementptr inbounds i8, i8* %dst, i32 16 | |
153 %a41 = bitcast i8* %add.ptr41 to double* | |
154 | |
155 %add.ptr51 = getelementptr inbounds i8, i8* %dst, i32 32 | |
156 %a51 = bitcast i8* %add.ptr51 to <4 x float>* | |
157 | |
158 %add.ptr61 = getelementptr inbounds i8, i8* %dst, i32 48 | |
159 %a61 = bitcast i8* %add.ptr61 to <2 x double>* | |
160 | |
161 %add.ptr71 = getelementptr inbounds i8, i8* %dst, i32 64 | |
162 %a71 = bitcast i8* %add.ptr71 to <8 x float>* | |
163 | |
164 %add.ptr81 = getelementptr inbounds i8, i8* %dst, i32 128 | |
165 %a81 = bitcast i8* %add.ptr81 to <4 x double>* | |
166 | |
167 %add.ptr91 = getelementptr inbounds i8, i8* %dst, i32 64 | |
168 %a91 = bitcast i8* %add.ptr91 to <16 x float>* | |
169 | |
170 %add.ptr101 = getelementptr inbounds i8, i8* %dst, i32 128 | |
171 %a101 = bitcast i8* %add.ptr101 to <8 x double>* | |
172 | |
173 ; These operations are necessary, because select of two single use loads | |
174 ; ends up getting optimized into a select of two leas, followed by a | |
175 ; single load of the selected address. | |
176 %t13 = xor i16 %v13, 11 | |
177 %t23 = xor i32 %v23, 1234 | |
178 %t33 = fadd float %v33, %v32 | |
179 %t43 = fadd double %v43, %v42 | |
180 %t53 = fadd <4 x float> %v53, %v52 | |
181 %t63 = fadd <2 x double> %v63, %v62 | |
182 %t73 = fsub <8 x float> %v73, %v72 | |
183 %t83 = fsub <4 x double> %v83, %v82 | |
184 %t93 = fsub <16 x float> %v93, %v92 | |
185 %t103 = fsub <8 x double> %v103, %v102 | |
186 | |
187 %cmp = icmp ugt i32 %v1, 31 | |
188 %t11 = select i1 %cmp, i16 %v12, i16 %t13 | |
189 %t21 = select i1 %cmp, i32 %v22, i32 %t23 | |
190 %t31 = select i1 %cmp, float %v32, float %t33 | |
191 %t41 = select i1 %cmp, double %v42, double %t43 | |
192 %t51 = select i1 %cmp, <4 x float> %v52, <4 x float> %t53 | |
193 %t61 = select i1 %cmp, <2 x double> %v62, <2 x double> %t63 | |
194 %t71 = select i1 %cmp, <8 x float> %v72, <8 x float> %t73 | |
195 %t81 = select i1 %cmp, <4 x double> %v82, <4 x double> %t83 | |
196 %t91 = select i1 %cmp, <16 x float> %v92, <16 x float> %t93 | |
197 %t101 = select i1 %cmp, <8 x double> %v102, <8 x double> %t103 | |
198 | |
199 store i16 %t11, i16* %a11, align 2 | |
200 store i32 %t21, i32* %a21, align 4 | |
201 store float %t31, float* %a31, align 4 | |
202 store double %t41, double* %a41, align 8 | |
203 store <4 x float> %t51, <4 x float>* %a51, align 16 | |
204 store <2 x double> %t61, <2 x double>* %a61, align 16 | |
205 store <8 x float> %t71, <8 x float>* %a71, align 32 | |
206 store <4 x double> %t81, <4 x double>* %a81, align 32 | |
207 store <16 x float> %t91, <16 x float>* %a91, align 32 | |
208 store <8 x double> %t101, <8 x double>* %a101, align 32 | |
209 | |
210 ret void | |
211 } | |
212 | |
213 ; This test checks that only a single ja gets generated in the final code | |
214 ; for lowering the CMOV pseudos that get created for this IR. All of the | |
215 ; selects in this test are based on the same condition. | |
216 ; Contrary to my expectations, this doesn't exercise the code for | |
217 ; CMOV_V8I1, CMOV_V16I1, CMOV_V32I1, or CMOV_V64I1. Instead the selects all | |
218 ; get lowered into vector length number of selects, which all eventually turn | |
219 ; into a huge number of CMOV_GR8, which are all contiguous, so the optimization | |
220 ; kicks in as long as CMOV_GR8 is supported. I couldn't find a way to get | |
221 ; CMOV_V*I1 pseudo-opcodes to get generated. If a way exists to get CMOV_V*I1 | |
222 ; pseudo-opcodes to be generated, this test should be replaced with one that | |
223 ; tests those opcodes. | |
224 ; | |
225 ; CHECK-LABEL: foo9: | |
226 ; CHECK: ja | |
227 ; CHECK-NOT: ja | |
228 define void @foo9(i32 %v1, | |
229 <8 x i1> %v12, <8 x i1> %v13, | |
230 <16 x i1> %v22, <16 x i1> %v23, | |
231 <32 x i1> %v32, <32 x i1> %v33, | |
232 <64 x i1> %v42, <64 x i1> %v43, | |
233 i8 * %dst) nounwind { | |
234 entry: | |
235 %add.ptr11 = getelementptr inbounds i8, i8* %dst, i32 0 | |
236 %a11 = bitcast i8* %add.ptr11 to <8 x i1>* | |
237 | |
238 %add.ptr21 = getelementptr inbounds i8, i8* %dst, i32 4 | |
239 %a21 = bitcast i8* %add.ptr21 to <16 x i1>* | |
240 | |
241 %add.ptr31 = getelementptr inbounds i8, i8* %dst, i32 8 | |
242 %a31 = bitcast i8* %add.ptr31 to <32 x i1>* | |
243 | |
244 %add.ptr41 = getelementptr inbounds i8, i8* %dst, i32 16 | |
245 %a41 = bitcast i8* %add.ptr41 to <64 x i1>* | |
246 | |
247 ; These operations are necessary, because select of two single use loads | |
248 ; ends up getting optimized into a select of two leas, followed by a | |
249 ; single load of the selected address. | |
250 %t13 = xor <8 x i1> %v13, %v12 | |
251 %t23 = xor <16 x i1> %v23, %v22 | |
252 %t33 = xor <32 x i1> %v33, %v32 | |
253 %t43 = xor <64 x i1> %v43, %v42 | |
254 | |
255 %cmp = icmp ugt i32 %v1, 31 | |
256 %t11 = select i1 %cmp, <8 x i1> %v12, <8 x i1> %t13 | |
257 %t21 = select i1 %cmp, <16 x i1> %v22, <16 x i1> %t23 | |
258 %t31 = select i1 %cmp, <32 x i1> %v32, <32 x i1> %t33 | |
259 %t41 = select i1 %cmp, <64 x i1> %v42, <64 x i1> %t43 | |
260 | |
261 store <8 x i1> %t11, <8 x i1>* %a11, align 16 | |
262 store <16 x i1> %t21, <16 x i1>* %a21, align 4 | |
263 store <32 x i1> %t31, <32 x i1>* %a31, align 8 | |
264 store <64 x i1> %t41, <64 x i1>* %a41, align 16 | |
265 | |
266 ret void | |
267 } |