Mercurial > hg > CbC > CbC_llvm
comparison test/CodeGen/X86/fmaxnum.ll @ 100:7d135dc70f03 LLVM 3.9
LLVM 3.9
author | Miyagi Mitsuki <e135756@ie.u-ryukyu.ac.jp> |
---|---|
date | Tue, 26 Jan 2016 22:53:40 +0900 |
parents | 60c9769439b8 |
children | 3a76565eade5 |
comparison (legend: equal, deleted, inserted, replaced)
96:6418606d0ead | 100:7d135dc70f03 |
---|---|
1 ; RUN: llc -march=x86 -mtriple=i386-linux-gnu < %s | FileCheck %s | 1 ; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=sse2 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=SSE |
2 ; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=avx < %s | FileCheck %s --check-prefix=CHECK --check-prefix=AVX | |
2 | 3 |
3 declare float @fmaxf(float, float) | 4 declare float @fmaxf(float, float) |
4 declare double @fmax(double, double) | 5 declare double @fmax(double, double) |
5 declare x86_fp80 @fmaxl(x86_fp80, x86_fp80) | 6 declare x86_fp80 @fmaxl(x86_fp80, x86_fp80) |
6 declare float @llvm.maxnum.f32(float, float) | 7 declare float @llvm.maxnum.f32(float, float) |
7 declare double @llvm.maxnum.f64(double, double) | 8 declare double @llvm.maxnum.f64(double, double) |
8 declare x86_fp80 @llvm.maxnum.f80(x86_fp80, x86_fp80) | 9 declare x86_fp80 @llvm.maxnum.f80(x86_fp80, x86_fp80) |
9 | 10 |
11 declare <2 x float> @llvm.maxnum.v2f32(<2 x float>, <2 x float>) | |
12 declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>) | |
13 declare <2 x double> @llvm.maxnum.v2f64(<2 x double>, <2 x double>) | |
14 declare <4 x double> @llvm.maxnum.v4f64(<4 x double>, <4 x double>) | |
15 declare <8 x double> @llvm.maxnum.v8f64(<8 x double>, <8 x double>) | |
16 | |
17 ; FIXME: As the vector tests show, the SSE run shouldn't need this many moves. | |
18 | |
10 ; CHECK-LABEL: @test_fmaxf | 19 ; CHECK-LABEL: @test_fmaxf |
11 ; CHECK: calll fmaxf | 20 ; SSE: movaps %xmm0, %xmm2 |
21 ; SSE-NEXT: cmpunordss %xmm2, %xmm2 | |
22 ; SSE-NEXT: movaps %xmm2, %xmm3 | |
23 ; SSE-NEXT: andps %xmm1, %xmm3 | |
24 ; SSE-NEXT: maxss %xmm0, %xmm1 | |
25 ; SSE-NEXT: andnps %xmm1, %xmm2 | |
26 ; SSE-NEXT: orps %xmm3, %xmm2 | |
27 ; SSE-NEXT: movaps %xmm2, %xmm0 | |
28 ; SSE-NEXT: retq | |
29 ; | |
30 ; AVX: vmaxss %xmm0, %xmm1, %xmm2 | |
31 ; AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0 | |
32 ; AVX-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0 | |
33 ; AVX-NEXT: retq | |
12 define float @test_fmaxf(float %x, float %y) { | 34 define float @test_fmaxf(float %x, float %y) { |
13 %z = call float @fmaxf(float %x, float %y) readnone | 35 %z = call float @fmaxf(float %x, float %y) readnone |
14 ret float %z | 36 ret float %z |
15 } | 37 } |
16 | 38 |
39 ; CHECK-LABEL: @test_fmaxf_minsize | |
40 ; CHECK: jmp fmaxf | |
41 define float @test_fmaxf_minsize(float %x, float %y) minsize { | |
42 %z = call float @fmaxf(float %x, float %y) readnone | |
43 ret float %z | |
44 } | |
45 | |
46 ; FIXME: As the vector tests show, the SSE run shouldn't need this many moves. | |
47 | |
17 ; CHECK-LABEL: @test_fmax | 48 ; CHECK-LABEL: @test_fmax |
18 ; CHECK: calll fmax | 49 ; SSE: movapd %xmm0, %xmm2 |
50 ; SSE-NEXT: cmpunordsd %xmm2, %xmm2 | |
51 ; SSE-NEXT: movapd %xmm2, %xmm3 | |
52 ; SSE-NEXT: andpd %xmm1, %xmm3 | |
53 ; SSE-NEXT: maxsd %xmm0, %xmm1 | |
54 ; SSE-NEXT: andnpd %xmm1, %xmm2 | |
55 ; SSE-NEXT: orpd %xmm3, %xmm2 | |
56 ; SSE-NEXT: movapd %xmm2, %xmm0 | |
57 ; SSE-NEXT: retq | |
58 ; | |
59 ; AVX: vmaxsd %xmm0, %xmm1, %xmm2 | |
60 ; AVX-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm0 | |
61 ; AVX-NEXT: vblendvpd %xmm0, %xmm1, %xmm2, %xmm0 | |
62 ; AVX-NEXT: retq | |
19 define double @test_fmax(double %x, double %y) { | 63 define double @test_fmax(double %x, double %y) { |
20 %z = call double @fmax(double %x, double %y) readnone | 64 %z = call double @fmax(double %x, double %y) readnone |
21 ret double %z | 65 ret double %z |
22 } | 66 } |
23 | 67 |
24 ; CHECK-LABEL: @test_fmaxl | 68 ; CHECK-LABEL: @test_fmaxl |
25 ; CHECK: calll fmaxl | 69 ; CHECK: callq fmaxl |
26 define x86_fp80 @test_fmaxl(x86_fp80 %x, x86_fp80 %y) { | 70 define x86_fp80 @test_fmaxl(x86_fp80 %x, x86_fp80 %y) { |
27 %z = call x86_fp80 @fmaxl(x86_fp80 %x, x86_fp80 %y) readnone | 71 %z = call x86_fp80 @fmaxl(x86_fp80 %x, x86_fp80 %y) readnone |
28 ret x86_fp80 %z | 72 ret x86_fp80 %z |
29 } | 73 } |
30 | 74 |
31 ; CHECK-LABEL: @test_intrinsic_fmaxf | 75 ; CHECK-LABEL: @test_intrinsic_fmaxf |
32 ; CHECK: calll fmaxf | 76 ; SSE: movaps %xmm0, %xmm2 |
77 ; SSE-NEXT: cmpunordss %xmm2, %xmm2 | |
78 ; SSE-NEXT: movaps %xmm2, %xmm3 | |
79 ; SSE-NEXT: andps %xmm1, %xmm3 | |
80 ; SSE-NEXT: maxss %xmm0, %xmm1 | |
81 ; SSE-NEXT: andnps %xmm1, %xmm2 | |
82 ; SSE-NEXT: orps %xmm3, %xmm2 | |
83 ; SSE-NEXT: movaps %xmm2, %xmm0 | |
84 ; SSE-NEXT: retq | |
85 ; | |
86 ; AVX: vmaxss %xmm0, %xmm1, %xmm2 | |
87 ; AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0 | |
88 ; AVX-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0 | |
89 ; AVX-NEXT: retq | |
33 define float @test_intrinsic_fmaxf(float %x, float %y) { | 90 define float @test_intrinsic_fmaxf(float %x, float %y) { |
34 %z = call float @llvm.maxnum.f32(float %x, float %y) readnone | 91 %z = call float @llvm.maxnum.f32(float %x, float %y) readnone |
35 ret float %z | 92 ret float %z |
36 } | 93 } |
37 | 94 |
95 | |
38 ; CHECK-LABEL: @test_intrinsic_fmax | 96 ; CHECK-LABEL: @test_intrinsic_fmax |
39 ; CHECK: calll fmax | 97 ; SSE: movapd %xmm0, %xmm2 |
98 ; SSE-NEXT: cmpunordsd %xmm2, %xmm2 | |
99 ; SSE-NEXT: movapd %xmm2, %xmm3 | |
100 ; SSE-NEXT: andpd %xmm1, %xmm3 | |
101 ; SSE-NEXT: maxsd %xmm0, %xmm1 | |
102 ; SSE-NEXT: andnpd %xmm1, %xmm2 | |
103 ; SSE-NEXT: orpd %xmm3, %xmm2 | |
104 ; SSE-NEXT: movapd %xmm2, %xmm0 | |
105 ; SSE-NEXT: retq | |
106 ; | |
107 ; AVX: vmaxsd %xmm0, %xmm1, %xmm2 | |
108 ; AVX-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm0 | |
109 ; AVX-NEXT: vblendvpd %xmm0, %xmm1, %xmm2, %xmm0 | |
110 ; AVX-NEXT: retq | |
40 define double @test_intrinsic_fmax(double %x, double %y) { | 111 define double @test_intrinsic_fmax(double %x, double %y) { |
41 %z = call double @llvm.maxnum.f64(double %x, double %y) readnone | 112 %z = call double @llvm.maxnum.f64(double %x, double %y) readnone |
42 ret double %z | 113 ret double %z |
43 } | 114 } |
44 | 115 |
45 ; CHECK-LABEL: @test_intrinsic_fmaxl | 116 ; CHECK-LABEL: @test_intrinsic_fmaxl |
46 ; CHECK: calll fmaxl | 117 ; CHECK: callq fmaxl |
47 define x86_fp80 @test_intrinsic_fmaxl(x86_fp80 %x, x86_fp80 %y) { | 118 define x86_fp80 @test_intrinsic_fmaxl(x86_fp80 %x, x86_fp80 %y) { |
48 %z = call x86_fp80 @llvm.maxnum.f80(x86_fp80 %x, x86_fp80 %y) readnone | 119 %z = call x86_fp80 @llvm.maxnum.f80(x86_fp80 %x, x86_fp80 %y) readnone |
49 ret x86_fp80 %z | 120 ret x86_fp80 %z |
50 } | 121 } |
122 | |
123 ; CHECK-LABEL: @test_intrinsic_fmax_v2f32 | |
124 ; SSE: movaps %xmm1, %xmm2 | |
125 ; SSE-NEXT: maxps %xmm0, %xmm2 | |
126 ; SSE-NEXT: cmpunordps %xmm0, %xmm0 | |
127 ; SSE-NEXT: andps %xmm0, %xmm1 | |
128 ; SSE-NEXT: andnps %xmm2, %xmm0 | |
129 ; SSE-NEXT: orps %xmm1, %xmm0 | |
130 ; SSE-NEXT: retq | |
131 ; | |
132 ; AVX: vmaxps %xmm0, %xmm1, %xmm2 | |
133 ; AVX-NEXT: vcmpunordps %xmm0, %xmm0, %xmm0 | |
134 ; AVX-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0 | |
135 ; AVX-NEXT: retq | |
136 define <2 x float> @test_intrinsic_fmax_v2f32(<2 x float> %x, <2 x float> %y) { | |
137 %z = call <2 x float> @llvm.maxnum.v2f32(<2 x float> %x, <2 x float> %y) readnone | |
138 ret <2 x float> %z | |
139 } | |
140 | |
141 ; CHECK-LABEL: @test_intrinsic_fmax_v4f32 | |
142 ; SSE: movaps %xmm1, %xmm2 | |
143 ; SSE-NEXT: maxps %xmm0, %xmm2 | |
144 ; SSE-NEXT: cmpunordps %xmm0, %xmm0 | |
145 ; SSE-NEXT: andps %xmm0, %xmm1 | |
146 ; SSE-NEXT: andnps %xmm2, %xmm0 | |
147 ; SSE-NEXT: orps %xmm1, %xmm0 | |
148 ; SSE-NEXT: retq | |
149 ; | |
150 ; AVX: vmaxps %xmm0, %xmm1, %xmm2 | |
151 ; AVX-NEXT: vcmpunordps %xmm0, %xmm0, %xmm0 | |
152 ; AVX-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0 | |
153 ; AVX-NEXT: retq | |
154 define <4 x float> @test_intrinsic_fmax_v4f32(<4 x float> %x, <4 x float> %y) { | |
155 %z = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %x, <4 x float> %y) readnone | |
156 ret <4 x float> %z | |
157 } | |
158 | |
159 ; CHECK-LABEL: @test_intrinsic_fmax_v2f64 | |
160 ; SSE: movapd %xmm1, %xmm2 | |
161 ; SSE-NEXT: maxpd %xmm0, %xmm2 | |
162 ; SSE-NEXT: cmpunordpd %xmm0, %xmm0 | |
163 ; SSE-NEXT: andpd %xmm0, %xmm1 | |
164 ; SSE-NEXT: andnpd %xmm2, %xmm0 | |
165 ; SSE-NEXT: orpd %xmm1, %xmm0 | |
166 ; SSE-NEXT: retq | |
167 ; | |
168 ; AVX: vmaxpd %xmm0, %xmm1, %xmm2 | |
169 ; AVX-NEXT: vcmpunordpd %xmm0, %xmm0, %xmm0 | |
170 ; AVX-NEXT: vblendvpd %xmm0, %xmm1, %xmm2, %xmm0 | |
171 ; AVX-NEXT: retq | |
172 define <2 x double> @test_intrinsic_fmax_v2f64(<2 x double> %x, <2 x double> %y) { | |
173 %z = call <2 x double> @llvm.maxnum.v2f64(<2 x double> %x, <2 x double> %y) readnone | |
174 ret <2 x double> %z | |
175 } | |
176 | |
177 ; CHECK-LABEL: @test_intrinsic_fmax_v4f64 | |
178 ; SSE: movapd %xmm2, %xmm4 | |
179 ; SSE-NEXT: maxpd %xmm0, %xmm4 | |
180 ; SSE-NEXT: cmpunordpd %xmm0, %xmm0 | |
181 ; SSE-NEXT: andpd %xmm0, %xmm2 | |
182 ; SSE-NEXT: andnpd %xmm4, %xmm0 | |
183 ; SSE-NEXT: orpd %xmm2, %xmm0 | |
184 ; SSE-NEXT: movapd %xmm3, %xmm2 | |
185 ; SSE-NEXT: maxpd %xmm1, %xmm2 | |
186 ; SSE-NEXT: cmpunordpd %xmm1, %xmm1 | |
187 ; SSE-NEXT: andpd %xmm1, %xmm3 | |
188 ; SSE-NEXT: andnpd %xmm2, %xmm1 | |
189 ; SSE-NEXT: orpd %xmm3, %xmm1 | |
190 ; SSE-NEXT: retq | |
191 ; | |
192 ; AVX: vmaxpd %ymm0, %ymm1, %ymm2 | |
193 ; AVX-NEXT: vcmpunordpd %ymm0, %ymm0, %ymm0 | |
194 ; AVX-NEXT: vblendvpd %ymm0, %ymm1, %ymm2, %ymm0 | |
195 ; AVX-NEXT: retq | |
196 define <4 x double> @test_intrinsic_fmax_v4f64(<4 x double> %x, <4 x double> %y) { | |
197 %z = call <4 x double> @llvm.maxnum.v4f64(<4 x double> %x, <4 x double> %y) readnone | |
198 ret <4 x double> %z | |
199 } | |
200 | |
201 ; CHECK-LABEL: @test_intrinsic_fmax_v8f64 | |
202 ; SSE: movapd %xmm4, %xmm8 | |
203 ; SSE-NEXT: maxpd %xmm0, %xmm8 | |
204 ; SSE-NEXT: cmpunordpd %xmm0, %xmm0 | |
205 ; SSE-NEXT: andpd %xmm0, %xmm4 | |
206 ; SSE-NEXT: andnpd %xmm8, %xmm0 | |
207 ; SSE-NEXT: orpd %xmm4, %xmm0 | |
208 ; SSE-NEXT: movapd %xmm5, %xmm4 | |
209 ; SSE-NEXT: maxpd %xmm1, %xmm4 | |
210 ; SSE-NEXT: cmpunordpd %xmm1, %xmm1 | |
211 ; SSE-NEXT: andpd %xmm1, %xmm5 | |
212 ; SSE-NEXT: andnpd %xmm4, %xmm1 | |
213 ; SSE-NEXT: orpd %xmm5, %xmm1 | |
214 ; SSE-NEXT: movapd %xmm6, %xmm4 | |
215 ; SSE-NEXT: maxpd %xmm2, %xmm4 | |
216 ; SSE-NEXT: cmpunordpd %xmm2, %xmm2 | |
217 ; SSE-NEXT: andpd %xmm2, %xmm6 | |
218 ; SSE-NEXT: andnpd %xmm4, %xmm2 | |
219 ; SSE-NEXT: orpd %xmm6, %xmm2 | |
220 ; SSE-NEXT: movapd %xmm7, %xmm4 | |
221 ; SSE-NEXT: maxpd %xmm3, %xmm4 | |
222 ; SSE-NEXT: cmpunordpd %xmm3, %xmm3 | |
223 ; SSE-NEXT: andpd %xmm3, %xmm7 | |
224 ; SSE-NEXT: andnpd %xmm4, %xmm3 | |
225 ; SSE-NEXT: orpd %xmm7, %xmm3 | |
226 ; SSE-NEXT: retq | |
227 ; | |
228 ; AVX: vmaxpd %ymm0, %ymm2, %ymm4 | |
229 ; AVX-NEXT: vcmpunordpd %ymm0, %ymm0, %ymm0 | |
230 ; AVX-NEXT: vblendvpd %ymm0, %ymm2, %ymm4, %ymm0 | |
231 ; AVX-NEXT: vmaxpd %ymm1, %ymm3, %ymm2 | |
232 ; AVX-NEXT: vcmpunordpd %ymm1, %ymm1, %ymm1 | |
233 ; AVX-NEXT: vblendvpd %ymm1, %ymm3, %ymm2, %ymm1 | |
234 ; AVX-NEXT: retq | |
235 define <8 x double> @test_intrinsic_fmax_v8f64(<8 x double> %x, <8 x double> %y) { | |
236 %z = call <8 x double> @llvm.maxnum.v8f64(<8 x double> %x, <8 x double> %y) readnone | |
237 ret <8 x double> %z | |
238 } | |
239 |