Mercurial > hg > CbC > CbC_llvm
comparison test/CodeGen/X86/fold-load-binops.ll @ 95:afa8332a0e37 LLVM3.8
LLVM 3.8
author | Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp> |
---|---|
date | Tue, 13 Oct 2015 17:48:58 +0900 |
parents | |
children | 7d135dc70f03 |
comparison
equal
deleted
inserted
replaced
84:f3e34b893a5f | 95:afa8332a0e37 |
---|---|
1 ; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+sse2 < %s | FileCheck %s --check-prefix=SSE | |
2 ; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx < %s | FileCheck %s --check-prefix=AVX | |
3 | |
4 ; Verify that we're folding the load into the math instruction. | |
5 ; This pattern is generated out of the simplest intrinsics usage: | |
6 ; _mm_add_ss(a, _mm_load_ss(b)); | |
7 | |
8 define <4 x float> @addss(<4 x float> %va, float* %pb) { ; lane 0 of %va plus the float loaded from %pb; result replaces lane 0 | |
9 ; SSE-LABEL: addss: | |
10 ; SSE: # BB#0: | |
11 ; SSE-NEXT: addss (%rdi), %xmm0 | |
12 ; SSE-NEXT: retq | |
13 ; | |
14 ; AVX-LABEL: addss: | |
15 ; AVX: # BB#0: | |
16 ; AVX-NEXT: vaddss (%rdi), %xmm0, %xmm0 | |
17 ; AVX-NEXT: retq | |
18 %a = extractelement <4 x float> %va, i32 0 ; read lane 0 of the vector argument | |
19 %b = load float, float* %pb ; scalar load; the checks above permit only one add with a (%rdi) memory operand before retq, so no separate load instruction is expected | |
20 %r = fadd float %a, %b ; scalar add of lane 0 and the loaded value | |
21 %vr = insertelement <4 x float> %va, float %r, i32 0 ; put the sum back in lane 0; the other lanes still come from %va | |
22 ret <4 x float> %vr | |
23 } | |
24 | |
25 define <2 x double> @addsd(<2 x double> %va, double* %pb) { ; lane 0 of %va plus the double loaded from %pb; result replaces lane 0 | |
26 ; SSE-LABEL: addsd: | |
27 ; SSE: # BB#0: | |
28 ; SSE-NEXT: addsd (%rdi), %xmm0 | |
29 ; SSE-NEXT: retq | |
30 ; | |
31 ; AVX-LABEL: addsd: | |
32 ; AVX: # BB#0: | |
33 ; AVX-NEXT: vaddsd (%rdi), %xmm0, %xmm0 | |
34 ; AVX-NEXT: retq | |
35 %a = extractelement <2 x double> %va, i32 0 ; read lane 0 of the vector argument | |
36 %b = load double, double* %pb ; scalar load; the checks above permit only one add with a (%rdi) memory operand before retq, so no separate load instruction is expected | |
37 %r = fadd double %a, %b ; scalar add of lane 0 and the loaded value | |
38 %vr = insertelement <2 x double> %va, double %r, i32 0 ; put the sum back in lane 0; lane 1 still comes from %va | |
39 ret <2 x double> %vr | |
40 } | |
41 | |
42 define <4 x float> @subss(<4 x float> %va, float* %pb) { ; lane 0 of %va minus the float loaded from %pb; result replaces lane 0 | |
43 ; SSE-LABEL: subss: | |
44 ; SSE: # BB#0: | |
45 ; SSE-NEXT: subss (%rdi), %xmm0 | |
46 ; SSE-NEXT: retq | |
47 ; | |
48 ; AVX-LABEL: subss: | |
49 ; AVX: # BB#0: | |
50 ; AVX-NEXT: vsubss (%rdi), %xmm0, %xmm0 | |
51 ; AVX-NEXT: retq | |
52 %a = extractelement <4 x float> %va, i32 0 ; read lane 0 of the vector argument | |
53 %b = load float, float* %pb ; scalar load; the checks above permit only one subtract with a (%rdi) memory operand before retq, so no separate load instruction is expected | |
54 %r = fsub float %a, %b ; lane 0 is the minuend, the loaded value is the subtrahend | |
55 %vr = insertelement <4 x float> %va, float %r, i32 0 ; put the difference back in lane 0; the other lanes still come from %va | |
56 ret <4 x float> %vr | |
57 } | |
58 | |
59 define <2 x double> @subsd(<2 x double> %va, double* %pb) { ; lane 0 of %va minus the double loaded from %pb; result replaces lane 0 | |
60 ; SSE-LABEL: subsd: | |
61 ; SSE: # BB#0: | |
62 ; SSE-NEXT: subsd (%rdi), %xmm0 | |
63 ; SSE-NEXT: retq | |
64 ; | |
65 ; AVX-LABEL: subsd: | |
66 ; AVX: # BB#0: | |
67 ; AVX-NEXT: vsubsd (%rdi), %xmm0, %xmm0 | |
68 ; AVX-NEXT: retq | |
69 %a = extractelement <2 x double> %va, i32 0 ; read lane 0 of the vector argument | |
70 %b = load double, double* %pb ; scalar load; the checks above permit only one subtract with a (%rdi) memory operand before retq, so no separate load instruction is expected | |
71 %r = fsub double %a, %b ; lane 0 is the minuend, the loaded value is the subtrahend | |
72 %vr = insertelement <2 x double> %va, double %r, i32 0 ; put the difference back in lane 0; lane 1 still comes from %va | |
73 ret <2 x double> %vr | |
74 } | |
75 | |
76 define <4 x float> @mulss(<4 x float> %va, float* %pb) { ; lane 0 of %va times the float loaded from %pb; result replaces lane 0 | |
77 ; SSE-LABEL: mulss: | |
78 ; SSE: # BB#0: | |
79 ; SSE-NEXT: mulss (%rdi), %xmm0 | |
80 ; SSE-NEXT: retq | |
81 ; | |
82 ; AVX-LABEL: mulss: | |
83 ; AVX: # BB#0: | |
84 ; AVX-NEXT: vmulss (%rdi), %xmm0, %xmm0 | |
85 ; AVX-NEXT: retq | |
86 %a = extractelement <4 x float> %va, i32 0 ; read lane 0 of the vector argument | |
87 %b = load float, float* %pb ; scalar load; the checks above permit only one multiply with a (%rdi) memory operand before retq, so no separate load instruction is expected | |
88 %r = fmul float %a, %b ; scalar multiply of lane 0 and the loaded value | |
89 %vr = insertelement <4 x float> %va, float %r, i32 0 ; put the product back in lane 0; the other lanes still come from %va | |
90 ret <4 x float> %vr | |
91 } | |
92 | |
93 define <2 x double> @mulsd(<2 x double> %va, double* %pb) { ; lane 0 of %va times the double loaded from %pb; result replaces lane 0 | |
94 ; SSE-LABEL: mulsd: | |
95 ; SSE: # BB#0: | |
96 ; SSE-NEXT: mulsd (%rdi), %xmm0 | |
97 ; SSE-NEXT: retq | |
98 ; | |
99 ; AVX-LABEL: mulsd: | |
100 ; AVX: # BB#0: | |
101 ; AVX-NEXT: vmulsd (%rdi), %xmm0, %xmm0 | |
102 ; AVX-NEXT: retq | |
103 %a = extractelement <2 x double> %va, i32 0 ; read lane 0 of the vector argument | |
104 %b = load double, double* %pb ; scalar load; the checks above permit only one multiply with a (%rdi) memory operand before retq, so no separate load instruction is expected | |
105 %r = fmul double %a, %b ; scalar multiply of lane 0 and the loaded value | |
106 %vr = insertelement <2 x double> %va, double %r, i32 0 ; put the product back in lane 0; lane 1 still comes from %va | |
107 ret <2 x double> %vr | |
108 } | |
109 | |
110 define <4 x float> @divss(<4 x float> %va, float* %pb) { ; lane 0 of %va divided by the float loaded from %pb; result replaces lane 0 | |
111 ; SSE-LABEL: divss: | |
112 ; SSE: # BB#0: | |
113 ; SSE-NEXT: divss (%rdi), %xmm0 | |
114 ; SSE-NEXT: retq | |
115 ; | |
116 ; AVX-LABEL: divss: | |
117 ; AVX: # BB#0: | |
118 ; AVX-NEXT: vdivss (%rdi), %xmm0, %xmm0 | |
119 ; AVX-NEXT: retq | |
120 %a = extractelement <4 x float> %va, i32 0 ; read lane 0 of the vector argument | |
121 %b = load float, float* %pb ; scalar load; the checks above permit only one divide with a (%rdi) memory operand before retq, so no separate load instruction is expected | |
122 %r = fdiv float %a, %b ; lane 0 is the dividend, the loaded value is the divisor | |
123 %vr = insertelement <4 x float> %va, float %r, i32 0 ; put the quotient back in lane 0; the other lanes still come from %va | |
124 ret <4 x float> %vr | |
125 } | |
126 | |
127 define <2 x double> @divsd(<2 x double> %va, double* %pb) { ; lane 0 of %va divided by the double loaded from %pb; result replaces lane 0 | |
128 ; SSE-LABEL: divsd: | |
129 ; SSE: # BB#0: | |
130 ; SSE-NEXT: divsd (%rdi), %xmm0 | |
131 ; SSE-NEXT: retq | |
132 ; | |
133 ; AVX-LABEL: divsd: | |
134 ; AVX: # BB#0: | |
135 ; AVX-NEXT: vdivsd (%rdi), %xmm0, %xmm0 | |
136 ; AVX-NEXT: retq | |
137 %a = extractelement <2 x double> %va, i32 0 ; read lane 0 of the vector argument | |
138 %b = load double, double* %pb ; scalar load; the checks above permit only one divide with a (%rdi) memory operand before retq, so no separate load instruction is expected | |
139 %r = fdiv double %a, %b ; lane 0 is the dividend, the loaded value is the divisor | |
140 %vr = insertelement <2 x double> %va, double %r, i32 0 ; put the quotient back in lane 0; lane 1 still comes from %va | |
141 ret <2 x double> %vr | |
142 } | |