comparison: test/CodeGen/X86/atomic_mi.ll @ 95:afa8332a0e37 (branch LLVM 3.8)
author:   Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
date:     Tue, 13 Oct 2015 17:48:58 +0900
parents:  60c9769439b8
children: 7d135dc70f03
84:f3e34b893a5f (old) | 95:afa8332a0e37 (new) |
1 ; RUN: llc < %s -march=x86-64 -verify-machineinstrs | FileCheck %s --check-prefix X64 | 1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix X64 |
2 ; RUN: llc < %s -march=x86 -verify-machineinstrs | FileCheck %s --check-prefix X32 | 2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix X32 |
3 ; RUN: llc < %s -march=x86-64 -mattr=slow-incdec -verify-machineinstrs | FileCheck %s --check-prefix SLOW_INC | 3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=slow-incdec -verify-machineinstrs | FileCheck %s --check-prefix SLOW_INC |
4 | 4 |
5 ; This file checks that atomic (non-seq_cst) stores of immediate values are | 5 ; This file checks that atomic (non-seq_cst) stores of immediate values are |
6 ; done in one mov instruction and not 2. More precisely, it makes sure that the | 6 ; done in one mov instruction and not 2. More precisely, it makes sure that the |
7 ; immediate is not first copied uselessly into a register. | 7 ; immediate is not first copied uselessly into a register. |
8 | 8 |
12 ; should be just an add instruction, instead of loading x into a register, doing | 12 ; should be just an add instruction, instead of loading x into a register, doing |
13 ; an add and storing the result back. | 13 ; an add and storing the result back. |
14 ; The binary operations supported are currently add, and, or, xor. | 14 ; The binary operations supported are currently add, and, or, xor. |
15 ; sub is not supported because it is translated into an addition of the | 15 ; sub is not supported because it is translated into an addition of the |
16 ; negated immediate. | 16 ; negated immediate. |
17 ; Finally, we also check the same kind of pattern for inc/dec | 17 ; |
18 ; We also check the same patterns: | |
19 ; - For inc/dec. | |
20 ; - For register instead of immediate operands. | |
21 ; - For floating point operations. | |
18 | 22 |
19 ; seq_cst stores are left as (lock) xchgl, but we try to check every other | 23 ; seq_cst stores are left as (lock) xchgl, but we try to check every other |
20 ; attribute at least once. | 24 ; attribute at least once. |
21 | 25 |
22 ; Please note that these operations do not require the lock prefix: only | 26 ; Please note that these operations do not require the lock prefix: only |
23 ; sequentially consistent stores require this kind of protection on X86. | 27 ; sequentially consistent stores require this kind of protection on X86. |
24 ; And even for seq_cst operations, llvm uses the xchg instruction which has | 28 ; And even for seq_cst operations, llvm uses the xchg instruction which has |
25 ; an implicit lock prefix, so making it explicit is not required. | 29 ; an implicit lock prefix, so making it explicit is not required. |
26 | 30 |
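As a point of contrast (a hedged sketch, not part of the test file): when the read-modify-write is written as a single atomicrmw rather than the separate atomic load / plain op / atomic store used throughout this test, the x86 backend does emit a lock-prefixed instruction, since the load and store must form one indivisible operation. The function name below is illustrative; the IR uses the same typed-pointer syntax as the new revision.

; Hypothetical contrast, not taken from atomic_mi.ll:
define void @rmw_contrast(i32* %p) {
  ; With the result unused, this lowers to a lock-prefixed memory-destination
  ; add on x86-64, e.g. `lock addl $2, (%rdi)`.
  %old = atomicrmw add i32* %p, i32 2 seq_cst
  ret void
}
; The functions checked below instead do atomic load + plain add + atomic
; store; with non-seq_cst orderings that needs no lock prefix and can fold
; into a single `addl $2, (%rdi)`.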
27 define void @store_atomic_imm_8(i8* %p) { | 31 define void @store_atomic_imm_8(i8* %p) { |
28 ; X64-LABEL: store_atomic_imm_8 | 32 ; X64-LABEL: store_atomic_imm_8: |
29 ; X64: movb | 33 ; X64: movb |
30 ; X64-NOT: movb | 34 ; X64-NOT: movb |
31 ; X32-LABEL: store_atomic_imm_8 | 35 ; X32-LABEL: store_atomic_imm_8: |
32 ; X32: movb | 36 ; X32: movb |
33 ; X32-NOT: movb | 37 ; X32-NOT: movb |
34 store atomic i8 42, i8* %p release, align 1 | 38 store atomic i8 42, i8* %p release, align 1 |
35 ret void | 39 ret void |
36 } | 40 } |
37 | 41 |
38 define void @store_atomic_imm_16(i16* %p) { | 42 define void @store_atomic_imm_16(i16* %p) { |
39 ; X64-LABEL: store_atomic_imm_16 | 43 ; X64-LABEL: store_atomic_imm_16: |
40 ; X64: movw | 44 ; X64: movw |
41 ; X64-NOT: movw | 45 ; X64-NOT: movw |
42 ; X32-LABEL: store_atomic_imm_16 | 46 ; X32-LABEL: store_atomic_imm_16: |
43 ; X32: movw | 47 ; X32: movw |
44 ; X32-NOT: movw | 48 ; X32-NOT: movw |
45 store atomic i16 42, i16* %p monotonic, align 2 | 49 store atomic i16 42, i16* %p monotonic, align 2 |
46 ret void | 50 ret void |
47 } | 51 } |
48 | 52 |
49 define void @store_atomic_imm_32(i32* %p) { | 53 define void @store_atomic_imm_32(i32* %p) { |
50 ; X64-LABEL: store_atomic_imm_32 | 54 ; X64-LABEL: store_atomic_imm_32: |
51 ; X64: movl | 55 ; X64: movl |
52 ; X64-NOT: movl | 56 ; X64-NOT: movl |
53 ; On 32-bit targets, there is an extra movl for each of those functions | 57 ; On 32-bit targets, there is an extra movl for each of those functions |
54 ; (probably for alignment reasons). | 58 ; (probably for alignment reasons). |
55 ; X32-LABEL: store_atomic_imm_32 | 59 ; X32-LABEL: store_atomic_imm_32: |
56 ; X32: movl 4(%esp), %eax | 60 ; X32: movl 4(%esp), %eax |
57 ; X32: movl | 61 ; X32: movl |
58 ; X32-NOT: movl | 62 ; X32-NOT: movl |
59 store atomic i32 42, i32* %p release, align 4 | 63 store atomic i32 42, i32* %p release, align 4 |
60 ret void | 64 ret void |
61 } | 65 } |
62 | 66 |
63 define void @store_atomic_imm_64(i64* %p) { | 67 define void @store_atomic_imm_64(i64* %p) { |
64 ; X64-LABEL: store_atomic_imm_64 | 68 ; X64-LABEL: store_atomic_imm_64: |
65 ; X64: movq | 69 ; X64: movq |
66 ; X64-NOT: movq | 70 ; X64-NOT: movq |
67 ; These are implemented with a CAS loop on 32 bit architectures, and thus | 71 ; These are implemented with a CAS loop on 32 bit architectures, and thus |
68 ; cannot be optimized in the same way as the others. | 72 ; cannot be optimized in the same way as the others. |
69 ; X32-LABEL: store_atomic_imm_64 | 73 ; X32-LABEL: store_atomic_imm_64: |
70 ; X32: cmpxchg8b | 74 ; X32: cmpxchg8b |
71 store atomic i64 42, i64* %p release, align 8 | 75 store atomic i64 42, i64* %p release, align 8 |
72 ret void | 76 ret void |
73 } | 77 } |
74 | 78 |
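For readers unfamiliar with the lowering referenced above, the cmpxchg8b CAS loop has roughly the following shape (an illustrative sketch; the test only checks the cmpxchg8b mnemonic, and the register assignments here are an assumption):

;   movl  (%esi), %eax       ; current low half of *p (need not be atomic:
;   movl  4(%esi), %edx      ; cmpxchg8b re-verifies it)
; .retry:
;   movl  $42, %ebx          ; desired low half
;   xorl  %ecx, %ecx         ; desired high half
;   lock cmpxchg8b (%esi)    ; store ecx:ebx iff *p still equals edx:eax,
;                            ; otherwise reload the current value into edx:eax
;   jne   .retry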
75 ; If an immediate is too big to fit in 32 bits, it cannot be stored in one mov; | 79 ; If an immediate is too big to fit in 32 bits, it cannot be stored in one mov; |
76 ; even on X64, one must use movabsq, which can only target a register. | 80 ; even on X64, one must use movabsq, which can only target a register. |
77 define void @store_atomic_imm_64_big(i64* %p) { | 81 define void @store_atomic_imm_64_big(i64* %p) { |
78 ; X64-LABEL: store_atomic_imm_64_big | 82 ; X64-LABEL: store_atomic_imm_64_big: |
79 ; X64: movabsq | 83 ; X64: movabsq |
80 ; X64: movq | 84 ; X64: movq |
81 store atomic i64 100000000000, i64* %p monotonic, align 8 | 85 store atomic i64 100000000000, i64* %p monotonic, align 8 |
82 ret void | 86 ret void |
83 } | 87 } |
84 | 88 |
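Spelled out for clarity (only the two mnemonics are checked; the registers below are illustrative): a mov to memory accepts at most a sign-extended 32-bit immediate, so the store above takes two instructions on x86-64, e.g.

;   movabsq $100000000000, %rax
;   movq    %rax, (%rdi)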
85 ; It would be incorrect to replace a lock xchgl by a movl | 89 ; It would be incorrect to replace a lock xchgl by a movl |
86 define void @store_atomic_imm_32_seq_cst(i32* %p) { | 90 define void @store_atomic_imm_32_seq_cst(i32* %p) { |
87 ; X64-LABEL: store_atomic_imm_32_seq_cst | 91 ; X64-LABEL: store_atomic_imm_32_seq_cst: |
88 ; X64: xchgl | 92 ; X64: xchgl |
89 ; X32-LABEL: store_atomic_imm_32_seq_cst | 93 ; X32-LABEL: store_atomic_imm_32_seq_cst: |
90 ; X32: xchgl | 94 ; X32: xchgl |
91 store atomic i32 42, i32* %p seq_cst, align 4 | 95 store atomic i32 42, i32* %p seq_cst, align 4 |
92 ret void | 96 ret void |
93 } | 97 } |
94 | 98 |
95 ; ----- ADD ----- | 99 ; ----- ADD ----- |
96 | 100 |
97 define void @add_8(i8* %p) { | 101 define void @add_8i(i8* %p) { |
98 ; X64-LABEL: add_8 | 102 ; X64-LABEL: add_8i: |
99 ; X64-NOT: lock | 103 ; X64-NOT: lock |
100 ; X64: addb | 104 ; X64: addb |
101 ; X64-NOT: movb | 105 ; X64-NOT: movb |
102 ; X32-LABEL: add_8 | 106 ; X32-LABEL: add_8i: |
103 ; X32-NOT: lock | 107 ; X32-NOT: lock |
104 ; X32: addb | 108 ; X32: addb |
105 ; X32-NOT: movb | 109 ; X32-NOT: movb |
106 %1 = load atomic i8* %p seq_cst, align 1 | 110 %1 = load atomic i8, i8* %p seq_cst, align 1 |
107 %2 = add i8 %1, 2 | 111 %2 = add i8 %1, 2 |
108 store atomic i8 %2, i8* %p release, align 1 | 112 store atomic i8 %2, i8* %p release, align 1 |
109 ret void | 113 ret void |
110 } | 114 } |
111 | 115 |
112 define void @add_16(i16* %p) { | 116 define void @add_8r(i8* %p, i8 %v) { |
117 ; X64-LABEL: add_8r: | |
118 ; X64-NOT: lock | |
119 ; X64: addb | |
120 ; X64-NOT: movb | |
121 ; X32-LABEL: add_8r: | |
122 ; X32-NOT: lock | |
123 ; X32: addb | |
124 ; X32-NOT: movb | |
125 %1 = load atomic i8, i8* %p seq_cst, align 1 | |
126 %2 = add i8 %1, %v | |
127 store atomic i8 %2, i8* %p release, align 1 | |
128 ret void | |
129 } | |
130 | |
131 define void @add_16i(i16* %p) { | |
113 ; Currently the transformation is not done on 16 bit accesses, as the backend | 132 ; Currently the transformation is not done on 16 bit accesses, as the backend |
114 ; treats 16 bit arithmetic as expensive on X86/X86_64. | 133 ; treats 16 bit arithmetic as expensive on X86/X86_64. |
115 ; X64-LABEL: add_16 | 134 ; X64-LABEL: add_16i: |
116 ; X64-NOT: addw | 135 ; X64-NOT: addw |
117 ; X32-LABEL: add_16 | 136 ; X32-LABEL: add_16i: |
118 ; X32-NOT: addw | 137 ; X32-NOT: addw |
119 %1 = load atomic i16* %p acquire, align 2 | 138 %1 = load atomic i16, i16* %p acquire, align 2 |
120 %2 = add i16 %1, 2 | 139 %2 = add i16 %1, 2 |
121 store atomic i16 %2, i16* %p release, align 2 | 140 store atomic i16 %2, i16* %p release, align 2 |
122 ret void | 141 ret void |
123 } | 142 } |
124 | 143 |
125 define void @add_32(i32* %p) { | 144 define void @add_16r(i16* %p, i16 %v) { |
126 ; X64-LABEL: add_32 | 145 ; Currently the transformation is not done on 16 bit accesses, as the backend |
146 ; treats 16 bit arithmetic as expensive on X86/X86_64. | |
147 ; X64-LABEL: add_16r: | |
148 ; X64-NOT: addw | |
149 ; X32-LABEL: add_16r: | |
150 ; X32-NOT: addw [.*], ( | |
151 %1 = load atomic i16, i16* %p acquire, align 2 | |
152 %2 = add i16 %1, %v | |
153 store atomic i16 %2, i16* %p release, align 2 | |
154 ret void | |
155 } | |
156 | |
157 define void @add_32i(i32* %p) { | |
158 ; X64-LABEL: add_32i: | |
127 ; X64-NOT: lock | 159 ; X64-NOT: lock |
128 ; X64: addl | 160 ; X64: addl |
129 ; X64-NOT: movl | 161 ; X64-NOT: movl |
130 ; X32-LABEL: add_32 | 162 ; X32-LABEL: add_32i: |
131 ; X32-NOT: lock | 163 ; X32-NOT: lock |
132 ; X32: addl | 164 ; X32: addl |
133 ; X32-NOT: movl | 165 ; X32-NOT: movl |
134 %1 = load atomic i32* %p acquire, align 4 | 166 %1 = load atomic i32, i32* %p acquire, align 4 |
135 %2 = add i32 %1, 2 | 167 %2 = add i32 %1, 2 |
136 store atomic i32 %2, i32* %p monotonic, align 4 | 168 store atomic i32 %2, i32* %p monotonic, align 4 |
137 ret void | 169 ret void |
138 } | 170 } |
139 | 171 |
140 define void @add_64(i64* %p) { | 172 define void @add_32r(i32* %p, i32 %v) { |
141 ; X64-LABEL: add_64 | 173 ; X64-LABEL: add_32r: |
174 ; X64-NOT: lock | |
175 ; X64: addl | |
176 ; X64-NOT: movl | |
177 ; X32-LABEL: add_32r: | |
178 ; X32-NOT: lock | |
179 ; X32: addl | |
180 ; X32-NOT: movl | |
181 %1 = load atomic i32, i32* %p acquire, align 4 | |
182 %2 = add i32 %1, %v | |
183 store atomic i32 %2, i32* %p monotonic, align 4 | |
184 ret void | |
185 } | |
186 | |
187 ; The following is a corner case where the load is added to itself. The pattern | |
188 ; matching should not fold this. We only test with 32-bit add, but the same | |
189 ; applies to other sizes and operations. | |
190 define void @add_32r_self(i32* %p) { | |
191 ; X64-LABEL: add_32r_self: | |
192 ; X64-NOT: lock | |
193 ; X64: movl (%[[M:[a-z]+]]), %[[R:[a-z]+]] | |
194 ; X64: addl %[[R]], %[[R]] | |
195 ; X64: movl %[[R]], (%[[M]]) | |
196 ; X32-LABEL: add_32r_self: | |
197 ; X32-NOT: lock | |
198 ; X32: movl (%[[M:[a-z]+]]), %[[R:[a-z]+]] | |
199 ; X32: addl %[[R]], %[[R]] | |
200 ; X32: movl %[[R]], (%[[M]]) | |
201 %1 = load atomic i32, i32* %p acquire, align 4 | |
202 %2 = add i32 %1, %1 | |
203 store atomic i32 %2, i32* %p monotonic, align 4 | |
204 ret void | |
205 } | |
206 | |
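To make the hazard in add_32r_self concrete (an illustrative sketch, not something the test checks literally): folding the add into a memory operand would re-read the atomic location, and the two reads could observe different values.

; Hypothetical, incorrect folding of add_32r_self:
;   movl (%rdi), %eax      ; first load of *p
;   addl %eax, (%rdi)      ; performs a second, separate load of *p
; A store from another thread between those two reads would make the result
; differ from %1 + %1, so the backend must load once, add the register to
; itself, and store the sum back, exactly as the CHECK lines above require.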
207 ; The following is a corner case where the load's result is returned. The | |
208 ; optimizer isn't allowed to duplicate the load because it's atomic. | |
209 define i32 @add_32r_ret_load(i32* %p, i32 %v) { | |
210 ; X64-LABEL: add_32r_ret_load: | |
211 ; X64-NOT: lock | |
212 ; X64: movl (%rdi), %eax | |
213 ; X64-NEXT: addl %eax, %esi | |
214 ; X64-NEXT: movl %esi, (%rdi) | |
215 ; X64-NEXT: retq | |
216 ; X32-LABEL: add_32r_ret_load: | |
217 ; X32-NOT: lock | |
218 ; X32: movl 4(%esp), %[[P:[a-z]+]] | |
219 ; X32-NEXT: movl (%[[P]]), | |
220 ; X32-NOT: %[[P]] | |
221 ; More code here, we just don't want it to load from P. | |
222 ; X32: movl %{{.*}}, (%[[P]]) | |
223 ; X32-NEXT: retl | |
224 %1 = load atomic i32, i32* %p acquire, align 4 | |
225 %2 = add i32 %1, %v | |
226 store atomic i32 %2, i32* %p monotonic, align 4 | |
227 ret i32 %1 | |
228 } | |
229 | |
230 define void @add_64i(i64* %p) { | |
231 ; X64-LABEL: add_64i: | |
142 ; X64-NOT: lock | 232 ; X64-NOT: lock |
143 ; X64: addq | 233 ; X64: addq |
144 ; X64-NOT: movq | 234 ; X64-NOT: movq |
145 ; We do not check X86-32 as it cannot do 'addq'. | 235 ; We do not check X86-32 as it cannot do 'addq'. |
146 ; X32-LABEL: add_64 | 236 ; X32-LABEL: add_64i: |
147 %1 = load atomic i64* %p acquire, align 8 | 237 %1 = load atomic i64, i64* %p acquire, align 8 |
148 %2 = add i64 %1, 2 | 238 %2 = add i64 %1, 2 |
149 store atomic i64 %2, i64* %p release, align 8 | 239 store atomic i64 %2, i64* %p release, align 8 |
150 ret void | 240 ret void |
151 } | 241 } |
152 | 242 |
153 define void @add_32_seq_cst(i32* %p) { | 243 define void @add_64r(i64* %p, i64 %v) { |
154 ; X64-LABEL: add_32_seq_cst | 244 ; X64-LABEL: add_64r: |
155 ; X64: xchgl | 245 ; X64-NOT: lock |
156 ; X32-LABEL: add_32_seq_cst | 246 ; X64: addq |
157 ; X32: xchgl | 247 ; X64-NOT: movq |
158 %1 = load atomic i32* %p monotonic, align 4 | 248 ; We do not check X86-32 as it cannot do 'addq'. |
249 ; X32-LABEL: add_64r: | |
250 %1 = load atomic i64, i64* %p acquire, align 8 | |
251 %2 = add i64 %1, %v | |
252 store atomic i64 %2, i64* %p release, align 8 | |
253 ret void | |
254 } | |
255 | |
256 define void @add_32i_seq_cst(i32* %p) { | |
257 ; X64-LABEL: add_32i_seq_cst: | |
258 ; X64: xchgl | |
259 ; X32-LABEL: add_32i_seq_cst: | |
260 ; X32: xchgl | |
261 %1 = load atomic i32, i32* %p monotonic, align 4 | |
159 %2 = add i32 %1, 2 | 262 %2 = add i32 %1, 2 |
160 store atomic i32 %2, i32* %p seq_cst, align 4 | 263 store atomic i32 %2, i32* %p seq_cst, align 4 |
161 ret void | 264 ret void |
162 } | 265 } |
163 | 266 |
267 define void @add_32r_seq_cst(i32* %p, i32 %v) { | |
268 ; X64-LABEL: add_32r_seq_cst: | |
269 ; X64: xchgl | |
270 ; X32-LABEL: add_32r_seq_cst: | |
271 ; X32: xchgl | |
272 %1 = load atomic i32, i32* %p monotonic, align 4 | |
273 %2 = add i32 %1, %v | |
274 store atomic i32 %2, i32* %p seq_cst, align 4 | |
275 ret void | |
276 } | |
277 | |
164 ; ----- AND ----- | 278 ; ----- AND ----- |
165 | 279 |
166 define void @and_8(i8* %p) { | 280 define void @and_8i(i8* %p) { |
167 ; X64-LABEL: and_8 | 281 ; X64-LABEL: and_8i: |
168 ; X64-NOT: lock | 282 ; X64-NOT: lock |
169 ; X64: andb | 283 ; X64: andb |
170 ; X64-NOT: movb | 284 ; X64-NOT: movb |
171 ; X32-LABEL: and_8 | 285 ; X32-LABEL: and_8i: |
172 ; X32-NOT: lock | 286 ; X32-NOT: lock |
173 ; X32: andb | 287 ; X32: andb |
174 ; X32-NOT: movb | 288 ; X32-NOT: movb |
175 %1 = load atomic i8* %p monotonic, align 1 | 289 %1 = load atomic i8, i8* %p monotonic, align 1 |
176 %2 = and i8 %1, 2 | 290 %2 = and i8 %1, 2 |
177 store atomic i8 %2, i8* %p release, align 1 | 291 store atomic i8 %2, i8* %p release, align 1 |
178 ret void | 292 ret void |
179 } | 293 } |
180 | 294 |
181 define void @and_16(i16* %p) { | 295 define void @and_8r(i8* %p, i8 %v) { |
296 ; X64-LABEL: and_8r: | |
297 ; X64-NOT: lock | |
298 ; X64: andb | |
299 ; X64-NOT: movb | |
300 ; X32-LABEL: and_8r: | |
301 ; X32-NOT: lock | |
302 ; X32: andb | |
303 ; X32-NOT: movb | |
304 %1 = load atomic i8, i8* %p monotonic, align 1 | |
305 %2 = and i8 %1, %v | |
306 store atomic i8 %2, i8* %p release, align 1 | |
307 ret void | |
308 } | |
309 | |
310 define void @and_16i(i16* %p) { | |
182 ; Currently the transformation is not done on 16 bit accesses, as the backend | 311 ; Currently the transformation is not done on 16 bit accesses, as the backend |
183 ; treats 16 bit arithmetic as expensive on X86/X86_64. | 312 ; treats 16 bit arithmetic as expensive on X86/X86_64. |
184 ; X64-LABEL: and_16 | 313 ; X64-LABEL: and_16i: |
185 ; X64-NOT: andw | 314 ; X64-NOT: andw |
186 ; X32-LABEL: and_16 | 315 ; X32-LABEL: and_16i: |
187 ; X32-NOT: andw | 316 ; X32-NOT: andw |
188 %1 = load atomic i16* %p acquire, align 2 | 317 %1 = load atomic i16, i16* %p acquire, align 2 |
189 %2 = and i16 %1, 2 | 318 %2 = and i16 %1, 2 |
190 store atomic i16 %2, i16* %p release, align 2 | 319 store atomic i16 %2, i16* %p release, align 2 |
191 ret void | 320 ret void |
192 } | 321 } |
193 | 322 |
194 define void @and_32(i32* %p) { | 323 define void @and_16r(i16* %p, i16 %v) { |
195 ; X64-LABEL: and_32 | 324 ; Currently the transformation is not done on 16 bit accesses, as the backend |
325 ; treats 16 bit arithmetic as expensive on X86/X86_64. | |
326 ; X64-LABEL: and_16r: | |
327 ; X64-NOT: andw | |
328 ; X32-LABEL: and_16r: | |
329 ; X32-NOT: andw [.*], ( | |
330 %1 = load atomic i16, i16* %p acquire, align 2 | |
331 %2 = and i16 %1, %v | |
332 store atomic i16 %2, i16* %p release, align 2 | |
333 ret void | |
334 } | |
335 | |
336 define void @and_32i(i32* %p) { | |
337 ; X64-LABEL: and_32i: | |
196 ; X64-NOT: lock | 338 ; X64-NOT: lock |
197 ; X64: andl | 339 ; X64: andl |
198 ; X64-NOT: movl | 340 ; X64-NOT: movl |
199 ; X32-LABEL: and_32 | 341 ; X32-LABEL: and_32i: |
200 ; X32-NOT: lock | 342 ; X32-NOT: lock |
201 ; X32: andl | 343 ; X32: andl |
202 ; X32-NOT: movl | 344 ; X32-NOT: movl |
203 %1 = load atomic i32* %p acquire, align 4 | 345 %1 = load atomic i32, i32* %p acquire, align 4 |
204 %2 = and i32 %1, 2 | 346 %2 = and i32 %1, 2 |
205 store atomic i32 %2, i32* %p release, align 4 | 347 store atomic i32 %2, i32* %p release, align 4 |
206 ret void | 348 ret void |
207 } | 349 } |
208 | 350 |
209 define void @and_64(i64* %p) { | 351 define void @and_32r(i32* %p, i32 %v) { |
210 ; X64-LABEL: and_64 | 352 ; X64-LABEL: and_32r: |
353 ; X64-NOT: lock | |
354 ; X64: andl | |
355 ; X64-NOT: movl | |
356 ; X32-LABEL: and_32r: | |
357 ; X32-NOT: lock | |
358 ; X32: andl | |
359 ; X32-NOT: movl | |
360 %1 = load atomic i32, i32* %p acquire, align 4 | |
361 %2 = and i32 %1, %v | |
362 store atomic i32 %2, i32* %p release, align 4 | |
363 ret void | |
364 } | |
365 | |
366 define void @and_64i(i64* %p) { | |
367 ; X64-LABEL: and_64i: | |
211 ; X64-NOT: lock | 368 ; X64-NOT: lock |
212 ; X64: andq | 369 ; X64: andq |
213 ; X64-NOT: movq | 370 ; X64-NOT: movq |
214 ; We do not check X86-32 as it cannot do 'andq'. | 371 ; We do not check X86-32 as it cannot do 'andq'. |
215 ; X32-LABEL: and_64 | 372 ; X32-LABEL: and_64i: |
216 %1 = load atomic i64* %p acquire, align 8 | 373 %1 = load atomic i64, i64* %p acquire, align 8 |
217 %2 = and i64 %1, 2 | 374 %2 = and i64 %1, 2 |
218 store atomic i64 %2, i64* %p release, align 8 | 375 store atomic i64 %2, i64* %p release, align 8 |
219 ret void | 376 ret void |
220 } | 377 } |
221 | 378 |
222 define void @and_32_seq_cst(i32* %p) { | 379 define void @and_64r(i64* %p, i64 %v) { |
223 ; X64-LABEL: and_32_seq_cst | 380 ; X64-LABEL: and_64r: |
224 ; X64: xchgl | 381 ; X64-NOT: lock |
225 ; X32-LABEL: and_32_seq_cst | 382 ; X64: andq |
226 ; X32: xchgl | 383 ; X64-NOT: movq |
227 %1 = load atomic i32* %p monotonic, align 4 | 384 ; We do not check X86-32 as it cannot do 'andq'. |
385 ; X32-LABEL: and_64r: | |
386 %1 = load atomic i64, i64* %p acquire, align 8 | |
387 %2 = and i64 %1, %v | |
388 store atomic i64 %2, i64* %p release, align 8 | |
389 ret void | |
390 } | |
391 | |
392 define void @and_32i_seq_cst(i32* %p) { | |
393 ; X64-LABEL: and_32i_seq_cst: | |
394 ; X64: xchgl | |
395 ; X32-LABEL: and_32i_seq_cst: | |
396 ; X32: xchgl | |
397 %1 = load atomic i32, i32* %p monotonic, align 4 | |
228 %2 = and i32 %1, 2 | 398 %2 = and i32 %1, 2 |
229 store atomic i32 %2, i32* %p seq_cst, align 4 | 399 store atomic i32 %2, i32* %p seq_cst, align 4 |
230 ret void | 400 ret void |
231 } | 401 } |
232 | 402 |
403 define void @and_32r_seq_cst(i32* %p, i32 %v) { | |
404 ; X64-LABEL: and_32r_seq_cst: | |
405 ; X64: xchgl | |
406 ; X32-LABEL: and_32r_seq_cst: | |
407 ; X32: xchgl | |
408 %1 = load atomic i32, i32* %p monotonic, align 4 | |
409 %2 = and i32 %1, %v | |
410 store atomic i32 %2, i32* %p seq_cst, align 4 | |
411 ret void | |
412 } | |
413 | |
233 ; ----- OR ----- | 414 ; ----- OR ----- |
234 | 415 |
235 define void @or_8(i8* %p) { | 416 define void @or_8i(i8* %p) { |
236 ; X64-LABEL: or_8 | 417 ; X64-LABEL: or_8i: |
237 ; X64-NOT: lock | 418 ; X64-NOT: lock |
238 ; X64: orb | 419 ; X64: orb |
239 ; X64-NOT: movb | 420 ; X64-NOT: movb |
240 ; X32-LABEL: or_8 | 421 ; X32-LABEL: or_8i: |
241 ; X32-NOT: lock | 422 ; X32-NOT: lock |
242 ; X32: orb | 423 ; X32: orb |
243 ; X32-NOT: movb | 424 ; X32-NOT: movb |
244 %1 = load atomic i8* %p acquire, align 1 | 425 %1 = load atomic i8, i8* %p acquire, align 1 |
245 %2 = or i8 %1, 2 | 426 %2 = or i8 %1, 2 |
246 store atomic i8 %2, i8* %p release, align 1 | 427 store atomic i8 %2, i8* %p release, align 1 |
247 ret void | 428 ret void |
248 } | 429 } |
249 | 430 |
250 define void @or_16(i16* %p) { | 431 define void @or_8r(i8* %p, i8 %v) { |
251 ; X64-LABEL: or_16 | 432 ; X64-LABEL: or_8r: |
433 ; X64-NOT: lock | |
434 ; X64: orb | |
435 ; X64-NOT: movb | |
436 ; X32-LABEL: or_8r: | |
437 ; X32-NOT: lock | |
438 ; X32: orb | |
439 ; X32-NOT: movb | |
440 %1 = load atomic i8, i8* %p acquire, align 1 | |
441 %2 = or i8 %1, %v | |
442 store atomic i8 %2, i8* %p release, align 1 | |
443 ret void | |
444 } | |
445 | |
446 define void @or_16i(i16* %p) { | |
447 ; X64-LABEL: or_16i: | |
252 ; X64-NOT: orw | 448 ; X64-NOT: orw |
253 ; X32-LABEL: or_16 | 449 ; X32-LABEL: or_16i: |
254 ; X32-NOT: orw | 450 ; X32-NOT: orw |
255 %1 = load atomic i16* %p acquire, align 2 | 451 %1 = load atomic i16, i16* %p acquire, align 2 |
256 %2 = or i16 %1, 2 | 452 %2 = or i16 %1, 2 |
257 store atomic i16 %2, i16* %p release, align 2 | 453 store atomic i16 %2, i16* %p release, align 2 |
258 ret void | 454 ret void |
259 } | 455 } |
260 | 456 |
261 define void @or_32(i32* %p) { | 457 define void @or_16r(i16* %p, i16 %v) { |
262 ; X64-LABEL: or_32 | 458 ; X64-LABEL: or_16r: |
459 ; X64-NOT: orw | |
460 ; X32-LABEL: or_16r: | |
461 ; X32-NOT: orw [.*], ( | |
462 %1 = load atomic i16, i16* %p acquire, align 2 | |
463 %2 = or i16 %1, %v | |
464 store atomic i16 %2, i16* %p release, align 2 | |
465 ret void | |
466 } | |
467 | |
468 define void @or_32i(i32* %p) { | |
469 ; X64-LABEL: or_32i: | |
263 ; X64-NOT: lock | 470 ; X64-NOT: lock |
264 ; X64: orl | 471 ; X64: orl |
265 ; X64-NOT: movl | 472 ; X64-NOT: movl |
266 ; X32-LABEL: or_32 | 473 ; X32-LABEL: or_32i: |
267 ; X32-NOT: lock | 474 ; X32-NOT: lock |
268 ; X32: orl | 475 ; X32: orl |
269 ; X32-NOT: movl | 476 ; X32-NOT: movl |
270 %1 = load atomic i32* %p acquire, align 4 | 477 %1 = load atomic i32, i32* %p acquire, align 4 |
271 %2 = or i32 %1, 2 | 478 %2 = or i32 %1, 2 |
272 store atomic i32 %2, i32* %p release, align 4 | 479 store atomic i32 %2, i32* %p release, align 4 |
273 ret void | 480 ret void |
274 } | 481 } |
275 | 482 |
276 define void @or_64(i64* %p) { | 483 define void @or_32r(i32* %p, i32 %v) { |
277 ; X64-LABEL: or_64 | 484 ; X64-LABEL: or_32r: |
485 ; X64-NOT: lock | |
486 ; X64: orl | |
487 ; X64-NOT: movl | |
488 ; X32-LABEL: or_32r: | |
489 ; X32-NOT: lock | |
490 ; X32: orl | |
491 ; X32-NOT: movl | |
492 %1 = load atomic i32, i32* %p acquire, align 4 | |
493 %2 = or i32 %1, %v | |
494 store atomic i32 %2, i32* %p release, align 4 | |
495 ret void | |
496 } | |
497 | |
498 define void @or_64i(i64* %p) { | |
499 ; X64-LABEL: or_64i: | |
278 ; X64-NOT: lock | 500 ; X64-NOT: lock |
279 ; X64: orq | 501 ; X64: orq |
280 ; X64-NOT: movq | 502 ; X64-NOT: movq |
281 ; We do not check X86-32 as it cannot do 'orq'. | 503 ; We do not check X86-32 as it cannot do 'orq'. |
282 ; X32-LABEL: or_64 | 504 ; X32-LABEL: or_64i: |
283 %1 = load atomic i64* %p acquire, align 8 | 505 %1 = load atomic i64, i64* %p acquire, align 8 |
284 %2 = or i64 %1, 2 | 506 %2 = or i64 %1, 2 |
285 store atomic i64 %2, i64* %p release, align 8 | 507 store atomic i64 %2, i64* %p release, align 8 |
286 ret void | 508 ret void |
287 } | 509 } |
288 | 510 |
289 define void @or_32_seq_cst(i32* %p) { | 511 define void @or_64r(i64* %p, i64 %v) { |
290 ; X64-LABEL: or_32_seq_cst | 512 ; X64-LABEL: or_64r: |
291 ; X64: xchgl | 513 ; X64-NOT: lock |
292 ; X32-LABEL: or_32_seq_cst | 514 ; X64: orq |
293 ; X32: xchgl | 515 ; X64-NOT: movq |
294 %1 = load atomic i32* %p monotonic, align 4 | 516 ; We do not check X86-32 as it cannot do 'orq'. |
517 ; X32-LABEL: or_64r: | |
518 %1 = load atomic i64, i64* %p acquire, align 8 | |
519 %2 = or i64 %1, %v | |
520 store atomic i64 %2, i64* %p release, align 8 | |
521 ret void | |
522 } | |
523 | |
524 define void @or_32i_seq_cst(i32* %p) { | |
525 ; X64-LABEL: or_32i_seq_cst: | |
526 ; X64: xchgl | |
527 ; X32-LABEL: or_32i_seq_cst: | |
528 ; X32: xchgl | |
529 %1 = load atomic i32, i32* %p monotonic, align 4 | |
295 %2 = or i32 %1, 2 | 530 %2 = or i32 %1, 2 |
296 store atomic i32 %2, i32* %p seq_cst, align 4 | 531 store atomic i32 %2, i32* %p seq_cst, align 4 |
297 ret void | 532 ret void |
298 } | 533 } |
299 | 534 |
535 define void @or_32r_seq_cst(i32* %p, i32 %v) { | |
536 ; X64-LABEL: or_32r_seq_cst: | |
537 ; X64: xchgl | |
538 ; X32-LABEL: or_32r_seq_cst: | |
539 ; X32: xchgl | |
540 %1 = load atomic i32, i32* %p monotonic, align 4 | |
541 %2 = or i32 %1, %v | |
542 store atomic i32 %2, i32* %p seq_cst, align 4 | |
543 ret void | |
544 } | |
545 | |
300 ; ----- XOR ----- | 546 ; ----- XOR ----- |
301 | 547 |
302 define void @xor_8(i8* %p) { | 548 define void @xor_8i(i8* %p) { |
303 ; X64-LABEL: xor_8 | 549 ; X64-LABEL: xor_8i: |
304 ; X64-NOT: lock | 550 ; X64-NOT: lock |
305 ; X64: xorb | 551 ; X64: xorb |
306 ; X64-NOT: movb | 552 ; X64-NOT: movb |
307 ; X32-LABEL: xor_8 | 553 ; X32-LABEL: xor_8i: |
308 ; X32-NOT: lock | 554 ; X32-NOT: lock |
309 ; X32: xorb | 555 ; X32: xorb |
310 ; X32-NOT: movb | 556 ; X32-NOT: movb |
311 %1 = load atomic i8* %p acquire, align 1 | 557 %1 = load atomic i8, i8* %p acquire, align 1 |
312 %2 = xor i8 %1, 2 | 558 %2 = xor i8 %1, 2 |
313 store atomic i8 %2, i8* %p release, align 1 | 559 store atomic i8 %2, i8* %p release, align 1 |
314 ret void | 560 ret void |
315 } | 561 } |
316 | 562 |
317 define void @xor_16(i16* %p) { | 563 define void @xor_8r(i8* %p, i8 %v) { |
318 ; X64-LABEL: xor_16 | 564 ; X64-LABEL: xor_8r: |
565 ; X64-NOT: lock | |
566 ; X64: xorb | |
567 ; X64-NOT: movb | |
568 ; X32-LABEL: xor_8r: | |
569 ; X32-NOT: lock | |
570 ; X32: xorb | |
571 ; X32-NOT: movb | |
572 %1 = load atomic i8, i8* %p acquire, align 1 | |
573 %2 = xor i8 %1, %v | |
574 store atomic i8 %2, i8* %p release, align 1 | |
575 ret void | |
576 } | |
577 | |
578 define void @xor_16i(i16* %p) { | |
579 ; X64-LABEL: xor_16i: | |
319 ; X64-NOT: xorw | 580 ; X64-NOT: xorw |
320 ; X32-LABEL: xor_16 | 581 ; X32-LABEL: xor_16i: |
321 ; X32-NOT: xorw | 582 ; X32-NOT: xorw |
322 %1 = load atomic i16* %p acquire, align 2 | 583 %1 = load atomic i16, i16* %p acquire, align 2 |
323 %2 = xor i16 %1, 2 | 584 %2 = xor i16 %1, 2 |
324 store atomic i16 %2, i16* %p release, align 2 | 585 store atomic i16 %2, i16* %p release, align 2 |
325 ret void | 586 ret void |
326 } | 587 } |
327 | 588 |
328 define void @xor_32(i32* %p) { | 589 define void @xor_16r(i16* %p, i16 %v) { |
329 ; X64-LABEL: xor_32 | 590 ; X64-LABEL: xor_16r: |
591 ; X64-NOT: xorw | |
592 ; X32-LABEL: xor_16r: | |
593 ; X32-NOT: xorw [.*], ( | |
594 %1 = load atomic i16, i16* %p acquire, align 2 | |
595 %2 = xor i16 %1, %v | |
596 store atomic i16 %2, i16* %p release, align 2 | |
597 ret void | |
598 } | |
599 | |
600 define void @xor_32i(i32* %p) { | |
601 ; X64-LABEL: xor_32i: | |
330 ; X64-NOT: lock | 602 ; X64-NOT: lock |
331 ; X64: xorl | 603 ; X64: xorl |
332 ; X64-NOT: movl | 604 ; X64-NOT: movl |
333 ; X32-LABEL: xor_32 | 605 ; X32-LABEL: xor_32i: |
334 ; X32-NOT: lock | 606 ; X32-NOT: lock |
335 ; X32: xorl | 607 ; X32: xorl |
336 ; X32-NOT: movl | 608 ; X32-NOT: movl |
337 %1 = load atomic i32* %p acquire, align 4 | 609 %1 = load atomic i32, i32* %p acquire, align 4 |
338 %2 = xor i32 %1, 2 | 610 %2 = xor i32 %1, 2 |
339 store atomic i32 %2, i32* %p release, align 4 | 611 store atomic i32 %2, i32* %p release, align 4 |
340 ret void | 612 ret void |
341 } | 613 } |
342 | 614 |
343 define void @xor_64(i64* %p) { | 615 define void @xor_32r(i32* %p, i32 %v) { |
344 ; X64-LABEL: xor_64 | 616 ; X64-LABEL: xor_32r: |
617 ; X64-NOT: lock | |
618 ; X64: xorl | |
619 ; X64-NOT: movl | |
620 ; X32-LABEL: xor_32r: | |
621 ; X32-NOT: lock | |
622 ; X32: xorl | |
623 ; X32-NOT: movl | |
624 %1 = load atomic i32, i32* %p acquire, align 4 | |
625 %2 = xor i32 %1, %v | |
626 store atomic i32 %2, i32* %p release, align 4 | |
627 ret void | |
628 } | |
629 | |
630 define void @xor_64i(i64* %p) { | |
631 ; X64-LABEL: xor_64i: | |
345 ; X64-NOT: lock | 632 ; X64-NOT: lock |
346 ; X64: xorq | 633 ; X64: xorq |
347 ; X64-NOT: movq | 634 ; X64-NOT: movq |
348 ; We do not check X86-32 as it cannot do 'xorq'. | 635 ; We do not check X86-32 as it cannot do 'xorq'. |
349 ; X32-LABEL: xor_64 | 636 ; X32-LABEL: xor_64i: |
350 %1 = load atomic i64* %p acquire, align 8 | 637 %1 = load atomic i64, i64* %p acquire, align 8 |
351 %2 = xor i64 %1, 2 | 638 %2 = xor i64 %1, 2 |
352 store atomic i64 %2, i64* %p release, align 8 | 639 store atomic i64 %2, i64* %p release, align 8 |
353 ret void | 640 ret void |
354 } | 641 } |
355 | 642 |
356 define void @xor_32_seq_cst(i32* %p) { | 643 define void @xor_64r(i64* %p, i64 %v) { |
357 ; X64-LABEL: xor_32_seq_cst | 644 ; X64-LABEL: xor_64r: |
358 ; X64: xchgl | 645 ; X64-NOT: lock |
359 ; X32-LABEL: xor_32_seq_cst | 646 ; X64: xorq |
360 ; X32: xchgl | 647 ; X64-NOT: movq |
361 %1 = load atomic i32* %p monotonic, align 4 | 648 ; We do not check X86-32 as it cannot do 'xorq'. |
649 ; X32-LABEL: xor_64r: | |
650 %1 = load atomic i64, i64* %p acquire, align 8 | |
651 %2 = xor i64 %1, %v | |
652 store atomic i64 %2, i64* %p release, align 8 | |
653 ret void | |
654 } | |
655 | |
656 define void @xor_32i_seq_cst(i32* %p) { | |
657 ; X64-LABEL: xor_32i_seq_cst: | |
658 ; X64: xchgl | |
659 ; X32-LABEL: xor_32i_seq_cst: | |
660 ; X32: xchgl | |
661 %1 = load atomic i32, i32* %p monotonic, align 4 | |
362 %2 = xor i32 %1, 2 | 662 %2 = xor i32 %1, 2 |
363 store atomic i32 %2, i32* %p seq_cst, align 4 | 663 store atomic i32 %2, i32* %p seq_cst, align 4 |
364 ret void | 664 ret void |
365 } | 665 } |
366 | 666 |
667 define void @xor_32r_seq_cst(i32* %p, i32 %v) { | |
668 ; X64-LABEL: xor_32r_seq_cst: | |
669 ; X64: xchgl | |
670 ; X32-LABEL: xor_32r_seq_cst: | |
671 ; X32: xchgl | |
672 %1 = load atomic i32, i32* %p monotonic, align 4 | |
673 %2 = xor i32 %1, %v | |
674 store atomic i32 %2, i32* %p seq_cst, align 4 | |
675 ret void | |
676 } | |
677 | |
367 ; ----- INC ----- | 678 ; ----- INC ----- |
368 | 679 |
369 define void @inc_8(i8* %p) { | 680 define void @inc_8(i8* %p) { |
370 ; X64-LABEL: inc_8 | 681 ; X64-LABEL: inc_8: |
371 ; X64-NOT: lock | 682 ; X64-NOT: lock |
372 ; X64: incb | 683 ; X64: incb |
373 ; X64-NOT: movb | 684 ; X64-NOT: movb |
374 ; X32-LABEL: inc_8 | 685 ; X32-LABEL: inc_8: |
375 ; X32-NOT: lock | 686 ; X32-NOT: lock |
376 ; X32: incb | 687 ; X32: incb |
377 ; X32-NOT: movb | 688 ; X32-NOT: movb |
378 ; SLOW_INC-LABEL: inc_8 | 689 ; SLOW_INC-LABEL: inc_8: |
379 ; SLOW_INC-NOT: incb | 690 ; SLOW_INC-NOT: incb |
380 ; SLOW_INC-NOT: movb | 691 ; SLOW_INC-NOT: movb |
381 %1 = load atomic i8* %p seq_cst, align 1 | 692 %1 = load atomic i8, i8* %p seq_cst, align 1 |
382 %2 = add i8 %1, 1 | 693 %2 = add i8 %1, 1 |
383 store atomic i8 %2, i8* %p release, align 1 | 694 store atomic i8 %2, i8* %p release, align 1 |
384 ret void | 695 ret void |
385 } | 696 } |
386 | 697 |
387 define void @inc_16(i16* %p) { | 698 define void @inc_16(i16* %p) { |
388 ; Currently the transformation is not done on 16 bit accesses, as the backend | 699 ; Currently the transformation is not done on 16 bit accesses, as the backend |
389 ; treats 16 bit arithmetic as expensive on X86/X86_64. | 700 ; treats 16 bit arithmetic as expensive on X86/X86_64. |
390 ; X64-LABEL: inc_16 | 701 ; X64-LABEL: inc_16: |
391 ; X64-NOT: incw | 702 ; X64-NOT: incw |
392 ; X32-LABEL: inc_16 | 703 ; X32-LABEL: inc_16: |
393 ; X32-NOT: incw | 704 ; X32-NOT: incw |
394 ; SLOW_INC-LABEL: inc_16 | 705 ; SLOW_INC-LABEL: inc_16: |
395 ; SLOW_INC-NOT: incw | 706 ; SLOW_INC-NOT: incw |
396 %1 = load atomic i16* %p acquire, align 2 | 707 %1 = load atomic i16, i16* %p acquire, align 2 |
397 %2 = add i16 %1, 1 | 708 %2 = add i16 %1, 1 |
398 store atomic i16 %2, i16* %p release, align 2 | 709 store atomic i16 %2, i16* %p release, align 2 |
399 ret void | 710 ret void |
400 } | 711 } |
401 | 712 |
402 define void @inc_32(i32* %p) { | 713 define void @inc_32(i32* %p) { |
403 ; X64-LABEL: inc_32 | 714 ; X64-LABEL: inc_32: |
404 ; X64-NOT: lock | 715 ; X64-NOT: lock |
405 ; X64: incl | 716 ; X64: incl |
406 ; X64-NOT: movl | 717 ; X64-NOT: movl |
407 ; X32-LABEL: inc_32 | 718 ; X32-LABEL: inc_32: |
408 ; X32-NOT: lock | 719 ; X32-NOT: lock |
409 ; X32: incl | 720 ; X32: incl |
410 ; X32-NOT: movl | 721 ; X32-NOT: movl |
411 ; SLOW_INC-LABEL: inc_32 | 722 ; SLOW_INC-LABEL: inc_32: |
412 ; SLOW_INC-NOT: incl | 723 ; SLOW_INC-NOT: incl |
413 ; SLOW_INC-NOT: movl | 724 ; SLOW_INC-NOT: movl |
414 %1 = load atomic i32* %p acquire, align 4 | 725 %1 = load atomic i32, i32* %p acquire, align 4 |
415 %2 = add i32 %1, 1 | 726 %2 = add i32 %1, 1 |
416 store atomic i32 %2, i32* %p monotonic, align 4 | 727 store atomic i32 %2, i32* %p monotonic, align 4 |
417 ret void | 728 ret void |
418 } | 729 } |
419 | 730 |
420 define void @inc_64(i64* %p) { | 731 define void @inc_64(i64* %p) { |
421 ; X64-LABEL: inc_64 | 732 ; X64-LABEL: inc_64: |
422 ; X64-NOT: lock | 733 ; X64-NOT: lock |
423 ; X64: incq | 734 ; X64: incq |
424 ; X64-NOT: movq | 735 ; X64-NOT: movq |
425 ; We do not check X86-32 as it cannot do 'incq'. | 736 ; We do not check X86-32 as it cannot do 'incq'. |
426 ; X32-LABEL: inc_64 | 737 ; X32-LABEL: inc_64: |
427 ; SLOW_INC-LABEL: inc_64 | 738 ; SLOW_INC-LABEL: inc_64: |
428 ; SLOW_INC-NOT: incq | 739 ; SLOW_INC-NOT: incq |
429 ; SLOW_INC-NOT: movq | 740 ; SLOW_INC-NOT: movq |
430 %1 = load atomic i64* %p acquire, align 8 | 741 %1 = load atomic i64, i64* %p acquire, align 8 |
431 %2 = add i64 %1, 1 | 742 %2 = add i64 %1, 1 |
432 store atomic i64 %2, i64* %p release, align 8 | 743 store atomic i64 %2, i64* %p release, align 8 |
433 ret void | 744 ret void |
434 } | 745 } |
435 | 746 |
436 define void @inc_32_seq_cst(i32* %p) { | 747 define void @inc_32_seq_cst(i32* %p) { |
437 ; X64-LABEL: inc_32_seq_cst | 748 ; X64-LABEL: inc_32_seq_cst: |
438 ; X64: xchgl | 749 ; X64: xchgl |
439 ; X32-LABEL: inc_32_seq_cst | 750 ; X32-LABEL: inc_32_seq_cst: |
440 ; X32: xchgl | 751 ; X32: xchgl |
441 %1 = load atomic i32* %p monotonic, align 4 | 752 %1 = load atomic i32, i32* %p monotonic, align 4 |
442 %2 = add i32 %1, 1 | 753 %2 = add i32 %1, 1 |
443 store atomic i32 %2, i32* %p seq_cst, align 4 | 754 store atomic i32 %2, i32* %p seq_cst, align 4 |
444 ret void | 755 ret void |
445 } | 756 } |
446 | 757 |
447 ; ----- DEC ----- | 758 ; ----- DEC ----- |
448 | 759 |
449 define void @dec_8(i8* %p) { | 760 define void @dec_8(i8* %p) { |
450 ; X64-LABEL: dec_8 | 761 ; X64-LABEL: dec_8: |
451 ; X64-NOT: lock | 762 ; X64-NOT: lock |
452 ; X64: decb | 763 ; X64: decb |
453 ; X64-NOT: movb | 764 ; X64-NOT: movb |
454 ; X32-LABEL: dec_8 | 765 ; X32-LABEL: dec_8: |
455 ; X32-NOT: lock | 766 ; X32-NOT: lock |
456 ; X32: decb | 767 ; X32: decb |
457 ; X32-NOT: movb | 768 ; X32-NOT: movb |
458 ; SLOW_INC-LABEL: dec_8 | 769 ; SLOW_INC-LABEL: dec_8: |
459 ; SLOW_INC-NOT: decb | 770 ; SLOW_INC-NOT: decb |
460 ; SLOW_INC-NOT: movb | 771 ; SLOW_INC-NOT: movb |
461 %1 = load atomic i8* %p seq_cst, align 1 | 772 %1 = load atomic i8, i8* %p seq_cst, align 1 |
462 %2 = sub i8 %1, 1 | 773 %2 = sub i8 %1, 1 |
463 store atomic i8 %2, i8* %p release, align 1 | 774 store atomic i8 %2, i8* %p release, align 1 |
464 ret void | 775 ret void |
465 } | 776 } |
466 | 777 |
467 define void @dec_16(i16* %p) { | 778 define void @dec_16(i16* %p) { |
468 ; Currently the transformation is not done on 16 bit accesses, as the backend | 779 ; Currently the transformation is not done on 16 bit accesses, as the backend |
469 ; treats 16 bit arithmetic as expensive on X86/X86_64. | 780 ; treats 16 bit arithmetic as expensive on X86/X86_64. |
470 ; X64-LABEL: dec_16 | 781 ; X64-LABEL: dec_16: |
471 ; X64-NOT: decw | 782 ; X64-NOT: decw |
472 ; X32-LABEL: dec_16 | 783 ; X32-LABEL: dec_16: |
473 ; X32-NOT: decw | 784 ; X32-NOT: decw |
474 ; SLOW_INC-LABEL: dec_16 | 785 ; SLOW_INC-LABEL: dec_16: |
475 ; SLOW_INC-NOT: decw | 786 ; SLOW_INC-NOT: decw |
476 %1 = load atomic i16* %p acquire, align 2 | 787 %1 = load atomic i16, i16* %p acquire, align 2 |
477 %2 = sub i16 %1, 1 | 788 %2 = sub i16 %1, 1 |
478 store atomic i16 %2, i16* %p release, align 2 | 789 store atomic i16 %2, i16* %p release, align 2 |
479 ret void | 790 ret void |
480 } | 791 } |
481 | 792 |
482 define void @dec_32(i32* %p) { | 793 define void @dec_32(i32* %p) { |
483 ; X64-LABEL: dec_32 | 794 ; X64-LABEL: dec_32: |
484 ; X64-NOT: lock | 795 ; X64-NOT: lock |
485 ; X64: decl | 796 ; X64: decl |
486 ; X64-NOT: movl | 797 ; X64-NOT: movl |
487 ; X32-LABEL: dec_32 | 798 ; X32-LABEL: dec_32: |
488 ; X32-NOT: lock | 799 ; X32-NOT: lock |
489 ; X32: decl | 800 ; X32: decl |
490 ; X32-NOT: movl | 801 ; X32-NOT: movl |
491 ; SLOW_INC-LABEL: dec_32 | 802 ; SLOW_INC-LABEL: dec_32: |
492 ; SLOW_INC-NOT: decl | 803 ; SLOW_INC-NOT: decl |
493 ; SLOW_INC-NOT: movl | 804 ; SLOW_INC-NOT: movl |
494 %1 = load atomic i32* %p acquire, align 4 | 805 %1 = load atomic i32, i32* %p acquire, align 4 |
495 %2 = sub i32 %1, 1 | 806 %2 = sub i32 %1, 1 |
496 store atomic i32 %2, i32* %p monotonic, align 4 | 807 store atomic i32 %2, i32* %p monotonic, align 4 |
497 ret void | 808 ret void |
498 } | 809 } |
499 | 810 |
500 define void @dec_64(i64* %p) { | 811 define void @dec_64(i64* %p) { |
501 ; X64-LABEL: dec_64 | 812 ; X64-LABEL: dec_64: |
502 ; X64-NOT: lock | 813 ; X64-NOT: lock |
503 ; X64: decq | 814 ; X64: decq |
504 ; X64-NOT: movq | 815 ; X64-NOT: movq |
505 ; We do not check X86-32 as it cannot do 'decq'. | 816 ; We do not check X86-32 as it cannot do 'decq'. |
506 ; X32-LABEL: dec_64 | 817 ; X32-LABEL: dec_64: |
507 ; SLOW_INC-LABEL: dec_64 | 818 ; SLOW_INC-LABEL: dec_64: |
508 ; SLOW_INC-NOT: decq | 819 ; SLOW_INC-NOT: decq |
509 ; SLOW_INC-NOT: movq | 820 ; SLOW_INC-NOT: movq |
510 %1 = load atomic i64* %p acquire, align 8 | 821 %1 = load atomic i64, i64* %p acquire, align 8 |
511 %2 = sub i64 %1, 1 | 822 %2 = sub i64 %1, 1 |
512 store atomic i64 %2, i64* %p release, align 8 | 823 store atomic i64 %2, i64* %p release, align 8 |
513 ret void | 824 ret void |
514 } | 825 } |
515 | 826 |
516 define void @dec_32_seq_cst(i32* %p) { | 827 define void @dec_32_seq_cst(i32* %p) { |
517 ; X64-LABEL: dec_32_seq_cst | 828 ; X64-LABEL: dec_32_seq_cst: |
518 ; X64: xchgl | 829 ; X64: xchgl |
519 ; X32-LABEL: dec_32_seq_cst | 830 ; X32-LABEL: dec_32_seq_cst: |
520 ; X32: xchgl | 831 ; X32: xchgl |
521 %1 = load atomic i32* %p monotonic, align 4 | 832 %1 = load atomic i32, i32* %p monotonic, align 4 |
522 %2 = sub i32 %1, 1 | 833 %2 = sub i32 %1, 1 |
523 store atomic i32 %2, i32* %p seq_cst, align 4 | 834 store atomic i32 %2, i32* %p seq_cst, align 4 |
524 ret void | 835 ret void |
525 } | 836 } |
837 | |
838 ; ----- FADD ----- | |
839 | |
840 define void @fadd_32r(float* %loc, float %val) { | |
841 ; X64-LABEL: fadd_32r: | |
842 ; X64-NOT: lock | |
843 ; X64-NOT: mov | |
844 ; X64: addss (%[[M:[a-z]+]]), %[[XMM:xmm[0-9]+]] | |
845 ; X64-NEXT: movss %[[XMM]], (%[[M]]) | |
846 ; X32-LABEL: fadd_32r: | |
847 ; Don't check x86-32. | |
848 ; LLVM's SSE handling is conservative on x86-32 even without using atomics. | |
849 %floc = bitcast float* %loc to i32* | |
850 %1 = load atomic i32, i32* %floc seq_cst, align 4 | |
851 %2 = bitcast i32 %1 to float | |
852 %add = fadd float %2, %val | |
853 %3 = bitcast float %add to i32 | |
854 store atomic i32 %3, i32* %floc release, align 4 | |
855 ret void | |
856 } | |
857 | |
858 define void @fadd_64r(double* %loc, double %val) { | |
859 ; X64-LABEL: fadd_64r: | |
860 ; X64-NOT: lock | |
861 ; X64-NOT: mov | |
862 ; X64: addsd (%[[M:[a-z]+]]), %[[XMM:xmm[0-9]+]] | |
863 ; X64-NEXT: movsd %[[XMM]], (%[[M]]) | |
864 ; X32-LABEL: fadd_64r: | |
865 ; Don't check x86-32 (see comment above). | |
866 %floc = bitcast double* %loc to i64* | |
867 %1 = load atomic i64, i64* %floc seq_cst, align 8 | |
868 %2 = bitcast i64 %1 to double | |
869 %add = fadd double %2, %val | |
870 %3 = bitcast double %add to i64 | |
871 store atomic i64 %3, i64* %floc release, align 8 | |
872 ret void | |
873 } |
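A closing note on the FADD checks (editorial, not part of the test; register names are illustrative): the X64-NOT: mov lines insist that the value stays in an XMM register for the whole read-modify-write, i.e. the wanted lowering is roughly

;   addss (%rdi), %xmm0     ; load *loc and add %val in one SSE instruction
;   movss %xmm0, (%rdi)     ; store the result straight from the XMM register
; rather than a naive lowering of the bitcast+load / bitcast+store sequence
; that would round-trip the bits through a general-purpose register with
; movl/movd pairs around the addss.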