comparison test/CodeGen/X86/atomic_mi.ll @ 95:afa8332a0e37 LLVM 3.8

LLVM 3.8
author Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
date Tue, 13 Oct 2015 17:48:58 +0900
parents 60c9769439b8
children 7d135dc70f03
comparison of 84:f3e34b893a5f with 95:afa8332a0e37
1 ; RUN: llc < %s -march=x86-64 -verify-machineinstrs | FileCheck %s --check-prefix X64 1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix X64
2 ; RUN: llc < %s -march=x86 -verify-machineinstrs | FileCheck %s --check-prefix X32 2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix X32
3 ; RUN: llc < %s -march=x86-64 -mattr=slow-incdec -verify-machineinstrs | FileCheck %s --check-prefix SLOW_INC 3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=slow-incdec -verify-machineinstrs | FileCheck %s --check-prefix SLOW_INC
4 4
5 ; This file checks that atomic (non-seq_cst) stores of immediate values are 5 ; This file checks that atomic (non-seq_cst) stores of immediate values are
6 ; done in one mov instruction and not 2. More precisely, it makes sure that the 6 ; done in one mov instruction and not 2. More precisely, it makes sure that the
7 ; immediate is not first copied uselessly into a register. 7 ; immediate is not first copied uselessly into a register.
8 8
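As a point of reference, here is a minimal C++ sketch of the first pattern being exercised (the global and function names are hypothetical, not taken from this test); assuming the usual x86 mapping where relaxed and release stores need no fence, the store below should lower to a single mov of the immediate to memory:

#include <atomic>
#include <cstdint>

std::atomic<int32_t> g{0};  // hypothetical global, naturally aligned

// Expected lowering (AT&T syntax):  movl $42, g(%rip)
// Undesirable lowering:             movl $42, %eax ; movl %eax, g(%rip)
void store_imm_release() {
  g.store(42, std::memory_order_release);
}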
12 ; should be just an add instruction, instead of loading x into a register, doing 12 ; should be just an add instruction, instead of loading x into a register, doing
13 ; an add and storing the result back. 13 ; an add and storing the result back.
14 ; The binary operations supported are currently add, and, or, xor. 14 ; The binary operations supported are currently add, and, or, xor.
15 ; sub is not supported because it is translated into an addition of the 15 ; sub is not supported because it is translated into an addition of the
16 ; negated immediate. 16 ; negated immediate.
17 ; Finally, we also check the same kind of pattern for inc/dec 17 ;
18 ; We also check the same patterns:
19 ; - For inc/dec.
20 ; - For register instead of immediate operands.
21 ; - For floating point operations.
18 22
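The load/op/store shape described above corresponds roughly to the following C++ sketch (hypothetical names, mirroring the acquire-load / relaxed-store orderings used by the tests); the point is that the plain add sandwiched between the two atomic accesses can be folded into one memory-operand instruction:

#include <atomic>
#include <cstdint>

std::atomic<int32_t> x{0};  // hypothetical global

// Desired lowering:      addl $2, x(%rip)
// Undesirable lowering:  movl x(%rip), %eax ; addl $2, %eax ; movl %eax, x(%rip)
void add_imm() {
  int32_t v = x.load(std::memory_order_acquire);
  x.store(v + 2, std::memory_order_relaxed);
}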
19 ; seq_cst stores are left as (lock) xchgl, but we try to check every other 23 ; seq_cst stores are left as (lock) xchgl, but we try to check every other
20 ; attribute at least once. 24 ; attribute at least once.
21 25
22 ; Please note that these operations do not require the lock prefix: only 26 ; Please note that these operations do not require the lock prefix: only
23 ; sequentially consistent stores require this kind of protection on X86. 27 ; sequentially consistent stores require this kind of protection on X86.
24 ; And even for seq_cst operations, LLVM uses the xchg instruction, which has 28 ; And even for seq_cst operations, LLVM uses the xchg instruction, which has
25 ; an implicit lock prefix, so making it explicit is not required. 29 ; an implicit lock prefix, so making it explicit is not required.
26 30
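For contrast, a hedged sketch of the sequentially consistent case (hypothetical names): under TSO a plain mov store could still be reordered after later loads, so LLVM emits xchg, whose memory form is implicitly locked, and no explicit lock prefix or mfence is needed:

#include <atomic>
#include <cstdint>

std::atomic<int32_t> s{0};  // hypothetical global

// Expected lowering:  movl $42, %eax ; xchgl %eax, s(%rip)
void store_imm_seq_cst() {
  s.store(42, std::memory_order_seq_cst);
}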
27 define void @store_atomic_imm_8(i8* %p) { 31 define void @store_atomic_imm_8(i8* %p) {
28 ; X64-LABEL: store_atomic_imm_8 32 ; X64-LABEL: store_atomic_imm_8:
29 ; X64: movb 33 ; X64: movb
30 ; X64-NOT: movb 34 ; X64-NOT: movb
31 ; X32-LABEL: store_atomic_imm_8 35 ; X32-LABEL: store_atomic_imm_8:
32 ; X32: movb 36 ; X32: movb
33 ; X32-NOT: movb 37 ; X32-NOT: movb
34 store atomic i8 42, i8* %p release, align 1 38 store atomic i8 42, i8* %p release, align 1
35 ret void 39 ret void
36 } 40 }
37 41
38 define void @store_atomic_imm_16(i16* %p) { 42 define void @store_atomic_imm_16(i16* %p) {
39 ; X64-LABEL: store_atomic_imm_16 43 ; X64-LABEL: store_atomic_imm_16:
40 ; X64: movw 44 ; X64: movw
41 ; X64-NOT: movw 45 ; X64-NOT: movw
42 ; X32-LABEL: store_atomic_imm_16 46 ; X32-LABEL: store_atomic_imm_16:
43 ; X32: movw 47 ; X32: movw
44 ; X32-NOT: movw 48 ; X32-NOT: movw
45 store atomic i16 42, i16* %p monotonic, align 2 49 store atomic i16 42, i16* %p monotonic, align 2
46 ret void 50 ret void
47 } 51 }
48 52
49 define void @store_atomic_imm_32(i32* %p) { 53 define void @store_atomic_imm_32(i32* %p) {
50 ; X64-LABEL: store_atomic_imm_32 54 ; X64-LABEL: store_atomic_imm_32:
51 ; X64: movl 55 ; X64: movl
52 ; X64-NOT: movl 56 ; X64-NOT: movl
53 ; On 32-bit targets, there is an extra movl for each of these functions 57 ; On 32-bit targets, there is an extra movl for each of these functions
54 ; (probably for alignment reasons). 58 ; (probably for alignment reasons).
55 ; X32-LABEL: store_atomic_imm_32 59 ; X32-LABEL: store_atomic_imm_32:
56 ; X32: movl 4(%esp), %eax 60 ; X32: movl 4(%esp), %eax
57 ; X32: movl 61 ; X32: movl
58 ; X32-NOT: movl 62 ; X32-NOT: movl
59 store atomic i32 42, i32* %p release, align 4 63 store atomic i32 42, i32* %p release, align 4
60 ret void 64 ret void
61 } 65 }
62 66
63 define void @store_atomic_imm_64(i64* %p) { 67 define void @store_atomic_imm_64(i64* %p) {
64 ; X64-LABEL: store_atomic_imm_64 68 ; X64-LABEL: store_atomic_imm_64:
65 ; X64: movq 69 ; X64: movq
66 ; X64-NOT: movq 70 ; X64-NOT: movq
67 ; These are implemented with a CAS loop on 32 bit architectures, and thus 71 ; These are implemented with a CAS loop on 32 bit architectures, and thus
68 ; cannot be optimized in the same way as the others. 72 ; cannot be optimized in the same way as the others.
69 ; X32-LABEL: store_atomic_imm_64 73 ; X32-LABEL: store_atomic_imm_64:
70 ; X32: cmpxchg8b 74 ; X32: cmpxchg8b
71 store atomic i64 42, i64* %p release, align 8 75 store atomic i64 42, i64* %p release, align 8
72 ret void 76 ret void
73 } 77 }
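A sketch of why the 64-bit store differs on 32-bit targets (hypothetical names; the exact register choices are up to the backend): i686 has no single always-atomic 64-bit integer store, so the store above is expanded into a cmpxchg8b retry loop, roughly:

#include <atomic>
#include <cstdint>

std::atomic<int64_t> q{0};  // hypothetical global, assumed 8-byte aligned

// Rough i686 expansion of the release store below:
//   movl  q, %eax          ; current low half
//   movl  q+4, %edx        ; current high half
// .retry:
//   lock cmpxchg8b q       ; try to replace edx:eax with ebx:ecx (the new value)
//   jne   .retry
void store64_release() {
  q.store(42, std::memory_order_release);
}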
74 78
75 ; If an immediate is too big to fit in 32 bits, it cannot be stored in one mov; 79 ; If an immediate is too big to fit in 32 bits, it cannot be stored in one mov;
76 ; even on X64, one must use movabsq, which can only target a register. 80 ; even on X64, one must use movabsq, which can only target a register.
77 define void @store_atomic_imm_64_big(i64* %p) { 81 define void @store_atomic_imm_64_big(i64* %p) {
78 ; X64-LABEL: store_atomic_imm_64_big 82 ; X64-LABEL: store_atomic_imm_64_big:
79 ; X64: movabsq 83 ; X64: movabsq
80 ; X64: movq 84 ; X64: movq
81 store atomic i64 100000000000, i64* %p monotonic, align 8 85 store atomic i64 100000000000, i64* %p monotonic, align 8
82 ret void 86 ret void
83 } 87 }
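For the wide-immediate case just above, an illustrative sketch (hypothetical names); an immediate that does not fit in 32 bits cannot be encoded in a mov to memory, so two instructions are the expected minimum:

#include <atomic>
#include <cstdint>

std::atomic<int64_t> big{0};  // hypothetical global

// Expected lowering:
//   movabsq $100000000000, %rax
//   movq    %rax, big(%rip)
void store_big_relaxed() {
  big.store(100000000000LL, std::memory_order_relaxed);
}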
84 88
85 ; It would be incorrect to replace a lock xchgl by a movl 89 ; It would be incorrect to replace a lock xchgl by a movl
86 define void @store_atomic_imm_32_seq_cst(i32* %p) { 90 define void @store_atomic_imm_32_seq_cst(i32* %p) {
87 ; X64-LABEL: store_atomic_imm_32_seq_cst 91 ; X64-LABEL: store_atomic_imm_32_seq_cst:
88 ; X64: xchgl 92 ; X64: xchgl
89 ; X32-LABEL: store_atomic_imm_32_seq_cst 93 ; X32-LABEL: store_atomic_imm_32_seq_cst:
90 ; X32: xchgl 94 ; X32: xchgl
91 store atomic i32 42, i32* %p seq_cst, align 4 95 store atomic i32 42, i32* %p seq_cst, align 4
92 ret void 96 ret void
93 } 97 }
94 98
95 ; ----- ADD ----- 99 ; ----- ADD -----
96 100
97 define void @add_8(i8* %p) { 101 define void @add_8i(i8* %p) {
98 ; X64-LABEL: add_8 102 ; X64-LABEL: add_8i:
99 ; X64-NOT: lock 103 ; X64-NOT: lock
100 ; X64: addb 104 ; X64: addb
101 ; X64-NOT: movb 105 ; X64-NOT: movb
102 ; X32-LABEL: add_8 106 ; X32-LABEL: add_8i:
103 ; X32-NOT: lock 107 ; X32-NOT: lock
104 ; X32: addb 108 ; X32: addb
105 ; X32-NOT: movb 109 ; X32-NOT: movb
106 %1 = load atomic i8* %p seq_cst, align 1 110 %1 = load atomic i8, i8* %p seq_cst, align 1
107 %2 = add i8 %1, 2 111 %2 = add i8 %1, 2
108 store atomic i8 %2, i8* %p release, align 1 112 store atomic i8 %2, i8* %p release, align 1
109 ret void 113 ret void
110 } 114 }
111 115
112 define void @add_16(i16* %p) { 116 define void @add_8r(i8* %p, i8 %v) {
117 ; X64-LABEL: add_8r:
118 ; X64-NOT: lock
119 ; X64: addb
120 ; X64-NOT: movb
121 ; X32-LABEL: add_8r:
122 ; X32-NOT: lock
123 ; X32: addb
124 ; X32-NOT: movb
125 %1 = load atomic i8, i8* %p seq_cst, align 1
126 %2 = add i8 %1, %v
127 store atomic i8 %2, i8* %p release, align 1
128 ret void
129 }
130
131 define void @add_16i(i16* %p) {
113 ; Currently the transformation is not done on 16 bit accesses, as the backend 132 ; Currently the transformation is not done on 16 bit accesses, as the backend
114 ; treats 16 bit arithmetic as expensive on X86/X86_64. 133 ; treats 16 bit arithmetic as expensive on X86/X86_64.
115 ; X64-LABEL: add_16 134 ; X64-LABEL: add_16i:
116 ; X64-NOT: addw 135 ; X64-NOT: addw
117 ; X32-LABEL: add_16 136 ; X32-LABEL: add_16i:
118 ; X32-NOT: addw 137 ; X32-NOT: addw
119 %1 = load atomic i16* %p acquire, align 2 138 %1 = load atomic i16, i16* %p acquire, align 2
120 %2 = add i16 %1, 2 139 %2 = add i16 %1, 2
121 store atomic i16 %2, i16* %p release, align 2 140 store atomic i16 %2, i16* %p release, align 2
122 ret void 141 ret void
123 } 142 }
124 143
125 define void @add_32(i32* %p) { 144 define void @add_16r(i16* %p, i16 %v) {
126 ; X64-LABEL: add_32 145 ; Currently the transformation is not done on 16 bit accesses, as the backend
146 ; treats 16 bit arithmetic as expensive on X86/X86_64.
147 ; X64-LABEL: add_16r:
148 ; X64-NOT: addw
149 ; X32-LABEL: add_16r:
150 ; X32-NOT: addw {{.*}}, (
151 %1 = load atomic i16, i16* %p acquire, align 2
152 %2 = add i16 %1, %v
153 store atomic i16 %2, i16* %p release, align 2
154 ret void
155 }
156
157 define void @add_32i(i32* %p) {
158 ; X64-LABEL: add_32i:
127 ; X64-NOT: lock 159 ; X64-NOT: lock
128 ; X64: addl 160 ; X64: addl
129 ; X64-NOT: movl 161 ; X64-NOT: movl
130 ; X32-LABEL: add_32 162 ; X32-LABEL: add_32i:
131 ; X32-NOT: lock 163 ; X32-NOT: lock
132 ; X32: addl 164 ; X32: addl
133 ; X32-NOT: movl 165 ; X32-NOT: movl
134 %1 = load atomic i32* %p acquire, align 4 166 %1 = load atomic i32, i32* %p acquire, align 4
135 %2 = add i32 %1, 2 167 %2 = add i32 %1, 2
136 store atomic i32 %2, i32* %p monotonic, align 4 168 store atomic i32 %2, i32* %p monotonic, align 4
137 ret void 169 ret void
138 } 170 }
139 171
140 define void @add_64(i64* %p) { 172 define void @add_32r(i32* %p, i32 %v) {
141 ; X64-LABEL: add_64 173 ; X64-LABEL: add_32r:
174 ; X64-NOT: lock
175 ; X64: addl
176 ; X64-NOT: movl
177 ; X32-LABEL: add_32r:
178 ; X32-NOT: lock
179 ; X32: addl
180 ; X32-NOT: movl
181 %1 = load atomic i32, i32* %p acquire, align 4
182 %2 = add i32 %1, %v
183 store atomic i32 %2, i32* %p monotonic, align 4
184 ret void
185 }
186
187 ; The following is a corner case where the load is added to itself. The pattern
188 ; matching should not fold this. We only test with 32-bit add, but the same
189 ; applies to other sizes and operations.
190 define void @add_32r_self(i32* %p) {
191 ; X64-LABEL: add_32r_self:
192 ; X64-NOT: lock
193 ; X64: movl (%[[M:[a-z]+]]), %[[R:[a-z]+]]
194 ; X64: addl %[[R]], %[[R]]
195 ; X64: movl %[[R]], (%[[M]])
196 ; X32-LABEL: add_32r_self:
197 ; X32-NOT: lock
198 ; X32: movl (%[[M:[a-z]+]]), %[[R:[a-z]+]]
199 ; X32: addl %[[R]], %[[R]]
200 ; X32: movl %[[R]], (%[[M]])
201 %1 = load atomic i32, i32* %p acquire, align 4
202 %2 = add i32 %1, %1
203 store atomic i32 %2, i32* %p monotonic, align 4
204 ret void
205 }
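To make the corner case above concrete, a hedged sketch (hypothetical names): folding the load into a memory-destination add would read the location a second time, turning one atomic load into two loads that may observe different values, so the unfolded three-instruction form is the correct one:

#include <atomic>
#include <cstdint>

std::atomic<int32_t> p{0};  // hypothetical global

// Correct (matches the checks above):
//   movl p(%rip), %eax
//   addl %eax, %eax
//   movl %eax, p(%rip)
// Incorrect folding (reads p twice):
//   movl p(%rip), %eax
//   addl %eax, p(%rip)
void add_self() {
  int32_t v = p.load(std::memory_order_acquire);
  p.store(v + v, std::memory_order_relaxed);
}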
206
207 ; The following is a corner case where the load's result is returned. The
208 ; optimizer isn't allowed to duplicate the load because it's atomic.
209 define i32 @add_32r_ret_load(i32* %p, i32 %v) {
210 ; X64-LABEL: add_32r_ret_load:
211 ; X64-NOT: lock
212 ; X64: movl (%rdi), %eax
213 ; X64-NEXT: addl %eax, %esi
214 ; X64-NEXT: movl %esi, (%rdi)
215 ; X64-NEXT: retq
216 ; X32-LABEL: add_32r_ret_load:
217 ; X32-NOT: lock
218 ; X32: movl 4(%esp), %[[P:[a-z]+]]
219 ; X32-NEXT: movl (%[[P]]),
220 ; X32-NOT: %[[P]]
221 ; More code here; we just don't want it to load from P.
222 ; X32: movl %{{.*}}, (%[[P]])
223 ; X32-NEXT: retl
224 %1 = load atomic i32, i32* %p acquire, align 4
225 %2 = add i32 %1, %v
226 store atomic i32 %2, i32* %p monotonic, align 4
227 ret i32 %1
228 }
229
230 define void @add_64i(i64* %p) {
231 ; X64-LABEL: add_64i:
142 ; X64-NOT: lock 232 ; X64-NOT: lock
143 ; X64: addq 233 ; X64: addq
144 ; X64-NOT: movq 234 ; X64-NOT: movq
145 ; We do not check X86-32 as it cannot do 'addq'. 235 ; We do not check X86-32 as it cannot do 'addq'.
146 ; X32-LABEL: add_64 236 ; X32-LABEL: add_64i:
147 %1 = load atomic i64* %p acquire, align 8 237 %1 = load atomic i64, i64* %p acquire, align 8
148 %2 = add i64 %1, 2 238 %2 = add i64 %1, 2
149 store atomic i64 %2, i64* %p release, align 8 239 store atomic i64 %2, i64* %p release, align 8
150 ret void 240 ret void
151 } 241 }
152 242
153 define void @add_32_seq_cst(i32* %p) { 243 define void @add_64r(i64* %p, i64 %v) {
154 ; X64-LABEL: add_32_seq_cst 244 ; X64-LABEL: add_64r:
155 ; X64: xchgl 245 ; X64-NOT: lock
156 ; X32-LABEL: add_32_seq_cst 246 ; X64: addq
157 ; X32: xchgl 247 ; X64-NOT: movq
158 %1 = load atomic i32* %p monotonic, align 4 248 ; We do not check X86-32 as it cannot do 'addq'.
249 ; X32-LABEL: add_64r:
250 %1 = load atomic i64, i64* %p acquire, align 8
251 %2 = add i64 %1, %v
252 store atomic i64 %2, i64* %p release, align 8
253 ret void
254 }
255
256 define void @add_32i_seq_cst(i32* %p) {
257 ; X64-LABEL: add_32i_seq_cst:
258 ; X64: xchgl
259 ; X32-LABEL: add_32i_seq_cst:
260 ; X32: xchgl
261 %1 = load atomic i32, i32* %p monotonic, align 4
159 %2 = add i32 %1, 2 262 %2 = add i32 %1, 2
160 store atomic i32 %2, i32* %p seq_cst, align 4 263 store atomic i32 %2, i32* %p seq_cst, align 4
161 ret void 264 ret void
162 } 265 }
163 266
267 define void @add_32r_seq_cst(i32* %p, i32 %v) {
268 ; X64-LABEL: add_32r_seq_cst:
269 ; X64: xchgl
270 ; X32-LABEL: add_32r_seq_cst:
271 ; X32: xchgl
272 %1 = load atomic i32, i32* %p monotonic, align 4
273 %2 = add i32 %1, %v
274 store atomic i32 %2, i32* %p seq_cst, align 4
275 ret void
276 }
277
164 ; ----- AND ----- 278 ; ----- AND -----
165 279
166 define void @and_8(i8* %p) { 280 define void @and_8i(i8* %p) {
167 ; X64-LABEL: and_8 281 ; X64-LABEL: and_8i:
168 ; X64-NOT: lock 282 ; X64-NOT: lock
169 ; X64: andb 283 ; X64: andb
170 ; X64-NOT: movb 284 ; X64-NOT: movb
171 ; X32-LABEL: and_8 285 ; X32-LABEL: and_8i:
172 ; X32-NOT: lock 286 ; X32-NOT: lock
173 ; X32: andb 287 ; X32: andb
174 ; X32-NOT: movb 288 ; X32-NOT: movb
175 %1 = load atomic i8* %p monotonic, align 1 289 %1 = load atomic i8, i8* %p monotonic, align 1
176 %2 = and i8 %1, 2 290 %2 = and i8 %1, 2
177 store atomic i8 %2, i8* %p release, align 1 291 store atomic i8 %2, i8* %p release, align 1
178 ret void 292 ret void
179 } 293 }
180 294
181 define void @and_16(i16* %p) { 295 define void @and_8r(i8* %p, i8 %v) {
296 ; X64-LABEL: and_8r:
297 ; X64-NOT: lock
298 ; X64: andb
299 ; X64-NOT: movb
300 ; X32-LABEL: and_8r:
301 ; X32-NOT: lock
302 ; X32: andb
303 ; X32-NOT: movb
304 %1 = load atomic i8, i8* %p monotonic, align 1
305 %2 = and i8 %1, %v
306 store atomic i8 %2, i8* %p release, align 1
307 ret void
308 }
309
310 define void @and_16i(i16* %p) {
182 ; Currently the transformation is not done on 16 bit accesses, as the backend 311 ; Currently the transformation is not done on 16 bit accesses, as the backend
183 ; treats 16 bit arithmetic as expensive on X86/X86_64. 312 ; treats 16 bit arithmetic as expensive on X86/X86_64.
184 ; X64-LABEL: and_16 313 ; X64-LABEL: and_16i:
185 ; X64-NOT: andw 314 ; X64-NOT: andw
186 ; X32-LABEL: and_16 315 ; X32-LABEL: and_16i:
187 ; X32-NOT: andw 316 ; X32-NOT: andw
188 %1 = load atomic i16* %p acquire, align 2 317 %1 = load atomic i16, i16* %p acquire, align 2
189 %2 = and i16 %1, 2 318 %2 = and i16 %1, 2
190 store atomic i16 %2, i16* %p release, align 2 319 store atomic i16 %2, i16* %p release, align 2
191 ret void 320 ret void
192 } 321 }
193 322
194 define void @and_32(i32* %p) { 323 define void @and_16r(i16* %p, i16 %v) {
195 ; X64-LABEL: and_32 324 ; Currently the transformation is not done on 16 bit accesses, as the backend
325 ; treats 16 bit arithmetic as expensive on X86/X86_64.
326 ; X64-LABEL: and_16r:
327 ; X64-NOT: andw
328 ; X32-LABEL: and_16r:
329 ; X32-NOT: andw {{.*}}, (
330 %1 = load atomic i16, i16* %p acquire, align 2
331 %2 = and i16 %1, %v
332 store atomic i16 %2, i16* %p release, align 2
333 ret void
334 }
335
336 define void @and_32i(i32* %p) {
337 ; X64-LABEL: and_32i:
196 ; X64-NOT: lock 338 ; X64-NOT: lock
197 ; X64: andl 339 ; X64: andl
198 ; X64-NOT: movl 340 ; X64-NOT: movl
199 ; X32-LABEL: and_32 341 ; X32-LABEL: and_32i:
200 ; X32-NOT: lock 342 ; X32-NOT: lock
201 ; X32: andl 343 ; X32: andl
202 ; X32-NOT: movl 344 ; X32-NOT: movl
203 %1 = load atomic i32* %p acquire, align 4 345 %1 = load atomic i32, i32* %p acquire, align 4
204 %2 = and i32 %1, 2 346 %2 = and i32 %1, 2
205 store atomic i32 %2, i32* %p release, align 4 347 store atomic i32 %2, i32* %p release, align 4
206 ret void 348 ret void
207 } 349 }
208 350
209 define void @and_64(i64* %p) { 351 define void @and_32r(i32* %p, i32 %v) {
210 ; X64-LABEL: and_64 352 ; X64-LABEL: and_32r:
353 ; X64-NOT: lock
354 ; X64: andl
355 ; X64-NOT: movl
356 ; X32-LABEL: and_32r:
357 ; X32-NOT: lock
358 ; X32: andl
359 ; X32-NOT: movl
360 %1 = load atomic i32, i32* %p acquire, align 4
361 %2 = and i32 %1, %v
362 store atomic i32 %2, i32* %p release, align 4
363 ret void
364 }
365
366 define void @and_64i(i64* %p) {
367 ; X64-LABEL: and_64i:
211 ; X64-NOT: lock 368 ; X64-NOT: lock
212 ; X64: andq 369 ; X64: andq
213 ; X64-NOT: movq 370 ; X64-NOT: movq
214 ; We do not check X86-32 as it cannot do 'andq'. 371 ; We do not check X86-32 as it cannot do 'andq'.
215 ; X32-LABEL: and_64 372 ; X32-LABEL: and_64i:
216 %1 = load atomic i64* %p acquire, align 8 373 %1 = load atomic i64, i64* %p acquire, align 8
217 %2 = and i64 %1, 2 374 %2 = and i64 %1, 2
218 store atomic i64 %2, i64* %p release, align 8 375 store atomic i64 %2, i64* %p release, align 8
219 ret void 376 ret void
220 } 377 }
221 378
222 define void @and_32_seq_cst(i32* %p) { 379 define void @and_64r(i64* %p, i64 %v) {
223 ; X64-LABEL: and_32_seq_cst 380 ; X64-LABEL: and_64r:
224 ; X64: xchgl 381 ; X64-NOT: lock
225 ; X32-LABEL: and_32_seq_cst 382 ; X64: andq
226 ; X32: xchgl 383 ; X64-NOT: movq
227 %1 = load atomic i32* %p monotonic, align 4 384 ; We do not check X86-32 as it cannot do 'andq'.
385 ; X32-LABEL: and_64r:
386 %1 = load atomic i64, i64* %p acquire, align 8
387 %2 = and i64 %1, %v
388 store atomic i64 %2, i64* %p release, align 8
389 ret void
390 }
391
392 define void @and_32i_seq_cst(i32* %p) {
393 ; X64-LABEL: and_32i_seq_cst:
394 ; X64: xchgl
395 ; X32-LABEL: and_32i_seq_cst:
396 ; X32: xchgl
397 %1 = load atomic i32, i32* %p monotonic, align 4
228 %2 = and i32 %1, 2 398 %2 = and i32 %1, 2
229 store atomic i32 %2, i32* %p seq_cst, align 4 399 store atomic i32 %2, i32* %p seq_cst, align 4
230 ret void 400 ret void
231 } 401 }
232 402
403 define void @and_32r_seq_cst(i32* %p, i32 %v) {
404 ; X64-LABEL: and_32r_seq_cst:
405 ; X64: xchgl
406 ; X32-LABEL: and_32r_seq_cst:
407 ; X32: xchgl
408 %1 = load atomic i32, i32* %p monotonic, align 4
409 %2 = and i32 %1, %v
410 store atomic i32 %2, i32* %p seq_cst, align 4
411 ret void
412 }
413
233 ; ----- OR ----- 414 ; ----- OR -----
234 415
235 define void @or_8(i8* %p) { 416 define void @or_8i(i8* %p) {
236 ; X64-LABEL: or_8 417 ; X64-LABEL: or_8i:
237 ; X64-NOT: lock 418 ; X64-NOT: lock
238 ; X64: orb 419 ; X64: orb
239 ; X64-NOT: movb 420 ; X64-NOT: movb
240 ; X32-LABEL: or_8 421 ; X32-LABEL: or_8i:
241 ; X32-NOT: lock 422 ; X32-NOT: lock
242 ; X32: orb 423 ; X32: orb
243 ; X32-NOT: movb 424 ; X32-NOT: movb
244 %1 = load atomic i8* %p acquire, align 1 425 %1 = load atomic i8, i8* %p acquire, align 1
245 %2 = or i8 %1, 2 426 %2 = or i8 %1, 2
246 store atomic i8 %2, i8* %p release, align 1 427 store atomic i8 %2, i8* %p release, align 1
247 ret void 428 ret void
248 } 429 }
249 430
250 define void @or_16(i16* %p) { 431 define void @or_8r(i8* %p, i8 %v) {
251 ; X64-LABEL: or_16 432 ; X64-LABEL: or_8r:
433 ; X64-NOT: lock
434 ; X64: orb
435 ; X64-NOT: movb
436 ; X32-LABEL: or_8r:
437 ; X32-NOT: lock
438 ; X32: orb
439 ; X32-NOT: movb
440 %1 = load atomic i8, i8* %p acquire, align 1
441 %2 = or i8 %1, %v
442 store atomic i8 %2, i8* %p release, align 1
443 ret void
444 }
445
446 define void @or_16i(i16* %p) {
447 ; X64-LABEL: or_16i:
252 ; X64-NOT: orw 448 ; X64-NOT: orw
253 ; X32-LABEL: or_16 449 ; X32-LABEL: or_16i:
254 ; X32-NOT: orw 450 ; X32-NOT: orw
255 %1 = load atomic i16* %p acquire, align 2 451 %1 = load atomic i16, i16* %p acquire, align 2
256 %2 = or i16 %1, 2 452 %2 = or i16 %1, 2
257 store atomic i16 %2, i16* %p release, align 2 453 store atomic i16 %2, i16* %p release, align 2
258 ret void 454 ret void
259 } 455 }
260 456
261 define void @or_32(i32* %p) { 457 define void @or_16r(i16* %p, i16 %v) {
262 ; X64-LABEL: or_32 458 ; X64-LABEL: or_16r:
459 ; X64-NOT: orw
460 ; X32-LABEL: or_16r:
461 ; X32-NOT: orw {{.*}}, (
462 %1 = load atomic i16, i16* %p acquire, align 2
463 %2 = or i16 %1, %v
464 store atomic i16 %2, i16* %p release, align 2
465 ret void
466 }
467
468 define void @or_32i(i32* %p) {
469 ; X64-LABEL: or_32i:
263 ; X64-NOT: lock 470 ; X64-NOT: lock
264 ; X64: orl 471 ; X64: orl
265 ; X64-NOT: movl 472 ; X64-NOT: movl
266 ; X32-LABEL: or_32 473 ; X32-LABEL: or_32i:
267 ; X32-NOT: lock 474 ; X32-NOT: lock
268 ; X32: orl 475 ; X32: orl
269 ; X32-NOT: movl 476 ; X32-NOT: movl
270 %1 = load atomic i32* %p acquire, align 4 477 %1 = load atomic i32, i32* %p acquire, align 4
271 %2 = or i32 %1, 2 478 %2 = or i32 %1, 2
272 store atomic i32 %2, i32* %p release, align 4 479 store atomic i32 %2, i32* %p release, align 4
273 ret void 480 ret void
274 } 481 }
275 482
276 define void @or_64(i64* %p) { 483 define void @or_32r(i32* %p, i32 %v) {
277 ; X64-LABEL: or_64 484 ; X64-LABEL: or_32r:
485 ; X64-NOT: lock
486 ; X64: orl
487 ; X64-NOT: movl
488 ; X32-LABEL: or_32r:
489 ; X32-NOT: lock
490 ; X32: orl
491 ; X32-NOT: movl
492 %1 = load atomic i32, i32* %p acquire, align 4
493 %2 = or i32 %1, %v
494 store atomic i32 %2, i32* %p release, align 4
495 ret void
496 }
497
498 define void @or_64i(i64* %p) {
499 ; X64-LABEL: or_64i:
278 ; X64-NOT: lock 500 ; X64-NOT: lock
279 ; X64: orq 501 ; X64: orq
280 ; X64-NOT: movq 502 ; X64-NOT: movq
281 ; We do not check X86-32 as it cannot do 'orq'. 503 ; We do not check X86-32 as it cannot do 'orq'.
282 ; X32-LABEL: or_64 504 ; X32-LABEL: or_64i:
283 %1 = load atomic i64* %p acquire, align 8 505 %1 = load atomic i64, i64* %p acquire, align 8
284 %2 = or i64 %1, 2 506 %2 = or i64 %1, 2
285 store atomic i64 %2, i64* %p release, align 8 507 store atomic i64 %2, i64* %p release, align 8
286 ret void 508 ret void
287 } 509 }
288 510
289 define void @or_32_seq_cst(i32* %p) { 511 define void @or_64r(i64* %p, i64 %v) {
290 ; X64-LABEL: or_32_seq_cst 512 ; X64-LABEL: or_64r:
291 ; X64: xchgl 513 ; X64-NOT: lock
292 ; X32-LABEL: or_32_seq_cst 514 ; X64: orq
293 ; X32: xchgl 515 ; X64-NOT: movq
294 %1 = load atomic i32* %p monotonic, align 4 516 ; We do not check X86-32 as it cannot do 'orq'.
517 ; X32-LABEL: or_64r:
518 %1 = load atomic i64, i64* %p acquire, align 8
519 %2 = or i64 %1, %v
520 store atomic i64 %2, i64* %p release, align 8
521 ret void
522 }
523
524 define void @or_32i_seq_cst(i32* %p) {
525 ; X64-LABEL: or_32i_seq_cst:
526 ; X64: xchgl
527 ; X32-LABEL: or_32i_seq_cst:
528 ; X32: xchgl
529 %1 = load atomic i32, i32* %p monotonic, align 4
295 %2 = or i32 %1, 2 530 %2 = or i32 %1, 2
296 store atomic i32 %2, i32* %p seq_cst, align 4 531 store atomic i32 %2, i32* %p seq_cst, align 4
297 ret void 532 ret void
298 } 533 }
299 534
535 define void @or_32r_seq_cst(i32* %p, i32 %v) {
536 ; X64-LABEL: or_32r_seq_cst:
537 ; X64: xchgl
538 ; X32-LABEL: or_32r_seq_cst:
539 ; X32: xchgl
540 %1 = load atomic i32, i32* %p monotonic, align 4
541 %2 = or i32 %1, %v
542 store atomic i32 %2, i32* %p seq_cst, align 4
543 ret void
544 }
545
300 ; ----- XOR ----- 546 ; ----- XOR -----
301 547
302 define void @xor_8(i8* %p) { 548 define void @xor_8i(i8* %p) {
303 ; X64-LABEL: xor_8 549 ; X64-LABEL: xor_8i:
304 ; X64-NOT: lock 550 ; X64-NOT: lock
305 ; X64: xorb 551 ; X64: xorb
306 ; X64-NOT: movb 552 ; X64-NOT: movb
307 ; X32-LABEL: xor_8 553 ; X32-LABEL: xor_8i:
308 ; X32-NOT: lock 554 ; X32-NOT: lock
309 ; X32: xorb 555 ; X32: xorb
310 ; X32-NOT: movb 556 ; X32-NOT: movb
311 %1 = load atomic i8* %p acquire, align 1 557 %1 = load atomic i8, i8* %p acquire, align 1
312 %2 = xor i8 %1, 2 558 %2 = xor i8 %1, 2
313 store atomic i8 %2, i8* %p release, align 1 559 store atomic i8 %2, i8* %p release, align 1
314 ret void 560 ret void
315 } 561 }
316 562
317 define void @xor_16(i16* %p) { 563 define void @xor_8r(i8* %p, i8 %v) {
318 ; X64-LABEL: xor_16 564 ; X64-LABEL: xor_8r:
565 ; X64-NOT: lock
566 ; X64: xorb
567 ; X64-NOT: movb
568 ; X32-LABEL: xor_8r:
569 ; X32-NOT: lock
570 ; X32: xorb
571 ; X32-NOT: movb
572 %1 = load atomic i8, i8* %p acquire, align 1
573 %2 = xor i8 %1, %v
574 store atomic i8 %2, i8* %p release, align 1
575 ret void
576 }
577
578 define void @xor_16i(i16* %p) {
579 ; X64-LABEL: xor_16i:
319 ; X64-NOT: xorw 580 ; X64-NOT: xorw
320 ; X32-LABEL: xor_16 581 ; X32-LABEL: xor_16i:
321 ; X32-NOT: xorw 582 ; X32-NOT: xorw
322 %1 = load atomic i16* %p acquire, align 2 583 %1 = load atomic i16, i16* %p acquire, align 2
323 %2 = xor i16 %1, 2 584 %2 = xor i16 %1, 2
324 store atomic i16 %2, i16* %p release, align 2 585 store atomic i16 %2, i16* %p release, align 2
325 ret void 586 ret void
326 } 587 }
327 588
328 define void @xor_32(i32* %p) { 589 define void @xor_16r(i16* %p, i16 %v) {
329 ; X64-LABEL: xor_32 590 ; X64-LABEL: xor_16r:
591 ; X64-NOT: xorw
592 ; X32-LABEL: xor_16r:
593 ; X32-NOT: xorw {{.*}}, (
594 %1 = load atomic i16, i16* %p acquire, align 2
595 %2 = xor i16 %1, %v
596 store atomic i16 %2, i16* %p release, align 2
597 ret void
598 }
599
600 define void @xor_32i(i32* %p) {
601 ; X64-LABEL: xor_32i:
330 ; X64-NOT: lock 602 ; X64-NOT: lock
331 ; X64: xorl 603 ; X64: xorl
332 ; X64-NOT: movl 604 ; X64-NOT: movl
333 ; X32-LABEL: xor_32 605 ; X32-LABEL: xor_32i:
334 ; X32-NOT: lock 606 ; X32-NOT: lock
335 ; X32: xorl 607 ; X32: xorl
336 ; X32-NOT: movl 608 ; X32-NOT: movl
337 %1 = load atomic i32* %p acquire, align 4 609 %1 = load atomic i32, i32* %p acquire, align 4
338 %2 = xor i32 %1, 2 610 %2 = xor i32 %1, 2
339 store atomic i32 %2, i32* %p release, align 4 611 store atomic i32 %2, i32* %p release, align 4
340 ret void 612 ret void
341 } 613 }
342 614
343 define void @xor_64(i64* %p) { 615 define void @xor_32r(i32* %p, i32 %v) {
344 ; X64-LABEL: xor_64 616 ; X64-LABEL: xor_32r:
617 ; X64-NOT: lock
618 ; X64: xorl
619 ; X64-NOT: movl
620 ; X32-LABEL: xor_32r:
621 ; X32-NOT: lock
622 ; X32: xorl
623 ; X32-NOT: movl
624 %1 = load atomic i32, i32* %p acquire, align 4
625 %2 = xor i32 %1, %v
626 store atomic i32 %2, i32* %p release, align 4
627 ret void
628 }
629
630 define void @xor_64i(i64* %p) {
631 ; X64-LABEL: xor_64i:
345 ; X64-NOT: lock 632 ; X64-NOT: lock
346 ; X64: xorq 633 ; X64: xorq
347 ; X64-NOT: movq 634 ; X64-NOT: movq
348 ; We do not check X86-32 as it cannot do 'xorq'. 635 ; We do not check X86-32 as it cannot do 'xorq'.
349 ; X32-LABEL: xor_64 636 ; X32-LABEL: xor_64i:
350 %1 = load atomic i64* %p acquire, align 8 637 %1 = load atomic i64, i64* %p acquire, align 8
351 %2 = xor i64 %1, 2 638 %2 = xor i64 %1, 2
352 store atomic i64 %2, i64* %p release, align 8 639 store atomic i64 %2, i64* %p release, align 8
353 ret void 640 ret void
354 } 641 }
355 642
356 define void @xor_32_seq_cst(i32* %p) { 643 define void @xor_64r(i64* %p, i64 %v) {
357 ; X64-LABEL: xor_32_seq_cst 644 ; X64-LABEL: xor_64r:
358 ; X64: xchgl 645 ; X64-NOT: lock
359 ; X32-LABEL: xor_32_seq_cst 646 ; X64: xorq
360 ; X32: xchgl 647 ; X64-NOT: movq
361 %1 = load atomic i32* %p monotonic, align 4 648 ; We do not check X86-32 as it cannot do 'xorq'.
649 ; X32-LABEL: xor_64r:
650 %1 = load atomic i64, i64* %p acquire, align 8
651 %2 = xor i64 %1, %v
652 store atomic i64 %2, i64* %p release, align 8
653 ret void
654 }
655
656 define void @xor_32i_seq_cst(i32* %p) {
657 ; X64-LABEL: xor_32i_seq_cst:
658 ; X64: xchgl
659 ; X32-LABEL: xor_32i_seq_cst:
660 ; X32: xchgl
661 %1 = load atomic i32, i32* %p monotonic, align 4
362 %2 = xor i32 %1, 2 662 %2 = xor i32 %1, 2
363 store atomic i32 %2, i32* %p seq_cst, align 4 663 store atomic i32 %2, i32* %p seq_cst, align 4
364 ret void 664 ret void
365 } 665 }
366 666
667 define void @xor_32r_seq_cst(i32* %p, i32 %v) {
668 ; X64-LABEL: xor_32r_seq_cst:
669 ; X64: xchgl
670 ; X32-LABEL: xor_32r_seq_cst:
671 ; X32: xchgl
672 %1 = load atomic i32, i32* %p monotonic, align 4
673 %2 = xor i32 %1, %v
674 store atomic i32 %2, i32* %p seq_cst, align 4
675 ret void
676 }
677
367 ; ----- INC ----- 678 ; ----- INC -----
368 679
369 define void @inc_8(i8* %p) { 680 define void @inc_8(i8* %p) {
370 ; X64-LABEL: inc_8 681 ; X64-LABEL: inc_8:
371 ; X64-NOT: lock 682 ; X64-NOT: lock
372 ; X64: incb 683 ; X64: incb
373 ; X64-NOT: movb 684 ; X64-NOT: movb
374 ; X32-LABEL: inc_8 685 ; X32-LABEL: inc_8:
375 ; X32-NOT: lock 686 ; X32-NOT: lock
376 ; X32: incb 687 ; X32: incb
377 ; X32-NOT: movb 688 ; X32-NOT: movb
378 ; SLOW_INC-LABEL: inc_8 689 ; SLOW_INC-LABEL: inc_8:
379 ; SLOW_INC-NOT: incb 690 ; SLOW_INC-NOT: incb
380 ; SLOW_INC-NOT: movb 691 ; SLOW_INC-NOT: movb
381 %1 = load atomic i8* %p seq_cst, align 1 692 %1 = load atomic i8, i8* %p seq_cst, align 1
382 %2 = add i8 %1, 1 693 %2 = add i8 %1, 1
383 store atomic i8 %2, i8* %p release, align 1 694 store atomic i8 %2, i8* %p release, align 1
384 ret void 695 ret void
385 } 696 }
386 697
387 define void @inc_16(i16* %p) { 698 define void @inc_16(i16* %p) {
388 ; Currently the transformation is not done on 16 bit accesses, as the backend 699 ; Currently the transformation is not done on 16 bit accesses, as the backend
389 ; treats 16 bit arithmetic as expensive on X86/X86_64. 700 ; treats 16 bit arithmetic as expensive on X86/X86_64.
390 ; X64-LABEL: inc_16 701 ; X64-LABEL: inc_16:
391 ; X64-NOT: incw 702 ; X64-NOT: incw
392 ; X32-LABEL: inc_16 703 ; X32-LABEL: inc_16:
393 ; X32-NOT: incw 704 ; X32-NOT: incw
394 ; SLOW_INC-LABEL: inc_16 705 ; SLOW_INC-LABEL: inc_16:
395 ; SLOW_INC-NOT: incw 706 ; SLOW_INC-NOT: incw
396 %1 = load atomic i16* %p acquire, align 2 707 %1 = load atomic i16, i16* %p acquire, align 2
397 %2 = add i16 %1, 1 708 %2 = add i16 %1, 1
398 store atomic i16 %2, i16* %p release, align 2 709 store atomic i16 %2, i16* %p release, align 2
399 ret void 710 ret void
400 } 711 }
401 712
402 define void @inc_32(i32* %p) { 713 define void @inc_32(i32* %p) {
403 ; X64-LABEL: inc_32 714 ; X64-LABEL: inc_32:
404 ; X64-NOT: lock 715 ; X64-NOT: lock
405 ; X64: incl 716 ; X64: incl
406 ; X64-NOT: movl 717 ; X64-NOT: movl
407 ; X32-LABEL: inc_32 718 ; X32-LABEL: inc_32:
408 ; X32-NOT: lock 719 ; X32-NOT: lock
409 ; X32: incl 720 ; X32: incl
410 ; X32-NOT: movl 721 ; X32-NOT: movl
411 ; SLOW_INC-LABEL: inc_32 722 ; SLOW_INC-LABEL: inc_32:
412 ; SLOW_INC-NOT: incl 723 ; SLOW_INC-NOT: incl
413 ; SLOW_INC-NOT: movl 724 ; SLOW_INC-NOT: movl
414 %1 = load atomic i32* %p acquire, align 4 725 %1 = load atomic i32, i32* %p acquire, align 4
415 %2 = add i32 %1, 1 726 %2 = add i32 %1, 1
416 store atomic i32 %2, i32* %p monotonic, align 4 727 store atomic i32 %2, i32* %p monotonic, align 4
417 ret void 728 ret void
418 } 729 }
419 730
420 define void @inc_64(i64* %p) { 731 define void @inc_64(i64* %p) {
421 ; X64-LABEL: inc_64 732 ; X64-LABEL: inc_64:
422 ; X64-NOT: lock 733 ; X64-NOT: lock
423 ; X64: incq 734 ; X64: incq
424 ; X64-NOT: movq 735 ; X64-NOT: movq
425 ; We do not check X86-32 as it cannot do 'incq'. 736 ; We do not check X86-32 as it cannot do 'incq'.
426 ; X32-LABEL: inc_64 737 ; X32-LABEL: inc_64:
427 ; SLOW_INC-LABEL: inc_64 738 ; SLOW_INC-LABEL: inc_64:
428 ; SLOW_INC-NOT: incq 739 ; SLOW_INC-NOT: incq
429 ; SLOW_INC-NOT: movq 740 ; SLOW_INC-NOT: movq
430 %1 = load atomic i64* %p acquire, align 8 741 %1 = load atomic i64, i64* %p acquire, align 8
431 %2 = add i64 %1, 1 742 %2 = add i64 %1, 1
432 store atomic i64 %2, i64* %p release, align 8 743 store atomic i64 %2, i64* %p release, align 8
433 ret void 744 ret void
434 } 745 }
435 746
436 define void @inc_32_seq_cst(i32* %p) { 747 define void @inc_32_seq_cst(i32* %p) {
437 ; X64-LABEL: inc_32_seq_cst 748 ; X64-LABEL: inc_32_seq_cst:
438 ; X64: xchgl 749 ; X64: xchgl
439 ; X32-LABEL: inc_32_seq_cst 750 ; X32-LABEL: inc_32_seq_cst:
440 ; X32: xchgl 751 ; X32: xchgl
441 %1 = load atomic i32* %p monotonic, align 4 752 %1 = load atomic i32, i32* %p monotonic, align 4
442 %2 = add i32 %1, 1 753 %2 = add i32 %1, 1
443 store atomic i32 %2, i32* %p seq_cst, align 4 754 store atomic i32 %2, i32* %p seq_cst, align 4
444 ret void 755 ret void
445 } 756 }
446 757
447 ; ----- DEC ----- 758 ; ----- DEC -----
448 759
449 define void @dec_8(i8* %p) { 760 define void @dec_8(i8* %p) {
450 ; X64-LABEL: dec_8 761 ; X64-LABEL: dec_8:
451 ; X64-NOT: lock 762 ; X64-NOT: lock
452 ; X64: decb 763 ; X64: decb
453 ; X64-NOT: movb 764 ; X64-NOT: movb
454 ; X32-LABEL: dec_8 765 ; X32-LABEL: dec_8:
455 ; X32-NOT: lock 766 ; X32-NOT: lock
456 ; X32: decb 767 ; X32: decb
457 ; X32-NOT: movb 768 ; X32-NOT: movb
458 ; SLOW_INC-LABEL: dec_8 769 ; SLOW_INC-LABEL: dec_8:
459 ; SLOW_INC-NOT: decb 770 ; SLOW_INC-NOT: decb
460 ; SLOW_INC-NOT: movb 771 ; SLOW_INC-NOT: movb
461 %1 = load atomic i8* %p seq_cst, align 1 772 %1 = load atomic i8, i8* %p seq_cst, align 1
462 %2 = sub i8 %1, 1 773 %2 = sub i8 %1, 1
463 store atomic i8 %2, i8* %p release, align 1 774 store atomic i8 %2, i8* %p release, align 1
464 ret void 775 ret void
465 } 776 }
466 777
467 define void @dec_16(i16* %p) { 778 define void @dec_16(i16* %p) {
468 ; Currently the transformation is not done on 16 bit accesses, as the backend 779 ; Currently the transformation is not done on 16 bit accesses, as the backend
469 ; treats 16 bit arithmetic as expensive on X86/X86_64. 780 ; treats 16 bit arithmetic as expensive on X86/X86_64.
470 ; X64-LABEL: dec_16 781 ; X64-LABEL: dec_16:
471 ; X64-NOT: decw 782 ; X64-NOT: decw
472 ; X32-LABEL: dec_16 783 ; X32-LABEL: dec_16:
473 ; X32-NOT: decw 784 ; X32-NOT: decw
474 ; SLOW_INC-LABEL: dec_16 785 ; SLOW_INC-LABEL: dec_16:
475 ; SLOW_INC-NOT: decw 786 ; SLOW_INC-NOT: decw
476 %1 = load atomic i16* %p acquire, align 2 787 %1 = load atomic i16, i16* %p acquire, align 2
477 %2 = sub i16 %1, 1 788 %2 = sub i16 %1, 1
478 store atomic i16 %2, i16* %p release, align 2 789 store atomic i16 %2, i16* %p release, align 2
479 ret void 790 ret void
480 } 791 }
481 792
482 define void @dec_32(i32* %p) { 793 define void @dec_32(i32* %p) {
483 ; X64-LABEL: dec_32 794 ; X64-LABEL: dec_32:
484 ; X64-NOT: lock 795 ; X64-NOT: lock
485 ; X64: decl 796 ; X64: decl
486 ; X64-NOT: movl 797 ; X64-NOT: movl
487 ; X32-LABEL: dec_32 798 ; X32-LABEL: dec_32:
488 ; X32-NOT: lock 799 ; X32-NOT: lock
489 ; X32: decl 800 ; X32: decl
490 ; X32-NOT: movl 801 ; X32-NOT: movl
491 ; SLOW_INC-LABEL: dec_32 802 ; SLOW_INC-LABEL: dec_32:
492 ; SLOW_INC-NOT: decl 803 ; SLOW_INC-NOT: decl
493 ; SLOW_INC-NOT: movl 804 ; SLOW_INC-NOT: movl
494 %1 = load atomic i32* %p acquire, align 4 805 %1 = load atomic i32, i32* %p acquire, align 4
495 %2 = sub i32 %1, 1 806 %2 = sub i32 %1, 1
496 store atomic i32 %2, i32* %p monotonic, align 4 807 store atomic i32 %2, i32* %p monotonic, align 4
497 ret void 808 ret void
498 } 809 }
499 810
500 define void @dec_64(i64* %p) { 811 define void @dec_64(i64* %p) {
501 ; X64-LABEL: dec_64 812 ; X64-LABEL: dec_64:
502 ; X64-NOT: lock 813 ; X64-NOT: lock
503 ; X64: decq 814 ; X64: decq
504 ; X64-NOT: movq 815 ; X64-NOT: movq
505 ; We do not check X86-32 as it cannot do 'decq'. 816 ; We do not check X86-32 as it cannot do 'decq'.
506 ; X32-LABEL: dec_64 817 ; X32-LABEL: dec_64:
507 ; SLOW_INC-LABEL: dec_64 818 ; SLOW_INC-LABEL: dec_64:
508 ; SLOW_INC-NOT: decq 819 ; SLOW_INC-NOT: decq
509 ; SLOW_INC-NOT: movq 820 ; SLOW_INC-NOT: movq
510 %1 = load atomic i64* %p acquire, align 8 821 %1 = load atomic i64, i64* %p acquire, align 8
511 %2 = sub i64 %1, 1 822 %2 = sub i64 %1, 1
512 store atomic i64 %2, i64* %p release, align 8 823 store atomic i64 %2, i64* %p release, align 8
513 ret void 824 ret void
514 } 825 }
515 826
516 define void @dec_32_seq_cst(i32* %p) { 827 define void @dec_32_seq_cst(i32* %p) {
517 ; X64-LABEL: dec_32_seq_cst 828 ; X64-LABEL: dec_32_seq_cst:
518 ; X64: xchgl 829 ; X64: xchgl
519 ; X32-LABEL: dec_32_seq_cst 830 ; X32-LABEL: dec_32_seq_cst:
520 ; X32: xchgl 831 ; X32: xchgl
521 %1 = load atomic i32* %p monotonic, align 4 832 %1 = load atomic i32, i32* %p monotonic, align 4
522 %2 = sub i32 %1, 1 833 %2 = sub i32 %1, 1
523 store atomic i32 %2, i32* %p seq_cst, align 4 834 store atomic i32 %2, i32* %p seq_cst, align 4
524 ret void 835 ret void
525 } 836 }
837
838 ; ----- FADD -----
839
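The FP tests below keep the atomic accesses on the integer side and move between integer and float with bitcasts; at the source level the pattern corresponds roughly to this sketch (hypothetical names), and the expectation is that the loaded bits feed the addss/movss pair checked in fadd_32r:

#include <atomic>

std::atomic<float> f{0.0f};  // hypothetical global

// Expected x86-64 shape for a global:
//   addss f(%rip), %xmm0    ; load folded into the add
//   movss %xmm0, f(%rip)
void fadd_release(float val) {
  float cur = f.load(std::memory_order_seq_cst);
  f.store(cur + val, std::memory_order_release);
}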
840 define void @fadd_32r(float* %loc, float %val) {
841 ; X64-LABEL: fadd_32r:
842 ; X64-NOT: lock
843 ; X64-NOT: mov
844 ; X64: addss (%[[M:[a-z]+]]), %[[XMM:xmm[0-9]+]]
845 ; X64-NEXT: movss %[[XMM]], (%[[M]])
846 ; X32-LABEL: fadd_32r:
847 ; Don't check x86-32.
848 ; LLVM's SSE handling is conservative on x86-32 even without using atomics.
849 %floc = bitcast float* %loc to i32*
850 %1 = load atomic i32, i32* %floc seq_cst, align 4
851 %2 = bitcast i32 %1 to float
852 %add = fadd float %2, %val
853 %3 = bitcast float %add to i32
854 store atomic i32 %3, i32* %floc release, align 4
855 ret void
856 }
857
858 define void @fadd_64r(double* %loc, double %val) {
859 ; X64-LABEL: fadd_64r:
860 ; X64-NOT: lock
861 ; X64-NOT: mov
862 ; X64: addsd (%[[M:[a-z]+]]), %[[XMM:xmm[0-9]+]]
863 ; X64-NEXT: movsd %[[XMM]], (%[[M]])
864 ; X32-LABEL: fadd_64r:
865 ; Don't check x86-32 (see comment above).
866 %floc = bitcast double* %loc to i64*
867 %1 = load atomic i64, i64* %floc seq_cst, align 8
868 %2 = bitcast i64 %1 to double
869 %add = fadd double %2, %val
870 %3 = bitcast double %add to i64
871 store atomic i64 %3, i64* %floc release, align 8
872 ret void
873 }