150
|
1 #include <clc/clc.h>
|
|
2
|
|
3 void __clc_amdgcn_s_waitcnt(unsigned flags);
|
|
4
|
|
5 // s_waitcnt takes 16bit argument with a combined number of maximum allowed
|
|
6 // pending operations:
|
|
7 // [12:8] LGKM -- LDS, GDS, Konstant (SMRD), Messages
|
|
8 // [7] -- undefined
|
|
9 // [6:4] -- exports, GDS, and mem write
|
|
10 // [3:0] -- vector memory operations
|
|
11
|
|
12 // Newer clang supports __builtin_amdgcn_s_waitcnt
|
|
13 #if __clang_major__ >= 5
|
|
14 # define __waitcnt(x) __builtin_amdgcn_s_waitcnt(x)
|
|
15 #else
|
|
16 # define __waitcnt(x) __clc_amdgcn_s_waitcnt(x)
|
|
17 _CLC_DEF void __clc_amdgcn_s_waitcnt(unsigned) __asm("llvm.amdgcn.s.waitcnt");
|
|
18 #endif
|
|
19
|
207
|
20 _CLC_DEF _CLC_OVERLOAD void mem_fence(cl_mem_fence_flags flags) {
|
|
21 if (flags & CLK_GLOBAL_MEM_FENCE) {
|
|
22 // scalar loads are counted with LGKM but we don't know whether
|
|
23 // the compiler turned any loads to scalar
|
|
24 __waitcnt(0);
|
|
25 } else if (flags & CLK_LOCAL_MEM_FENCE)
|
|
26 __waitcnt(0xff); // LGKM is [12:8]
|
150
|
27 }
|
|
28 #undef __waitcnt
|
|
29
|
|
// We don't have a separate mechanism for read and write fences
|
207
|
31 _CLC_DEF _CLC_OVERLOAD void read_mem_fence(cl_mem_fence_flags flags) {
|
|
32 mem_fence(flags);
|
150
|
33 }
|
|
34
|
207
|
35 _CLC_DEF _CLC_OVERLOAD void write_mem_fence(cl_mem_fence_flags flags) {
|
|
36 mem_fence(flags);
|
150
|
37 }
|