221
|
1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck --check-prefixes=SI-NOHSA,GCN-NOHSA,FUNC %s
|
|
2 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=VI-NOHSA,GCN-NOHSA,FUNC %s
|
|
3 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefixes=EG,FUNC %s
|
150
|
4
|
|
5 ; Legacy intrinsics that just read implicit parameters
|
|
6
|
|
7 ; FUNC-LABEL: {{^}}ngroups_x:
|
|
8 ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x0
|
|
9 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x0
|
|
10 ; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
|
|
11 ; GCN-NOHSA: buffer_store_dword [[VVAL]]
|
|
12
|
|
13 ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
|
|
14 ; EG: MOV {{\*? *}}[[VAL]], KC0[0].X
|
|
; Kernel body checked by the directives above. It calls
; @llvm.r600.read.ngroups.x and writes the returned i32 through the
; addrspace(1) pointer %out.
define amdgpu_kernel void @ngroups_x(i32 addrspace(1)* %out) {
entry:
  %ngroups.x = call i32 @llvm.r600.read.ngroups.x() #0
  store i32 %ngroups.x, i32 addrspace(1)* %out
  ret void
}
|
|
21
|
|
22 ; FUNC-LABEL: {{^}}ngroups_y:
|
|
23 ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1
|
|
24 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4
|
|
25 ; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
|
|
26 ; GCN-NOHSA: buffer_store_dword [[VVAL]]
|
|
27
|
|
28 ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
|
|
29 ; EG: MOV {{\*? *}}[[VAL]], KC0[0].Y
|
|
; Kernel body checked by the directives above. It calls
; @llvm.r600.read.ngroups.y and writes the returned i32 through the
; addrspace(1) pointer %out.
define amdgpu_kernel void @ngroups_y(i32 addrspace(1)* %out) {
entry:
  %ngroups.y = call i32 @llvm.r600.read.ngroups.y() #0
  store i32 %ngroups.y, i32 addrspace(1)* %out
  ret void
}
|
|
36
|
|
37 ; FUNC-LABEL: {{^}}ngroups_z:
|
|
38 ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2
|
|
39 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8
|
|
40 ; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
|
|
41 ; GCN-NOHSA: buffer_store_dword [[VVAL]]
|
|
42
|
|
43 ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
|
|
44 ; EG: MOV {{\*? *}}[[VAL]], KC0[0].Z
|
|
; Kernel body checked by the directives above. It calls
; @llvm.r600.read.ngroups.z and writes the returned i32 through the
; addrspace(1) pointer %out.
define amdgpu_kernel void @ngroups_z(i32 addrspace(1)* %out) {
entry:
  %ngroups.z = call i32 @llvm.r600.read.ngroups.z() #0
  store i32 %ngroups.z, i32 addrspace(1)* %out
  ret void
}
|
|
51
|
|
52 ; FUNC-LABEL: {{^}}global_size_x:
|
|
53 ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x3
|
|
54 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xc
|
|
55 ; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
|
|
56 ; GCN-NOHSA: buffer_store_dword [[VVAL]]
|
|
57
|
|
58 ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
|
|
59 ; EG: MOV {{\*? *}}[[VAL]], KC0[0].W
|
|
; Kernel body checked by the directives above. It calls
; @llvm.r600.read.global.size.x and writes the returned i32 through the
; addrspace(1) pointer %out.
define amdgpu_kernel void @global_size_x(i32 addrspace(1)* %out) {
entry:
  %global.size.x = call i32 @llvm.r600.read.global.size.x() #0
  store i32 %global.size.x, i32 addrspace(1)* %out
  ret void
}
|
|
66
|
|
67 ; FUNC-LABEL: {{^}}global_size_y:
|
|
68 ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4
|
|
69 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x10
|
|
70 ; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
|
|
71 ; GCN-NOHSA: buffer_store_dword [[VVAL]]
|
|
72
|
|
73 ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
|
|
74 ; EG: MOV {{\*? *}}[[VAL]], KC0[1].X
|
|
; Kernel body checked by the directives above. It calls
; @llvm.r600.read.global.size.y and writes the returned i32 through the
; addrspace(1) pointer %out.
define amdgpu_kernel void @global_size_y(i32 addrspace(1)* %out) {
entry:
  %global.size.y = call i32 @llvm.r600.read.global.size.y() #0
  store i32 %global.size.y, i32 addrspace(1)* %out
  ret void
}
|
|
81
|
|
82 ; FUNC-LABEL: {{^}}global_size_z:
|
|
83 ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x5
|
|
84 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x14
|
|
85 ; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
|
|
86 ; GCN-NOHSA: buffer_store_dword [[VVAL]]
|
|
87
|
|
88 ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
|
|
89 ; EG: MOV {{\*? *}}[[VAL]], KC0[1].Y
|
|
; Kernel body checked by the directives above. It calls
; @llvm.r600.read.global.size.z and writes the returned i32 through the
; addrspace(1) pointer %out.
define amdgpu_kernel void @global_size_z(i32 addrspace(1)* %out) {
entry:
  %global.size.z = call i32 @llvm.r600.read.global.size.z() #0
  store i32 %global.size.z, i32 addrspace(1)* %out
  ret void
}
|
|
96
|
|
97 ; FUNC-LABEL: {{^}}local_size_x:
|
|
98 ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x6
|
|
99 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x18
|
|
100 ; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
|
|
101 ; GCN-NOHSA: buffer_store_dword [[VVAL]]
|
|
102
|
|
103 ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
|
|
104 ; EG: MOV {{\*? *}}[[VAL]], KC0[1].Z
|
|
; Kernel body checked by the directives above. It calls
; @llvm.r600.read.local.size.x and writes the returned i32 through the
; addrspace(1) pointer %out.
define amdgpu_kernel void @local_size_x(i32 addrspace(1)* %out) {
entry:
  %local.size.x = call i32 @llvm.r600.read.local.size.x() #0
  store i32 %local.size.x, i32 addrspace(1)* %out
  ret void
}
|
|
111
|
|
112 ; FUNC-LABEL: {{^}}local_size_y:
|
|
113 ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x7
|
|
114 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1c
|
|
115 ; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
|
|
116 ; GCN-NOHSA: buffer_store_dword [[VVAL]]
|
|
117
|
|
118 ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
|
|
119 ; EG: MOV {{\*? *}}[[VAL]], KC0[1].W
|
|
; Kernel body checked by the directives above. It calls
; @llvm.r600.read.local.size.y and writes the returned i32 through the
; addrspace(1) pointer %out.
define amdgpu_kernel void @local_size_y(i32 addrspace(1)* %out) {
entry:
  %local.size.y = call i32 @llvm.r600.read.local.size.y() #0
  store i32 %local.size.y, i32 addrspace(1)* %out
  ret void
}
|
|
126
|
|
127 ; FUNC-LABEL: {{^}}local_size_z:
|
|
128 ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8
|
|
129 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x20
|
|
130 ; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
|
|
131 ; GCN-NOHSA: buffer_store_dword [[VVAL]]
|
|
132
|
|
133 ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
|
|
134 ; EG: MOV {{\*? *}}[[VAL]], KC0[2].X
|
|
; Kernel body checked by the directives above. It calls
; @llvm.r600.read.local.size.z and writes the returned i32 through the
; addrspace(1) pointer %out.
define amdgpu_kernel void @local_size_z(i32 addrspace(1)* %out) {
entry:
  %local.size.z = call i32 @llvm.r600.read.local.size.z() #0
  store i32 %local.size.z, i32 addrspace(1)* %out
  ret void
}
|
|
141
|
|
; Declarations of the legacy r600 read intrinsics called by the kernels in
; this file. Each takes no arguments, returns i32, and uses attribute
; group #0.

; ngroups.{x,y,z}
declare i32 @llvm.r600.read.ngroups.x() #0
declare i32 @llvm.r600.read.ngroups.y() #0
declare i32 @llvm.r600.read.ngroups.z() #0

; global.size.{x,y,z}
declare i32 @llvm.r600.read.global.size.x() #0
declare i32 @llvm.r600.read.global.size.y() #0
declare i32 @llvm.r600.read.global.size.z() #0

; local.size.{x,y,z}
declare i32 @llvm.r600.read.local.size.x() #0
declare i32 @llvm.r600.read.local.size.y() #0
declare i32 @llvm.r600.read.local.size.z() #0

; Attribute group shared by every declaration and call site in this file.
attributes #0 = { readnone }
|