252
|
1 ; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
|
2 ; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
|
3 ; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
|
4 ; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
150
|
5
|
|
6 ; GCN-LABEL: {{^}}vgpr:
|
|
7 ; GCN-DAG: v_mov_b32_e32 v1, v0
|
|
8 ; GCN-DAG: exp mrt0 v0, v0, v0, v0 done vm
|
|
9 ; GCN: s_waitcnt expcnt(0)
|
|
10 ; GCN: v_add_f32_e32 v0, 1.0, v1
|
|
11 ; GCN-NOT: s_endpgm
|
252
|
; Exports %arg3 on all four channels of export target 0 (enable mask 15),
; then returns the pair { %arg3 + 1.0, %arg3 }.
define amdgpu_vs { float, float } @vgpr(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
bb:
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0
  %plus.one = fadd float %arg3, 1.0
  ; Element 0 holds the incremented value, element 1 the unmodified input.
  %ret.0 = insertvalue { float, float } undef, float %plus.one, 0
  %ret.1 = insertvalue { float, float } %ret.0, float %arg3, 1
  ret { float, float } %ret.1
}
|
|
20
|
|
21 ; GCN-LABEL: {{^}}vgpr_literal:
|
|
22 ; GCN: exp mrt0 v0, v0, v0, v0 done vm
|
|
23
|
|
24 ; GCN-DAG: v_mov_b32_e32 v0, 1.0
|
|
25 ; GCN-DAG: v_mov_b32_e32 v1, 2.0
|
|
26 ; GCN-DAG: v_mov_b32_e32 v2, 4.0
|
|
27 ; GCN-DAG: v_mov_b32_e32 v3, -1.0
|
|
28 ; GCN-DAG: s_waitcnt expcnt(0)
|
|
29 ; GCN-NOT: s_endpgm
|
252
|
; Exports %arg3 on all four channels of export target 0, then returns a
; constant aggregate of four floats: { 1.0, 2.0, 4.0, -1.0 }.
define amdgpu_vs { float, float, float, float } @vgpr_literal(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
bb:
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0
  ret { float, float, float, float } { float 1.0, float 2.0, float 4.0, float -1.0 }
}
|
|
35
|
|
36 ; GCN: .long 165580
|
|
37 ; GCN-NEXT: .long 562
|
|
38 ; GCN-NEXT: .long 165584
|
|
39 ; GCN-NEXT: .long 562
|
|
40 ; GCN-LABEL: {{^}}vgpr_ps_addr0:
|
|
41 ; GCN-NOT: v_mov_b32_e32 v0
|
|
42 ; GCN-NOT: v_mov_b32_e32 v1
|
|
43 ; GCN-NOT: v_mov_b32_e32 v2
|
|
44 ; GCN: v_mov_b32_e32 v3, v4
|
|
45 ; GCN: v_mov_b32_e32 v4, v6
|
|
46 ; GCN-NOT: s_endpgm
|
252
|
; Pixel shader using attribute group #1 ("InitialPSInputAddr"="0").
; Returns five floats: both lanes of %arg4, lane 0 of %arg7, lane 0 of %arg8
; (each reinterpreted from i32 to float), followed by %arg12.
define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr0(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #1 {
bb:
  ; Pull the integer lanes out of the vector arguments.
  %arg4.lane0 = extractelement <2 x i32> %arg4, i32 0
  %arg4.lane1 = extractelement <2 x i32> %arg4, i32 1
  %arg7.lane0 = extractelement <2 x i32> %arg7, i32 0
  %arg8.lane0 = extractelement <2 x i32> %arg8, i32 0
  ; Reinterpret the raw bits as float; no value conversion takes place.
  %arg4.lane0.f = bitcast i32 %arg4.lane0 to float
  %arg4.lane1.f = bitcast i32 %arg4.lane1 to float
  %arg7.lane0.f = bitcast i32 %arg7.lane0 to float
  %arg8.lane0.f = bitcast i32 %arg8.lane0 to float
  ; Assemble the returned aggregate one element at a time.
  %ret.0 = insertvalue { float, float, float, float, float } undef, float %arg4.lane0.f, 0
  %ret.1 = insertvalue { float, float, float, float, float } %ret.0, float %arg4.lane1.f, 1
  %ret.2 = insertvalue { float, float, float, float, float } %ret.1, float %arg7.lane0.f, 2
  %ret.3 = insertvalue { float, float, float, float, float } %ret.2, float %arg8.lane0.f, 3
  %ret.4 = insertvalue { float, float, float, float, float } %ret.3, float %arg12, 4
  ret { float, float, float, float, float } %ret.4
}
|
|
64
|
|
65 ; GCN: .long 165580
|
|
66 ; GCN-NEXT: .long 1
|
|
67 ; GCN-NEXT: .long 165584
|
|
68 ; GCN-NEXT: .long 1
|
|
69 ; GCN-LABEL: {{^}}ps_input_ena_no_inputs:
|
|
70 ; GCN: v_mov_b32_e32 v0, 1.0
|
|
71 ; GCN-NOT: s_endpgm
|
252
|
; Pixel shader using attribute group #1 ("InitialPSInputAddr"="0") that reads
; none of its arguments and simply returns the constant 1.0.
define amdgpu_ps float @ps_input_ena_no_inputs(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #1 {
bb:
  ret float 1.0
}
|
|
76
|
|
77 ; GCN: .long 165580
|
|
78 ; GCN-NEXT: .long 2081
|
|
79 ; GCN-NEXT: .long 165584
|
|
80 ; GCN-NEXT: .long 2081
|
|
81 ; GCN-LABEL: {{^}}ps_input_ena_pos_w:
|
|
82 ; GCN-DAG: v_mov_b32_e32 v0, v4
|
|
83 ; GCN-DAG: v_mov_b32_e32 v1, v2
|
|
84 ; GCN-DAG: v_mov_b32_e32 v2, v3
|
|
85 ; GCN-NOT: s_endpgm
|
252
|
; Pixel shader using attribute group #1 ("InitialPSInputAddr"="0").
; Returns { %arg14, %arg8 reinterpreted as <2 x float> }; these are the only
; two arguments the body reads.
define amdgpu_ps { float, <2 x float> } @ps_input_ena_pos_w(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #1 {
bb:
  ; Bit-level reinterpretation of the integer pair as a float pair.
  %arg8.f = bitcast <2 x i32> %arg8 to <2 x float>
  %ret.0 = insertvalue { float, <2 x float> } undef, float %arg14, 0
  %ret.1 = insertvalue { float, <2 x float> } %ret.0, <2 x float> %arg8.f, 1
  ret { float, <2 x float> } %ret.1
}
|
|
93
|
|
94 ; GCN: .long 165580
|
|
95 ; GCN-NEXT: .long 562
|
|
96 ; GCN-NEXT: .long 165584
|
|
97 ; GCN-NEXT: .long 563
|
|
98 ; GCN-LABEL: {{^}}vgpr_ps_addr1:
|
|
99 ; GCN-DAG: v_mov_b32_e32 v0, v2
|
|
100 ; GCN-DAG: v_mov_b32_e32 v1, v3
|
|
101 ; GCN: v_mov_b32_e32 v2, v4
|
|
102 ; GCN-DAG: v_mov_b32_e32 v3, v6
|
|
103 ; GCN-DAG: v_mov_b32_e32 v4, v8
|
|
104 ; GCN-NOT: s_endpgm
|
252
|
; Pixel shader using attribute group #2 ("InitialPSInputAddr"="1").
; Returns five floats: both lanes of %arg4, lane 0 of %arg7, lane 0 of %arg8
; (each reinterpreted from i32 to float), followed by %arg12.
define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr1(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #2 {
bb:
  ; Pull the integer lanes out of the vector arguments.
  %arg4.lane0 = extractelement <2 x i32> %arg4, i32 0
  %arg4.lane1 = extractelement <2 x i32> %arg4, i32 1
  %arg7.lane0 = extractelement <2 x i32> %arg7, i32 0
  %arg8.lane0 = extractelement <2 x i32> %arg8, i32 0
  ; Reinterpret the raw bits as float; no value conversion takes place.
  %arg4.lane0.f = bitcast i32 %arg4.lane0 to float
  %arg4.lane1.f = bitcast i32 %arg4.lane1 to float
  %arg7.lane0.f = bitcast i32 %arg7.lane0 to float
  %arg8.lane0.f = bitcast i32 %arg8.lane0 to float
  ; Assemble the returned aggregate one element at a time.
  %ret.0 = insertvalue { float, float, float, float, float } undef, float %arg4.lane0.f, 0
  %ret.1 = insertvalue { float, float, float, float, float } %ret.0, float %arg4.lane1.f, 1
  %ret.2 = insertvalue { float, float, float, float, float } %ret.1, float %arg7.lane0.f, 2
  %ret.3 = insertvalue { float, float, float, float, float } %ret.2, float %arg8.lane0.f, 3
  %ret.4 = insertvalue { float, float, float, float, float } %ret.3, float %arg12, 4
  ret { float, float, float, float, float } %ret.4
}
|
|
122
|
|
123 ; GCN: .long 165580
|
|
124 ; GCN-NEXT: .long 562
|
|
125 ; GCN-NEXT: .long 165584
|
|
126 ; GCN-NEXT: .long 631
|
|
127 ; GCN-LABEL: {{^}}vgpr_ps_addr119:
|
|
128 ; GCN-DAG: v_mov_b32_e32 v0, v2
|
|
129 ; GCN-DAG: v_mov_b32_e32 v1, v3
|
|
130 ; GCN-DAG: v_mov_b32_e32 v2, v6
|
|
131 ; GCN-DAG: v_mov_b32_e32 v3, v8
|
|
132 ; GCN-DAG: v_mov_b32_e32 v4, v12
|
|
133 ; GCN-NOT: s_endpgm
|
252
|
; Pixel shader using attribute group #3 ("InitialPSInputAddr"="119").
; Returns five floats: both lanes of %arg4, lane 0 of %arg7, lane 0 of %arg8
; (each reinterpreted from i32 to float), followed by %arg12.
define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr119(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #3 {
bb:
  ; Pull the integer lanes out of the vector arguments.
  %arg4.lane0 = extractelement <2 x i32> %arg4, i32 0
  %arg4.lane1 = extractelement <2 x i32> %arg4, i32 1
  %arg7.lane0 = extractelement <2 x i32> %arg7, i32 0
  %arg8.lane0 = extractelement <2 x i32> %arg8, i32 0
  ; Reinterpret the raw bits as float; no value conversion takes place.
  %arg4.lane0.f = bitcast i32 %arg4.lane0 to float
  %arg4.lane1.f = bitcast i32 %arg4.lane1 to float
  %arg7.lane0.f = bitcast i32 %arg7.lane0 to float
  %arg8.lane0.f = bitcast i32 %arg8.lane0 to float
  ; Assemble the returned aggregate one element at a time.
  %ret.0 = insertvalue { float, float, float, float, float } undef, float %arg4.lane0.f, 0
  %ret.1 = insertvalue { float, float, float, float, float } %ret.0, float %arg4.lane1.f, 1
  %ret.2 = insertvalue { float, float, float, float, float } %ret.1, float %arg7.lane0.f, 2
  %ret.3 = insertvalue { float, float, float, float, float } %ret.2, float %arg8.lane0.f, 3
  %ret.4 = insertvalue { float, float, float, float, float } %ret.3, float %arg12, 4
  ret { float, float, float, float, float } %ret.4
}
|
|
151
|
|
152 ; GCN: .long 165580
|
|
153 ; GCN-NEXT: .long 562
|
|
154 ; GCN-NEXT: .long 165584
|
|
155 ; GCN-NEXT: .long 946
|
|
156 ; GCN-LABEL: {{^}}vgpr_ps_addr418:
|
|
157 ; GCN-NOT: v_mov_b32_e32 v0
|
|
158 ; GCN-NOT: v_mov_b32_e32 v1
|
|
159 ; GCN-NOT: v_mov_b32_e32 v2
|
|
160 ; GCN: v_mov_b32_e32 v3, v4
|
|
161 ; GCN: v_mov_b32_e32 v4, v8
|
|
162 ; GCN-NOT: s_endpgm
|
252
|
; Pixel shader using attribute group #4 ("InitialPSInputAddr"="418").
; Returns five floats: both lanes of %arg4, lane 0 of %arg7, lane 0 of %arg8
; (each reinterpreted from i32 to float), followed by %arg12.
define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr418(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #4 {
bb:
  ; Pull the integer lanes out of the vector arguments.
  %arg4.lane0 = extractelement <2 x i32> %arg4, i32 0
  %arg4.lane1 = extractelement <2 x i32> %arg4, i32 1
  %arg7.lane0 = extractelement <2 x i32> %arg7, i32 0
  %arg8.lane0 = extractelement <2 x i32> %arg8, i32 0
  ; Reinterpret the raw bits as float; no value conversion takes place.
  %arg4.lane0.f = bitcast i32 %arg4.lane0 to float
  %arg4.lane1.f = bitcast i32 %arg4.lane1 to float
  %arg7.lane0.f = bitcast i32 %arg7.lane0 to float
  %arg8.lane0.f = bitcast i32 %arg8.lane0 to float
  ; Assemble the returned aggregate one element at a time.
  %ret.0 = insertvalue { float, float, float, float, float } undef, float %arg4.lane0.f, 0
  %ret.1 = insertvalue { float, float, float, float, float } %ret.0, float %arg4.lane1.f, 1
  %ret.2 = insertvalue { float, float, float, float, float } %ret.1, float %arg7.lane0.f, 2
  %ret.3 = insertvalue { float, float, float, float, float } %ret.2, float %arg8.lane0.f, 3
  %ret.4 = insertvalue { float, float, float, float, float } %ret.3, float %arg12, 4
  ret { float, float, float, float, float } %ret.4
}
|
|
180
|
|
; GCN-LABEL: {{^}}sgpr:
; GCN-DAG: s_mov_b32 s1, s2
; GCN-DAG: s_mov_b32 s2, s3
; GCN-DAG: s_add_{{i|u}}32 s0, s3, 2
; GCN-NOT: s_endpgm
; Returns three integers: { %arg2 + 2, %arg1, %arg2 }.
; The aggregate is built as a single insertvalue chain (%a -> %b -> %c) so
; that every element of the returned struct is defined. Previously %c was
; derived from %a, which left %b dead and element 1 of the result undef.
define amdgpu_vs { i32, i32, i32 } @sgpr(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
bb:
  %x = add i32 %arg2, 2
  %a = insertvalue { i32, i32, i32 } undef, i32 %x, 0
  %b = insertvalue { i32, i32, i32 } %a, i32 %arg1, 1
  ; Continue from %b (not %a) so element 1 is carried into the result.
  %c = insertvalue { i32, i32, i32 } %b, i32 %arg2, 2
  ret { i32, i32, i32 } %c
}
|
|
193
|
|
194 ; GCN-LABEL: {{^}}sgpr_literal:
|
|
195 ; GCN: s_mov_b32 s0, 5
|
|
196 ; GCN-NOT: s_mov_b32 s0, s0
|
|
197 ; GCN-DAG: s_mov_b32 s1, 6
|
|
198 ; GCN-DAG: s_mov_b32 s2, 7
|
|
199 ; GCN-DAG: s_mov_b32 s3, 8
|
|
200 ; GCN-NOT: s_endpgm
|
252
|
; Returns the constant integer aggregate { 5, 6, 7, 8 }.
define amdgpu_vs { i32, i32, i32, i32 } @sgpr_literal(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
bb:
  ; NOTE(review): this sum is never used by the return below; it is kept
  ; as-is so the test input is unchanged.
  %unused.sum = add i32 %arg2, 2
  ret { i32, i32, i32, i32 } { i32 5, i32 6, i32 7, i32 8 }
}
|
|
206
|
|
207 ; GCN-LABEL: {{^}}both:
|
|
208 ; GCN-DAG: exp mrt0 v0, v0, v0, v0 done vm
|
|
209 ; GCN-DAG: v_mov_b32_e32 v1, v0
|
|
210 ; GCN-DAG: s_mov_b32 s1, s2
|
|
211 ; GCN-DAG: s_waitcnt expcnt(0)
|
|
212 ; GCN-DAG: v_add_f32_e32 v0, 1.0, v1
|
|
213 ; GCN-DAG: s_add_{{i|u}}32 s0, s3, 2
|
|
214 ; GCN-DAG: s_mov_b32 s2, s3
|
|
215 ; GCN-NOT: s_endpgm
|
252
|
; Mixes float and integer return values in one aggregate.
; Exports %arg3 on all four channels of export target 0, then returns
; { %arg3 + 1.0, %arg2 + 2, %arg3, %arg1, %arg2 }.
define amdgpu_vs { float, i32, float, i32, i32 } @both(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
bb:
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0
  %fsum = fadd float %arg3, 1.0
  %isum = add i32 %arg2, 2
  ; Fill the five result elements in index order.
  %ret.0 = insertvalue { float, i32, float, i32, i32 } undef, float %fsum, 0
  %ret.1 = insertvalue { float, i32, float, i32, i32 } %ret.0, i32 %isum, 1
  %ret.2 = insertvalue { float, i32, float, i32, i32 } %ret.1, float %arg3, 2
  %ret.3 = insertvalue { float, i32, float, i32, i32 } %ret.2, i32 %arg1, 3
  %ret.4 = insertvalue { float, i32, float, i32, i32 } %ret.3, i32 %arg2, 4
  ret { float, i32, float, i32, i32 } %ret.4
}
|
|
228
|
|
229 ; GCN-LABEL: {{^}}structure_literal:
|
|
230 ; GCN: exp mrt0 v0, v0, v0, v0 done vm
|
|
231
|
|
232 ; GCN-DAG: v_mov_b32_e32 v0, 1.0
|
|
233 ; GCN-DAG: s_mov_b32 s0, 2
|
|
234 ; GCN-DAG: s_mov_b32 s1, 3
|
|
235 ; GCN-DAG: v_mov_b32_e32 v1, 2.0
|
|
236 ; GCN-DAG: v_mov_b32_e32 v2, 4.0
|
|
237 ; GCN-DAG: s_waitcnt expcnt(0)
|
252
|
; Exports %arg3 on all four channels of export target 0, then returns a
; constant nested aggregate: { { 1.0, 2 }, { 3, <2.0, 4.0> } }.
define amdgpu_vs { { float, i32 }, { i32, <2 x float> } } @structure_literal(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
bb:
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0
  ret { { float, i32 }, { i32, <2 x float> } } { { float, i32 } { float 1.0, i32 2 }, { i32, <2 x float> } { i32 3, <2 x float> <float 2.0, float 4.0> } }
}
|
|
243
|
|
244 ; GCN-LABEL: {{^}}ret_return_to_epilog_pseudo_size:
|
|
245 ; GCN: codeLenInByte = 0{{$}}
|
|
; Argument-less pixel shader whose only instruction returns an undef float;
; the preceding check expects the reported code length to be exactly 0.
define amdgpu_ps float @ret_return_to_epilog_pseudo_size() #0 {
  ret float undef
}
|
|
249
|
|
250 declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
|
|
251
|
|
252 attributes #0 = { nounwind }
|
|
253 attributes #1 = { nounwind "InitialPSInputAddr"="0" }
|
|
254 attributes #2 = { nounwind "InitialPSInputAddr"="1" }
|
|
255 attributes #3 = { nounwind "InitialPSInputAddr"="119" }
|
|
256 attributes #4 = { nounwind "InitialPSInputAddr"="418" }
|