Mercurial > hg > CbC > CbC_llvm
comparison llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr-gfx10.mir @ 150:1d019706d866
LLVM10
author | anatofuz |
---|---|
date | Thu, 13 Feb 2020 15:10:13 +0900 |
parents | |
children | 2e18cbf3894f |
comparison
equal
deleted
inserted
replaced
147:c2174574ed3a | 150:1d019706d866 |
---|---|
1 # RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=si-peephole-sdwa -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX1010 -check-prefix=GCN %s | |
# Test overview: the invocation above runs llc for -march=amdgcn -mcpu=gfx1010 with only the
# si-peephole-sdwa pass selected (-run-pass) plus -verify-machineinstrs, and pipes the output
# to FileCheck using the check prefixes GFX1010 and GCN.
# The first function, vop1_instructions, feeds single-source instructions through chains of
# 16-bit V_LSHRREV_B32_e64 / V_LSHLREV_B32_e64 shifts; the check lines below list the _sdwa
# opcodes expected in the output, in order.
# NOTE(review): in the body, %26 is assigned twice in a row (by V_MOV_B32_e64 and then by the
# V_LSHLREV_B32_e64 that also reads %26) - confirm this is intentional.
2 | |
3 # GCN-LABEL: {{^}}name: vop1_instructions | |
4 | |
# Group 1: expectations for the *_e32 inputs in the body (V_MOV_B32_e32, V_FRACT_F32_e32,
# V_SIN_F32_e32, V_CVT_U32_F32_e32, V_CVT_F32_I32_e32); each is expected in its _sdwa form.
5 # GFX1010: %{{[0-9]+}}:vgpr_32 = V_MOV_B32_sdwa 0, %{{[0-9]+}}, 0, 5, 0, 5, implicit $exec | |
6 # GFX1010: %{{[0-9]+}}:vgpr_32 = V_FRACT_F32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 6, implicit $exec | |
7 # GFX1010: %{{[0-9]+}}:vgpr_32 = V_SIN_F32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 5, implicit $exec | |
8 # GFX1010: %{{[0-9]+}}:vgpr_32 = V_CVT_U32_F32_sdwa 0, %{{[0-9]+}}, 0, 5, 0, 5, implicit $exec | |
9 # GFX1010: %{{[0-9]+}}:vgpr_32 = V_CVT_F32_I32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 6, implicit $exec | |
10 | |
# Group 2: expectations for the first run of *_e64 inputs, whose immediate operands are all 0.
11 # GFX1010: %{{[0-9]+}}:vgpr_32 = V_MOV_B32_sdwa 0, %{{[0-9]+}}, 0, 6, 0, 5, implicit $exec | |
12 # GFX1010: %{{[0-9]+}}:vgpr_32 = V_FRACT_F32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 6, implicit $exec | |
13 # GFX1010: %{{[0-9]+}}:vgpr_32 = V_SIN_F32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 5, implicit $exec | |
14 # GFX1010: %{{[0-9]+}}:vgpr_32 = V_CVT_U32_F32_sdwa 0, %{{[0-9]+}}, 0, 5, 0, 5, implicit $exec | |
15 # GFX1010: %{{[0-9]+}}:vgpr_32 = V_CVT_F32_I32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 6, implicit $exec | |
16 | |
# Group 3: expectations for the second run of *_e64 inputs, each of which has one immediate
# operand set to 1 (presumably a source-modifier/clamp/omod field - TODO confirm against the
# instruction definitions). No V_MOV_B32 is checked in this group.
17 # GFX1010: %{{[0-9]+}}:vgpr_32 = V_FRACT_F32_sdwa 1, %{{[0-9]+}}, 0, 0, 5, 0, 5, implicit $exec | |
18 # GFX1010: %{{[0-9]+}}:vgpr_32 = V_SIN_F32_sdwa 0, %{{[0-9]+}}, 1, 0, 5, 0, 5, implicit $exec | |
19 # GFX1010: %{{[0-9]+}}:vgpr_32 = V_CVT_U32_F32_sdwa 1, %{{[0-9]+}}, 0, 5, 0, 5, implicit $exec | |
20 # GFX1010: %{{[0-9]+}}:vgpr_32 = V_CVT_F32_I32_sdwa 0, %{{[0-9]+}}, 0, 1, 5, 0, 5, implicit $exec | |
21 | |
22 --- | |
23 name: vop1_instructions | |
24 tracksRegLiveness: true | |
25 registers: | |
26 - { id: 0, class: vreg_64 } | |
27 - { id: 1, class: vreg_64 } | |
28 - { id: 2, class: sreg_64 } | |
29 - { id: 3, class: vgpr_32 } | |
30 - { id: 4, class: sreg_32_xm0 } | |
31 - { id: 5, class: sreg_32_xm0 } | |
32 - { id: 6, class: sreg_32_xm0 } | |
33 - { id: 7, class: sreg_32_xm0 } | |
34 - { id: 8, class: sreg_32 } | |
35 - { id: 9, class: vgpr_32 } | |
36 - { id: 10, class: vgpr_32 } | |
37 - { id: 11, class: vgpr_32 } | |
38 - { id: 12, class: vgpr_32 } | |
39 - { id: 13, class: vgpr_32 } | |
40 - { id: 14, class: vgpr_32 } | |
41 - { id: 15, class: vgpr_32 } | |
42 - { id: 16, class: vgpr_32 } | |
43 - { id: 17, class: vgpr_32 } | |
44 - { id: 18, class: vgpr_32 } | |
45 - { id: 19, class: vgpr_32 } | |
46 - { id: 20, class: vgpr_32 } | |
47 - { id: 21, class: vgpr_32 } | |
48 - { id: 22, class: vgpr_32 } | |
49 - { id: 23, class: vgpr_32 } | |
50 - { id: 24, class: vgpr_32 } | |
51 - { id: 25, class: vgpr_32 } | |
52 - { id: 26, class: vgpr_32 } | |
53 - { id: 27, class: vgpr_32 } | |
54 - { id: 28, class: vgpr_32 } | |
55 - { id: 29, class: vgpr_32 } | |
56 - { id: 30, class: vgpr_32 } | |
57 - { id: 31, class: vgpr_32 } | |
58 - { id: 32, class: vgpr_32 } | |
59 - { id: 33, class: vgpr_32 } | |
60 - { id: 34, class: vgpr_32 } | |
61 - { id: 35, class: vgpr_32 } | |
62 - { id: 36, class: vgpr_32 } | |
63 - { id: 37, class: vgpr_32 } | |
64 - { id: 38, class: vgpr_32 } | |
65 - { id: 39, class: vgpr_32 } | |
66 - { id: 40, class: vgpr_32 } | |
67 - { id: 41, class: vgpr_32 } | |
68 - { id: 42, class: vgpr_32 } | |
69 - { id: 43, class: vgpr_32 } | |
70 - { id: 44, class: vgpr_32 } | |
71 - { id: 45, class: vgpr_32 } | |
72 - { id: 46, class: vgpr_32 } | |
73 - { id: 47, class: vgpr_32 } | |
74 - { id: 48, class: vgpr_32 } | |
75 - { id: 100, class: vgpr_32 } | |
76 body: | | |
77 bb.0: | |
78 liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $sgpr30_sgpr31 | |
79 | |
80 %2 = COPY $sgpr30_sgpr31 | |
81 %1 = COPY $vgpr2_vgpr3 | |
82 %0 = COPY $vgpr0_vgpr1 | |
83 %3 = FLAT_LOAD_DWORD %1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4) | |
84 | |
85 %5 = S_MOV_B32 65535 | |
86 %6 = S_MOV_B32 65535 | |
87 | |
88 %10 = V_LSHRREV_B32_e64 16, %3, implicit $exec | |
89 %11 = V_MOV_B32_e32 %10, implicit $exec | |
90 %12 = V_LSHLREV_B32_e64 16, %11, implicit $exec | |
91 %14 = V_FRACT_F32_e32 123, implicit $exec | |
92 %15 = V_LSHLREV_B32_e64 16, %14, implicit $exec | |
93 %16 = V_LSHRREV_B32_e64 16, %15, implicit $exec | |
94 %17 = V_SIN_F32_e32 %16, implicit $exec | |
95 %18 = V_LSHLREV_B32_e64 16, %17, implicit $exec | |
96 %19 = V_LSHRREV_B32_e64 16, %18, implicit $exec | |
97 %20 = V_CVT_U32_F32_e32 %19, implicit $exec | |
98 %21 = V_LSHLREV_B32_e64 16, %20, implicit $exec | |
99 %23 = V_CVT_F32_I32_e32 123, implicit $exec | |
100 %24 = V_LSHLREV_B32_e64 16, %23, implicit $exec | |
101 | |
102 %25 = V_LSHRREV_B32_e64 16, %3, implicit $exec | |
103 %26 = V_MOV_B32_e64 %25, implicit $exec | |
104 %26 = V_LSHLREV_B32_e64 16, %26, implicit $exec | |
105 %27 = V_FRACT_F32_e64 0, %6, 0, 0, implicit $exec | |
106 %28 = V_LSHLREV_B32_e64 16, %27, implicit $exec | |
107 %29 = V_LSHRREV_B32_e64 16, %28, implicit $exec | |
108 %30 = V_SIN_F32_e64 0, %29, 0, 0, implicit $exec | |
109 %31 = V_LSHLREV_B32_e64 16, %30, implicit $exec | |
110 %32 = V_LSHRREV_B32_e64 16, %31, implicit $exec | |
111 %33 = V_CVT_U32_F32_e64 0, %32, 0, 0, implicit $exec | |
112 %34 = V_LSHLREV_B32_e64 16, %33, implicit $exec | |
113 %35 = V_CVT_F32_I32_e64 %6, 0, 0, implicit $exec | |
114 %36 = V_LSHLREV_B32_e64 16, %35, implicit $exec | |
115 | |
116 | |
117 %37 = V_LSHRREV_B32_e64 16, %36, implicit $exec | |
118 %38 = V_FRACT_F32_e64 1, %37, 0, 0, implicit $exec | |
119 %39 = V_LSHLREV_B32_e64 16, %38, implicit $exec | |
120 %40 = V_LSHRREV_B32_e64 16, %39, implicit $exec | |
121 %41 = V_SIN_F32_e64 0, %40, 1, 0, implicit $exec | |
122 %42 = V_LSHLREV_B32_e64 16, %41, implicit $exec | |
123 %43 = V_LSHRREV_B32_e64 16, %42, implicit $exec | |
124 %44 = V_CVT_U32_F32_e64 1, %43, 0, 0, implicit $exec | |
125 %45 = V_LSHLREV_B32_e64 16, %44, implicit $exec | |
126 %46 = V_LSHRREV_B32_e64 16, %45, implicit $exec | |
127 %47 = V_CVT_F32_I32_e64 %46, 0, 1, implicit $exec | |
128 %48 = V_LSHLREV_B32_e64 16, %47, implicit $exec | |
129 | |
130 | |
131 %100 = V_MOV_B32_e32 %48, implicit $exec | |
132 | |
133 FLAT_STORE_DWORD %0, %100, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4) | |
134 $sgpr30_sgpr31 = COPY %2 | |
135 S_SETPC_B64_return $sgpr30_sgpr31 | |
136 | |
137 ... | |
138 --- | |
# Second function, vop2_instructions: two-source instructions (V_AND_B32, V_ADD_F32, V_SUB_F16,
# V_FMAC_F32, V_FMAC_F16) whose inputs come from 16-bit shifts and V_BFE_U32 %x, 8, 8 extracts.
# The check lines below expect AND/ADD/SUB in _sdwa form, while every V_FMAC check names the
# original _e32 / _e64 opcode rather than an _sdwa one.
139 # GCN-LABEL: {{^}}name: vop2_instructions | |
140 | |
# Group 1: expectations for the *_e32 inputs in the body.
141 # GFX1010: %{{[0-9]+}}:vgpr_32 = V_AND_B32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 5, 0, 6, 5, implicit $exec | |
142 # GFX1010: %{{[0-9]+}}:vgpr_32 = V_ADD_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 5, 0, 5, 1, implicit $exec | |
143 # GFX1010: %{{[0-9]+}}:vgpr_32 = V_SUB_F16_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 6, 0, 5, 1, implicit $exec | |
144 # GFX1010: %{{[0-9]+}}:vgpr_32 = V_FMAC_F32_e32 %{{[0-9]+}}, %{{[0-9]+}}, %{{[0-9]+}}, implicit $exec | |
145 # GFX1010: %{{[0-9]+}}:vgpr_32 = V_FMAC_F16_e32 %{{[0-9]+}}, %{{[0-9]+}}, %{{[0-9]+}}, implicit $exec | |
146 | |
# Group 2: expectations for the first run of *_e64 inputs (immediate operands 0, apart from
# the literal 23 used as a source).
147 # GFX1010: %{{[0-9]+}}:vgpr_32 = V_AND_B32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 5, 0, 6, 5, implicit $exec | |
148 # GFX1010: %{{[0-9]+}}:vgpr_32 = V_ADD_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 5, 0, 5, 1, implicit $exec | |
149 # GFX1010: %{{[0-9]+}}:vgpr_32 = V_SUB_F16_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 5, 0, 6, 1, implicit $exec | |
150 # GFX1010: %{{[0-9]+}}:vgpr_32 = V_FMAC_F32_e64 0, 23, 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, implicit $exec | |
151 # GFX1010: %{{[0-9]+}}:vgpr_32 = V_FMAC_F16_e64 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, implicit $exec | |
152 | |
# Group 3: expectations for the second run of *_e64 inputs, which set some immediate operands
# to 1 or 2 (presumably modifier fields - TODO confirm against the instruction definitions).
# V_AND_B32 is not part of this group.
153 # GFX1010: %{{[0-9]+}}:vgpr_32 = V_ADD_F32_sdwa 0, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 0, 5, 0, 5, 1, implicit $exec | |
154 # GFX1010: %{{[0-9]+}}:vgpr_32 = V_SUB_F16_sdwa 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 0, 5, 0, 6, 1, implicit $exec | |
155 # GFX1010: %{{[0-9]+}}:vgpr_32 = V_FMAC_F32_e64 1, 23, 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 1, 0, implicit $exec | |
156 # GFX1010: %{{[0-9]+}}:vgpr_32 = V_FMAC_F16_e64 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 2, implicit $exec | |
157 | |
158 name: vop2_instructions | |
159 tracksRegLiveness: true | |
160 registers: | |
161 - { id: 0, class: vreg_64 } | |
162 - { id: 1, class: vreg_64 } | |
163 - { id: 2, class: sreg_64 } | |
164 - { id: 3, class: vgpr_32 } | |
165 - { id: 4, class: sreg_32_xm0 } | |
166 - { id: 5, class: sreg_32_xm0 } | |
167 - { id: 6, class: sreg_32_xm0 } | |
168 - { id: 7, class: sreg_32_xm0 } | |
169 - { id: 8, class: sreg_32 } | |
170 - { id: 9, class: vgpr_32 } | |
171 - { id: 10, class: vgpr_32 } | |
172 - { id: 11, class: vgpr_32 } | |
173 - { id: 12, class: vgpr_32 } | |
174 - { id: 13, class: vgpr_32 } | |
175 - { id: 14, class: vgpr_32 } | |
176 - { id: 15, class: vgpr_32 } | |
177 - { id: 16, class: vgpr_32 } | |
178 - { id: 17, class: vgpr_32 } | |
179 - { id: 18, class: vgpr_32 } | |
180 - { id: 19, class: vgpr_32 } | |
181 - { id: 20, class: vgpr_32 } | |
182 - { id: 21, class: vgpr_32 } | |
183 - { id: 22, class: vgpr_32 } | |
184 - { id: 23, class: vgpr_32 } | |
185 - { id: 24, class: vgpr_32 } | |
186 - { id: 25, class: vgpr_32 } | |
187 - { id: 26, class: vgpr_32 } | |
188 - { id: 27, class: vgpr_32 } | |
189 - { id: 28, class: vgpr_32 } | |
190 - { id: 29, class: vgpr_32 } | |
191 - { id: 30, class: vgpr_32 } | |
192 - { id: 31, class: vgpr_32 } | |
193 - { id: 32, class: vgpr_32 } | |
194 - { id: 33, class: vgpr_32 } | |
195 - { id: 34, class: vgpr_32 } | |
196 - { id: 35, class: vgpr_32 } | |
197 - { id: 36, class: vgpr_32 } | |
198 - { id: 37, class: vgpr_32 } | |
199 - { id: 38, class: vgpr_32 } | |
200 - { id: 39, class: vgpr_32 } | |
201 - { id: 40, class: vgpr_32 } | |
202 - { id: 41, class: vgpr_32 } | |
203 - { id: 42, class: vgpr_32 } | |
204 - { id: 43, class: vgpr_32 } | |
205 - { id: 44, class: vgpr_32 } | |
206 - { id: 45, class: vgpr_32 } | |
207 - { id: 46, class: vgpr_32 } | |
208 - { id: 47, class: vgpr_32 } | |
209 - { id: 48, class: vgpr_32 } | |
210 - { id: 49, class: vgpr_32 } | |
211 - { id: 50, class: vgpr_32 } | |
212 - { id: 51, class: vgpr_32 } | |
213 - { id: 52, class: vgpr_32 } | |
214 - { id: 53, class: vgpr_32 } | |
215 - { id: 54, class: vgpr_32 } | |
216 - { id: 55, class: vgpr_32 } | |
217 - { id: 56, class: vgpr_32 } | |
218 - { id: 57, class: vgpr_32 } | |
219 - { id: 58, class: vgpr_32 } | |
220 - { id: 59, class: vgpr_32 } | |
221 - { id: 60, class: vgpr_32 } | |
222 - { id: 100, class: vgpr_32 } | |
223 body: | | |
224 bb.0: | |
225 liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $sgpr30_sgpr31 | |
226 | |
227 %2 = COPY $sgpr30_sgpr31 | |
228 %1 = COPY $vgpr2_vgpr3 | |
229 %0 = COPY $vgpr0_vgpr1 | |
230 %3 = FLAT_LOAD_DWORD %1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4) | |
231 | |
232 %5 = S_MOV_B32 65535 | |
233 %6 = S_MOV_B32 65535 | |
234 | |
235 %11 = V_LSHRREV_B32_e64 16, %3, implicit $exec | |
236 %12 = V_AND_B32_e32 %6, %11, implicit $exec | |
237 %13 = V_LSHLREV_B32_e64 16, %12, implicit $exec | |
238 %14 = V_LSHRREV_B32_e64 16, %13, implicit $exec | |
239 %15 = V_BFE_U32 %13, 8, 8, implicit $exec | |
240 %16 = V_ADD_F32_e32 %14, %15, implicit $exec | |
241 %17 = V_LSHLREV_B32_e64 16, %16, implicit $exec | |
242 %18 = V_LSHRREV_B32_e64 16, %17, implicit $exec | |
243 %19 = V_BFE_U32 %17, 8, 8, implicit $exec | |
244 %20 = V_SUB_F16_e32 %18, %19, implicit $exec | |
245 %21 = V_LSHLREV_B32_e64 16, %20, implicit $exec | |
246 %22 = V_BFE_U32 %20, 8, 8, implicit $exec | |
247 %23 = V_FMAC_F32_e32 %21, %22, %22, implicit $exec | |
248 %24 = V_LSHLREV_B32_e64 16, %23, implicit $exec | |
249 %25 = V_LSHRREV_B32_e64 16, %24, implicit $exec | |
250 %26 = V_BFE_U32 %24, 8, 8, implicit $exec | |
251 %27 = V_FMAC_F16_e32 %25, %26, %26, implicit $exec | |
252 %28 = V_LSHLREV_B32_e64 16, %27, implicit $exec | |
253 | |
254 %29 = V_LSHRREV_B32_e64 16, %28, implicit $exec | |
255 %30 = V_AND_B32_e64 23, %29, implicit $exec | |
256 %31 = V_LSHLREV_B32_e64 16, %30, implicit $exec | |
257 %32 = V_LSHRREV_B32_e64 16, %31, implicit $exec | |
258 %33 = V_BFE_U32 %31, 8, 8, implicit $exec | |
259 %34 = V_ADD_F32_e64 0, %32, 0, %33, 0, 0, implicit $exec | |
260 %35 = V_LSHLREV_B32_e64 16, %34, implicit $exec | |
261 %37 = V_BFE_U32 %35, 8, 8, implicit $exec | |
262 %38 = V_SUB_F16_e64 0, 23, 0, %37, 0, 0, implicit $exec | |
263 %39 = V_LSHLREV_B32_e64 16, %38, implicit $exec | |
264 %40 = V_BFE_U32 %39, 8, 8, implicit $exec | |
265 %41 = V_FMAC_F32_e64 0, 23, 0, %40, 0, %40, 0, 0, implicit $exec | |
266 %42 = V_LSHLREV_B32_e64 16, %41, implicit $exec | |
267 %43 = V_LSHRREV_B32_e64 16, %42, implicit $exec | |
268 %44 = V_BFE_U32 %42, 8, 8, implicit $exec | |
269 %45 = V_FMAC_F16_e64 0, %43, 0, %44, 0, %44, 0, 0, implicit $exec | |
270 %46 = V_LSHLREV_B32_e64 16, %45, implicit $exec | |
271 | |
272 %47 = V_LSHRREV_B32_e64 16, %46, implicit $exec | |
273 %48 = V_BFE_U32 %46, 8, 8, implicit $exec | |
274 %49 = V_ADD_F32_e64 0, %47, 1, %48, 0, 0, implicit $exec | |
275 %50 = V_LSHLREV_B32_e64 16, %49, implicit $exec | |
276 %51 = V_BFE_U32 %50, 8, 8, implicit $exec | |
277 %52 = V_SUB_F16_e64 1, 23, 1, %51, 0, 0, implicit $exec | |
278 %53 = V_LSHLREV_B32_e64 16, %52, implicit $exec | |
279 %54 = V_BFE_U32 %53, 8, 8, implicit $exec | |
280 %55 = V_FMAC_F32_e64 1, 23, 1, %54, 1, %54, 1, 0, implicit $exec | |
281 %56 = V_LSHLREV_B32_e64 16, %55, implicit $exec | |
282 %57 = V_LSHRREV_B32_e64 16, %56, implicit $exec | |
283 %58 = V_BFE_U32 %56, 8, 8, implicit $exec | |
284 %59 = V_FMAC_F16_e64 1, %57, 1, %58, 1, %58, 0, 2, implicit $exec | |
285 %60 = V_LSHLREV_B32_e64 16, %59, implicit $exec | |
286 | |
287 %100 = V_MOV_B32_e32 %60, implicit $exec | |
288 | |
289 FLAT_STORE_DWORD %0, %100, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4) | |
290 $sgpr30_sgpr31 = COPY %2 | |
291 S_SETPC_B64_return $sgpr30_sgpr31 | |
292 | |
293 ... |