annotate llvm/test/CodeGen/AMDGPU/r600.extract-lowbits.ll @ 150:1d019706d866

LLVM10
author anatofuz
date Thu, 13 Feb 2020 15:10:13 +0900
parents
children 1f2b6ac9f198
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
150
anatofuz
parents:
diff changeset
1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
anatofuz
parents:
diff changeset
2 ; RUN: llc -mtriple=r600-- -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG %s
anatofuz
parents:
diff changeset
3 ; RUN: llc -mtriple=r600-- -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=CM %s
anatofuz
parents:
diff changeset
4
anatofuz
parents:
diff changeset
5 ; Loosely based on test/CodeGen/{X86,AArch64}/extract-lowbits.ll,
anatofuz
parents:
diff changeset
6 ; but with all 64-bit tests and all tests involving loads dropped.
anatofuz
parents:
diff changeset
7
anatofuz
parents:
diff changeset
8 ; Patterns:
anatofuz
parents:
diff changeset
9 ; a) x & ((1 << nbits) - 1)
anatofuz
parents:
diff changeset
10 ; b) x & ~(-1 << nbits)
anatofuz
parents:
diff changeset
11 ; c) x & (-1 >> (32 - nbits))
anatofuz
parents:
diff changeset
12 ; d) x << (32 - nbits) >> (32 - nbits)
anatofuz
parents:
diff changeset
13 ; are equivalent.
anatofuz
parents:
diff changeset
14
anatofuz
parents:
diff changeset
15 ; ---------------------------------------------------------------------------- ;
anatofuz
parents:
diff changeset
16 ; Pattern a. 32-bit
anatofuz
parents:
diff changeset
17 ; ---------------------------------------------------------------------------- ;
anatofuz
parents:
diff changeset
18
anatofuz
parents:
diff changeset
19 define amdgpu_kernel void @bzhi32_a0(i32 %val, i32 %numlowbits, i32 addrspace(1)* %out) {
anatofuz
parents:
diff changeset
20 ; EG-LABEL: bzhi32_a0:
anatofuz
parents:
diff changeset
21 ; EG: ; %bb.0:
anatofuz
parents:
diff changeset
22 ; EG-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[]
anatofuz
parents:
diff changeset
23 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1
anatofuz
parents:
diff changeset
24 ; EG-NEXT: CF_END
anatofuz
parents:
diff changeset
25 ; EG-NEXT: PAD
anatofuz
parents:
diff changeset
26 ; EG-NEXT: ALU clause starting at 4:
anatofuz
parents:
diff changeset
27 ; EG-NEXT: LSHR * T0.X, KC0[2].W, literal.x,
anatofuz
parents:
diff changeset
28 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
anatofuz
parents:
diff changeset
29 ; EG-NEXT: BFE_UINT * T1.X, KC0[2].Y, 0.0, KC0[2].Z,
anatofuz
parents:
diff changeset
30 ;
anatofuz
parents:
diff changeset
31 ; CM-LABEL: bzhi32_a0:
anatofuz
parents:
diff changeset
32 ; CM: ; %bb.0:
anatofuz
parents:
diff changeset
33 ; CM-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[]
anatofuz
parents:
diff changeset
34 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1.X, T0.X
anatofuz
parents:
diff changeset
35 ; CM-NEXT: CF_END
anatofuz
parents:
diff changeset
36 ; CM-NEXT: PAD
anatofuz
parents:
diff changeset
37 ; CM-NEXT: ALU clause starting at 4:
anatofuz
parents:
diff changeset
38 ; CM-NEXT: LSHR * T0.X, KC0[2].W, literal.x,
anatofuz
parents:
diff changeset
39 ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
anatofuz
parents:
diff changeset
40 ; CM-NEXT: BFE_UINT * T1.X, KC0[2].Y, 0.0, KC0[2].Z,
anatofuz
parents:
diff changeset
41 %onebit = shl i32 1, %numlowbits
anatofuz
parents:
diff changeset
42 %mask = add nsw i32 %onebit, -1
anatofuz
parents:
diff changeset
43 %masked = and i32 %mask, %val
anatofuz
parents:
diff changeset
44 store i32 %masked, i32 addrspace(1)* %out
anatofuz
parents:
diff changeset
45 ret void
anatofuz
parents:
diff changeset
46 }
anatofuz
parents:
diff changeset
47
anatofuz
parents:
diff changeset
48 define amdgpu_kernel void @bzhi32_a1_indexzext(i32 %val, i8 zeroext %numlowbits, i32 addrspace(1)* %out) {
anatofuz
parents:
diff changeset
49 ; EG-LABEL: bzhi32_a1_indexzext:
anatofuz
parents:
diff changeset
50 ; EG: ; %bb.0:
anatofuz
parents:
diff changeset
51 ; EG-NEXT: ALU 0, @8, KC0[], KC1[]
anatofuz
parents:
diff changeset
52 ; EG-NEXT: TEX 0 @6
anatofuz
parents:
diff changeset
53 ; EG-NEXT: ALU 4, @9, KC0[CB0:0-32], KC1[]
anatofuz
parents:
diff changeset
54 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
anatofuz
parents:
diff changeset
55 ; EG-NEXT: CF_END
anatofuz
parents:
diff changeset
56 ; EG-NEXT: PAD
anatofuz
parents:
diff changeset
57 ; EG-NEXT: Fetch clause starting at 6:
anatofuz
parents:
diff changeset
58 ; EG-NEXT: VTX_READ_8 T0.X, T0.X, 40, #3
anatofuz
parents:
diff changeset
59 ; EG-NEXT: ALU clause starting at 8:
anatofuz
parents:
diff changeset
60 ; EG-NEXT: MOV * T0.X, 0.0,
anatofuz
parents:
diff changeset
61 ; EG-NEXT: ALU clause starting at 9:
anatofuz
parents:
diff changeset
62 ; EG-NEXT: BFE_INT * T0.W, T0.X, 0.0, literal.x,
anatofuz
parents:
diff changeset
63 ; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
anatofuz
parents:
diff changeset
64 ; EG-NEXT: BFE_UINT T0.X, KC0[2].Y, 0.0, PV.W,
anatofuz
parents:
diff changeset
65 ; EG-NEXT: LSHR * T1.X, KC0[2].W, literal.x,
anatofuz
parents:
diff changeset
66 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
anatofuz
parents:
diff changeset
67 ;
anatofuz
parents:
diff changeset
68 ; CM-LABEL: bzhi32_a1_indexzext:
anatofuz
parents:
diff changeset
69 ; CM: ; %bb.0:
anatofuz
parents:
diff changeset
70 ; CM-NEXT: ALU 0, @8, KC0[], KC1[]
anatofuz
parents:
diff changeset
71 ; CM-NEXT: TEX 0 @6
anatofuz
parents:
diff changeset
72 ; CM-NEXT: ALU 4, @9, KC0[CB0:0-32], KC1[]
anatofuz
parents:
diff changeset
73 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
anatofuz
parents:
diff changeset
74 ; CM-NEXT: CF_END
anatofuz
parents:
diff changeset
75 ; CM-NEXT: PAD
anatofuz
parents:
diff changeset
76 ; CM-NEXT: Fetch clause starting at 6:
anatofuz
parents:
diff changeset
77 ; CM-NEXT: VTX_READ_8 T0.X, T0.X, 40, #3
anatofuz
parents:
diff changeset
78 ; CM-NEXT: ALU clause starting at 8:
anatofuz
parents:
diff changeset
79 ; CM-NEXT: MOV * T0.X, 0.0,
anatofuz
parents:
diff changeset
80 ; CM-NEXT: ALU clause starting at 9:
anatofuz
parents:
diff changeset
81 ; CM-NEXT: BFE_INT * T0.W, T0.X, 0.0, literal.x,
anatofuz
parents:
diff changeset
82 ; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
anatofuz
parents:
diff changeset
83 ; CM-NEXT: BFE_UINT * T0.X, KC0[2].Y, 0.0, PV.W,
anatofuz
parents:
diff changeset
84 ; CM-NEXT: LSHR * T1.X, KC0[2].W, literal.x,
anatofuz
parents:
diff changeset
85 ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
anatofuz
parents:
diff changeset
86 %conv = zext i8 %numlowbits to i32
anatofuz
parents:
diff changeset
87 %onebit = shl i32 1, %conv
anatofuz
parents:
diff changeset
88 %mask = add nsw i32 %onebit, -1
anatofuz
parents:
diff changeset
89 %masked = and i32 %mask, %val
anatofuz
parents:
diff changeset
90 store i32 %masked, i32 addrspace(1)* %out
anatofuz
parents:
diff changeset
91 ret void
anatofuz
parents:
diff changeset
92 }
anatofuz
parents:
diff changeset
93
anatofuz
parents:
diff changeset
94 define amdgpu_kernel void @bzhi32_a4_commutative(i32 %val, i32 %numlowbits, i32 addrspace(1)* %out) {
anatofuz
parents:
diff changeset
95 ; EG-LABEL: bzhi32_a4_commutative:
anatofuz
parents:
diff changeset
96 ; EG: ; %bb.0:
anatofuz
parents:
diff changeset
97 ; EG-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[]
anatofuz
parents:
diff changeset
98 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1
anatofuz
parents:
diff changeset
99 ; EG-NEXT: CF_END
anatofuz
parents:
diff changeset
100 ; EG-NEXT: PAD
anatofuz
parents:
diff changeset
101 ; EG-NEXT: ALU clause starting at 4:
anatofuz
parents:
diff changeset
102 ; EG-NEXT: LSHR * T0.X, KC0[2].W, literal.x,
anatofuz
parents:
diff changeset
103 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
anatofuz
parents:
diff changeset
104 ; EG-NEXT: BFE_UINT * T1.X, KC0[2].Y, 0.0, KC0[2].Z,
anatofuz
parents:
diff changeset
105 ;
anatofuz
parents:
diff changeset
106 ; CM-LABEL: bzhi32_a4_commutative:
anatofuz
parents:
diff changeset
107 ; CM: ; %bb.0:
anatofuz
parents:
diff changeset
108 ; CM-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[]
anatofuz
parents:
diff changeset
109 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1.X, T0.X
anatofuz
parents:
diff changeset
110 ; CM-NEXT: CF_END
anatofuz
parents:
diff changeset
111 ; CM-NEXT: PAD
anatofuz
parents:
diff changeset
112 ; CM-NEXT: ALU clause starting at 4:
anatofuz
parents:
diff changeset
113 ; CM-NEXT: LSHR * T0.X, KC0[2].W, literal.x,
anatofuz
parents:
diff changeset
114 ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
anatofuz
parents:
diff changeset
115 ; CM-NEXT: BFE_UINT * T1.X, KC0[2].Y, 0.0, KC0[2].Z,
anatofuz
parents:
diff changeset
116 %onebit = shl i32 1, %numlowbits
anatofuz
parents:
diff changeset
117 %mask = add nsw i32 %onebit, -1
anatofuz
parents:
diff changeset
118 %masked = and i32 %val, %mask ; swapped order
anatofuz
parents:
diff changeset
119 store i32 %masked, i32 addrspace(1)* %out
anatofuz
parents:
diff changeset
120 ret void
anatofuz
parents:
diff changeset
121 }
anatofuz
parents:
diff changeset
122
anatofuz
parents:
diff changeset
123 ; ---------------------------------------------------------------------------- ;
anatofuz
parents:
diff changeset
124 ; Pattern b. 32-bit
anatofuz
parents:
diff changeset
125 ; ---------------------------------------------------------------------------- ;
anatofuz
parents:
diff changeset
126
anatofuz
parents:
diff changeset
127 define amdgpu_kernel void @bzhi32_b0(i32 %val, i32 %numlowbits, i32 addrspace(1)* %out) {
anatofuz
parents:
diff changeset
128 ; EG-LABEL: bzhi32_b0:
anatofuz
parents:
diff changeset
129 ; EG: ; %bb.0:
anatofuz
parents:
diff changeset
130 ; EG-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[]
anatofuz
parents:
diff changeset
131 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1
anatofuz
parents:
diff changeset
132 ; EG-NEXT: CF_END
anatofuz
parents:
diff changeset
133 ; EG-NEXT: PAD
anatofuz
parents:
diff changeset
134 ; EG-NEXT: ALU clause starting at 4:
anatofuz
parents:
diff changeset
135 ; EG-NEXT: LSHR * T0.X, KC0[2].W, literal.x,
anatofuz
parents:
diff changeset
136 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
anatofuz
parents:
diff changeset
137 ; EG-NEXT: BFE_UINT * T1.X, KC0[2].Y, 0.0, KC0[2].Z,
anatofuz
parents:
diff changeset
138 ;
anatofuz
parents:
diff changeset
139 ; CM-LABEL: bzhi32_b0:
anatofuz
parents:
diff changeset
140 ; CM: ; %bb.0:
anatofuz
parents:
diff changeset
141 ; CM-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[]
anatofuz
parents:
diff changeset
142 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1.X, T0.X
anatofuz
parents:
diff changeset
143 ; CM-NEXT: CF_END
anatofuz
parents:
diff changeset
144 ; CM-NEXT: PAD
anatofuz
parents:
diff changeset
145 ; CM-NEXT: ALU clause starting at 4:
anatofuz
parents:
diff changeset
146 ; CM-NEXT: LSHR * T0.X, KC0[2].W, literal.x,
anatofuz
parents:
diff changeset
147 ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
anatofuz
parents:
diff changeset
148 ; CM-NEXT: BFE_UINT * T1.X, KC0[2].Y, 0.0, KC0[2].Z,
anatofuz
parents:
diff changeset
149 %notmask = shl i32 -1, %numlowbits
anatofuz
parents:
diff changeset
150 %mask = xor i32 %notmask, -1
anatofuz
parents:
diff changeset
151 %masked = and i32 %mask, %val
anatofuz
parents:
diff changeset
152 store i32 %masked, i32 addrspace(1)* %out
anatofuz
parents:
diff changeset
153 ret void
anatofuz
parents:
diff changeset
154 }
anatofuz
parents:
diff changeset
155
anatofuz
parents:
diff changeset
156 define amdgpu_kernel void @bzhi32_b1_indexzext(i32 %val, i8 zeroext %numlowbits, i32 addrspace(1)* %out) {
anatofuz
parents:
diff changeset
157 ; EG-LABEL: bzhi32_b1_indexzext:
anatofuz
parents:
diff changeset
158 ; EG: ; %bb.0:
anatofuz
parents:
diff changeset
159 ; EG-NEXT: ALU 0, @8, KC0[], KC1[]
anatofuz
parents:
diff changeset
160 ; EG-NEXT: TEX 0 @6
anatofuz
parents:
diff changeset
161 ; EG-NEXT: ALU 4, @9, KC0[CB0:0-32], KC1[]
anatofuz
parents:
diff changeset
162 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
anatofuz
parents:
diff changeset
163 ; EG-NEXT: CF_END
anatofuz
parents:
diff changeset
164 ; EG-NEXT: PAD
anatofuz
parents:
diff changeset
165 ; EG-NEXT: Fetch clause starting at 6:
anatofuz
parents:
diff changeset
166 ; EG-NEXT: VTX_READ_8 T0.X, T0.X, 40, #3
anatofuz
parents:
diff changeset
167 ; EG-NEXT: ALU clause starting at 8:
anatofuz
parents:
diff changeset
168 ; EG-NEXT: MOV * T0.X, 0.0,
anatofuz
parents:
diff changeset
169 ; EG-NEXT: ALU clause starting at 9:
anatofuz
parents:
diff changeset
170 ; EG-NEXT: BFE_INT * T0.W, T0.X, 0.0, literal.x,
anatofuz
parents:
diff changeset
171 ; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
anatofuz
parents:
diff changeset
172 ; EG-NEXT: BFE_UINT T0.X, KC0[2].Y, 0.0, PV.W,
anatofuz
parents:
diff changeset
173 ; EG-NEXT: LSHR * T1.X, KC0[2].W, literal.x,
anatofuz
parents:
diff changeset
174 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
anatofuz
parents:
diff changeset
175 ;
anatofuz
parents:
diff changeset
176 ; CM-LABEL: bzhi32_b1_indexzext:
anatofuz
parents:
diff changeset
177 ; CM: ; %bb.0:
anatofuz
parents:
diff changeset
178 ; CM-NEXT: ALU 0, @8, KC0[], KC1[]
anatofuz
parents:
diff changeset
179 ; CM-NEXT: TEX 0 @6
anatofuz
parents:
diff changeset
180 ; CM-NEXT: ALU 4, @9, KC0[CB0:0-32], KC1[]
anatofuz
parents:
diff changeset
181 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
anatofuz
parents:
diff changeset
182 ; CM-NEXT: CF_END
anatofuz
parents:
diff changeset
183 ; CM-NEXT: PAD
anatofuz
parents:
diff changeset
184 ; CM-NEXT: Fetch clause starting at 6:
anatofuz
parents:
diff changeset
185 ; CM-NEXT: VTX_READ_8 T0.X, T0.X, 40, #3
anatofuz
parents:
diff changeset
186 ; CM-NEXT: ALU clause starting at 8:
anatofuz
parents:
diff changeset
187 ; CM-NEXT: MOV * T0.X, 0.0,
anatofuz
parents:
diff changeset
188 ; CM-NEXT: ALU clause starting at 9:
anatofuz
parents:
diff changeset
189 ; CM-NEXT: BFE_INT * T0.W, T0.X, 0.0, literal.x,
anatofuz
parents:
diff changeset
190 ; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
anatofuz
parents:
diff changeset
191 ; CM-NEXT: BFE_UINT * T0.X, KC0[2].Y, 0.0, PV.W,
anatofuz
parents:
diff changeset
192 ; CM-NEXT: LSHR * T1.X, KC0[2].W, literal.x,
anatofuz
parents:
diff changeset
193 ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
anatofuz
parents:
diff changeset
194 %conv = zext i8 %numlowbits to i32
anatofuz
parents:
diff changeset
195 %notmask = shl i32 -1, %conv
anatofuz
parents:
diff changeset
196 %mask = xor i32 %notmask, -1
anatofuz
parents:
diff changeset
197 %masked = and i32 %mask, %val
anatofuz
parents:
diff changeset
198 store i32 %masked, i32 addrspace(1)* %out
anatofuz
parents:
diff changeset
199 ret void
anatofuz
parents:
diff changeset
200 }
anatofuz
parents:
diff changeset
201
anatofuz
parents:
diff changeset
202 define amdgpu_kernel void @bzhi32_b4_commutative(i32 %val, i32 %numlowbits, i32 addrspace(1)* %out) {
anatofuz
parents:
diff changeset
203 ; EG-LABEL: bzhi32_b4_commutative:
anatofuz
parents:
diff changeset
204 ; EG: ; %bb.0:
anatofuz
parents:
diff changeset
205 ; EG-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[]
anatofuz
parents:
diff changeset
206 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1
anatofuz
parents:
diff changeset
207 ; EG-NEXT: CF_END
anatofuz
parents:
diff changeset
208 ; EG-NEXT: PAD
anatofuz
parents:
diff changeset
209 ; EG-NEXT: ALU clause starting at 4:
anatofuz
parents:
diff changeset
210 ; EG-NEXT: LSHR * T0.X, KC0[2].W, literal.x,
anatofuz
parents:
diff changeset
211 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
anatofuz
parents:
diff changeset
212 ; EG-NEXT: BFE_UINT * T1.X, KC0[2].Y, 0.0, KC0[2].Z,
anatofuz
parents:
diff changeset
213 ;
anatofuz
parents:
diff changeset
214 ; CM-LABEL: bzhi32_b4_commutative:
anatofuz
parents:
diff changeset
215 ; CM: ; %bb.0:
anatofuz
parents:
diff changeset
216 ; CM-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[]
anatofuz
parents:
diff changeset
217 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1.X, T0.X
anatofuz
parents:
diff changeset
218 ; CM-NEXT: CF_END
anatofuz
parents:
diff changeset
219 ; CM-NEXT: PAD
anatofuz
parents:
diff changeset
220 ; CM-NEXT: ALU clause starting at 4:
anatofuz
parents:
diff changeset
221 ; CM-NEXT: LSHR * T0.X, KC0[2].W, literal.x,
anatofuz
parents:
diff changeset
222 ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
anatofuz
parents:
diff changeset
223 ; CM-NEXT: BFE_UINT * T1.X, KC0[2].Y, 0.0, KC0[2].Z,
anatofuz
parents:
diff changeset
224 %notmask = shl i32 -1, %numlowbits
anatofuz
parents:
diff changeset
225 %mask = xor i32 %notmask, -1
anatofuz
parents:
diff changeset
226 %masked = and i32 %val, %mask ; swapped order
anatofuz
parents:
diff changeset
227 store i32 %masked, i32 addrspace(1)* %out
anatofuz
parents:
diff changeset
228 ret void
anatofuz
parents:
diff changeset
229 }
anatofuz
parents:
diff changeset
230
anatofuz
parents:
diff changeset
231 ; ---------------------------------------------------------------------------- ;
anatofuz
parents:
diff changeset
232 ; Pattern c. 32-bit
anatofuz
parents:
diff changeset
233 ; ---------------------------------------------------------------------------- ;
anatofuz
parents:
diff changeset
234
anatofuz
parents:
diff changeset
235 define amdgpu_kernel void @bzhi32_c0(i32 %val, i32 %numlowbits, i32 addrspace(1)* %out) {
anatofuz
parents:
diff changeset
236 ; EG-LABEL: bzhi32_c0:
anatofuz
parents:
diff changeset
237 ; EG: ; %bb.0:
anatofuz
parents:
diff changeset
238 ; EG-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[]
anatofuz
parents:
diff changeset
239 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1
anatofuz
parents:
diff changeset
240 ; EG-NEXT: CF_END
anatofuz
parents:
diff changeset
241 ; EG-NEXT: PAD
anatofuz
parents:
diff changeset
242 ; EG-NEXT: ALU clause starting at 4:
anatofuz
parents:
diff changeset
243 ; EG-NEXT: LSHR * T0.X, KC0[2].W, literal.x,
anatofuz
parents:
diff changeset
244 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
anatofuz
parents:
diff changeset
245 ; EG-NEXT: BFE_UINT * T1.X, KC0[2].Y, 0.0, KC0[2].Z,
anatofuz
parents:
diff changeset
246 ;
anatofuz
parents:
diff changeset
247 ; CM-LABEL: bzhi32_c0:
anatofuz
parents:
diff changeset
248 ; CM: ; %bb.0:
anatofuz
parents:
diff changeset
249 ; CM-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[]
anatofuz
parents:
diff changeset
250 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1.X, T0.X
anatofuz
parents:
diff changeset
251 ; CM-NEXT: CF_END
anatofuz
parents:
diff changeset
252 ; CM-NEXT: PAD
anatofuz
parents:
diff changeset
253 ; CM-NEXT: ALU clause starting at 4:
anatofuz
parents:
diff changeset
254 ; CM-NEXT: LSHR * T0.X, KC0[2].W, literal.x,
anatofuz
parents:
diff changeset
255 ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
anatofuz
parents:
diff changeset
256 ; CM-NEXT: BFE_UINT * T1.X, KC0[2].Y, 0.0, KC0[2].Z,
anatofuz
parents:
diff changeset
257 %numhighbits = sub i32 32, %numlowbits
anatofuz
parents:
diff changeset
258 %mask = lshr i32 -1, %numhighbits
anatofuz
parents:
diff changeset
259 %masked = and i32 %mask, %val
anatofuz
parents:
diff changeset
260 store i32 %masked, i32 addrspace(1)* %out
anatofuz
parents:
diff changeset
261 ret void
anatofuz
parents:
diff changeset
262 }
anatofuz
parents:
diff changeset
263
anatofuz
parents:
diff changeset
264 define amdgpu_kernel void @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits, i32 addrspace(1)* %out) {
anatofuz
parents:
diff changeset
265 ; EG-LABEL: bzhi32_c1_indexzext:
anatofuz
parents:
diff changeset
266 ; EG: ; %bb.0:
anatofuz
parents:
diff changeset
267 ; EG-NEXT: ALU 0, @8, KC0[], KC1[]
anatofuz
parents:
diff changeset
268 ; EG-NEXT: TEX 0 @6
anatofuz
parents:
diff changeset
269 ; EG-NEXT: ALU 8, @9, KC0[CB0:0-32], KC1[]
anatofuz
parents:
diff changeset
270 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
anatofuz
parents:
diff changeset
271 ; EG-NEXT: CF_END
anatofuz
parents:
diff changeset
272 ; EG-NEXT: PAD
anatofuz
parents:
diff changeset
273 ; EG-NEXT: Fetch clause starting at 6:
anatofuz
parents:
diff changeset
274 ; EG-NEXT: VTX_READ_8 T0.X, T0.X, 40, #3
anatofuz
parents:
diff changeset
275 ; EG-NEXT: ALU clause starting at 8:
anatofuz
parents:
diff changeset
276 ; EG-NEXT: MOV * T0.X, 0.0,
anatofuz
parents:
diff changeset
277 ; EG-NEXT: ALU clause starting at 9:
anatofuz
parents:
diff changeset
278 ; EG-NEXT: SUB_INT * T0.W, literal.x, T0.X,
anatofuz
parents:
diff changeset
279 ; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
anatofuz
parents:
diff changeset
280 ; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
anatofuz
parents:
diff changeset
281 ; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00)
anatofuz
parents:
diff changeset
282 ; EG-NEXT: LSHR * T0.W, literal.x, PV.W,
anatofuz
parents:
diff changeset
283 ; EG-NEXT: -1(nan), 0(0.000000e+00)
anatofuz
parents:
diff changeset
284 ; EG-NEXT: AND_INT T0.X, PV.W, KC0[2].Y,
anatofuz
parents:
diff changeset
285 ; EG-NEXT: LSHR * T1.X, KC0[2].W, literal.x,
anatofuz
parents:
diff changeset
286 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
anatofuz
parents:
diff changeset
287 ;
anatofuz
parents:
diff changeset
288 ; CM-LABEL: bzhi32_c1_indexzext:
anatofuz
parents:
diff changeset
289 ; CM: ; %bb.0:
anatofuz
parents:
diff changeset
290 ; CM-NEXT: ALU 0, @8, KC0[], KC1[]
anatofuz
parents:
diff changeset
291 ; CM-NEXT: TEX 0 @6
anatofuz
parents:
diff changeset
292 ; CM-NEXT: ALU 8, @9, KC0[CB0:0-32], KC1[]
anatofuz
parents:
diff changeset
293 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
anatofuz
parents:
diff changeset
294 ; CM-NEXT: CF_END
anatofuz
parents:
diff changeset
295 ; CM-NEXT: PAD
anatofuz
parents:
diff changeset
296 ; CM-NEXT: Fetch clause starting at 6:
anatofuz
parents:
diff changeset
297 ; CM-NEXT: VTX_READ_8 T0.X, T0.X, 40, #3
anatofuz
parents:
diff changeset
298 ; CM-NEXT: ALU clause starting at 8:
anatofuz
parents:
diff changeset
299 ; CM-NEXT: MOV * T0.X, 0.0,
anatofuz
parents:
diff changeset
300 ; CM-NEXT: ALU clause starting at 9:
anatofuz
parents:
diff changeset
301 ; CM-NEXT: SUB_INT * T0.W, literal.x, T0.X,
anatofuz
parents:
diff changeset
302 ; CM-NEXT: 32(4.484155e-44), 0(0.000000e+00)
anatofuz
parents:
diff changeset
303 ; CM-NEXT: AND_INT * T0.W, PV.W, literal.x,
anatofuz
parents:
diff changeset
304 ; CM-NEXT: 255(3.573311e-43), 0(0.000000e+00)
anatofuz
parents:
diff changeset
305 ; CM-NEXT: LSHR * T0.W, literal.x, PV.W,
anatofuz
parents:
diff changeset
306 ; CM-NEXT: -1(nan), 0(0.000000e+00)
anatofuz
parents:
diff changeset
307 ; CM-NEXT: AND_INT * T0.X, PV.W, KC0[2].Y,
anatofuz
parents:
diff changeset
308 ; CM-NEXT: LSHR * T1.X, KC0[2].W, literal.x,
anatofuz
parents:
diff changeset
309 ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
anatofuz
parents:
diff changeset
310 %numhighbits = sub i8 32, %numlowbits
anatofuz
parents:
diff changeset
311 %sh_prom = zext i8 %numhighbits to i32
anatofuz
parents:
diff changeset
312 %mask = lshr i32 -1, %sh_prom
anatofuz
parents:
diff changeset
313 %masked = and i32 %mask, %val
anatofuz
parents:
diff changeset
314 store i32 %masked, i32 addrspace(1)* %out
anatofuz
parents:
diff changeset
315 ret void
anatofuz
parents:
diff changeset
316 }
anatofuz
parents:
diff changeset
317
anatofuz
parents:
diff changeset
318 define amdgpu_kernel void @bzhi32_c4_commutative(i32 %val, i32 %numlowbits, i32 addrspace(1)* %out) {
anatofuz
parents:
diff changeset
319 ; EG-LABEL: bzhi32_c4_commutative:
anatofuz
parents:
diff changeset
320 ; EG: ; %bb.0:
anatofuz
parents:
diff changeset
321 ; EG-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[]
anatofuz
parents:
diff changeset
322 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1
anatofuz
parents:
diff changeset
323 ; EG-NEXT: CF_END
anatofuz
parents:
diff changeset
324 ; EG-NEXT: PAD
anatofuz
parents:
diff changeset
325 ; EG-NEXT: ALU clause starting at 4:
anatofuz
parents:
diff changeset
326 ; EG-NEXT: LSHR * T0.X, KC0[2].W, literal.x,
anatofuz
parents:
diff changeset
327 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
anatofuz
parents:
diff changeset
328 ; EG-NEXT: BFE_UINT * T1.X, KC0[2].Y, 0.0, KC0[2].Z,
anatofuz
parents:
diff changeset
329 ;
anatofuz
parents:
diff changeset
330 ; CM-LABEL: bzhi32_c4_commutative:
anatofuz
parents:
diff changeset
331 ; CM: ; %bb.0:
anatofuz
parents:
diff changeset
332 ; CM-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[]
anatofuz
parents:
diff changeset
333 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1.X, T0.X
anatofuz
parents:
diff changeset
334 ; CM-NEXT: CF_END
anatofuz
parents:
diff changeset
335 ; CM-NEXT: PAD
anatofuz
parents:
diff changeset
336 ; CM-NEXT: ALU clause starting at 4:
anatofuz
parents:
diff changeset
337 ; CM-NEXT: LSHR * T0.X, KC0[2].W, literal.x,
anatofuz
parents:
diff changeset
338 ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
anatofuz
parents:
diff changeset
339 ; CM-NEXT: BFE_UINT * T1.X, KC0[2].Y, 0.0, KC0[2].Z,
anatofuz
parents:
diff changeset
340 %numhighbits = sub i32 32, %numlowbits
anatofuz
parents:
diff changeset
341 %mask = lshr i32 -1, %numhighbits
anatofuz
parents:
diff changeset
342 %masked = and i32 %val, %mask ; swapped order
anatofuz
parents:
diff changeset
343 store i32 %masked, i32 addrspace(1)* %out
anatofuz
parents:
diff changeset
344 ret void
anatofuz
parents:
diff changeset
345 }
anatofuz
parents:
diff changeset
346
anatofuz
parents:
diff changeset
347 ; ---------------------------------------------------------------------------- ;
anatofuz
parents:
diff changeset
348 ; Pattern d. 32-bit
anatofuz
parents:
diff changeset
349 ; ---------------------------------------------------------------------------- ;
anatofuz
parents:
diff changeset
350
anatofuz
parents:
diff changeset
351 define amdgpu_kernel void @bzhi32_d0(i32 %val, i32 %numlowbits, i32 addrspace(1)* %out) {
anatofuz
parents:
diff changeset
352 ; EG-LABEL: bzhi32_d0:
anatofuz
parents:
diff changeset
353 ; EG: ; %bb.0:
anatofuz
parents:
diff changeset
354 ; EG-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[]
anatofuz
parents:
diff changeset
355 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1
anatofuz
parents:
diff changeset
356 ; EG-NEXT: CF_END
anatofuz
parents:
diff changeset
357 ; EG-NEXT: PAD
anatofuz
parents:
diff changeset
358 ; EG-NEXT: ALU clause starting at 4:
anatofuz
parents:
diff changeset
359 ; EG-NEXT: LSHR * T0.X, KC0[2].W, literal.x,
anatofuz
parents:
diff changeset
360 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
anatofuz
parents:
diff changeset
361 ; EG-NEXT: BFE_UINT * T1.X, KC0[2].Y, 0.0, KC0[2].Z,
anatofuz
parents:
diff changeset
362 ;
anatofuz
parents:
diff changeset
363 ; CM-LABEL: bzhi32_d0:
anatofuz
parents:
diff changeset
364 ; CM: ; %bb.0:
anatofuz
parents:
diff changeset
365 ; CM-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[]
anatofuz
parents:
diff changeset
366 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1.X, T0.X
anatofuz
parents:
diff changeset
367 ; CM-NEXT: CF_END
anatofuz
parents:
diff changeset
368 ; CM-NEXT: PAD
anatofuz
parents:
diff changeset
369 ; CM-NEXT: ALU clause starting at 4:
anatofuz
parents:
diff changeset
370 ; CM-NEXT: LSHR * T0.X, KC0[2].W, literal.x,
anatofuz
parents:
diff changeset
371 ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
anatofuz
parents:
diff changeset
372 ; CM-NEXT: BFE_UINT * T1.X, KC0[2].Y, 0.0, KC0[2].Z,
anatofuz
parents:
diff changeset
373 %numhighbits = sub i32 32, %numlowbits
anatofuz
parents:
diff changeset
374 %highbitscleared = shl i32 %val, %numhighbits
anatofuz
parents:
diff changeset
375 %masked = lshr i32 %highbitscleared, %numhighbits
anatofuz
parents:
diff changeset
376 store i32 %masked, i32 addrspace(1)* %out
anatofuz
parents:
diff changeset
377 ret void
anatofuz
parents:
diff changeset
378 }
anatofuz
parents:
diff changeset
379
anatofuz
parents:
diff changeset
380 define amdgpu_kernel void @bzhi32_d1_indexzext(i32 %val, i8 %numlowbits, i32 addrspace(1)* %out) {
anatofuz
parents:
diff changeset
381 ; EG-LABEL: bzhi32_d1_indexzext:
anatofuz
parents:
diff changeset
382 ; EG: ; %bb.0:
anatofuz
parents:
diff changeset
383 ; EG-NEXT: ALU 0, @8, KC0[], KC1[]
anatofuz
parents:
diff changeset
384 ; EG-NEXT: TEX 0 @6
anatofuz
parents:
diff changeset
385 ; EG-NEXT: ALU 7, @9, KC0[CB0:0-32], KC1[]
anatofuz
parents:
diff changeset
386 ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
anatofuz
parents:
diff changeset
387 ; EG-NEXT: CF_END
anatofuz
parents:
diff changeset
388 ; EG-NEXT: PAD
anatofuz
parents:
diff changeset
389 ; EG-NEXT: Fetch clause starting at 6:
anatofuz
parents:
diff changeset
390 ; EG-NEXT: VTX_READ_8 T0.X, T0.X, 40, #3
anatofuz
parents:
diff changeset
391 ; EG-NEXT: ALU clause starting at 8:
anatofuz
parents:
diff changeset
392 ; EG-NEXT: MOV * T0.X, 0.0,
anatofuz
parents:
diff changeset
393 ; EG-NEXT: ALU clause starting at 9:
anatofuz
parents:
diff changeset
394 ; EG-NEXT: SUB_INT * T0.W, literal.x, T0.X,
anatofuz
parents:
diff changeset
395 ; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
anatofuz
parents:
diff changeset
396 ; EG-NEXT: AND_INT * T0.W, PV.W, literal.x,
anatofuz
parents:
diff changeset
397 ; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00)
anatofuz
parents:
diff changeset
398 ; EG-NEXT: LSHL * T1.W, KC0[2].Y, PV.W,
anatofuz
parents:
diff changeset
399 ; EG-NEXT: LSHR T0.X, PV.W, T0.W,
anatofuz
parents:
diff changeset
400 ; EG-NEXT: LSHR * T1.X, KC0[2].W, literal.x,
anatofuz
parents:
diff changeset
401 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
anatofuz
parents:
diff changeset
402 ;
anatofuz
parents:
diff changeset
403 ; CM-LABEL: bzhi32_d1_indexzext:
anatofuz
parents:
diff changeset
404 ; CM: ; %bb.0:
anatofuz
parents:
diff changeset
405 ; CM-NEXT: ALU 0, @8, KC0[], KC1[]
anatofuz
parents:
diff changeset
406 ; CM-NEXT: TEX 0 @6
anatofuz
parents:
diff changeset
407 ; CM-NEXT: ALU 7, @9, KC0[CB0:0-32], KC1[]
anatofuz
parents:
diff changeset
408 ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
anatofuz
parents:
diff changeset
409 ; CM-NEXT: CF_END
anatofuz
parents:
diff changeset
410 ; CM-NEXT: PAD
anatofuz
parents:
diff changeset
411 ; CM-NEXT: Fetch clause starting at 6:
anatofuz
parents:
diff changeset
412 ; CM-NEXT: VTX_READ_8 T0.X, T0.X, 40, #3
anatofuz
parents:
diff changeset
413 ; CM-NEXT: ALU clause starting at 8:
anatofuz
parents:
diff changeset
414 ; CM-NEXT: MOV * T0.X, 0.0,
anatofuz
parents:
diff changeset
415 ; CM-NEXT: ALU clause starting at 9:
anatofuz
parents:
diff changeset
416 ; CM-NEXT: SUB_INT * T0.W, literal.x, T0.X,
anatofuz
parents:
diff changeset
417 ; CM-NEXT: 32(4.484155e-44), 0(0.000000e+00)
anatofuz
parents:
diff changeset
418 ; CM-NEXT: AND_INT * T0.W, PV.W, literal.x,
anatofuz
parents:
diff changeset
419 ; CM-NEXT: 255(3.573311e-43), 0(0.000000e+00)
anatofuz
parents:
diff changeset
420 ; CM-NEXT: LSHL * T1.W, KC0[2].Y, PV.W,
anatofuz
parents:
diff changeset
421 ; CM-NEXT: LSHR * T0.X, PV.W, T0.W,
anatofuz
parents:
diff changeset
422 ; CM-NEXT: LSHR * T1.X, KC0[2].W, literal.x,
anatofuz
parents:
diff changeset
423 ; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
anatofuz
parents:
diff changeset
424 %numhighbits = sub i8 32, %numlowbits
anatofuz
parents:
diff changeset
425 %sh_prom = zext i8 %numhighbits to i32
anatofuz
parents:
diff changeset
426 %highbitscleared = shl i32 %val, %sh_prom
anatofuz
parents:
diff changeset
427 %masked = lshr i32 %highbitscleared, %sh_prom
anatofuz
parents:
diff changeset
428 store i32 %masked, i32 addrspace(1)* %out
anatofuz
parents:
diff changeset
429 ret void
anatofuz
parents:
diff changeset
430 }