test/CodeGen/AMDGPU/sext-in-reg.ll @ 95:afa8332a0e37 ("LLVM 3.8")
comparison with 84:f3e34b893a5f

author:   Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
date:     Tue, 13 Oct 2015 17:48:58 +0900
children: 7d135dc70f03

; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

declare i32 @llvm.AMDGPU.imax(i32, i32) nounwind readnone
declare i32 @llvm.r600.read.tidig.x() nounwind readnone

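; Most tests below express "sign extend in register" with the canonical
; shl/ashr IR idiom: shifting left by (bitwidth - fieldwidth) and then
; arithmetic-shifting right by the same amount sign-extends the low
; fieldwidth bits. For example, sign-extending the low byte of an i32:
;   %shl = shl i32 %x, 24
;   %sext = ashr i32 %shl, 24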

; FUNC-LABEL: {{^}}sext_in_reg_i1_i32:
; SI: s_load_dword [[ARG:s[0-9]+]],
; SI: s_bfe_i32 [[SEXTRACT:s[0-9]+]], [[ARG]], 0x10000
; SI: v_mov_b32_e32 [[EXTRACT:v[0-9]+]], [[SEXTRACT]]
; SI: buffer_store_dword [[EXTRACT]],
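; Note on the s_bfe_i32 immediates checked throughout this file: the operand
; packs the field description as (width << 16) | offset, so 0x10000 above
; selects a 1-bit field at bit offset 0 (the i1 being sign-extended).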

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]], {{.*}}, 0.0, 1
; EG-NEXT: LSHR * [[ADDR]]
define void @sext_in_reg_i1_i32(i32 addrspace(1)* %out, i32 %in) {
  %shl = shl i32 %in, 31
  %sext = ashr i32 %shl, 31
  store i32 %sext, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_i8_to_i32:
; SI: s_add_i32 [[VAL:s[0-9]+]],
; SI: s_sext_i32_i8 [[EXTRACT:s[0-9]+]], [[VAL]]
; SI: v_mov_b32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]]
; SI: buffer_store_dword [[VEXTRACT]],

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG: ADD_INT
; EG-NEXT: BFE_INT [[RES]], {{.*}}, 0.0, literal
; EG-NEXT: LSHR * [[ADDR]]
define void @sext_in_reg_i8_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %c = add i32 %a, %b ; add to prevent folding into extload
  %shl = shl i32 %c, 24
  %ashr = ashr i32 %shl, 24
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_i16_to_i32:
; SI: s_add_i32 [[VAL:s[0-9]+]],
; SI: s_sext_i32_i16 [[EXTRACT:s[0-9]+]], [[VAL]]
; SI: v_mov_b32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]]
; SI: buffer_store_dword [[VEXTRACT]],

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG: ADD_INT
; EG-NEXT: BFE_INT [[RES]], {{.*}}, 0.0, literal
; EG-NEXT: LSHR * [[ADDR]]
define void @sext_in_reg_i16_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %c = add i32 %a, %b ; add to prevent folding into extload
  %shl = shl i32 %c, 16
  %ashr = ashr i32 %shl, 16
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_i8_to_v1i32:
; SI: s_add_i32 [[VAL:s[0-9]+]],
; SI: s_sext_i32_i8 [[EXTRACT:s[0-9]+]], [[VAL]]
; SI: v_mov_b32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]]
; SI: buffer_store_dword [[VEXTRACT]],

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG: ADD_INT
; EG-NEXT: BFE_INT [[RES]], {{.*}}, 0.0, literal
; EG-NEXT: LSHR * [[ADDR]]
define void @sext_in_reg_i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i32> %a, <1 x i32> %b) nounwind {
  %c = add <1 x i32> %a, %b ; add to prevent folding into extload
  %shl = shl <1 x i32> %c, <i32 24>
  %ashr = ashr <1 x i32> %shl, <i32 24>
  store <1 x i32> %ashr, <1 x i32> addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_i1_to_i64:
; SI: s_lshl_b64 [[VAL:s\[[0-9]+:[0-9]+\]]]
; SI-DAG: s_bfe_i64 s{{\[}}[[SLO:[0-9]+]]:[[SHI:[0-9]+]]{{\]}}, [[VAL]], 0x10000
; SI-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SLO]]
; SI-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]]
; SI: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}
define void @sext_in_reg_i1_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
  %c = shl i64 %a, %b
  %shl = shl i64 %c, 63
  %ashr = ashr i64 %shl, 63
  store i64 %ashr, i64 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_i8_to_i64:
; SI: s_lshl_b64 [[VAL:s\[[0-9]+:[0-9]+\]]]
; SI-DAG: s_bfe_i64 s{{\[}}[[SLO:[0-9]+]]:[[SHI:[0-9]+]]{{\]}}, [[VAL]], 0x80000
; SI-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SLO]]
; SI-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]]
; SI: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}

; EG: MEM_{{.*}} STORE_{{.*}} [[RES_LO:T[0-9]+\.[XYZW]]], [[ADDR_LO:T[0-9]+.[XYZW]]]
; EG: MEM_{{.*}} STORE_{{.*}} [[RES_HI:T[0-9]+\.[XYZW]]], [[ADDR_HI:T[0-9]+.[XYZW]]]
; EG: LSHL
; EG: BFE_INT {{\*?}} [[RES_LO]], {{.*}}, 0.0, literal
; EG: ASHR [[RES_HI]]
; EG-NOT: BFE_INT
; EG: LSHR
; EG: LSHR
;; TODO: check the address computation; using | with variables inside {{}}
;; does not work, and the _LO/_HI order might also differ.
define void @sext_in_reg_i8_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
  %c = shl i64 %a, %b
  %shl = shl i64 %c, 56
  %ashr = ashr i64 %shl, 56
  store i64 %ashr, i64 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_i16_to_i64:
; SI: s_lshl_b64 [[VAL:s\[[0-9]+:[0-9]+\]]]
; SI-DAG: s_bfe_i64 s{{\[}}[[SLO:[0-9]+]]:[[SHI:[0-9]+]]{{\]}}, [[VAL]], 0x100000
; SI-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SLO]]
; SI-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]]
; SI: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}

; EG: MEM_{{.*}} STORE_{{.*}} [[RES_LO:T[0-9]+\.[XYZW]]], [[ADDR_LO:T[0-9]+.[XYZW]]]
; EG: MEM_{{.*}} STORE_{{.*}} [[RES_HI:T[0-9]+\.[XYZW]]], [[ADDR_HI:T[0-9]+.[XYZW]]]
; EG: LSHL
; EG: BFE_INT {{\*?}} [[RES_LO]], {{.*}}, 0.0, literal
; EG: ASHR [[RES_HI]]
; EG-NOT: BFE_INT
; EG: LSHR
; EG: LSHR
;; TODO: check the address computation; using | with variables inside {{}}
;; does not work, and the _LO/_HI order might also differ.
define void @sext_in_reg_i16_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
  %c = shl i64 %a, %b
  %shl = shl i64 %c, 48
  %ashr = ashr i64 %shl, 48
  store i64 %ashr, i64 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_i32_to_i64:
; SI: s_lshl_b64 [[VAL:s\[[0-9]+:[0-9]+\]]]
; SI-DAG: s_bfe_i64 s{{\[}}[[SLO:[0-9]+]]:[[SHI:[0-9]+]]{{\]}}, [[VAL]], 0x200000
; SI-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SLO]]
; SI-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]]
; SI: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}

; EG: MEM_{{.*}} STORE_{{.*}} [[RES_LO:T[0-9]+\.[XYZW]]], [[ADDR_LO:T[0-9]+.[XYZW]]]
; EG: MEM_{{.*}} STORE_{{.*}} [[RES_HI:T[0-9]+\.[XYZW]]], [[ADDR_HI:T[0-9]+.[XYZW]]]
; EG-NOT: BFE_INT

; EG: ASHR [[RES_HI]]

; EG: LSHR
; EG: LSHR
;; TODO: check the address computation; using | with variables inside {{}}
;; does not work, and the _LO/_HI order might also differ.
define void @sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
  %c = shl i64 %a, %b
  %shl = shl i64 %c, 32
  %ashr = ashr i64 %shl, 32
  store i64 %ashr, i64 addrspace(1)* %out, align 8
  ret void
}

; This is broken on Evergreen for some reason related to the <1 x i64> kernel arguments.
; XFUNC-LABEL: {{^}}sext_in_reg_i8_to_v1i64:
; XSI: s_bfe_i32 [[EXTRACT:s[0-9]+]], {{s[0-9]+}}, 524288
; XSI: s_ashr_i32 {{v[0-9]+}}, [[EXTRACT]], 31
; XSI: buffer_store_dword
; XEG: BFE_INT
; XEG: ASHR
; define void @sext_in_reg_i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i64> %a, <1 x i64> %b) nounwind {
;   %c = add <1 x i64> %a, %b
;   %shl = shl <1 x i64> %c, <i64 56>
;   %ashr = ashr <1 x i64> %shl, <i64 56>
;   store <1 x i64> %ashr, <1 x i64> addrspace(1)* %out, align 8
;   ret void
; }

; FUNC-LABEL: {{^}}v_sext_in_reg_i1_to_i64:
; SI: buffer_load_dwordx2
; SI: v_lshl_b64 v{{\[}}[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]{{\]}}
; SI: v_bfe_i32 v[[LO:[0-9]+]], v[[VAL_LO]], 0, 1
; SI: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
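; For a 64-bit sext-in-reg whose field width is at most 32 bits, only the low
; half needs the BFE; the high half is just the low half arithmetic-shifted
; right by 31, which replicates the sign bit across all of v[[HI]].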
define void @v_sext_in_reg_i1_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x()
  %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
  %b.gep = getelementptr i64, i64 addrspace(1)* %bptr, i32 %tid
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
  %a = load i64, i64 addrspace(1)* %a.gep, align 8
  %b = load i64, i64 addrspace(1)* %b.gep, align 8

  %c = shl i64 %a, %b
  %shl = shl i64 %c, 63
  %ashr = ashr i64 %shl, 63
  store i64 %ashr, i64 addrspace(1)* %out.gep, align 8
  ret void
}

; FUNC-LABEL: {{^}}v_sext_in_reg_i8_to_i64:
; SI: buffer_load_dwordx2
; SI: v_lshl_b64 v{{\[}}[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]{{\]}}
; SI: v_bfe_i32 v[[LO:[0-9]+]], v[[VAL_LO]], 0, 8
; SI: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @v_sext_in_reg_i8_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x()
  %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
  %b.gep = getelementptr i64, i64 addrspace(1)* %bptr, i32 %tid
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
  %a = load i64, i64 addrspace(1)* %a.gep, align 8
  %b = load i64, i64 addrspace(1)* %b.gep, align 8

  %c = shl i64 %a, %b
  %shl = shl i64 %c, 56
  %ashr = ashr i64 %shl, 56
  store i64 %ashr, i64 addrspace(1)* %out.gep, align 8
  ret void
}

; FUNC-LABEL: {{^}}v_sext_in_reg_i16_to_i64:
; SI: buffer_load_dwordx2
; SI: v_lshl_b64 v{{\[}}[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]{{\]}}
; SI: v_bfe_i32 v[[LO:[0-9]+]], v[[VAL_LO]], 0, 16
; SI: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @v_sext_in_reg_i16_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x()
  %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
  %b.gep = getelementptr i64, i64 addrspace(1)* %bptr, i32 %tid
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
  %a = load i64, i64 addrspace(1)* %a.gep, align 8
  %b = load i64, i64 addrspace(1)* %b.gep, align 8

  %c = shl i64 %a, %b
  %shl = shl i64 %c, 48
  %ashr = ashr i64 %shl, 48
  store i64 %ashr, i64 addrspace(1)* %out.gep, align 8
  ret void
}

; FUNC-LABEL: {{^}}v_sext_in_reg_i32_to_i64:
; SI: buffer_load_dwordx2
; SI: v_lshl_b64 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}},
; SI: v_ashrrev_i32_e32 v[[SHR:[0-9]+]], 31, v[[LO]]
; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[SHR]]{{\]}}
define void @v_sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x()
  %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
  %b.gep = getelementptr i64, i64 addrspace(1)* %bptr, i32 %tid
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
  %a = load i64, i64 addrspace(1)* %a.gep, align 8
  %b = load i64, i64 addrspace(1)* %b.gep, align 8

  %c = shl i64 %a, %b
  %shl = shl i64 %c, 32
  %ashr = ashr i64 %shl, 32
  store i64 %ashr, i64 addrspace(1)* %out.gep, align 8
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_i1_in_i32_other_amount:
; SI-NOT: s_lshl
; SI-NOT: s_ashr
; SI: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x190001
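; The shl-by-6 / ashr-by-7 below is not a plain sext-in-reg, but it is still a
; signed extract of bits [25:1]: offset 1, width 25. Packed as
; (width << 16) | offset, that is (25 << 16) | 1 = 0x190001, which is the
; immediate checked above.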

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG-NOT: BFE
; EG: ADD_INT
; EG: LSHL
; EG: ASHR [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_i1_in_i32_other_amount(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %c = add i32 %a, %b
  %x = shl i32 %c, 6
  %y = ashr i32 %x, 7
  store i32 %y, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_v2i1_in_v2i32_other_amount:
; SI-NOT: s_lshl
; SI-NOT: s_ashr
; SI-DAG: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x190001
; SI-DAG: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x190001
; SI: s_endpgm

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG-NOT: BFE
; EG: ADD_INT
; EG: LSHL
; EG: ASHR [[RES]]
; EG: LSHL
; EG: ASHR [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v2i1_in_v2i32_other_amount(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
  %c = add <2 x i32> %a, %b
  %x = shl <2 x i32> %c, <i32 6, i32 6>
  %y = ashr <2 x i32> %x, <i32 7, i32 7>
  store <2 x i32> %y, <2 x i32> addrspace(1)* %out, align 2
  ret void
}


; FUNC-LABEL: {{^}}sext_in_reg_v2i1_to_v2i32:
; SI: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
; SI: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
; SI: buffer_store_dwordx2

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
  %c = add <2 x i32> %a, %b ; add to prevent folding into extload
  %shl = shl <2 x i32> %c, <i32 31, i32 31>
  %ashr = ashr <2 x i32> %shl, <i32 31, i32 31>
  store <2 x i32> %ashr, <2 x i32> addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_v4i1_to_v4i32:
; SI: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
; SI: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
; SI: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
; SI: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
; SI: buffer_store_dwordx4

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW][XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) nounwind {
  %c = add <4 x i32> %a, %b ; add to prevent folding into extload
  %shl = shl <4 x i32> %c, <i32 31, i32 31, i32 31, i32 31>
  %ashr = ashr <4 x i32> %shl, <i32 31, i32 31, i32 31, i32 31>
  store <4 x i32> %ashr, <4 x i32> addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_v2i8_to_v2i32:
; SI: s_sext_i32_i8 {{s[0-9]+}}, {{s[0-9]+}}
; SI: s_sext_i32_i8 {{s[0-9]+}}, {{s[0-9]+}}
; SI: buffer_store_dwordx2

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
  %c = add <2 x i32> %a, %b ; add to prevent folding into extload
  %shl = shl <2 x i32> %c, <i32 24, i32 24>
  %ashr = ashr <2 x i32> %shl, <i32 24, i32 24>
  store <2 x i32> %ashr, <2 x i32> addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_v4i8_to_v4i32:
; SI: s_sext_i32_i8 {{s[0-9]+}}, {{s[0-9]+}}
; SI: s_sext_i32_i8 {{s[0-9]+}}, {{s[0-9]+}}
; SI: s_sext_i32_i8 {{s[0-9]+}}, {{s[0-9]+}}
; SI: s_sext_i32_i8 {{s[0-9]+}}, {{s[0-9]+}}
; SI: buffer_store_dwordx4

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW][XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) nounwind {
  %c = add <4 x i32> %a, %b ; add to prevent folding into extload
  %shl = shl <4 x i32> %c, <i32 24, i32 24, i32 24, i32 24>
  %ashr = ashr <4 x i32> %shl, <i32 24, i32 24, i32 24, i32 24>
  store <4 x i32> %ashr, <4 x i32> addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_v2i16_to_v2i32:
; SI: s_sext_i32_i16 {{s[0-9]+}}, {{s[0-9]+}}
; SI: s_sext_i32_i16 {{s[0-9]+}}, {{s[0-9]+}}
; SI: buffer_store_dwordx2

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
  %c = add <2 x i32> %a, %b ; add to prevent folding into extload
  %shl = shl <2 x i32> %c, <i32 16, i32 16>
  %ashr = ashr <2 x i32> %shl, <i32 16, i32 16>
  store <2 x i32> %ashr, <2 x i32> addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}testcase:
define void @testcase(i8 addrspace(1)* %out, i8 %a) nounwind {
  %and_a_1 = and i8 %a, 1
  %cmp_eq = icmp eq i8 %and_a_1, 0
  %cmp_slt = icmp slt i8 %a, 0
  %sel0 = select i1 %cmp_slt, i8 0, i8 %a
  %sel1 = select i1 %cmp_eq, i8 0, i8 %a
  %xor = xor i8 %sel0, %sel1
  store i8 %xor, i8 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}testcase_3:
define void @testcase_3(i8 addrspace(1)* %out, i8 %a) nounwind {
  %and_a_1 = and i8 %a, 1
  %cmp_eq = icmp eq i8 %and_a_1, 0
  %cmp_slt = icmp slt i8 %a, 0
  %sel0 = select i1 %cmp_slt, i8 0, i8 %a
  %sel1 = select i1 %cmp_eq, i8 0, i8 %a
  %xor = xor i8 %sel0, %sel1
  store i8 %xor, i8 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}vgpr_sext_in_reg_v4i8_to_v4i32:
; SI: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
; SI: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
; SI: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
; SI: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
define void @vgpr_sext_in_reg_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind {
  %loada = load <4 x i32>, <4 x i32> addrspace(1)* %a, align 16
  %loadb = load <4 x i32>, <4 x i32> addrspace(1)* %b, align 16
  %c = add <4 x i32> %loada, %loadb ; add to prevent folding into extload
  %shl = shl <4 x i32> %c, <i32 24, i32 24, i32 24, i32 24>
  %ashr = ashr <4 x i32> %shl, <i32 24, i32 24, i32 24, i32 24>
  store <4 x i32> %ashr, <4 x i32> addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}vgpr_sext_in_reg_v4i16_to_v4i32:
; SI: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 16
; SI: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 16
define void @vgpr_sext_in_reg_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind {
  %loada = load <4 x i32>, <4 x i32> addrspace(1)* %a, align 16
  %loadb = load <4 x i32>, <4 x i32> addrspace(1)* %b, align 16
  %c = add <4 x i32> %loada, %loadb ; add to prevent folding into extload
  %shl = shl <4 x i32> %c, <i32 16, i32 16, i32 16, i32 16>
  %ashr = ashr <4 x i32> %shl, <i32 16, i32 16, i32 16, i32 16>
  store <4 x i32> %ashr, <4 x i32> addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_to_illegal_type:
; SI: buffer_load_sbyte
; SI: v_max_i32
; SI-NOT: bfe
; SI: buffer_store_short
define void @sext_in_reg_to_illegal_type(i16 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %src) nounwind {
  %tmp5 = load i8, i8 addrspace(1)* %src, align 1
  %tmp2 = sext i8 %tmp5 to i32
  %tmp3 = tail call i32 @llvm.AMDGPU.imax(i32 %tmp2, i32 0) nounwind readnone
  %tmp4 = trunc i32 %tmp3 to i8
  %tmp6 = sext i8 %tmp4 to i16
  store i16 %tmp6, i16 addrspace(1)* %out, align 2
  ret void
}

declare i32 @llvm.AMDGPU.bfe.i32(i32, i32, i32) nounwind readnone
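; llvm.AMDGPU.bfe.i32(value, offset, width) is a signed bitfield extract; the
; tests below exercise how it folds with sext_inreg patterns and with
; degenerate operands such as a zero width.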

; FUNC-LABEL: {{^}}bfe_0_width:
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_0_width(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
  %load = load i32, i32 addrspace(1)* %ptr, align 4
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 8, i32 0) nounwind readnone
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_8_bfe_8:
; SI: v_bfe_i32
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_8_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
  %load = load i32, i32 addrspace(1)* %ptr, align 4
  %bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 8) nounwind readnone
  %bfe1 = call i32 @llvm.AMDGPU.bfe.i32(i32 %bfe0, i32 0, i32 8) nounwind readnone
  store i32 %bfe1, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}bfe_8_bfe_16:
; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8
; SI: s_endpgm
define void @bfe_8_bfe_16(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
  %load = load i32, i32 addrspace(1)* %ptr, align 4
  %bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 8) nounwind readnone
  %bfe1 = call i32 @llvm.AMDGPU.bfe.i32(i32 %bfe0, i32 0, i32 16) nounwind readnone
  store i32 %bfe1, i32 addrspace(1)* %out, align 4
  ret void
}

; This really should be folded into a single BFE.
; FUNC-LABEL: {{^}}bfe_16_bfe_8:
; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_16_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
  %load = load i32, i32 addrspace(1)* %ptr, align 4
  %bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 16) nounwind readnone
  %bfe1 = call i32 @llvm.AMDGPU.bfe.i32(i32 %bfe0, i32 0, i32 8) nounwind readnone
  store i32 %bfe1, i32 addrspace(1)* %out, align 4
  ret void
}

; Make sure there isn't a redundant BFE
; FUNC-LABEL: {{^}}sext_in_reg_i8_to_i32_bfe:
; SI: s_sext_i32_i8 s{{[0-9]+}}, s{{[0-9]+}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @sext_in_reg_i8_to_i32_bfe(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %c = add i32 %a, %b ; add to prevent folding into extload
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %c, i32 0, i32 8) nounwind readnone
  %shl = shl i32 %bfe, 24
  %ashr = ashr i32 %shl, 24
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_i8_to_i32_bfe_wrong:
define void @sext_in_reg_i8_to_i32_bfe_wrong(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %c = add i32 %a, %b ; add to prevent folding into extload
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %c, i32 8, i32 0) nounwind readnone
  %shl = shl i32 %bfe, 24
  %ashr = ashr i32 %shl, 24
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}sextload_i8_to_i32_bfe:
; SI: buffer_load_sbyte
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @sextload_i8_to_i32_bfe(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) nounwind {
  %load = load i8, i8 addrspace(1)* %ptr, align 1
  %sext = sext i8 %load to i32
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %sext, i32 0, i32 8) nounwind readnone
  %shl = shl i32 %bfe, 24
  %ashr = ashr i32 %shl, 24
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}

; SI: .text
; FUNC-LABEL: {{^}}sextload_i8_to_i32_bfe_0:{{.*$}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @sextload_i8_to_i32_bfe_0(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) nounwind {
  %load = load i8, i8 addrspace(1)* %ptr, align 1
  %sext = sext i8 %load to i32
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %sext, i32 8, i32 0) nounwind readnone
  %shl = shl i32 %bfe, 24
  %ashr = ashr i32 %shl, 24
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_i1_bfe_offset_0:
; SI-NOT: shr
; SI-NOT: shl
; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 1
; SI: s_endpgm
define void @sext_in_reg_i1_bfe_offset_0(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %shr = ashr i32 %shl, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shr, i32 0, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_i1_bfe_offset_1:
; SI: buffer_load_dword
; SI-NOT: shl
; SI-NOT: shr
; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 1
; SI: s_endpgm
define void @sext_in_reg_i1_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 30
  %shr = ashr i32 %shl, 30
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shr, i32 1, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}sext_in_reg_i2_bfe_offset_1:
; SI: buffer_load_dword
; SI-NOT: v_lshl
; SI-NOT: v_ashr
; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 2
; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 2
; SI: s_endpgm
define void @sext_in_reg_i2_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32, i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 30
  %shr = ashr i32 %shl, 30
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shr, i32 1, i32 2)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}

; Make sure we propagate the VALUness to users of a moved scalar BFE: once the
; BFE has been moved to the VALU, its result lives in a VGPR, so its users can
; no longer stay on the SALU and must be moved as well.

; FUNC-LABEL: {{^}}v_sext_in_reg_i1_to_i64_move_use:
; SI: buffer_load_dwordx2
; SI: v_lshl_b64 v{{\[}}[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]{{\]}}
; SI-DAG: v_bfe_i32 v[[LO:[0-9]+]], v[[VAL_LO]], 0, 1
; SI-DAG: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
; SI-DAG: v_and_b32_e32 v[[RESULT_LO:[0-9]+]], s{{[0-9]+}}, v[[LO]]
; SI-DAG: v_and_b32_e32 v[[RESULT_HI:[0-9]+]], s{{[0-9]+}}, v[[HI]]
; SI: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
define void @v_sext_in_reg_i1_to_i64_move_use(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr, i64 %s.val) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x()
  %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
  %b.gep = getelementptr i64, i64 addrspace(1)* %bptr, i32 %tid
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
  %a = load i64, i64 addrspace(1)* %a.gep, align 8
  %b = load i64, i64 addrspace(1)* %b.gep, align 8

  %c = shl i64 %a, %b
  %shl = shl i64 %c, 63
  %ashr = ashr i64 %shl, 63

  %and = and i64 %ashr, %s.val
  store i64 %and, i64 addrspace(1)* %out.gep, align 8
  ret void
}

; FUNC-LABEL: {{^}}v_sext_in_reg_i32_to_i64_move_use:
; SI: buffer_load_dwordx2
; SI: v_lshl_b64 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}},
; SI-DAG: v_ashrrev_i32_e32 v[[SHR:[0-9]+]], 31, v[[LO]]
; SI-DAG: v_and_b32_e32 v[[RESULT_LO:[0-9]+]], s{{[0-9]+}}, v[[LO]]
; SI-DAG: v_and_b32_e32 v[[RESULT_HI:[0-9]+]], s{{[0-9]+}}, v[[SHR]]
; SI: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
define void @v_sext_in_reg_i32_to_i64_move_use(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr, i64 %s.val) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x()
  %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
  %b.gep = getelementptr i64, i64 addrspace(1)* %bptr, i32 %tid
  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
  %a = load i64, i64 addrspace(1)* %a.gep, align 8
  %b = load i64, i64 addrspace(1)* %b.gep, align 8

  %c = shl i64 %a, %b
  %shl = shl i64 %c, 32
  %ashr = ashr i64 %shl, 32
  %and = and i64 %ashr, %s.val
  store i64 %and, i64 addrspace(1)* %out.gep, align 8
  ret void
}