Mercurial > hg > CbC > CbC_llvm
comparison: test/CodeGen/AMDGPU/unaligned-load-store.ll @ 95:afa8332a0e37 (LLVM 3.8)
author:   Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
date:     Tue, 13 Oct 2015 17:48:58 +0900
parents:  (none)
children: 1172e4bd9c6f
diff:     84:f3e34b893a5f -> 95:afa8332a0e37
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s

4 ; SI-LABEL: {{^}}unaligned_load_store_i16_local: | |
5 ; SI: ds_read_u8 | |
6 ; SI: ds_read_u8 | |
7 ; SI: ds_write_b8 | |
8 ; SI: ds_write_b8 | |
9 ; SI: s_endpgm | |
10 define void @unaligned_load_store_i16_local(i16 addrspace(3)* %p, i16 addrspace(3)* %r) nounwind { | |
11 %v = load i16, i16 addrspace(3)* %p, align 1 | |
12 store i16 %v, i16 addrspace(3)* %r, align 1 | |
13 ret void | |
14 } | |
15 | |
16 ; SI-LABEL: {{^}}unaligned_load_store_i16_global: | |
17 ; SI: buffer_load_ubyte | |
18 ; SI: buffer_load_ubyte | |
19 ; SI: buffer_store_byte | |
20 ; SI: buffer_store_byte | |
21 ; SI: s_endpgm | |
22 define void @unaligned_load_store_i16_global(i16 addrspace(1)* %p, i16 addrspace(1)* %r) nounwind { | |
23 %v = load i16, i16 addrspace(1)* %p, align 1 | |
24 store i16 %v, i16 addrspace(1)* %r, align 1 | |
25 ret void | |
26 } | |
27 | |
28 ; SI-LABEL: {{^}}unaligned_load_store_i32_local: | |
29 ; SI: ds_read_u8 | |
30 ; SI: ds_read_u8 | |
31 ; SI: ds_read_u8 | |
32 ; SI: ds_read_u8 | |
33 ; SI: ds_write_b8 | |
34 ; SI: ds_write_b8 | |
35 ; SI: ds_write_b8 | |
36 ; SI: ds_write_b8 | |
37 ; SI: s_endpgm | |
38 define void @unaligned_load_store_i32_local(i32 addrspace(3)* %p, i32 addrspace(3)* %r) nounwind { | |
39 %v = load i32, i32 addrspace(3)* %p, align 1 | |
40 store i32 %v, i32 addrspace(3)* %r, align 1 | |
41 ret void | |
42 } | |
43 | |
44 ; SI-LABEL: {{^}}unaligned_load_store_i32_global: | |
45 ; SI: buffer_load_ubyte | |
46 ; SI: buffer_load_ubyte | |
47 ; SI: buffer_load_ubyte | |
48 ; SI: buffer_load_ubyte | |
49 ; SI: buffer_store_byte | |
50 ; SI: buffer_store_byte | |
51 ; SI: buffer_store_byte | |
52 ; SI: buffer_store_byte | |
53 define void @unaligned_load_store_i32_global(i32 addrspace(1)* %p, i32 addrspace(1)* %r) nounwind { | |
54 %v = load i32, i32 addrspace(1)* %p, align 1 | |
55 store i32 %v, i32 addrspace(1)* %r, align 1 | |
56 ret void | |
57 } | |
58 | |
59 ; SI-LABEL: {{^}}unaligned_load_store_i64_local: | |
60 ; SI: ds_read_u8 | |
61 ; SI: ds_read_u8 | |
62 ; SI: ds_read_u8 | |
63 ; SI: ds_read_u8 | |
64 ; SI: ds_read_u8 | |
65 ; SI: ds_read_u8 | |
66 ; SI: ds_read_u8 | |
67 ; SI: ds_read_u8 | |
68 ; SI: ds_write_b8 | |
69 ; SI: ds_write_b8 | |
70 ; SI: ds_write_b8 | |
71 ; SI: ds_write_b8 | |
72 ; SI: ds_write_b8 | |
73 ; SI: ds_write_b8 | |
74 ; SI: ds_write_b8 | |
75 ; SI: ds_write_b8 | |
76 ; SI: s_endpgm | |
77 define void @unaligned_load_store_i64_local(i64 addrspace(3)* %p, i64 addrspace(3)* %r) { | |
78 %v = load i64, i64 addrspace(3)* %p, align 1 | |
79 store i64 %v, i64 addrspace(3)* %r, align 1 | |
80 ret void | |
81 } | |
82 | |
83 ; SI-LABEL: {{^}}unaligned_load_store_i64_global: | |
84 ; SI: buffer_load_ubyte | |
85 ; SI: buffer_load_ubyte | |
86 ; SI: buffer_load_ubyte | |
87 ; SI: buffer_load_ubyte | |
88 ; SI: buffer_load_ubyte | |
89 ; SI: buffer_load_ubyte | |
90 ; SI: buffer_load_ubyte | |
91 ; SI: buffer_load_ubyte | |
92 ; SI: buffer_store_byte | |
93 ; SI: buffer_store_byte | |
94 ; SI: buffer_store_byte | |
95 ; SI: buffer_store_byte | |
96 ; SI: buffer_store_byte | |
97 ; SI: buffer_store_byte | |
98 ; SI: buffer_store_byte | |
99 ; SI: buffer_store_byte | |
100 define void @unaligned_load_store_i64_global(i64 addrspace(1)* %p, i64 addrspace(1)* %r) { | |
101 %v = load i64, i64 addrspace(1)* %p, align 1 | |
102 store i64 %v, i64 addrspace(1)* %r, align 1 | |
103 ret void | |
104 } | |
105 | |
106 ; SI-LABEL: {{^}}unaligned_load_store_v4i32_local: | |
107 ; SI: ds_read_u8 | |
108 ; SI: ds_read_u8 | |
109 ; SI: ds_read_u8 | |
110 ; SI: ds_read_u8 | |
111 | |
112 ; SI: ds_read_u8 | |
113 ; SI: ds_read_u8 | |
114 ; SI: ds_read_u8 | |
115 ; SI: ds_read_u8 | |
116 | |
117 ; SI: ds_read_u8 | |
118 ; SI: ds_read_u8 | |
119 ; SI: ds_read_u8 | |
120 ; SI: ds_read_u8 | |
121 | |
122 ; SI: ds_read_u8 | |
123 ; SI: ds_read_u8 | |
124 ; SI: ds_read_u8 | |
125 ; SI: ds_read_u8 | |
126 | |
127 ; SI: ds_write_b8 | |
128 ; SI: ds_write_b8 | |
129 ; SI: ds_write_b8 | |
130 ; SI: ds_write_b8 | |
131 | |
132 ; SI: ds_write_b8 | |
133 ; SI: ds_write_b8 | |
134 ; SI: ds_write_b8 | |
135 ; SI: ds_write_b8 | |
136 | |
137 ; SI: ds_write_b8 | |
138 ; SI: ds_write_b8 | |
139 ; SI: ds_write_b8 | |
140 ; SI: ds_write_b8 | |
141 | |
142 ; SI: ds_write_b8 | |
143 ; SI: ds_write_b8 | |
144 ; SI: ds_write_b8 | |
145 ; SI: ds_write_b8 | |
146 ; SI: s_endpgm | |
147 define void @unaligned_load_store_v4i32_local(<4 x i32> addrspace(3)* %p, <4 x i32> addrspace(3)* %r) nounwind { | |
148 %v = load <4 x i32>, <4 x i32> addrspace(3)* %p, align 1 | |
149 store <4 x i32> %v, <4 x i32> addrspace(3)* %r, align 1 | |
150 ret void | |
151 } | |
152 | |
153 ; FIXME: We mark v4i32 as custom, so misaligned loads are never expanded. | |
154 ; FIXME-SI-LABEL: {{^}}unaligned_load_store_v4i32_global | |
155 ; FIXME-SI: buffer_load_ubyte | |
156 ; FIXME-SI: buffer_load_ubyte | |
157 ; FIXME-SI: buffer_load_ubyte | |
158 ; FIXME-SI: buffer_load_ubyte | |
159 ; FIXME-SI: buffer_load_ubyte | |
160 ; FIXME-SI: buffer_load_ubyte | |
161 ; FIXME-SI: buffer_load_ubyte | |
162 ; FIXME-SI: buffer_load_ubyte | |
163 ; FIXME-SI: buffer_load_ubyte | |
164 ; FIXME-SI: buffer_load_ubyte | |
165 ; FIXME-SI: buffer_load_ubyte | |
166 ; FIXME-SI: buffer_load_ubyte | |
167 ; FIXME-SI: buffer_load_ubyte | |
168 ; FIXME-SI: buffer_load_ubyte | |
169 ; FIXME-SI: buffer_load_ubyte | |
170 ; FIXME-SI: buffer_load_ubyte | |
171 define void @unaligned_load_store_v4i32_global(<4 x i32> addrspace(1)* %p, <4 x i32> addrspace(1)* %r) nounwind { | |
172 %v = load <4 x i32>, <4 x i32> addrspace(1)* %p, align 1 | |
173 store <4 x i32> %v, <4 x i32> addrspace(1)* %r, align 1 | |
174 ret void | |
175 } | |
176 | |
177 ; SI-LABEL: {{^}}load_lds_i64_align_4: | |
178 ; SI: ds_read2_b32 | |
179 ; SI: s_endpgm | |
180 define void @load_lds_i64_align_4(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 { | |
181 %val = load i64, i64 addrspace(3)* %in, align 4 | |
182 store i64 %val, i64 addrspace(1)* %out, align 8 | |
183 ret void | |
184 } | |
185 | |
186 ; SI-LABEL: {{^}}load_lds_i64_align_4_with_offset | |
187 ; SI: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]}} offset0:8 offset1:9 | |
188 ; SI: s_endpgm | |
189 define void @load_lds_i64_align_4_with_offset(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 { | |
190 %ptr = getelementptr i64, i64 addrspace(3)* %in, i32 4 | |
191 %val = load i64, i64 addrspace(3)* %ptr, align 4 | |
192 store i64 %val, i64 addrspace(1)* %out, align 8 | |
193 ret void | |
194 } | |
195 | |
196 ; SI-LABEL: {{^}}load_lds_i64_align_4_with_split_offset: | |
197 ; The tests for the case where the lo offset is 8-bits, but the hi offset is 9-bits | |
198 ; SI: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]}} offset1:1 | |
199 ; SI: s_endpgm | |
200 define void @load_lds_i64_align_4_with_split_offset(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 { | |
201 %ptr = bitcast i64 addrspace(3)* %in to i32 addrspace(3)* | |
202 %ptr255 = getelementptr i32, i32 addrspace(3)* %ptr, i32 255 | |
203 %ptri64 = bitcast i32 addrspace(3)* %ptr255 to i64 addrspace(3)* | |
204 %val = load i64, i64 addrspace(3)* %ptri64, align 4 | |
205 store i64 %val, i64 addrspace(1)* %out, align 8 | |
206 ret void | |
207 } | |
208 | |
209 ; SI-LABEL: {{^}}load_lds_i64_align_1: | |
210 ; SI: ds_read_u8 | |
211 ; SI: ds_read_u8 | |
212 ; SI: ds_read_u8 | |
213 ; SI: ds_read_u8 | |
214 ; SI: ds_read_u8 | |
215 ; SI: ds_read_u8 | |
216 ; SI: ds_read_u8 | |
217 ; SI: ds_read_u8 | |
218 ; SI: buffer_store_dwordx2 | |
219 ; SI: s_endpgm | |
220 | |
221 define void @load_lds_i64_align_1(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 { | |
222 %val = load i64, i64 addrspace(3)* %in, align 1 | |
223 store i64 %val, i64 addrspace(1)* %out, align 8 | |
224 ret void | |
225 } | |
226 | |
227 ; SI-LABEL: {{^}}store_lds_i64_align_4: | |
228 ; SI: ds_write2_b32 | |
229 ; SI: s_endpgm | |
230 define void @store_lds_i64_align_4(i64 addrspace(3)* %out, i64 %val) #0 { | |
231 store i64 %val, i64 addrspace(3)* %out, align 4 | |
232 ret void | |
233 } | |
234 | |
235 ; SI-LABEL: {{^}}store_lds_i64_align_4_with_offset | |
236 ; SI: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:8 offset1:9 | |
237 ; SI: s_endpgm | |
238 define void @store_lds_i64_align_4_with_offset(i64 addrspace(3)* %out) #0 { | |
239 %ptr = getelementptr i64, i64 addrspace(3)* %out, i32 4 | |
240 store i64 0, i64 addrspace(3)* %ptr, align 4 | |
241 ret void | |
242 } | |
243 | |
244 ; SI-LABEL: {{^}}store_lds_i64_align_4_with_split_offset: | |
245 ; The tests for the case where the lo offset is 8-bits, but the hi offset is 9-bits | |
246 ; SI: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset1:1 | |
247 ; SI: s_endpgm | |
248 define void @store_lds_i64_align_4_with_split_offset(i64 addrspace(3)* %out) #0 { | |
249 %ptr = bitcast i64 addrspace(3)* %out to i32 addrspace(3)* | |
250 %ptr255 = getelementptr i32, i32 addrspace(3)* %ptr, i32 255 | |
251 %ptri64 = bitcast i32 addrspace(3)* %ptr255 to i64 addrspace(3)* | |
252 store i64 0, i64 addrspace(3)* %out, align 4 | |
253 ret void | |
254 } |