121
|
1 ; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
|
2 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
77
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
3
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
; Declarations of the three llvm.memcpy overloads called by the kernels in
; this file:
;   p3i8.p3i8.i32 - addrspace(3) -> addrspace(3), i32 length
;   p1i8.p1i8.i64 - addrspace(1) -> addrspace(1), i64 length
;   p1i8.p2i8.i64 - addrspace(2) -> addrspace(1), i64 length
; Each takes (dest, src, length, i32, i1); the callers pass their alignment
; (1/2/4/8/16) as the i32 operand and false as the i1 operand.
4 declare void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* nocapture, i8 addrspace(3)* nocapture, i32, i32, i1) nounwind
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
5 declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture, i64, i32, i1) nounwind
|
120
|
6 declare void @llvm.memcpy.p1i8.p2i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(2)* nocapture, i64, i32, i1) nounwind
|
77
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
7
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
8
|
83
|
9 ; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align1:
|
120
|
10 ; SI-DAG: ds_read_u8
|
|
11 ; SI-DAG: ds_read_u8
|
|
12 ; SI-DAG: ds_read_u8
|
|
13 ; SI-DAG: ds_read_u8
|
|
14 ; SI-DAG: ds_read_u8
|
|
15 ; SI-DAG: ds_read_u8
|
|
16 ; SI-DAG: ds_read_u8
|
|
17 ; SI-DAG: ds_read_u8
|
77
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
18
|
120
|
19 ; SI-DAG: ds_read_u8
|
|
20 ; SI-DAG: ds_read_u8
|
|
21 ; SI-DAG: ds_read_u8
|
|
22 ; SI-DAG: ds_read_u8
|
|
23 ; SI-DAG: ds_read_u8
|
|
24 ; SI-DAG: ds_read_u8
|
|
25 ; SI-DAG: ds_read_u8
|
|
26 ; SI-DAG: ds_read_u8
|
77
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
27
|
120
|
28 ; SI-DAG: ds_read_u8
|
|
29 ; SI-DAG: ds_read_u8
|
|
30 ; SI-DAG: ds_read_u8
|
|
31 ; SI-DAG: ds_read_u8
|
|
32 ; SI-DAG: ds_read_u8
|
|
33 ; SI-DAG: ds_read_u8
|
|
34 ; SI-DAG: ds_read_u8
|
|
35 ; SI-DAG: ds_read_u8
|
77
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
36
|
120
|
37 ; SI-DAG: ds_read_u8
|
|
38 ; SI-DAG: ds_read_u8
|
|
39 ; SI-DAG: ds_read_u8
|
|
40 ; SI-DAG: ds_read_u8
|
|
41 ; SI-DAG: ds_read_u8
|
|
42 ; SI-DAG: ds_read_u8
|
|
43 ; SI-DAG: ds_read_u8
|
|
44 ; SI-DAG: ds_read_u8
|
77
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
45
|
120
|
46 ; SI-DAG: ds_write_b8
|
|
47 ; SI-DAG: ds_write_b8
|
|
48 ; SI-DAG: ds_write_b8
|
|
49 ; SI-DAG: ds_write_b8
|
|
50 ; SI-DAG: ds_write_b8
|
|
51 ; SI-DAG: ds_write_b8
|
|
52 ; SI-DAG: ds_write_b8
|
|
53 ; SI-DAG: ds_write_b8
|
77
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
54
|
120
|
55 ; SI-DAG: ds_write_b8
|
|
56 ; SI-DAG: ds_write_b8
|
|
57 ; SI-DAG: ds_write_b8
|
|
58 ; SI-DAG: ds_write_b8
|
|
59 ; SI-DAG: ds_write_b8
|
|
60 ; SI-DAG: ds_write_b8
|
|
61 ; SI-DAG: ds_write_b8
|
|
62 ; SI-DAG: ds_write_b8
|
77
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
63
|
120
|
64 ; SI-DAG: ds_write_b8
|
|
65 ; SI-DAG: ds_write_b8
|
|
66 ; SI-DAG: ds_write_b8
|
|
67 ; SI-DAG: ds_write_b8
|
|
68 ; SI-DAG: ds_write_b8
|
|
69 ; SI-DAG: ds_write_b8
|
|
70 ; SI-DAG: ds_write_b8
|
|
71 ; SI-DAG: ds_write_b8
|
77
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
72
|
120
|
73 ; SI-DAG: ds_write_b8
|
|
74 ; SI-DAG: ds_write_b8
|
|
75 ; SI-DAG: ds_write_b8
|
|
76 ; SI-DAG: ds_write_b8
|
|
77 ; SI-DAG: ds_write_b8
|
|
78 ; SI-DAG: ds_write_b8
|
|
79 ; SI-DAG: ds_write_b8
|
|
80 ; SI-DAG: ds_write_b8
|
77
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
81
|
83
|
82 ; SI: s_endpgm
|
121
|
; Kernel under test: bitcasts the i64 addrspace(3) pointers %in and %out to
; i8 addrspace(3)* and copies 32 bytes from %in to %out with
; llvm.memcpy.p3i8.p3i8.i32, passing i32 1 as the alignment operand.
; The FileCheck directives preceding this definition expect 32 ds_read_u8
; and 32 ds_write_b8 instructions (unordered checks), then s_endpgm.
83 define amdgpu_kernel void @test_small_memcpy_i64_lds_to_lds_align1(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
|
77
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
84 %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
85 %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
; Operands: dest, src, length = 32 bytes, alignment = 1, i1 flag = false.
86 call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 1, i1 false) nounwind
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
87 ret void
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
88 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
89
|
83
|
90 ; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align2:
|
120
|
91 ; SI-DAG: ds_read_u16
|
|
92 ; SI-DAG: ds_read_u16
|
|
93 ; SI-DAG: ds_read_u16
|
|
94 ; SI-DAG: ds_read_u16
|
|
95 ; SI-DAG: ds_read_u16
|
|
96 ; SI-DAG: ds_read_u16
|
|
97 ; SI-DAG: ds_read_u16
|
|
98 ; SI-DAG: ds_read_u16
|
77
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
99
|
120
|
100 ; SI-DAG: ds_read_u16
|
|
101 ; SI-DAG: ds_read_u16
|
|
102 ; SI-DAG: ds_read_u16
|
|
103 ; SI-DAG: ds_read_u16
|
|
104 ; SI-DAG: ds_read_u16
|
|
105 ; SI-DAG: ds_read_u16
|
|
106 ; SI-DAG: ds_read_u16
|
|
107 ; SI-DAG: ds_read_u16
|
77
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
108
|
120
|
109 ; SI-DAG: ds_write_b16
|
|
110 ; SI-DAG: ds_write_b16
|
|
111 ; SI-DAG: ds_write_b16
|
|
112 ; SI-DAG: ds_write_b16
|
|
113 ; SI-DAG: ds_write_b16
|
|
114 ; SI-DAG: ds_write_b16
|
|
115 ; SI-DAG: ds_write_b16
|
|
116 ; SI-DAG: ds_write_b16
|
77
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
117
|
120
|
118 ; SI-DAG: ds_write_b16
|
|
119 ; SI-DAG: ds_write_b16
|
|
120 ; SI-DAG: ds_write_b16
|
|
121 ; SI-DAG: ds_write_b16
|
|
122 ; SI-DAG: ds_write_b16
|
|
123 ; SI-DAG: ds_write_b16
|
|
124 ; SI-DAG: ds_write_b16
|
|
125 ; SI-DAG: ds_write_b16
|
77
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
126
|
83
|
127 ; SI: s_endpgm
|
121
|
; Kernel under test: bitcasts the i64 addrspace(3) pointers %in and %out to
; i8 addrspace(3)* and copies 32 bytes from %in to %out with
; llvm.memcpy.p3i8.p3i8.i32, passing i32 2 as the alignment operand.
; The FileCheck directives preceding this definition expect 16 ds_read_u16
; and 16 ds_write_b16 instructions (unordered checks), then s_endpgm.
128 define amdgpu_kernel void @test_small_memcpy_i64_lds_to_lds_align2(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
|
77
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
129 %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
130 %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
; Operands: dest, src, length = 32 bytes, alignment = 2, i1 flag = false.
131 call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 2, i1 false) nounwind
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
132 ret void
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
133 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
134
|
83
|
135 ; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align4:
|
100
|
136 ; SI: ds_read2_b32
|
|
137 ; SI: ds_read2_b32
|
|
138 ; SI: ds_read2_b32
|
|
139 ; SI: ds_read2_b32
|
77
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
140
|
100
|
141 ; SI: ds_write2_b32
|
|
142 ; SI: ds_write2_b32
|
|
143 ; SI: ds_write2_b32
|
|
144 ; SI: ds_write2_b32
|
77
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
145
|
83
|
146 ; SI: s_endpgm
|
121
|
; Kernel under test: bitcasts the i64 addrspace(3) pointers %in and %out to
; i8 addrspace(3)* and copies 32 bytes from %in to %out with
; llvm.memcpy.p3i8.p3i8.i32, passing i32 4 as the alignment operand.
; The FileCheck directives preceding this definition expect four
; ds_read2_b32, then four ds_write2_b32, then s_endpgm, in that order.
147 define amdgpu_kernel void @test_small_memcpy_i64_lds_to_lds_align4(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
|
77
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
148 %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
149 %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
; Operands: dest, src, length = 32 bytes, alignment = 4, i1 flag = false.
150 call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 4, i1 false) nounwind
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
151 ret void
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
152 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
153
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
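154 ; FIXME: Use 64-bit ops (NOTE(review): the align8 checks that follow already expect ds_read2_b64 / ds_write2_b64, so this FIXME may be stale -- confirm before removing)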
|
83
|
155 ; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align8:
|
77
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
156
|
120
|
157 ; SI: ds_read2_b64
|
|
158 ; SI: ds_read2_b64
|
77
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
159
|
120
|
160 ; SI: ds_write2_b64
|
|
161 ; SI: ds_write2_b64
|
77
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
162
|
83
|
163 ; SI-DAG: s_endpgm
|
121
|
; Kernel under test: bitcasts the i64 addrspace(3) pointers %in and %out to
; i8 addrspace(3)* and copies 32 bytes from %in to %out with
; llvm.memcpy.p3i8.p3i8.i32, passing i32 8 as the alignment operand.
; The FileCheck directives preceding this definition expect two
; ds_read2_b64, then two ds_write2_b64, then s_endpgm.
164 define amdgpu_kernel void @test_small_memcpy_i64_lds_to_lds_align8(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
|
77
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
165 %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
166 %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
; Operands: dest, src, length = 32 bytes, alignment = 8, i1 flag = false.
167 call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 8, i1 false) nounwind
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
168 ret void
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
169 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
170
|
83
|
171 ; FUNC-LABEL: {{^}}test_small_memcpy_i64_global_to_global_align1:
|
|
172 ; SI-DAG: buffer_load_ubyte
|
|
173 ; SI-DAG: buffer_store_byte
|
|
174 ; SI-DAG: buffer_load_ubyte
|
|
175 ; SI-DAG: buffer_store_byte
|
|
176 ; SI-DAG: buffer_load_ubyte
|
|
177 ; SI-DAG: buffer_store_byte
|
|
178 ; SI-DAG: buffer_load_ubyte
|
|
179 ; SI-DAG: buffer_store_byte
|
|
180 ; SI-DAG: buffer_load_ubyte
|
|
181 ; SI-DAG: buffer_store_byte
|
|
182 ; SI-DAG: buffer_load_ubyte
|
|
183 ; SI-DAG: buffer_store_byte
|
|
184 ; SI-DAG: buffer_load_ubyte
|
|
185 ; SI-DAG: buffer_store_byte
|
|
186 ; SI-DAG: buffer_load_ubyte
|
|
187 ; SI-DAG: buffer_store_byte
|
77
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
188
|
83
|
189 ; SI-DAG: buffer_load_ubyte
|
|
190 ; SI-DAG: buffer_store_byte
|
|
191 ; SI-DAG: buffer_load_ubyte
|
|
192 ; SI-DAG: buffer_store_byte
|
|
193 ; SI-DAG: buffer_load_ubyte
|
|
194 ; SI-DAG: buffer_store_byte
|
|
195 ; SI-DAG: buffer_load_ubyte
|
|
196 ; SI-DAG: buffer_store_byte
|
|
197 ; SI-DAG: buffer_load_ubyte
|
|
198 ; SI-DAG: buffer_store_byte
|
|
199 ; SI-DAG: buffer_load_ubyte
|
|
200 ; SI-DAG: buffer_store_byte
|
|
201 ; SI-DAG: buffer_load_ubyte
|
|
202 ; SI-DAG: buffer_store_byte
|
|
203 ; SI-DAG: buffer_load_ubyte
|
|
204 ; SI-DAG: buffer_store_byte
|
77
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
205
|
83
|
206 ; SI-DAG: buffer_load_ubyte
|
|
207 ; SI-DAG: buffer_store_byte
|
|
208 ; SI-DAG: buffer_load_ubyte
|
|
209 ; SI-DAG: buffer_store_byte
|
|
210 ; SI-DAG: buffer_load_ubyte
|
|
211 ; SI-DAG: buffer_store_byte
|
|
212 ; SI-DAG: buffer_load_ubyte
|
|
213 ; SI-DAG: buffer_store_byte
|
|
214 ; SI-DAG: buffer_load_ubyte
|
|
215 ; SI-DAG: buffer_store_byte
|
|
216 ; SI-DAG: buffer_load_ubyte
|
|
217 ; SI-DAG: buffer_store_byte
|
|
218 ; SI-DAG: buffer_load_ubyte
|
|
219 ; SI-DAG: buffer_store_byte
|
|
220 ; SI-DAG: buffer_load_ubyte
|
|
221 ; SI-DAG: buffer_store_byte
|
77
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
222
|
83
|
223 ; SI-DAG: buffer_load_ubyte
|
|
224 ; SI-DAG: buffer_store_byte
|
|
225 ; SI-DAG: buffer_load_ubyte
|
|
226 ; SI-DAG: buffer_store_byte
|
|
227 ; SI-DAG: buffer_load_ubyte
|
|
228 ; SI-DAG: buffer_store_byte
|
|
229 ; SI-DAG: buffer_load_ubyte
|
|
230 ; SI-DAG: buffer_store_byte
|
|
231 ; SI-DAG: buffer_load_ubyte
|
|
232 ; SI-DAG: buffer_store_byte
|
|
233 ; SI-DAG: buffer_load_ubyte
|
|
234 ; SI-DAG: buffer_store_byte
|
|
235 ; SI-DAG: buffer_load_ubyte
|
|
236 ; SI-DAG: buffer_store_byte
|
|
237 ; SI-DAG: buffer_load_ubyte
|
|
238 ; SI-DAG: buffer_store_byte
|
77
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
239
|
83
|
240 ; SI: s_endpgm
|
121
|
; Kernel under test: bitcasts the i64 addrspace(1) pointers %in and %out to
; i8 addrspace(1)* and copies 32 bytes from %in to %out with
; llvm.memcpy.p1i8.p1i8.i64, passing i32 1 as the alignment operand.
; The FileCheck directives preceding this definition expect 32
; buffer_load_ubyte and 32 buffer_store_byte instructions (unordered
; checks), then s_endpgm.
241 define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align1(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
|
77
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
242 %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
243 %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
; Operands: dest, src, length = 32 bytes (i64), alignment = 1, i1 flag = false.
244 call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 1, i1 false) nounwind
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
245 ret void
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
246 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
247
|
83
|
248 ; FUNC-LABEL: {{^}}test_small_memcpy_i64_global_to_global_align2:
|
|
249 ; SI-DAG: buffer_load_ushort
|
|
250 ; SI-DAG: buffer_load_ushort
|
|
251 ; SI-DAG: buffer_load_ushort
|
|
252 ; SI-DAG: buffer_load_ushort
|
|
253 ; SI-DAG: buffer_load_ushort
|
|
254 ; SI-DAG: buffer_load_ushort
|
|
255 ; SI-DAG: buffer_load_ushort
|
|
256 ; SI-DAG: buffer_load_ushort
|
|
257 ; SI-DAG: buffer_load_ushort
|
|
258 ; SI-DAG: buffer_load_ushort
|
|
259 ; SI-DAG: buffer_load_ushort
|
|
260 ; SI-DAG: buffer_load_ushort
|
|
261 ; SI-DAG: buffer_load_ushort
|
|
262 ; SI-DAG: buffer_load_ushort
|
|
263 ; SI-DAG: buffer_load_ushort
|
|
264 ; SI-DAG: buffer_load_ushort
|
77
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
265
|
83
|
266 ; SI-DAG: buffer_store_short
|
|
267 ; SI-DAG: buffer_store_short
|
|
268 ; SI-DAG: buffer_store_short
|
|
269 ; SI-DAG: buffer_store_short
|
|
270 ; SI-DAG: buffer_store_short
|
|
271 ; SI-DAG: buffer_store_short
|
|
272 ; SI-DAG: buffer_store_short
|
|
273 ; SI-DAG: buffer_store_short
|
|
274 ; SI-DAG: buffer_store_short
|
|
275 ; SI-DAG: buffer_store_short
|
|
276 ; SI-DAG: buffer_store_short
|
|
277 ; SI-DAG: buffer_store_short
|
|
278 ; SI-DAG: buffer_store_short
|
|
279 ; SI-DAG: buffer_store_short
|
|
280 ; SI-DAG: buffer_store_short
|
|
281 ; SI-DAG: buffer_store_short
|
77
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
282
|
83
|
283 ; SI: s_endpgm
|
121
|
; Kernel under test: bitcasts the i64 addrspace(1) pointers %in and %out to
; i8 addrspace(1)* and copies 32 bytes from %in to %out with
; llvm.memcpy.p1i8.p1i8.i64, passing i32 2 as the alignment operand.
; The FileCheck directives preceding this definition expect 16
; buffer_load_ushort and 16 buffer_store_short instructions (unordered
; checks), then s_endpgm.
284 define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align2(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
|
77
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
285 %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
286 %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
; Operands: dest, src, length = 32 bytes (i64), alignment = 2, i1 flag = false.
287 call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 2, i1 false) nounwind
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
288 ret void
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
289 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
290
|
83
|
291 ; FUNC-LABEL: {{^}}test_small_memcpy_i64_global_to_global_align4:
|
|
292 ; SI: buffer_load_dwordx4
|
|
293 ; SI: buffer_load_dwordx4
|
|
294 ; SI: buffer_store_dwordx4
|
|
295 ; SI: buffer_store_dwordx4
|
|
296 ; SI: s_endpgm
|
121
|
; Kernel under test: bitcasts the i64 addrspace(1) pointers %in and %out to
; i8 addrspace(1)* and copies 32 bytes from %in to %out with
; llvm.memcpy.p1i8.p1i8.i64, passing i32 4 as the alignment operand.
; The FileCheck directives preceding this definition expect two
; buffer_load_dwordx4, then two buffer_store_dwordx4, then s_endpgm.
297 define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align4(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
|
77
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
298 %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
299 %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
; Operands: dest, src, length = 32 bytes (i64), alignment = 4, i1 flag = false.
300 call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 4, i1 false) nounwind
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
301 ret void
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
302 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
303
|
83
|
304 ; FUNC-LABEL: {{^}}test_small_memcpy_i64_global_to_global_align8:
|
|
305 ; SI: buffer_load_dwordx4
|
|
306 ; SI: buffer_load_dwordx4
|
|
307 ; SI: buffer_store_dwordx4
|
|
308 ; SI: buffer_store_dwordx4
|
|
309 ; SI: s_endpgm
|
121
|
; Kernel under test: bitcasts the i64 addrspace(1) pointers %in and %out to
; i8 addrspace(1)* and copies 32 bytes from %in to %out with
; llvm.memcpy.p1i8.p1i8.i64, passing i32 8 as the alignment operand.
; The FileCheck directives preceding this definition expect two
; buffer_load_dwordx4, then two buffer_store_dwordx4, then s_endpgm.
310 define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align8(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
|
77
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
311 %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
312 %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
; Operands: dest, src, length = 32 bytes (i64), alignment = 8, i1 flag = false.
313 call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 8, i1 false) nounwind
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
314 ret void
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
315 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
316
|
83
|
317 ; FUNC-LABEL: {{^}}test_small_memcpy_i64_global_to_global_align16:
|
|
318 ; SI: buffer_load_dwordx4
|
|
319 ; SI: buffer_load_dwordx4
|
|
320 ; SI: buffer_store_dwordx4
|
|
321 ; SI: buffer_store_dwordx4
|
|
322 ; SI: s_endpgm
|
121
|
; Kernel under test: bitcasts the i64 addrspace(1) pointers %in and %out to
; i8 addrspace(1)* and copies 32 bytes from %in to %out with
; llvm.memcpy.p1i8.p1i8.i64, passing i32 16 as the alignment operand.
; The FileCheck directives preceding this definition expect two
; buffer_load_dwordx4, then two buffer_store_dwordx4, then s_endpgm.
323 define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align16(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
|
77
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
324 %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
325 %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
; Operands: dest, src, length = 32 bytes (i64), alignment = 16, i1 flag = false.
326 call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 16, i1 false) nounwind
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
327 ret void
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
328 }
|
120
|
329
|
|
330 ; Test shouldConvertConstantLoadToIntImm
|
|
331 @hello.align4 = private unnamed_addr addrspace(2) constant [16 x i8] c"constant string\00", align 4
|
|
332 @hello.align1 = private unnamed_addr addrspace(2) constant [16 x i8] c"constant string\00", align 1
|
|
333
|
|
334 ; FUNC-LABEL: {{^}}test_memcpy_const_string_align4:
|
|
335 ; SI: s_getpc_b64
|
|
336 ; SI: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, hello.align4+20
|
|
337 ; SI: s_addc_u32
|
|
338 ; SI-DAG: s_load_dwordx4
|
|
339 ; SI-DAG: s_load_dwordx4
|
|
340 ; SI-DAG: s_load_dwordx2
|
|
341 ; SI-DAG: buffer_store_dwordx4
|
|
342 ; SI-DAG: buffer_store_dwordx4
|
121
|
; Kernel under test: copies from the addrspace(2) constant @hello.align4
; (declared as [16 x i8], align 4) into the addrspace(1) pointer %out using
; llvm.memcpy.p1i8.p2i8.i64 with alignment operand 4.
; The FileCheck directives preceding this definition expect the source
; address to be formed with s_getpc_b64 / s_add_u32 / s_addc_u32, scalar
; loads (s_load_dwordx4, s_load_dwordx2) and two buffer_store_dwordx4.
; NOTE(review): the copy length is i64 32 while the global is only 16 bytes
; long -- confirm that reading past the end of the constant is intentional.
343 define amdgpu_kernel void @test_memcpy_const_string_align4(i8 addrspace(1)* noalias %out) nounwind {
|
120
|
344 %str = bitcast [16 x i8] addrspace(2)* @hello.align4 to i8 addrspace(2)*
|
|
345 call void @llvm.memcpy.p1i8.p2i8.i64(i8 addrspace(1)* %out, i8 addrspace(2)* %str, i64 32, i32 4, i1 false)
|
|
346 ret void
|
|
347 }
|
|
348
|
|
349 ; FUNC-LABEL: {{^}}test_memcpy_const_string_align1:
|
|
350 ; SI-NOT: buffer_load
|
|
351 ; SI: v_mov_b32_e32 v{{[0-9]+}}, 0x69
|
|
352 ; SI: buffer_store_byte
|
|
353 ; SI: buffer_store_byte
|
|
354 ; SI: buffer_store_byte
|
|
355 ; SI: buffer_store_byte
|
|
356 ; SI: buffer_store_byte
|
|
357 ; SI: buffer_store_byte
|
|
358 ; SI: buffer_store_byte
|
|
359 ; SI: buffer_store_byte
|
|
360 ; SI: buffer_store_byte
|
|
361 ; SI: buffer_store_byte
|
|
362 ; SI: buffer_store_byte
|
|
363 ; SI: buffer_store_byte
|
|
364 ; SI: buffer_store_byte
|
|
365 ; SI: buffer_store_byte
|
|
366 ; SI: buffer_store_byte
|
|
367 ; SI: buffer_store_byte
|
121
|
; Kernel under test: copies from the addrspace(2) constant @hello.align1
; (declared as [16 x i8], align 1) into the addrspace(1) pointer %out using
; llvm.memcpy.p1i8.p2i8.i64 with alignment operand 1.
; The FileCheck directives preceding this definition expect no buffer_load,
; a v_mov_b32_e32 of the immediate 0x69, and 16 buffer_store_byte
; instructions.
; NOTE(review): the copy length is i64 32 while the global is only 16 bytes
; long, and only 16 byte stores are checked -- confirm this is intentional.
368 define amdgpu_kernel void @test_memcpy_const_string_align1(i8 addrspace(1)* noalias %out) nounwind {
|
120
|
369 %str = bitcast [16 x i8] addrspace(2)* @hello.align1 to i8 addrspace(2)*
|
|
370 call void @llvm.memcpy.p1i8.p2i8.i64(i8 addrspace(1)* %out, i8 addrspace(2)* %str, i64 32, i32 1, i1 false)
|
|
371 ret void
|
|
372 }
|