120
|
1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
|
|
2 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
|
|
3 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
|
|
4
|
|
5 ; FUNC-LABEL: {{^}}load_f32_local:
|
|
6 ; GCN: s_mov_b32 m0
|
|
7 ; GCN: ds_read_b32
|
|
8
|
|
9 ; EG: LDS_READ_RET
|
121
|
; Kernel under test: reads a single float through the addrspace(3) pointer
; %in and writes that value through the addrspace(1) pointer %out.
define amdgpu_kernel void @load_f32_local(float addrspace(1)* %out, float addrspace(3)* %in) #0 {
entry:
  %val = load float, float addrspace(3)* %in
  store float %val, float addrspace(1)* %out
  ret void
}
|
|
16
|
|
17 ; FUNC-LABEL: {{^}}load_v2f32_local:
|
|
18 ; GCN: s_mov_b32 m0
|
|
19 ; GCN: ds_read_b64
|
|
20
|
|
21 ; EG: LDS_READ_RET
|
|
22 ; EG: LDS_READ_RET
|
121
|
; Kernel under test: reads a <2 x float> vector through the addrspace(3)
; pointer %in and writes it through the addrspace(1) pointer %out.
define amdgpu_kernel void @load_v2f32_local(<2 x float> addrspace(1)* %out, <2 x float> addrspace(3)* %in) #0 {
entry:
  %vec = load <2 x float>, <2 x float> addrspace(3)* %in
  store <2 x float> %vec, <2 x float> addrspace(1)* %out
  ret void
}
|
|
29
|
|
30 ; FIXME: should this do a read2_b64?
|
|
31 ; FUNC-LABEL: {{^}}local_load_v3f32:
|
|
32 ; GCN-DAG: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:8
|
|
33 ; GCN-DAG: ds_read_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+$}}
|
|
34 ; GCN: s_waitcnt
|
|
35 ; GCN-DAG: ds_write_b64
|
|
36 ; GCN-DAG: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:8{{$}}
|
|
37
|
|
38 ; EG: LDS_READ_RET
|
|
39 ; EG: LDS_READ_RET
|
|
40 ; EG: LDS_READ_RET
|
121
|
; Kernel under test: copies one <3 x float> vector from the addrspace(3)
; pointer %in to the addrspace(3) pointer %out, so both the load and the
; store use address space 3.
define amdgpu_kernel void @local_load_v3f32(<3 x float> addrspace(3)* %out, <3 x float> addrspace(3)* %in) #0 {
entry:
  %vec = load <3 x float>, <3 x float> addrspace(3)* %in
  store <3 x float> %vec, <3 x float> addrspace(3)* %out
  ret void
}
|
|
47
|
|
48 ; FUNC-LABEL: {{^}}local_load_v4f32:
|
|
49 ; GCN: ds_read2_b64
|
|
50
|
|
51 ; EG: LDS_READ_RET
|
|
52 ; EG: LDS_READ_RET
|
|
53 ; EG: LDS_READ_RET
|
|
54 ; EG: LDS_READ_RET
|
121
|
; Kernel under test: copies one <4 x float> vector from the addrspace(3)
; pointer %in to the addrspace(3) pointer %out.
define amdgpu_kernel void @local_load_v4f32(<4 x float> addrspace(3)* %out, <4 x float> addrspace(3)* %in) #0 {
entry:
  %vec = load <4 x float>, <4 x float> addrspace(3)* %in
  store <4 x float> %vec, <4 x float> addrspace(3)* %out
  ret void
}
|
|
61
|
|
62 ; FUNC-LABEL: {{^}}local_load_v8f32:
|
|
63 ; GCN: ds_read2_b64
|
|
64 ; GCN: ds_read2_b64
|
|
65
|
|
66 ; EG: LDS_READ_RET
|
|
67 ; EG: LDS_READ_RET
|
|
68 ; EG: LDS_READ_RET
|
|
69 ; EG: LDS_READ_RET
|
|
70 ; EG: LDS_READ_RET
|
|
71 ; EG: LDS_READ_RET
|
|
72 ; EG: LDS_READ_RET
|
|
73 ; EG: LDS_READ_RET
|
121
|
; Kernel under test: copies one <8 x float> vector from the addrspace(3)
; pointer %in to the addrspace(3) pointer %out.
define amdgpu_kernel void @local_load_v8f32(<8 x float> addrspace(3)* %out, <8 x float> addrspace(3)* %in) #0 {
entry:
  %vec = load <8 x float>, <8 x float> addrspace(3)* %in
  store <8 x float> %vec, <8 x float> addrspace(3)* %out
  ret void
}
|
|
80
|
|
81 ; FUNC-LABEL: {{^}}local_load_v16f32:
|
|
82 ; GCN: ds_read2_b64
|
|
83 ; GCN: ds_read2_b64
|
|
84 ; GCN: ds_read2_b64
|
|
85 ; GCN: ds_read2_b64
|
|
86
|
|
87 ; EG: LDS_READ_RET
|
|
88 ; EG: LDS_READ_RET
|
|
89 ; EG: LDS_READ_RET
|
|
90 ; EG: LDS_READ_RET
|
|
91 ; EG: LDS_READ_RET
|
|
92 ; EG: LDS_READ_RET
|
|
93 ; EG: LDS_READ_RET
|
|
94 ; EG: LDS_READ_RET
|
|
95 ; EG: LDS_READ_RET
|
|
96 ; EG: LDS_READ_RET
|
|
97 ; EG: LDS_READ_RET
|
|
98 ; EG: LDS_READ_RET
|
|
99 ; EG: LDS_READ_RET
|
|
100 ; EG: LDS_READ_RET
|
|
101 ; EG: LDS_READ_RET
|
|
102 ; EG: LDS_READ_RET
|
121
|
; Kernel under test: copies one <16 x float> vector from the addrspace(3)
; pointer %in to the addrspace(3) pointer %out.
define amdgpu_kernel void @local_load_v16f32(<16 x float> addrspace(3)* %out, <16 x float> addrspace(3)* %in) #0 {
entry:
  %vec = load <16 x float>, <16 x float> addrspace(3)* %in
  store <16 x float> %vec, <16 x float> addrspace(3)* %out
  ret void
}
|
|
109
|
|
; Attribute group #0, which the kernel definitions above attach via `#0`;
; it contains only `nounwind`.
110 attributes #0 = { nounwind }
|