120
|
1 ;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s -check-prefix=CHECK -check-prefix=SICI
|
|
2 ;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s -check-prefix=CHECK -check-prefix=VI
|
|
3
|
|
; buffer_load: issues three v4f32 format loads from the same resource with
; constant-zero index and offset operands. The calls differ only in the two
; trailing i1 flags: (0,0), (1,0) and (0,1). The expected output is one plain
; load, one carrying the glc modifier and one carrying the slc modifier, each
; written to its own 4-register range and returned together in a struct.
4 ;CHECK-LABEL: {{^}}buffer_load:
|
|
5 ;CHECK: buffer_load_format_xyzw v[0:3], off, s[0:3], 0
|
|
6 ;CHECK: buffer_load_format_xyzw v[4:7], off, s[0:3], 0 glc
|
|
7 ;CHECK: buffer_load_format_xyzw v[8:11], off, s[0:3], 0 slc
|
|
8 ;CHECK: s_waitcnt
|
|
9 define amdgpu_ps {<4 x float>, <4 x float>, <4 x float>} @buffer_load(<4 x i32> inreg) {
|
|
10 main_body:
|
|
11 %data = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 0, i1 0, i1 0)
|
|
12 %data_glc = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 0, i1 1, i1 0)
|
|
13 %data_slc = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 0, i1 0, i1 1)
|
|
14 %r0 = insertvalue {<4 x float>, <4 x float>, <4 x float>} undef, <4 x float> %data, 0
|
|
15 %r1 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r0, <4 x float> %data_glc, 1
|
|
16 %r2 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r1, <4 x float> %data_slc, 2
|
|
17 ret {<4 x float>, <4 x float>, <4 x float>} %r2
|
|
18 }
|
|
19
|
|
; buffer_load_immoffs: passes the constant 42 as the third i32 operand of the
; intrinsic. The expected output is a single load with no VGPR address
; ("off") and the constant encoded directly as offset:42.
20 ;CHECK-LABEL: {{^}}buffer_load_immoffs:
|
|
21 ;CHECK: buffer_load_format_xyzw v[0:3], off, s[0:3], 0 offset:42
|
|
22 ;CHECK: s_waitcnt
|
|
23 define amdgpu_ps <4 x float> @buffer_load_immoffs(<4 x i32> inreg) {
|
|
24 main_body:
|
|
25 %data = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 42, i1 0, i1 0)
|
|
26 ret <4 x float> %data
|
|
27 }
|
|
28
|
|
; buffer_load_immoffs_large: three loads whose constant offsets are 4152,
; 36856 and 36864, none of which appears verbatim as an "offset:" field in
; the expected output.
;   - verde run (SICI prefix): the first offset is materialised in a VGPR
;     (0x1038 == 4152) and all three loads use the "offen" form.
;   - tonga run (VI prefix): each offset is split between the scalar offset
;     operand and the immediate field:
;       60     + 4092 == 4152
;       0x7ffc + 4092 == 36856
;       0x8ffc + 4    == 36864
;     These use the DAG form of the directive, so their relative order is
;     not constrained.
; NOTE(review): the splits presumably reflect the maximum encodable immediate
; offset on each target -- confirm against the backend before changing the
; constants.
29 ;CHECK-LABEL: {{^}}buffer_load_immoffs_large:
|
121
|
30 ;SICI: v_mov_b32_e32 [[VOFS:v[0-9]+]], 0x1038
|
120
|
31 ;SICI: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[VOFS]], s[0:3], 0 offen
|
|
32 ;SICI: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, s[0:3], 0 offen
|
121
|
33 ;VI-DAG: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 60 offset:4092
|
|
34 ;VI-DAG: s_movk_i32 [[OFS1:s[0-9]+]], 0x7ffc
|
|
35 ;VI-DAG: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], [[OFS1]] offset:4092
|
120
|
36 ;SICI: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, s[0:3], 0 offen
|
121
|
37 ;VI-DAG: s_mov_b32 [[OFS2:s[0-9]+]], 0x8ffc
|
|
38 ;VI-DAG: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], [[OFS2]] offset:4
|
120
|
39 ;CHECK: s_waitcnt
|
|
40 define amdgpu_ps <4 x float> @buffer_load_immoffs_large(<4 x i32> inreg) {
|
|
41 main_body:
|
121
|
42 %d.0 = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 4152, i1 0, i1 0)
|
|
43 %d.1 = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 36856, i1 0, i1 0)
|
120
|
44 %d.2 = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 36864, i1 0, i1 0)
|
|
45 %d.3 = fadd <4 x float> %d.0, %d.1
|
|
46 %data = fadd <4 x float> %d.2, %d.3
|
|
47 ret <4 x float> %data
|
|
48 }
|
|
49
|
|
; buffer_load_immoffs_reuse: two loads with constant offsets 4160 and 4176.
; Only the tonga run (VI prefix) has expectations in this function: a single
; scalar register holding 0xffc is shared by both loads, with the remainder
; in the immediate field (0xffc + 68 == 4160, 0xffc + 84 == 4176). The
; negative match between the two loads rejects any further s_mov, i.e. the
; scalar base must not be materialised a second time.
50 ;CHECK-LABEL: {{^}}buffer_load_immoffs_reuse:
|
121
|
51 ;VI: s_movk_i32 [[OFS:s[0-9]+]], 0xffc
|
|
52 ;VI: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], [[OFS]] offset:68
|
120
|
53 ;VI-NOT: s_mov
|
121
|
54 ;VI: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], [[OFS]] offset:84
|
120
|
55 ;VI: s_waitcnt
|
|
56 define amdgpu_ps <4 x float> @buffer_load_immoffs_reuse(<4 x i32> inreg) {
|
|
57 main_body:
|
|
58 %d.0 = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 4160, i1 0, i1 0)
|
|
59 %d.1 = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 4176, i1 0, i1 0)
|
|
60 %data = fadd <4 x float> %d.0, %d.1
|
|
61 ret <4 x float> %data
|
|
62 }
|
|
63
|
|
; buffer_load_idx: passes the function's second argument (%1) as the second
; operand of the intrinsic and a constant 0 as the third. The expected
; output addresses the load through v0 with the "idxen" modifier.
64 ;CHECK-LABEL: {{^}}buffer_load_idx:
|
|
65 ;CHECK: buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen
|
|
66 ;CHECK: s_waitcnt
|
|
67 define amdgpu_ps <4 x float> @buffer_load_idx(<4 x i32> inreg, i32) {
|
|
68 main_body:
|
|
69 %data = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 %1, i32 0, i1 0, i1 0)
|
|
70 ret <4 x float> %data
|
|
71 }
|
|
72
|
|
; buffer_load_ofs: passes the function's second argument (%1) as the third
; operand of the intrinsic and a constant 0 as the second. The expected
; output addresses the load through v0 with the "offen" modifier.
73 ;CHECK-LABEL: {{^}}buffer_load_ofs:
|
|
74 ;CHECK: buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 offen
|
|
75 ;CHECK: s_waitcnt
|
|
76 define amdgpu_ps <4 x float> @buffer_load_ofs(<4 x i32> inreg, i32) {
|
|
77 main_body:
|
|
78 %data = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 %1, i1 0, i1 0)
|
|
79 ret <4 x float> %data
|
|
80 }
|
|
81
|
|
; buffer_load_ofs_imm: the offset operand is computed as %1 + 60. The
; expected output contains no separate add: the variable part stays in v0
; ("offen") and the constant 60 is folded into the immediate as offset:60.
82 ;CHECK-LABEL: {{^}}buffer_load_ofs_imm:
|
121
|
83 ;CHECK: buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 offen offset:60
|
120
|
84 ;CHECK: s_waitcnt
|
|
85 define amdgpu_ps <4 x float> @buffer_load_ofs_imm(<4 x i32> inreg, i32) {
|
|
86 main_body:
|
121
|
87 %ofs = add i32 %1, 60
|
120
|
88 %data = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 %ofs, i1 0, i1 0)
|
|
89 ret <4 x float> %data
|
|
90 }
|
|
91
|
|
; buffer_load_both: passes %1 and %2 as the second and third operands of the
; intrinsic, in argument order. The expected output uses the register pair
; v[0:1] directly with both "idxen" and "offen" set, with no preceding copy.
92 ;CHECK-LABEL: {{^}}buffer_load_both:
|
|
93 ;CHECK: buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen
|
|
94 ;CHECK: s_waitcnt
|
|
95 define amdgpu_ps <4 x float> @buffer_load_both(<4 x i32> inreg, i32, i32) {
|
|
96 main_body:
|
|
97 %data = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 %1, i32 %2, i1 0, i1 0)
|
|
98 ret <4 x float> %data
|
|
99 }
|
|
100
|
|
; buffer_load_both_reversed: same as buffer_load_both but with the two
; arguments swapped in the call (%2 then %1). The expected output first
; copies v0 into v2 and then addresses the load through the pair v[1:2], so
; the two values end up in the opposite order within the register pair.
101 ;CHECK-LABEL: {{^}}buffer_load_both_reversed:
|
|
102 ;CHECK: v_mov_b32_e32 v2, v0
|
|
103 ;CHECK: buffer_load_format_xyzw v[0:3], v[1:2], s[0:3], 0 idxen offen
|
|
104 ;CHECK: s_waitcnt
|
|
105 define amdgpu_ps <4 x float> @buffer_load_both_reversed(<4 x i32> inreg, i32, i32) {
|
|
106 main_body:
|
|
107 %data = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 %2, i32 %1, i1 0, i1 0)
|
|
108 ret <4 x float> %data
|
|
109 }
|
|
110
|
|
; buffer_load_x: calls the scalar-float (f32) variant of the intrinsic with
; constant-zero index and offset operands. The expected output is the
; single-component instruction buffer_load_format_x writing one register (v0).
111 ;CHECK-LABEL: {{^}}buffer_load_x:
|
|
112 ;CHECK: buffer_load_format_x v0, off, s[0:3], 0
|
|
113 ;CHECK: s_waitcnt
|
|
114 define amdgpu_ps float @buffer_load_x(<4 x i32> inreg %rsrc) {
|
|
115 main_body:
|
|
116 %data = call float @llvm.amdgcn.buffer.load.format.f32(<4 x i32> %rsrc, i32 0, i32 0, i1 0, i1 0)
|
|
117 ret float %data
|
|
118 }
|
|
119
|
|
; buffer_load_xy: calls the <2 x float> (v2f32) variant of the intrinsic with
; constant-zero index and offset operands. The expected output is the
; two-component instruction buffer_load_format_xy writing v[0:1].
120 ;CHECK-LABEL: {{^}}buffer_load_xy:
|
|
121 ;CHECK: buffer_load_format_xy v[0:1], off, s[0:3], 0
|
|
122 ;CHECK: s_waitcnt
|
|
123 define amdgpu_ps <2 x float> @buffer_load_xy(<4 x i32> inreg %rsrc) {
|
|
124 main_body:
|
|
125 %data = call <2 x float> @llvm.amdgcn.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 0, i32 0, i1 0, i1 0)
|
|
126 ret <2 x float> %data
|
|
127 }
|
|
128
|
|
; Declarations of the three intrinsic variants used by the functions in this
; file (f32, v2f32 and v4f32 results). All share the operand list
; (<4 x i32>, i32, i32, i1, i1) and attribute group #0, defined at the end
; as nounwind + readonly.
129 declare float @llvm.amdgcn.buffer.load.format.f32(<4 x i32>, i32, i32, i1, i1) #0
|
|
130 declare <2 x float> @llvm.amdgcn.buffer.load.format.v2f32(<4 x i32>, i32, i32, i1, i1) #0
|
|
131 declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) #0
|
|
132
|
|
133 attributes #0 = { nounwind readonly }
|