test/CodeGen/AMDGPU/mubuf.ll @ 95:afa8332a0e37 (branch LLVM 3.8)

author:     Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
date:       Tue, 13 Oct 2015 17:48:58 +0900
children:   1172e4bd9c6f
comparison: 84:f3e34b893a5f -> 95:afa8332a0e37

; RUN: llc -march=amdgcn -mcpu=SI -show-mc-encoding -verify-machineinstrs < %s | FileCheck %s

declare i32 @llvm.r600.read.tidig.x() readnone

;;;==========================================================================;;;
;;; MUBUF LOAD TESTS
;;;==========================================================================;;;

; MUBUF load with an immediate byte offset that fits into 12 bits
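; A GEP index of 1 on an i32 pointer is a 4-byte offset, small enough to be
; encoded directly in the instruction's immediate offset field (offset:4).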
; CHECK-LABEL: {{^}}mubuf_load0:
; CHECK: buffer_load_dword v{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0 offset:4 ; encoding: [0x04,0x00,0x30,0xe0
define void @mubuf_load0(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
  %0 = getelementptr i32, i32 addrspace(1)* %in, i64 1
  %1 = load i32, i32 addrspace(1)* %0
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; MUBUF load with the largest possible immediate offset
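; 4095 (0xfff) is the maximum value of the 12-bit unsigned offset field, so it
; is still encoded inline as offset:4095.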
; CHECK-LABEL: {{^}}mubuf_load1:
; CHECK: buffer_load_ubyte v{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0 offset:4095 ; encoding: [0xff,0x0f,0x20,0xe0
define void @mubuf_load1(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
entry:
  %0 = getelementptr i8, i8 addrspace(1)* %in, i64 4095
  %1 = load i8, i8 addrspace(1)* %0
  store i8 %1, i8 addrspace(1)* %out
  ret void
}

; MUBUF load with an immediate byte offset that doesn't fit into 12 bits
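; 1024 i32 elements = 4096 bytes (0x1000), one past the 4095-byte limit, so the
; offset is materialized into an SGPR with s_movk_i32 and used as SOFFSET.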
; CHECK-LABEL: {{^}}mubuf_load2:
; CHECK: s_movk_i32 [[SOFFSET:s[0-9]+]], 0x1000
; CHECK: buffer_load_dword v{{[0-9]}}, s[{{[0-9]+:[0-9]+}}], [[SOFFSET]] ; encoding: [0x00,0x00,0x30,0xe0
define void @mubuf_load2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
  %0 = getelementptr i32, i32 addrspace(1)* %in, i64 1024
  %1 = load i32, i32 addrspace(1)* %0
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; MUBUF load with a 12-bit immediate offset and a register offset
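; The variable index goes through the addr64 VGPR address, while the constant
; +1 element (4 bytes) stays in the immediate offset field; no separate ADD
; should be emitted.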
; CHECK-LABEL: {{^}}mubuf_load3:
; CHECK-NOT: ADD
; CHECK: buffer_load_dword v{{[0-9]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:4 ; encoding: [0x04,0x80,0x30,0xe0
define void @mubuf_load3(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i64 %offset) {
entry:
  %0 = getelementptr i32, i32 addrspace(1)* %in, i64 %offset
  %1 = getelementptr i32, i32 addrspace(1)* %0, i64 1
  %2 = load i32, i32 addrspace(1)* %1
  store i32 %2, i32 addrspace(1)* %out
  ret void
}

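; The soffset operand can take the immediate 64 directly (the largest value
; that still folds; see soffset_no_fold below), so it should appear verbatim
; in the instruction.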
; CHECK-LABEL: {{^}}soffset_max_imm:
; CHECK: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 64 offen glc
define void @soffset_max_imm([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) #1 {
main_body:
  %tmp0 = getelementptr [6 x <16 x i8>], [6 x <16 x i8>] addrspace(2)* %0, i32 0, i32 0
  %tmp1 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp0
  %tmp2 = shl i32 %6, 2
  %tmp3 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %tmp1, i32 %tmp2, i32 64, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0)
  %tmp4 = add i32 %6, 16
  %tmp5 = bitcast float 0.0 to i32
  call void @llvm.SI.tbuffer.store.i32(<16 x i8> %tmp1, i32 %tmp5, i32 1, i32 %tmp4, i32 %4, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
  ret void
}

; Make sure immediates that aren't inline constants don't get folded into
; the soffset operand.
; FIXME: for this test we should be smart enough to shift the immediate into
; the offset field.
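; Here the soffset argument is 65 (0x41), which does not fold, so it is
; materialized with s_movk_i32 and passed in an SGPR.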
; CHECK-LABEL: {{^}}soffset_no_fold:
; CHECK: s_movk_i32 [[SOFFSET:s[0-9]+]], 0x41
; CHECK: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], [[SOFFSET]] offen glc
define void @soffset_no_fold([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) #1 {
main_body:
  %tmp0 = getelementptr [6 x <16 x i8>], [6 x <16 x i8>] addrspace(2)* %0, i32 0, i32 0
  %tmp1 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp0
  %tmp2 = shl i32 %6, 2
  %tmp3 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %tmp1, i32 %tmp2, i32 65, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0)
  %tmp4 = add i32 %6, 16
  %tmp5 = bitcast float 0.0 to i32
  call void @llvm.SI.tbuffer.store.i32(<16 x i8> %tmp1, i32 %tmp5, i32 1, i32 %tmp4, i32 %4, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
  ret void
}

;;;==========================================================================;;;
;;; MUBUF STORE TESTS
;;;==========================================================================;;;

; MUBUF store with an immediate byte offset that fits into 12 bits
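; Mirrors mubuf_load0: a 4-byte offset (one i32) is encoded directly as offset:4.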
; CHECK-LABEL: {{^}}mubuf_store0:
; CHECK: buffer_store_dword v{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0 offset:4 ; encoding: [0x04,0x00,0x70,0xe0
define void @mubuf_store0(i32 addrspace(1)* %out) {
entry:
  %0 = getelementptr i32, i32 addrspace(1)* %out, i64 1
  store i32 0, i32 addrspace(1)* %0
  ret void
}

; MUBUF store with the largest possible immediate offset
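; Mirrors mubuf_load1: 4095 bytes is the largest offset that still fits in the
; 12-bit immediate field.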
; CHECK-LABEL: {{^}}mubuf_store1:
; CHECK: buffer_store_byte v{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0 offset:4095 ; encoding: [0xff,0x0f,0x60,0xe0

define void @mubuf_store1(i8 addrspace(1)* %out) {
entry:
  %0 = getelementptr i8, i8 addrspace(1)* %out, i64 4095
  store i8 0, i8 addrspace(1)* %0
  ret void
}

; MUBUF store with an immediate byte offset that doesn't fit into 12 bits
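; Mirrors mubuf_load2: 1024 * 4 = 4096 bytes (0x1000) does not fit, so SOFFSET
; is set with s_movk_i32.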
; CHECK-LABEL: {{^}}mubuf_store2:
; CHECK: s_movk_i32 [[SOFFSET:s[0-9]+]], 0x1000
; CHECK: buffer_store_dword v{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[SOFFSET]] ; encoding: [0x00,0x00,0x70,0xe0
define void @mubuf_store2(i32 addrspace(1)* %out) {
entry:
  %0 = getelementptr i32, i32 addrspace(1)* %out, i64 1024
  store i32 0, i32 addrspace(1)* %0
  ret void
}

; MUBUF store with a 12-bit immediate offset and a register offset
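; Mirrors mubuf_load3: the register part of the address uses addr64 and the
; constant 4-byte part stays in offset:4, with no separate ADD.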
; CHECK-LABEL: {{^}}mubuf_store3:
; CHECK-NOT: ADD
; CHECK: buffer_store_dword v{{[0-9]}}, v[{{[0-9]:[0-9]}}], s[{{[0-9]:[0-9]}}], 0 addr64 offset:4 ; encoding: [0x04,0x80,0x70,0xe0
define void @mubuf_store3(i32 addrspace(1)* %out, i64 %offset) {
entry:
  %0 = getelementptr i32, i32 addrspace(1)* %out, i64 %offset
  %1 = getelementptr i32, i32 addrspace(1)* %0, i64 1
  store i32 0, i32 addrspace(1)* %1
  ret void
}

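; Store through a pointer that stays in SGPRs; the base address is used
; directly with offset 0.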
; CHECK-LABEL: {{^}}store_sgpr_ptr:
; CHECK: buffer_store_dword v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0
define void @store_sgpr_ptr(i32 addrspace(1)* %out) #0 {
  store i32 99, i32 addrspace(1)* %out, align 4
  ret void
}

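; Same as store_sgpr_ptr, but the constant GEP of 10 i32 elements becomes a
; 40-byte immediate offset (offset:40).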
; CHECK-LABEL: {{^}}store_sgpr_ptr_offset:
; CHECK: buffer_store_dword v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:40
define void @store_sgpr_ptr_offset(i32 addrspace(1)* %out) #0 {
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 10
  store i32 99, i32 addrspace(1)* %out.gep, align 4
  ret void
}

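; 32768 i32 elements = 131072 bytes (0x20000), far too large for the immediate
; offset field, so the value is placed in an SGPR with s_mov_b32.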
; CHECK-LABEL: {{^}}store_sgpr_ptr_large_offset:
; CHECK: s_mov_b32 [[SOFFSET:s[0-9]+]], 0x20000
; CHECK: buffer_store_dword v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, [[SOFFSET]]
define void @store_sgpr_ptr_large_offset(i32 addrspace(1)* %out) #0 {
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 32768
  store i32 99, i32 addrspace(1)* %out.gep, align 4
  ret void
}

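; Same 0x20000 byte offset, reached through an atomicrmw add, which should
; select buffer_atomic_add with the offset in an SGPR.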
; CHECK-LABEL: {{^}}store_sgpr_ptr_large_offset_atomic:
; CHECK: s_mov_b32 [[SOFFSET:s[0-9]+]], 0x20000
; CHECK: buffer_atomic_add v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, [[SOFFSET]]
define void @store_sgpr_ptr_large_offset_atomic(i32 addrspace(1)* %out) #0 {
  %gep = getelementptr i32, i32 addrspace(1)* %out, i32 32768
  %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 5 seq_cst
  ret void
}

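; The index comes from the per-thread id (llvm.r600.read.tidig.x), so the
; address lives in VGPRs and the addr64 addressing mode is used.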
; CHECK-LABEL: {{^}}store_vgpr_ptr:
; CHECK: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64
define void @store_vgpr_ptr(i32 addrspace(1)* %out) #0 {
  %tid = call i32 @llvm.r600.read.tidig.x() readnone
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  store i32 99, i32 addrspace(1)* %out.gep, align 4
  ret void
}

declare i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #3
declare void @llvm.SI.tbuffer.store.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)

attributes #1 = { "ShaderType"="2" "unsafe-fp-math"="true" }
attributes #3 = { nounwind readonly }