Mercurial > hg > CbC > CbC_llvm
comparison test/CodeGen/AMDGPU/indirect-addressing-si.ll @ 95:afa8332a0e37 LLVM3.8
LLVM 3.8
author | Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp> |
---|---|
date | Tue, 13 Oct 2015 17:48:58 +0900 |
parents | |
children | 1172e4bd9c6f |
comparison
equal
deleted
inserted
replaced
84:f3e34b893a5f | 95:afa8332a0e37 |
---|---|
1 ; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s | |
2 ; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s | |
3 | |
4 ; Tests for indirect addressing on SI, which is implemented using dynamic | |
5 ; indexing of vectors. | |
6 | |
7 ; CHECK-LABEL: {{^}}extract_w_offset: | |
8 ; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, 4.0 | |
9 ; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40400000 | |
10 ; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, 2.0 | |
11 ; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, 1.0 | |
12 ; CHECK: s_mov_b32 m0 | |
13 ; CHECK-NEXT: v_movrels_b32_e32 | |
14 define void @extract_w_offset(float addrspace(1)* %out, i32 %in) { ; read element (%in + 1) of a constant <4 x float> and store it through %out | |
15 entry: | |
16 %lane = add i32 %in, 1 ; dynamic index = %in + 1 | |
17 %picked = extractelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, i32 %lane ; variable-index read from the constant vector | |
18 store float %picked, float addrspace(1)* %out ; write the selected float to the addrspace(1) pointer | |
19 ret void | |
20 } | |
21 | |
22 ; XXX: Could do v_or_b32 directly | |
23 ; CHECK-LABEL: {{^}}extract_w_offset_salu_use_vector: | |
24 ; CHECK-DAG: s_or_b32 | |
25 ; CHECK-DAG: s_or_b32 | |
26 ; CHECK-DAG: s_or_b32 | |
27 ; CHECK-DAG: s_or_b32 | |
28 ; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}} | |
29 ; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}} | |
30 ; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}} | |
31 ; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}} | |
32 ; CHECK: s_mov_b32 m0 | |
33 ; CHECK-NEXT: v_movrels_b32_e32 | |
34 define void @extract_w_offset_salu_use_vector(i32 addrspace(1)* %out, i32 %in, <4 x i32> %or.val) { ; OR an argument vector with constants, then read element (%in + 1) | |
35 entry: | |
36 %lane = add i32 %in, 1 ; dynamic index = %in + 1 | |
37 %ored = or <4 x i32> %or.val, <i32 1, i32 2, i32 3, i32 4> ; per-element OR with the constants 1..4 | |
38 %picked = extractelement <4 x i32> %ored, i32 %lane ; variable-index read from the OR result | |
39 store i32 %picked, i32 addrspace(1)* %out ; write the selected i32 to the addrspace(1) pointer | |
40 ret void | |
41 } | |
42 | |
43 ; CHECK-LABEL: {{^}}extract_wo_offset: | |
44 ; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, 4.0 | |
45 ; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40400000 | |
46 ; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, 2.0 | |
47 ; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, 1.0 | |
48 ; CHECK: s_mov_b32 m0 | |
49 ; CHECK-NEXT: v_movrels_b32_e32 | |
50 define void @extract_wo_offset(float addrspace(1)* %out, i32 %in) { ; read element %in (no added offset) of a constant <4 x float> | |
51 entry: | |
52 %picked = extractelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, i32 %in ; %in is used directly as the index | |
53 store float %picked, float addrspace(1)* %out ; write the selected float to the addrspace(1) pointer | |
54 ret void | |
55 } | |
56 | |
57 ; CHECK-LABEL: {{^}}extract_neg_offset_sgpr: | |
58 ; The offset depends on the register that holds the first element of the vector (v0 in the pattern below); the destination VGPR is not fixed, so any register number is accepted. | |
59 ; CHECK: s_add_i32 m0, s{{[0-9]+}}, 0xfffffe{{[0-9a-z]+}} | |
60 ; CHECK: v_movrels_b32_e32 v{{[0-9]+}}, v0 | |
61 define void @extract_neg_offset_sgpr(i32 addrspace(1)* %out, i32 %offset) { ; read element (%offset - 512) of a constant <4 x i32> | |
62 entry: | |
63 %index = add i32 %offset, -512 ; -512 is 0xfffffe00 as a 32-bit immediate | |
64 %value = extractelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 %index ; variable-index read from the constant vector | |
65 store i32 %value, i32 addrspace(1)* %out ; write the selected i32 to the addrspace(1) pointer | |
66 ret void | |
67 } | |
68 | |
69 ; CHECK-LABEL: {{^}}extract_neg_offset_sgpr_loaded: | |
70 ; The offset depends on the register that holds the first element of the vector (v0 in the pattern below); the destination VGPR is not fixed, so any register number is accepted. | |
71 ; CHECK: s_add_i32 m0, s{{[0-9]+}}, 0xfffffe{{[0-9a-z]+}} | |
72 ; CHECK: v_movrels_b32_e32 v{{[0-9]+}}, v0 | |
73 define void @extract_neg_offset_sgpr_loaded(i32 addrspace(1)* %out, <4 x i32> %vec0, <4 x i32> %vec1, i32 %offset) { ; read element (%offset - 512) of the OR of two argument vectors | |
74 entry: | |
75 %index = add i32 %offset, -512 ; -512 is 0xfffffe00 as a 32-bit immediate | |
76 %or = or <4 x i32> %vec0, %vec1 ; the indexed vector comes from arguments instead of constants | |
77 %value = extractelement <4 x i32> %or, i32 %index ; variable-index read from the OR result | |
78 store i32 %value, i32 addrspace(1)* %out ; write the selected i32 to the addrspace(1) pointer | |
79 ret void | |
80 } | |
81 | |
82 ; CHECK-LABEL: {{^}}extract_neg_offset_vgpr: | |
83 ; The offset depends on the register that holds the first element of the vector (v0 in the pattern below); the destination VGPR is not fixed, so any register number is accepted. | |
84 ; CHECK: v_readfirstlane_b32 | |
85 ; CHECK: s_add_i32 m0, m0, 0xfffffe{{[0-9a-z]+}} | |
86 ; CHECK-NEXT: v_movrels_b32_e32 v{{[0-9]+}}, v0 | |
87 ; CHECK: s_cbranch_execnz | |
88 define void @extract_neg_offset_vgpr(i32 addrspace(1)* %out) { ; read element (id - 512) of a constant <4 x i32>, where id comes from an intrinsic call | |
89 entry: | |
90 %id = call i32 @llvm.r600.read.tidig.x() #1 ; index source is the intrinsic result (a work-item id, judging by its name) | |
91 %index = add i32 %id, -512 ; -512 is 0xfffffe00 as a 32-bit immediate | |
92 %value = extractelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 %index ; variable-index read from the constant vector | |
93 store i32 %value, i32 addrspace(1)* %out ; write the selected i32 to the addrspace(1) pointer | |
94 ret void | |
95 } | |
96 | |
97 ; CHECK-LABEL: {{^}}insert_w_offset: | |
98 ; CHECK: s_mov_b32 m0 | |
99 ; CHECK-NEXT: v_movreld_b32_e32 | |
100 define void @insert_w_offset(float addrspace(1)* %out, i32 %in) { ; write 5.0 at element (%in + 1) of a constant <4 x float>, then store element 2 | |
101 entry: | |
102 %lane = add i32 %in, 1 ; dynamic index = %in + 1 | |
103 %patched = insertelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, float 5.0, i32 %lane ; variable-index write into the constant vector | |
104 %third = extractelement <4 x float> %patched, i32 2 ; fixed-index read of element 2 | |
105 store float %third, float addrspace(1)* %out ; write that float to the addrspace(1) pointer | |
106 ret void | |
107 } | |
108 | |
109 ; CHECK-LABEL: {{^}}insert_wo_offset: | |
110 ; CHECK: s_mov_b32 m0 | |
111 ; CHECK-NEXT: v_movreld_b32_e32 | |
112 define void @insert_wo_offset(float addrspace(1)* %out, i32 %in) { ; write 5.0 at element %in (no added offset) of a constant <4 x float>, then store element 2 | |
113 entry: | |
114 %patched = insertelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, float 5.0, i32 %in ; %in is used directly as the index | |
115 %third = extractelement <4 x float> %patched, i32 2 ; fixed-index read of element 2 | |
116 store float %third, float addrspace(1)* %out ; write that float to the addrspace(1) pointer | |
117 ret void | |
118 } | |
119 | |
120 ; CHECK-LABEL: {{^}}insert_neg_offset_sgpr: | |
121 ; The offset depends on the register that holds the first element of the vector. | |
122 ; CHECK: s_add_i32 m0, s{{[0-9]+}}, 0xfffffe{{[0-9a-z]+}} | |
123 ; CHECK: v_movreld_b32_e32 v0, v{{[0-9]}} | |
124 define void @insert_neg_offset_sgpr(i32 addrspace(1)* %in, <4 x i32> addrspace(1)* %out, i32 %offset) { ; write 5 at element (%offset - 512) of a constant <4 x i32> and store the whole vector; %in is unused | |
125 entry: | |
126 %lane = add i32 %offset, -512 ; -512 is 0xfffffe00 as a 32-bit immediate | |
127 %patched = insertelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 5, i32 %lane ; variable-index write into the constant vector | |
128 store <4 x i32> %patched, <4 x i32> addrspace(1)* %out ; write all four elements to the addrspace(1) pointer | |
129 ret void | |
130 } | |
131 | |
132 ; The vector indexed into is originally loaded into an SGPR rather | |
133 ; than built with a reg_sequence | |
134 | |
135 ; CHECK-LABEL: {{^}}insert_neg_offset_sgpr_loadreg: | |
136 ; The offset depends on the register that holds the first element of the vector. | |
137 ; CHECK: s_add_i32 m0, s{{[0-9]+}}, 0xfffffe{{[0-9a-z]+}} | |
138 ; CHECK: v_movreld_b32_e32 v0, v{{[0-9]}} | |
139 define void @insert_neg_offset_sgpr_loadreg(i32 addrspace(1)* %in, <4 x i32> addrspace(1)* %out, <4 x i32> %vec, i32 %offset) { ; write 5 at element (%offset - 512) of the argument vector %vec and store the whole vector; %in is unused | |
140 entry: | |
141 %lane = add i32 %offset, -512 ; -512 is 0xfffffe00 as a 32-bit immediate | |
142 %patched = insertelement <4 x i32> %vec, i32 5, i32 %lane ; variable-index write into the argument vector | |
143 store <4 x i32> %patched, <4 x i32> addrspace(1)* %out ; write all four elements to the addrspace(1) pointer | |
144 ret void | |
145 } | |
146 | |
147 ; CHECK-LABEL: {{^}}insert_neg_offset_vgpr: | |
148 ; The offset depends on the register that holds the first element of the vector. | |
149 ; CHECK: v_readfirstlane_b32 | |
150 ; CHECK: s_add_i32 m0, m0, 0xfffffe{{[0-9a-z]+}} | |
151 ; CHECK-NEXT: v_movreld_b32_e32 v0, v{{[0-9]}} | |
152 ; CHECK: s_cbranch_execnz | |
153 define void @insert_neg_offset_vgpr(i32 addrspace(1)* %in, <4 x i32> addrspace(1)* %out) { ; write 5 at element (id - 512) of a constant <4 x i32>, where id comes from an intrinsic call; %in is unused | |
154 entry: | |
155 %tid = call i32 @llvm.r600.read.tidig.x() #1 ; index source is the intrinsic result (a work-item id, judging by its name) | |
156 %lane = add i32 %tid, -512 ; -512 is 0xfffffe00 as a 32-bit immediate | |
157 %patched = insertelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 5, i32 %lane ; variable-index write into the constant vector | |
158 store <4 x i32> %patched, <4 x i32> addrspace(1)* %out ; write all four elements to the addrspace(1) pointer | |
159 ret void | |
160 } | |
161 | |
162 ; CHECK-LABEL: {{^}}insert_neg_inline_offset_vgpr: | |
163 ; The offset depends on the register that holds the first element of the vector. | |
164 ; CHECK: v_readfirstlane_b32 | |
165 ; CHECK: s_add_i32 m0, m0, -{{[0-9]+}} | |
166 ; CHECK-NEXT: v_movreld_b32_e32 v0, v{{[0-9]}} | |
167 ; CHECK: s_cbranch_execnz | |
168 define void @insert_neg_inline_offset_vgpr(i32 addrspace(1)* %in, <4 x i32> addrspace(1)* %out) { ; same shape as the -512 variant but with offset -16, which the expected output prints as a decimal negative; %in is unused | |
169 entry: | |
170 %tid = call i32 @llvm.r600.read.tidig.x() #1 ; index source is the intrinsic result (a work-item id, judging by its name) | |
171 %lane = add i32 %tid, -16 ; small negative offset (presumably encodable inline, per the test name) | |
172 %patched = insertelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 5, i32 %lane ; variable-index write into the constant vector | |
173 store <4 x i32> %patched, <4 x i32> addrspace(1)* %out ; write all four elements to the addrspace(1) pointer | |
174 ret void | |
175 } | |
176 | |
177 declare i32 @llvm.r600.read.tidig.x() #1 | |
178 attributes #1 = { nounwind readnone } |