Mercurial > hg > Members > tobaru > cbc > CbC_llvm
comparison test/CodeGen/AMDGPU/valu-i1.ll @ 95:afa8332a0e37
LLVM 3.8
author | Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp> |
---|---|
date | Tue, 13 Oct 2015 17:48:58 +0900 |
parents | |
children | 7d135dc70f03 |
comparison
equal
deleted
inserted
replaced
84:f3e34b893a5f | 95:afa8332a0e37 |
---|---|
1 ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs -enable-misched -asm-verbose < %s | FileCheck -check-prefix=SI %s | |
2 | |
3 declare i32 @llvm.r600.read.tidig.x() nounwind readnone | |
4 | |
5 ; SI-LABEL: @test_if | |
6 ; Make sure the i1 values created by the CFG structurizer pass are | |
7 ; moved using VALU instructions (v_mov_b32) rather than a scalar s_mov_b64. | |
; The register patterns below use [0-9]+ so that multi-digit register numbers
; (for example s[10:11] or v12) are covered as well.
8 ; SI-NOT: s_mov_b64 s[{{[0-9]+:[0-9]+}}], -1 | |
9 ; SI: v_mov_b32_e32 v{{[0-9]+}}, -1 | |
10 define void @test_if(i32 %a, i32 %b, i32 addrspace(1)* %src, i32 addrspace(1)* %dst) #1 { | |
11 entry: | |
; Dispatch on %a: 0 and 1 have dedicated arms, every other value goes to %default.
12 switch i32 %a, label %default [ | |
13 i32 0, label %case0 | |
14 i32 1, label %case1 | |
15 ] | |
16 | |
; Each arm stores a distinct constant (0, 1, 2 or 3) to %dst[%b].
17 case0: | |
18 %arrayidx1 = getelementptr i32, i32 addrspace(1)* %dst, i32 %b | |
19 store i32 0, i32 addrspace(1)* %arrayidx1, align 4 | |
20 br label %end | |
21 | |
22 case1: | |
23 %arrayidx5 = getelementptr i32, i32 addrspace(1)* %dst, i32 %b | |
24 store i32 1, i32 addrspace(1)* %arrayidx5, align 4 | |
25 br label %end | |
26 | |
; The default arm branches once more on %a == 2; %arrayidx10 is shared by both
; successors.
27 default: | |
28 %cmp8 = icmp eq i32 %a, 2 | |
29 %arrayidx10 = getelementptr i32, i32 addrspace(1)* %dst, i32 %b | |
30 br i1 %cmp8, label %if, label %else | |
31 | |
32 if: | |
33 store i32 2, i32 addrspace(1)* %arrayidx10, align 4 | |
34 br label %end | |
35 | |
36 else: | |
37 store i32 3, i32 addrspace(1)* %arrayidx10, align 4 | |
38 br label %end | |
39 | |
40 end: | |
41 ret void | |
42 } | |
43 | |
44 ; SI-LABEL: @simple_test_v_if | |
45 ; SI: v_cmp_ne_i32_e32 vcc, 0, v{{[0-9]+}} | |
46 ; SI: s_and_saveexec_b64 [[BR_SREG:s\[[0-9]+:[0-9]+\]]], vcc | |
47 ; SI: s_xor_b64 [[BR_SREG]], exec, [[BR_SREG]] | |
48 | |
49 ; SI: ; BB#1: | |
50 ; SI: buffer_store_dword | |
51 ; SI: s_endpgm | |
52 | |
53 ; SI: BB1_2: | |
54 ; SI: s_or_b64 exec, exec, [[BR_SREG]] | |
55 ; SI: s_endpgm | |
; Work-items whose tidig.x is non-zero store 999 to %dst[%tid]; the others
; return without storing. %src is not used by this function.
56 define void @simple_test_v_if(i32 addrspace(1)* %dst, i32 addrspace(1)* %src) #1 { | |
57 %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone | |
58 %is.0 = icmp ne i32 %tid, 0 | |
; The branch condition depends on the work-item id.
59 br i1 %is.0, label %store, label %exit | |
60 | |
61 store: | |
62 %gep = getelementptr i32, i32 addrspace(1)* %dst, i32 %tid | |
63 store i32 999, i32 addrspace(1)* %gep | |
64 ret void | |
65 | |
66 exit: | |
67 ret void | |
68 } | |
69 | |
70 ; SI-LABEL: @simple_test_v_loop | |
71 ; SI: v_cmp_ne_i32_e32 vcc, 0, v{{[0-9]+}} | |
72 ; SI: s_and_saveexec_b64 [[BR_SREG:s\[[0-9]+:[0-9]+\]]], vcc | |
73 ; SI: s_xor_b64 [[BR_SREG]], exec, [[BR_SREG]] | |
74 ; SI: s_cbranch_execz BB2_2 | |
75 | |
76 ; SI: ; BB#1: | |
77 ; SI: s_mov_b64 {{s\[[0-9]+:[0-9]+\]}}, 0{{$}} | |
78 | |
79 ; SI: BB2_3: | |
80 ; SI: buffer_load_dword | |
81 ; SI: buffer_store_dword | |
82 ; SI: v_cmp_eq_i32_e32 vcc, | |
83 ; SI: s_or_b64 [[OR_SREG:s\[[0-9]+:[0-9]+\]]] | |
84 ; SI: s_andn2_b64 exec, exec, [[OR_SREG]] | |
85 ; SI: s_cbranch_execnz BB2_3 | |
86 | |
; Work-items with a non-zero tidig.x run the loop below; %i starts at %tid and
; the loop ends once %i + 1 equals %tid + 64. Each iteration stores the loaded
; value to %dst[%i]. Work-items with tidig.x == 0 go straight to %exit.
87 define void @simple_test_v_loop(i32 addrspace(1)* %dst, i32 addrspace(1)* %src) #1 { | |
88 entry: | |
89 %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone | |
90 %is.0 = icmp ne i32 %tid, 0 | |
91 %limit = add i32 %tid, 64 | |
92 br i1 %is.0, label %loop, label %exit | |
93 | |
94 loop: | |
95 %i = phi i32 [%tid, %entry], [%i.inc, %loop] | |
96 %gep.src = getelementptr i32, i32 addrspace(1)* %src, i32 %i | |
97 %gep.dst = getelementptr i32, i32 addrspace(1)* %dst, i32 %i | |
; NOTE(review): the load reads through %src itself, so %gep.src above is unused.
; Possibly %gep.src was intended -- confirm before changing, because the
; FileCheck lines for this function depend on the generated code.
98 %load = load i32, i32 addrspace(1)* %src | |
99 store i32 %load, i32 addrspace(1)* %gep.dst | |
100 %i.inc = add nsw i32 %i, 1 | |
101 %cmp = icmp eq i32 %limit, %i.inc | |
102 br i1 %cmp, label %exit, label %loop | |
103 | |
104 exit: | |
105 ret void | |
106 } | |
107 | |
108 ; SI-LABEL: @multi_vcond_loop | |
109 | |
110 ; Load loop limit from buffer | |
111 ; Branch to exit if uniformly not taken | |
112 ; SI: ; BB#0: | |
113 ; SI: buffer_load_dword [[VBOUND:v[0-9]+]] | |
114 ; SI: v_cmp_lt_i32_e32 vcc | |
115 ; SI: s_and_saveexec_b64 [[OUTER_CMP_SREG:s\[[0-9]+:[0-9]+\]]], vcc | |
116 ; SI: s_xor_b64 [[OUTER_CMP_SREG]], exec, [[OUTER_CMP_SREG]] | |
117 ; SI: s_cbranch_execz BB3_2 | |
118 | |
119 ; Initialize inner condition to false | |
120 ; SI: ; BB#1: | |
121 ; SI: s_mov_b64 [[ZERO:s\[[0-9]+:[0-9]+\]]], 0{{$}} | |
122 ; SI: s_mov_b64 [[COND_STATE:s\[[0-9]+:[0-9]+\]]], [[ZERO]] | |
123 | |
124 ; Clear exec bits for workitems that load -1s | |
125 ; SI: BB3_3: | |
126 ; SI: buffer_load_dword [[B:v[0-9]+]] | |
127 ; SI: buffer_load_dword [[A:v[0-9]+]] | |
128 ; SI-DAG: v_cmp_ne_i32_e64 [[NEG1_CHECK_0:s\[[0-9]+:[0-9]+\]]], -1, [[A]] | |
129 ; SI-DAG: v_cmp_ne_i32_e32 [[NEG1_CHECK_1:vcc]], -1, [[B]] | |
130 ; SI: s_and_b64 [[ORNEG1:s\[[0-9]+:[0-9]+\]]], [[NEG1_CHECK_1]], [[NEG1_CHECK_0]] | |
131 ; SI: s_and_saveexec_b64 [[ORNEG2:s\[[0-9]+:[0-9]+\]]], [[ORNEG1]] | |
132 ; SI: s_xor_b64 [[ORNEG2]], exec, [[ORNEG2]] | |
133 ; SI: s_cbranch_execz BB3_5 | |
134 | |
135 ; SI: ; BB#4: | |
136 ; SI: buffer_store_dword | |
137 ; SI: v_cmp_ge_i64_e64 [[CMP:s\[[0-9]+:[0-9]+\]]] | |
138 ; SI: s_or_b64 [[COND_STATE]], [[CMP]], [[COND_STATE]] | |
139 | |
140 ; SI: BB3_5: | |
141 ; SI: s_or_b64 exec, exec, [[ORNEG2]] | |
142 ; SI: s_or_b64 [[COND_STATE]], [[ORNEG2]], [[COND_STATE]] | |
143 ; SI: s_andn2_b64 exec, exec, [[COND_STATE]] | |
144 ; SI: s_cbranch_execnz BB3_3 | |
145 | |
146 ; SI: ; BB#6: | |
147 ; SI: s_or_b64 exec, exec, [[COND_STATE]] | |
148 | |
149 ; SI: BB3_2: | |
150 ; SI-NOT: [[COND_STATE]] | |
151 ; SI: s_endpgm | |
152 | |
; Each work-item loads a signed bound from %arg3[tid]. If the bound is positive
; the loop at %bb10 runs: every iteration loads %arg1[i + tid] and
; %arg2[i + tid], leaves the loop if either value is -1, and otherwise stores
; their sum to %arg[i + tid] and continues while i + 1 is below the bound.
153 define void @multi_vcond_loop(i32 addrspace(1)* noalias nocapture %arg, i32 addrspace(1)* noalias nocapture readonly %arg1, i32 addrspace(1)* noalias nocapture readonly %arg2, i32 addrspace(1)* noalias nocapture readonly %arg3) #1 { | |
154 bb: | |
155 %tmp = tail call i32 @llvm.r600.read.tidig.x() #0 | |
156 %tmp4 = sext i32 %tmp to i64 | |
157 %tmp5 = getelementptr inbounds i32, i32 addrspace(1)* %arg3, i64 %tmp4 | |
158 %tmp6 = load i32, i32 addrspace(1)* %tmp5, align 4 | |
159 %tmp7 = icmp sgt i32 %tmp6, 0 | |
160 %tmp8 = sext i32 %tmp6 to i64 | |
161 br i1 %tmp7, label %bb10, label %bb26 | |
162 | |
163 bb10: ; preds = %bb, %bb20 | |
; %tmp11 is the 64-bit loop counter, starting at 0; %tmp12 offsets it by the
; work-item id and indexes all three buffers.
164 %tmp11 = phi i64 [ %tmp23, %bb20 ], [ 0, %bb ] | |
165 %tmp12 = add nsw i64 %tmp11, %tmp4 | |
166 %tmp13 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 %tmp12 | |
167 %tmp14 = load i32, i32 addrspace(1)* %tmp13, align 4 | |
168 %tmp15 = getelementptr inbounds i32, i32 addrspace(1)* %arg2, i64 %tmp12 | |
169 %tmp16 = load i32, i32 addrspace(1)* %tmp15, align 4 | |
; Continue only when neither loaded value is -1.
170 %tmp17 = icmp ne i32 %tmp14, -1 | |
171 %tmp18 = icmp ne i32 %tmp16, -1 | |
172 %tmp19 = and i1 %tmp17, %tmp18 | |
173 br i1 %tmp19, label %bb20, label %bb26 | |
174 | |
175 bb20: ; preds = %bb10 | |
176 %tmp21 = add nsw i32 %tmp16, %tmp14 | |
177 %tmp22 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp12 | |
178 store i32 %tmp21, i32 addrspace(1)* %tmp22, align 4 | |
; Second exit condition: stop once the incremented counter reaches the bound.
179 %tmp23 = add nuw nsw i64 %tmp11, 1 | |
180 %tmp24 = icmp slt i64 %tmp23, %tmp8 | |
181 br i1 %tmp24, label %bb10, label %bb26 | |
182 | |
183 bb26: ; preds = %bb10, %bb20, %bb | |
184 ret void | |
185 } | |
186 | |
187 attributes #0 = { nounwind readnone } | |
188 attributes #1 = { nounwind } |