; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -amdgpu-promote-alloca < %s | FileCheck %s

; Both incoming values of %phi.ptr are GEPs into the same alloca. The checks
; expect the accesses to go through the addrspace(3) array
; @branch_ptr_var_same_alloca.alloca, with both GEPs, the phi and the store
; rewritten to addrspace(3) pointers.
; Unnamed temporaries are matched with a regex instead of a literal number so
; the test does not depend on the exact value numbering of the output.
; CHECK-LABEL: @branch_ptr_var_same_alloca(
; CHECK: getelementptr inbounds [256 x [64 x i32]], [256 x [64 x i32]] addrspace(3)* @branch_ptr_var_same_alloca.alloca, i32 0, i32 %{{[0-9]+}}

; CHECK: if:
; CHECK: %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32] addrspace(3)* %{{[0-9]+}}, i32 0, i32 %a

; CHECK: else:
; CHECK: %arrayidx1 = getelementptr inbounds [64 x i32], [64 x i32] addrspace(3)* %{{[0-9]+}}, i32 0, i32 %b

; CHECK: endif:
; CHECK: %phi.ptr = phi i32 addrspace(3)* [ %arrayidx0, %if ], [ %arrayidx1, %else ]
; CHECK: store i32 0, i32 addrspace(3)* %phi.ptr, align 4
define amdgpu_kernel void @branch_ptr_var_same_alloca(i32 %a, i32 %b) #0 {
entry:
  %alloca = alloca [64 x i32], align 4
  br i1 undef, label %if, label %else

if:
  %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 %a
  br label %endif

else:
  %arrayidx1 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 %b
  br label %endif

endif:
  ; Pointer phi whose inputs both derive from %alloca.
  %phi.ptr = phi i32* [ %arrayidx0, %if ], [ %arrayidx1, %else ]
  store i32 0, i32* %phi.ptr, align 4
  ret void
}

; The phi merges a GEP into the alloca with a null pointer, with null as the
; second incoming value. The checks expect the phi to become an addrspace(3)
; pointer phi that still carries the null input, and the store through it to
; use the addrspace(3) pointer as well.
; CHECK-LABEL: @branch_ptr_phi_alloca_null_0(
; CHECK: %phi.ptr = phi i32 addrspace(3)* [ %arrayidx0, %if ], [ null, %entry ]
; CHECK: store i32 0, i32 addrspace(3)* %phi.ptr, align 4
define amdgpu_kernel void @branch_ptr_phi_alloca_null_0(i32 %a, i32 %b) #0 {
entry:
  %alloca = alloca [64 x i32], align 4
  br i1 undef, label %if, label %endif

if:
  %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 %a
  br label %endif

endif:
  ; null arrives directly from %entry when the %if block is skipped.
  %phi.ptr = phi i32* [ %arrayidx0, %if ], [ null, %entry ]
  store i32 0, i32* %phi.ptr, align 4
  ret void
}

; Same shape as @branch_ptr_phi_alloca_null_0, but with null as the first
; incoming value of the phi instead of the second. The checks expect the phi
; and the store through it to use addrspace(3) pointers, with the operand
; order preserved.
; CHECK-LABEL: @branch_ptr_phi_alloca_null_1(
; CHECK: %phi.ptr = phi i32 addrspace(3)* [ null, %entry ], [ %arrayidx0, %if ]
; CHECK: store i32 0, i32 addrspace(3)* %phi.ptr, align 4
define amdgpu_kernel void @branch_ptr_phi_alloca_null_1(i32 %a, i32 %b) #0 {
entry:
  %alloca = alloca [64 x i32], align 4
  br i1 undef, label %if, label %endif

if:
  %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 %a
  br label %endif

endif:
  ; null is listed first here; the alloca-derived pointer is second.
  %phi.ptr = phi i32* [ null, %entry ], [ %arrayidx0, %if ]
  store i32 0, i32* %phi.ptr, align 4
  ret void
}

; A phi with a single incoming value that is a GEP into the alloca. The checks
; expect the accesses to go through the addrspace(3) array
; @one_phi_value.alloca and the GEP, phi and store to use addrspace(3)
; pointers.
; Unnamed temporaries are matched with a regex instead of a literal number so
; the test does not depend on the exact value numbering of the output.
; CHECK-LABEL: @one_phi_value(
; CHECK: getelementptr inbounds [256 x [64 x i32]], [256 x [64 x i32]] addrspace(3)* @one_phi_value.alloca, i32 0, i32 %{{[0-9]+}}
; CHECK: %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32] addrspace(3)* %{{[0-9]+}}, i32 0, i32 %a

; CHECK: br label %exit
; CHECK: %phi.ptr = phi i32 addrspace(3)* [ %arrayidx0, %entry ]
; CHECK: store i32 0, i32 addrspace(3)* %phi.ptr, align 4
define amdgpu_kernel void @one_phi_value(i32 %a) #0 {
entry:
  %alloca = alloca [64 x i32], align 4
  %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 %a
  br label %exit

exit:
  ; Single-entry phi: %entry is the only predecessor of %exit.
  %phi.ptr = phi i32* [ %arrayidx0, %entry ]
  store i32 0, i32* %phi.ptr, align 4
  ret void
}

; Negative test: one phi input is a GEP into the alloca, the other is the
; result of a call to @get_unknown_pointer. The checks expect the output to
; keep the alloca and leave the GEP, call, phi and store on plain i32*
; pointers, i.e. unchanged from the input.
; CHECK-LABEL: @branch_ptr_alloca_unknown_obj(
; CHECK: %alloca = alloca [64 x i32], align 4

; CHECK: if:
; CHECK: %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 %a

; CHECK: else:
; CHECK: %arrayidx1 = call i32* @get_unknown_pointer()

; CHECK: endif:
; CHECK: %phi.ptr = phi i32* [ %arrayidx0, %if ], [ %arrayidx1, %else ]
; CHECK: store i32 0, i32* %phi.ptr, align 4
define amdgpu_kernel void @branch_ptr_alloca_unknown_obj(i32 %a, i32 %b) #0 {
entry:
  %alloca = alloca [64 x i32], align 4
  br i1 undef, label %if, label %else

if:
  %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 %a
  br label %endif

else:
  ; Despite its name, this value is not a GEP: it comes from an external call.
  %arrayidx1 = call i32* @get_unknown_pointer()
  br label %endif

endif:
  %phi.ptr = phi i32* [ %arrayidx0, %if ], [ %arrayidx1, %else ]
  store i32 0, i32* %phi.ptr, align 4
  ret void
}

; kernel void ptr_induction_var_same_alloca(void)
; {
;   int alloca[64];
;   int i = 0;

; #pragma nounroll
;   for (int* p = &alloca[2], *e = &alloca[48]; p != e; ++p, ++i)
;   {
;     *p = i;
;   }
; }

; FIXME: This should be promotable. We need to use
; GetUnderlyingObjects when looking at the icmp user.

; Until the FIXME above is addressed, the checks pin the current behaviour:
; the alloca is still present in the output and the pointer induction phi
; still has type i32*.
; CHECK-LABEL: @ptr_induction_var_same_alloca(
; CHECK: %alloca = alloca [64 x i32], align 4
; CHECK: phi i32* [ %arrayidx, %entry ], [ %incdec.ptr, %for.body ]
define amdgpu_kernel void @ptr_induction_var_same_alloca() #0 {
entry:
  %alloca = alloca [64 x i32], align 4
  ; Loop start (&alloca[2]) and loop end (&alloca[48]); both point into %alloca.
  %arrayidx = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 2
  %arrayidx1 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 48
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  ret void

for.body:                                         ; preds = %for.body, %entry
  %i.09 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %p.08 = phi i32* [ %arrayidx, %entry ], [ %incdec.ptr, %for.body ]
  store i32 %i.09, i32* %p.08, align 4
  %incdec.ptr = getelementptr inbounds i32, i32* %p.08, i32 1
  %inc = add nuw nsw i32 %i.09, 1
  ; Pointer comparison between two values derived from the same alloca; this
  ; is the icmp user the FIXME above refers to.
  %cmp = icmp eq i32* %incdec.ptr, %arrayidx1
  br i1 %cmp, label %for.cond.cleanup, label %for.body
}


; extern int* get_unknown_pointer(void);

; kernel void ptr_induction_var_alloca_unknown(void)
; {
;   int alloca[64];
;   int i = 0;
;
;   for (int* p = &alloca[2], *e = get_unknown_pointer(); p != e; ++p, ++i)
;   {
;     *p = i;
;   }
; }

; Negative test: the loop's end pointer comes from @get_unknown_pointer rather
; than from the alloca. The checks expect the alloca to remain in the output
; and the induction phi and the loop-exit compare to stay on i32* pointers.
; CHECK-LABEL: @ptr_induction_var_alloca_unknown(
; CHECK: %alloca = alloca [64 x i32], align 4
; CHECK: %p.08 = phi i32* [ %incdec.ptr, %for.body ], [ %arrayidx, %for.body.preheader ]
; CHECK: %cmp = icmp eq i32* %incdec.ptr, %call
define amdgpu_kernel void @ptr_induction_var_alloca_unknown() #0 {
entry:
  %alloca = alloca [64 x i32], align 4
  %arrayidx = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 2
  ; NOTE(review): attribute group #2 is not defined in the visible part of this
  ; file (only #0 is) - confirm that this reference is intentional.
  %call = tail call i32* @get_unknown_pointer() #2
  ; Skip the loop entirely when the start pointer already equals the end pointer.
  %cmp.7 = icmp eq i32* %arrayidx, %call
  br i1 %cmp.7, label %for.cond.cleanup, label %for.body.preheader

for.body.preheader:                               ; preds = %entry
  br label %for.body

for.cond.cleanup.loopexit:                        ; preds = %for.body
  br label %for.cond.cleanup

for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
  ret void

for.body:                                         ; preds = %for.body, %for.body.preheader
  %i.09 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
  %p.08 = phi i32* [ %incdec.ptr, %for.body ], [ %arrayidx, %for.body.preheader ]
  store i32 %i.09, i32* %p.08, align 4
  %incdec.ptr = getelementptr inbounds i32, i32* %p.08, i32 1
  %inc = add nuw nsw i32 %i.09, 1
  ; Compares an alloca-derived pointer against the pointer returned by the call.
  %cmp = icmp eq i32* %incdec.ptr, %call
  br i1 %cmp, label %for.cond.cleanup.loopexit, label %for.body
}

; External function with no body in this file. The tests above use its result
; as a pointer that is not derived from their alloca.
declare i32* @get_unknown_pointer() #0

; Attribute group shared by every kernel in this file and by the declaration
; above.
; NOTE(review): the upper bound 256 of "amdgpu-flat-work-group-size" presumably
; corresponds to the [256 x ...] outer array dimension expected by the checks -
; confirm against the pass.
attributes #0 = { nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,256" }