annotate llvm/test/Transforms/LoopUnroll/unroll-pragmas.ll @ 201:a96fbbdf2d0f

...
author Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date Fri, 04 Jun 2021 21:07:06 +0900
parents 1d019706d866
children c4bab56944e8
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
150
anatofuz
parents:
diff changeset
1 ; RUN: opt < %s -loop-unroll -pragma-unroll-threshold=1024 -S | FileCheck -check-prefixes=CHECK,REM %s
anatofuz
parents:
diff changeset
2 ; RUN: opt < %s -loop-unroll -loop-unroll -pragma-unroll-threshold=1024 -S | FileCheck -check-prefixes=CHECK,REM %s
anatofuz
parents:
diff changeset
3 ; RUN: opt < %s -loop-unroll -unroll-allow-remainder=0 -pragma-unroll-threshold=1024 -S | FileCheck -check-prefixes=CHECK,NOREM %s
anatofuz
parents:
diff changeset
4 ;
anatofuz
parents:
diff changeset
5 ; Run loop unrolling twice to verify that loop unrolling metadata is properly
anatofuz
parents:
diff changeset
6 ; removed and further unrolling is disabled after the pass is run once.
anatofuz
parents:
diff changeset
7
anatofuz
parents:
diff changeset
8 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
anatofuz
parents:
diff changeset
9 target triple = "x86_64-unknown-linux-gnu"
anatofuz
parents:
diff changeset
10
anatofuz
parents:
diff changeset
11 ; loop4 contains a small loop which should be completely unrolled by
anatofuz
parents:
diff changeset
12 ; the default unrolling heuristics. It serves as a control for the
anatofuz
parents:
diff changeset
13 ; unroll(disable) pragma test loop4_with_disable.
anatofuz
parents:
diff changeset
14 ;
anatofuz
parents:
diff changeset
15 ; CHECK-LABEL: @loop4(
anatofuz
parents:
diff changeset
16 ; CHECK-NOT: br i1
anatofuz
parents:
diff changeset
17 define void @loop4(i32* nocapture %a) {
anatofuz
parents:
diff changeset
18 entry:
anatofuz
parents:
diff changeset
19 br label %for.body
anatofuz
parents:
diff changeset
20
anatofuz
parents:
diff changeset
21 for.body: ; preds = %for.body, %entry
anatofuz
parents:
diff changeset
22 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
anatofuz
parents:
diff changeset
23 %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
anatofuz
parents:
diff changeset
24 %0 = load i32, i32* %arrayidx, align 4
anatofuz
parents:
diff changeset
25 %inc = add nsw i32 %0, 1
anatofuz
parents:
diff changeset
26 store i32 %inc, i32* %arrayidx, align 4
anatofuz
parents:
diff changeset
27 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
anatofuz
parents:
diff changeset
28 %exitcond = icmp eq i64 %indvars.iv.next, 4
anatofuz
parents:
diff changeset
29 br i1 %exitcond, label %for.end, label %for.body
anatofuz
parents:
diff changeset
30
anatofuz
parents:
diff changeset
31 for.end: ; preds = %for.body
anatofuz
parents:
diff changeset
32 ret void
anatofuz
parents:
diff changeset
33 }
anatofuz
parents:
diff changeset
34
anatofuz
parents:
diff changeset
35 ; #pragma clang loop unroll(disable)
anatofuz
parents:
diff changeset
36 ;
anatofuz
parents:
diff changeset
37 ; CHECK-LABEL: @loop4_with_disable(
anatofuz
parents:
diff changeset
38 ; CHECK: store i32
anatofuz
parents:
diff changeset
39 ; CHECK-NOT: store i32
anatofuz
parents:
diff changeset
40 ; CHECK: br i1
anatofuz
parents:
diff changeset
41 define void @loop4_with_disable(i32* nocapture %a) {
anatofuz
parents:
diff changeset
42 entry:
anatofuz
parents:
diff changeset
43 br label %for.body
anatofuz
parents:
diff changeset
44
anatofuz
parents:
diff changeset
45 for.body: ; preds = %for.body, %entry
anatofuz
parents:
diff changeset
46 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
anatofuz
parents:
diff changeset
47 %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
anatofuz
parents:
diff changeset
48 %0 = load i32, i32* %arrayidx, align 4
anatofuz
parents:
diff changeset
49 %inc = add nsw i32 %0, 1
anatofuz
parents:
diff changeset
50 store i32 %inc, i32* %arrayidx, align 4
anatofuz
parents:
diff changeset
51 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
anatofuz
parents:
diff changeset
52 %exitcond = icmp eq i64 %indvars.iv.next, 4
anatofuz
parents:
diff changeset
53 br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !1
anatofuz
parents:
diff changeset
54
anatofuz
parents:
diff changeset
55 for.end: ; preds = %for.body
anatofuz
parents:
diff changeset
56 ret void
anatofuz
parents:
diff changeset
57 }
anatofuz
parents:
diff changeset
58 !1 = !{!1, !2}
anatofuz
parents:
diff changeset
59 !2 = !{!"llvm.loop.unroll.disable"}
anatofuz
parents:
diff changeset
60
anatofuz
parents:
diff changeset
61 ; loop64 has a high enough count that it should *not* be unrolled by
anatofuz
parents:
diff changeset
62 ; the default unrolling heuristic. It serves as the control for the
anatofuz
parents:
diff changeset
63 ; unroll pragma tests (loop64_with_.*) below.
anatofuz
parents:
diff changeset
64 ;
anatofuz
parents:
diff changeset
65 ; CHECK-LABEL: @loop64(
anatofuz
parents:
diff changeset
66 ; CHECK: store i32
anatofuz
parents:
diff changeset
67 ; CHECK-NOT: store i32
anatofuz
parents:
diff changeset
68 ; CHECK: br i1
anatofuz
parents:
diff changeset
69 define void @loop64(i32* nocapture %a) {
anatofuz
parents:
diff changeset
70 entry:
anatofuz
parents:
diff changeset
71 br label %for.body
anatofuz
parents:
diff changeset
72
anatofuz
parents:
diff changeset
73 for.body: ; preds = %for.body, %entry
anatofuz
parents:
diff changeset
74 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
anatofuz
parents:
diff changeset
75 %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
anatofuz
parents:
diff changeset
76 %0 = load i32, i32* %arrayidx, align 4
anatofuz
parents:
diff changeset
77 %inc = add nsw i32 %0, 1
anatofuz
parents:
diff changeset
78 store i32 %inc, i32* %arrayidx, align 4
anatofuz
parents:
diff changeset
79 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
anatofuz
parents:
diff changeset
80 %exitcond = icmp eq i64 %indvars.iv.next, 64
anatofuz
parents:
diff changeset
81 br i1 %exitcond, label %for.end, label %for.body
anatofuz
parents:
diff changeset
82
anatofuz
parents:
diff changeset
83 for.end: ; preds = %for.body
anatofuz
parents:
diff changeset
84 ret void
anatofuz
parents:
diff changeset
85 }
anatofuz
parents:
diff changeset
86
anatofuz
parents:
diff changeset
87 ; #pragma clang loop unroll(full)
anatofuz
parents:
diff changeset
88 ; Loop should be fully unrolled.
anatofuz
parents:
diff changeset
89 ;
anatofuz
parents:
diff changeset
90 ; CHECK-LABEL: @loop64_with_full(
anatofuz
parents:
diff changeset
91 ; CHECK-NOT: br i1
anatofuz
parents:
diff changeset
92 define void @loop64_with_full(i32* nocapture %a) {
anatofuz
parents:
diff changeset
93 entry:
anatofuz
parents:
diff changeset
94 br label %for.body
anatofuz
parents:
diff changeset
95
anatofuz
parents:
diff changeset
96 for.body: ; preds = %for.body, %entry
anatofuz
parents:
diff changeset
97 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
anatofuz
parents:
diff changeset
98 %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
anatofuz
parents:
diff changeset
99 %0 = load i32, i32* %arrayidx, align 4
anatofuz
parents:
diff changeset
100 %inc = add nsw i32 %0, 1
anatofuz
parents:
diff changeset
101 store i32 %inc, i32* %arrayidx, align 4
anatofuz
parents:
diff changeset
102 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
anatofuz
parents:
diff changeset
103 %exitcond = icmp eq i64 %indvars.iv.next, 64
anatofuz
parents:
diff changeset
104 br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !3
anatofuz
parents:
diff changeset
105
anatofuz
parents:
diff changeset
106 for.end: ; preds = %for.body
anatofuz
parents:
diff changeset
107 ret void
anatofuz
parents:
diff changeset
108 }
anatofuz
parents:
diff changeset
109 !3 = !{!3, !4}
anatofuz
parents:
diff changeset
110 !4 = !{!"llvm.loop.unroll.full"}
anatofuz
parents:
diff changeset
111
anatofuz
parents:
diff changeset
112 ; #pragma clang loop unroll_count(4)
anatofuz
parents:
diff changeset
113 ; Loop should be unrolled 4 times.
anatofuz
parents:
diff changeset
114 ;
anatofuz
parents:
diff changeset
115 ; CHECK-LABEL: @loop64_with_count4(
anatofuz
parents:
diff changeset
116 ; CHECK: store i32
anatofuz
parents:
diff changeset
117 ; CHECK: store i32
anatofuz
parents:
diff changeset
118 ; CHECK: store i32
anatofuz
parents:
diff changeset
119 ; CHECK: store i32
anatofuz
parents:
diff changeset
120 ; CHECK-NOT: store i32
anatofuz
parents:
diff changeset
121 ; CHECK: br i1
anatofuz
parents:
diff changeset
122 define void @loop64_with_count4(i32* nocapture %a) {
anatofuz
parents:
diff changeset
123 entry:
anatofuz
parents:
diff changeset
124 br label %for.body
anatofuz
parents:
diff changeset
125
anatofuz
parents:
diff changeset
126 for.body: ; preds = %for.body, %entry
anatofuz
parents:
diff changeset
127 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
anatofuz
parents:
diff changeset
128 %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
anatofuz
parents:
diff changeset
129 %0 = load i32, i32* %arrayidx, align 4
anatofuz
parents:
diff changeset
130 %inc = add nsw i32 %0, 1
anatofuz
parents:
diff changeset
131 store i32 %inc, i32* %arrayidx, align 4
anatofuz
parents:
diff changeset
132 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
anatofuz
parents:
diff changeset
133 %exitcond = icmp eq i64 %indvars.iv.next, 64
anatofuz
parents:
diff changeset
134 br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !5
anatofuz
parents:
diff changeset
135
anatofuz
parents:
diff changeset
136 for.end: ; preds = %for.body
anatofuz
parents:
diff changeset
137 ret void
anatofuz
parents:
diff changeset
138 }
anatofuz
parents:
diff changeset
139 !5 = !{!5, !6}
anatofuz
parents:
diff changeset
140 !6 = !{!"llvm.loop.unroll.count", i32 4}
anatofuz
parents:
diff changeset
141
anatofuz
parents:
diff changeset
142 ; #pragma clang loop unroll(full)
anatofuz
parents:
diff changeset
143 ; Full unrolling is requested, but loop has a runtime trip count so
anatofuz
parents:
diff changeset
144 ; no unrolling should occur.
anatofuz
parents:
diff changeset
145 ;
anatofuz
parents:
diff changeset
146 ; CHECK-LABEL: @runtime_loop_with_full(
anatofuz
parents:
diff changeset
147 ; CHECK: store i32
anatofuz
parents:
diff changeset
148 ; CHECK-NOT: store i32
anatofuz
parents:
diff changeset
149 define void @runtime_loop_with_full(i32* nocapture %a, i32 %b) {
anatofuz
parents:
diff changeset
150 entry:
anatofuz
parents:
diff changeset
151 %cmp3 = icmp sgt i32 %b, 0
anatofuz
parents:
diff changeset
152 br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !8
anatofuz
parents:
diff changeset
153
anatofuz
parents:
diff changeset
154 for.body: ; preds = %entry, %for.body
anatofuz
parents:
diff changeset
155 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
anatofuz
parents:
diff changeset
156 %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
anatofuz
parents:
diff changeset
157 %0 = load i32, i32* %arrayidx, align 4
anatofuz
parents:
diff changeset
158 %inc = add nsw i32 %0, 1
anatofuz
parents:
diff changeset
159 store i32 %inc, i32* %arrayidx, align 4
anatofuz
parents:
diff changeset
160 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
anatofuz
parents:
diff changeset
161 %lftr.wideiv = trunc i64 %indvars.iv.next to i32
anatofuz
parents:
diff changeset
162 %exitcond = icmp eq i32 %lftr.wideiv, %b
anatofuz
parents:
diff changeset
163 br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !8
anatofuz
parents:
diff changeset
164
anatofuz
parents:
diff changeset
165 for.end: ; preds = %for.body, %entry
anatofuz
parents:
diff changeset
166 ret void
anatofuz
parents:
diff changeset
167 }
anatofuz
parents:
diff changeset
168 !8 = !{!8, !4}
anatofuz
parents:
diff changeset
169
anatofuz
parents:
diff changeset
170 ; #pragma clang loop unroll_count(4)
anatofuz
parents:
diff changeset
171 ; Loop has a runtime trip count. Runtime unrolling should occur and loop
anatofuz
parents:
diff changeset
172 ; should be duplicated (original and 4x unrolled) if remainder is allowed,
anatofuz
parents:
diff changeset
173 ; otherwise loop should not be unrolled.
anatofuz
parents:
diff changeset
174 ;
anatofuz
parents:
diff changeset
175 ; CHECK-LABEL: @runtime_loop_with_count4(
anatofuz
parents:
diff changeset
176 ; CHECK: for.body
anatofuz
parents:
diff changeset
177 ; CHECK: store
anatofuz
parents:
diff changeset
178 ; REM: store
anatofuz
parents:
diff changeset
179 ; REM: store
anatofuz
parents:
diff changeset
180 ; REM: store
anatofuz
parents:
diff changeset
181 ; CHECK-NOT: store
anatofuz
parents:
diff changeset
182 ; CHECK: br i1
anatofuz
parents:
diff changeset
183 ; REM: for.body.epil:
anatofuz
parents:
diff changeset
184 ; REM: store
anatofuz
parents:
diff changeset
185 ; NOREM-NOT: for.body.epil:
anatofuz
parents:
diff changeset
186 ; NOREM-NOT: store
anatofuz
parents:
diff changeset
187 ; CHECK-NOT: store
anatofuz
parents:
diff changeset
188 ; REM: br i1
anatofuz
parents:
diff changeset
189 ; NOREM-NOT: br i1
anatofuz
parents:
diff changeset
190 define void @runtime_loop_with_count4(i32* nocapture %a, i32 %b) {
anatofuz
parents:
diff changeset
191 entry:
anatofuz
parents:
diff changeset
192 %cmp3 = icmp sgt i32 %b, 0
anatofuz
parents:
diff changeset
193 br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !9
anatofuz
parents:
diff changeset
194
anatofuz
parents:
diff changeset
195 for.body: ; preds = %entry, %for.body
anatofuz
parents:
diff changeset
196 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
anatofuz
parents:
diff changeset
197 %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
anatofuz
parents:
diff changeset
198 %0 = load i32, i32* %arrayidx, align 4
anatofuz
parents:
diff changeset
199 %inc = add nsw i32 %0, 1
anatofuz
parents:
diff changeset
200 store i32 %inc, i32* %arrayidx, align 4
anatofuz
parents:
diff changeset
201 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
anatofuz
parents:
diff changeset
202 %lftr.wideiv = trunc i64 %indvars.iv.next to i32
anatofuz
parents:
diff changeset
203 %exitcond = icmp eq i32 %lftr.wideiv, %b
anatofuz
parents:
diff changeset
204 br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !9
anatofuz
parents:
diff changeset
205
anatofuz
parents:
diff changeset
206 for.end: ; preds = %for.body, %entry
anatofuz
parents:
diff changeset
207 ret void
anatofuz
parents:
diff changeset
208 }
anatofuz
parents:
diff changeset
209 !9 = !{!9, !6}
anatofuz
parents:
diff changeset
210
anatofuz
parents:
diff changeset
211 ; #pragma clang loop unroll_count(1)
anatofuz
parents:
diff changeset
212 ; Loop should not be unrolled.
anatofuz
parents:
diff changeset
213 ;
anatofuz
parents:
diff changeset
214 ; CHECK-LABEL: @unroll_1(
anatofuz
parents:
diff changeset
215 ; CHECK: store i32
anatofuz
parents:
diff changeset
216 ; CHECK-NOT: store i32
anatofuz
parents:
diff changeset
217 ; CHECK: br i1
anatofuz
parents:
diff changeset
218 define void @unroll_1(i32* nocapture %a, i32 %b) {
anatofuz
parents:
diff changeset
219 entry:
anatofuz
parents:
diff changeset
220 br label %for.body
anatofuz
parents:
diff changeset
221
anatofuz
parents:
diff changeset
222 for.body: ; preds = %for.body, %entry
anatofuz
parents:
diff changeset
223 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
anatofuz
parents:
diff changeset
224 %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
anatofuz
parents:
diff changeset
225 %0 = load i32, i32* %arrayidx, align 4
anatofuz
parents:
diff changeset
226 %inc = add nsw i32 %0, 1
anatofuz
parents:
diff changeset
227 store i32 %inc, i32* %arrayidx, align 4
anatofuz
parents:
diff changeset
228 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
anatofuz
parents:
diff changeset
229 %exitcond = icmp eq i64 %indvars.iv.next, 4
anatofuz
parents:
diff changeset
230 br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !10
anatofuz
parents:
diff changeset
231
anatofuz
parents:
diff changeset
232 for.end: ; preds = %for.body
anatofuz
parents:
diff changeset
233 ret void
anatofuz
parents:
diff changeset
234 }
anatofuz
parents:
diff changeset
235 !10 = !{!10, !11}
anatofuz
parents:
diff changeset
236 !11 = !{!"llvm.loop.unroll.count", i32 1}
anatofuz
parents:
diff changeset
237
anatofuz
parents:
diff changeset
238 ; #pragma clang loop unroll(full)
anatofuz
parents:
diff changeset
239 ; Loop has very high loop count (1 million) and full unrolling was requested.
anatofuz
parents:
diff changeset
240 ; Loop should be unrolled up to the pragma threshold, but not completely.
anatofuz
parents:
diff changeset
241 ;
anatofuz
parents:
diff changeset
242 ; CHECK-LABEL: @unroll_1M(
anatofuz
parents:
diff changeset
243 ; CHECK: store i32
anatofuz
parents:
diff changeset
244 ; CHECK: store i32
anatofuz
parents:
diff changeset
245 ; CHECK: br i1
anatofuz
parents:
diff changeset
246 define void @unroll_1M(i32* nocapture %a, i32 %b) {
anatofuz
parents:
diff changeset
247 entry:
anatofuz
parents:
diff changeset
248 br label %for.body
anatofuz
parents:
diff changeset
249
anatofuz
parents:
diff changeset
250 for.body: ; preds = %for.body, %entry
anatofuz
parents:
diff changeset
251 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
anatofuz
parents:
diff changeset
252 %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
anatofuz
parents:
diff changeset
253 %0 = load i32, i32* %arrayidx, align 4
anatofuz
parents:
diff changeset
254 %inc = add nsw i32 %0, 1
anatofuz
parents:
diff changeset
255 store i32 %inc, i32* %arrayidx, align 4
anatofuz
parents:
diff changeset
256 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
anatofuz
parents:
diff changeset
257 %exitcond = icmp eq i64 %indvars.iv.next, 1000000
anatofuz
parents:
diff changeset
258 br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !12
anatofuz
parents:
diff changeset
259
anatofuz
parents:
diff changeset
260 for.end: ; preds = %for.body
anatofuz
parents:
diff changeset
261 ret void
anatofuz
parents:
diff changeset
262 }
anatofuz
parents:
diff changeset
263 !12 = !{!12, !4}
anatofuz
parents:
diff changeset
264
anatofuz
parents:
diff changeset
265 ; #pragma clang loop unroll(enable)
anatofuz
parents:
diff changeset
266 ; Loop should be fully unrolled.
anatofuz
parents:
diff changeset
267 ;
anatofuz
parents:
diff changeset
268 ; CHECK-LABEL: @loop64_with_enable(
anatofuz
parents:
diff changeset
269 ; CHECK-NOT: br i1
anatofuz
parents:
diff changeset
270 define void @loop64_with_enable(i32* nocapture %a) {
anatofuz
parents:
diff changeset
271 entry:
anatofuz
parents:
diff changeset
272 br label %for.body
anatofuz
parents:
diff changeset
273
anatofuz
parents:
diff changeset
274 for.body: ; preds = %for.body, %entry
anatofuz
parents:
diff changeset
275 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
anatofuz
parents:
diff changeset
276 %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
anatofuz
parents:
diff changeset
277 %0 = load i32, i32* %arrayidx, align 4
anatofuz
parents:
diff changeset
278 %inc = add nsw i32 %0, 1
anatofuz
parents:
diff changeset
279 store i32 %inc, i32* %arrayidx, align 4
anatofuz
parents:
diff changeset
280 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
anatofuz
parents:
diff changeset
281 %exitcond = icmp eq i64 %indvars.iv.next, 64
anatofuz
parents:
diff changeset
282 br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !13
anatofuz
parents:
diff changeset
283
anatofuz
parents:
diff changeset
284 for.end: ; preds = %for.body
anatofuz
parents:
diff changeset
285 ret void
anatofuz
parents:
diff changeset
286 }
anatofuz
parents:
diff changeset
287 !13 = !{!13, !14}
anatofuz
parents:
diff changeset
288 !14 = !{!"llvm.loop.unroll.enable"}
anatofuz
parents:
diff changeset
289
anatofuz
parents:
diff changeset
290 ; #pragma clang loop unroll(enable)
anatofuz
parents:
diff changeset
291 ; Loop has a runtime trip count and should be runtime unrolled and duplicated
anatofuz
parents:
diff changeset
292 ; (original and 8x) if remainder is allowed, otherwise it should not be
anatofuz
parents:
diff changeset
293 ; unrolled.
anatofuz
parents:
diff changeset
294 ;
anatofuz
parents:
diff changeset
295 ; CHECK-LABEL: @runtime_loop_with_enable(
anatofuz
parents:
diff changeset
296 ; CHECK: for.body:
anatofuz
parents:
diff changeset
297 ; CHECK: store i32
anatofuz
parents:
diff changeset
298 ; REM: store i32
anatofuz
parents:
diff changeset
299 ; REM: store i32
anatofuz
parents:
diff changeset
300 ; REM: store i32
anatofuz
parents:
diff changeset
301 ; REM: store i32
anatofuz
parents:
diff changeset
302 ; REM: store i32
anatofuz
parents:
diff changeset
303 ; REM: store i32
anatofuz
parents:
diff changeset
304 ; REM: store i32
anatofuz
parents:
diff changeset
305 ; CHECK-NOT: store i32
anatofuz
parents:
diff changeset
306 ; CHECK: br i1
anatofuz
parents:
diff changeset
307 ; REM: for.body.epil:
anatofuz
parents:
diff changeset
308 ; NOREM-NOT: for.body.epil:
anatofuz
parents:
diff changeset
309 ; REM: store
anatofuz
parents:
diff changeset
310 ; CHECK-NOT: store
anatofuz
parents:
diff changeset
311 ; REM: br i1
anatofuz
parents:
diff changeset
312 ; NOREM-NOT: br i1
anatofuz
parents:
diff changeset
313 define void @runtime_loop_with_enable(i32* nocapture %a, i32 %b) {
anatofuz
parents:
diff changeset
314 entry:
anatofuz
parents:
diff changeset
315 %cmp3 = icmp sgt i32 %b, 0
anatofuz
parents:
diff changeset
316 br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !8
anatofuz
parents:
diff changeset
317
anatofuz
parents:
diff changeset
318 for.body: ; preds = %entry, %for.body
anatofuz
parents:
diff changeset
319 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
anatofuz
parents:
diff changeset
320 %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
anatofuz
parents:
diff changeset
321 %0 = load i32, i32* %arrayidx, align 4
anatofuz
parents:
diff changeset
322 %inc = add nsw i32 %0, 1
anatofuz
parents:
diff changeset
323 store i32 %inc, i32* %arrayidx, align 4
anatofuz
parents:
diff changeset
324 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
anatofuz
parents:
diff changeset
325 %lftr.wideiv = trunc i64 %indvars.iv.next to i32
anatofuz
parents:
diff changeset
326 %exitcond = icmp eq i32 %lftr.wideiv, %b
anatofuz
parents:
diff changeset
327 br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !15
anatofuz
parents:
diff changeset
328
anatofuz
parents:
diff changeset
329 for.end: ; preds = %for.body, %entry
anatofuz
parents:
diff changeset
330 ret void
anatofuz
parents:
diff changeset
331 }
anatofuz
parents:
diff changeset
332 !15 = !{!15, !14}
anatofuz
parents:
diff changeset
333
anatofuz
parents:
diff changeset
334 ; #pragma clang loop unroll_count(3)
anatofuz
parents:
diff changeset
335 ; Loop has a runtime trip count. Runtime unrolling should occur and loop
anatofuz
parents:
diff changeset
336 ; should be duplicated (original and 3x unrolled) if remainder is allowed,
anatofuz
parents:
diff changeset
337 ; otherwise it should not be unrolled.
anatofuz
parents:
diff changeset
338 ;
anatofuz
parents:
diff changeset
339 ; CHECK-LABEL: @runtime_loop_with_count3(
anatofuz
parents:
diff changeset
340 ; CHECK: for.body
anatofuz
parents:
diff changeset
341 ; CHECK: store
anatofuz
parents:
diff changeset
342 ; REM: store
anatofuz
parents:
diff changeset
343 ; REM: store
anatofuz
parents:
diff changeset
344 ; CHECK-NOT: store
anatofuz
parents:
diff changeset
345 ; CHECK: br i1
anatofuz
parents:
diff changeset
346 ; REM: for.body.epil:
anatofuz
parents:
diff changeset
347 ; REM: store
anatofuz
parents:
diff changeset
348 ; NOREM-NOT: for.body.epil:
anatofuz
parents:
diff changeset
349 ; NOREM-NOT: store
anatofuz
parents:
diff changeset
350 ; CHECK-NOT: store
anatofuz
parents:
diff changeset
351 ; REM: br i1
anatofuz
parents:
diff changeset
352 define void @runtime_loop_with_count3(i32* nocapture %a, i32 %b) {
anatofuz
parents:
diff changeset
353 entry:
anatofuz
parents:
diff changeset
354 %cmp3 = icmp sgt i32 %b, 0
anatofuz
parents:
diff changeset
355 br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !16
anatofuz
parents:
diff changeset
356
anatofuz
parents:
diff changeset
357 for.body: ; preds = %entry, %for.body
anatofuz
parents:
diff changeset
358 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
anatofuz
parents:
diff changeset
359 %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
anatofuz
parents:
diff changeset
360 %0 = load i32, i32* %arrayidx, align 4
anatofuz
parents:
diff changeset
361 %inc = add nsw i32 %0, 1
anatofuz
parents:
diff changeset
362 store i32 %inc, i32* %arrayidx, align 4
anatofuz
parents:
diff changeset
363 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
anatofuz
parents:
diff changeset
364 %lftr.wideiv = trunc i64 %indvars.iv.next to i32
anatofuz
parents:
diff changeset
365 %exitcond = icmp eq i32 %lftr.wideiv, %b
anatofuz
parents:
diff changeset
366 br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !16
anatofuz
parents:
diff changeset
367
anatofuz
parents:
diff changeset
368 for.end: ; preds = %for.body, %entry
anatofuz
parents:
diff changeset
369 ret void
anatofuz
parents:
diff changeset
370 }
anatofuz
parents:
diff changeset
371 !16 = !{!16, !17}
anatofuz
parents:
diff changeset
372 !17 = !{!"llvm.loop.unroll.count", i32 3}