0
|
1 /* Loop unswitching for GNU compiler.
|
|
2 Copyright (C) 2002, 2003, 2004, 2005, 2007, 2008
|
|
3 Free Software Foundation, Inc.
|
|
4
|
|
5 This file is part of GCC.
|
|
6
|
|
7 GCC is free software; you can redistribute it and/or modify it under
|
|
8 the terms of the GNU General Public License as published by the Free
|
|
9 Software Foundation; either version 3, or (at your option) any later
|
|
10 version.
|
|
11
|
|
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
|
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
15 for more details.
|
|
16
|
|
17 You should have received a copy of the GNU General Public License
|
|
18 along with GCC; see the file COPYING3. If not see
|
|
19 <http://www.gnu.org/licenses/>. */
|
|
20
|
|
21 #include "config.h"
|
|
22 #include "system.h"
|
|
23 #include "coretypes.h"
|
|
24 #include "tm.h"
|
|
25 #include "rtl.h"
|
|
26 #include "hard-reg-set.h"
|
|
27 #include "obstack.h"
|
|
28 #include "basic-block.h"
|
|
29 #include "cfgloop.h"
|
|
30 #include "cfglayout.h"
|
|
31 #include "params.h"
|
|
32 #include "output.h"
|
|
33 #include "expr.h"
|
|
34
|
|
35 /* This pass moves constant conditions out of loops, duplicating the loop
|
|
36 in progress, i.e. this code:
|
|
37
|
|
38 while (loop_cond)
|
|
39 {
|
|
40 A;
|
|
41 if (cond)
|
|
42 branch1;
|
|
43 else
|
|
44 branch2;
|
|
45 B;
|
|
46 if (cond)
|
|
47 branch3;
|
|
48 C;
|
|
49 }
|
|
50 where nothing inside the loop alters cond is transformed
|
|
51 into
|
|
52
|
|
53 if (cond)
|
|
54 {
|
|
55 while (loop_cond)
|
|
56 {
|
|
57 A;
|
|
58 branch1;
|
|
59 B;
|
|
60 branch3;
|
|
61 C;
|
|
62 }
|
|
63 }
|
|
64 else
|
|
65 {
|
|
66 while (loop_cond)
|
|
67 {
|
|
68 A;
|
|
69 branch2;
|
|
70 B;
|
|
71 C;
|
|
72 }
|
|
73 }
|
|
74
|
|
75 Duplicating the loop might lead to code growth exponential in number of
|
|
76 branches inside loop, so we limit the number of unswitchings performed
|
|
77 in a single loop to PARAM_MAX_UNSWITCH_LEVEL. We only perform the
|
|
78 transformation on innermost loops, as the benefit of doing it on loops
|
|
79 containing subloops would not be very large compared to complications
|
|
80 with handling this case. */
|
|
81
|
|
82 static struct loop *unswitch_loop (struct loop *, basic_block, rtx, rtx);
|
|
83 static void unswitch_single_loop (struct loop *, rtx, int);
|
|
84 static rtx may_unswitch_on (basic_block, struct loop *, rtx *);
|
|
85
|
|
86 /* Prepare a sequence comparing OP0 with OP1 using COMP and jumping to LABEL if
|
|
87 true, with probability PROB. If CINSN is not NULL, it is the insn to copy
|
|
88 in order to create a jump. */
|
|
89
|
|
90 rtx
|
|
91 compare_and_jump_seq (rtx op0, rtx op1, enum rtx_code comp, rtx label, int prob,
|
|
92 rtx cinsn)
|
|
93 {
|
|
94 rtx seq, jump, cond;
|
|
95 enum machine_mode mode;
|
|
96
|
|
97 mode = GET_MODE (op0);
|
|
98 if (mode == VOIDmode)
|
|
99 mode = GET_MODE (op1);
|
|
100
|
|
101 start_sequence ();
|
|
102 if (GET_MODE_CLASS (mode) == MODE_CC)
|
|
103 {
|
|
104 /* A hack -- there seems to be no easy generic way how to make a
|
|
105 conditional jump from a ccmode comparison. */
|
|
106 gcc_assert (cinsn);
|
|
107 cond = XEXP (SET_SRC (pc_set (cinsn)), 0);
|
|
108 gcc_assert (GET_CODE (cond) == comp);
|
|
109 gcc_assert (rtx_equal_p (op0, XEXP (cond, 0)));
|
|
110 gcc_assert (rtx_equal_p (op1, XEXP (cond, 1)));
|
|
111 emit_jump_insn (copy_insn (PATTERN (cinsn)));
|
|
112 jump = get_last_insn ();
|
|
113 JUMP_LABEL (jump) = JUMP_LABEL (cinsn);
|
|
114 LABEL_NUSES (JUMP_LABEL (jump))++;
|
|
115 redirect_jump (jump, label, 0);
|
|
116 }
|
|
117 else
|
|
118 {
|
|
119 gcc_assert (!cinsn);
|
|
120
|
|
121 op0 = force_operand (op0, NULL_RTX);
|
|
122 op1 = force_operand (op1, NULL_RTX);
|
|
123 do_compare_rtx_and_jump (op0, op1, comp, 0,
|
|
124 mode, NULL_RTX, NULL_RTX, label);
|
|
125 jump = get_last_insn ();
|
|
126 JUMP_LABEL (jump) = label;
|
|
127 LABEL_NUSES (label)++;
|
|
128 }
|
|
129 add_reg_note (jump, REG_BR_PROB, GEN_INT (prob));
|
|
130
|
|
131 seq = get_insns ();
|
|
132 end_sequence ();
|
|
133
|
|
134 return seq;
|
|
135 }
|
|
136
|
|
137 /* Main entry point. Perform loop unswitching on all suitable loops. */
|
|
138 void
|
|
139 unswitch_loops (void)
|
|
140 {
|
|
141 loop_iterator li;
|
|
142 struct loop *loop;
|
|
143
|
|
144 /* Go through inner loops (only original ones). */
|
|
145
|
|
146 FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST)
|
|
147 {
|
|
148 unswitch_single_loop (loop, NULL_RTX, 0);
|
|
149 #ifdef ENABLE_CHECKING
|
|
150 verify_dominators (CDI_DOMINATORS);
|
|
151 verify_loop_structure ();
|
|
152 #endif
|
|
153 }
|
|
154
|
|
155 iv_analysis_done ();
|
|
156 }
|
|
157
|
|
158 /* Checks whether we can unswitch LOOP on condition at end of BB -- one of its
|
|
159 basic blocks (for what it means see comments below). In case condition
|
|
160 compares loop invariant cc mode register, return the jump in CINSN. */
|
|
161
|
|
162 static rtx
|
|
163 may_unswitch_on (basic_block bb, struct loop *loop, rtx *cinsn)
|
|
164 {
|
|
165 rtx test, at, op[2], stest;
|
|
166 struct rtx_iv iv;
|
|
167 unsigned i;
|
|
168 enum machine_mode mode;
|
|
169
|
|
170 /* BB must end in a simple conditional jump. */
|
|
171 if (EDGE_COUNT (bb->succs) != 2)
|
|
172 return NULL_RTX;
|
|
173 if (!any_condjump_p (BB_END (bb)))
|
|
174 return NULL_RTX;
|
|
175
|
|
176 /* With branches inside loop. */
|
|
177 if (!flow_bb_inside_loop_p (loop, EDGE_SUCC (bb, 0)->dest)
|
|
178 || !flow_bb_inside_loop_p (loop, EDGE_SUCC (bb, 1)->dest))
|
|
179 return NULL_RTX;
|
|
180
|
|
181 /* It must be executed just once each iteration (because otherwise we
|
|
182 are unable to update dominator/irreducible loop information correctly). */
|
|
183 if (!just_once_each_iteration_p (loop, bb))
|
|
184 return NULL_RTX;
|
|
185
|
|
186 /* Condition must be invariant. */
|
|
187 test = get_condition (BB_END (bb), &at, true, false);
|
|
188 if (!test)
|
|
189 return NULL_RTX;
|
|
190
|
|
191 for (i = 0; i < 2; i++)
|
|
192 {
|
|
193 op[i] = XEXP (test, i);
|
|
194
|
|
195 if (CONSTANT_P (op[i]))
|
|
196 continue;
|
|
197
|
|
198 if (!iv_analyze (at, op[i], &iv))
|
|
199 return NULL_RTX;
|
|
200 if (iv.step != const0_rtx
|
|
201 || iv.first_special)
|
|
202 return NULL_RTX;
|
|
203
|
|
204 op[i] = get_iv_value (&iv, const0_rtx);
|
|
205 }
|
|
206
|
|
207 mode = GET_MODE (op[0]);
|
|
208 if (mode == VOIDmode)
|
|
209 mode = GET_MODE (op[1]);
|
|
210 if (GET_MODE_CLASS (mode) == MODE_CC)
|
|
211 {
|
|
212 if (at != BB_END (bb))
|
|
213 return NULL_RTX;
|
|
214
|
|
215 if (!rtx_equal_p (op[0], XEXP (test, 0))
|
|
216 || !rtx_equal_p (op[1], XEXP (test, 1)))
|
|
217 return NULL_RTX;
|
|
218
|
|
219 *cinsn = BB_END (bb);
|
|
220 return test;
|
|
221 }
|
|
222
|
|
223 stest = simplify_gen_relational (GET_CODE (test), SImode,
|
|
224 mode, op[0], op[1]);
|
|
225 if (stest == const0_rtx
|
|
226 || stest == const_true_rtx)
|
|
227 return stest;
|
|
228
|
|
229 return canon_condition (gen_rtx_fmt_ee (GET_CODE (test), SImode,
|
|
230 op[0], op[1]));
|
|
231 }
|
|
232
|
|
233 /* Reverses CONDition; returns NULL if we cannot. */
|
|
234 rtx
|
|
235 reversed_condition (rtx cond)
|
|
236 {
|
|
237 enum rtx_code reversed;
|
|
238 reversed = reversed_comparison_code (cond, NULL);
|
|
239 if (reversed == UNKNOWN)
|
|
240 return NULL_RTX;
|
|
241 else
|
|
242 return gen_rtx_fmt_ee (reversed,
|
|
243 GET_MODE (cond), XEXP (cond, 0),
|
|
244 XEXP (cond, 1));
|
|
245 }
|
|
246
|
|
247 /* Unswitch single LOOP. COND_CHECKED holds list of conditions we already
|
|
248 unswitched on and are therefore known to be true in this LOOP. NUM is
|
|
249 number of unswitchings done; do not allow it to grow too much, it is too
|
|
250 easy to create example on that the code would grow exponentially. */
|
|
251 static void
|
|
252 unswitch_single_loop (struct loop *loop, rtx cond_checked, int num)
|
|
253 {
|
|
254 basic_block *bbs;
|
|
255 struct loop *nloop;
|
|
256 unsigned i;
|
|
257 rtx cond, rcond = NULL_RTX, conds, rconds, acond, cinsn;
|
|
258 int repeat;
|
|
259 edge e;
|
|
260
|
|
261 /* Do not unswitch too much. */
|
|
262 if (num > PARAM_VALUE (PARAM_MAX_UNSWITCH_LEVEL))
|
|
263 {
|
|
264 if (dump_file)
|
|
265 fprintf (dump_file, ";; Not unswitching anymore, hit max level\n");
|
|
266 return;
|
|
267 }
|
|
268
|
|
269 /* Only unswitch innermost loops. */
|
|
270 if (loop->inner)
|
|
271 {
|
|
272 if (dump_file)
|
|
273 fprintf (dump_file, ";; Not unswitching, not innermost loop\n");
|
|
274 return;
|
|
275 }
|
|
276
|
|
277 /* We must be able to duplicate loop body. */
|
|
278 if (!can_duplicate_loop_p (loop))
|
|
279 {
|
|
280 if (dump_file)
|
|
281 fprintf (dump_file, ";; Not unswitching, can't duplicate loop\n");
|
|
282 return;
|
|
283 }
|
|
284
|
|
285 /* The loop should not be too large, to limit code growth. */
|
|
286 if (num_loop_insns (loop) > PARAM_VALUE (PARAM_MAX_UNSWITCH_INSNS))
|
|
287 {
|
|
288 if (dump_file)
|
|
289 fprintf (dump_file, ";; Not unswitching, loop too big\n");
|
|
290 return;
|
|
291 }
|
|
292
|
|
293 /* Do not unswitch in cold areas. */
|
|
294 if (optimize_loop_for_size_p (loop))
|
|
295 {
|
|
296 if (dump_file)
|
|
297 fprintf (dump_file, ";; Not unswitching, not hot area\n");
|
|
298 return;
|
|
299 }
|
|
300
|
|
301 /* Nor if the loop usually does not roll. */
|
|
302 if (expected_loop_iterations (loop) < 1)
|
|
303 {
|
|
304 if (dump_file)
|
|
305 fprintf (dump_file, ";; Not unswitching, loop iterations < 1\n");
|
|
306 return;
|
|
307 }
|
|
308
|
|
309 do
|
|
310 {
|
|
311 repeat = 0;
|
|
312 cinsn = NULL_RTX;
|
|
313
|
|
314 /* Find a bb to unswitch on. */
|
|
315 bbs = get_loop_body (loop);
|
|
316 iv_analysis_loop_init (loop);
|
|
317 for (i = 0; i < loop->num_nodes; i++)
|
|
318 if ((cond = may_unswitch_on (bbs[i], loop, &cinsn)))
|
|
319 break;
|
|
320
|
|
321 if (i == loop->num_nodes)
|
|
322 {
|
|
323 free (bbs);
|
|
324 return;
|
|
325 }
|
|
326
|
|
327 if (cond != const0_rtx
|
|
328 && cond != const_true_rtx)
|
|
329 {
|
|
330 rcond = reversed_condition (cond);
|
|
331 if (rcond)
|
|
332 rcond = canon_condition (rcond);
|
|
333
|
|
334 /* Check whether the result can be predicted. */
|
|
335 for (acond = cond_checked; acond; acond = XEXP (acond, 1))
|
|
336 simplify_using_condition (XEXP (acond, 0), &cond, NULL);
|
|
337 }
|
|
338
|
|
339 if (cond == const_true_rtx)
|
|
340 {
|
|
341 /* Remove false path. */
|
|
342 e = FALLTHRU_EDGE (bbs[i]);
|
|
343 remove_path (e);
|
|
344 free (bbs);
|
|
345 repeat = 1;
|
|
346 }
|
|
347 else if (cond == const0_rtx)
|
|
348 {
|
|
349 /* Remove true path. */
|
|
350 e = BRANCH_EDGE (bbs[i]);
|
|
351 remove_path (e);
|
|
352 free (bbs);
|
|
353 repeat = 1;
|
|
354 }
|
|
355 } while (repeat);
|
|
356
|
|
357 /* We found the condition we can unswitch on. */
|
|
358 conds = alloc_EXPR_LIST (0, cond, cond_checked);
|
|
359 if (rcond)
|
|
360 rconds = alloc_EXPR_LIST (0, rcond, cond_checked);
|
|
361 else
|
|
362 rconds = cond_checked;
|
|
363
|
|
364 if (dump_file)
|
|
365 fprintf (dump_file, ";; Unswitching loop\n");
|
|
366
|
|
367 /* Unswitch the loop on this condition. */
|
|
368 nloop = unswitch_loop (loop, bbs[i], cond, cinsn);
|
|
369 gcc_assert (nloop);
|
|
370
|
|
371 /* Invoke itself on modified loops. */
|
|
372 unswitch_single_loop (nloop, rconds, num + 1);
|
|
373 unswitch_single_loop (loop, conds, num + 1);
|
|
374
|
|
375 free_EXPR_LIST_node (conds);
|
|
376 if (rcond)
|
|
377 free_EXPR_LIST_node (rconds);
|
|
378
|
|
379 free (bbs);
|
|
380 }
|
|
381
|
|
382 /* Unswitch a LOOP w.r. to given basic block UNSWITCH_ON. We only support
|
|
383 unswitching of innermost loops. UNSWITCH_ON must be executed in every
|
|
384 iteration, i.e. it must dominate LOOP latch. COND is the condition
|
|
385 determining which loop is entered. Returns NULL if impossible, new loop
|
|
386 otherwise. The new loop is entered if COND is true. If CINSN is not
|
|
387 NULL, it is the insn in that COND is compared. */
|
|
388
|
|
389 static struct loop *
|
|
390 unswitch_loop (struct loop *loop, basic_block unswitch_on, rtx cond, rtx cinsn)
|
|
391 {
|
|
392 edge entry, latch_edge, true_edge, false_edge, e;
|
|
393 basic_block switch_bb, unswitch_on_alt;
|
|
394 struct loop *nloop;
|
|
395 sbitmap zero_bitmap;
|
|
396 int irred_flag, prob;
|
|
397 rtx seq;
|
|
398
|
|
399 /* Some sanity checking. */
|
|
400 gcc_assert (flow_bb_inside_loop_p (loop, unswitch_on));
|
|
401 gcc_assert (EDGE_COUNT (unswitch_on->succs) == 2);
|
|
402 gcc_assert (just_once_each_iteration_p (loop, unswitch_on));
|
|
403 gcc_assert (!loop->inner);
|
|
404 gcc_assert (flow_bb_inside_loop_p (loop, EDGE_SUCC (unswitch_on, 0)->dest));
|
|
405 gcc_assert (flow_bb_inside_loop_p (loop, EDGE_SUCC (unswitch_on, 1)->dest));
|
|
406
|
|
407 entry = loop_preheader_edge (loop);
|
|
408
|
|
409 /* Make a copy. */
|
|
410 irred_flag = entry->flags & EDGE_IRREDUCIBLE_LOOP;
|
|
411 entry->flags &= ~EDGE_IRREDUCIBLE_LOOP;
|
|
412 zero_bitmap = sbitmap_alloc (2);
|
|
413 if (!duplicate_loop_to_header_edge (loop, entry, 1,
|
|
414 NULL, NULL, NULL, 0))
|
|
415 return NULL;
|
|
416 entry->flags |= irred_flag;
|
|
417
|
|
418 /* Record the block with condition we unswitch on. */
|
|
419 unswitch_on_alt = get_bb_copy (unswitch_on);
|
|
420 true_edge = BRANCH_EDGE (unswitch_on_alt);
|
|
421 false_edge = FALLTHRU_EDGE (unswitch_on);
|
|
422 latch_edge = single_succ_edge (get_bb_copy (loop->latch));
|
|
423
|
|
424 /* Create a block with the condition. */
|
|
425 prob = true_edge->probability;
|
|
426 switch_bb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
|
|
427 seq = compare_and_jump_seq (XEXP (cond, 0), XEXP (cond, 1), GET_CODE (cond),
|
|
428 block_label (true_edge->dest),
|
|
429 prob, cinsn);
|
|
430 emit_insn_after (seq, BB_END (switch_bb));
|
|
431 e = make_edge (switch_bb, true_edge->dest, 0);
|
|
432 e->probability = prob;
|
|
433 e->count = latch_edge->count * prob / REG_BR_PROB_BASE;
|
|
434 e = make_edge (switch_bb, FALLTHRU_EDGE (unswitch_on)->dest, EDGE_FALLTHRU);
|
|
435 e->probability = false_edge->probability;
|
|
436 e->count = latch_edge->count * (false_edge->probability) / REG_BR_PROB_BASE;
|
|
437
|
|
438 if (irred_flag)
|
|
439 {
|
|
440 switch_bb->flags |= BB_IRREDUCIBLE_LOOP;
|
|
441 EDGE_SUCC (switch_bb, 0)->flags |= EDGE_IRREDUCIBLE_LOOP;
|
|
442 EDGE_SUCC (switch_bb, 1)->flags |= EDGE_IRREDUCIBLE_LOOP;
|
|
443 }
|
|
444 else
|
|
445 {
|
|
446 switch_bb->flags &= ~BB_IRREDUCIBLE_LOOP;
|
|
447 EDGE_SUCC (switch_bb, 0)->flags &= ~EDGE_IRREDUCIBLE_LOOP;
|
|
448 EDGE_SUCC (switch_bb, 1)->flags &= ~EDGE_IRREDUCIBLE_LOOP;
|
|
449 }
|
|
450
|
|
451 /* Loopify from the copy of LOOP body, constructing the new loop. */
|
|
452 nloop = loopify (latch_edge,
|
|
453 single_pred_edge (get_bb_copy (loop->header)), switch_bb,
|
|
454 BRANCH_EDGE (switch_bb), FALLTHRU_EDGE (switch_bb), true,
|
|
455 prob, REG_BR_PROB_BASE - prob);
|
|
456
|
|
457 /* Remove branches that are now unreachable in new loops. */
|
|
458 remove_path (true_edge);
|
|
459 remove_path (false_edge);
|
|
460
|
|
461 /* Preserve the simple loop preheaders. */
|
|
462 split_edge (loop_preheader_edge (loop));
|
|
463 split_edge (loop_preheader_edge (nloop));
|
|
464
|
|
465 return nloop;
|
|
466 }
|