Mercurial > hg > CbC > CbC_gcc
annotate gcc/loop-unroll.c @ 158:494b0b89df80 default tip
...
author | Shinji KONO <kono@ie.u-ryukyu.ac.jp> |
---|---|
date | Mon, 25 May 2020 18:13:55 +0900 |
parents | 1830386684a0 |
children |
rev | line source |
---|---|
111 | 1 /* Loop unrolling. |
145 | 2 Copyright (C) 2002-2020 Free Software Foundation, Inc. |
0 | 3 |
4 This file is part of GCC. | |
5 | |
6 GCC is free software; you can redistribute it and/or modify it under | |
7 the terms of the GNU General Public License as published by the Free | |
8 Software Foundation; either version 3, or (at your option) any later | |
9 version. | |
10 | |
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
14 for more details. | |
15 | |
16 You should have received a copy of the GNU General Public License | |
17 along with GCC; see the file COPYING3. If not see | |
18 <http://www.gnu.org/licenses/>. */ | |
19 | |
20 #include "config.h" | |
21 #include "system.h" | |
22 #include "coretypes.h" | |
111 | 23 #include "backend.h" |
24 #include "target.h" | |
0 | 25 #include "rtl.h" |
111 | 26 #include "tree.h" |
27 #include "cfghooks.h" | |
28 #include "memmodel.h" | |
29 #include "optabs.h" | |
30 #include "emit-rtl.h" | |
31 #include "recog.h" | |
32 #include "profile.h" | |
33 #include "cfgrtl.h" | |
0 | 34 #include "cfgloop.h" |
111 | 35 #include "dojump.h" |
0 | 36 #include "expr.h" |
111 | 37 #include "dumpfile.h" |
0 | 38 |
111 | 39 /* This pass performs loop unrolling. We only perform this |
40 optimization on innermost loops (with single exception) because | |
0 | 41 the impact on performance is greatest here, and we want to avoid |
42 unnecessary code size growth. The gain is caused by greater sequentiality | |
43 of code, better code to optimize for further passes and in some cases | |
44 by fewer testings of exit conditions. The main problem is code growth, | |
45 that impacts performance negatively due to effect of caches. | |
46 | |
47 What we do: | |
48 | |
49 -- unrolling of loops that roll constant times; this is almost always | |
50 win, as we get rid of exit condition tests. | |
51 -- unrolling of loops that roll number of times that we can compute | |
52 in runtime; we also get rid of exit condition tests here, but there | |
53 is the extra expense for calculating the number of iterations | |
54 -- simple unrolling of remaining loops; this is performed only if we | |
55 are asked to, as the gain is questionable in this case and often | |
56 it may even slow down the code | |
57 For more detailed descriptions of each of those, see comments at | |
58 appropriate function below. | |
59 | |
60 There are a lot of parameters (defined and described in params.def) that | |
111 | 61 control how much we unroll. |
0 | 62 |
63 ??? A great problem is that we don't have a good way to determine | |
64 how many times we should unroll the loop; the experiments I have made | |
65 showed that this choice may affect performance in order of several %. | |
66 */ | |
67 | |
68 /* Information about induction variables to split. */ | |
69 | |
70 struct iv_to_split | |
71 { | |
111 | 72 rtx_insn *insn; /* The insn in that the induction variable occurs. */ |
73 rtx orig_var; /* The variable (register) for the IV before split. */ | |
0 | 74 rtx base_var; /* The variable on that the values in the further |
75 iterations are based. */ | |
76 rtx step; /* Step of the induction variable. */ | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
77 struct iv_to_split *next; /* Next entry in walking order. */ |
0 | 78 }; |
79 | |
80 /* Information about accumulators to expand. */ | |
81 | |
82 struct var_to_expand | |
83 { | |
111 | 84 rtx_insn *insn; /* The insn in that the variable expansion occurs. */ |
0 | 85 rtx reg; /* The accumulator which is expanded. */ |
111 | 86 vec<rtx> var_expansions; /* The copies of the accumulator which is expanded. */ |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
87 struct var_to_expand *next; /* Next entry in walking order. */ |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
88 enum rtx_code op; /* The type of the accumulation - addition, subtraction |
0 | 89 or multiplication. */ |
90 int expansion_count; /* Count the number of expansions generated so far. */ | |
91 int reuse_expansion; /* The expansion we intend to reuse to expand | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
92 the accumulator. If REUSE_EXPANSION is 0 reuse |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
93 the original accumulator. Else use |
0 | 94 var_expansions[REUSE_EXPANSION - 1]. */ |
111 | 95 }; |
96 | |
97 /* Hashtable helper for iv_to_split. */ | |
98 | |
99 struct iv_split_hasher : free_ptr_hash <iv_to_split> | |
100 { | |
101 static inline hashval_t hash (const iv_to_split *); | |
102 static inline bool equal (const iv_to_split *, const iv_to_split *); | |
0 | 103 }; |
104 | |
111 | 105 |
106 /* A hash function for information about insns to split. */ | |
107 | |
108 inline hashval_t | |
109 iv_split_hasher::hash (const iv_to_split *ivts) | |
110 { | |
111 return (hashval_t) INSN_UID (ivts->insn); | |
112 } | |
113 | |
114 /* An equality functions for information about insns to split. */ | |
115 | |
116 inline bool | |
117 iv_split_hasher::equal (const iv_to_split *i1, const iv_to_split *i2) | |
118 { | |
119 return i1->insn == i2->insn; | |
120 } | |
121 | |
122 /* Hashtable helper for iv_to_split. */ | |
123 | |
124 struct var_expand_hasher : free_ptr_hash <var_to_expand> | |
125 { | |
126 static inline hashval_t hash (const var_to_expand *); | |
127 static inline bool equal (const var_to_expand *, const var_to_expand *); | |
128 }; | |
129 | |
130 /* Return a hash for VES. */ | |
131 | |
132 inline hashval_t | |
133 var_expand_hasher::hash (const var_to_expand *ves) | |
134 { | |
135 return (hashval_t) INSN_UID (ves->insn); | |
136 } | |
137 | |
138 /* Return true if I1 and I2 refer to the same instruction. */ | |
139 | |
140 inline bool | |
141 var_expand_hasher::equal (const var_to_expand *i1, const var_to_expand *i2) | |
142 { | |
143 return i1->insn == i2->insn; | |
144 } | |
145 | |
0 | 146 /* Information about optimization applied in |
147 the unrolled loop. */ | |
148 | |
149 struct opt_info | |
150 { | |
111 | 151 hash_table<iv_split_hasher> *insns_to_split; /* A hashtable of insns to |
152 split. */ | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
153 struct iv_to_split *iv_to_split_head; /* The first iv to split. */ |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
154 struct iv_to_split **iv_to_split_tail; /* Pointer to the tail of the list. */ |
111 | 155 hash_table<var_expand_hasher> *insns_with_var_to_expand; /* A hashtable of |
156 insns with accumulators to expand. */ | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
157 struct var_to_expand *var_to_expand_head; /* The first var to expand. */ |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
158 struct var_to_expand **var_to_expand_tail; /* Pointer to the tail of the list. */ |
0 | 159 unsigned first_new_block; /* The first basic block that was |
160 duplicated. */ | |
161 basic_block loop_exit; /* The loop exit basic block. */ | |
162 basic_block loop_preheader; /* The loop preheader basic block. */ | |
163 }; | |
164 | |
145 | 165 static void decide_unroll_stupid (class loop *, int); |
166 static void decide_unroll_constant_iterations (class loop *, int); | |
167 static void decide_unroll_runtime_iterations (class loop *, int); | |
168 static void unroll_loop_stupid (class loop *); | |
111 | 169 static void decide_unrolling (int); |
145 | 170 static void unroll_loop_constant_iterations (class loop *); |
171 static void unroll_loop_runtime_iterations (class loop *); | |
172 static struct opt_info *analyze_insns_in_loop (class loop *); | |
0 | 173 static void opt_info_start_duplication (struct opt_info *); |
174 static void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool); | |
175 static void free_opt_info (struct opt_info *); | |
145 | 176 static struct var_to_expand *analyze_insn_to_expand_var (class loop*, rtx_insn *); |
177 static bool referenced_in_one_insn_in_loop_p (class loop *, rtx, int *); | |
111 | 178 static struct iv_to_split *analyze_iv_to_split_insn (rtx_insn *); |
179 static void expand_var_during_unrolling (struct var_to_expand *, rtx_insn *); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
180 static void insert_var_expansion_initialization (struct var_to_expand *, |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
181 basic_block); |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
182 static void combine_var_copies_in_loop_exit (struct var_to_expand *, |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
183 basic_block); |
0 | 184 static rtx get_expansion (struct var_to_expand *); |
185 | |
111 | 186 /* Emit a message summarizing the unroll that will be |
187 performed for LOOP, along with the loop's location LOCUS, if | |
188 appropriate given the dump or -fopt-info settings. */ | |
189 | |
190 static void | |
145 | 191 report_unroll (class loop *loop, dump_location_t locus) |
0 | 192 { |
111 | 193 dump_flags_t report_flags = MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS; |
0 | 194 |
111 | 195 if (loop->lpt_decision.decision == LPT_NONE) |
196 return; | |
0 | 197 |
111 | 198 if (!dump_enabled_p ()) |
199 return; | |
0 | 200 |
145 | 201 dump_metadata_t metadata (report_flags, locus.get_impl_location ()); |
202 dump_printf_loc (metadata, locus.get_user_location (), | |
111 | 203 "loop unrolled %d times", |
204 loop->lpt_decision.times); | |
205 if (profile_info && loop->header->count.initialized_p ()) | |
145 | 206 dump_printf (metadata, |
111 | 207 " (header execution count %d)", |
208 (int)loop->header->count.to_gcov_type ()); | |
209 | |
145 | 210 dump_printf (metadata, "\n"); |
0 | 211 } |
212 | |
111 | 213 /* Decide whether unroll loops and how much. */ |
0 | 214 static void |
111 | 215 decide_unrolling (int flags) |
0 | 216 { |
145 | 217 class loop *loop; |
0 | 218 |
111 | 219 /* Scan the loops, inner ones first. */ |
220 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST) | |
0 | 221 { |
222 loop->lpt_decision.decision = LPT_NONE; | |
131 | 223 dump_user_location_t locus = get_loop_location (loop); |
0 | 224 |
111 | 225 if (dump_enabled_p ()) |
226 dump_printf_loc (MSG_NOTE, locus, | |
131 | 227 "considering unrolling loop %d at BB %d\n", |
228 loop->num, loop->header->index); | |
229 | |
230 if (loop->unroll == 1) | |
231 { | |
232 if (dump_file) | |
233 fprintf (dump_file, | |
234 ";; Not unrolling loop, user didn't want it unrolled\n"); | |
235 continue; | |
236 } | |
0 | 237 |
238 /* Do not peel cold areas. */ | |
239 if (optimize_loop_for_size_p (loop)) | |
240 { | |
241 if (dump_file) | |
242 fprintf (dump_file, ";; Not considering loop, cold area\n"); | |
243 continue; | |
244 } | |
245 | |
246 /* Can the loop be manipulated? */ | |
247 if (!can_duplicate_loop_p (loop)) | |
248 { | |
249 if (dump_file) | |
250 fprintf (dump_file, | |
251 ";; Not considering loop, cannot duplicate\n"); | |
252 continue; | |
253 } | |
254 | |
255 /* Skip non-innermost loops. */ | |
256 if (loop->inner) | |
257 { | |
258 if (dump_file) | |
259 fprintf (dump_file, ";; Not considering loop, is not innermost\n"); | |
260 continue; | |
261 } | |
262 | |
263 loop->ninsns = num_loop_insns (loop); | |
264 loop->av_ninsns = average_num_loop_insns (loop); | |
265 | |
131 | 266 /* Try transformations one by one in decreasing order of priority. */ |
0 | 267 decide_unroll_constant_iterations (loop, flags); |
268 if (loop->lpt_decision.decision == LPT_NONE) | |
269 decide_unroll_runtime_iterations (loop, flags); | |
270 if (loop->lpt_decision.decision == LPT_NONE) | |
271 decide_unroll_stupid (loop, flags); | |
111 | 272 |
273 report_unroll (loop, locus); | |
0 | 274 } |
275 } | |
276 | |
111 | 277 /* Unroll LOOPS. */ |
278 void | |
279 unroll_loops (int flags) | |
0 | 280 { |
145 | 281 class loop *loop; |
111 | 282 bool changed = false; |
0 | 283 |
111 | 284 /* Now decide rest of unrolling. */ |
285 decide_unrolling (flags); | |
0 | 286 |
111 | 287 /* Scan the loops, inner ones first. */ |
288 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST) | |
0 | 289 { |
111 | 290 /* And perform the appropriate transformations. */ |
291 switch (loop->lpt_decision.decision) | |
292 { | |
293 case LPT_UNROLL_CONSTANT: | |
294 unroll_loop_constant_iterations (loop); | |
295 changed = true; | |
296 break; | |
297 case LPT_UNROLL_RUNTIME: | |
298 unroll_loop_runtime_iterations (loop); | |
299 changed = true; | |
300 break; | |
301 case LPT_UNROLL_STUPID: | |
302 unroll_loop_stupid (loop); | |
303 changed = true; | |
304 break; | |
305 case LPT_NONE: | |
306 break; | |
307 default: | |
308 gcc_unreachable (); | |
309 } | |
0 | 310 } |
311 | |
111 | 312 if (changed) |
313 { | |
314 calculate_dominance_info (CDI_DOMINATORS); | |
315 fix_loop_structure (NULL); | |
316 } | |
0 | 317 |
111 | 318 iv_analysis_done (); |
0 | 319 } |
320 | |
111 | 321 /* Check whether exit of the LOOP is at the end of loop body. */ |
0 | 322 |
111 | 323 static bool |
145 | 324 loop_exit_at_end_p (class loop *loop) |
0 | 325 { |
145 | 326 class niter_desc *desc = get_simple_loop_desc (loop); |
111 | 327 rtx_insn *insn; |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
328 |
111 | 329 /* We should never have conditional in latch block. */ |
330 gcc_assert (desc->in_edge->dest != loop->header); | |
0 | 331 |
111 | 332 if (desc->in_edge->dest != loop->latch) |
333 return false; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
334 |
111 | 335 /* Check that the latch is empty. */ |
336 FOR_BB_INSNS (loop->latch, insn) | |
337 { | |
338 if (INSN_P (insn) && active_insn_p (insn)) | |
339 return false; | |
0 | 340 } |
341 | |
111 | 342 return true; |
0 | 343 } |
344 | |
345 /* Decide whether to unroll LOOP iterating constant number of times | |
346 and how much. */ | |
347 | |
348 static void | |
145 | 349 decide_unroll_constant_iterations (class loop *loop, int flags) |
0 | 350 { |
351 unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i; | |
145 | 352 class niter_desc *desc; |
111 | 353 widest_int iterations; |
0 | 354 |
131 | 355 /* If we were not asked to unroll this loop, just return back silently. */ |
356 if (!(flags & UAP_UNROLL) && !loop->unroll) | |
357 return; | |
0 | 358 |
131 | 359 if (dump_enabled_p ()) |
360 dump_printf (MSG_NOTE, | |
361 "considering unrolling loop with constant " | |
362 "number of iterations\n"); | |
0 | 363 |
364 /* nunroll = total number of copies of the original loop body in | |
131 | 365 unrolled loop (i.e. if it is 2, we have to duplicate loop body once). */ |
145 | 366 nunroll = param_max_unrolled_insns / loop->ninsns; |
0 | 367 nunroll_by_av |
145 | 368 = param_max_average_unrolled_insns / loop->av_ninsns; |
0 | 369 if (nunroll > nunroll_by_av) |
370 nunroll = nunroll_by_av; | |
145 | 371 if (nunroll > (unsigned) param_max_unroll_times) |
372 nunroll = param_max_unroll_times; | |
0 | 373 |
111 | 374 if (targetm.loop_unroll_adjust) |
375 nunroll = targetm.loop_unroll_adjust (nunroll, loop); | |
376 | |
0 | 377 /* Skip big loops. */ |
378 if (nunroll <= 1) | |
379 { | |
380 if (dump_file) | |
381 fprintf (dump_file, ";; Not considering loop, is too big\n"); | |
382 return; | |
383 } | |
384 | |
385 /* Check for simple loops. */ | |
386 desc = get_simple_loop_desc (loop); | |
387 | |
388 /* Check number of iterations. */ | |
389 if (!desc->simple_p || !desc->const_iter || desc->assumptions) | |
390 { | |
391 if (dump_file) | |
392 fprintf (dump_file, | |
393 ";; Unable to prove that the loop iterates constant times\n"); | |
394 return; | |
395 } | |
396 | |
131 | 397 /* Check for an explicit unrolling factor. */ |
398 if (loop->unroll > 0 && loop->unroll < USHRT_MAX) | |
399 { | |
400 /* However we cannot unroll completely at the RTL level a loop with | |
401 constant number of iterations; it should have been peeled instead. */ | |
145 | 402 if (desc->niter == 0 || (unsigned) loop->unroll > desc->niter - 1) |
131 | 403 { |
404 if (dump_file) | |
405 fprintf (dump_file, ";; Loop should have been peeled\n"); | |
406 } | |
407 else | |
408 { | |
409 loop->lpt_decision.decision = LPT_UNROLL_CONSTANT; | |
410 loop->lpt_decision.times = loop->unroll - 1; | |
411 } | |
412 return; | |
413 } | |
414 | |
111 | 415 /* Check whether the loop rolls enough to consider. |
416 Consult also loop bounds and profile; in the case the loop has more | |
417 than one exit it may well loop less than determined maximal number | |
418 of iterations. */ | |
419 if (desc->niter < 2 * nunroll | |
420 || ((get_estimated_loop_iterations (loop, &iterations) | |
421 || get_likely_max_loop_iterations (loop, &iterations)) | |
422 && wi::ltu_p (iterations, 2 * nunroll))) | |
0 | 423 { |
424 if (dump_file) | |
425 fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n"); | |
426 return; | |
427 } | |
428 | |
429 /* Success; now compute number of iterations to unroll. We alter | |
430 nunroll so that as few as possible copies of loop body are | |
431 necessary, while still not decreasing the number of unrollings | |
432 too much (at most by 1). */ | |
433 best_copies = 2 * nunroll + 10; | |
434 | |
435 i = 2 * nunroll + 2; | |
131 | 436 if (i > desc->niter - 2) |
0 | 437 i = desc->niter - 2; |
438 | |
439 for (; i >= nunroll - 1; i--) | |
440 { | |
441 unsigned exit_mod = desc->niter % (i + 1); | |
442 | |
443 if (!loop_exit_at_end_p (loop)) | |
444 n_copies = exit_mod + i + 1; | |
445 else if (exit_mod != (unsigned) i | |
446 || desc->noloop_assumptions != NULL_RTX) | |
447 n_copies = exit_mod + i + 2; | |
448 else | |
449 n_copies = i + 1; | |
450 | |
451 if (n_copies < best_copies) | |
452 { | |
453 best_copies = n_copies; | |
454 best_unroll = i; | |
455 } | |
456 } | |
457 | |
458 loop->lpt_decision.decision = LPT_UNROLL_CONSTANT; | |
459 loop->lpt_decision.times = best_unroll; | |
460 } | |
461 | |
111 | 462 /* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES times. |
463 The transformation does this: | |
0 | 464 |
465 for (i = 0; i < 102; i++) | |
466 body; | |
467 | |
111 | 468 ==> (LOOP->LPT_DECISION.TIMES == 3) |
0 | 469 |
470 i = 0; | |
471 body; i++; | |
472 body; i++; | |
473 while (i < 102) | |
474 { | |
475 body; i++; | |
476 body; i++; | |
477 body; i++; | |
478 body; i++; | |
479 } | |
480 */ | |
481 static void | |
145 | 482 unroll_loop_constant_iterations (class loop *loop) |
0 | 483 { |
484 unsigned HOST_WIDE_INT niter; | |
485 unsigned exit_mod; | |
486 unsigned i; | |
487 edge e; | |
488 unsigned max_unroll = loop->lpt_decision.times; | |
145 | 489 class niter_desc *desc = get_simple_loop_desc (loop); |
0 | 490 bool exit_at_end = loop_exit_at_end_p (loop); |
491 struct opt_info *opt_info = NULL; | |
492 bool ok; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
493 |
0 | 494 niter = desc->niter; |
495 | |
496 /* Should not get here (such loop should be peeled instead). */ | |
497 gcc_assert (niter > max_unroll + 1); | |
498 | |
499 exit_mod = niter % (max_unroll + 1); | |
500 | |
131 | 501 auto_sbitmap wont_exit (max_unroll + 2); |
111 | 502 bitmap_ones (wont_exit); |
0 | 503 |
111 | 504 auto_vec<edge> remove_edges; |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
505 if (flag_split_ivs_in_unroller |
0 | 506 || flag_variable_expansion_in_unroller) |
507 opt_info = analyze_insns_in_loop (loop); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
508 |
0 | 509 if (!exit_at_end) |
510 { | |
511 /* The exit is not at the end of the loop; leave exit test | |
512 in the first copy, so that the loops that start with test | |
513 of exit condition have continuous body after unrolling. */ | |
514 | |
515 if (dump_file) | |
111 | 516 fprintf (dump_file, ";; Condition at beginning of loop.\n"); |
0 | 517 |
518 /* Peel exit_mod iterations. */ | |
111 | 519 bitmap_clear_bit (wont_exit, 0); |
0 | 520 if (desc->noloop_assumptions) |
111 | 521 bitmap_clear_bit (wont_exit, 1); |
0 | 522 |
523 if (exit_mod) | |
524 { | |
525 opt_info_start_duplication (opt_info); | |
526 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), | |
527 exit_mod, | |
528 wont_exit, desc->out_edge, | |
529 &remove_edges, | |
530 DLTHE_FLAG_UPDATE_FREQ | |
531 | (opt_info && exit_mod > 1 | |
532 ? DLTHE_RECORD_COPY_NUMBER | |
533 : 0)); | |
534 gcc_assert (ok); | |
535 | |
536 if (opt_info && exit_mod > 1) | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
537 apply_opt_in_copies (opt_info, exit_mod, false, false); |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
538 |
0 | 539 desc->noloop_assumptions = NULL_RTX; |
540 desc->niter -= exit_mod; | |
111 | 541 loop->nb_iterations_upper_bound -= exit_mod; |
542 if (loop->any_estimate | |
543 && wi::leu_p (exit_mod, loop->nb_iterations_estimate)) | |
544 loop->nb_iterations_estimate -= exit_mod; | |
545 else | |
546 loop->any_estimate = false; | |
547 if (loop->any_likely_upper_bound | |
548 && wi::leu_p (exit_mod, loop->nb_iterations_likely_upper_bound)) | |
549 loop->nb_iterations_likely_upper_bound -= exit_mod; | |
550 else | |
551 loop->any_likely_upper_bound = false; | |
0 | 552 } |
553 | |
111 | 554 bitmap_set_bit (wont_exit, 1); |
0 | 555 } |
556 else | |
557 { | |
558 /* Leave exit test in last copy, for the same reason as above if | |
559 the loop tests the condition at the end of loop body. */ | |
560 | |
561 if (dump_file) | |
111 | 562 fprintf (dump_file, ";; Condition at end of loop.\n"); |
0 | 563 |
564 /* We know that niter >= max_unroll + 2; so we do not need to care of | |
565 case when we would exit before reaching the loop. So just peel | |
566 exit_mod + 1 iterations. */ | |
567 if (exit_mod != max_unroll | |
568 || desc->noloop_assumptions) | |
569 { | |
111 | 570 bitmap_clear_bit (wont_exit, 0); |
0 | 571 if (desc->noloop_assumptions) |
111 | 572 bitmap_clear_bit (wont_exit, 1); |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
573 |
0 | 574 opt_info_start_duplication (opt_info); |
575 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), | |
576 exit_mod + 1, | |
577 wont_exit, desc->out_edge, | |
578 &remove_edges, | |
579 DLTHE_FLAG_UPDATE_FREQ | |
580 | (opt_info && exit_mod > 0 | |
581 ? DLTHE_RECORD_COPY_NUMBER | |
582 : 0)); | |
583 gcc_assert (ok); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
584 |
0 | 585 if (opt_info && exit_mod > 0) |
586 apply_opt_in_copies (opt_info, exit_mod + 1, false, false); | |
587 | |
588 desc->niter -= exit_mod + 1; | |
111 | 589 loop->nb_iterations_upper_bound -= exit_mod + 1; |
590 if (loop->any_estimate | |
591 && wi::leu_p (exit_mod + 1, loop->nb_iterations_estimate)) | |
592 loop->nb_iterations_estimate -= exit_mod + 1; | |
593 else | |
594 loop->any_estimate = false; | |
595 if (loop->any_likely_upper_bound | |
596 && wi::leu_p (exit_mod + 1, loop->nb_iterations_likely_upper_bound)) | |
597 loop->nb_iterations_likely_upper_bound -= exit_mod + 1; | |
598 else | |
599 loop->any_likely_upper_bound = false; | |
0 | 600 desc->noloop_assumptions = NULL_RTX; |
601 | |
111 | 602 bitmap_set_bit (wont_exit, 0); |
603 bitmap_set_bit (wont_exit, 1); | |
0 | 604 } |
605 | |
111 | 606 bitmap_clear_bit (wont_exit, max_unroll); |
0 | 607 } |
608 | |
609 /* Now unroll the loop. */ | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
610 |
0 | 611 opt_info_start_duplication (opt_info); |
612 ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop), | |
613 max_unroll, | |
614 wont_exit, desc->out_edge, | |
615 &remove_edges, | |
616 DLTHE_FLAG_UPDATE_FREQ | |
617 | (opt_info | |
618 ? DLTHE_RECORD_COPY_NUMBER | |
619 : 0)); | |
620 gcc_assert (ok); | |
621 | |
622 if (opt_info) | |
623 { | |
624 apply_opt_in_copies (opt_info, max_unroll, true, true); | |
625 free_opt_info (opt_info); | |
626 } | |
627 | |
628 if (exit_at_end) | |
629 { | |
630 basic_block exit_block = get_bb_copy (desc->in_edge->src); | |
631 /* Find a new in and out edge; they are in the last copy we have made. */ | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
632 |
0 | 633 if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest) |
634 { | |
635 desc->out_edge = EDGE_SUCC (exit_block, 0); | |
636 desc->in_edge = EDGE_SUCC (exit_block, 1); | |
637 } | |
638 else | |
639 { | |
640 desc->out_edge = EDGE_SUCC (exit_block, 1); | |
641 desc->in_edge = EDGE_SUCC (exit_block, 0); | |
642 } | |
643 } | |
644 | |
645 desc->niter /= max_unroll + 1; | |
111 | 646 loop->nb_iterations_upper_bound |
647 = wi::udiv_trunc (loop->nb_iterations_upper_bound, max_unroll + 1); | |
648 if (loop->any_estimate) | |
649 loop->nb_iterations_estimate | |
650 = wi::udiv_trunc (loop->nb_iterations_estimate, max_unroll + 1); | |
651 if (loop->any_likely_upper_bound) | |
652 loop->nb_iterations_likely_upper_bound | |
653 = wi::udiv_trunc (loop->nb_iterations_likely_upper_bound, max_unroll + 1); | |
145 | 654 desc->niter_expr = gen_int_mode (desc->niter, desc->mode); |
0 | 655 |
656 /* Remove the edges. */ | |
111 | 657 FOR_EACH_VEC_ELT (remove_edges, i, e) |
0 | 658 remove_path (e); |
659 | |
660 if (dump_file) | |
661 fprintf (dump_file, | |
662 ";; Unrolled loop %d times, constant # of iterations %i insns\n", | |
663 max_unroll, num_loop_insns (loop)); | |
664 } | |
665 | |
666 /* Decide whether to unroll LOOP iterating runtime computable number of times | |
667 and how much. */ | |
668 static void | |
145 | 669 decide_unroll_runtime_iterations (class loop *loop, int flags) |
0 | 670 { |
671 unsigned nunroll, nunroll_by_av, i; | |
145 | 672 class niter_desc *desc; |
111 | 673 widest_int iterations; |
0 | 674 |
131 | 675 /* If we were not asked to unroll this loop, just return back silently. */ |
676 if (!(flags & UAP_UNROLL) && !loop->unroll) | |
677 return; | |
0 | 678 |
131 | 679 if (dump_enabled_p ()) |
680 dump_printf (MSG_NOTE, | |
681 "considering unrolling loop with runtime-" | |
682 "computable number of iterations\n"); | |
0 | 683 |
684 /* nunroll = total number of copies of the original loop body in | |
685 unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */ | |
145 | 686 nunroll = param_max_unrolled_insns / loop->ninsns; |
687 nunroll_by_av = param_max_average_unrolled_insns / loop->av_ninsns; | |
0 | 688 if (nunroll > nunroll_by_av) |
689 nunroll = nunroll_by_av; | |
145 | 690 if (nunroll > (unsigned) param_max_unroll_times) |
691 nunroll = param_max_unroll_times; | |
0 | 692 |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
693 if (targetm.loop_unroll_adjust) |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
694 nunroll = targetm.loop_unroll_adjust (nunroll, loop); |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
695 |
131 | 696 if (loop->unroll > 0 && loop->unroll < USHRT_MAX) |
697 nunroll = loop->unroll; | |
698 | |
0 | 699 /* Skip big loops. */ |
700 if (nunroll <= 1) | |
701 { | |
702 if (dump_file) | |
703 fprintf (dump_file, ";; Not considering loop, is too big\n"); | |
704 return; | |
705 } | |
706 | |
707 /* Check for simple loops. */ | |
708 desc = get_simple_loop_desc (loop); | |
709 | |
710 /* Check simpleness. */ | |
711 if (!desc->simple_p || desc->assumptions) | |
712 { | |
713 if (dump_file) | |
714 fprintf (dump_file, | |
715 ";; Unable to prove that the number of iterations " | |
716 "can be counted in runtime\n"); | |
717 return; | |
718 } | |
719 | |
720 if (desc->const_iter) | |
721 { | |
722 if (dump_file) | |
723 fprintf (dump_file, ";; Loop iterates constant times\n"); | |
724 return; | |
725 } | |
726 | |
111 | 727 /* Check whether the loop rolls. */ |
728 if ((get_estimated_loop_iterations (loop, &iterations) | |
729 || get_likely_max_loop_iterations (loop, &iterations)) | |
730 && wi::ltu_p (iterations, 2 * nunroll)) | |
0 | 731 { |
732 if (dump_file) | |
733 fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n"); | |
734 return; | |
735 } | |
736 | |
131 | 737 /* Success; now force nunroll to be power of 2, as code-gen |
738 requires it, we are unable to cope with overflows in | |
739 computation of number of iterations. */ | |
0 | 740 for (i = 1; 2 * i <= nunroll; i *= 2) |
741 continue; | |
742 | |
743 loop->lpt_decision.decision = LPT_UNROLL_RUNTIME; | |
744 loop->lpt_decision.times = i - 1; | |
745 } | |
746 | |
747 /* Splits edge E and inserts the sequence of instructions INSNS on it, and | |
748 returns the newly created block. If INSNS is NULL_RTX, nothing is changed | |
749 and NULL is returned instead. */ | |
750 | |
751 basic_block | |
111 | 752 split_edge_and_insert (edge e, rtx_insn *insns) |
0 | 753 { |
754 basic_block bb; | |
755 | |
756 if (!insns) | |
757 return NULL; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
758 bb = split_edge (e); |
0 | 759 emit_insn_after (insns, BB_END (bb)); |
760 | |
761 /* ??? We used to assume that INSNS can contain control flow insns, and | |
762 that we had to try to find sub basic blocks in BB to maintain a valid | |
763 CFG. For this purpose we used to set the BB_SUPERBLOCK flag on BB | |
764 and call break_superblocks when going out of cfglayout mode. But it | |
765 turns out that this never happens; and that if it does ever happen, | |
111 | 766 the verify_flow_info at the end of the RTL loop passes would fail. |
0 | 767 |
768 There are two reasons why we expected we could have control flow insns | |
769 in INSNS. The first is when a comparison has to be done in parts, and | |
770 the second is when the number of iterations is computed for loops with | |
771 the number of iterations known at runtime. In both cases, test cases | |
772 to get control flow in INSNS appear to be impossible to construct: | |
773 | |
774 * If do_compare_rtx_and_jump needs several branches to do comparison | |
775 in a mode that needs comparison by parts, we cannot analyze the | |
776 number of iterations of the loop, and we never get to unrolling it. | |
777 | |
778 * The code in expand_divmod that was suspected to cause creation of | |
779 branching code seems to be only accessed for signed division. The | |
780 divisions used by # of iterations analysis are always unsigned. | |
781 Problems might arise on architectures that emits branching code | |
782 for some operations that may appear in the unroller (especially | |
783 for division), but we have no such architectures. | |
784 | |
785 Considering all this, it was decided that we should for now assume | |
786 that INSNS can in theory contain control flow insns, but in practice | |
787 it never does. So we don't handle the theoretical case, and should | |
788 a real failure ever show up, we have a pretty good clue for how to | |
789 fix it. */ | |
790 | |
791 return bb; | |
792 } | |
793 | |
111 | 794 /* Prepare a sequence comparing OP0 with OP1 using COMP and jumping to LABEL if |
795 true, with probability PROB. If CINSN is not NULL, it is the insn to copy | |
796 in order to create a jump. */ | |
797 | |
798 static rtx_insn * | |
799 compare_and_jump_seq (rtx op0, rtx op1, enum rtx_code comp, | |
800 rtx_code_label *label, profile_probability prob, | |
801 rtx_insn *cinsn) | |
802 { | |
803 rtx_insn *seq; | |
804 rtx_jump_insn *jump; | |
805 rtx cond; | |
806 machine_mode mode; | |
807 | |
808 mode = GET_MODE (op0); | |
809 if (mode == VOIDmode) | |
810 mode = GET_MODE (op1); | |
811 | |
812 start_sequence (); | |
813 if (GET_MODE_CLASS (mode) == MODE_CC) | |
814 { | |
815 /* A hack -- there seems to be no easy generic way how to make a | |
816 conditional jump from a ccmode comparison. */ | |
817 gcc_assert (cinsn); | |
818 cond = XEXP (SET_SRC (pc_set (cinsn)), 0); | |
819 gcc_assert (GET_CODE (cond) == comp); | |
820 gcc_assert (rtx_equal_p (op0, XEXP (cond, 0))); | |
821 gcc_assert (rtx_equal_p (op1, XEXP (cond, 1))); | |
822 emit_jump_insn (copy_insn (PATTERN (cinsn))); | |
823 jump = as_a <rtx_jump_insn *> (get_last_insn ()); | |
824 JUMP_LABEL (jump) = JUMP_LABEL (cinsn); | |
825 LABEL_NUSES (JUMP_LABEL (jump))++; | |
826 redirect_jump (jump, label, 0); | |
827 } | |
828 else | |
829 { | |
830 gcc_assert (!cinsn); | |
831 | |
832 op0 = force_operand (op0, NULL_RTX); | |
833 op1 = force_operand (op1, NULL_RTX); | |
834 do_compare_rtx_and_jump (op0, op1, comp, 0, | |
835 mode, NULL_RTX, NULL, label, | |
836 profile_probability::uninitialized ()); | |
837 jump = as_a <rtx_jump_insn *> (get_last_insn ()); | |
838 jump->set_jump_target (label); | |
839 LABEL_NUSES (label)++; | |
840 } | |
841 if (prob.initialized_p ()) | |
842 add_reg_br_prob_note (jump, prob); | |
843 | |
844 seq = get_insns (); | |
845 end_sequence (); | |
846 | |
847 return seq; | |
848 } | |
849 | |
131 | 850 /* Unroll LOOP for which we are able to count number of iterations in |
851 runtime LOOP->LPT_DECISION.TIMES times. The times value must be a | |
852 power of two. The transformation does this (with some extra care | |
853 for case n < 0): | |
0 | 854 |
855 for (i = 0; i < n; i++) | |
856 body; | |
857 | |
111 | 858 ==> (LOOP->LPT_DECISION.TIMES == 3) |
0 | 859 |
860 i = 0; | |
861 mod = n % 4; | |
862 | |
863 switch (mod) | |
864 { | |
865 case 3: | |
866 body; i++; | |
867 case 2: | |
868 body; i++; | |
869 case 1: | |
870 body; i++; | |
871 case 0: ; | |
872 } | |
873 | |
874 while (i < n) | |
875 { | |
876 body; i++; | |
877 body; i++; | |
878 body; i++; | |
879 body; i++; | |
880 } | |
881 */ | |
882 static void | |
145 | 883 unroll_loop_runtime_iterations (class loop *loop) |
0 | 884 { |
111 | 885 rtx old_niter, niter, tmp; |
886 rtx_insn *init_code, *branch_code; | |
887 unsigned i, j; | |
888 profile_probability p; | |
889 basic_block preheader, *body, swtch, ezc_swtch = NULL; | |
131 | 890 int may_exit_copy; |
111 | 891 profile_count iter_count, new_count; |
0 | 892 unsigned n_peel; |
893 edge e; | |
894 bool extra_zero_check, last_may_exit; | |
895 unsigned max_unroll = loop->lpt_decision.times; | |
145 | 896 class niter_desc *desc = get_simple_loop_desc (loop); |
0 | 897 bool exit_at_end = loop_exit_at_end_p (loop); |
898 struct opt_info *opt_info = NULL; | |
899 bool ok; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
900 |
0 | 901 if (flag_split_ivs_in_unroller |
902 || flag_variable_expansion_in_unroller) | |
903 opt_info = analyze_insns_in_loop (loop); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
904 |
0 | 905 /* Remember blocks whose dominators will have to be updated. */ |
111 | 906 auto_vec<basic_block> dom_bbs; |
0 | 907 |
908 body = get_loop_body (loop); | |
909 for (i = 0; i < loop->num_nodes; i++) | |
910 { | |
111 | 911 vec<basic_block> ldom; |
0 | 912 basic_block bb; |
913 | |
914 ldom = get_dominated_by (CDI_DOMINATORS, body[i]); | |
111 | 915 FOR_EACH_VEC_ELT (ldom, j, bb) |
0 | 916 if (!flow_bb_inside_loop_p (loop, bb)) |
111 | 917 dom_bbs.safe_push (bb); |
0 | 918 |
111 | 919 ldom.release (); |
0 | 920 } |
921 free (body); | |
922 | |
923 if (!exit_at_end) | |
924 { | |
925 /* Leave exit in first copy (for explanation why see comment in | |
926 unroll_loop_constant_iterations). */ | |
927 may_exit_copy = 0; | |
928 n_peel = max_unroll - 1; | |
929 extra_zero_check = true; | |
930 last_may_exit = false; | |
931 } | |
932 else | |
933 { | |
934 /* Leave exit in last copy (for explanation why see comment in | |
935 unroll_loop_constant_iterations). */ | |
936 may_exit_copy = max_unroll; | |
937 n_peel = max_unroll; | |
938 extra_zero_check = false; | |
939 last_may_exit = true; | |
940 } | |
941 | |
942 /* Get expression for number of iterations. */ | |
943 start_sequence (); | |
944 old_niter = niter = gen_reg_rtx (desc->mode); | |
945 tmp = force_operand (copy_rtx (desc->niter_expr), niter); | |
946 if (tmp != niter) | |
947 emit_move_insn (niter, tmp); | |
948 | |
111 | 949 /* For loops that exit at end and whose number of iterations is reliable, |
950 add one to niter to account for first pass through loop body before | |
951 reaching exit test. */ | |
952 if (exit_at_end && !desc->noloop_assumptions) | |
953 { | |
954 niter = expand_simple_binop (desc->mode, PLUS, | |
955 niter, const1_rtx, | |
956 NULL_RTX, 0, OPTAB_LIB_WIDEN); | |
957 old_niter = niter; | |
958 } | |
959 | |
0 | 960 /* Count modulo by ANDing it with max_unroll; we use the fact that |
961 the number of unrollings is a power of two, and thus this is correct | |
962 even if there is overflow in the computation. */ | |
963 niter = expand_simple_binop (desc->mode, AND, | |
111 | 964 niter, gen_int_mode (max_unroll, desc->mode), |
0 | 965 NULL_RTX, 0, OPTAB_LIB_WIDEN); |
966 | |
967 init_code = get_insns (); | |
968 end_sequence (); | |
969 unshare_all_rtl_in_chain (init_code); | |
970 | |
971 /* Precondition the loop. */ | |
972 split_edge_and_insert (loop_preheader_edge (loop), init_code); | |
973 | |
111 | 974 auto_vec<edge> remove_edges; |
0 | 975 |
111 | 976 auto_sbitmap wont_exit (max_unroll + 2); |
0 | 977 |
111 | 978 if (extra_zero_check || desc->noloop_assumptions) |
979 { | |
980 /* Peel the first copy of loop body. Leave the exit test if the number | |
981 of iterations is not reliable. Also record the place of the extra zero | |
982 check. */ | |
983 bitmap_clear (wont_exit); | |
984 if (!desc->noloop_assumptions) | |
985 bitmap_set_bit (wont_exit, 1); | |
986 ezc_swtch = loop_preheader_edge (loop)->src; | |
987 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), | |
988 1, wont_exit, desc->out_edge, | |
989 &remove_edges, | |
990 DLTHE_FLAG_UPDATE_FREQ); | |
991 gcc_assert (ok); | |
992 } | |
0 | 993 |
994 /* Record the place where switch will be built for preconditioning. */ | |
995 swtch = split_edge (loop_preheader_edge (loop)); | |
996 | |
131 | 997 /* Compute count increments for each switch block and initialize |
111 | 998 innermost switch block. Switch blocks and peeled loop copies are built |
999 from innermost outward. */ | |
1000 iter_count = new_count = swtch->count.apply_scale (1, max_unroll + 1); | |
1001 swtch->count = new_count; | |
1002 | |
0 | 1003 for (i = 0; i < n_peel; i++) |
1004 { | |
1005 /* Peel the copy. */ | |
111 | 1006 bitmap_clear (wont_exit); |
0 | 1007 if (i != n_peel - 1 || !last_may_exit) |
111 | 1008 bitmap_set_bit (wont_exit, 1); |
0 | 1009 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), |
1010 1, wont_exit, desc->out_edge, | |
1011 &remove_edges, | |
1012 DLTHE_FLAG_UPDATE_FREQ); | |
1013 gcc_assert (ok); | |
1014 | |
1015 /* Create item for switch. */ | |
1016 j = n_peel - i - (extra_zero_check ? 0 : 1); | |
111 | 1017 p = profile_probability::always ().apply_scale (1, i + 2); |
0 | 1018 |
1019 preheader = split_edge (loop_preheader_edge (loop)); | |
131 | 1020 /* Add in count of edge from switch block. */ |
111 | 1021 preheader->count += iter_count; |
145 | 1022 branch_code = compare_and_jump_seq (copy_rtx (niter), |
1023 gen_int_mode (j, desc->mode), EQ, | |
1024 block_label (preheader), p, NULL); | |
0 | 1025 |
1026 /* We rely on the fact that the compare and jump cannot be optimized out, | |
1027 and hence the cfg we create is correct. */ | |
1028 gcc_assert (branch_code != NULL_RTX); | |
1029 | |
1030 swtch = split_edge_and_insert (single_pred_edge (swtch), branch_code); | |
1031 set_immediate_dominator (CDI_DOMINATORS, preheader, swtch); | |
111 | 1032 single_succ_edge (swtch)->probability = p.invert (); |
1033 new_count += iter_count; | |
1034 swtch->count = new_count; | |
0 | 1035 e = make_edge (swtch, preheader, |
1036 single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP); | |
1037 e->probability = p; | |
1038 } | |
1039 | |
1040 if (extra_zero_check) | |
1041 { | |
1042 /* Add branch for zero iterations. */ | |
111 | 1043 p = profile_probability::always ().apply_scale (1, max_unroll + 1); |
0 | 1044 swtch = ezc_swtch; |
1045 preheader = split_edge (loop_preheader_edge (loop)); | |
131 | 1046 /* Recompute count adjustments since initial peel copy may |
111 | 1047 have exited and reduced those values that were computed above. */ |
1048 iter_count = swtch->count.apply_scale (1, max_unroll + 1); | |
131 | 1049 /* Add in count of edge from switch block. */ |
111 | 1050 preheader->count += iter_count; |
0 | 1051 branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ, |
1052 block_label (preheader), p, | |
111 | 1053 NULL); |
0 | 1054 gcc_assert (branch_code != NULL_RTX); |
1055 | |
1056 swtch = split_edge_and_insert (single_succ_edge (swtch), branch_code); | |
1057 set_immediate_dominator (CDI_DOMINATORS, preheader, swtch); | |
111 | 1058 single_succ_edge (swtch)->probability = p.invert (); |
0 | 1059 e = make_edge (swtch, preheader, |
1060 single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP); | |
1061 e->probability = p; | |
1062 } | |
1063 | |
1064 /* Recount dominators for outer blocks. */ | |
1065 iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, false); | |
1066 | |
1067 /* And unroll loop. */ | |
1068 | |
111 | 1069 bitmap_ones (wont_exit); |
1070 bitmap_clear_bit (wont_exit, may_exit_copy); | |
0 | 1071 opt_info_start_duplication (opt_info); |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1072 |
0 | 1073 ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop), |
1074 max_unroll, | |
1075 wont_exit, desc->out_edge, | |
1076 &remove_edges, | |
1077 DLTHE_FLAG_UPDATE_FREQ | |
1078 | (opt_info | |
1079 ? DLTHE_RECORD_COPY_NUMBER | |
1080 : 0)); | |
1081 gcc_assert (ok); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1082 |
0 | 1083 if (opt_info) |
1084 { | |
1085 apply_opt_in_copies (opt_info, max_unroll, true, true); | |
1086 free_opt_info (opt_info); | |
1087 } | |
1088 | |
1089 if (exit_at_end) | |
1090 { | |
1091 basic_block exit_block = get_bb_copy (desc->in_edge->src); | |
1092 /* Find a new in and out edge; they are in the last copy we have | |
1093 made. */ | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1094 |
0 | 1095 if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest) |
1096 { | |
1097 desc->out_edge = EDGE_SUCC (exit_block, 0); | |
1098 desc->in_edge = EDGE_SUCC (exit_block, 1); | |
1099 } | |
1100 else | |
1101 { | |
1102 desc->out_edge = EDGE_SUCC (exit_block, 1); | |
1103 desc->in_edge = EDGE_SUCC (exit_block, 0); | |
1104 } | |
1105 } | |
1106 | |
1107 /* Remove the edges. */ | |
111 | 1108 FOR_EACH_VEC_ELT (remove_edges, i, e) |
0 | 1109 remove_path (e); |
1110 | |
1111 /* We must be careful when updating the number of iterations due to | |
1112 preconditioning and the fact that the value must be valid at entry | |
1113 of the loop. After passing through the above code, we see that | |
1114 the correct new number of iterations is this: */ | |
1115 gcc_assert (!desc->const_iter); | |
1116 desc->niter_expr = | |
1117 simplify_gen_binary (UDIV, desc->mode, old_niter, | |
111 | 1118 gen_int_mode (max_unroll + 1, desc->mode)); |
1119 loop->nb_iterations_upper_bound | |
1120 = wi::udiv_trunc (loop->nb_iterations_upper_bound, max_unroll + 1); | |
1121 if (loop->any_estimate) | |
1122 loop->nb_iterations_estimate | |
1123 = wi::udiv_trunc (loop->nb_iterations_estimate, max_unroll + 1); | |
1124 if (loop->any_likely_upper_bound) | |
1125 loop->nb_iterations_likely_upper_bound | |
1126 = wi::udiv_trunc (loop->nb_iterations_likely_upper_bound, max_unroll + 1); | |
0 | 1127 if (exit_at_end) |
1128 { | |
1129 desc->niter_expr = | |
1130 simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx); | |
1131 desc->noloop_assumptions = NULL_RTX; | |
111 | 1132 --loop->nb_iterations_upper_bound; |
1133 if (loop->any_estimate | |
1134 && loop->nb_iterations_estimate != 0) | |
1135 --loop->nb_iterations_estimate; | |
1136 else | |
1137 loop->any_estimate = false; | |
1138 if (loop->any_likely_upper_bound | |
1139 && loop->nb_iterations_likely_upper_bound != 0) | |
1140 --loop->nb_iterations_likely_upper_bound; | |
1141 else | |
1142 loop->any_likely_upper_bound = false; | |
0 | 1143 } |
1144 | |
1145 if (dump_file) | |
1146 fprintf (dump_file, | |
1147 ";; Unrolled loop %d times, counting # of iterations " | |
1148 "in runtime, %i insns\n", | |
1149 max_unroll, num_loop_insns (loop)); | |
1150 } | |
1151 | |
1152 /* Decide whether to unroll LOOP stupidly and how much. */ | |
1153 static void | |
145 | 1154 decide_unroll_stupid (class loop *loop, int flags) |
0 | 1155 { |
1156 unsigned nunroll, nunroll_by_av, i; | |
145 | 1157 class niter_desc *desc; |
111 | 1158 widest_int iterations; |
0 | 1159 |
131 | 1160 /* If we were not asked to unroll this loop, just return back silently. */ |
1161 if (!(flags & UAP_UNROLL_ALL) && !loop->unroll) | |
1162 return; | |
0 | 1163 |
131 | 1164 if (dump_enabled_p ()) |
1165 dump_printf (MSG_NOTE, "considering unrolling loop stupidly\n"); | |
0 | 1166 |
1167 /* nunroll = total number of copies of the original loop body in | |
1168 unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */ | |
145 | 1169 nunroll = param_max_unrolled_insns / loop->ninsns; |
0 | 1170 nunroll_by_av |
145 | 1171 = param_max_average_unrolled_insns / loop->av_ninsns; |
0 | 1172 if (nunroll > nunroll_by_av) |
1173 nunroll = nunroll_by_av; | |
145 | 1174 if (nunroll > (unsigned) param_max_unroll_times) |
1175 nunroll = param_max_unroll_times; | |
0 | 1176 |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1177 if (targetm.loop_unroll_adjust) |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1178 nunroll = targetm.loop_unroll_adjust (nunroll, loop); |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1179 |
131 | 1180 if (loop->unroll > 0 && loop->unroll < USHRT_MAX) |
1181 nunroll = loop->unroll; | |
1182 | |
0 | 1183 /* Skip big loops. */ |
1184 if (nunroll <= 1) | |
1185 { | |
1186 if (dump_file) | |
1187 fprintf (dump_file, ";; Not considering loop, is too big\n"); | |
1188 return; | |
1189 } | |
1190 | |
1191 /* Check for simple loops. */ | |
1192 desc = get_simple_loop_desc (loop); | |
1193 | |
1194 /* Check simpleness. */ | |
1195 if (desc->simple_p && !desc->assumptions) | |
1196 { | |
1197 if (dump_file) | |
131 | 1198 fprintf (dump_file, ";; Loop is simple\n"); |
0 | 1199 return; |
1200 } | |
1201 | |
1202 /* Do not unroll loops with branches inside -- it increases number | |
111 | 1203 of mispredicts. |
1204 TODO: this heuristic needs tunning; call inside the loop body | |
1205 is also relatively good reason to not unroll. */ | |
0 | 1206 if (num_loop_branches (loop) > 1) |
1207 { | |
1208 if (dump_file) | |
1209 fprintf (dump_file, ";; Not unrolling, contains branches\n"); | |
1210 return; | |
1211 } | |
1212 | |
111 | 1213 /* Check whether the loop rolls. */ |
1214 if ((get_estimated_loop_iterations (loop, &iterations) | |
1215 || get_likely_max_loop_iterations (loop, &iterations)) | |
1216 && wi::ltu_p (iterations, 2 * nunroll)) | |
0 | 1217 { |
1218 if (dump_file) | |
1219 fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n"); | |
1220 return; | |
1221 } | |
1222 | |
1223 /* Success. Now force nunroll to be power of 2, as it seems that this | |
1224 improves results (partially because of better alignments, partially | |
1225 because of some dark magic). */ | |
1226 for (i = 1; 2 * i <= nunroll; i *= 2) | |
1227 continue; | |
1228 | |
1229 loop->lpt_decision.decision = LPT_UNROLL_STUPID; | |
1230 loop->lpt_decision.times = i - 1; | |
1231 } | |
1232 | |
111 | 1233 /* Unroll a LOOP LOOP->LPT_DECISION.TIMES times. The transformation does this: |
1234 | |
0 | 1235 while (cond) |
1236 body; | |
1237 | |
111 | 1238 ==> (LOOP->LPT_DECISION.TIMES == 3) |
0 | 1239 |
1240 while (cond) | |
1241 { | |
1242 body; | |
1243 if (!cond) break; | |
1244 body; | |
1245 if (!cond) break; | |
1246 body; | |
1247 if (!cond) break; | |
1248 body; | |
1249 } | |
1250 */ | |
1251 static void | |
145 | 1252 unroll_loop_stupid (class loop *loop) |
0 | 1253 { |
1254 unsigned nunroll = loop->lpt_decision.times; | |
145 | 1255 class niter_desc *desc = get_simple_loop_desc (loop); |
0 | 1256 struct opt_info *opt_info = NULL; |
1257 bool ok; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1258 |
0 | 1259 if (flag_split_ivs_in_unroller |
1260 || flag_variable_expansion_in_unroller) | |
1261 opt_info = analyze_insns_in_loop (loop); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1262 |
111 | 1263 auto_sbitmap wont_exit (nunroll + 1); |
1264 bitmap_clear (wont_exit); | |
0 | 1265 opt_info_start_duplication (opt_info); |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1266 |
0 | 1267 ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop), |
1268 nunroll, wont_exit, | |
1269 NULL, NULL, | |
1270 DLTHE_FLAG_UPDATE_FREQ | |
1271 | (opt_info | |
1272 ? DLTHE_RECORD_COPY_NUMBER | |
1273 : 0)); | |
1274 gcc_assert (ok); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1275 |
0 | 1276 if (opt_info) |
1277 { | |
1278 apply_opt_in_copies (opt_info, nunroll, true, true); | |
1279 free_opt_info (opt_info); | |
1280 } | |
1281 | |
1282 if (desc->simple_p) | |
1283 { | |
1284 /* We indeed may get here provided that there are nontrivial assumptions | |
1285 for a loop to be really simple. We could update the counts, but the | |
1286 problem is that we are unable to decide which exit will be taken | |
1287 (not really true in case the number of iterations is constant, | |
111 | 1288 but no one will do anything with this information, so we do not |
0 | 1289 worry about it). */ |
1290 desc->simple_p = false; | |
1291 } | |
1292 | |
1293 if (dump_file) | |
1294 fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n", | |
1295 nunroll, num_loop_insns (loop)); | |
1296 } | |
1297 | |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1298 /* Returns true if REG is referenced in one nondebug insn in LOOP. |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1299 Set *DEBUG_USES to the number of debug insns that reference the |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1300 variable. */ |
0 | 1301 |
111 | 1302 static bool |
145 | 1303 referenced_in_one_insn_in_loop_p (class loop *loop, rtx reg, |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1304 int *debug_uses) |
0 | 1305 { |
1306 basic_block *body, bb; | |
1307 unsigned i; | |
1308 int count_ref = 0; | |
111 | 1309 rtx_insn *insn; |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1310 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1311 body = get_loop_body (loop); |
0 | 1312 for (i = 0; i < loop->num_nodes; i++) |
1313 { | |
1314 bb = body[i]; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1315 |
0 | 1316 FOR_BB_INSNS (bb, insn) |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1317 if (!rtx_referenced_p (reg, insn)) |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1318 continue; |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1319 else if (DEBUG_INSN_P (insn)) |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1320 ++*debug_uses; |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1321 else if (++count_ref > 1) |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1322 break; |
0 | 1323 } |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1324 free (body); |
0 | 1325 return (count_ref == 1); |
1326 } | |
1327 | |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1328 /* Reset the DEBUG_USES debug insns in LOOP that reference REG. */ |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1329 |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1330 static void |
145 | 1331 reset_debug_uses_in_loop (class loop *loop, rtx reg, int debug_uses) |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1332 { |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1333 basic_block *body, bb; |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1334 unsigned i; |
111 | 1335 rtx_insn *insn; |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1336 |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1337 body = get_loop_body (loop); |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1338 for (i = 0; debug_uses && i < loop->num_nodes; i++) |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1339 { |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1340 bb = body[i]; |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1341 |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1342 FOR_BB_INSNS (bb, insn) |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1343 if (!DEBUG_INSN_P (insn) || !rtx_referenced_p (reg, insn)) |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1344 continue; |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1345 else |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1346 { |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1347 validate_change (insn, &INSN_VAR_LOCATION_LOC (insn), |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1348 gen_rtx_UNKNOWN_VAR_LOC (), 0); |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1349 if (!--debug_uses) |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1350 break; |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1351 } |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1352 } |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1353 free (body); |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1354 } |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1355 |
0 | 1356 /* Determine whether INSN contains an accumulator |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1357 which can be expanded into separate copies, |
0 | 1358 one for each copy of the LOOP body. |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1359 |
0 | 1360 for (i = 0 ; i < n; i++) |
1361 sum += a[i]; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1362 |
0 | 1363 ==> |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1364 |
0 | 1365 sum += a[i] |
1366 .... | |
1367 i = i+1; | |
1368 sum1 += a[i] | |
1369 .... | |
1370 i = i+1 | |
1371 sum2 += a[i]; | |
1372 .... | |
1373 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1374 Return NULL if INSN contains no opportunity for expansion of accumulator. |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1375 Otherwise, allocate a VAR_TO_EXPAND structure, fill it with the relevant |
0 | 1376 information and return a pointer to it. |
1377 */ | |
1378 | |
1379 static struct var_to_expand * | |
145 | 1380 analyze_insn_to_expand_var (class loop *loop, rtx_insn *insn) |
0 | 1381 { |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1382 rtx set, dest, src; |
0 | 1383 struct var_to_expand *ves; |
1384 unsigned accum_pos; | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1385 enum rtx_code code; |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1386 int debug_uses = 0; |
0 | 1387 |
1388 set = single_set (insn); | |
1389 if (!set) | |
1390 return NULL; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1391 |
0 | 1392 dest = SET_DEST (set); |
1393 src = SET_SRC (set); | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1394 code = GET_CODE (src); |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1395 |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1396 if (code != PLUS && code != MINUS && code != MULT && code != FMA) |
0 | 1397 return NULL; |
1398 | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1399 if (FLOAT_MODE_P (GET_MODE (dest))) |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1400 { |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1401 if (!flag_associative_math) |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1402 return NULL; |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1403 /* In the case of FMA, we're also changing the rounding. */ |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1404 if (code == FMA && !flag_unsafe_math_optimizations) |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1405 return NULL; |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1406 } |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1407 |
0 | 1408 /* Hmm, this is a bit paradoxical. We know that INSN is a valid insn |
145 | 1409 in MD. But if there is no optab to generate the insn, we cannot |
0 | 1410 perform the variable expansion. This can happen if an MD provides |
1411 an insn but not a named pattern to generate it, for example to avoid | |
1412 producing code that needs additional mode switches like for x87/mmx. | |
1413 | |
1414 So we check have_insn_for which looks for an optab for the operation | |
1415 in SRC. If it doesn't exist, we can't perform the expansion even | |
1416 though INSN is valid. */ | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1417 if (!have_insn_for (code, GET_MODE (src))) |
0 | 1418 return NULL; |
1419 | |
1420 if (!REG_P (dest) | |
1421 && !(GET_CODE (dest) == SUBREG | |
1422 && REG_P (SUBREG_REG (dest)))) | |
1423 return NULL; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1424 |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1425 /* Find the accumulator use within the operation. */ |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1426 if (code == FMA) |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1427 { |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1428 /* We only support accumulation via FMA in the ADD position. */ |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1429 if (!rtx_equal_p (dest, XEXP (src, 2))) |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1430 return NULL; |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1431 accum_pos = 2; |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1432 } |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1433 else if (rtx_equal_p (dest, XEXP (src, 0))) |
0 | 1434 accum_pos = 0; |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1435 else if (rtx_equal_p (dest, XEXP (src, 1))) |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1436 { |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1437 /* The method of expansion that we are using; which includes the |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1438 initialization of the expansions with zero and the summation of |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1439 the expansions at the end of the computation will yield wrong |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1440 results for (x = something - x) thus avoid using it in that case. */ |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1441 if (code == MINUS) |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1442 return NULL; |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1443 accum_pos = 1; |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1444 } |
0 | 1445 else |
1446 return NULL; | |
1447 | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1448 /* It must not otherwise be used. */ |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1449 if (code == FMA) |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1450 { |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1451 if (rtx_referenced_p (dest, XEXP (src, 0)) |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1452 || rtx_referenced_p (dest, XEXP (src, 1))) |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1453 return NULL; |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1454 } |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1455 else if (rtx_referenced_p (dest, XEXP (src, 1 - accum_pos))) |
0 | 1456 return NULL; |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1457 |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1458 /* It must be used in exactly one insn. */ |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1459 if (!referenced_in_one_insn_in_loop_p (loop, dest, &debug_uses)) |
0 | 1460 return NULL; |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1461 |
0 | 1462 if (dump_file) |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1463 { |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1464 fprintf (dump_file, "\n;; Expanding Accumulator "); |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1465 print_rtl (dump_file, dest); |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1466 fprintf (dump_file, "\n"); |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1467 } |
0 | 1468 |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1469 if (debug_uses) |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1470 /* Instead of resetting the debug insns, we could replace each |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1471 debug use in the loop with the sum or product of all expanded |
111 | 1472 accumulators. Since we'll only know of all expansions at the |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1473 end, we'd have to keep track of which vars_to_expand a debug |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1474 insn in the loop references, take note of each copy of the |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1475 debug insn during unrolling, and when it's all done, compute |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1476 the sum or product of each variable and adjust the original |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1477 debug insn and each copy thereof. What a pain! */ |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1478 reset_debug_uses_in_loop (loop, dest, debug_uses); |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1479 |
0 | 1480 /* Record the accumulator to expand. */ |
1481 ves = XNEW (struct var_to_expand); | |
1482 ves->insn = insn; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1483 ves->reg = copy_rtx (dest); |
111 | 1484 ves->var_expansions.create (1); |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1485 ves->next = NULL; |
0 | 1486 ves->op = GET_CODE (src); |
1487 ves->expansion_count = 0; | |
1488 ves->reuse_expansion = 0; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1489 return ves; |
0 | 1490 } |
1491 | |
1492 /* Determine whether there is an induction variable in INSN that | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1493 we would like to split during unrolling. |
0 | 1494 |
1495 I.e. replace | |
1496 | |
1497 i = i + 1; | |
1498 ... | |
1499 i = i + 1; | |
1500 ... | |
1501 i = i + 1; | |
1502 ... | |
1503 | |
1504 type chains by | |
1505 | |
1506 i0 = i + 1 | |
1507 ... | |
1508 i = i0 + 1 | |
1509 ... | |
1510 i = i0 + 2 | |
1511 ... | |
1512 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1513 Return NULL if INSN contains no interesting IVs. Otherwise, allocate |
0 | 1514 an IV_TO_SPLIT structure, fill it with the relevant information and return a |
1515 pointer to it. */ | |
1516 | |
1517 static struct iv_to_split * | |
111 | 1518 analyze_iv_to_split_insn (rtx_insn *insn) |
0 | 1519 { |
1520 rtx set, dest; | |
145 | 1521 class rtx_iv iv; |
0 | 1522 struct iv_to_split *ivts; |
111 | 1523 scalar_int_mode mode; |
0 | 1524 bool ok; |
1525 | |
1526 /* For now we just split the basic induction variables. Later this may be | |
1527 extended for example by selecting also addresses of memory references. */ | |
1528 set = single_set (insn); | |
1529 if (!set) | |
1530 return NULL; | |
1531 | |
1532 dest = SET_DEST (set); | |
111 | 1533 if (!REG_P (dest) || !is_a <scalar_int_mode> (GET_MODE (dest), &mode)) |
0 | 1534 return NULL; |
1535 | |
111 | 1536 if (!biv_p (insn, mode, dest)) |
0 | 1537 return NULL; |
1538 | |
1539 ok = iv_analyze_result (insn, dest, &iv); | |
1540 | |
1541 /* This used to be an assert under the assumption that if biv_p returns | |
1542 true that iv_analyze_result must also return true. However, that | |
1543 assumption is not strictly correct as evidenced by pr25569. | |
1544 | |
1545 Returning NULL when iv_analyze_result returns false is safe and | |
1546 avoids the problems in pr25569 until the iv_analyze_* routines | |
1547 can be fixed, which is apparently hard and time consuming | |
1548 according to their author. */ | |
1549 if (! ok) | |
1550 return NULL; | |
1551 | |
1552 if (iv.step == const0_rtx | |
1553 || iv.mode != iv.extend_mode) | |
1554 return NULL; | |
1555 | |
1556 /* Record the insn to split. */ | |
1557 ivts = XNEW (struct iv_to_split); | |
1558 ivts->insn = insn; | |
111 | 1559 ivts->orig_var = dest; |
0 | 1560 ivts->base_var = NULL_RTX; |
1561 ivts->step = iv.step; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1562 ivts->next = NULL; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1563 |
0 | 1564 return ivts; |
1565 } | |
1566 | |
1567 /* Determines which of insns in LOOP can be optimized. | |
1568 Return a OPT_INFO struct with the relevant hash tables filled | |
1569 with all insns to be optimized. The FIRST_NEW_BLOCK field | |
1570 is undefined for the return value. */ | |
1571 | |
1572 static struct opt_info * | |
145 | 1573 analyze_insns_in_loop (class loop *loop) |
0 | 1574 { |
1575 basic_block *body, bb; | |
1576 unsigned i; | |
1577 struct opt_info *opt_info = XCNEW (struct opt_info); | |
111 | 1578 rtx_insn *insn; |
0 | 1579 struct iv_to_split *ivts = NULL; |
1580 struct var_to_expand *ves = NULL; | |
111 | 1581 iv_to_split **slot1; |
1582 var_to_expand **slot2; | |
1583 vec<edge> edges = get_loop_exit_edges (loop); | |
0 | 1584 edge exit; |
1585 bool can_apply = false; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1586 |
0 | 1587 iv_analysis_loop_init (loop); |
1588 | |
1589 body = get_loop_body (loop); | |
1590 | |
1591 if (flag_split_ivs_in_unroller) | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1592 { |
111 | 1593 opt_info->insns_to_split |
1594 = new hash_table<iv_split_hasher> (5 * loop->num_nodes); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1595 opt_info->iv_to_split_head = NULL; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1596 opt_info->iv_to_split_tail = &opt_info->iv_to_split_head; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1597 } |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1598 |
0 | 1599 /* Record the loop exit bb and loop preheader before the unrolling. */ |
1600 opt_info->loop_preheader = loop_preheader_edge (loop)->src; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1601 |
111 | 1602 if (edges.length () == 1) |
0 | 1603 { |
111 | 1604 exit = edges[0]; |
0 | 1605 if (!(exit->flags & EDGE_COMPLEX)) |
1606 { | |
1607 opt_info->loop_exit = split_edge (exit); | |
1608 can_apply = true; | |
1609 } | |
1610 } | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1611 |
0 | 1612 if (flag_variable_expansion_in_unroller |
1613 && can_apply) | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1614 { |
111 | 1615 opt_info->insns_with_var_to_expand |
1616 = new hash_table<var_expand_hasher> (5 * loop->num_nodes); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1617 opt_info->var_to_expand_head = NULL; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1618 opt_info->var_to_expand_tail = &opt_info->var_to_expand_head; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1619 } |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1620 |
0 | 1621 for (i = 0; i < loop->num_nodes; i++) |
1622 { | |
1623 bb = body[i]; | |
1624 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb)) | |
1625 continue; | |
1626 | |
1627 FOR_BB_INSNS (bb, insn) | |
1628 { | |
1629 if (!INSN_P (insn)) | |
1630 continue; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1631 |
0 | 1632 if (opt_info->insns_to_split) |
1633 ivts = analyze_iv_to_split_insn (insn); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1634 |
0 | 1635 if (ivts) |
1636 { | |
111 | 1637 slot1 = opt_info->insns_to_split->find_slot (ivts, INSERT); |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1638 gcc_assert (*slot1 == NULL); |
0 | 1639 *slot1 = ivts; |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1640 *opt_info->iv_to_split_tail = ivts; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1641 opt_info->iv_to_split_tail = &ivts->next; |
0 | 1642 continue; |
1643 } | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1644 |
0 | 1645 if (opt_info->insns_with_var_to_expand) |
1646 ves = analyze_insn_to_expand_var (loop, insn); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1647 |
0 | 1648 if (ves) |
1649 { | |
111 | 1650 slot2 = opt_info->insns_with_var_to_expand->find_slot (ves, INSERT); |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1651 gcc_assert (*slot2 == NULL); |
0 | 1652 *slot2 = ves; |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1653 *opt_info->var_to_expand_tail = ves; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1654 opt_info->var_to_expand_tail = &ves->next; |
0 | 1655 } |
1656 } | |
1657 } | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1658 |
111 | 1659 edges.release (); |
0 | 1660 free (body); |
1661 return opt_info; | |
1662 } | |
1663 | |
1664 /* Called just before loop duplication. Records start of duplicated area | |
1665 to OPT_INFO. */ | |
1666 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1667 static void |
0 | 1668 opt_info_start_duplication (struct opt_info *opt_info) |
1669 { | |
1670 if (opt_info) | |
111 | 1671 opt_info->first_new_block = last_basic_block_for_fn (cfun); |
0 | 1672 } |
1673 | |
/* Return how many iterations separate the initialization of the base
   variable from the copy numbered N_COPY.  N_COPIES is the total number
   of newly created copies.  UNROLLING is true when the loop is being
   unrolled and false when it is being peeled.  */

static unsigned
determine_split_iv_delta (unsigned n_copy, unsigned n_copies, bool unrolling)
{
  /* When unrolling, the base variable is initialized in the original
     loop body (copy number 0), so the distance is the copy number.  */
  if (unrolling)
    return n_copy;

  /* When peeling, the initialization happens in copy number 1 and the
     original loop (copy number 0) comes last.  */
  return n_copy ? n_copy - 1 : n_copies;
}
1698 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1699 /* Allocate basic variable for the induction variable chain. */ |
0 | 1700 |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1701 static void |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1702 allocate_basic_variable (struct iv_to_split *ivts) |
0 | 1703 { |
111 | 1704 rtx expr = SET_SRC (single_set (ivts->insn)); |
0 | 1705 |
1706 ivts->base_var = gen_reg_rtx (GET_MODE (expr)); | |
1707 } | |
1708 | |
1709 /* Insert initialization of basic variable of IVTS before INSN, taking | |
1710 the initial value from INSN. */ | |
1711 | |
1712 static void | |
111 | 1713 insert_base_initialization (struct iv_to_split *ivts, rtx_insn *insn) |
0 | 1714 { |
111 | 1715 rtx expr = copy_rtx (SET_SRC (single_set (insn))); |
1716 rtx_insn *seq; | |
0 | 1717 |
1718 start_sequence (); | |
1719 expr = force_operand (expr, ivts->base_var); | |
1720 if (expr != ivts->base_var) | |
1721 emit_move_insn (ivts->base_var, expr); | |
1722 seq = get_insns (); | |
1723 end_sequence (); | |
1724 | |
1725 emit_insn_before (seq, insn); | |
1726 } | |
1727 | |
1728 /* Replace the use of induction variable described in IVTS in INSN | |
1729 by base variable + DELTA * step. */ | |
1730 | |
1731 static void | |
111 | 1732 split_iv (struct iv_to_split *ivts, rtx_insn *insn, unsigned delta) |
0 | 1733 { |
111 | 1734 rtx expr, *loc, incr, var; |
1735 rtx_insn *seq; | |
1736 machine_mode mode = GET_MODE (ivts->base_var); | |
0 | 1737 rtx src, dest, set; |
1738 | |
1739 /* Construct base + DELTA * step. */ | |
1740 if (!delta) | |
1741 expr = ivts->base_var; | |
1742 else | |
1743 { | |
1744 incr = simplify_gen_binary (MULT, mode, | |
111 | 1745 copy_rtx (ivts->step), |
1746 gen_int_mode (delta, mode)); | |
0 | 1747 expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var), |
1748 ivts->base_var, incr); | |
1749 } | |
1750 | |
1751 /* Figure out where to do the replacement. */ | |
111 | 1752 loc = &SET_SRC (single_set (insn)); |
0 | 1753 |
1754 /* If we can make the replacement right away, we're done. */ | |
1755 if (validate_change (insn, loc, expr, 0)) | |
1756 return; | |
1757 | |
1758 /* Otherwise, force EXPR into a register and try again. */ | |
1759 start_sequence (); | |
1760 var = gen_reg_rtx (mode); | |
1761 expr = force_operand (expr, var); | |
1762 if (expr != var) | |
1763 emit_move_insn (var, expr); | |
1764 seq = get_insns (); | |
1765 end_sequence (); | |
1766 emit_insn_before (seq, insn); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1767 |
0 | 1768 if (validate_change (insn, loc, var, 0)) |
1769 return; | |
1770 | |
1771 /* The last chance. Try recreating the assignment in insn | |
1772 completely from scratch. */ | |
1773 set = single_set (insn); | |
1774 gcc_assert (set); | |
1775 | |
1776 start_sequence (); | |
1777 *loc = var; | |
1778 src = copy_rtx (SET_SRC (set)); | |
1779 dest = copy_rtx (SET_DEST (set)); | |
1780 src = force_operand (src, dest); | |
1781 if (src != dest) | |
1782 emit_move_insn (dest, src); | |
1783 seq = get_insns (); | |
1784 end_sequence (); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1785 |
0 | 1786 emit_insn_before (seq, insn); |
1787 delete_insn (insn); | |
1788 } | |
1789 | |
1790 | |
1791 /* Return one expansion of the accumulator recorded in struct VE. */ | |
1792 | |
1793 static rtx | |
1794 get_expansion (struct var_to_expand *ve) | |
1795 { | |
1796 rtx reg; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1797 |
0 | 1798 if (ve->reuse_expansion == 0) |
1799 reg = ve->reg; | |
1800 else | |
111 | 1801 reg = ve->var_expansions[ve->reuse_expansion - 1]; |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1802 |
111 | 1803 if (ve->var_expansions.length () == (unsigned) ve->reuse_expansion) |
0 | 1804 ve->reuse_expansion = 0; |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1805 else |
0 | 1806 ve->reuse_expansion++; |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1807 |
0 | 1808 return reg; |
1809 } | |
1810 | |
1811 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1812 /* Given INSN replace the uses of the accumulator recorded in VE |
0 | 1813 with a new register. */ |
1814 | |
1815 static void | |
111 | 1816 expand_var_during_unrolling (struct var_to_expand *ve, rtx_insn *insn) |
0 | 1817 { |
1818 rtx new_reg, set; | |
1819 bool really_new_expansion = false; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1820 |
0 | 1821 set = single_set (insn); |
1822 gcc_assert (set); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1823 |
0 | 1824 /* Generate a new register only if the expansion limit has not been |
1825 reached. Else reuse an already existing expansion. */ | |
145 | 1826 if (param_max_variable_expansions > ve->expansion_count) |
0 | 1827 { |
1828 really_new_expansion = true; | |
1829 new_reg = gen_reg_rtx (GET_MODE (ve->reg)); | |
1830 } | |
1831 else | |
1832 new_reg = get_expansion (ve); | |
1833 | |
111 | 1834 validate_replace_rtx_group (SET_DEST (set), new_reg, insn); |
0 | 1835 if (apply_change_group ()) |
1836 if (really_new_expansion) | |
1837 { | |
111 | 1838 ve->var_expansions.safe_push (new_reg); |
0 | 1839 ve->expansion_count++; |
1840 } | |
1841 } | |
1842 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1843 /* Initialize the variable expansions in loop preheader. PLACE is the |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1844 loop-preheader basic block where the initialization of the |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1845 expansions should take place. The expansions are initialized with |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1846 (-0) when the operation is plus or minus to honor sign zero. This |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1847 way we can prevent cases where the sign of the final result is |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1848 effected by the sign of the expansion. Here is an example to |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1849 demonstrate this: |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1850 |
0 | 1851 for (i = 0 ; i < n; i++) |
1852 sum += something; | |
1853 | |
1854 ==> | |
1855 | |
1856 sum += something | |
1857 .... | |
1858 i = i+1; | |
1859 sum1 += something | |
1860 .... | |
1861 i = i+1 | |
1862 sum2 += something; | |
1863 .... | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1864 |
0 | 1865 When SUM is initialized with -zero and SOMETHING is also -zero; the |
1866 final result of sum should be -zero thus the expansions sum1 and sum2 | |
1867 should be initialized with -zero as well (otherwise we will get +zero | |
1868 as the final result). */ | |
1869 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1870 static void |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1871 insert_var_expansion_initialization (struct var_to_expand *ve, |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1872 basic_block place) |
0 | 1873 { |
111 | 1874 rtx_insn *seq; |
1875 rtx var, zero_init; | |
0 | 1876 unsigned i; |
111 | 1877 machine_mode mode = GET_MODE (ve->reg); |
0 | 1878 bool honor_signed_zero_p = HONOR_SIGNED_ZEROS (mode); |
1879 | |
111 | 1880 if (ve->var_expansions.length () == 0) |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1881 return; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1882 |
0 | 1883 start_sequence (); |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1884 switch (ve->op) |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1885 { |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1886 case FMA: |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1887 /* Note that we only accumulate FMA via the ADD operand. */ |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1888 case PLUS: |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1889 case MINUS: |
111 | 1890 FOR_EACH_VEC_ELT (ve->var_expansions, i, var) |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1891 { |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1892 if (honor_signed_zero_p) |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1893 zero_init = simplify_gen_unary (NEG, mode, CONST0_RTX (mode), mode); |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1894 else |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1895 zero_init = CONST0_RTX (mode); |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1896 emit_move_insn (var, zero_init); |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1897 } |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1898 break; |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1899 |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1900 case MULT: |
111 | 1901 FOR_EACH_VEC_ELT (ve->var_expansions, i, var) |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1902 { |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1903 zero_init = CONST1_RTX (GET_MODE (var)); |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1904 emit_move_insn (var, zero_init); |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1905 } |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1906 break; |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1907 |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1908 default: |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1909 gcc_unreachable (); |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1910 } |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1911 |
0 | 1912 seq = get_insns (); |
1913 end_sequence (); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1914 |
111 | 1915 emit_insn_after (seq, BB_END (place)); |
0 | 1916 } |
1917 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1918 /* Combine the variable expansions at the loop exit. PLACE is the |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1919 loop exit basic block where the summation of the expansions should |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1920 take place. */ |
0 | 1921 |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1922 static void |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1923 combine_var_copies_in_loop_exit (struct var_to_expand *ve, basic_block place) |
0 | 1924 { |
1925 rtx sum = ve->reg; | |
111 | 1926 rtx expr, var; |
1927 rtx_insn *seq, *insn; | |
0 | 1928 unsigned i; |
1929 | |
111 | 1930 if (ve->var_expansions.length () == 0) |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1931 return; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1932 |
111 | 1933 /* ve->reg might be SUBREG or some other non-shareable RTL, and we use |
1934 it both here and as the destination of the assignment. */ | |
1935 sum = copy_rtx (sum); | |
0 | 1936 start_sequence (); |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1937 switch (ve->op) |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1938 { |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1939 case FMA: |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1940 /* Note that we only accumulate FMA via the ADD operand. */ |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1941 case PLUS: |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1942 case MINUS: |
111 | 1943 FOR_EACH_VEC_ELT (ve->var_expansions, i, var) |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1944 sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg), var, sum); |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1945 break; |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1946 |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1947 case MULT: |
111 | 1948 FOR_EACH_VEC_ELT (ve->var_expansions, i, var) |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1949 sum = simplify_gen_binary (MULT, GET_MODE (ve->reg), var, sum); |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1950 break; |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1951 |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1952 default: |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1953 gcc_unreachable (); |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1954 } |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1955 |
0 | 1956 expr = force_operand (sum, ve->reg); |
1957 if (expr != ve->reg) | |
1958 emit_move_insn (ve->reg, expr); | |
1959 seq = get_insns (); | |
1960 end_sequence (); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1961 |
0 | 1962 insn = BB_HEAD (place); |
1963 while (!NOTE_INSN_BASIC_BLOCK_P (insn)) | |
1964 insn = NEXT_INSN (insn); | |
1965 | |
1966 emit_insn_after (seq, insn); | |
1967 } | |
1968 | |
111 | 1969 /* Strip away REG_EQUAL notes for IVs we're splitting. |
1970 | |
1971 Updating REG_EQUAL notes for IVs we split is tricky: We | |
1972 cannot tell until after unrolling, DF-rescanning, and liveness | |
1973 updating, whether an EQ_USE is reached by the split IV while | |
1974 the IV reg is still live. See PR55006. | |
1975 | |
1976 ??? We cannot use remove_reg_equal_equiv_notes_for_regno, | |
1977 because RTL loop-iv requires us to defer rescanning insns and | |
1978 any notes attached to them. So resort to old techniques... */ | |
1979 | |
1980 static void | |
1981 maybe_strip_eq_note_for_split_iv (struct opt_info *opt_info, rtx_insn *insn) | |
1982 { | |
1983 struct iv_to_split *ivts; | |
1984 rtx note = find_reg_equal_equiv_note (insn); | |
1985 if (! note) | |
1986 return; | |
1987 for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next) | |
1988 if (reg_mentioned_p (ivts->orig_var, note)) | |
1989 { | |
1990 remove_note (insn, note); | |
1991 return; | |
1992 } | |
1993 } | |
1994 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1995 /* Apply loop optimizations in loop copies using the |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1996 data which gathered during the unrolling. Structure |
0 | 1997 OPT_INFO record that data. |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1998 |
0 | 1999 UNROLLING is true if we unrolled (not peeled) the loop. |
2000 REWRITE_ORIGINAL_BODY is true if we should also rewrite the original body of | |
2001 the loop (as it should happen in complete unrolling, but not in ordinary | |
2002 peeling of the loop). */ | |
2003 | |
2004 static void | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2005 apply_opt_in_copies (struct opt_info *opt_info, |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2006 unsigned n_copies, bool unrolling, |
0 | 2007 bool rewrite_original_loop) |
2008 { | |
2009 unsigned i, delta; | |
2010 basic_block bb, orig_bb; | |
111 | 2011 rtx_insn *insn, *orig_insn, *next; |
0 | 2012 struct iv_to_split ivts_templ, *ivts; |
2013 struct var_to_expand ve_templ, *ves; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2014 |
0 | 2015 /* Sanity check -- we need to put initialization in the original loop |
2016 body. */ | |
2017 gcc_assert (!unrolling || rewrite_original_loop); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2018 |
0 | 2019 /* Allocate the basic variables (i0). */ |
2020 if (opt_info->insns_to_split) | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2021 for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2022 allocate_basic_variable (ivts); |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2023 |
111 | 2024 for (i = opt_info->first_new_block; |
2025 i < (unsigned) last_basic_block_for_fn (cfun); | |
2026 i++) | |
0 | 2027 { |
111 | 2028 bb = BASIC_BLOCK_FOR_FN (cfun, i); |
0 | 2029 orig_bb = get_bb_original (bb); |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2030 |
0 | 2031 /* bb->aux holds position in copy sequence initialized by |
2032 duplicate_loop_to_header_edge. */ | |
2033 delta = determine_split_iv_delta ((size_t)bb->aux, n_copies, | |
2034 unrolling); | |
2035 bb->aux = 0; | |
2036 orig_insn = BB_HEAD (orig_bb); | |
111 | 2037 FOR_BB_INSNS_SAFE (bb, insn, next) |
0 | 2038 { |
111 | 2039 if (!INSN_P (insn) |
131 | 2040 || (DEBUG_BIND_INSN_P (insn) |
2041 && INSN_VAR_LOCATION_DECL (insn) | |
111 | 2042 && TREE_CODE (INSN_VAR_LOCATION_DECL (insn)) == LABEL_DECL)) |
0 | 2043 continue; |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2044 |
111 | 2045 while (!INSN_P (orig_insn) |
131 | 2046 || (DEBUG_BIND_INSN_P (orig_insn) |
2047 && INSN_VAR_LOCATION_DECL (orig_insn) | |
111 | 2048 && (TREE_CODE (INSN_VAR_LOCATION_DECL (orig_insn)) |
2049 == LABEL_DECL))) | |
0 | 2050 orig_insn = NEXT_INSN (orig_insn); |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2051 |
0 | 2052 ivts_templ.insn = orig_insn; |
2053 ve_templ.insn = orig_insn; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2054 |
0 | 2055 /* Apply splitting iv optimization. */ |
2056 if (opt_info->insns_to_split) | |
2057 { | |
111 | 2058 maybe_strip_eq_note_for_split_iv (opt_info, insn); |
2059 | |
2060 ivts = opt_info->insns_to_split->find (&ivts_templ); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2061 |
0 | 2062 if (ivts) |
2063 { | |
2064 gcc_assert (GET_CODE (PATTERN (insn)) | |
2065 == GET_CODE (PATTERN (orig_insn))); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2066 |
0 | 2067 if (!delta) |
2068 insert_base_initialization (ivts, insn); | |
2069 split_iv (ivts, insn, delta); | |
2070 } | |
2071 } | |
2072 /* Apply variable expansion optimization. */ | |
2073 if (unrolling && opt_info->insns_with_var_to_expand) | |
2074 { | |
2075 ves = (struct var_to_expand *) | |
111 | 2076 opt_info->insns_with_var_to_expand->find (&ve_templ); |
0 | 2077 if (ves) |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2078 { |
0 | 2079 gcc_assert (GET_CODE (PATTERN (insn)) |
2080 == GET_CODE (PATTERN (orig_insn))); | |
2081 expand_var_during_unrolling (ves, insn); | |
2082 } | |
2083 } | |
2084 orig_insn = NEXT_INSN (orig_insn); | |
2085 } | |
2086 } | |
2087 | |
2088 if (!rewrite_original_loop) | |
2089 return; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2090 |
0 | 2091 /* Initialize the variable expansions in the loop preheader |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2092 and take care of combining them at the loop exit. */ |
0 | 2093 if (opt_info->insns_with_var_to_expand) |
2094 { | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2095 for (ves = opt_info->var_to_expand_head; ves; ves = ves->next) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2096 insert_var_expansion_initialization (ves, opt_info->loop_preheader); |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2097 for (ves = opt_info->var_to_expand_head; ves; ves = ves->next) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2098 combine_var_copies_in_loop_exit (ves, opt_info->loop_exit); |
0 | 2099 } |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2100 |
0 | 2101 /* Rewrite also the original loop body. Find them as originals of the blocks |
2102 in the last copied iteration, i.e. those that have | |
2103 get_bb_copy (get_bb_original (bb)) == bb. */ | |
111 | 2104 for (i = opt_info->first_new_block; |
2105 i < (unsigned) last_basic_block_for_fn (cfun); | |
2106 i++) | |
0 | 2107 { |
111 | 2108 bb = BASIC_BLOCK_FOR_FN (cfun, i); |
0 | 2109 orig_bb = get_bb_original (bb); |
2110 if (get_bb_copy (orig_bb) != bb) | |
2111 continue; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2112 |
0 | 2113 delta = determine_split_iv_delta (0, n_copies, unrolling); |
2114 for (orig_insn = BB_HEAD (orig_bb); | |
2115 orig_insn != NEXT_INSN (BB_END (bb)); | |
2116 orig_insn = next) | |
2117 { | |
2118 next = NEXT_INSN (orig_insn); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2119 |
0 | 2120 if (!INSN_P (orig_insn)) |
2121 continue; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2122 |
0 | 2123 ivts_templ.insn = orig_insn; |
2124 if (opt_info->insns_to_split) | |
2125 { | |
111 | 2126 maybe_strip_eq_note_for_split_iv (opt_info, orig_insn); |
2127 | |
0 | 2128 ivts = (struct iv_to_split *) |
111 | 2129 opt_info->insns_to_split->find (&ivts_templ); |
0 | 2130 if (ivts) |
2131 { | |
2132 if (!delta) | |
2133 insert_base_initialization (ivts, orig_insn); | |
2134 split_iv (ivts, orig_insn, delta); | |
2135 continue; | |
2136 } | |
2137 } | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2138 |
0 | 2139 } |
2140 } | |
2141 } | |
2142 | |
2143 /* Release OPT_INFO. */ | |
2144 | |
2145 static void | |
2146 free_opt_info (struct opt_info *opt_info) | |
2147 { | |
111 | 2148 delete opt_info->insns_to_split; |
2149 opt_info->insns_to_split = NULL; | |
0 | 2150 if (opt_info->insns_with_var_to_expand) |
2151 { | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2152 struct var_to_expand *ves; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2153 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2154 for (ves = opt_info->var_to_expand_head; ves; ves = ves->next) |
111 | 2155 ves->var_expansions.release (); |
2156 delete opt_info->insns_with_var_to_expand; | |
2157 opt_info->insns_with_var_to_expand = NULL; | |
0 | 2158 } |
2159 free (opt_info); | |
2160 } |