Mercurial > hg > CbC > CbC_gcc
annotate gcc/loop-unroll.c @ 58:3aaf117db171
error at dwarf2out.c
author | ryoma <e075725@ie.u-ryukyu.ac.jp> |
---|---|
date | Mon, 15 Feb 2010 14:58:24 +0900 |
parents | 77e2b8dfacca |
children | b7f97abdc517 |
rev | line source |
---|---|
0 | 1 /* Loop unrolling and peeling. |
2 Copyright (C) 2002, 2003, 2004, 2005, 2007, 2008 | |
3 Free Software Foundation, Inc. | |
4 | |
5 This file is part of GCC. | |
6 | |
7 GCC is free software; you can redistribute it and/or modify it under | |
8 the terms of the GNU General Public License as published by the Free | |
9 Software Foundation; either version 3, or (at your option) any later | |
10 version. | |
11 | |
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
15 for more details. | |
16 | |
17 You should have received a copy of the GNU General Public License | |
18 along with GCC; see the file COPYING3. If not see | |
19 <http://www.gnu.org/licenses/>. */ | |
20 | |
21 #include "config.h" | |
22 #include "system.h" | |
23 #include "coretypes.h" | |
24 #include "tm.h" | |
25 #include "rtl.h" | |
26 #include "hard-reg-set.h" | |
27 #include "obstack.h" | |
28 #include "basic-block.h" | |
29 #include "cfgloop.h" | |
30 #include "cfglayout.h" | |
31 #include "params.h" | |
32 #include "output.h" | |
33 #include "expr.h" | |
34 #include "hashtab.h" | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
35 #include "recog.h" |
0 | 36 |
37 /* This pass performs loop unrolling and peeling. We only perform these | |
38 optimizations on innermost loops (with single exception) because | |
39 the impact on performance is greatest here, and we want to avoid | |
40 unnecessary code size growth. The gain is caused by greater sequentiality | |
41 of code, better code to optimize for further passes and in some cases | |
42 by fewer testings of exit conditions. The main problem is code growth, | |
43 that impacts performance negatively due to effect of caches. | |
44 | |
45 What we do: | |
46 | |
47 -- complete peeling of once-rolling loops; this is the above mentioned | |
48 exception, as this causes loop to be cancelled completely and | |
49 does not cause code growth | |
50 -- complete peeling of loops that roll (small) constant times. | |
51 -- simple peeling of first iterations of loops that do not roll much | |
52 (according to profile feedback) | |
53 -- unrolling of loops that roll constant times; this is almost always | |
54 win, as we get rid of exit condition tests. | |
55 -- unrolling of loops that roll number of times that we can compute | |
56 in runtime; we also get rid of exit condition tests here, but there | |
57 is the extra expense for calculating the number of iterations | |
58 -- simple unrolling of remaining loops; this is performed only if we | |
59 are asked to, as the gain is questionable in this case and often | |
60 it may even slow down the code | |
61 For more detailed descriptions of each of those, see comments at | |
62 appropriate function below. | |
63 | |
64 There are many parameters (defined and described in params.def) that | |
65 control how much we unroll/peel. | |
66 | |
67 ??? A great problem is that we don't have a good way to determine | |
68 how many times we should unroll the loop; the experiments I have made | |
69 showed that this choice may affect performance on the order of several %. | |
70 */ | |
71 | |
72 /* Information about induction variables to split. */ | |
73 | |
74 struct iv_to_split | |
75 { | |
76 rtx insn; /* The insn in that the induction variable occurs. */ | |
77 rtx base_var; /* The variable on that the values in the further | |
78 iterations are based. */ | |
79 rtx step; /* Step of the induction variable. */ | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
80 struct iv_to_split *next; /* Next entry in walking order. */ |
0 | 81 unsigned n_loc; |
82 unsigned loc[3]; /* Location where the definition of the induction | |
83 variable occurs in the insn. For example if | |
84 N_LOC is 2, the expression is located at | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
85 XEXP (XEXP (single_set, loc[0]), loc[1]). */ |
0 | 86 }; |
87 | |
88 /* Information about accumulators to expand. */ | |
89 | |
90 struct var_to_expand | |
91 { | |
92 rtx insn; /* The insn in that the variable expansion occurs. */ | |
93 rtx reg; /* The accumulator which is expanded. */ | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
94 VEC(rtx,heap) *var_expansions; /* The copies of the accumulator which is expanded. */ |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
95 struct var_to_expand *next; /* Next entry in walking order. */ |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
96 enum rtx_code op; /* The type of the accumulation - addition, subtraction |
0 | 97 or multiplication. */ |
98 int expansion_count; /* Count the number of expansions generated so far. */ | |
99 int reuse_expansion; /* The expansion we intend to reuse to expand | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
100 the accumulator. If REUSE_EXPANSION is 0 reuse |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
101 the original accumulator. Else use |
0 | 102 var_expansions[REUSE_EXPANSION - 1]. */ |
103 unsigned accum_pos; /* The position in which the accumulator is placed in | |
104 the insn src. For example in x = x + something | |
105 accum_pos is 0 while in x = something + x accum_pos | |
106 is 1. */ | |
107 }; | |
108 | |
109 /* Information about optimization applied in | |
110 the unrolled loop. */ | |
111 | |
112 struct opt_info | |
113 { | |
114 htab_t insns_to_split; /* A hashtable of insns to split. */ | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
115 struct iv_to_split *iv_to_split_head; /* The first iv to split. */ |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
116 struct iv_to_split **iv_to_split_tail; /* Pointer to the tail of the list. */ |
0 | 117 htab_t insns_with_var_to_expand; /* A hashtable of insns with accumulators |
118 to expand. */ | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
119 struct var_to_expand *var_to_expand_head; /* The first var to expand. */ |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
120 struct var_to_expand **var_to_expand_tail; /* Pointer to the tail of the list. */ |
0 | 121 unsigned first_new_block; /* The first basic block that was |
122 duplicated. */ | |
123 basic_block loop_exit; /* The loop exit basic block. */ | |
124 basic_block loop_preheader; /* The loop preheader basic block. */ | |
125 }; | |
126 | |
127 static void decide_unrolling_and_peeling (int); | |
128 static void peel_loops_completely (int); | |
129 static void decide_peel_simple (struct loop *, int); | |
130 static void decide_peel_once_rolling (struct loop *, int); | |
131 static void decide_peel_completely (struct loop *, int); | |
132 static void decide_unroll_stupid (struct loop *, int); | |
133 static void decide_unroll_constant_iterations (struct loop *, int); | |
134 static void decide_unroll_runtime_iterations (struct loop *, int); | |
135 static void peel_loop_simple (struct loop *); | |
136 static void peel_loop_completely (struct loop *); | |
137 static void unroll_loop_stupid (struct loop *); | |
138 static void unroll_loop_constant_iterations (struct loop *); | |
139 static void unroll_loop_runtime_iterations (struct loop *); | |
140 static struct opt_info *analyze_insns_in_loop (struct loop *); | |
141 static void opt_info_start_duplication (struct opt_info *); | |
142 static void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool); | |
143 static void free_opt_info (struct opt_info *); | |
144 static struct var_to_expand *analyze_insn_to_expand_var (struct loop*, rtx); | |
145 static bool referenced_in_one_insn_in_loop_p (struct loop *, rtx); | |
146 static struct iv_to_split *analyze_iv_to_split_insn (rtx); | |
147 static void expand_var_during_unrolling (struct var_to_expand *, rtx); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
148 static void insert_var_expansion_initialization (struct var_to_expand *, |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
149 basic_block); |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
150 static void combine_var_copies_in_loop_exit (struct var_to_expand *, |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
151 basic_block); |
0 | 152 static rtx get_expansion (struct var_to_expand *); |
153 | |
154 /* Unroll and/or peel (depending on FLAGS) LOOPS. */ | |
155 void | |
156 unroll_and_peel_loops (int flags) | |
157 { | |
158 struct loop *loop; | |
159 bool check; | |
160 loop_iterator li; | |
161 | |
162 /* First perform complete loop peeling (it is almost surely a win, | |
163 and affects parameters for further decision a lot). */ | |
164 peel_loops_completely (flags); | |
165 | |
166 /* Now decide rest of unrolling and peeling. */ | |
167 decide_unrolling_and_peeling (flags); | |
168 | |
169 /* Scan the loops, inner ones first. */ | |
170 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST) | |
171 { | |
172 check = true; | |
173 /* And perform the appropriate transformations. */ | |
174 switch (loop->lpt_decision.decision) | |
175 { | |
176 case LPT_PEEL_COMPLETELY: | |
177 /* Already done. */ | |
178 gcc_unreachable (); | |
179 case LPT_PEEL_SIMPLE: | |
180 peel_loop_simple (loop); | |
181 break; | |
182 case LPT_UNROLL_CONSTANT: | |
183 unroll_loop_constant_iterations (loop); | |
184 break; | |
185 case LPT_UNROLL_RUNTIME: | |
186 unroll_loop_runtime_iterations (loop); | |
187 break; | |
188 case LPT_UNROLL_STUPID: | |
189 unroll_loop_stupid (loop); | |
190 break; | |
191 case LPT_NONE: | |
192 check = false; | |
193 break; | |
194 default: | |
195 gcc_unreachable (); | |
196 } | |
197 if (check) | |
198 { | |
199 #ifdef ENABLE_CHECKING | |
200 verify_dominators (CDI_DOMINATORS); | |
201 verify_loop_structure (); | |
202 #endif | |
203 } | |
204 } | |
205 | |
206 iv_analysis_done (); | |
207 } | |
208 | |
209 /* Check whether exit of the LOOP is at the end of loop body. */ | |
210 | |
211 static bool | |
212 loop_exit_at_end_p (struct loop *loop) | |
213 { | |
214 struct niter_desc *desc = get_simple_loop_desc (loop); | |
215 rtx insn; | |
216 | |
217 if (desc->in_edge->dest != loop->latch) | |
218 return false; | |
219 | |
220 /* Check that the latch is empty. */ | |
221 FOR_BB_INSNS (loop->latch, insn) | |
222 { | |
223 if (INSN_P (insn)) | |
224 return false; | |
225 } | |
226 | |
227 return true; | |
228 } | |
229 | |
230 /* Depending on FLAGS, check whether to peel loops completely and do so. */ | |
231 static void | |
232 peel_loops_completely (int flags) | |
233 { | |
234 struct loop *loop; | |
235 loop_iterator li; | |
236 | |
237 /* Scan the loops, the inner ones first. */ | |
238 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST) | |
239 { | |
240 loop->lpt_decision.decision = LPT_NONE; | |
241 | |
242 if (dump_file) | |
243 fprintf (dump_file, | |
244 "\n;; *** Considering loop %d for complete peeling ***\n", | |
245 loop->num); | |
246 | |
247 loop->ninsns = num_loop_insns (loop); | |
248 | |
249 decide_peel_once_rolling (loop, flags); | |
250 if (loop->lpt_decision.decision == LPT_NONE) | |
251 decide_peel_completely (loop, flags); | |
252 | |
253 if (loop->lpt_decision.decision == LPT_PEEL_COMPLETELY) | |
254 { | |
255 peel_loop_completely (loop); | |
256 #ifdef ENABLE_CHECKING | |
257 verify_dominators (CDI_DOMINATORS); | |
258 verify_loop_structure (); | |
259 #endif | |
260 } | |
261 } | |
262 } | |
263 | |
264 /* Decide whether unroll or peel loops (depending on FLAGS) and how much. */ | |
265 static void | |
266 decide_unrolling_and_peeling (int flags) | |
267 { | |
268 struct loop *loop; | |
269 loop_iterator li; | |
270 | |
271 /* Scan the loops, inner ones first. */ | |
272 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST) | |
273 { | |
274 loop->lpt_decision.decision = LPT_NONE; | |
275 | |
276 if (dump_file) | |
277 fprintf (dump_file, "\n;; *** Considering loop %d ***\n", loop->num); | |
278 | |
279 /* Do not peel cold areas. */ | |
280 if (optimize_loop_for_size_p (loop)) | |
281 { | |
282 if (dump_file) | |
283 fprintf (dump_file, ";; Not considering loop, cold area\n"); | |
284 continue; | |
285 } | |
286 | |
287 /* Can the loop be manipulated? */ | |
288 if (!can_duplicate_loop_p (loop)) | |
289 { | |
290 if (dump_file) | |
291 fprintf (dump_file, | |
292 ";; Not considering loop, cannot duplicate\n"); | |
293 continue; | |
294 } | |
295 | |
296 /* Skip non-innermost loops. */ | |
297 if (loop->inner) | |
298 { | |
299 if (dump_file) | |
300 fprintf (dump_file, ";; Not considering loop, is not innermost\n"); | |
301 continue; | |
302 } | |
303 | |
304 loop->ninsns = num_loop_insns (loop); | |
305 loop->av_ninsns = average_num_loop_insns (loop); | |
306 | |
307 /* Try transformations one by one in decreasing order of | |
308 priority. */ | |
309 | |
310 decide_unroll_constant_iterations (loop, flags); | |
311 if (loop->lpt_decision.decision == LPT_NONE) | |
312 decide_unroll_runtime_iterations (loop, flags); | |
313 if (loop->lpt_decision.decision == LPT_NONE) | |
314 decide_unroll_stupid (loop, flags); | |
315 if (loop->lpt_decision.decision == LPT_NONE) | |
316 decide_peel_simple (loop, flags); | |
317 } | |
318 } | |
319 | |
320 /* Decide whether the LOOP is once rolling and suitable for complete | |
321 peeling. */ | |
322 static void | |
323 decide_peel_once_rolling (struct loop *loop, int flags ATTRIBUTE_UNUSED) | |
324 { | |
325 struct niter_desc *desc; | |
326 | |
327 if (dump_file) | |
328 fprintf (dump_file, "\n;; Considering peeling once rolling loop\n"); | |
329 | |
330 /* Is the loop small enough? */ | |
331 if ((unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS) < loop->ninsns) | |
332 { | |
333 if (dump_file) | |
334 fprintf (dump_file, ";; Not considering loop, is too big\n"); | |
335 return; | |
336 } | |
337 | |
338 /* Check for simple loops. */ | |
339 desc = get_simple_loop_desc (loop); | |
340 | |
341 /* Check number of iterations. */ | |
342 if (!desc->simple_p | |
343 || desc->assumptions | |
344 || desc->infinite | |
345 || !desc->const_iter | |
346 || desc->niter != 0) | |
347 { | |
348 if (dump_file) | |
349 fprintf (dump_file, | |
350 ";; Unable to prove that the loop rolls exactly once\n"); | |
351 return; | |
352 } | |
353 | |
354 /* Success. */ | |
355 if (dump_file) | |
356 fprintf (dump_file, ";; Decided to peel exactly once rolling loop\n"); | |
357 loop->lpt_decision.decision = LPT_PEEL_COMPLETELY; | |
358 } | |
359 | |
360 /* Decide whether the LOOP is suitable for complete peeling. */ | |
361 static void | |
362 decide_peel_completely (struct loop *loop, int flags ATTRIBUTE_UNUSED) | |
363 { | |
364 unsigned npeel; | |
365 struct niter_desc *desc; | |
366 | |
367 if (dump_file) | |
368 fprintf (dump_file, "\n;; Considering peeling completely\n"); | |
369 | |
370 /* Skip non-innermost loops. */ | |
371 if (loop->inner) | |
372 { | |
373 if (dump_file) | |
374 fprintf (dump_file, ";; Not considering loop, is not innermost\n"); | |
375 return; | |
376 } | |
377 | |
378 /* Do not peel cold areas. */ | |
379 if (optimize_loop_for_size_p (loop)) | |
380 { | |
381 if (dump_file) | |
382 fprintf (dump_file, ";; Not considering loop, cold area\n"); | |
383 return; | |
384 } | |
385 | |
386 /* Can the loop be manipulated? */ | |
387 if (!can_duplicate_loop_p (loop)) | |
388 { | |
389 if (dump_file) | |
390 fprintf (dump_file, | |
391 ";; Not considering loop, cannot duplicate\n"); | |
392 return; | |
393 } | |
394 | |
395 /* npeel = number of iterations to peel. */ | |
396 npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS) / loop->ninsns; | |
397 if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES)) | |
398 npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES); | |
399 | |
400 /* Is the loop small enough? */ | |
401 if (!npeel) | |
402 { | |
403 if (dump_file) | |
404 fprintf (dump_file, ";; Not considering loop, is too big\n"); | |
405 return; | |
406 } | |
407 | |
408 /* Check for simple loops. */ | |
409 desc = get_simple_loop_desc (loop); | |
410 | |
411 /* Check number of iterations. */ | |
412 if (!desc->simple_p | |
413 || desc->assumptions | |
414 || !desc->const_iter | |
415 || desc->infinite) | |
416 { | |
417 if (dump_file) | |
418 fprintf (dump_file, | |
419 ";; Unable to prove that the loop iterates constant times\n"); | |
420 return; | |
421 } | |
422 | |
423 if (desc->niter > npeel - 1) | |
424 { | |
425 if (dump_file) | |
426 { | |
427 fprintf (dump_file, | |
428 ";; Not peeling loop completely, rolls too much ("); | |
429 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter); | |
430 fprintf (dump_file, " iterations > %d [maximum peelings])\n", npeel); | |
431 } | |
432 return; | |
433 } | |
434 | |
435 /* Success. */ | |
436 if (dump_file) | |
437 fprintf (dump_file, ";; Decided to peel loop completely\n"); | |
438 loop->lpt_decision.decision = LPT_PEEL_COMPLETELY; | |
439 } | |
440 | |
441 /* Peel all iterations of LOOP, remove exit edges and cancel the loop | |
442 completely. The transformation done: | |
443 | |
444 for (i = 0; i < 4; i++) | |
445 body; | |
446 | |
447 ==> | |
448 | |
449 i = 0; | |
450 body; i++; | |
451 body; i++; | |
452 body; i++; | |
453 body; i++; | |
454 */ | |
455 static void | |
456 peel_loop_completely (struct loop *loop) | |
457 { | |
458 sbitmap wont_exit; | |
459 unsigned HOST_WIDE_INT npeel; | |
460 unsigned i; | |
461 VEC (edge, heap) *remove_edges; | |
462 edge ein; | |
463 struct niter_desc *desc = get_simple_loop_desc (loop); | |
464 struct opt_info *opt_info = NULL; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
465 |
0 | 466 npeel = desc->niter; |
467 | |
468 if (npeel) | |
469 { | |
470 bool ok; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
471 |
0 | 472 wont_exit = sbitmap_alloc (npeel + 1); |
473 sbitmap_ones (wont_exit); | |
474 RESET_BIT (wont_exit, 0); | |
475 if (desc->noloop_assumptions) | |
476 RESET_BIT (wont_exit, 1); | |
477 | |
478 remove_edges = NULL; | |
479 | |
480 if (flag_split_ivs_in_unroller) | |
481 opt_info = analyze_insns_in_loop (loop); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
482 |
0 | 483 opt_info_start_duplication (opt_info); |
484 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), | |
485 npeel, | |
486 wont_exit, desc->out_edge, | |
487 &remove_edges, | |
488 DLTHE_FLAG_UPDATE_FREQ | |
489 | DLTHE_FLAG_COMPLETTE_PEEL | |
490 | (opt_info | |
491 ? DLTHE_RECORD_COPY_NUMBER : 0)); | |
492 gcc_assert (ok); | |
493 | |
494 free (wont_exit); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
495 |
0 | 496 if (opt_info) |
497 { | |
498 apply_opt_in_copies (opt_info, npeel, false, true); | |
499 free_opt_info (opt_info); | |
500 } | |
501 | |
502 /* Remove the exit edges. */ | |
503 for (i = 0; VEC_iterate (edge, remove_edges, i, ein); i++) | |
504 remove_path (ein); | |
505 VEC_free (edge, heap, remove_edges); | |
506 } | |
507 | |
508 ein = desc->in_edge; | |
509 free_simple_loop_desc (loop); | |
510 | |
511 /* Now remove the unreachable part of the last iteration and cancel | |
512 the loop. */ | |
513 remove_path (ein); | |
514 | |
515 if (dump_file) | |
516 fprintf (dump_file, ";; Peeled loop completely, %d times\n", (int) npeel); | |
517 } | |
518 | |
519 /* Decide whether to unroll LOOP iterating constant number of times | |
520 and how much. */ | |
521 | |
522 static void | |
523 decide_unroll_constant_iterations (struct loop *loop, int flags) | |
524 { | |
525 unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i; | |
526 struct niter_desc *desc; | |
527 | |
528 if (!(flags & UAP_UNROLL)) | |
529 { | |
530 /* We were not asked to, just return back silently. */ | |
531 return; | |
532 } | |
533 | |
534 if (dump_file) | |
535 fprintf (dump_file, | |
536 "\n;; Considering unrolling loop with constant " | |
537 "number of iterations\n"); | |
538 | |
539 /* nunroll = total number of copies of the original loop body in | |
540 unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */ | |
541 nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns; | |
542 nunroll_by_av | |
543 = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns; | |
544 if (nunroll > nunroll_by_av) | |
545 nunroll = nunroll_by_av; | |
546 if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES)) | |
547 nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES); | |
548 | |
549 /* Skip big loops. */ | |
550 if (nunroll <= 1) | |
551 { | |
552 if (dump_file) | |
553 fprintf (dump_file, ";; Not considering loop, is too big\n"); | |
554 return; | |
555 } | |
556 | |
557 /* Check for simple loops. */ | |
558 desc = get_simple_loop_desc (loop); | |
559 | |
560 /* Check number of iterations. */ | |
561 if (!desc->simple_p || !desc->const_iter || desc->assumptions) | |
562 { | |
563 if (dump_file) | |
564 fprintf (dump_file, | |
565 ";; Unable to prove that the loop iterates constant times\n"); | |
566 return; | |
567 } | |
568 | |
569 /* Check whether the loop rolls enough to consider. */ | |
570 if (desc->niter < 2 * nunroll) | |
571 { | |
572 if (dump_file) | |
573 fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n"); | |
574 return; | |
575 } | |
576 | |
577 /* Success; now compute number of iterations to unroll. We alter | |
578 nunroll so that as few as possible copies of loop body are | |
579 necessary, while still not decreasing the number of unrollings | |
580 too much (at most by 1). */ | |
581 best_copies = 2 * nunroll + 10; | |
582 | |
583 i = 2 * nunroll + 2; | |
584 if (i - 1 >= desc->niter) | |
585 i = desc->niter - 2; | |
586 | |
587 for (; i >= nunroll - 1; i--) | |
588 { | |
589 unsigned exit_mod = desc->niter % (i + 1); | |
590 | |
591 if (!loop_exit_at_end_p (loop)) | |
592 n_copies = exit_mod + i + 1; | |
593 else if (exit_mod != (unsigned) i | |
594 || desc->noloop_assumptions != NULL_RTX) | |
595 n_copies = exit_mod + i + 2; | |
596 else | |
597 n_copies = i + 1; | |
598 | |
599 if (n_copies < best_copies) | |
600 { | |
601 best_copies = n_copies; | |
602 best_unroll = i; | |
603 } | |
604 } | |
605 | |
606 if (dump_file) | |
607 fprintf (dump_file, ";; max_unroll %d (%d copies, initial %d).\n", | |
608 best_unroll + 1, best_copies, nunroll); | |
609 | |
610 loop->lpt_decision.decision = LPT_UNROLL_CONSTANT; | |
611 loop->lpt_decision.times = best_unroll; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
612 |
0 | 613 if (dump_file) |
614 fprintf (dump_file, | |
615 ";; Decided to unroll the constant times rolling loop, %d times.\n", | |
616 loop->lpt_decision.times); | |
617 } | |
618 | |
619 /* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES + 1 | |
620 times. The transformation does this: | |
621 | |
622 for (i = 0; i < 102; i++) | |
623 body; | |
624 | |
625 ==> | |
626 | |
627 i = 0; | |
628 body; i++; | |
629 body; i++; | |
630 while (i < 102) | |
631 { | |
632 body; i++; | |
633 body; i++; | |
634 body; i++; | |
635 body; i++; | |
636 } | |
637 */ | |
638 static void | |
639 unroll_loop_constant_iterations (struct loop *loop) | |
640 { | |
641 unsigned HOST_WIDE_INT niter; | |
642 unsigned exit_mod; | |
643 sbitmap wont_exit; | |
644 unsigned i; | |
645 VEC (edge, heap) *remove_edges; | |
646 edge e; | |
647 unsigned max_unroll = loop->lpt_decision.times; | |
648 struct niter_desc *desc = get_simple_loop_desc (loop); | |
649 bool exit_at_end = loop_exit_at_end_p (loop); | |
650 struct opt_info *opt_info = NULL; | |
651 bool ok; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
652 |
0 | 653 niter = desc->niter; |
654 | |
655 /* Should not get here (such loop should be peeled instead). */ | |
656 gcc_assert (niter > max_unroll + 1); | |
657 | |
658 exit_mod = niter % (max_unroll + 1); | |
659 | |
660 wont_exit = sbitmap_alloc (max_unroll + 1); | |
661 sbitmap_ones (wont_exit); | |
662 | |
663 remove_edges = NULL; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
664 if (flag_split_ivs_in_unroller |
0 | 665 || flag_variable_expansion_in_unroller) |
666 opt_info = analyze_insns_in_loop (loop); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
667 |
0 | 668 if (!exit_at_end) |
669 { | |
670 /* The exit is not at the end of the loop; leave exit test | |
671 in the first copy, so that the loops that start with test | |
672 of exit condition have continuous body after unrolling. */ | |
673 | |
674 if (dump_file) | |
675 fprintf (dump_file, ";; Condition on beginning of loop.\n"); | |
676 | |
677 /* Peel exit_mod iterations. */ | |
678 RESET_BIT (wont_exit, 0); | |
679 if (desc->noloop_assumptions) | |
680 RESET_BIT (wont_exit, 1); | |
681 | |
682 if (exit_mod) | |
683 { | |
684 opt_info_start_duplication (opt_info); | |
685 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), | |
686 exit_mod, | |
687 wont_exit, desc->out_edge, | |
688 &remove_edges, | |
689 DLTHE_FLAG_UPDATE_FREQ | |
690 | (opt_info && exit_mod > 1 | |
691 ? DLTHE_RECORD_COPY_NUMBER | |
692 : 0)); | |
693 gcc_assert (ok); | |
694 | |
695 if (opt_info && exit_mod > 1) | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
696 apply_opt_in_copies (opt_info, exit_mod, false, false); |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
697 |
0 | 698 desc->noloop_assumptions = NULL_RTX; |
699 desc->niter -= exit_mod; | |
700 desc->niter_max -= exit_mod; | |
701 } | |
702 | |
703 SET_BIT (wont_exit, 1); | |
704 } | |
705 else | |
706 { | |
707 /* Leave exit test in last copy, for the same reason as above if | |
708 the loop tests the condition at the end of loop body. */ | |
709 | |
710 if (dump_file) | |
711 fprintf (dump_file, ";; Condition on end of loop.\n"); | |
712 | |
713 /* We know that niter >= max_unroll + 2; so we do not need to care of | |
714 case when we would exit before reaching the loop. So just peel | |
715 exit_mod + 1 iterations. */ | |
716 if (exit_mod != max_unroll | |
717 || desc->noloop_assumptions) | |
718 { | |
719 RESET_BIT (wont_exit, 0); | |
720 if (desc->noloop_assumptions) | |
721 RESET_BIT (wont_exit, 1); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
722 |
0 | 723 opt_info_start_duplication (opt_info); |
724 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), | |
725 exit_mod + 1, | |
726 wont_exit, desc->out_edge, | |
727 &remove_edges, | |
728 DLTHE_FLAG_UPDATE_FREQ | |
729 | (opt_info && exit_mod > 0 | |
730 ? DLTHE_RECORD_COPY_NUMBER | |
731 : 0)); | |
732 gcc_assert (ok); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
733 |
0 | 734 if (opt_info && exit_mod > 0) |
735 apply_opt_in_copies (opt_info, exit_mod + 1, false, false); | |
736 | |
737 desc->niter -= exit_mod + 1; | |
738 desc->niter_max -= exit_mod + 1; | |
739 desc->noloop_assumptions = NULL_RTX; | |
740 | |
741 SET_BIT (wont_exit, 0); | |
742 SET_BIT (wont_exit, 1); | |
743 } | |
744 | |
745 RESET_BIT (wont_exit, max_unroll); | |
746 } | |
747 | |
748 /* Now unroll the loop. */ | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
749 |
0 | 750 opt_info_start_duplication (opt_info); |
751 ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop), | |
752 max_unroll, | |
753 wont_exit, desc->out_edge, | |
754 &remove_edges, | |
755 DLTHE_FLAG_UPDATE_FREQ | |
756 | (opt_info | |
757 ? DLTHE_RECORD_COPY_NUMBER | |
758 : 0)); | |
759 gcc_assert (ok); | |
760 | |
761 if (opt_info) | |
762 { | |
763 apply_opt_in_copies (opt_info, max_unroll, true, true); | |
764 free_opt_info (opt_info); | |
765 } | |
766 | |
767 free (wont_exit); | |
768 | |
769 if (exit_at_end) | |
770 { | |
771 basic_block exit_block = get_bb_copy (desc->in_edge->src); | |
772 /* Find a new in and out edge; they are in the last copy we have made. */ | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
773 |
0 | 774 if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest) |
775 { | |
776 desc->out_edge = EDGE_SUCC (exit_block, 0); | |
777 desc->in_edge = EDGE_SUCC (exit_block, 1); | |
778 } | |
779 else | |
780 { | |
781 desc->out_edge = EDGE_SUCC (exit_block, 1); | |
782 desc->in_edge = EDGE_SUCC (exit_block, 0); | |
783 } | |
784 } | |
785 | |
786 desc->niter /= max_unroll + 1; | |
787 desc->niter_max /= max_unroll + 1; | |
788 desc->niter_expr = GEN_INT (desc->niter); | |
789 | |
790 /* Remove the edges. */ | |
791 for (i = 0; VEC_iterate (edge, remove_edges, i, e); i++) | |
792 remove_path (e); | |
793 VEC_free (edge, heap, remove_edges); | |
794 | |
795 if (dump_file) | |
796 fprintf (dump_file, | |
797 ";; Unrolled loop %d times, constant # of iterations %i insns\n", | |
798 max_unroll, num_loop_insns (loop)); | |
799 } | |
800 | |
801 /* Decide whether to unroll LOOP iterating runtime computable number of times | |
802 and how much. */ | |
803 static void | |
804 decide_unroll_runtime_iterations (struct loop *loop, int flags) | |
805 { | |
806 unsigned nunroll, nunroll_by_av, i; | |
807 struct niter_desc *desc; | |
808 | |
809 if (!(flags & UAP_UNROLL)) | |
810 { | |
811 /* We were not asked to, just return back silently. */ | |
812 return; | |
813 } | |
814 | |
815 if (dump_file) | |
816 fprintf (dump_file, | |
817 "\n;; Considering unrolling loop with runtime " | |
818 "computable number of iterations\n"); | |
819 | |
820 /* nunroll = total number of copies of the original loop body in | |
821 unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */ | |
822 nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns; | |
823 nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns; | |
824 if (nunroll > nunroll_by_av) | |
825 nunroll = nunroll_by_av; | |
826 if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES)) | |
827 nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES); | |
828 | |
829 /* Skip big loops. */ | |
830 if (nunroll <= 1) | |
831 { | |
832 if (dump_file) | |
833 fprintf (dump_file, ";; Not considering loop, is too big\n"); | |
834 return; | |
835 } | |
836 | |
837 /* Check for simple loops. */ | |
838 desc = get_simple_loop_desc (loop); | |
839 | |
840 /* Check simpleness. */ | |
841 if (!desc->simple_p || desc->assumptions) | |
842 { | |
843 if (dump_file) | |
844 fprintf (dump_file, | |
845 ";; Unable to prove that the number of iterations " | |
846 "can be counted in runtime\n"); | |
847 return; | |
848 } | |
849 | |
850 if (desc->const_iter) | |
851 { | |
852 if (dump_file) | |
853 fprintf (dump_file, ";; Loop iterates constant times\n"); | |
854 return; | |
855 } | |
856 | |
857 /* If we have profile feedback, check whether the loop rolls. */ | |
858 if (loop->header->count && expected_loop_iterations (loop) < 2 * nunroll) | |
859 { | |
860 if (dump_file) | |
861 fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n"); | |
862 return; | |
863 } | |
864 | |
865 /* Success; now force nunroll to be power of 2, as we are unable to | |
866 cope with overflows in computation of number of iterations. */ | |
867 for (i = 1; 2 * i <= nunroll; i *= 2) | |
868 continue; | |
869 | |
870 loop->lpt_decision.decision = LPT_UNROLL_RUNTIME; | |
871 loop->lpt_decision.times = i - 1; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
872 |
0 | 873 if (dump_file) |
874 fprintf (dump_file, | |
875 ";; Decided to unroll the runtime computable " | |
876 "times rolling loop, %d times.\n", | |
877 loop->lpt_decision.times); | |
878 } | |
879 | |
880 /* Splits edge E and inserts the sequence of instructions INSNS on it, and | |
881 returns the newly created block. If INSNS is NULL_RTX, nothing is changed | |
882 and NULL is returned instead. */ | |
883 | |
884 basic_block | |
885 split_edge_and_insert (edge e, rtx insns) | |
886 { | |
887 basic_block bb; | |
888 | |
889 if (!insns) | |
890 return NULL; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
891 bb = split_edge (e); |
0 | 892 emit_insn_after (insns, BB_END (bb)); |
893 | |
894 /* ??? We used to assume that INSNS can contain control flow insns, and | |
895 that we had to try to find sub basic blocks in BB to maintain a valid | |
896 CFG. For this purpose we used to set the BB_SUPERBLOCK flag on BB | |
897 and call break_superblocks when going out of cfglayout mode. But it | |
898 turns out that this never happens; and that if it does ever happen, | |
899 the verify_flow_info call in loop_optimizer_finalize would fail. | |
900 | |
901 There are two reasons why we expected we could have control flow insns | |
902 in INSNS. The first is when a comparison has to be done in parts, and | |
903 the second is when the number of iterations is computed for loops with | |
904 the number of iterations known at runtime. In both cases, test cases | |
905 to get control flow in INSNS appear to be impossible to construct: | |
906 | |
907 * If do_compare_rtx_and_jump needs several branches to do comparison | |
908 in a mode that needs comparison by parts, we cannot analyze the | |
909 number of iterations of the loop, and we never get to unrolling it. | |
910 | |
911 * The code in expand_divmod that was suspected to cause creation of | |
912 branching code seems to be only accessed for signed division. The | |
913 divisions used by # of iterations analysis are always unsigned. | |
914 Problems might arise on architectures that emits branching code | |
915 for some operations that may appear in the unroller (especially | |
916 for division), but we have no such architectures. | |
917 | |
918 Considering all this, it was decided that we should for now assume | |
919 that INSNS can in theory contain control flow insns, but in practice | |
920 it never does. So we don't handle the theoretical case, and should | |
921 a real failure ever show up, we have a pretty good clue for how to | |
922 fix it. */ | |
923 | |
924 return bb; | |
925 } | |
926 | |
927 /* Unroll LOOP for that we are able to count number of iterations in runtime | |
928 LOOP->LPT_DECISION.TIMES + 1 times. The transformation does this (with some | |
929 extra care for case n < 0): | |
930 | |
931 for (i = 0; i < n; i++) | |
932 body; | |
933 | |
934 ==> | |
935 | |
936 i = 0; | |
937 mod = n % 4; | |
938 | |
939 switch (mod) | |
940 { | |
941 case 3: | |
942 body; i++; | |
943 case 2: | |
944 body; i++; | |
945 case 1: | |
946 body; i++; | |
947 case 0: ; | |
948 } | |
949 | |
950 while (i < n) | |
951 { | |
952 body; i++; | |
953 body; i++; | |
954 body; i++; | |
955 body; i++; | |
956 } | |
957 */ | |
958 static void | |
959 unroll_loop_runtime_iterations (struct loop *loop) | |
960 { | |
961 rtx old_niter, niter, init_code, branch_code, tmp; | |
962 unsigned i, j, p; | |
963 basic_block preheader, *body, swtch, ezc_swtch; | |
964 VEC (basic_block, heap) *dom_bbs; | |
965 sbitmap wont_exit; | |
966 int may_exit_copy; | |
967 unsigned n_peel; | |
968 VEC (edge, heap) *remove_edges; | |
969 edge e; | |
970 bool extra_zero_check, last_may_exit; | |
971 unsigned max_unroll = loop->lpt_decision.times; | |
972 struct niter_desc *desc = get_simple_loop_desc (loop); | |
973 bool exit_at_end = loop_exit_at_end_p (loop); | |
974 struct opt_info *opt_info = NULL; | |
975 bool ok; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
976 |
0 | 977 if (flag_split_ivs_in_unroller |
978 || flag_variable_expansion_in_unroller) | |
979 opt_info = analyze_insns_in_loop (loop); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
980 |
0 | 981 /* Remember blocks whose dominators will have to be updated. */ |
982 dom_bbs = NULL; | |
983 | |
984 body = get_loop_body (loop); | |
985 for (i = 0; i < loop->num_nodes; i++) | |
986 { | |
987 VEC (basic_block, heap) *ldom; | |
988 basic_block bb; | |
989 | |
990 ldom = get_dominated_by (CDI_DOMINATORS, body[i]); | |
991 for (j = 0; VEC_iterate (basic_block, ldom, j, bb); j++) | |
992 if (!flow_bb_inside_loop_p (loop, bb)) | |
993 VEC_safe_push (basic_block, heap, dom_bbs, bb); | |
994 | |
995 VEC_free (basic_block, heap, ldom); | |
996 } | |
997 free (body); | |
998 | |
999 if (!exit_at_end) | |
1000 { | |
1001 /* Leave exit in first copy (for explanation why see comment in | |
1002 unroll_loop_constant_iterations). */ | |
1003 may_exit_copy = 0; | |
1004 n_peel = max_unroll - 1; | |
1005 extra_zero_check = true; | |
1006 last_may_exit = false; | |
1007 } | |
1008 else | |
1009 { | |
1010 /* Leave exit in last copy (for explanation why see comment in | |
1011 unroll_loop_constant_iterations). */ | |
1012 may_exit_copy = max_unroll; | |
1013 n_peel = max_unroll; | |
1014 extra_zero_check = false; | |
1015 last_may_exit = true; | |
1016 } | |
1017 | |
1018 /* Get expression for number of iterations. */ | |
1019 start_sequence (); | |
1020 old_niter = niter = gen_reg_rtx (desc->mode); | |
1021 tmp = force_operand (copy_rtx (desc->niter_expr), niter); | |
1022 if (tmp != niter) | |
1023 emit_move_insn (niter, tmp); | |
1024 | |
1025 /* Count modulo by ANDing it with max_unroll; we use the fact that | |
1026 the number of unrollings is a power of two, and thus this is correct | |
1027 even if there is overflow in the computation. */ | |
1028 niter = expand_simple_binop (desc->mode, AND, | |
1029 niter, | |
1030 GEN_INT (max_unroll), | |
1031 NULL_RTX, 0, OPTAB_LIB_WIDEN); | |
1032 | |
1033 init_code = get_insns (); | |
1034 end_sequence (); | |
1035 unshare_all_rtl_in_chain (init_code); | |
1036 | |
1037 /* Precondition the loop. */ | |
1038 split_edge_and_insert (loop_preheader_edge (loop), init_code); | |
1039 | |
1040 remove_edges = NULL; | |
1041 | |
1042 wont_exit = sbitmap_alloc (max_unroll + 2); | |
1043 | |
1044 /* Peel the first copy of loop body (almost always we must leave exit test | |
1045 here; the only exception is when we have extra zero check and the number | |
1046 of iterations is reliable. Also record the place of (possible) extra | |
1047 zero check. */ | |
1048 sbitmap_zero (wont_exit); | |
1049 if (extra_zero_check | |
1050 && !desc->noloop_assumptions) | |
1051 SET_BIT (wont_exit, 1); | |
1052 ezc_swtch = loop_preheader_edge (loop)->src; | |
1053 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), | |
1054 1, wont_exit, desc->out_edge, | |
1055 &remove_edges, | |
1056 DLTHE_FLAG_UPDATE_FREQ); | |
1057 gcc_assert (ok); | |
1058 | |
1059 /* Record the place where switch will be built for preconditioning. */ | |
1060 swtch = split_edge (loop_preheader_edge (loop)); | |
1061 | |
1062 for (i = 0; i < n_peel; i++) | |
1063 { | |
1064 /* Peel the copy. */ | |
1065 sbitmap_zero (wont_exit); | |
1066 if (i != n_peel - 1 || !last_may_exit) | |
1067 SET_BIT (wont_exit, 1); | |
1068 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), | |
1069 1, wont_exit, desc->out_edge, | |
1070 &remove_edges, | |
1071 DLTHE_FLAG_UPDATE_FREQ); | |
1072 gcc_assert (ok); | |
1073 | |
1074 /* Create item for switch. */ | |
1075 j = n_peel - i - (extra_zero_check ? 0 : 1); | |
1076 p = REG_BR_PROB_BASE / (i + 2); | |
1077 | |
1078 preheader = split_edge (loop_preheader_edge (loop)); | |
1079 branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ, | |
1080 block_label (preheader), p, | |
1081 NULL_RTX); | |
1082 | |
1083 /* We rely on the fact that the compare and jump cannot be optimized out, | |
1084 and hence the cfg we create is correct. */ | |
1085 gcc_assert (branch_code != NULL_RTX); | |
1086 | |
1087 swtch = split_edge_and_insert (single_pred_edge (swtch), branch_code); | |
1088 set_immediate_dominator (CDI_DOMINATORS, preheader, swtch); | |
1089 single_pred_edge (swtch)->probability = REG_BR_PROB_BASE - p; | |
1090 e = make_edge (swtch, preheader, | |
1091 single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP); | |
1092 e->probability = p; | |
1093 } | |
1094 | |
1095 if (extra_zero_check) | |
1096 { | |
1097 /* Add branch for zero iterations. */ | |
1098 p = REG_BR_PROB_BASE / (max_unroll + 1); | |
1099 swtch = ezc_swtch; | |
1100 preheader = split_edge (loop_preheader_edge (loop)); | |
1101 branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ, | |
1102 block_label (preheader), p, | |
1103 NULL_RTX); | |
1104 gcc_assert (branch_code != NULL_RTX); | |
1105 | |
1106 swtch = split_edge_and_insert (single_succ_edge (swtch), branch_code); | |
1107 set_immediate_dominator (CDI_DOMINATORS, preheader, swtch); | |
1108 single_succ_edge (swtch)->probability = REG_BR_PROB_BASE - p; | |
1109 e = make_edge (swtch, preheader, | |
1110 single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP); | |
1111 e->probability = p; | |
1112 } | |
1113 | |
1114 /* Recount dominators for outer blocks. */ | |
1115 iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, false); | |
1116 | |
1117 /* And unroll loop. */ | |
1118 | |
1119 sbitmap_ones (wont_exit); | |
1120 RESET_BIT (wont_exit, may_exit_copy); | |
1121 opt_info_start_duplication (opt_info); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1122 |
0 | 1123 ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop), |
1124 max_unroll, | |
1125 wont_exit, desc->out_edge, | |
1126 &remove_edges, | |
1127 DLTHE_FLAG_UPDATE_FREQ | |
1128 | (opt_info | |
1129 ? DLTHE_RECORD_COPY_NUMBER | |
1130 : 0)); | |
1131 gcc_assert (ok); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1132 |
0 | 1133 if (opt_info) |
1134 { | |
1135 apply_opt_in_copies (opt_info, max_unroll, true, true); | |
1136 free_opt_info (opt_info); | |
1137 } | |
1138 | |
1139 free (wont_exit); | |
1140 | |
1141 if (exit_at_end) | |
1142 { | |
1143 basic_block exit_block = get_bb_copy (desc->in_edge->src); | |
1144 /* Find a new in and out edge; they are in the last copy we have | |
1145 made. */ | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1146 |
0 | 1147 if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest) |
1148 { | |
1149 desc->out_edge = EDGE_SUCC (exit_block, 0); | |
1150 desc->in_edge = EDGE_SUCC (exit_block, 1); | |
1151 } | |
1152 else | |
1153 { | |
1154 desc->out_edge = EDGE_SUCC (exit_block, 1); | |
1155 desc->in_edge = EDGE_SUCC (exit_block, 0); | |
1156 } | |
1157 } | |
1158 | |
1159 /* Remove the edges. */ | |
1160 for (i = 0; VEC_iterate (edge, remove_edges, i, e); i++) | |
1161 remove_path (e); | |
1162 VEC_free (edge, heap, remove_edges); | |
1163 | |
1164 /* We must be careful when updating the number of iterations due to | |
1165 preconditioning and the fact that the value must be valid at entry | |
1166 of the loop. After passing through the above code, we see that | |
1167 the correct new number of iterations is this: */ | |
1168 gcc_assert (!desc->const_iter); | |
1169 desc->niter_expr = | |
1170 simplify_gen_binary (UDIV, desc->mode, old_niter, | |
1171 GEN_INT (max_unroll + 1)); | |
1172 desc->niter_max /= max_unroll + 1; | |
1173 if (exit_at_end) | |
1174 { | |
1175 desc->niter_expr = | |
1176 simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx); | |
1177 desc->noloop_assumptions = NULL_RTX; | |
1178 desc->niter_max--; | |
1179 } | |
1180 | |
1181 if (dump_file) | |
1182 fprintf (dump_file, | |
1183 ";; Unrolled loop %d times, counting # of iterations " | |
1184 "in runtime, %i insns\n", | |
1185 max_unroll, num_loop_insns (loop)); | |
1186 | |
1187 VEC_free (basic_block, heap, dom_bbs); | |
1188 } | |
1189 | |
1190 /* Decide whether to simply peel LOOP and how much. */ | |
1191 static void | |
1192 decide_peel_simple (struct loop *loop, int flags) | |
1193 { | |
1194 unsigned npeel; | |
1195 struct niter_desc *desc; | |
1196 | |
1197 if (!(flags & UAP_PEEL)) | |
1198 { | |
1199 /* We were not asked to, just return back silently. */ | |
1200 return; | |
1201 } | |
1202 | |
1203 if (dump_file) | |
1204 fprintf (dump_file, "\n;; Considering simply peeling loop\n"); | |
1205 | |
1206 /* npeel = number of iterations to peel. */ | |
1207 npeel = PARAM_VALUE (PARAM_MAX_PEELED_INSNS) / loop->ninsns; | |
1208 if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_PEEL_TIMES)) | |
1209 npeel = PARAM_VALUE (PARAM_MAX_PEEL_TIMES); | |
1210 | |
1211 /* Skip big loops. */ | |
1212 if (!npeel) | |
1213 { | |
1214 if (dump_file) | |
1215 fprintf (dump_file, ";; Not considering loop, is too big\n"); | |
1216 return; | |
1217 } | |
1218 | |
1219 /* Check for simple loops. */ | |
1220 desc = get_simple_loop_desc (loop); | |
1221 | |
1222 /* Check number of iterations. */ | |
1223 if (desc->simple_p && !desc->assumptions && desc->const_iter) | |
1224 { | |
1225 if (dump_file) | |
1226 fprintf (dump_file, ";; Loop iterates constant times\n"); | |
1227 return; | |
1228 } | |
1229 | |
1230 /* Do not simply peel loops with branches inside -- it increases number | |
1231 of mispredicts. */ | |
1232 if (num_loop_branches (loop) > 1) | |
1233 { | |
1234 if (dump_file) | |
1235 fprintf (dump_file, ";; Not peeling, contains branches\n"); | |
1236 return; | |
1237 } | |
1238 | |
1239 if (loop->header->count) | |
1240 { | |
1241 unsigned niter = expected_loop_iterations (loop); | |
1242 if (niter + 1 > npeel) | |
1243 { | |
1244 if (dump_file) | |
1245 { | |
1246 fprintf (dump_file, ";; Not peeling loop, rolls too much ("); | |
1247 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, | |
1248 (HOST_WIDEST_INT) (niter + 1)); | |
1249 fprintf (dump_file, " iterations > %d [maximum peelings])\n", | |
1250 npeel); | |
1251 } | |
1252 return; | |
1253 } | |
1254 npeel = niter + 1; | |
1255 } | |
1256 else | |
1257 { | |
1258 /* For now we have no good heuristics to decide whether loop peeling | |
1259 will be effective, so disable it. */ | |
1260 if (dump_file) | |
1261 fprintf (dump_file, | |
1262 ";; Not peeling loop, no evidence it will be profitable\n"); | |
1263 return; | |
1264 } | |
1265 | |
1266 /* Success. */ | |
1267 loop->lpt_decision.decision = LPT_PEEL_SIMPLE; | |
1268 loop->lpt_decision.times = npeel; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1269 |
0 | 1270 if (dump_file) |
1271 fprintf (dump_file, ";; Decided to simply peel the loop, %d times.\n", | |
1272 loop->lpt_decision.times); | |
1273 } | |
1274 | |
1275 /* Peel a LOOP LOOP->LPT_DECISION.TIMES times. The transformation: | |
1276 while (cond) | |
1277 body; | |
1278 | |
1279 ==> | |
1280 | |
1281 if (!cond) goto end; | |
1282 body; | |
1283 if (!cond) goto end; | |
1284 body; | |
1285 while (cond) | |
1286 body; | |
1287 end: ; | |
1288 */ | |
1289 static void | |
1290 peel_loop_simple (struct loop *loop) | |
1291 { | |
1292 sbitmap wont_exit; | |
1293 unsigned npeel = loop->lpt_decision.times; | |
1294 struct niter_desc *desc = get_simple_loop_desc (loop); | |
1295 struct opt_info *opt_info = NULL; | |
1296 bool ok; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1297 |
0 | 1298 if (flag_split_ivs_in_unroller && npeel > 1) |
1299 opt_info = analyze_insns_in_loop (loop); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1300 |
0 | 1301 wont_exit = sbitmap_alloc (npeel + 1); |
1302 sbitmap_zero (wont_exit); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1303 |
0 | 1304 opt_info_start_duplication (opt_info); |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1305 |
0 | 1306 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), |
1307 npeel, wont_exit, NULL, | |
1308 NULL, DLTHE_FLAG_UPDATE_FREQ | |
1309 | (opt_info | |
1310 ? DLTHE_RECORD_COPY_NUMBER | |
1311 : 0)); | |
1312 gcc_assert (ok); | |
1313 | |
1314 free (wont_exit); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1315 |
0 | 1316 if (opt_info) |
1317 { | |
1318 apply_opt_in_copies (opt_info, npeel, false, false); | |
1319 free_opt_info (opt_info); | |
1320 } | |
1321 | |
1322 if (desc->simple_p) | |
1323 { | |
1324 if (desc->const_iter) | |
1325 { | |
1326 desc->niter -= npeel; | |
1327 desc->niter_expr = GEN_INT (desc->niter); | |
1328 desc->noloop_assumptions = NULL_RTX; | |
1329 } | |
1330 else | |
1331 { | |
1332 /* We cannot just update niter_expr, as its value might be clobbered | |
1333 inside loop. We could handle this by counting the number into | |
1334 temporary just like we do in runtime unrolling, but it does not | |
1335 seem worthwhile. */ | |
1336 free_simple_loop_desc (loop); | |
1337 } | |
1338 } | |
1339 if (dump_file) | |
1340 fprintf (dump_file, ";; Peeling loop %d times\n", npeel); | |
1341 } | |
1342 | |
1343 /* Decide whether to unroll LOOP stupidly and how much. */ | |
1344 static void | |
1345 decide_unroll_stupid (struct loop *loop, int flags) | |
1346 { | |
1347 unsigned nunroll, nunroll_by_av, i; | |
1348 struct niter_desc *desc; | |
1349 | |
1350 if (!(flags & UAP_UNROLL_ALL)) | |
1351 { | |
1352 /* We were not asked to, just return back silently. */ | |
1353 return; | |
1354 } | |
1355 | |
1356 if (dump_file) | |
1357 fprintf (dump_file, "\n;; Considering unrolling loop stupidly\n"); | |
1358 | |
1359 /* nunroll = total number of copies of the original loop body in | |
1360 unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */ | |
1361 nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns; | |
1362 nunroll_by_av | |
1363 = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns; | |
1364 if (nunroll > nunroll_by_av) | |
1365 nunroll = nunroll_by_av; | |
1366 if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES)) | |
1367 nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES); | |
1368 | |
1369 /* Skip big loops. */ | |
1370 if (nunroll <= 1) | |
1371 { | |
1372 if (dump_file) | |
1373 fprintf (dump_file, ";; Not considering loop, is too big\n"); | |
1374 return; | |
1375 } | |
1376 | |
1377 /* Check for simple loops. */ | |
1378 desc = get_simple_loop_desc (loop); | |
1379 | |
1380 /* Check simpleness. */ | |
1381 if (desc->simple_p && !desc->assumptions) | |
1382 { | |
1383 if (dump_file) | |
1384 fprintf (dump_file, ";; The loop is simple\n"); | |
1385 return; | |
1386 } | |
1387 | |
1388 /* Do not unroll loops with branches inside -- it increases number | |
1389 of mispredicts. */ | |
1390 if (num_loop_branches (loop) > 1) | |
1391 { | |
1392 if (dump_file) | |
1393 fprintf (dump_file, ";; Not unrolling, contains branches\n"); | |
1394 return; | |
1395 } | |
1396 | |
1397 /* If we have profile feedback, check whether the loop rolls. */ | |
1398 if (loop->header->count | |
1399 && expected_loop_iterations (loop) < 2 * nunroll) | |
1400 { | |
1401 if (dump_file) | |
1402 fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n"); | |
1403 return; | |
1404 } | |
1405 | |
1406 /* Success. Now force nunroll to be power of 2, as it seems that this | |
1407 improves results (partially because of better alignments, partially | |
1408 because of some dark magic). */ | |
1409 for (i = 1; 2 * i <= nunroll; i *= 2) | |
1410 continue; | |
1411 | |
1412 loop->lpt_decision.decision = LPT_UNROLL_STUPID; | |
1413 loop->lpt_decision.times = i - 1; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1414 |
0 | 1415 if (dump_file) |
1416 fprintf (dump_file, | |
1417 ";; Decided to unroll the loop stupidly, %d times.\n", | |
1418 loop->lpt_decision.times); | |
1419 } | |
1420 | |
1421 /* Unroll a LOOP LOOP->LPT_DECISION.TIMES times. The transformation: | |
1422 while (cond) | |
1423 body; | |
1424 | |
1425 ==> | |
1426 | |
1427 while (cond) | |
1428 { | |
1429 body; | |
1430 if (!cond) break; | |
1431 body; | |
1432 if (!cond) break; | |
1433 body; | |
1434 if (!cond) break; | |
1435 body; | |
1436 } | |
1437 */ | |
1438 static void | |
1439 unroll_loop_stupid (struct loop *loop) | |
1440 { | |
1441 sbitmap wont_exit; | |
1442 unsigned nunroll = loop->lpt_decision.times; | |
1443 struct niter_desc *desc = get_simple_loop_desc (loop); | |
1444 struct opt_info *opt_info = NULL; | |
1445 bool ok; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1446 |
0 | 1447 if (flag_split_ivs_in_unroller |
1448 || flag_variable_expansion_in_unroller) | |
1449 opt_info = analyze_insns_in_loop (loop); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1450 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1451 |
0 | 1452 wont_exit = sbitmap_alloc (nunroll + 1); |
1453 sbitmap_zero (wont_exit); | |
1454 opt_info_start_duplication (opt_info); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1455 |
0 | 1456 ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop), |
1457 nunroll, wont_exit, | |
1458 NULL, NULL, | |
1459 DLTHE_FLAG_UPDATE_FREQ | |
1460 | (opt_info | |
1461 ? DLTHE_RECORD_COPY_NUMBER | |
1462 : 0)); | |
1463 gcc_assert (ok); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1464 |
0 | 1465 if (opt_info) |
1466 { | |
1467 apply_opt_in_copies (opt_info, nunroll, true, true); | |
1468 free_opt_info (opt_info); | |
1469 } | |
1470 | |
1471 free (wont_exit); | |
1472 | |
1473 if (desc->simple_p) | |
1474 { | |
1475 /* We indeed may get here provided that there are nontrivial assumptions | |
1476 for a loop to be really simple. We could update the counts, but the | |
1477 problem is that we are unable to decide which exit will be taken | |
1478 (not really true in case the number of iterations is constant, | |
1479 but noone will do anything with this information, so we do not | |
1480 worry about it). */ | |
1481 desc->simple_p = false; | |
1482 } | |
1483 | |
1484 if (dump_file) | |
1485 fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n", | |
1486 nunroll, num_loop_insns (loop)); | |
1487 } | |
1488 | |
1489 /* A hash function for information about insns to split. */ | |
1490 | |
1491 static hashval_t | |
1492 si_info_hash (const void *ivts) | |
1493 { | |
1494 return (hashval_t) INSN_UID (((const struct iv_to_split *) ivts)->insn); | |
1495 } | |
1496 | |
1497 /* An equality functions for information about insns to split. */ | |
1498 | |
1499 static int | |
1500 si_info_eq (const void *ivts1, const void *ivts2) | |
1501 { | |
1502 const struct iv_to_split *const i1 = (const struct iv_to_split *) ivts1; | |
1503 const struct iv_to_split *const i2 = (const struct iv_to_split *) ivts2; | |
1504 | |
1505 return i1->insn == i2->insn; | |
1506 } | |
1507 | |
1508 /* Return a hash for VES, which is really a "var_to_expand *". */ | |
1509 | |
1510 static hashval_t | |
1511 ve_info_hash (const void *ves) | |
1512 { | |
1513 return (hashval_t) INSN_UID (((const struct var_to_expand *) ves)->insn); | |
1514 } | |
1515 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1516 /* Return true if IVTS1 and IVTS2 (which are really both of type |
0 | 1517 "var_to_expand *") refer to the same instruction. */ |
1518 | |
1519 static int | |
1520 ve_info_eq (const void *ivts1, const void *ivts2) | |
1521 { | |
1522 const struct var_to_expand *const i1 = (const struct var_to_expand *) ivts1; | |
1523 const struct var_to_expand *const i2 = (const struct var_to_expand *) ivts2; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1524 |
0 | 1525 return i1->insn == i2->insn; |
1526 } | |
1527 | |
1528 /* Returns true if REG is referenced in one insn in LOOP. */ | |
1529 | |
1530 bool | |
1531 referenced_in_one_insn_in_loop_p (struct loop *loop, rtx reg) | |
1532 { | |
1533 basic_block *body, bb; | |
1534 unsigned i; | |
1535 int count_ref = 0; | |
1536 rtx insn; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1537 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1538 body = get_loop_body (loop); |
0 | 1539 for (i = 0; i < loop->num_nodes; i++) |
1540 { | |
1541 bb = body[i]; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1542 |
0 | 1543 FOR_BB_INSNS (bb, insn) |
1544 { | |
1545 if (rtx_referenced_p (reg, insn)) | |
1546 count_ref++; | |
1547 } | |
1548 } | |
1549 return (count_ref == 1); | |
1550 } | |
1551 | |
1552 /* Determine whether INSN contains an accumulator | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1553 which can be expanded into separate copies, |
0 | 1554 one for each copy of the LOOP body. |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1555 |
0 | 1556 for (i = 0 ; i < n; i++) |
1557 sum += a[i]; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1558 |
0 | 1559 ==> |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1560 |
0 | 1561 sum += a[i] |
1562 .... | |
1563 i = i+1; | |
1564 sum1 += a[i] | |
1565 .... | |
1566 i = i+1 | |
1567 sum2 += a[i]; | |
1568 .... | |
1569 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1570 Return NULL if INSN contains no opportunity for expansion of accumulator. |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1571 Otherwise, allocate a VAR_TO_EXPAND structure, fill it with the relevant |
0 | 1572 information and return a pointer to it. |
1573 */ | |
1574 | |
1575 static struct var_to_expand * | |
1576 analyze_insn_to_expand_var (struct loop *loop, rtx insn) | |
1577 { | |
1578 rtx set, dest, src, op1, op2, something; | |
1579 struct var_to_expand *ves; | |
1580 enum machine_mode mode1, mode2; | |
1581 unsigned accum_pos; | |
1582 | |
1583 set = single_set (insn); | |
1584 if (!set) | |
1585 return NULL; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1586 |
0 | 1587 dest = SET_DEST (set); |
1588 src = SET_SRC (set); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1589 |
0 | 1590 if (GET_CODE (src) != PLUS |
1591 && GET_CODE (src) != MINUS | |
1592 && GET_CODE (src) != MULT) | |
1593 return NULL; | |
1594 | |
1595 /* Hmm, this is a bit paradoxical. We know that INSN is a valid insn | |
1596 in MD. But if there is no optab to generate the insn, we can not | |
1597 perform the variable expansion. This can happen if an MD provides | |
1598 an insn but not a named pattern to generate it, for example to avoid | |
1599 producing code that needs additional mode switches like for x87/mmx. | |
1600 | |
1601 So we check have_insn_for which looks for an optab for the operation | |
1602 in SRC. If it doesn't exist, we can't perform the expansion even | |
1603 though INSN is valid. */ | |
1604 if (!have_insn_for (GET_CODE (src), GET_MODE (src))) | |
1605 return NULL; | |
1606 | |
1607 op1 = XEXP (src, 0); | |
1608 op2 = XEXP (src, 1); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1609 |
0 | 1610 if (!REG_P (dest) |
1611 && !(GET_CODE (dest) == SUBREG | |
1612 && REG_P (SUBREG_REG (dest)))) | |
1613 return NULL; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1614 |
0 | 1615 if (rtx_equal_p (dest, op1)) |
1616 accum_pos = 0; | |
1617 else if (rtx_equal_p (dest, op2)) | |
1618 accum_pos = 1; | |
1619 else | |
1620 return NULL; | |
1621 | |
1622 /* The method of expansion that we are using; which includes | |
1623 the initialization of the expansions with zero and the summation of | |
1624 the expansions at the end of the computation will yield wrong results | |
1625 for (x = something - x) thus avoid using it in that case. */ | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1626 if (accum_pos == 1 |
0 | 1627 && GET_CODE (src) == MINUS) |
1628 return NULL; | |
1629 | |
1630 something = (accum_pos == 0)? op2 : op1; | |
1631 | |
1632 if (!referenced_in_one_insn_in_loop_p (loop, dest)) | |
1633 return NULL; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1634 |
0 | 1635 if (rtx_referenced_p (dest, something)) |
1636 return NULL; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1637 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1638 mode1 = GET_MODE (dest); |
0 | 1639 mode2 = GET_MODE (something); |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1640 if ((FLOAT_MODE_P (mode1) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1641 || FLOAT_MODE_P (mode2)) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1642 && !flag_associative_math) |
0 | 1643 return NULL; |
1644 | |
1645 if (dump_file) | |
1646 { | |
1647 fprintf (dump_file, | |
1648 "\n;; Expanding Accumulator "); | |
1649 print_rtl (dump_file, dest); | |
1650 fprintf (dump_file, "\n"); | |
1651 } | |
1652 | |
1653 /* Record the accumulator to expand. */ | |
1654 ves = XNEW (struct var_to_expand); | |
1655 ves->insn = insn; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1656 ves->reg = copy_rtx (dest); |
0 | 1657 ves->var_expansions = VEC_alloc (rtx, heap, 1); |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1658 ves->next = NULL; |
0 | 1659 ves->op = GET_CODE (src); |
1660 ves->expansion_count = 0; | |
1661 ves->reuse_expansion = 0; | |
1662 ves->accum_pos = accum_pos; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1663 return ves; |
0 | 1664 } |
1665 | |
1666 /* Determine whether there is an induction variable in INSN that | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1667 we would like to split during unrolling. |
0 | 1668 |
1669 I.e. replace | |
1670 | |
1671 i = i + 1; | |
1672 ... | |
1673 i = i + 1; | |
1674 ... | |
1675 i = i + 1; | |
1676 ... | |
1677 | |
1678 type chains by | |
1679 | |
1680 i0 = i + 1 | |
1681 ... | |
1682 i = i0 + 1 | |
1683 ... | |
1684 i = i0 + 2 | |
1685 ... | |
1686 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1687 Return NULL if INSN contains no interesting IVs. Otherwise, allocate |
0 | 1688 an IV_TO_SPLIT structure, fill it with the relevant information and return a |
1689 pointer to it. */ | |
1690 | |
1691 static struct iv_to_split * | |
1692 analyze_iv_to_split_insn (rtx insn) | |
1693 { | |
1694 rtx set, dest; | |
1695 struct rtx_iv iv; | |
1696 struct iv_to_split *ivts; | |
1697 bool ok; | |
1698 | |
1699 /* For now we just split the basic induction variables. Later this may be | |
1700 extended for example by selecting also addresses of memory references. */ | |
1701 set = single_set (insn); | |
1702 if (!set) | |
1703 return NULL; | |
1704 | |
1705 dest = SET_DEST (set); | |
1706 if (!REG_P (dest)) | |
1707 return NULL; | |
1708 | |
1709 if (!biv_p (insn, dest)) | |
1710 return NULL; | |
1711 | |
1712 ok = iv_analyze_result (insn, dest, &iv); | |
1713 | |
1714 /* This used to be an assert under the assumption that if biv_p returns | |
1715 true that iv_analyze_result must also return true. However, that | |
1716 assumption is not strictly correct as evidenced by pr25569. | |
1717 | |
1718 Returning NULL when iv_analyze_result returns false is safe and | |
1719 avoids the problems in pr25569 until the iv_analyze_* routines | |
1720 can be fixed, which is apparently hard and time consuming | |
1721 according to their author. */ | |
1722 if (! ok) | |
1723 return NULL; | |
1724 | |
1725 if (iv.step == const0_rtx | |
1726 || iv.mode != iv.extend_mode) | |
1727 return NULL; | |
1728 | |
1729 /* Record the insn to split. */ | |
1730 ivts = XNEW (struct iv_to_split); | |
1731 ivts->insn = insn; | |
1732 ivts->base_var = NULL_RTX; | |
1733 ivts->step = iv.step; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1734 ivts->next = NULL; |
0 | 1735 ivts->n_loc = 1; |
1736 ivts->loc[0] = 1; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1737 |
0 | 1738 return ivts; |
1739 } | |
1740 | |
1741 /* Determines which of insns in LOOP can be optimized. | |
1742 Return a OPT_INFO struct with the relevant hash tables filled | |
1743 with all insns to be optimized. The FIRST_NEW_BLOCK field | |
1744 is undefined for the return value. */ | |
1745 | |
1746 static struct opt_info * | |
1747 analyze_insns_in_loop (struct loop *loop) | |
1748 { | |
1749 basic_block *body, bb; | |
1750 unsigned i; | |
1751 struct opt_info *opt_info = XCNEW (struct opt_info); | |
1752 rtx insn; | |
1753 struct iv_to_split *ivts = NULL; | |
1754 struct var_to_expand *ves = NULL; | |
1755 PTR *slot1; | |
1756 PTR *slot2; | |
1757 VEC (edge, heap) *edges = get_loop_exit_edges (loop); | |
1758 edge exit; | |
1759 bool can_apply = false; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1760 |
0 | 1761 iv_analysis_loop_init (loop); |
1762 | |
1763 body = get_loop_body (loop); | |
1764 | |
1765 if (flag_split_ivs_in_unroller) | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1766 { |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1767 opt_info->insns_to_split = htab_create (5 * loop->num_nodes, |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1768 si_info_hash, si_info_eq, free); |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1769 opt_info->iv_to_split_head = NULL; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1770 opt_info->iv_to_split_tail = &opt_info->iv_to_split_head; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1771 } |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1772 |
0 | 1773 /* Record the loop exit bb and loop preheader before the unrolling. */ |
1774 opt_info->loop_preheader = loop_preheader_edge (loop)->src; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1775 |
0 | 1776 if (VEC_length (edge, edges) == 1) |
1777 { | |
1778 exit = VEC_index (edge, edges, 0); | |
1779 if (!(exit->flags & EDGE_COMPLEX)) | |
1780 { | |
1781 opt_info->loop_exit = split_edge (exit); | |
1782 can_apply = true; | |
1783 } | |
1784 } | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1785 |
0 | 1786 if (flag_variable_expansion_in_unroller |
1787 && can_apply) | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1788 { |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1789 opt_info->insns_with_var_to_expand = htab_create (5 * loop->num_nodes, |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1790 ve_info_hash, |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1791 ve_info_eq, free); |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1792 opt_info->var_to_expand_head = NULL; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1793 opt_info->var_to_expand_tail = &opt_info->var_to_expand_head; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1794 } |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1795 |
0 | 1796 for (i = 0; i < loop->num_nodes; i++) |
1797 { | |
1798 bb = body[i]; | |
1799 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb)) | |
1800 continue; | |
1801 | |
1802 FOR_BB_INSNS (bb, insn) | |
1803 { | |
1804 if (!INSN_P (insn)) | |
1805 continue; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1806 |
0 | 1807 if (opt_info->insns_to_split) |
1808 ivts = analyze_iv_to_split_insn (insn); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1809 |
0 | 1810 if (ivts) |
1811 { | |
1812 slot1 = htab_find_slot (opt_info->insns_to_split, ivts, INSERT); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1813 gcc_assert (*slot1 == NULL); |
0 | 1814 *slot1 = ivts; |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1815 *opt_info->iv_to_split_tail = ivts; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1816 opt_info->iv_to_split_tail = &ivts->next; |
0 | 1817 continue; |
1818 } | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1819 |
0 | 1820 if (opt_info->insns_with_var_to_expand) |
1821 ves = analyze_insn_to_expand_var (loop, insn); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1822 |
0 | 1823 if (ves) |
1824 { | |
1825 slot2 = htab_find_slot (opt_info->insns_with_var_to_expand, ves, INSERT); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1826 gcc_assert (*slot2 == NULL); |
0 | 1827 *slot2 = ves; |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1828 *opt_info->var_to_expand_tail = ves; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1829 opt_info->var_to_expand_tail = &ves->next; |
0 | 1830 } |
1831 } | |
1832 } | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1833 |
0 | 1834 VEC_free (edge, heap, edges); |
1835 free (body); | |
1836 return opt_info; | |
1837 } | |
1838 | |
1839 /* Called just before loop duplication. Records start of duplicated area | |
1840 to OPT_INFO. */ | |
1841 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1842 static void |
0 | 1843 opt_info_start_duplication (struct opt_info *opt_info) |
1844 { | |
1845 if (opt_info) | |
1846 opt_info->first_new_block = last_basic_block; | |
1847 } | |
1848 | |
/* Compute how many iterations separate the initialization of the base
   variable from the copy numbered N_COPY.  N_COPIES is the total number
   of newly created copies.  UNROLLING is true when the loop is being
   unrolled and false when it is being peeled.  */

static unsigned
determine_split_iv_delta (unsigned n_copy, unsigned n_copies, bool unrolling)
{
  /* When unrolling, the initialization lives in the original loop body
     (number 0), so the distance is simply the copy number.  */
  if (unrolling)
    return n_copy;

  /* When peeling, the initialization occurs in copy number 1 and the
     original loop (number 0) comes last.  */
  return n_copy != 0 ? n_copy - 1 : n_copies;
}
1873 | |
1874 /* Locate in EXPR the expression corresponding to the location recorded | |
1875 in IVTS, and return a pointer to the RTX for this location. */ | |
1876 | |
1877 static rtx * | |
1878 get_ivts_expr (rtx expr, struct iv_to_split *ivts) | |
1879 { | |
1880 unsigned i; | |
1881 rtx *ret = &expr; | |
1882 | |
1883 for (i = 0; i < ivts->n_loc; i++) | |
1884 ret = &XEXP (*ret, ivts->loc[i]); | |
1885 | |
1886 return ret; | |
1887 } | |
1888 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1889 /* Allocate basic variable for the induction variable chain. */ |
0 | 1890 |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1891 static void |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1892 allocate_basic_variable (struct iv_to_split *ivts) |
0 | 1893 { |
1894 rtx expr = *get_ivts_expr (single_set (ivts->insn), ivts); | |
1895 | |
1896 ivts->base_var = gen_reg_rtx (GET_MODE (expr)); | |
1897 } | |
1898 | |
1899 /* Insert initialization of basic variable of IVTS before INSN, taking | |
1900 the initial value from INSN. */ | |
1901 | |
1902 static void | |
1903 insert_base_initialization (struct iv_to_split *ivts, rtx insn) | |
1904 { | |
1905 rtx expr = copy_rtx (*get_ivts_expr (single_set (insn), ivts)); | |
1906 rtx seq; | |
1907 | |
1908 start_sequence (); | |
1909 expr = force_operand (expr, ivts->base_var); | |
1910 if (expr != ivts->base_var) | |
1911 emit_move_insn (ivts->base_var, expr); | |
1912 seq = get_insns (); | |
1913 end_sequence (); | |
1914 | |
1915 emit_insn_before (seq, insn); | |
1916 } | |
1917 | |
1918 /* Replace the use of induction variable described in IVTS in INSN | |
1919 by base variable + DELTA * step. */ | |
1920 | |
1921 static void | |
1922 split_iv (struct iv_to_split *ivts, rtx insn, unsigned delta) | |
1923 { | |
1924 rtx expr, *loc, seq, incr, var; | |
1925 enum machine_mode mode = GET_MODE (ivts->base_var); | |
1926 rtx src, dest, set; | |
1927 | |
1928 /* Construct base + DELTA * step. */ | |
1929 if (!delta) | |
1930 expr = ivts->base_var; | |
1931 else | |
1932 { | |
1933 incr = simplify_gen_binary (MULT, mode, | |
1934 ivts->step, gen_int_mode (delta, mode)); | |
1935 expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var), | |
1936 ivts->base_var, incr); | |
1937 } | |
1938 | |
1939 /* Figure out where to do the replacement. */ | |
1940 loc = get_ivts_expr (single_set (insn), ivts); | |
1941 | |
1942 /* If we can make the replacement right away, we're done. */ | |
1943 if (validate_change (insn, loc, expr, 0)) | |
1944 return; | |
1945 | |
1946 /* Otherwise, force EXPR into a register and try again. */ | |
1947 start_sequence (); | |
1948 var = gen_reg_rtx (mode); | |
1949 expr = force_operand (expr, var); | |
1950 if (expr != var) | |
1951 emit_move_insn (var, expr); | |
1952 seq = get_insns (); | |
1953 end_sequence (); | |
1954 emit_insn_before (seq, insn); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1955 |
0 | 1956 if (validate_change (insn, loc, var, 0)) |
1957 return; | |
1958 | |
1959 /* The last chance. Try recreating the assignment in insn | |
1960 completely from scratch. */ | |
1961 set = single_set (insn); | |
1962 gcc_assert (set); | |
1963 | |
1964 start_sequence (); | |
1965 *loc = var; | |
1966 src = copy_rtx (SET_SRC (set)); | |
1967 dest = copy_rtx (SET_DEST (set)); | |
1968 src = force_operand (src, dest); | |
1969 if (src != dest) | |
1970 emit_move_insn (dest, src); | |
1971 seq = get_insns (); | |
1972 end_sequence (); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1973 |
0 | 1974 emit_insn_before (seq, insn); |
1975 delete_insn (insn); | |
1976 } | |
1977 | |
1978 | |
1979 /* Return one expansion of the accumulator recorded in struct VE. */ | |
1980 | |
1981 static rtx | |
1982 get_expansion (struct var_to_expand *ve) | |
1983 { | |
1984 rtx reg; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1985 |
0 | 1986 if (ve->reuse_expansion == 0) |
1987 reg = ve->reg; | |
1988 else | |
1989 reg = VEC_index (rtx, ve->var_expansions, ve->reuse_expansion - 1); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1990 |
0 | 1991 if (VEC_length (rtx, ve->var_expansions) == (unsigned) ve->reuse_expansion) |
1992 ve->reuse_expansion = 0; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1993 else |
0 | 1994 ve->reuse_expansion++; |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1995 |
0 | 1996 return reg; |
1997 } | |
1998 | |
1999 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2000 /* Given INSN replace the uses of the accumulator recorded in VE |
0 | 2001 with a new register. */ |
2002 | |
2003 static void | |
2004 expand_var_during_unrolling (struct var_to_expand *ve, rtx insn) | |
2005 { | |
2006 rtx new_reg, set; | |
2007 bool really_new_expansion = false; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2008 |
0 | 2009 set = single_set (insn); |
2010 gcc_assert (set); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2011 |
0 | 2012 /* Generate a new register only if the expansion limit has not been |
2013 reached. Else reuse an already existing expansion. */ | |
2014 if (PARAM_VALUE (PARAM_MAX_VARIABLE_EXPANSIONS) > ve->expansion_count) | |
2015 { | |
2016 really_new_expansion = true; | |
2017 new_reg = gen_reg_rtx (GET_MODE (ve->reg)); | |
2018 } | |
2019 else | |
2020 new_reg = get_expansion (ve); | |
2021 | |
2022 validate_change (insn, &SET_DEST (set), new_reg, 1); | |
2023 validate_change (insn, &XEXP (SET_SRC (set), ve->accum_pos), new_reg, 1); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2024 |
0 | 2025 if (apply_change_group ()) |
2026 if (really_new_expansion) | |
2027 { | |
2028 VEC_safe_push (rtx, heap, ve->var_expansions, new_reg); | |
2029 ve->expansion_count++; | |
2030 } | |
2031 } | |
2032 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2033 /* Initialize the variable expansions in loop preheader. PLACE is the |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2034 loop-preheader basic block where the initialization of the |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2035 expansions should take place. The expansions are initialized with |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2036 (-0) when the operation is plus or minus to honor sign zero. This |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2037 way we can prevent cases where the sign of the final result is |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2038 effected by the sign of the expansion. Here is an example to |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2039 demonstrate this: |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2040 |
0 | 2041 for (i = 0 ; i < n; i++) |
2042 sum += something; | |
2043 | |
2044 ==> | |
2045 | |
2046 sum += something | |
2047 .... | |
2048 i = i+1; | |
2049 sum1 += something | |
2050 .... | |
2051 i = i+1 | |
2052 sum2 += something; | |
2053 .... | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2054 |
0 | 2055 When SUM is initialized with -zero and SOMETHING is also -zero; the |
2056 final result of sum should be -zero thus the expansions sum1 and sum2 | |
2057 should be initialized with -zero as well (otherwise we will get +zero | |
2058 as the final result). */ | |
2059 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2060 static void |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2061 insert_var_expansion_initialization (struct var_to_expand *ve, |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2062 basic_block place) |
0 | 2063 { |
2064 rtx seq, var, zero_init, insn; | |
2065 unsigned i; | |
2066 enum machine_mode mode = GET_MODE (ve->reg); | |
2067 bool honor_signed_zero_p = HONOR_SIGNED_ZEROS (mode); | |
2068 | |
2069 if (VEC_length (rtx, ve->var_expansions) == 0) | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2070 return; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2071 |
0 | 2072 start_sequence (); |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2073 if (ve->op == PLUS || ve->op == MINUS) |
0 | 2074 for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++) |
2075 { | |
2076 if (honor_signed_zero_p) | |
2077 zero_init = simplify_gen_unary (NEG, mode, CONST0_RTX (mode), mode); | |
2078 else | |
2079 zero_init = CONST0_RTX (mode); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2080 |
0 | 2081 emit_move_insn (var, zero_init); |
2082 } | |
2083 else if (ve->op == MULT) | |
2084 for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++) | |
2085 { | |
2086 zero_init = CONST1_RTX (GET_MODE (var)); | |
2087 emit_move_insn (var, zero_init); | |
2088 } | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2089 |
0 | 2090 seq = get_insns (); |
2091 end_sequence (); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2092 |
0 | 2093 insn = BB_HEAD (place); |
2094 while (!NOTE_INSN_BASIC_BLOCK_P (insn)) | |
2095 insn = NEXT_INSN (insn); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2096 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2097 emit_insn_after (seq, insn); |
0 | 2098 } |
2099 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2100 /* Combine the variable expansions at the loop exit. PLACE is the |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2101 loop exit basic block where the summation of the expansions should |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2102 take place. */ |
0 | 2103 |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2104 static void |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2105 combine_var_copies_in_loop_exit (struct var_to_expand *ve, basic_block place) |
0 | 2106 { |
2107 rtx sum = ve->reg; | |
2108 rtx expr, seq, var, insn; | |
2109 unsigned i; | |
2110 | |
2111 if (VEC_length (rtx, ve->var_expansions) == 0) | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2112 return; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2113 |
0 | 2114 start_sequence (); |
2115 if (ve->op == PLUS || ve->op == MINUS) | |
2116 for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++) | |
2117 { | |
2118 sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg), | |
2119 var, sum); | |
2120 } | |
2121 else if (ve->op == MULT) | |
2122 for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++) | |
2123 { | |
2124 sum = simplify_gen_binary (MULT, GET_MODE (ve->reg), | |
2125 var, sum); | |
2126 } | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2127 |
0 | 2128 expr = force_operand (sum, ve->reg); |
2129 if (expr != ve->reg) | |
2130 emit_move_insn (ve->reg, expr); | |
2131 seq = get_insns (); | |
2132 end_sequence (); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2133 |
0 | 2134 insn = BB_HEAD (place); |
2135 while (!NOTE_INSN_BASIC_BLOCK_P (insn)) | |
2136 insn = NEXT_INSN (insn); | |
2137 | |
2138 emit_insn_after (seq, insn); | |
2139 } | |
2140 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2141 /* Apply loop optimizations in loop copies using the |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2142 data which gathered during the unrolling. Structure |
0 | 2143 OPT_INFO record that data. |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2144 |
0 | 2145 UNROLLING is true if we unrolled (not peeled) the loop. |
2146 REWRITE_ORIGINAL_BODY is true if we should also rewrite the original body of | |
2147 the loop (as it should happen in complete unrolling, but not in ordinary | |
2148 peeling of the loop). */ | |
2149 | |
2150 static void | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2151 apply_opt_in_copies (struct opt_info *opt_info, |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2152 unsigned n_copies, bool unrolling, |
0 | 2153 bool rewrite_original_loop) |
2154 { | |
2155 unsigned i, delta; | |
2156 basic_block bb, orig_bb; | |
2157 rtx insn, orig_insn, next; | |
2158 struct iv_to_split ivts_templ, *ivts; | |
2159 struct var_to_expand ve_templ, *ves; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2160 |
0 | 2161 /* Sanity check -- we need to put initialization in the original loop |
2162 body. */ | |
2163 gcc_assert (!unrolling || rewrite_original_loop); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2164 |
0 | 2165 /* Allocate the basic variables (i0). */ |
2166 if (opt_info->insns_to_split) | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2167 for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2168 allocate_basic_variable (ivts); |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2169 |
0 | 2170 for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++) |
2171 { | |
2172 bb = BASIC_BLOCK (i); | |
2173 orig_bb = get_bb_original (bb); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2174 |
0 | 2175 /* bb->aux holds position in copy sequence initialized by |
2176 duplicate_loop_to_header_edge. */ | |
2177 delta = determine_split_iv_delta ((size_t)bb->aux, n_copies, | |
2178 unrolling); | |
2179 bb->aux = 0; | |
2180 orig_insn = BB_HEAD (orig_bb); | |
2181 for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb)); insn = next) | |
2182 { | |
2183 next = NEXT_INSN (insn); | |
2184 if (!INSN_P (insn)) | |
2185 continue; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2186 |
0 | 2187 while (!INSN_P (orig_insn)) |
2188 orig_insn = NEXT_INSN (orig_insn); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2189 |
0 | 2190 ivts_templ.insn = orig_insn; |
2191 ve_templ.insn = orig_insn; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2192 |
0 | 2193 /* Apply splitting iv optimization. */ |
2194 if (opt_info->insns_to_split) | |
2195 { | |
2196 ivts = (struct iv_to_split *) | |
2197 htab_find (opt_info->insns_to_split, &ivts_templ); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2198 |
0 | 2199 if (ivts) |
2200 { | |
2201 gcc_assert (GET_CODE (PATTERN (insn)) | |
2202 == GET_CODE (PATTERN (orig_insn))); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2203 |
0 | 2204 if (!delta) |
2205 insert_base_initialization (ivts, insn); | |
2206 split_iv (ivts, insn, delta); | |
2207 } | |
2208 } | |
2209 /* Apply variable expansion optimization. */ | |
2210 if (unrolling && opt_info->insns_with_var_to_expand) | |
2211 { | |
2212 ves = (struct var_to_expand *) | |
2213 htab_find (opt_info->insns_with_var_to_expand, &ve_templ); | |
2214 if (ves) | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2215 { |
0 | 2216 gcc_assert (GET_CODE (PATTERN (insn)) |
2217 == GET_CODE (PATTERN (orig_insn))); | |
2218 expand_var_during_unrolling (ves, insn); | |
2219 } | |
2220 } | |
2221 orig_insn = NEXT_INSN (orig_insn); | |
2222 } | |
2223 } | |
2224 | |
2225 if (!rewrite_original_loop) | |
2226 return; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2227 |
0 | 2228 /* Initialize the variable expansions in the loop preheader |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2229 and take care of combining them at the loop exit. */ |
0 | 2230 if (opt_info->insns_with_var_to_expand) |
2231 { | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2232 for (ves = opt_info->var_to_expand_head; ves; ves = ves->next) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2233 insert_var_expansion_initialization (ves, opt_info->loop_preheader); |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2234 for (ves = opt_info->var_to_expand_head; ves; ves = ves->next) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2235 combine_var_copies_in_loop_exit (ves, opt_info->loop_exit); |
0 | 2236 } |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2237 |
0 | 2238 /* Rewrite also the original loop body. Find them as originals of the blocks |
2239 in the last copied iteration, i.e. those that have | |
2240 get_bb_copy (get_bb_original (bb)) == bb. */ | |
2241 for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++) | |
2242 { | |
2243 bb = BASIC_BLOCK (i); | |
2244 orig_bb = get_bb_original (bb); | |
2245 if (get_bb_copy (orig_bb) != bb) | |
2246 continue; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2247 |
0 | 2248 delta = determine_split_iv_delta (0, n_copies, unrolling); |
2249 for (orig_insn = BB_HEAD (orig_bb); | |
2250 orig_insn != NEXT_INSN (BB_END (bb)); | |
2251 orig_insn = next) | |
2252 { | |
2253 next = NEXT_INSN (orig_insn); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2254 |
0 | 2255 if (!INSN_P (orig_insn)) |
2256 continue; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2257 |
0 | 2258 ivts_templ.insn = orig_insn; |
2259 if (opt_info->insns_to_split) | |
2260 { | |
2261 ivts = (struct iv_to_split *) | |
2262 htab_find (opt_info->insns_to_split, &ivts_templ); | |
2263 if (ivts) | |
2264 { | |
2265 if (!delta) | |
2266 insert_base_initialization (ivts, orig_insn); | |
2267 split_iv (ivts, orig_insn, delta); | |
2268 continue; | |
2269 } | |
2270 } | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2271 |
0 | 2272 } |
2273 } | |
2274 } | |
2275 | |
2276 /* Release OPT_INFO. */ | |
2277 | |
2278 static void | |
2279 free_opt_info (struct opt_info *opt_info) | |
2280 { | |
2281 if (opt_info->insns_to_split) | |
2282 htab_delete (opt_info->insns_to_split); | |
2283 if (opt_info->insns_with_var_to_expand) | |
2284 { | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2285 struct var_to_expand *ves; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2286 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2287 for (ves = opt_info->var_to_expand_head; ves; ves = ves->next) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2288 VEC_free (rtx, heap, ves->var_expansions); |
0 | 2289 htab_delete (opt_info->insns_with_var_to_expand); |
2290 } | |
2291 free (opt_info); | |
2292 } |