comparison: gcc/predict.c @ 0:a06113de4d67 (first commit)

author:   kent <kent@cr.ie.u-ryukyu.ac.jp>
date:     Fri, 17 Jul 2009 14:47:48 +0900
children: 77e2b8dfacca

/* Branch prediction routines for the GNU compiler.
   Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* References:

   [1] "Branch Prediction for Free"
       Ball and Larus; PLDI '93.
   [2] "Static Branch Frequency and Program Profile Analysis"
       Wu and Larus; MICRO-27.
   [3] "Corpus-based Static Branch Prediction"
       Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "rtl.h"
#include "tm_p.h"
#include "hard-reg-set.h"
#include "basic-block.h"
#include "insn-config.h"
#include "regs.h"
#include "flags.h"
#include "output.h"
#include "function.h"
#include "except.h"
#include "toplev.h"
#include "recog.h"
#include "expr.h"
#include "predict.h"
#include "coverage.h"
#include "sreal.h"
#include "params.h"
#include "target.h"
#include "cfgloop.h"
#include "tree-flow.h"
#include "ggc.h"
#include "tree-dump.h"
#include "tree-pass.h"
#include "timevar.h"
#include "tree-scalar-evolution.h"
#include "pointer-set.h"

/* Real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
   1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX.  */
static sreal real_zero, real_one, real_almost_one, real_br_prob_base,
             real_inv_br_prob_base, real_one_half, real_bb_freq_max;

/* Rough probability estimates given names.
   PROB_VERY_UNLIKELY should be small enough so a basic block predicted
   by it gets below HOT_BB_FREQUENCY_FRACTION.  */
#define PROB_VERY_UNLIKELY  (REG_BR_PROB_BASE / 2000 - 1)
#define PROB_EVEN           (REG_BR_PROB_BASE / 2)
#define PROB_VERY_LIKELY    (REG_BR_PROB_BASE - PROB_VERY_UNLIKELY)
#define PROB_ALWAYS         (REG_BR_PROB_BASE)

static void combine_predictions_for_insn (rtx, basic_block);
static void dump_prediction (FILE *, enum br_predictor, int, basic_block, int);
static void predict_paths_leading_to (basic_block, enum br_predictor,
                                      enum prediction);
static void compute_function_frequency (void);
static void choose_function_section (void);
static bool can_predict_insn_p (const_rtx);

/* Information we hold about each branch predictor.
   Filled using information from predict.def.  */

struct predictor_info
{
  const char *const name;   /* Name used in the debugging dumps.  */
  const int hitrate;        /* Expected hitrate used by
                               predict_insn_def call.  */
  const int flags;
};

/* Use given predictor without Dempster-Shafer theory if it matches
   using first_match heuristics.  */
#define PRED_FLAG_FIRST_MATCH 1

/* Convert a hitrate given in percent to our fixed-point representation.  */

#define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)

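/* Illustrative sketch, not part of the original file (kept under #if 0 so
   it is never compiled): with REG_BR_PROB_BASE == 10000, HITRATE maps a
   percentage onto the fixed-point probability scale with rounding.  */
#if 0
static void
hitrate_example (void)
{
  /* HITRATE (75) expands to ((int) (75 * 10000 + 50) / 100) == 7500,
     i.e. a 75% hitrate becomes 7500 on the REG_BR_PROB_BASE scale.  */
  gcc_assert (HITRATE (75) == 7500);
  gcc_assert (HITRATE (99) == 9900);
}
#endif
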
#define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
static const struct predictor_info predictor_info[] = {
#include "predict.def"

  /* Upper bound on predictors.  */
  {NULL, 0, 0}
};
#undef DEF_PREDICTOR

/* Return TRUE if frequency FREQ is considered to be hot.  */

static inline bool
maybe_hot_frequency_p (int freq)
{
  if (!profile_info || !flag_branch_probabilities)
    {
      if (cfun->function_frequency == FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
        return false;
      if (cfun->function_frequency == FUNCTION_FREQUENCY_HOT)
        return true;
    }
  if (profile_status == PROFILE_ABSENT)
    return true;
  if (freq < BB_FREQ_MAX / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION))
    return false;
  return true;
}

/* Return TRUE if execution count COUNT is considered to be hot.  */

static inline bool
maybe_hot_count_p (gcov_type count)
{
  if (profile_status != PROFILE_READ)
    return true;
  /* Code executed at most once is not hot.  */
  if (profile_info->runs >= count)
    return false;
  return (count
          > profile_info->sum_max / PARAM_VALUE (HOT_BB_COUNT_FRACTION));
}

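/* Illustrative sketch, not part of the original file.  Assuming the
   documented default HOT_BB_COUNT_FRACTION == 10000 and a hypothetical
   profile with sum_max == 1000000 and runs == 10, the hotness test above
   reduces to the check below.  */
#if 0
static bool
hotness_example (gcov_type count)
{
  /* The threshold is 1000000 / 10000 == 100, so count == 150 is hot,
     while count == 10 (once per run) and count == 50 are not.  */
  if (10 /* runs */ >= count)
    return false;
  return count > 1000000 / 10000;
}
#endif
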
/* Return true in case BB can be CPU intensive and should be optimized
   for maximal performance.  */

bool
maybe_hot_bb_p (const_basic_block bb)
{
  if (profile_status == PROFILE_READ)
    return maybe_hot_count_p (bb->count);
  return maybe_hot_frequency_p (bb->frequency);
}

/* Return true if the call can be hot.  */

bool
cgraph_maybe_hot_edge_p (struct cgraph_edge *edge)
{
  if (profile_info && flag_branch_probabilities
      && (edge->count
          <= profile_info->sum_max / PARAM_VALUE (HOT_BB_COUNT_FRACTION)))
    return false;
  if (lookup_attribute ("cold", DECL_ATTRIBUTES (edge->callee->decl))
      || lookup_attribute ("cold", DECL_ATTRIBUTES (edge->caller->decl)))
    return false;
  if (lookup_attribute ("hot", DECL_ATTRIBUTES (edge->caller->decl)))
    return true;
  if (flag_guess_branch_prob
      && edge->frequency < (CGRAPH_FREQ_MAX
                            / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)))
    return false;
  return true;
}

/* Return true in case edge E can be CPU intensive and should be optimized
   for maximal performance.  */

bool
maybe_hot_edge_p (edge e)
{
  if (profile_status == PROFILE_READ)
    return maybe_hot_count_p (e->count);
  return maybe_hot_frequency_p (EDGE_FREQUENCY (e));
}

/* Return true in case BB is probably never executed.  */

bool
probably_never_executed_bb_p (const_basic_block bb)
{
  if (profile_info && flag_branch_probabilities)
    return ((bb->count + profile_info->runs / 2) / profile_info->runs) == 0;
  if ((!profile_info || !flag_branch_probabilities)
      && cfun->function_frequency == FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
    return true;
  return false;
}

/* Return true when function FUN should always be optimized for size.  */

bool
optimize_function_for_size_p (struct function *fun)
{
  return (optimize_size
          || (fun && (fun->function_frequency
                      == FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)));
}

/* Return true when function FUN should always be optimized for speed.  */

bool
optimize_function_for_speed_p (struct function *fun)
{
  return !optimize_function_for_size_p (fun);
}

/* Return TRUE when BB should be optimized for size.  */

bool
optimize_bb_for_size_p (const_basic_block bb)
{
  return optimize_function_for_size_p (cfun) || !maybe_hot_bb_p (bb);
}

/* Return TRUE when BB should be optimized for speed.  */

bool
optimize_bb_for_speed_p (const_basic_block bb)
{
  return !optimize_bb_for_size_p (bb);
}

/* Return TRUE when edge E should be optimized for size.  */

bool
optimize_edge_for_size_p (edge e)
{
  return optimize_function_for_size_p (cfun) || !maybe_hot_edge_p (e);
}

/* Return TRUE when edge E should be optimized for speed.  */

bool
optimize_edge_for_speed_p (edge e)
{
  return !optimize_edge_for_size_p (e);
}

/* Return TRUE when the current instruction should be optimized for size.  */

bool
optimize_insn_for_size_p (void)
{
  return optimize_function_for_size_p (cfun) || !crtl->maybe_hot_insn_p;
}

/* Return TRUE when the current instruction should be optimized for speed.  */

bool
optimize_insn_for_speed_p (void)
{
  return !optimize_insn_for_size_p ();
}

/* Return TRUE when LOOP should be optimized for size.  */

bool
optimize_loop_for_size_p (struct loop *loop)
{
  return optimize_bb_for_size_p (loop->header);
}

/* Return TRUE when LOOP should be optimized for speed.  */

bool
optimize_loop_for_speed_p (struct loop *loop)
{
  return optimize_bb_for_speed_p (loop->header);
}

/* Return TRUE when LOOP nest should be optimized for speed.  */

bool
optimize_loop_nest_for_speed_p (struct loop *loop)
{
  struct loop *l = loop;
  if (optimize_loop_for_speed_p (loop))
    return true;
  l = loop->inner;
  while (l && l != loop)
    {
      if (optimize_loop_for_speed_p (l))
        return true;
      if (l->inner)
        l = l->inner;
      else if (l->next)
        l = l->next;
      else
        {
          while (l != loop && !l->next)
            l = loop_outer (l);
          if (l != loop)
            l = l->next;
        }
    }
  return false;
}

/* Return TRUE when LOOP nest should be optimized for size.  */

bool
optimize_loop_nest_for_size_p (struct loop *loop)
{
  return !optimize_loop_nest_for_speed_p (loop);
}

/* Return true when edge E is likely to be well predictable by branch
   predictor.  */

bool
predictable_edge_p (edge e)
{
  if (profile_status == PROFILE_ABSENT)
    return false;
  if ((e->probability
       <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100)
      || (REG_BR_PROB_BASE - e->probability
          <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100))
    return true;
  return false;
}

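/* Illustrative sketch, not part of the original file.  Assuming
   PARAM_PREDICTABLE_BRANCH_OUTCOME has its documented default of 2 and
   REG_BR_PROB_BASE == 10000, predictable_edge_p accepts an edge whose
   probability lies within 2% of either extreme.  */
#if 0
static bool
predictability_example (int probability)
{
  int cutoff = 2 * 10000 / 100;  /* 200, i.e. 2%.  */
  /* probability == 9900 (99%) and probability == 150 (1.5%) pass;
     probability == 7000 (70%) does not.  */
  return probability <= cutoff || 10000 - probability <= cutoff;
}
#endif
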
/* Set RTL expansion for BB profile.  */

void
rtl_profile_for_bb (basic_block bb)
{
  crtl->maybe_hot_insn_p = maybe_hot_bb_p (bb);
}

/* Set RTL expansion for edge profile.  */

void
rtl_profile_for_edge (edge e)
{
  crtl->maybe_hot_insn_p = maybe_hot_edge_p (e);
}

/* Set RTL expansion to default mode (i.e. when profile info is not known).  */

void
default_rtl_profile (void)
{
  crtl->maybe_hot_insn_p = true;
}

/* Return true if one of the outgoing edges is already predicted by
   PREDICTOR.  */

bool
rtl_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
{
  rtx note;
  if (!INSN_P (BB_END (bb)))
    return false;
  for (note = REG_NOTES (BB_END (bb)); note; note = XEXP (note, 1))
    if (REG_NOTE_KIND (note) == REG_BR_PRED
        && INTVAL (XEXP (XEXP (note, 0), 0)) == (int) predictor)
      return true;
  return false;
}

/* This map contains for each basic block the list of predictions for its
   outgoing edges.  */

static struct pointer_map_t *bb_predictions;

/* Return true if one of the outgoing edges is already predicted by
   PREDICTOR.  */

bool
gimple_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
{
  struct edge_prediction *i;
  void **preds = pointer_map_contains (bb_predictions, bb);

  if (!preds)
    return false;

  for (i = (struct edge_prediction *) *preds; i; i = i->ep_next)
    if (i->ep_predictor == predictor)
      return true;
  return false;
}

/* Return true when the probability of edge is reliable.

   The profile guessing code is good at predicting branch outcome (i.e.
   taken/not taken), which it gets right slightly over 75% of the time.
   It is, however, notoriously poor at predicting the probability itself:
   in general the guessed profile appears a lot flatter (with probabilities
   closer to 50%) than reality, so it is a bad idea to use it to drive
   optimizations such as disabling dynamic branch prediction for well
   predictable branches.

   There are two exceptions - edges leading to noreturn blocks and edges
   predicted by the number-of-iterations heuristics are predicted well.
   This function should be able to distinguish those, but at the moment
   it simply checks for the noreturn heuristic, which is the only one
   giving a probability over 99% or below 1%.  In the future we might want
   to propagate reliability information across the CFG if we find this
   information useful in multiple places.  */

static bool
probability_reliable_p (int prob)
{
  return (profile_status == PROFILE_READ
          || (profile_status == PROFILE_GUESSED
              && (prob <= HITRATE (1) || prob >= HITRATE (99))));
}

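/* Illustrative sketch, not part of the original file: for a guessed
   profile the test above trusts only near-certain probabilities.  With
   REG_BR_PROB_BASE == 10000 the bounds are HITRATE (1) == 100 and
   HITRATE (99) == 9900.  */
#if 0
static bool
guessed_reliability_example (int prob)
{
  /* prob == 5500 (a flat 55% guess) -> false;
     prob == 9950 (e.g. the noreturn heuristic) -> true.  */
  return prob <= HITRATE (1) || prob >= HITRATE (99);
}
#endif
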
/* Same predicate as above, working on edges.  */

bool
edge_probability_reliable_p (const_edge e)
{
  return probability_reliable_p (e->probability);
}

/* Same predicate as edge_probability_reliable_p, working on notes.  */

bool
br_prob_note_reliable_p (const_rtx note)
{
  gcc_assert (REG_NOTE_KIND (note) == REG_BR_PROB);
  return probability_reliable_p (INTVAL (XEXP (note, 0)));
}

static void
predict_insn (rtx insn, enum br_predictor predictor, int probability)
{
  gcc_assert (any_condjump_p (insn));
  if (!flag_guess_branch_prob)
    return;

  add_reg_note (insn, REG_BR_PRED,
                gen_rtx_CONCAT (VOIDmode,
                                GEN_INT ((int) predictor),
                                GEN_INT ((int) probability)));
}

/* Predict insn by given predictor.  */

void
predict_insn_def (rtx insn, enum br_predictor predictor,
                  enum prediction taken)
{
  int probability = predictor_info[(int) predictor].hitrate;

  if (taken != TAKEN)
    probability = REG_BR_PROB_BASE - probability;

  predict_insn (insn, predictor, probability);
}

/* Predict edge E with given probability if possible.  */

void
rtl_predict_edge (edge e, enum br_predictor predictor, int probability)
{
  rtx last_insn;
  last_insn = BB_END (e->src);

  /* We can store the branch prediction information only about
     conditional jumps.  */
  if (!any_condjump_p (last_insn))
    return;

  /* We always store the probability of branching.  */
  if (e->flags & EDGE_FALLTHRU)
    probability = REG_BR_PROB_BASE - probability;

  predict_insn (last_insn, predictor, probability);
}

/* Predict edge E with the given PROBABILITY.  */

void
gimple_predict_edge (edge e, enum br_predictor predictor, int probability)
{
  gcc_assert (profile_status != PROFILE_GUESSED);
  if ((e->src != ENTRY_BLOCK_PTR && EDGE_COUNT (e->src->succs) > 1)
      && flag_guess_branch_prob && optimize)
    {
      struct edge_prediction *i = XNEW (struct edge_prediction);
      void **preds = pointer_map_insert (bb_predictions, e->src);

      i->ep_next = (struct edge_prediction *) *preds;
      *preds = i;
      i->ep_probability = probability;
      i->ep_predictor = predictor;
      i->ep_edge = e;
    }
}

/* Remove all predictions on given basic block that are attached
   to edge E.  */

void
remove_predictions_associated_with_edge (edge e)
{
  void **preds;

  if (!bb_predictions)
    return;

  preds = pointer_map_contains (bb_predictions, e->src);

  if (preds)
    {
      struct edge_prediction **prediction = (struct edge_prediction **) preds;
      struct edge_prediction *next;

      while (*prediction)
        {
          if ((*prediction)->ep_edge == e)
            {
              next = (*prediction)->ep_next;
              free (*prediction);
              *prediction = next;
            }
          else
            prediction = &((*prediction)->ep_next);
        }
    }
}

/* Clears the list of predictions stored for BB.  */

static void
clear_bb_predictions (basic_block bb)
{
  void **preds = pointer_map_contains (bb_predictions, bb);
  struct edge_prediction *pred, *next;

  if (!preds)
    return;

  for (pred = (struct edge_prediction *) *preds; pred; pred = next)
    {
      next = pred->ep_next;
      free (pred);
    }
  *preds = NULL;
}

/* Return true when we can store prediction on insn INSN.
   At the moment we represent predictions only on conditional
   jumps, not at computed jump or other complicated cases.  */

static bool
can_predict_insn_p (const_rtx insn)
{
  return (JUMP_P (insn)
          && any_condjump_p (insn)
          && EDGE_COUNT (BLOCK_FOR_INSN (insn)->succs) >= 2);
}

/* Predict edge E by given predictor if possible.  */

void
predict_edge_def (edge e, enum br_predictor predictor,
                  enum prediction taken)
{
  int probability = predictor_info[(int) predictor].hitrate;

  if (taken != TAKEN)
    probability = REG_BR_PROB_BASE - probability;

  predict_edge (e, predictor, probability);
}

/* Invert all branch predictions or probability notes in the INSN.  This needs
   to be done each time we invert the condition used by the jump.  */

void
invert_br_probabilities (rtx insn)
{
  rtx note;

  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
    if (REG_NOTE_KIND (note) == REG_BR_PROB)
      XEXP (note, 0) = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (note, 0)));
    else if (REG_NOTE_KIND (note) == REG_BR_PRED)
      XEXP (XEXP (note, 0), 1)
        = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1)));
}

/* Dump information about the branch prediction to the output file.  */

static void
dump_prediction (FILE *file, enum br_predictor predictor, int probability,
                 basic_block bb, int used)
{
  edge e;
  edge_iterator ei;

  if (!file)
    return;

  FOR_EACH_EDGE (e, ei, bb->succs)
    if (! (e->flags & EDGE_FALLTHRU))
      break;

  fprintf (file, " %s heuristics%s: %.1f%%",
           predictor_info[predictor].name,
           used ? "" : " (ignored)", probability * 100.0 / REG_BR_PROB_BASE);

  if (bb->count)
    {
      fprintf (file, " exec ");
      fprintf (file, HOST_WIDEST_INT_PRINT_DEC, bb->count);
      if (e)
        {
          fprintf (file, " hit ");
          fprintf (file, HOST_WIDEST_INT_PRINT_DEC, e->count);
          fprintf (file, " (%.1f%%)", e->count * 100.0 / bb->count);
        }
    }

  fprintf (file, "\n");
}

/* We cannot predict the probabilities of outgoing edges of BB.  Set them
   evenly and hope for the best.  */

static void
set_even_probabilities (basic_block bb)
{
  int nedges = 0;
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, bb->succs)
    if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
      nedges++;
  FOR_EACH_EDGE (e, ei, bb->succs)
    if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
      e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
    else
      e->probability = 0;
}

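/* Illustrative sketch, not part of the original file: the rounding in
   set_even_probabilities keeps the split as balanced as integer division
   allows, with REG_BR_PROB_BASE == 10000.  */
#if 0
static int
even_probability_example (int nedges)
{
  /* nedges == 2 -> 5000 per edge; nedges == 3 -> 3333 per edge.  */
  return (10000 + nedges / 2) / nedges;
}
#endif
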
/* Combine all REG_BR_PRED notes into a single probability and attach a
   REG_BR_PROB note if not already present.  Remove the now useless
   REG_BR_PRED notes.  */

static void
combine_predictions_for_insn (rtx insn, basic_block bb)
{
  rtx prob_note;
  rtx *pnote;
  rtx note;
  int best_probability = PROB_EVEN;
  int best_predictor = END_PREDICTORS;
  int combined_probability = REG_BR_PROB_BASE / 2;
  int d;
  bool first_match = false;
  bool found = false;

  if (!can_predict_insn_p (insn))
    {
      set_even_probabilities (bb);
      return;
    }

  prob_note = find_reg_note (insn, REG_BR_PROB, 0);
  pnote = &REG_NOTES (insn);
  if (dump_file)
    fprintf (dump_file, "Predictions for insn %i bb %i\n", INSN_UID (insn),
             bb->index);

  /* We implement "first match" heuristics and use the probability guessed
     by the predictor with the smallest index.  */
  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
    if (REG_NOTE_KIND (note) == REG_BR_PRED)
      {
        int predictor = INTVAL (XEXP (XEXP (note, 0), 0));
        int probability = INTVAL (XEXP (XEXP (note, 0), 1));

        found = true;
        if (best_predictor > predictor)
          best_probability = probability, best_predictor = predictor;

        d = (combined_probability * probability
             + (REG_BR_PROB_BASE - combined_probability)
             * (REG_BR_PROB_BASE - probability));

        /* Use FP math to avoid overflows of 32-bit integers.  */
        if (d == 0)
          /* If one probability is 0% and one 100%, avoid division by zero.  */
          combined_probability = REG_BR_PROB_BASE / 2;
        else
          combined_probability = (((double) combined_probability) * probability
                                  * REG_BR_PROB_BASE / d + 0.5);
      }

  /* Decide which heuristic to use.  In case we didn't match anything,
     use the no_prediction heuristic; in case we did match, use either
     first match or Dempster-Shafer theory depending on the flags.  */

  if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
    first_match = true;

  if (!found)
    dump_prediction (dump_file, PRED_NO_PREDICTION,
                     combined_probability, bb, true);
  else
    {
      dump_prediction (dump_file, PRED_DS_THEORY, combined_probability,
                       bb, !first_match);
      dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability,
                       bb, first_match);
    }

  if (first_match)
    combined_probability = best_probability;
  dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);

  while (*pnote)
    {
      if (REG_NOTE_KIND (*pnote) == REG_BR_PRED)
        {
          int predictor = INTVAL (XEXP (XEXP (*pnote, 0), 0));
          int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1));

          dump_prediction (dump_file, predictor, probability, bb,
                           !first_match || best_predictor == predictor);
          *pnote = XEXP (*pnote, 1);
        }
      else
        pnote = &XEXP (*pnote, 1);
    }

  if (!prob_note)
    {
      add_reg_note (insn, REG_BR_PROB, GEN_INT (combined_probability));

      /* Save the prediction into CFG in case we are seeing a non-degenerate
         conditional jump.  */
      if (!single_succ_p (bb))
        {
          BRANCH_EDGE (bb)->probability = combined_probability;
          FALLTHRU_EDGE (bb)->probability
            = REG_BR_PROB_BASE - combined_probability;
        }
    }
  else if (!single_succ_p (bb))
    {
      int prob = INTVAL (XEXP (prob_note, 0));

      BRANCH_EDGE (bb)->probability = prob;
      FALLTHRU_EDGE (bb)->probability = REG_BR_PROB_BASE - prob;
    }
  else
    single_succ_edge (bb)->probability = REG_BR_PROB_BASE;
}

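/* Illustrative sketch, not part of the original file: the Dempster-Shafer
   combination step used above, with REG_BR_PROB_BASE == 10000.  Starting
   from the neutral 5000, folding in one 75% predictor yields 7500, and
   folding in a second independent 75% predictor yields 9000.  */
#if 0
static int
ds_combine_example (int combined, int probability)
{
  double d = (double) combined * probability
             + (double) (10000 - combined) * (10000 - probability);
  if (d == 0)
    return 10000 / 2;
  return (int) ((double) combined * probability * 10000 / d + 0.5);
}
/* ds_combine_example (5000, 7500) == 7500;
   ds_combine_example (7500, 7500) == 9000.  */
#endif
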
/* Combine predictions into a single probability and store it into the CFG.
   Remove the now useless prediction entries.  */

static void
combine_predictions_for_bb (basic_block bb)
{
  int best_probability = PROB_EVEN;
  int best_predictor = END_PREDICTORS;
  int combined_probability = REG_BR_PROB_BASE / 2;
  int d;
  bool first_match = false;
  bool found = false;
  struct edge_prediction *pred;
  int nedges = 0;
  edge e, first = NULL, second = NULL;
  edge_iterator ei;
  void **preds;

  FOR_EACH_EDGE (e, ei, bb->succs)
    if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
      {
        nedges++;
        if (first && !second)
          second = e;
        if (!first)
          first = e;
      }

  /* When there is no successor or only one choice, prediction is easy.

     We are lazy for now and predict only basic blocks with two outgoing
     edges.  It is possible to predict the generic case too, but we have to
     ignore first match heuristics and do more involved combining.  Implement
     this later.  */
  if (nedges != 2)
    {
      if (!bb->count)
        set_even_probabilities (bb);
      clear_bb_predictions (bb);
      if (dump_file)
        fprintf (dump_file, "%i edges in bb %i predicted to even probabilities\n",
                 nedges, bb->index);
      return;
    }

  if (dump_file)
    fprintf (dump_file, "Predictions for bb %i\n", bb->index);

  preds = pointer_map_contains (bb_predictions, bb);
  if (preds)
    {
      /* We implement "first match" heuristics and use the probability guessed
         by the predictor with the smallest index.  */
      for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
        {
          int predictor = pred->ep_predictor;
          int probability = pred->ep_probability;

          if (pred->ep_edge != first)
            probability = REG_BR_PROB_BASE - probability;

          found = true;
          /* First match heuristics would be wildly confused if we predicted
             both directions.  */
          if (best_predictor > predictor)
            {
              struct edge_prediction *pred2;
              int prob = probability;

              for (pred2 = (struct edge_prediction *) *preds; pred2;
                   pred2 = pred2->ep_next)
                if (pred2 != pred && pred2->ep_predictor == pred->ep_predictor)
                  {
                    int probability2 = pred2->ep_probability;

                    if (pred2->ep_edge != first)
                      probability2 = REG_BR_PROB_BASE - probability2;

                    if ((probability < REG_BR_PROB_BASE / 2) !=
                        (probability2 < REG_BR_PROB_BASE / 2))
                      break;

                    /* If the same predictor later gave a better result, go
                       for it!  */
                    if ((probability >= REG_BR_PROB_BASE / 2 && (probability2 > probability))
                        || (probability <= REG_BR_PROB_BASE / 2 && (probability2 < probability)))
                      prob = probability2;
                  }
              if (!pred2)
                best_probability = prob, best_predictor = predictor;
            }

          d = (combined_probability * probability
               + (REG_BR_PROB_BASE - combined_probability)
               * (REG_BR_PROB_BASE - probability));

          /* Use FP math to avoid overflows of 32-bit integers.  */
          if (d == 0)
            /* If one probability is 0% and one 100%, avoid division by
               zero.  */
            combined_probability = REG_BR_PROB_BASE / 2;
          else
            combined_probability = (((double) combined_probability)
                                    * probability
                                    * REG_BR_PROB_BASE / d + 0.5);
        }
    }

  /* Decide which heuristic to use.  In case we didn't match anything,
     use the no_prediction heuristic; in case we did match, use either
     first match or Dempster-Shafer theory depending on the flags.  */

  if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
    first_match = true;

  if (!found)
    dump_prediction (dump_file, PRED_NO_PREDICTION, combined_probability, bb, true);
  else
    {
      dump_prediction (dump_file, PRED_DS_THEORY, combined_probability, bb,
                       !first_match);
      dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability, bb,
                       first_match);
    }

  if (first_match)
    combined_probability = best_probability;
  dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);

  if (preds)
    {
      for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
        {
          int predictor = pred->ep_predictor;
          int probability = pred->ep_probability;

          if (pred->ep_edge != EDGE_SUCC (bb, 0))
            probability = REG_BR_PROB_BASE - probability;
          dump_prediction (dump_file, predictor, probability, bb,
                           !first_match || best_predictor == predictor);
        }
    }
  clear_bb_predictions (bb);

  if (!bb->count)
    {
      first->probability = combined_probability;
      second->probability = REG_BR_PROB_BASE - combined_probability;
    }
}

/* Predict edge probabilities by exploiting loop structure.  */

static void
predict_loops (void)
{
  loop_iterator li;
  struct loop *loop;

  scev_initialize ();

  /* Try to predict out blocks in a loop that are not part of a
     natural loop.  */
  FOR_EACH_LOOP (li, loop, 0)
    {
      basic_block bb, *bbs;
      unsigned j, n_exits;
      VEC (edge, heap) *exits;
      struct tree_niter_desc niter_desc;
      edge ex;

      exits = get_loop_exit_edges (loop);
      n_exits = VEC_length (edge, exits);

      for (j = 0; VEC_iterate (edge, exits, j, ex); j++)
        {
          tree niter = NULL;
          HOST_WIDE_INT nitercst;
          int max = PARAM_VALUE (PARAM_MAX_PREDICTED_ITERATIONS);
          int probability;
          enum br_predictor predictor;

          if (number_of_iterations_exit (loop, ex, &niter_desc, false))
            niter = niter_desc.niter;
          if (!niter || TREE_CODE (niter_desc.niter) != INTEGER_CST)
            niter = loop_niter_by_eval (loop, ex);

          if (TREE_CODE (niter) == INTEGER_CST)
            {
              if (host_integerp (niter, 1)
                  && compare_tree_int (niter, max - 1) == -1)
                nitercst = tree_low_cst (niter, 1) + 1;
              else
                nitercst = max;
              predictor = PRED_LOOP_ITERATIONS;
            }
          /* If we have just one exit and we can derive some information about
             the number of iterations of the loop from the statements inside
             the loop, use it to predict this exit.  */
          else if (n_exits == 1)
            {
              nitercst = estimated_loop_iterations_int (loop, false);
              if (nitercst < 0)
                continue;
              if (nitercst > max)
                nitercst = max;

              predictor = PRED_LOOP_ITERATIONS_GUESSED;
            }
          else
            continue;

          probability = ((REG_BR_PROB_BASE + nitercst / 2) / nitercst);
          predict_edge (ex, predictor, probability);
        }
      VEC_free (edge, heap, exits);

      bbs = get_loop_body (loop);

      for (j = 0; j < loop->num_nodes; j++)
        {
          int header_found = 0;
          edge e;
          edge_iterator ei;

          bb = bbs[j];

          /* Bypass loop heuristics on continue statement.  These
             statements construct loops via "non-loop" constructs
             in the source language and are better handled
             separately.  */
          if (predicted_by_p (bb, PRED_CONTINUE))
            continue;

          /* Loop branch heuristics - predict an edge back to a
             loop's head as taken.  */
          if (bb == loop->latch)
            {
              e = find_edge (loop->latch, loop->header);
              if (e)
                {
                  header_found = 1;
                  predict_edge_def (e, PRED_LOOP_BRANCH, TAKEN);
                }
            }

          /* Loop exit heuristics - predict an edge exiting the loop if the
             conditional has no loop header successors as not taken.  */
          if (!header_found
              /* If we already used more reliable loop exit predictors, do not
                 bother with PRED_LOOP_EXIT.  */
              && !predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
              && !predicted_by_p (bb, PRED_LOOP_ITERATIONS))
            {
              /* For loops with many exits we don't want to predict all exits
                 with a pretty large probability, because if all exits are
                 considered in a row, the loop would be predicted to iterate
                 almost never.  The code to divide the probability by the
                 number of exits is very rough.  It should compute the number
                 of exits taken in each path through the function (not the
                 overall number of exits, which might be a lot higher for
                 loops with wide switch statements in them) and compute the
                 n-th square root.

                 We limit the minimal probability to 2% to avoid
                 EDGE_PROBABILITY_RELIABLE from trusting the branch prediction,
                 as this was causing a regression in the perl benchmark
                 containing such a wide loop.  */

              int probability = ((REG_BR_PROB_BASE
                                  - predictor_info [(int) PRED_LOOP_EXIT].hitrate)
                                 / n_exits);
              if (probability < HITRATE (2))
                probability = HITRATE (2);
              FOR_EACH_EDGE (e, ei, bb->succs)
                if (e->dest->index < NUM_FIXED_BLOCKS
                    || !flow_bb_inside_loop_p (loop, e->dest))
                  predict_edge (e, PRED_LOOP_EXIT, probability);
            }
        }

      /* Free basic blocks from get_loop_body.  */
      free (bbs);
    }

  scev_finalize ();
}

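/* Illustrative sketch, not part of the original file: the exit edge
   probability computed above is "one in nitercst" with rounding, so with
   REG_BR_PROB_BASE == 10000 a loop known to iterate 10 times gets
   (10000 + 5) / 10 == 1000, i.e. a 10% chance of exiting on any given
   iteration.  */
#if 0
static int
loop_exit_probability_example (HOST_WIDE_INT nitercst)
{
  return (10000 + nitercst / 2) / nitercst;
}
#endif
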
/* Attempt to predict probabilities of BB outgoing edges using local
   properties.  */

static void
bb_estimate_probability_locally (basic_block bb)
{
  rtx last_insn = BB_END (bb);
  rtx cond;

  if (! can_predict_insn_p (last_insn))
    return;
  cond = get_condition (last_insn, NULL, false, false);
  if (! cond)
    return;

  /* Try "pointer heuristic."
     A comparison ptr == 0 is predicted as false.
     Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
  if (COMPARISON_P (cond)
      && ((REG_P (XEXP (cond, 0)) && REG_POINTER (XEXP (cond, 0)))
          || (REG_P (XEXP (cond, 1)) && REG_POINTER (XEXP (cond, 1)))))
    {
      if (GET_CODE (cond) == EQ)
        predict_insn_def (last_insn, PRED_POINTER, NOT_TAKEN);
      else if (GET_CODE (cond) == NE)
        predict_insn_def (last_insn, PRED_POINTER, TAKEN);
    }
  else

  /* Try "opcode heuristic."
     EQ tests are usually false and NE tests are usually true.  Also,
     most quantities are positive, so we can make the appropriate guesses
     about signed comparisons against zero.  */
    switch (GET_CODE (cond))
      {
      case CONST_INT:
        /* Unconditional branch.  */
        predict_insn_def (last_insn, PRED_UNCONDITIONAL,
                          cond == const0_rtx ? NOT_TAKEN : TAKEN);
        break;

      case EQ:
      case UNEQ:
        /* Floating point comparisons appear to behave in a very
           unpredictable way because of the special role of = tests in
           FP code.  */
        if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
          ;
        /* Comparisons with 0 are often used for booleans and there is
           nothing useful to predict about them.  */
        else if (XEXP (cond, 1) == const0_rtx
                 || XEXP (cond, 0) == const0_rtx)
          ;
        else
          predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, NOT_TAKEN);
        break;

      case NE:
      case LTGT:
        /* Floating point comparisons appear to behave in a very
           unpredictable way because of the special role of = tests in
           FP code.  */
        if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
          ;
        /* Comparisons with 0 are often used for booleans and there is
           nothing useful to predict about them.  */
        else if (XEXP (cond, 1) == const0_rtx
                 || XEXP (cond, 0) == const0_rtx)
          ;
        else
          predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, TAKEN);
        break;

      case ORDERED:
        predict_insn_def (last_insn, PRED_FPOPCODE, TAKEN);
        break;

      case UNORDERED:
        predict_insn_def (last_insn, PRED_FPOPCODE, NOT_TAKEN);
        break;

      case LE:
      case LT:
        if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
            || XEXP (cond, 1) == constm1_rtx)
          predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, NOT_TAKEN);
        break;

      case GE:
      case GT:
        if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
            || XEXP (cond, 1) == constm1_rtx)
          predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, TAKEN);
        break;

      default:
        break;
      }
}

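/* Illustrative sketch, not part of the original file: a source-level view
   of the branches the pointer and opcode heuristics above would predict.  */
#if 0
void
opcode_heuristic_example (int *p, int *q, int n)
{
  if (p == q)   /* Pointer heuristic: pointers are rarely equal; not taken.  */
    ;
  if (n == 42)  /* Opcode heuristic: EQ is usually false; not taken.  */
    ;
  if (n < 0)    /* Most quantities are positive; predicted not taken.  */
    ;
  if (n > 0)    /* Likewise; predicted taken.  */
    ;
}
#endif
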
/* Set edge->probability for each successor edge of BB.  */

void
guess_outgoing_edge_probabilities (basic_block bb)
{
  bb_estimate_probability_locally (bb);
  combine_predictions_for_insn (BB_END (bb), bb);
}

static tree expr_expected_value (tree, bitmap);

/* Helper function for expr_expected_value.  */

static tree
expr_expected_value_1 (tree type, tree op0, enum tree_code code, tree op1,
                       bitmap visited)
{
  gimple def;

  if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
    {
      if (TREE_CONSTANT (op0))
        return op0;

      if (code != SSA_NAME)
        return NULL_TREE;

      def = SSA_NAME_DEF_STMT (op0);

      /* If we were already here, break the infinite cycle.  */
      if (bitmap_bit_p (visited, SSA_NAME_VERSION (op0)))
        return NULL;
      bitmap_set_bit (visited, SSA_NAME_VERSION (op0));

      if (gimple_code (def) == GIMPLE_PHI)
        {
          /* All the arguments of the PHI node must have the same constant
             expected value.  */
          int i, n = gimple_phi_num_args (def);
          tree val = NULL, new_val;

          for (i = 0; i < n; i++)
            {
              tree arg = PHI_ARG_DEF (def, i);

              /* If this PHI has itself as an argument, we cannot
                 determine the expected value of this argument.  However,
                 if we can find an expected constant value for the other
                 PHI args then we can still be sure that this is
                 likely a constant.  So be optimistic and just
                 continue with the next argument.  */
              if (arg == PHI_RESULT (def))
                continue;

              new_val = expr_expected_value (arg, visited);
              if (!new_val)
                return NULL;
              if (!val)
                val = new_val;
              else if (!operand_equal_p (val, new_val, false))
                return NULL;
            }
          return val;
        }
      if (is_gimple_assign (def))
        {
          if (gimple_assign_lhs (def) != op0)
            return NULL;

          return expr_expected_value_1 (TREE_TYPE (gimple_assign_lhs (def)),
                                        gimple_assign_rhs1 (def),
                                        gimple_assign_rhs_code (def),
                                        gimple_assign_rhs2 (def),
                                        visited);
        }

      if (is_gimple_call (def))
        {
          tree decl = gimple_call_fndecl (def);
          if (!decl)
            return NULL;
          if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL
              && DECL_FUNCTION_CODE (decl) == BUILT_IN_EXPECT)
            {
              tree val;

              if (gimple_call_num_args (def) != 2)
                return NULL;
              val = gimple_call_arg (def, 0);
              if (TREE_CONSTANT (val))
                return val;
              return gimple_call_arg (def, 1);
            }
        }

      return NULL;
    }

  if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
    {
      tree res;
      op0 = expr_expected_value (op0, visited);
      if (!op0)
        return NULL;
      op1 = expr_expected_value (op1, visited);
      if (!op1)
        return NULL;
      res = fold_build2 (code, type, op0, op1);
      if (TREE_CONSTANT (res))
        return res;
      return NULL;
    }
  if (get_gimple_rhs_class (code) == GIMPLE_UNARY_RHS)
    {
      tree res;
      op0 = expr_expected_value (op0, visited);
      if (!op0)
        return NULL;
      res = fold_build1 (code, type, op0);
      if (TREE_CONSTANT (res))
        return res;
      return NULL;
    }
  return NULL;
}

/* Return the constant EXPR will likely have at execution time, or NULL
   if unknown.  The function is used by the builtin_expect branch predictor,
   so the evidence must come from this construct plus additional possible
   constant folding.

   We may want to implement a more involved value guess (such as value range
   propagation based prediction), but such tricks shall go to a new
   implementation.  */

static tree
expr_expected_value (tree expr, bitmap visited)
{
  enum tree_code code;
  tree op0, op1;

  if (TREE_CONSTANT (expr))
    return expr;

  extract_ops_from_tree (expr, &code, &op0, &op1);
  return expr_expected_value_1 (TREE_TYPE (expr),
                                op0, code, op1, visited);
}

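/* Illustrative sketch, not part of the original file: the construct the
   walk above looks for.  expr_expected_value follows the SSA chain from
   the condition back to the __builtin_expect call and returns its second
   argument, so the branch below is predicted taken.  */
#if 0
extern long process (long);

long
expected_value_example (long x)
{
  if (__builtin_expect (x > 0, 1))  /* Hinted as likely.  */
    return process (x);
  return 0;
}
#endif
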
/* Get rid of all builtin_expect calls and GIMPLE_PREDICT statements
   we no longer need.  */

static unsigned int
strip_predict_hints (void)
{
  basic_block bb;
  gimple ass_stmt;
  tree var;

  FOR_EACH_BB (bb)
    {
      gimple_stmt_iterator bi;
      for (bi = gsi_start_bb (bb); !gsi_end_p (bi);)
        {
          gimple stmt = gsi_stmt (bi);

          if (gimple_code (stmt) == GIMPLE_PREDICT)
            {
              gsi_remove (&bi, true);
              continue;
            }
          else if (gimple_code (stmt) == GIMPLE_CALL)
            {
              tree fndecl = gimple_call_fndecl (stmt);

              if (fndecl
                  && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
                  && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_EXPECT
                  && gimple_call_num_args (stmt) == 2)
                {
                  var = gimple_call_lhs (stmt);
                  ass_stmt = gimple_build_assign (var, gimple_call_arg (stmt, 0));

                  gsi_replace (&bi, ass_stmt, true);
                }
            }
          gsi_next (&bi);
        }
    }
  return 0;
}

/* Predict using the opcode of the last statement in basic block BB.  */

static void
tree_predict_by_opcode (basic_block bb)
{
  gimple stmt = last_stmt (bb);
  edge then_edge;
  tree op0, op1;
  tree type;
  tree val;
  enum tree_code cmp;
  bitmap visited;
  edge_iterator ei;

  if (!stmt || gimple_code (stmt) != GIMPLE_COND)
    return;
  FOR_EACH_EDGE (then_edge, ei, bb->succs)
    if (then_edge->flags & EDGE_TRUE_VALUE)
      break;
  op0 = gimple_cond_lhs (stmt);
  op1 = gimple_cond_rhs (stmt);
  cmp = gimple_cond_code (stmt);
  type = TREE_TYPE (op0);
  visited = BITMAP_ALLOC (NULL);
  val = expr_expected_value_1 (boolean_type_node, op0, cmp, op1, visited);
  BITMAP_FREE (visited);
  if (val)
    {
      if (integer_zerop (val))
        predict_edge_def (then_edge, PRED_BUILTIN_EXPECT, NOT_TAKEN);
      else
        predict_edge_def (then_edge, PRED_BUILTIN_EXPECT, TAKEN);
      return;
    }
  /* Try "pointer heuristic."
     A comparison ptr == 0 is predicted as false.
     Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
  if (POINTER_TYPE_P (type))
    {
      if (cmp == EQ_EXPR)
        predict_edge_def (then_edge, PRED_TREE_POINTER, NOT_TAKEN);
      else if (cmp == NE_EXPR)
        predict_edge_def (then_edge, PRED_TREE_POINTER, TAKEN);
    }
  else

  /* Try "opcode heuristic."
     EQ tests are usually false and NE tests are usually true.  Also,
     most quantities are positive, so we can make the appropriate guesses
     about signed comparisons against zero.  */
    switch (cmp)
      {
      case EQ_EXPR:
      case UNEQ_EXPR:
        /* Floating point comparisons appear to behave in a very
           unpredictable way because of the special role of = tests in
           FP code.  */
        if (FLOAT_TYPE_P (type))
          ;
        /* Comparisons with 0 are often used for booleans and there is
           nothing useful to predict about them.  */
        else if (integer_zerop (op0) || integer_zerop (op1))
          ;
        else
          predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, NOT_TAKEN);
        break;

      case NE_EXPR:
      case LTGT_EXPR:
        /* Floating point comparisons appear to behave in a very
           unpredictable way because of the special role of = tests in
           FP code.  */
        if (FLOAT_TYPE_P (type))
          ;
        /* Comparisons with 0 are often used for booleans and there is
           nothing useful to predict about them.  */
        else if (integer_zerop (op0)
                 || integer_zerop (op1))
          ;
        else
          predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, TAKEN);
        break;

      case ORDERED_EXPR:
        predict_edge_def (then_edge, PRED_TREE_FPOPCODE, TAKEN);
        break;

      case UNORDERED_EXPR:
        predict_edge_def (then_edge, PRED_TREE_FPOPCODE, NOT_TAKEN);
        break;

      case LE_EXPR:
      case LT_EXPR:
        if (integer_zerop (op1)
            || integer_onep (op1)
            || integer_all_onesp (op1)
            || real_zerop (op1)
            || real_onep (op1)
            || real_minus_onep (op1))
          predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, NOT_TAKEN);
        break;

      case GE_EXPR:
      case GT_EXPR:
        if (integer_zerop (op1)
            || integer_onep (op1)
            || integer_all_onesp (op1)
            || real_zerop (op1)
            || real_onep (op1)
            || real_minus_onep (op1))
          predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, TAKEN);
        break;

      default:
        break;
      }
}

/* Try to guess whether return value VAL means an error code.  */

static enum br_predictor
return_prediction (tree val, enum prediction *prediction)
{
  /* VOID.  */
  if (!val)
    return PRED_NO_PREDICTION;
  /* Different heuristics for pointers and scalars.  */
  if (POINTER_TYPE_P (TREE_TYPE (val)))
    {
      /* NULL is usually not returned.  */
      if (integer_zerop (val))
        {
          *prediction = NOT_TAKEN;
          return PRED_NULL_RETURN;
        }
    }
  else if (INTEGRAL_TYPE_P (TREE_TYPE (val)))
    {
      /* Negative return values are often used to indicate
         errors.  */
      if (TREE_CODE (val) == INTEGER_CST
          && tree_int_cst_sgn (val) < 0)
        {
          *prediction = NOT_TAKEN;
          return PRED_NEGATIVE_RETURN;
        }
      /* Constant return values seem to be commonly taken.
         Zero/one often represent booleans, so exclude them from the
         heuristics.  */
      if (TREE_CONSTANT (val)
          && (!integer_zerop (val) && !integer_onep (val)))
        {
          *prediction = TAKEN;
          return PRED_CONST_RETURN;
        }
    }
  return PRED_NO_PREDICTION;
}

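/* Illustrative sketch, not part of the original file: return values the
   heuristics above would classify when they reach a return PHI.  */
#if 0
int
return_prediction_example (int fd)
{
  if (fd < 0)
    return -1;  /* PRED_NEGATIVE_RETURN: this path predicted unlikely.  */
  if (fd == 0)
    return 42;  /* PRED_CONST_RETURN: this path predicted likely.  */
  return 0;     /* Zero/one look like booleans: no prediction.  */
}
#endif
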
/* Find the basic block with the return expression and look for a possible
   return value, trying to apply the RETURN_PREDICTION heuristics.  */

static void
apply_return_prediction (void)
{
  gimple return_stmt = NULL;
  tree return_val;
  edge e;
  gimple phi;
  int phi_num_args, i;
  enum br_predictor pred;
  enum prediction direction;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
    {
      return_stmt = last_stmt (e->src);
      if (return_stmt
          && gimple_code (return_stmt) == GIMPLE_RETURN)
        break;
    }
  if (!e)
    return;
  return_val = gimple_return_retval (return_stmt);
  if (!return_val)
    return;
  if (TREE_CODE (return_val) != SSA_NAME
      || !SSA_NAME_DEF_STMT (return_val)
      || gimple_code (SSA_NAME_DEF_STMT (return_val)) != GIMPLE_PHI)
    return;
  phi = SSA_NAME_DEF_STMT (return_val);
  phi_num_args = gimple_phi_num_args (phi);
  pred = return_prediction (PHI_ARG_DEF (phi, 0), &direction);

  /* Avoid the degenerate case where all return values from the function
     belong to the same category (i.e. they are all positive constants),
     so we can hardly say anything about them.  */
  for (i = 1; i < phi_num_args; i++)
    if (pred != return_prediction (PHI_ARG_DEF (phi, i), &direction))
      break;
  if (i != phi_num_args)
    for (i = 0; i < phi_num_args; i++)
      {
        pred = return_prediction (PHI_ARG_DEF (phi, i), &direction);
        if (pred != PRED_NO_PREDICTION)
          predict_paths_leading_to (gimple_phi_arg_edge (phi, i)->src, pred,
                                    direction);
      }
}

/* Look for basic blocks that contain unlikely-to-happen events
   (such as noreturn calls) and mark all paths leading to execution
   of these basic blocks as unlikely.  */

static void
tree_bb_level_predictions (void)
{
  basic_block bb;
  bool has_return_edges = false;
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
    if (!(e->flags & (EDGE_ABNORMAL | EDGE_FAKE | EDGE_EH)))
      {
        has_return_edges = true;
        break;
      }

  apply_return_prediction ();

  FOR_EACH_BB (bb)
    {
      gimple_stmt_iterator gsi;

      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          gimple stmt = gsi_stmt (gsi);
          tree decl;

          if (is_gimple_call (stmt))
            {
              if ((gimple_call_flags (stmt) & ECF_NORETURN)
                  && has_return_edges)
                predict_paths_leading_to (bb, PRED_NORETURN,
                                          NOT_TAKEN);
              decl = gimple_call_fndecl (stmt);
              if (decl
                  && lookup_attribute ("cold",
                                       DECL_ATTRIBUTES (decl)))
                predict_paths_leading_to (bb, PRED_COLD_FUNCTION,
                                          NOT_TAKEN);
            }
          else if (gimple_code (stmt) == GIMPLE_PREDICT)
            {
              predict_paths_leading_to (bb, gimple_predict_predictor (stmt),
                                        gimple_predict_outcome (stmt));
              /* Keep GIMPLE_PREDICT around so early inlining will propagate
                 hints to callers.  */
            }
        }
    }
}

#ifdef ENABLE_CHECKING

/* Callback for pointer_map_traverse, asserts that the pointer map is
   empty.  */

static bool
assert_is_empty (const void *key ATTRIBUTE_UNUSED, void **value,
                 void *data ATTRIBUTE_UNUSED)
{
  gcc_assert (!*value);
  return false;
}
#endif

1609 /* Predict branch probabilities and estimate profile of the tree CFG. */ | |
1610 static unsigned int | |
1611 tree_estimate_probability (void) | |
1612 { | |
1613 basic_block bb; | |
1614 | |
1615 loop_optimizer_init (0); | |
1616 if (dump_file && (dump_flags & TDF_DETAILS)) | |
1617 flow_loops_dump (dump_file, NULL, 0); | |
1618 | |
1619 add_noreturn_fake_exit_edges (); | |
1620 connect_infinite_loops_to_exit (); | |
1621 /* We use loop_niter_by_eval, which requires that the loops have | |
1622 preheaders. */ | |
1623 create_preheaders (CP_SIMPLE_PREHEADERS); | |
1624 calculate_dominance_info (CDI_POST_DOMINATORS); | |
1625 | |
1626 bb_predictions = pointer_map_create (); | |
1627 tree_bb_level_predictions (); | |
1628 | |
1629 mark_irreducible_loops (); | |
1630 record_loop_exits (); | |
1631 if (number_of_loops () > 1) | |
1632 predict_loops (); | |
1633 | |
1634 FOR_EACH_BB (bb) | |
1635 { | |
1636 edge e; | |
1637 edge_iterator ei; | |
1638 gimple last; | |
1639 | |
1640 FOR_EACH_EDGE (e, ei, bb->succs) | |
1641 { | |
          /* Predict early returns to be probable, as we've already taken
             care of error returns, and the other cases are often used for
             fast paths through the function.

             Since we've already removed the return statements, we are
             looking for a CFG like:

               if (conditional)
                 {
                   ..
                   goto return_block
                 }
               some other blocks
               return_block:
               return_stmt.  */
          if (e->dest != bb->next_bb
              && e->dest != EXIT_BLOCK_PTR
              && single_succ_p (e->dest)
              && single_succ_edge (e->dest)->dest == EXIT_BLOCK_PTR
              && (last = last_stmt (e->dest)) != NULL
              && gimple_code (last) == GIMPLE_RETURN)
            {
              edge e1;
              edge_iterator ei1;

              if (single_succ_p (bb))
                {
                  FOR_EACH_EDGE (e1, ei1, bb->preds)
                    if (!predicted_by_p (e1->src, PRED_NULL_RETURN)
                        && !predicted_by_p (e1->src, PRED_CONST_RETURN)
                        && !predicted_by_p (e1->src, PRED_NEGATIVE_RETURN))
                      predict_edge_def (e1, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
                }
              else
                if (!predicted_by_p (e->src, PRED_NULL_RETURN)
                    && !predicted_by_p (e->src, PRED_CONST_RETURN)
                    && !predicted_by_p (e->src, PRED_NEGATIVE_RETURN))
                  predict_edge_def (e, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
            }

          /* Look for a block we are guarding (i.e. we dominate it,
             but it doesn't postdominate us).  */
          if (e->dest != EXIT_BLOCK_PTR && e->dest != bb
              && dominated_by_p (CDI_DOMINATORS, e->dest, e->src)
              && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest))
            {
              gimple_stmt_iterator bi;

              /* The call heuristic claims that a guarded function call
                 is improbable.  This is because such calls are often
                 used to signal exceptional situations such as printing
                 error messages.  */
              for (bi = gsi_start_bb (e->dest); !gsi_end_p (bi);
                   gsi_next (&bi))
                {
                  gimple stmt = gsi_stmt (bi);
                  if (is_gimple_call (stmt)
                      /* Constant and pure calls are hardly used to signal
                         something exceptional.  */
                      && gimple_has_side_effects (stmt))
                    {
                      predict_edge_def (e, PRED_CALL, NOT_TAKEN);
                      break;
                    }
                }
            }
        }
      tree_predict_by_opcode (bb);
    }
  FOR_EACH_BB (bb)
    combine_predictions_for_bb (bb);

#ifdef ENABLE_CHECKING
  pointer_map_traverse (bb_predictions, assert_is_empty, NULL);
#endif
  pointer_map_destroy (bb_predictions);
  bb_predictions = NULL;

  estimate_bb_frequencies ();
  free_dominance_info (CDI_POST_DOMINATORS);
  remove_fake_exit_edges ();
  loop_optimizer_finalize ();
  if (dump_file && (dump_flags & TDF_DETAILS))
    gimple_dump_cfg (dump_file, dump_flags);
  if (profile_status == PROFILE_ABSENT)
    profile_status = PROFILE_GUESSED;
  return 0;
}

/* Predict edges to the successors of CUR whose sources are not
   postdominated by BB with prediction PRED, and recurse to all
   postdominators.  */

static void
predict_paths_for_bb (basic_block cur, basic_block bb,
                      enum br_predictor pred,
                      enum prediction taken)
{
  edge e;
  edge_iterator ei;
  basic_block son;

  /* We are looking for all edges forming the edge cut induced by the
     set of all blocks postdominated by BB.  */
  FOR_EACH_EDGE (e, ei, cur->preds)
    if (e->src->index >= NUM_FIXED_BLOCKS
        && !dominated_by_p (CDI_POST_DOMINATORS, e->src, bb))
      {
        gcc_assert (bb == cur || dominated_by_p (CDI_POST_DOMINATORS, cur, bb));
        predict_edge_def (e, pred, taken);
      }
  for (son = first_dom_son (CDI_POST_DOMINATORS, cur);
       son;
       son = next_dom_son (CDI_POST_DOMINATORS, son))
    predict_paths_for_bb (son, bb, pred, taken);
}

/* Sets branch probabilities according to PREDiction and TAKEN.  */

static void
predict_paths_leading_to (basic_block bb, enum br_predictor pred,
                          enum prediction taken)
{
  predict_paths_for_bb (bb, bb, pred, taken);
}
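
/* For example, the noreturn heuristic earlier in this file uses

     predict_paths_leading_to (bb, PRED_NORETURN, NOT_TAKEN);

   to walk the postdominator tree rooted at BB and mark every edge
   entering the region postdominated by BB as not taken, making whole
   paths that end in an abort-like call cold.  */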

/* This is used to carry information about basic blocks.  It is
   attached to the AUX field of the standard CFG block.  */

typedef struct block_info_def
{
  /* Estimated frequency of execution of basic_block.  */
  sreal frequency;

  /* Used to keep the queue of basic blocks to process.  */
  basic_block next;

  /* Number of predecessors we need to visit first.  */
  int npredecessors;
} *block_info;

/* Similar information for edges.  */
typedef struct edge_info_def
{
  /* If the edge is a loopback edge, the probability that the edge will
     be reached given that the header is.  The estimated number of
     iterations of the loop can then be computed as
     1 / (1 - back_edge_prob).  */
  sreal back_edge_prob;
  /* True if the edge is a loopback edge in the natural loop.  */
  unsigned int back_edge:1;
} *edge_info;
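
/* Worked example (illustrative numbers): a latch edge whose
   back_edge_prob has converged to 0.9 implies an expected iteration
   count of about 1 / (1 - 0.9) = 10, so blocks inside the loop body
   receive roughly ten times the frequency of the code surrounding
   the loop.  */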

#define BLOCK_INFO(B) ((block_info) (B)->aux)
#define EDGE_INFO(E) ((edge_info) (E)->aux)

/* Helper function for estimate_bb_frequencies.
   Propagate the frequencies in blocks marked in
   TOVISIT, starting in HEAD.  */

static void
propagate_freq (basic_block head, bitmap tovisit)
{
  basic_block bb;
  basic_block last;
  unsigned i;
  edge e;
  basic_block nextbb;
  bitmap_iterator bi;

  /* For each basic block we need to visit, count the number of its
     predecessors that we need to visit first.  */
  EXECUTE_IF_SET_IN_BITMAP (tovisit, 0, i, bi)
    {
      edge_iterator ei;
      int count = 0;

      /* The outermost "loop" includes the exit block, which we cannot
         look up via BASIC_BLOCK.  Detect this and use EXIT_BLOCK_PTR
         directly.  Do the same for the entry block.  */
      bb = BASIC_BLOCK (i);

      FOR_EACH_EDGE (e, ei, bb->preds)
        {
          bool visit = bitmap_bit_p (tovisit, e->src->index);

          if (visit && !(e->flags & EDGE_DFS_BACK))
            count++;
          else if (visit && dump_file && !EDGE_INFO (e)->back_edge)
            fprintf (dump_file,
                     "Irreducible region hit, ignoring edge to %i->%i\n",
                     e->src->index, bb->index);
        }
      BLOCK_INFO (bb)->npredecessors = count;
    }

  memcpy (&BLOCK_INFO (head)->frequency, &real_one, sizeof (real_one));
  last = head;
  for (bb = head; bb; bb = nextbb)
    {
      edge_iterator ei;
      sreal cyclic_probability, frequency;

      memcpy (&cyclic_probability, &real_zero, sizeof (real_zero));
      memcpy (&frequency, &real_zero, sizeof (real_zero));

      nextbb = BLOCK_INFO (bb)->next;
      BLOCK_INFO (bb)->next = NULL;

      /* Compute frequency of basic block.  */
      if (bb != head)
        {
#ifdef ENABLE_CHECKING
          FOR_EACH_EDGE (e, ei, bb->preds)
            gcc_assert (!bitmap_bit_p (tovisit, e->src->index)
                        || (e->flags & EDGE_DFS_BACK));
#endif

          FOR_EACH_EDGE (e, ei, bb->preds)
            if (EDGE_INFO (e)->back_edge)
              {
                sreal_add (&cyclic_probability, &cyclic_probability,
                           &EDGE_INFO (e)->back_edge_prob);
              }
            else if (!(e->flags & EDGE_DFS_BACK))
              {
                sreal tmp;

                /* frequency += (e->probability
                                 * BLOCK_INFO (e->src)->frequency
                                 / REG_BR_PROB_BASE);  */

                sreal_init (&tmp, e->probability, 0);
                sreal_mul (&tmp, &tmp, &BLOCK_INFO (e->src)->frequency);
                sreal_mul (&tmp, &tmp, &real_inv_br_prob_base);
                sreal_add (&frequency, &frequency, &tmp);
              }

          if (sreal_compare (&cyclic_probability, &real_zero) == 0)
            {
              memcpy (&BLOCK_INFO (bb)->frequency, &frequency,
                      sizeof (frequency));
            }
          else
            {
              if (sreal_compare (&cyclic_probability, &real_almost_one) > 0)
                {
                  memcpy (&cyclic_probability, &real_almost_one,
                          sizeof (real_almost_one));
                }

              /* BLOCK_INFO (bb)->frequency = frequency
                                              / (1 - cyclic_probability) */

              sreal_sub (&cyclic_probability, &real_one, &cyclic_probability);
              sreal_div (&BLOCK_INFO (bb)->frequency,
                         &frequency, &cyclic_probability);
            }
        }

      bitmap_clear_bit (tovisit, bb->index);

      e = find_edge (bb, head);
      if (e)
        {
          sreal tmp;

          /* EDGE_INFO (e)->back_edge_prob
             = ((e->probability * BLOCK_INFO (bb)->frequency)
                / REG_BR_PROB_BASE);  */

          sreal_init (&tmp, e->probability, 0);
          sreal_mul (&tmp, &tmp, &BLOCK_INFO (bb)->frequency);
          sreal_mul (&EDGE_INFO (e)->back_edge_prob,
                     &tmp, &real_inv_br_prob_base);
        }

      /* Propagate to successor blocks.  */
      FOR_EACH_EDGE (e, ei, bb->succs)
        if (!(e->flags & EDGE_DFS_BACK)
            && BLOCK_INFO (e->dest)->npredecessors)
          {
            BLOCK_INFO (e->dest)->npredecessors--;
            if (!BLOCK_INFO (e->dest)->npredecessors)
              {
                if (!nextbb)
                  nextbb = e->dest;
                else
                  BLOCK_INFO (last)->next = e->dest;

                last = e->dest;
              }
          }
    }
}
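
/* Worked example for propagate_freq (illustrative numbers): consider a
   loop header entered with frequency 1 whose back edge carries
   back_edge_prob == 0.75.  The incoming non-back edges contribute
   frequency == 1 and the back edge contributes
   cyclic_probability == 0.75, so the header is assigned frequency
   1 / (1 - 0.75) = 4, i.e. the expected four executions per entry.
   Capping cyclic_probability at real_almost_one keeps this division
   finite for (apparently) infinite loops.  */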

/* Estimate probabilities of loopback edges in loops at the same nest
   level.  */

static void
estimate_loops_at_level (struct loop *first_loop)
{
  struct loop *loop;

  for (loop = first_loop; loop; loop = loop->next)
    {
      edge e;
      basic_block *bbs;
      unsigned i;
      bitmap tovisit = BITMAP_ALLOC (NULL);

      estimate_loops_at_level (loop->inner);

      /* Find the current loop's back edge and mark it.  */
      e = loop_latch_edge (loop);
      EDGE_INFO (e)->back_edge = 1;

      bbs = get_loop_body (loop);
      for (i = 0; i < loop->num_nodes; i++)
        bitmap_set_bit (tovisit, bbs[i]->index);
      free (bbs);
      propagate_freq (loop->header, tovisit);
      BITMAP_FREE (tovisit);
    }
}

/* Propagates frequencies through the structure of loops.  */

static void
estimate_loops (void)
{
  bitmap tovisit = BITMAP_ALLOC (NULL);
  basic_block bb;

  /* Start by estimating the frequencies in the loops.  */
  if (number_of_loops () > 1)
    estimate_loops_at_level (current_loops->tree_root->inner);

  /* Now propagate the frequencies through all the blocks.  */
  FOR_ALL_BB (bb)
    {
      bitmap_set_bit (tovisit, bb->index);
    }
  propagate_freq (ENTRY_BLOCK_PTR, tovisit);
  BITMAP_FREE (tovisit);
}

/* Convert counts measured by profile-driven feedback to frequencies.
   Return nonzero iff there was any nonzero execution count.  */

int
counts_to_freqs (void)
{
  gcov_type count_max, true_count_max = 0;
  basic_block bb;

  FOR_EACH_BB (bb)
    true_count_max = MAX (bb->count, true_count_max);

  count_max = MAX (true_count_max, 1);
  FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
    bb->frequency = (bb->count * BB_FREQ_MAX + count_max / 2) / count_max;

  return true_count_max;
}
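
/* Worked example for counts_to_freqs (illustrative numbers): with
   BB_FREQ_MAX == 10000 and a hottest block count of count_max == 1000,
   a block executed 250 times gets

     frequency = (250 * 10000 + 500) / 1000 = 2500,

   i.e. counts are rescaled so the hottest block lands at BB_FREQ_MAX,
   with the "+ count_max / 2" term rounding to nearest.  */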

/* Return true if the function is likely to be expensive, so there is
   no point in optimizing the performance of the prologue or epilogue,
   or doing inlining, at the expense of code size growth.  THRESHOLD is
   the limit on the number of instructions the function may execute on
   average and still be considered inexpensive.  */

bool
expensive_function_p (int threshold)
{
  unsigned int sum = 0;
  basic_block bb;
  unsigned int limit;

  /* We cannot compute this accurately for large thresholds due to
     scaled frequencies.  */
  gcc_assert (threshold <= BB_FREQ_MAX);

  /* Frequencies are out of range.  This either means that the function
     contains an internal loop executing more than BB_FREQ_MAX times, or
     that profile feedback is available and the function has never been
     executed.  */
  if (ENTRY_BLOCK_PTR->frequency == 0)
    return true;

  /* At most BB_FREQ_MAX^2, so overflow won't happen.  */
  limit = ENTRY_BLOCK_PTR->frequency * threshold;
  FOR_EACH_BB (bb)
    {
      rtx insn;

      for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb));
           insn = NEXT_INSN (insn))
        if (active_insn_p (insn))
          {
            sum += bb->frequency;
            if (sum > limit)
              return true;
          }
    }

  return false;
}
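
/* Worked example for expensive_function_p (illustrative numbers): with
   ENTRY_BLOCK_PTR->frequency == 100 and threshold == 50, limit is 5000.
   Each active insn adds its block's frequency to SUM, so SUM / 100
   approximates the average number of instructions executed per call;
   SUM exceeding 5000 therefore means more than 50 instructions on
   average, and the function is reported as expensive.  */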

/* Estimate basic block frequencies from the given branch probabilities.  */

void
estimate_bb_frequencies (void)
{
  basic_block bb;
  sreal freq_max;

  if (profile_status != PROFILE_READ || !counts_to_freqs ())
    {
      static int real_values_initialized = 0;

      if (!real_values_initialized)
        {
          real_values_initialized = 1;
          sreal_init (&real_zero, 0, 0);
          sreal_init (&real_one, 1, 0);
          sreal_init (&real_br_prob_base, REG_BR_PROB_BASE, 0);
          sreal_init (&real_bb_freq_max, BB_FREQ_MAX, 0);
          sreal_init (&real_one_half, 1, -1);
          sreal_div (&real_inv_br_prob_base, &real_one, &real_br_prob_base);
          sreal_sub (&real_almost_one, &real_one, &real_inv_br_prob_base);
        }

      mark_dfs_back_edges ();

      single_succ_edge (ENTRY_BLOCK_PTR)->probability = REG_BR_PROB_BASE;

      /* Set up block info for each basic block.  */
      alloc_aux_for_blocks (sizeof (struct block_info_def));
      alloc_aux_for_edges (sizeof (struct edge_info_def));
      FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
        {
          edge e;
          edge_iterator ei;

          FOR_EACH_EDGE (e, ei, bb->succs)
            {
              sreal_init (&EDGE_INFO (e)->back_edge_prob, e->probability, 0);
              sreal_mul (&EDGE_INFO (e)->back_edge_prob,
                         &EDGE_INFO (e)->back_edge_prob,
                         &real_inv_br_prob_base);
            }
        }

      /* First compute probabilities locally for each loop from innermost
         to outermost to examine probabilities for back edges.  */
      estimate_loops ();

      memcpy (&freq_max, &real_zero, sizeof (real_zero));
      FOR_EACH_BB (bb)
        if (sreal_compare (&freq_max, &BLOCK_INFO (bb)->frequency) < 0)
          memcpy (&freq_max, &BLOCK_INFO (bb)->frequency, sizeof (freq_max));

      sreal_div (&freq_max, &real_bb_freq_max, &freq_max);
      FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
        {
          sreal tmp;

          sreal_mul (&tmp, &BLOCK_INFO (bb)->frequency, &freq_max);
          sreal_add (&tmp, &tmp, &real_one_half);
          bb->frequency = sreal_to_int (&tmp);
        }

      free_aux_for_blocks ();
      free_aux_for_edges ();
    }
  compute_function_frequency ();
  if (flag_reorder_functions)
    choose_function_section ();
}
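
/* Worked example for the final scaling above (illustrative numbers): if
   the hottest block's propagated frequency is 8.0, freq_max becomes
   BB_FREQ_MAX / 8.0, and a block with propagated frequency 2.0 ends up
   with bb->frequency == round (2.0 * BB_FREQ_MAX / 8.0), i.e. one
   quarter of BB_FREQ_MAX.  */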

/* Decide whether the function is hot, cold, or unlikely executed.  */
static void
compute_function_frequency (void)
{
  basic_block bb;

  if (!profile_info || !flag_branch_probabilities)
    {
      if (lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl))
          != NULL)
        cfun->function_frequency = FUNCTION_FREQUENCY_UNLIKELY_EXECUTED;
      else if (lookup_attribute ("hot",
                                 DECL_ATTRIBUTES (current_function_decl))
               != NULL)
        cfun->function_frequency = FUNCTION_FREQUENCY_HOT;
      return;
    }
  cfun->function_frequency = FUNCTION_FREQUENCY_UNLIKELY_EXECUTED;
  FOR_EACH_BB (bb)
    {
      if (maybe_hot_bb_p (bb))
        {
          cfun->function_frequency = FUNCTION_FREQUENCY_HOT;
          return;
        }
      if (!probably_never_executed_bb_p (bb))
        cfun->function_frequency = FUNCTION_FREQUENCY_NORMAL;
    }
}

/* Choose an appropriate section for the function.  */
static void
choose_function_section (void)
{
  if (DECL_SECTION_NAME (current_function_decl)
      || !targetm.have_named_sections
      /* Theoretically we can split the gnu.linkonce text section too,
         but this requires more work, as the frequency needs to match
         for all generated objects, so we would need to merge the
         frequencies of all instances.  For now just never set the
         frequency for these.  */
      || DECL_ONE_ONLY (current_function_decl))
    return;

  /* If we are doing the partitioning optimization, let the optimization
     choose the correct section into which to put things.  */

  if (flag_reorder_blocks_and_partition)
    return;

  if (cfun->function_frequency == FUNCTION_FREQUENCY_HOT)
    DECL_SECTION_NAME (current_function_decl)
      = build_string (strlen (HOT_TEXT_SECTION_NAME), HOT_TEXT_SECTION_NAME);
  if (cfun->function_frequency == FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
    DECL_SECTION_NAME (current_function_decl)
      = build_string (strlen (UNLIKELY_EXECUTED_TEXT_SECTION_NAME),
                      UNLIKELY_EXECUTED_TEXT_SECTION_NAME);
}

static bool
gate_estimate_probability (void)
{
  return flag_guess_branch_prob;
}

/* Build PREDICT_EXPR.  */
tree
build_predict_expr (enum br_predictor predictor, enum prediction taken)
{
  tree t = build1 (PREDICT_EXPR, void_type_node,
                   build_int_cst (NULL, predictor));
  PREDICT_EXPR_OUTCOME (t) = taken;
  return t;
}
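
/* Usage sketch (hypothetical call site, for illustration only): a
   __builtin_expect-style hint that a condition holds could be encoded
   as

     tree hint = build_predict_expr (PRED_BUILTIN_EXPECT, TAKEN);

   The predictor is stored as an INTEGER_CST operand of the
   PREDICT_EXPR and the outcome in PREDICT_EXPR_OUTCOME.  */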

const char *
predictor_name (enum br_predictor predictor)
{
  return predictor_info[predictor].name;
}

struct gimple_opt_pass pass_profile =
{
 {
  GIMPLE_PASS,
  "profile",                            /* name */
  gate_estimate_probability,            /* gate */
  tree_estimate_probability,            /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_BRANCH_PROB,                       /* tv_id */
  PROP_cfg,                             /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_ggc_collect | TODO_verify_ssa    /* todo_flags_finish */
 }
};

struct gimple_opt_pass pass_strip_predict_hints =
{
 {
  GIMPLE_PASS,
  NULL,                                 /* name */
  NULL,                                 /* gate */
  strip_predict_hints,                  /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_BRANCH_PROB,                       /* tv_id */
  PROP_cfg,                             /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_ggc_collect | TODO_verify_ssa    /* todo_flags_finish */
 }
};