Mercurial > hg > CbC > CbC_gcc
comparison gcc/ipa-split.c @ 68:561a7518be6b
update gcc-4.6
author | Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Sun, 21 Aug 2011 07:07:55 +0900 |
parents | |
children | 04ced10e8804 |
comparison
equal
deleted
inserted
replaced
67:f6334be47118 | 68:561a7518be6b |
---|---|
1 /* Function splitting pass | |
2 Copyright (C) 2010, 2011 | |
3 Free Software Foundation, Inc. | |
4 Contributed by Jan Hubicka <jh@suse.cz> | |
5 | |
6 This file is part of GCC. | |
7 | |
8 GCC is free software; you can redistribute it and/or modify it under | |
9 the terms of the GNU General Public License as published by the Free | |
10 Software Foundation; either version 3, or (at your option) any later | |
11 version. | |
12 | |
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
16 for more details. | |
17 | |
18 You should have received a copy of the GNU General Public License | |
19 along with GCC; see the file COPYING3. If not see | |
20 <http://www.gnu.org/licenses/>. */ | |
21 | |
22 /* The purpose of this pass is to split function bodies to improve | |
23 inlining. I.e. for function of the form: | |
24 | |
25 func (...) | |
26 { | |
27 if (cheap_test) | |
28 something_small | |
29 else | |
30 something_big | |
31 } | |
32 | |
33 Produce: | |
34 | |
35 func.part (...) | |
36 { | |
37 something_big | |
38 } | |
39 | |
40 func (...) | |
41 { | |
42 if (cheap_test) | |
43 something_small | |
44 else | |
45 func.part (...); | |
46 } | |
47 | |
48 When func becomes inlinable and when cheap_test is often true, inlining func, | |
49 but not fund.part leads to performance improvement similar as inlining | |
50 original func while the code size growth is smaller. | |
51 | |
52 The pass is organized in three stages: | |
53 1) Collect local info about basic block into BB_INFO structure and | |
54 compute function body estimated size and time. | |
55 2) Via DFS walk find all possible basic blocks where we can split | |
56 and chose best one. | |
57 3) If split point is found, split at the specified BB by creating a clone | |
58 and updating function to call it. | |
59 | |
60 The decisions what functions to split are in execute_split_functions | |
61 and consider_split. | |
62 | |
63 There are several possible future improvements for this pass including: | |
64 | |
65 1) Splitting to break up large functions | |
66 2) Splitting to reduce stack frame usage | |
67 3) Allow split part of function to use values computed in the header part. | |
68 The values needs to be passed to split function, perhaps via same | |
69 interface as for nested functions or as argument. | |
70 4) Support for simple rematerialization. I.e. when split part use | |
71 value computed in header from function parameter in very cheap way, we | |
72 can just recompute it. | |
73 5) Support splitting of nested functions. | |
74 6) Support non-SSA arguments. | |
75 7) There is nothing preventing us from producing multiple parts of single function | |
76 when needed or splitting also the parts. */ | |
77 | |
78 #include "config.h" | |
79 #include "system.h" | |
80 #include "coretypes.h" | |
81 #include "tree.h" | |
82 #include "target.h" | |
83 #include "cgraph.h" | |
84 #include "ipa-prop.h" | |
85 #include "tree-flow.h" | |
86 #include "tree-pass.h" | |
87 #include "flags.h" | |
88 #include "timevar.h" | |
89 #include "diagnostic.h" | |
90 #include "tree-dump.h" | |
91 #include "tree-inline.h" | |
92 #include "fibheap.h" | |
93 #include "params.h" | |
94 #include "gimple-pretty-print.h" | |
95 | |
96 /* Per basic block info. */ | |
97 | |
98 typedef struct | |
99 { | |
100 unsigned int size; | |
101 unsigned int time; | |
102 } bb_info; | |
103 DEF_VEC_O(bb_info); | |
104 DEF_VEC_ALLOC_O(bb_info,heap); | |
105 | |
106 static VEC(bb_info, heap) *bb_info_vec; | |
107 | |
108 /* Description of split point. */ | |
109 | |
110 struct split_point | |
111 { | |
112 /* Size of the partitions. */ | |
113 unsigned int header_time, header_size, split_time, split_size; | |
114 | |
115 /* SSA names that need to be passed into spit function. */ | |
116 bitmap ssa_names_to_pass; | |
117 | |
118 /* Basic block where we split (that will become entry point of new function. */ | |
119 basic_block entry_bb; | |
120 | |
121 /* Basic blocks we are splitting away. */ | |
122 bitmap split_bbs; | |
123 | |
124 /* True when return value is computed on split part and thus it needs | |
125 to be returned. */ | |
126 bool split_part_set_retval; | |
127 }; | |
128 | |
129 /* Best split point found. */ | |
130 | |
131 struct split_point best_split_point; | |
132 | |
133 static tree find_retval (basic_block return_bb); | |
134 | |
135 /* Callback for walk_stmt_load_store_addr_ops. If T is non-SSA automatic | |
136 variable, check it if it is present in bitmap passed via DATA. */ | |
137 | |
138 static bool | |
139 test_nonssa_use (gimple stmt ATTRIBUTE_UNUSED, tree t, void *data) | |
140 { | |
141 t = get_base_address (t); | |
142 | |
143 if (!t || is_gimple_reg (t)) | |
144 return false; | |
145 | |
146 if (TREE_CODE (t) == PARM_DECL | |
147 || (TREE_CODE (t) == VAR_DECL | |
148 && auto_var_in_fn_p (t, current_function_decl)) | |
149 || TREE_CODE (t) == RESULT_DECL | |
150 || TREE_CODE (t) == LABEL_DECL) | |
151 return bitmap_bit_p ((bitmap)data, DECL_UID (t)); | |
152 | |
153 /* For DECL_BY_REFERENCE, the return value is actually a pointer. We want | |
154 to pretend that the value pointed to is actual result decl. */ | |
155 if ((TREE_CODE (t) == MEM_REF || INDIRECT_REF_P (t)) | |
156 && TREE_CODE (TREE_OPERAND (t, 0)) == SSA_NAME | |
157 && TREE_CODE (SSA_NAME_VAR (TREE_OPERAND (t, 0))) == RESULT_DECL | |
158 && DECL_BY_REFERENCE (DECL_RESULT (current_function_decl))) | |
159 return | |
160 bitmap_bit_p ((bitmap)data, | |
161 DECL_UID (DECL_RESULT (current_function_decl))); | |
162 | |
163 return false; | |
164 } | |
165 | |
166 /* Dump split point CURRENT. */ | |
167 | |
168 static void | |
169 dump_split_point (FILE * file, struct split_point *current) | |
170 { | |
171 fprintf (file, | |
172 "Split point at BB %i header time:%i header size: %i" | |
173 " split time: %i split size: %i\n bbs: ", | |
174 current->entry_bb->index, current->header_time, | |
175 current->header_size, current->split_time, current->split_size); | |
176 dump_bitmap (file, current->split_bbs); | |
177 fprintf (file, " SSA names to pass: "); | |
178 dump_bitmap (file, current->ssa_names_to_pass); | |
179 } | |
180 | |
181 /* Look for all BBs in header that might lead to the split part and verify | |
182 that they are not defining any non-SSA var used by the split part. | |
183 Parameters are the same as for consider_split. */ | |
184 | |
185 static bool | |
186 verify_non_ssa_vars (struct split_point *current, bitmap non_ssa_vars, | |
187 basic_block return_bb) | |
188 { | |
189 bitmap seen = BITMAP_ALLOC (NULL); | |
190 VEC (basic_block,heap) *worklist = NULL; | |
191 edge e; | |
192 edge_iterator ei; | |
193 bool ok = true; | |
194 | |
195 FOR_EACH_EDGE (e, ei, current->entry_bb->preds) | |
196 if (e->src != ENTRY_BLOCK_PTR | |
197 && !bitmap_bit_p (current->split_bbs, e->src->index)) | |
198 { | |
199 VEC_safe_push (basic_block, heap, worklist, e->src); | |
200 bitmap_set_bit (seen, e->src->index); | |
201 } | |
202 | |
203 while (!VEC_empty (basic_block, worklist)) | |
204 { | |
205 gimple_stmt_iterator bsi; | |
206 basic_block bb = VEC_pop (basic_block, worklist); | |
207 | |
208 FOR_EACH_EDGE (e, ei, bb->preds) | |
209 if (e->src != ENTRY_BLOCK_PTR | |
210 && bitmap_set_bit (seen, e->src->index)) | |
211 { | |
212 gcc_checking_assert (!bitmap_bit_p (current->split_bbs, | |
213 e->src->index)); | |
214 VEC_safe_push (basic_block, heap, worklist, e->src); | |
215 } | |
216 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi)) | |
217 { | |
218 gimple stmt = gsi_stmt (bsi); | |
219 if (is_gimple_debug (stmt)) | |
220 continue; | |
221 if (walk_stmt_load_store_addr_ops | |
222 (stmt, non_ssa_vars, test_nonssa_use, test_nonssa_use, | |
223 test_nonssa_use)) | |
224 { | |
225 ok = false; | |
226 goto done; | |
227 } | |
228 if (gimple_code (stmt) == GIMPLE_LABEL | |
229 && test_nonssa_use (stmt, gimple_label_label (stmt), | |
230 non_ssa_vars)) | |
231 { | |
232 ok = false; | |
233 goto done; | |
234 } | |
235 } | |
236 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi)) | |
237 { | |
238 if (walk_stmt_load_store_addr_ops | |
239 (gsi_stmt (bsi), non_ssa_vars, test_nonssa_use, test_nonssa_use, | |
240 test_nonssa_use)) | |
241 { | |
242 ok = false; | |
243 goto done; | |
244 } | |
245 } | |
246 FOR_EACH_EDGE (e, ei, bb->succs) | |
247 { | |
248 if (e->dest != return_bb) | |
249 continue; | |
250 for (bsi = gsi_start_phis (return_bb); !gsi_end_p (bsi); | |
251 gsi_next (&bsi)) | |
252 { | |
253 gimple stmt = gsi_stmt (bsi); | |
254 tree op = gimple_phi_arg_def (stmt, e->dest_idx); | |
255 | |
256 if (!is_gimple_reg (gimple_phi_result (stmt))) | |
257 continue; | |
258 if (TREE_CODE (op) != SSA_NAME | |
259 && test_nonssa_use (stmt, op, non_ssa_vars)) | |
260 { | |
261 ok = false; | |
262 goto done; | |
263 } | |
264 } | |
265 } | |
266 } | |
267 done: | |
268 BITMAP_FREE (seen); | |
269 VEC_free (basic_block, heap, worklist); | |
270 return ok; | |
271 } | |
272 | |
273 /* We found an split_point CURRENT. NON_SSA_VARS is bitmap of all non ssa | |
274 variables used and RETURN_BB is return basic block. | |
275 See if we can split function here. */ | |
276 | |
277 static void | |
278 consider_split (struct split_point *current, bitmap non_ssa_vars, | |
279 basic_block return_bb) | |
280 { | |
281 tree parm; | |
282 unsigned int num_args = 0; | |
283 unsigned int call_overhead; | |
284 edge e; | |
285 edge_iterator ei; | |
286 gimple_stmt_iterator bsi; | |
287 unsigned int i; | |
288 int incoming_freq = 0; | |
289 tree retval; | |
290 | |
291 if (dump_file && (dump_flags & TDF_DETAILS)) | |
292 dump_split_point (dump_file, current); | |
293 | |
294 FOR_EACH_EDGE (e, ei, current->entry_bb->preds) | |
295 if (!bitmap_bit_p (current->split_bbs, e->src->index)) | |
296 incoming_freq += EDGE_FREQUENCY (e); | |
297 | |
298 /* Do not split when we would end up calling function anyway. */ | |
299 if (incoming_freq | |
300 >= (ENTRY_BLOCK_PTR->frequency | |
301 * PARAM_VALUE (PARAM_PARTIAL_INLINING_ENTRY_PROBABILITY) / 100)) | |
302 { | |
303 if (dump_file && (dump_flags & TDF_DETAILS)) | |
304 fprintf (dump_file, | |
305 " Refused: incoming frequency is too large.\n"); | |
306 return; | |
307 } | |
308 | |
309 if (!current->header_size) | |
310 { | |
311 if (dump_file && (dump_flags & TDF_DETAILS)) | |
312 fprintf (dump_file, " Refused: header empty\n"); | |
313 return; | |
314 } | |
315 | |
316 /* Verify that PHI args on entry are either virtual or all their operands | |
317 incoming from header are the same. */ | |
318 for (bsi = gsi_start_phis (current->entry_bb); !gsi_end_p (bsi); gsi_next (&bsi)) | |
319 { | |
320 gimple stmt = gsi_stmt (bsi); | |
321 tree val = NULL; | |
322 | |
323 if (!is_gimple_reg (gimple_phi_result (stmt))) | |
324 continue; | |
325 for (i = 0; i < gimple_phi_num_args (stmt); i++) | |
326 { | |
327 edge e = gimple_phi_arg_edge (stmt, i); | |
328 if (!bitmap_bit_p (current->split_bbs, e->src->index)) | |
329 { | |
330 tree edge_val = gimple_phi_arg_def (stmt, i); | |
331 if (val && edge_val != val) | |
332 { | |
333 if (dump_file && (dump_flags & TDF_DETAILS)) | |
334 fprintf (dump_file, | |
335 " Refused: entry BB has PHI with multiple variants\n"); | |
336 return; | |
337 } | |
338 val = edge_val; | |
339 } | |
340 } | |
341 } | |
342 | |
343 | |
344 /* See what argument we will pass to the split function and compute | |
345 call overhead. */ | |
346 call_overhead = eni_size_weights.call_cost; | |
347 for (parm = DECL_ARGUMENTS (current_function_decl); parm; | |
348 parm = DECL_CHAIN (parm)) | |
349 { | |
350 if (!is_gimple_reg (parm)) | |
351 { | |
352 if (bitmap_bit_p (non_ssa_vars, DECL_UID (parm))) | |
353 { | |
354 if (dump_file && (dump_flags & TDF_DETAILS)) | |
355 fprintf (dump_file, | |
356 " Refused: need to pass non-ssa param values\n"); | |
357 return; | |
358 } | |
359 } | |
360 else if (gimple_default_def (cfun, parm) | |
361 && bitmap_bit_p (current->ssa_names_to_pass, | |
362 SSA_NAME_VERSION (gimple_default_def | |
363 (cfun, parm)))) | |
364 { | |
365 if (!VOID_TYPE_P (TREE_TYPE (parm))) | |
366 call_overhead += estimate_move_cost (TREE_TYPE (parm)); | |
367 num_args++; | |
368 } | |
369 } | |
370 if (!VOID_TYPE_P (TREE_TYPE (current_function_decl))) | |
371 call_overhead += estimate_move_cost (TREE_TYPE (current_function_decl)); | |
372 | |
373 if (current->split_size <= call_overhead) | |
374 { | |
375 if (dump_file && (dump_flags & TDF_DETAILS)) | |
376 fprintf (dump_file, | |
377 " Refused: split size is smaller than call overhead\n"); | |
378 return; | |
379 } | |
380 if (current->header_size + call_overhead | |
381 >= (unsigned int)(DECL_DECLARED_INLINE_P (current_function_decl) | |
382 ? MAX_INLINE_INSNS_SINGLE | |
383 : MAX_INLINE_INSNS_AUTO)) | |
384 { | |
385 if (dump_file && (dump_flags & TDF_DETAILS)) | |
386 fprintf (dump_file, | |
387 " Refused: header size is too large for inline candidate\n"); | |
388 return; | |
389 } | |
390 | |
391 /* FIXME: we currently can pass only SSA function parameters to the split | |
392 arguments. Once parm_adjustment infrastructure is supported by cloning, | |
393 we can pass more than that. */ | |
394 if (num_args != bitmap_count_bits (current->ssa_names_to_pass)) | |
395 { | |
396 | |
397 if (dump_file && (dump_flags & TDF_DETAILS)) | |
398 fprintf (dump_file, | |
399 " Refused: need to pass non-param values\n"); | |
400 return; | |
401 } | |
402 | |
403 /* When there are non-ssa vars used in the split region, see if they | |
404 are used in the header region. If so, reject the split. | |
405 FIXME: we can use nested function support to access both. */ | |
406 if (!bitmap_empty_p (non_ssa_vars) | |
407 && !verify_non_ssa_vars (current, non_ssa_vars, return_bb)) | |
408 { | |
409 if (dump_file && (dump_flags & TDF_DETAILS)) | |
410 fprintf (dump_file, | |
411 " Refused: split part has non-ssa uses\n"); | |
412 return; | |
413 } | |
414 /* See if retval used by return bb is computed by header or split part. | |
415 When it is computed by split part, we need to produce return statement | |
416 in the split part and add code to header to pass it around. | |
417 | |
418 This is bit tricky to test: | |
419 1) When there is no return_bb or no return value, we always pass | |
420 value around. | |
421 2) Invariants are always computed by caller. | |
422 3) For SSA we need to look if defining statement is in header or split part | |
423 4) For non-SSA we need to look where the var is computed. */ | |
424 retval = find_retval (return_bb); | |
425 if (!retval) | |
426 current->split_part_set_retval = true; | |
427 else if (is_gimple_min_invariant (retval)) | |
428 current->split_part_set_retval = false; | |
429 /* Special case is value returned by reference we record as if it was non-ssa | |
430 set to result_decl. */ | |
431 else if (TREE_CODE (retval) == SSA_NAME | |
432 && TREE_CODE (SSA_NAME_VAR (retval)) == RESULT_DECL | |
433 && DECL_BY_REFERENCE (DECL_RESULT (current_function_decl))) | |
434 current->split_part_set_retval | |
435 = bitmap_bit_p (non_ssa_vars, DECL_UID (SSA_NAME_VAR (retval))); | |
436 else if (TREE_CODE (retval) == SSA_NAME) | |
437 current->split_part_set_retval | |
438 = (!SSA_NAME_IS_DEFAULT_DEF (retval) | |
439 && (bitmap_bit_p (current->split_bbs, | |
440 gimple_bb (SSA_NAME_DEF_STMT (retval))->index) | |
441 || gimple_bb (SSA_NAME_DEF_STMT (retval)) == return_bb)); | |
442 else if (TREE_CODE (retval) == PARM_DECL) | |
443 current->split_part_set_retval = false; | |
444 else if (TREE_CODE (retval) == VAR_DECL | |
445 || TREE_CODE (retval) == RESULT_DECL) | |
446 current->split_part_set_retval | |
447 = bitmap_bit_p (non_ssa_vars, DECL_UID (retval)); | |
448 else | |
449 current->split_part_set_retval = true; | |
450 | |
451 /* split_function fixes up at most one PHI non-virtual PHI node in return_bb, | |
452 for the return value. If there are other PHIs, give up. */ | |
453 if (return_bb != EXIT_BLOCK_PTR) | |
454 { | |
455 gimple_stmt_iterator psi; | |
456 | |
457 for (psi = gsi_start_phis (return_bb); !gsi_end_p (psi); gsi_next (&psi)) | |
458 if (is_gimple_reg (gimple_phi_result (gsi_stmt (psi))) | |
459 && !(retval | |
460 && current->split_part_set_retval | |
461 && TREE_CODE (retval) == SSA_NAME | |
462 && !DECL_BY_REFERENCE (DECL_RESULT (current_function_decl)) | |
463 && SSA_NAME_DEF_STMT (retval) == gsi_stmt (psi))) | |
464 { | |
465 if (dump_file && (dump_flags & TDF_DETAILS)) | |
466 fprintf (dump_file, | |
467 " Refused: return bb has extra PHIs\n"); | |
468 return; | |
469 } | |
470 } | |
471 | |
472 if (dump_file && (dump_flags & TDF_DETAILS)) | |
473 fprintf (dump_file, " Accepted!\n"); | |
474 | |
475 /* At the moment chose split point with lowest frequency and that leaves | |
476 out smallest size of header. | |
477 In future we might re-consider this heuristics. */ | |
478 if (!best_split_point.split_bbs | |
479 || best_split_point.entry_bb->frequency > current->entry_bb->frequency | |
480 || (best_split_point.entry_bb->frequency == current->entry_bb->frequency | |
481 && best_split_point.split_size < current->split_size)) | |
482 | |
483 { | |
484 if (dump_file && (dump_flags & TDF_DETAILS)) | |
485 fprintf (dump_file, " New best split point!\n"); | |
486 if (best_split_point.ssa_names_to_pass) | |
487 { | |
488 BITMAP_FREE (best_split_point.ssa_names_to_pass); | |
489 BITMAP_FREE (best_split_point.split_bbs); | |
490 } | |
491 best_split_point = *current; | |
492 best_split_point.ssa_names_to_pass = BITMAP_ALLOC (NULL); | |
493 bitmap_copy (best_split_point.ssa_names_to_pass, | |
494 current->ssa_names_to_pass); | |
495 best_split_point.split_bbs = BITMAP_ALLOC (NULL); | |
496 bitmap_copy (best_split_point.split_bbs, current->split_bbs); | |
497 } | |
498 } | |
499 | |
500 /* Return basic block containing RETURN statement. We allow basic blocks | |
501 of the form: | |
502 <retval> = tmp_var; | |
503 return <retval> | |
504 but return_bb can not be more complex than this. | |
505 If nothing is found, return EXIT_BLOCK_PTR. | |
506 | |
507 When there are multiple RETURN statement, chose one with return value, | |
508 since that one is more likely shared by multiple code paths. | |
509 | |
510 Return BB is special, because for function splitting it is the only | |
511 basic block that is duplicated in between header and split part of the | |
512 function. | |
513 | |
514 TODO: We might support multiple return blocks. */ | |
515 | |
516 static basic_block | |
517 find_return_bb (void) | |
518 { | |
519 edge e; | |
520 basic_block return_bb = EXIT_BLOCK_PTR; | |
521 gimple_stmt_iterator bsi; | |
522 bool found_return = false; | |
523 tree retval = NULL_TREE; | |
524 | |
525 if (!single_pred_p (EXIT_BLOCK_PTR)) | |
526 return return_bb; | |
527 | |
528 e = single_pred_edge (EXIT_BLOCK_PTR); | |
529 for (bsi = gsi_last_bb (e->src); !gsi_end_p (bsi); gsi_prev (&bsi)) | |
530 { | |
531 gimple stmt = gsi_stmt (bsi); | |
532 if (gimple_code (stmt) == GIMPLE_LABEL || is_gimple_debug (stmt)) | |
533 ; | |
534 else if (gimple_code (stmt) == GIMPLE_ASSIGN | |
535 && found_return | |
536 && gimple_assign_single_p (stmt) | |
537 && (auto_var_in_fn_p (gimple_assign_rhs1 (stmt), | |
538 current_function_decl) | |
539 || is_gimple_min_invariant (gimple_assign_rhs1 (stmt))) | |
540 && retval == gimple_assign_lhs (stmt)) | |
541 ; | |
542 else if (gimple_code (stmt) == GIMPLE_RETURN) | |
543 { | |
544 found_return = true; | |
545 retval = gimple_return_retval (stmt); | |
546 } | |
547 else | |
548 break; | |
549 } | |
550 if (gsi_end_p (bsi) && found_return) | |
551 return_bb = e->src; | |
552 | |
553 return return_bb; | |
554 } | |
555 | |
556 /* Given return basic block RETURN_BB, see where return value is really | |
557 stored. */ | |
558 static tree | |
559 find_retval (basic_block return_bb) | |
560 { | |
561 gimple_stmt_iterator bsi; | |
562 for (bsi = gsi_start_bb (return_bb); !gsi_end_p (bsi); gsi_next (&bsi)) | |
563 if (gimple_code (gsi_stmt (bsi)) == GIMPLE_RETURN) | |
564 return gimple_return_retval (gsi_stmt (bsi)); | |
565 else if (gimple_code (gsi_stmt (bsi)) == GIMPLE_ASSIGN) | |
566 return gimple_assign_rhs1 (gsi_stmt (bsi)); | |
567 return NULL; | |
568 } | |
569 | |
570 /* Callback for walk_stmt_load_store_addr_ops. If T is non-SSA automatic | |
571 variable, mark it as used in bitmap passed via DATA. | |
572 Return true when access to T prevents splitting the function. */ | |
573 | |
574 static bool | |
575 mark_nonssa_use (gimple stmt ATTRIBUTE_UNUSED, tree t, void *data) | |
576 { | |
577 t = get_base_address (t); | |
578 | |
579 if (!t || is_gimple_reg (t)) | |
580 return false; | |
581 | |
582 /* At present we can't pass non-SSA arguments to split function. | |
583 FIXME: this can be relaxed by passing references to arguments. */ | |
584 if (TREE_CODE (t) == PARM_DECL) | |
585 { | |
586 if (dump_file && (dump_flags & TDF_DETAILS)) | |
587 fprintf (dump_file, | |
588 "Cannot split: use of non-ssa function parameter.\n"); | |
589 return true; | |
590 } | |
591 | |
592 if ((TREE_CODE (t) == VAR_DECL | |
593 && auto_var_in_fn_p (t, current_function_decl)) | |
594 || TREE_CODE (t) == RESULT_DECL | |
595 || TREE_CODE (t) == LABEL_DECL) | |
596 bitmap_set_bit ((bitmap)data, DECL_UID (t)); | |
597 | |
598 /* For DECL_BY_REFERENCE, the return value is actually a pointer. We want | |
599 to pretend that the value pointed to is actual result decl. */ | |
600 if ((TREE_CODE (t) == MEM_REF || INDIRECT_REF_P (t)) | |
601 && TREE_CODE (TREE_OPERAND (t, 0)) == SSA_NAME | |
602 && TREE_CODE (SSA_NAME_VAR (TREE_OPERAND (t, 0))) == RESULT_DECL | |
603 && DECL_BY_REFERENCE (DECL_RESULT (current_function_decl))) | |
604 return | |
605 bitmap_bit_p ((bitmap)data, | |
606 DECL_UID (DECL_RESULT (current_function_decl))); | |
607 | |
608 return false; | |
609 } | |
610 | |
611 /* Compute local properties of basic block BB we collect when looking for | |
612 split points. We look for ssa defs and store them in SET_SSA_NAMES, | |
613 for ssa uses and store them in USED_SSA_NAMES and for any non-SSA automatic | |
614 vars stored in NON_SSA_VARS. | |
615 | |
616 When BB has edge to RETURN_BB, collect uses in RETURN_BB too. | |
617 | |
618 Return false when BB contains something that prevents it from being put into | |
619 split function. */ | |
620 | |
621 static bool | |
622 visit_bb (basic_block bb, basic_block return_bb, | |
623 bitmap set_ssa_names, bitmap used_ssa_names, | |
624 bitmap non_ssa_vars) | |
625 { | |
626 gimple_stmt_iterator bsi; | |
627 edge e; | |
628 edge_iterator ei; | |
629 bool can_split = true; | |
630 | |
631 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi)) | |
632 { | |
633 gimple stmt = gsi_stmt (bsi); | |
634 tree op; | |
635 ssa_op_iter iter; | |
636 tree decl; | |
637 | |
638 if (is_gimple_debug (stmt)) | |
639 continue; | |
640 | |
641 /* FIXME: We can split regions containing EH. We can not however | |
642 split RESX, EH_DISPATCH and EH_POINTER referring to same region | |
643 into different partitions. This would require tracking of | |
644 EH regions and checking in consider_split_point if they | |
645 are not used elsewhere. */ | |
646 if (gimple_code (stmt) == GIMPLE_RESX) | |
647 { | |
648 if (dump_file && (dump_flags & TDF_DETAILS)) | |
649 fprintf (dump_file, "Cannot split: resx.\n"); | |
650 can_split = false; | |
651 } | |
652 if (gimple_code (stmt) == GIMPLE_EH_DISPATCH) | |
653 { | |
654 if (dump_file && (dump_flags & TDF_DETAILS)) | |
655 fprintf (dump_file, "Cannot split: eh dispatch.\n"); | |
656 can_split = false; | |
657 } | |
658 | |
659 /* Check builtins that prevent splitting. */ | |
660 if (gimple_code (stmt) == GIMPLE_CALL | |
661 && (decl = gimple_call_fndecl (stmt)) != NULL_TREE | |
662 && DECL_BUILT_IN (decl) | |
663 && DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL) | |
664 switch (DECL_FUNCTION_CODE (decl)) | |
665 { | |
666 /* FIXME: once we will allow passing non-parm values to split part, | |
667 we need to be sure to handle correct builtin_stack_save and | |
668 builtin_stack_restore. At the moment we are safe; there is no | |
669 way to store builtin_stack_save result in non-SSA variable | |
670 since all calls to those are compiler generated. */ | |
671 case BUILT_IN_APPLY: | |
672 case BUILT_IN_APPLY_ARGS: | |
673 case BUILT_IN_VA_START: | |
674 if (dump_file && (dump_flags & TDF_DETAILS)) | |
675 fprintf (dump_file, | |
676 "Cannot split: builtin_apply and va_start.\n"); | |
677 can_split = false; | |
678 break; | |
679 case BUILT_IN_EH_POINTER: | |
680 if (dump_file && (dump_flags & TDF_DETAILS)) | |
681 fprintf (dump_file, "Cannot split: builtin_eh_pointer.\n"); | |
682 can_split = false; | |
683 break; | |
684 default: | |
685 break; | |
686 } | |
687 | |
688 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_DEF) | |
689 bitmap_set_bit (set_ssa_names, SSA_NAME_VERSION (op)); | |
690 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE) | |
691 bitmap_set_bit (used_ssa_names, SSA_NAME_VERSION (op)); | |
692 can_split &= !walk_stmt_load_store_addr_ops (stmt, non_ssa_vars, | |
693 mark_nonssa_use, | |
694 mark_nonssa_use, | |
695 mark_nonssa_use); | |
696 } | |
697 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi)) | |
698 { | |
699 gimple stmt = gsi_stmt (bsi); | |
700 unsigned int i; | |
701 | |
702 if (is_gimple_debug (stmt)) | |
703 continue; | |
704 if (!is_gimple_reg (gimple_phi_result (stmt))) | |
705 continue; | |
706 bitmap_set_bit (set_ssa_names, | |
707 SSA_NAME_VERSION (gimple_phi_result (stmt))); | |
708 for (i = 0; i < gimple_phi_num_args (stmt); i++) | |
709 { | |
710 tree op = gimple_phi_arg_def (stmt, i); | |
711 if (TREE_CODE (op) == SSA_NAME) | |
712 bitmap_set_bit (used_ssa_names, SSA_NAME_VERSION (op)); | |
713 } | |
714 can_split &= !walk_stmt_load_store_addr_ops (stmt, non_ssa_vars, | |
715 mark_nonssa_use, | |
716 mark_nonssa_use, | |
717 mark_nonssa_use); | |
718 } | |
719 /* Record also uses coming from PHI operand in return BB. */ | |
720 FOR_EACH_EDGE (e, ei, bb->succs) | |
721 if (e->dest == return_bb) | |
722 { | |
723 for (bsi = gsi_start_phis (return_bb); !gsi_end_p (bsi); gsi_next (&bsi)) | |
724 { | |
725 gimple stmt = gsi_stmt (bsi); | |
726 tree op = gimple_phi_arg_def (stmt, e->dest_idx); | |
727 | |
728 if (is_gimple_debug (stmt)) | |
729 continue; | |
730 if (!is_gimple_reg (gimple_phi_result (stmt))) | |
731 continue; | |
732 if (TREE_CODE (op) == SSA_NAME) | |
733 bitmap_set_bit (used_ssa_names, SSA_NAME_VERSION (op)); | |
734 else | |
735 can_split &= !mark_nonssa_use (stmt, op, non_ssa_vars); | |
736 } | |
737 } | |
738 return can_split; | |
739 } | |
740 | |
741 /* Stack entry for recursive DFS walk in find_split_point. */ | |
742 | |
743 typedef struct | |
744 { | |
745 /* Basic block we are examining. */ | |
746 basic_block bb; | |
747 | |
748 /* SSA names set and used by the BB and all BBs reachable | |
749 from it via DFS walk. */ | |
750 bitmap set_ssa_names, used_ssa_names; | |
751 bitmap non_ssa_vars; | |
752 | |
753 /* All BBS visited from this BB via DFS walk. */ | |
754 bitmap bbs_visited; | |
755 | |
756 /* Last examined edge in DFS walk. Since we walk unoriented graph, | |
757 the value is up to sum of incoming and outgoing edges of BB. */ | |
758 unsigned int edge_num; | |
759 | |
760 /* Stack entry index of earliest BB reachable from current BB | |
761 or any BB visited later in DFS walk. */ | |
762 int earliest; | |
763 | |
764 /* Overall time and size of all BBs reached from this BB in DFS walk. */ | |
765 int overall_time, overall_size; | |
766 | |
767 /* When false we can not split on this BB. */ | |
768 bool can_split; | |
769 } stack_entry; | |
770 DEF_VEC_O(stack_entry); | |
771 DEF_VEC_ALLOC_O(stack_entry,heap); | |
772 | |
773 | |
774 /* Find all articulations and call consider_split on them. | |
775 OVERALL_TIME and OVERALL_SIZE is time and size of the function. | |
776 | |
777 We perform basic algorithm for finding an articulation in a graph | |
778 created from CFG by considering it to be an unoriented graph. | |
779 | |
780 The articulation is discovered via DFS walk. We collect earliest | |
781 basic block on stack that is reachable via backward edge. Articulation | |
782 is any basic block such that there is no backward edge bypassing it. | |
783 To reduce stack usage we maintain heap allocated stack in STACK vector. | |
784 AUX pointer of BB is set to index it appears in the stack or -1 once | |
785 it is visited and popped off the stack. | |
786 | |
787 The algorithm finds articulation after visiting the whole component | |
788 reachable by it. This makes it convenient to collect information about | |
789 the component used by consider_split. */ | |
790 | |
791 static void | |
792 find_split_points (int overall_time, int overall_size) | |
793 { | |
794 stack_entry first; | |
795 VEC(stack_entry, heap) *stack = NULL; | |
796 basic_block bb; | |
797 basic_block return_bb = find_return_bb (); | |
798 struct split_point current; | |
799 | |
800 current.header_time = overall_time; | |
801 current.header_size = overall_size; | |
802 current.split_time = 0; | |
803 current.split_size = 0; | |
804 current.ssa_names_to_pass = BITMAP_ALLOC (NULL); | |
805 | |
806 first.bb = ENTRY_BLOCK_PTR; | |
807 first.edge_num = 0; | |
808 first.overall_time = 0; | |
809 first.overall_size = 0; | |
810 first.earliest = INT_MAX; | |
811 first.set_ssa_names = 0; | |
812 first.used_ssa_names = 0; | |
813 first.bbs_visited = 0; | |
814 VEC_safe_push (stack_entry, heap, stack, &first); | |
815 ENTRY_BLOCK_PTR->aux = (void *)(intptr_t)-1; | |
816 | |
817 while (!VEC_empty (stack_entry, stack)) | |
818 { | |
819 stack_entry *entry = VEC_last (stack_entry, stack); | |
820 | |
821 /* We are walking an acyclic graph, so edge_num counts | |
822 succ and pred edges together. However when considering | |
823 articulation, we want to have processed everything reachable | |
824 from articulation but nothing that reaches into it. */ | |
825 if (entry->edge_num == EDGE_COUNT (entry->bb->succs) | |
826 && entry->bb != ENTRY_BLOCK_PTR) | |
827 { | |
828 int pos = VEC_length (stack_entry, stack); | |
829 entry->can_split &= visit_bb (entry->bb, return_bb, | |
830 entry->set_ssa_names, | |
831 entry->used_ssa_names, | |
832 entry->non_ssa_vars); | |
833 if (pos <= entry->earliest && !entry->can_split | |
834 && dump_file && (dump_flags & TDF_DETAILS)) | |
835 fprintf (dump_file, | |
836 "found articulation at bb %i but can not split\n", | |
837 entry->bb->index); | |
838 if (pos <= entry->earliest && entry->can_split) | |
839 { | |
840 if (dump_file && (dump_flags & TDF_DETAILS)) | |
841 fprintf (dump_file, "found articulation at bb %i\n", | |
842 entry->bb->index); | |
843 current.entry_bb = entry->bb; | |
844 current.ssa_names_to_pass = BITMAP_ALLOC (NULL); | |
845 bitmap_and_compl (current.ssa_names_to_pass, | |
846 entry->used_ssa_names, entry->set_ssa_names); | |
847 current.header_time = overall_time - entry->overall_time; | |
848 current.header_size = overall_size - entry->overall_size; | |
849 current.split_time = entry->overall_time; | |
850 current.split_size = entry->overall_size; | |
851 current.split_bbs = entry->bbs_visited; | |
852 consider_split (¤t, entry->non_ssa_vars, return_bb); | |
853 BITMAP_FREE (current.ssa_names_to_pass); | |
854 } | |
855 } | |
856 /* Do actual DFS walk. */ | |
857 if (entry->edge_num | |
858 < (EDGE_COUNT (entry->bb->succs) | |
859 + EDGE_COUNT (entry->bb->preds))) | |
860 { | |
861 edge e; | |
862 basic_block dest; | |
863 if (entry->edge_num < EDGE_COUNT (entry->bb->succs)) | |
864 { | |
865 e = EDGE_SUCC (entry->bb, entry->edge_num); | |
866 dest = e->dest; | |
867 } | |
868 else | |
869 { | |
870 e = EDGE_PRED (entry->bb, entry->edge_num | |
871 - EDGE_COUNT (entry->bb->succs)); | |
872 dest = e->src; | |
873 } | |
874 | |
875 entry->edge_num++; | |
876 | |
877 /* New BB to visit, push it to the stack. */ | |
878 if (dest != return_bb && dest != EXIT_BLOCK_PTR | |
879 && !dest->aux) | |
880 { | |
881 stack_entry new_entry; | |
882 | |
883 new_entry.bb = dest; | |
884 new_entry.edge_num = 0; | |
885 new_entry.overall_time | |
886 = VEC_index (bb_info, bb_info_vec, dest->index)->time; | |
887 new_entry.overall_size | |
888 = VEC_index (bb_info, bb_info_vec, dest->index)->size; | |
889 new_entry.earliest = INT_MAX; | |
890 new_entry.set_ssa_names = BITMAP_ALLOC (NULL); | |
891 new_entry.used_ssa_names = BITMAP_ALLOC (NULL); | |
892 new_entry.bbs_visited = BITMAP_ALLOC (NULL); | |
893 new_entry.non_ssa_vars = BITMAP_ALLOC (NULL); | |
894 new_entry.can_split = true; | |
895 bitmap_set_bit (new_entry.bbs_visited, dest->index); | |
896 VEC_safe_push (stack_entry, heap, stack, &new_entry); | |
897 dest->aux = (void *)(intptr_t)VEC_length (stack_entry, stack); | |
898 } | |
899 /* Back edge found, record the earliest point. */ | |
900 else if ((intptr_t)dest->aux > 0 | |
901 && (intptr_t)dest->aux < entry->earliest) | |
902 entry->earliest = (intptr_t)dest->aux; | |
903 } | |
904 /* We are done with examining the edges. Pop off the value from stack | |
905 and merge stuff we accumulate during the walk. */ | |
906 else if (entry->bb != ENTRY_BLOCK_PTR) | |
907 { | |
908 stack_entry *prev = VEC_index (stack_entry, stack, | |
909 VEC_length (stack_entry, stack) - 2); | |
910 | |
911 entry->bb->aux = (void *)(intptr_t)-1; | |
912 prev->can_split &= entry->can_split; | |
913 if (prev->set_ssa_names) | |
914 { | |
915 bitmap_ior_into (prev->set_ssa_names, entry->set_ssa_names); | |
916 bitmap_ior_into (prev->used_ssa_names, entry->used_ssa_names); | |
917 bitmap_ior_into (prev->bbs_visited, entry->bbs_visited); | |
918 bitmap_ior_into (prev->non_ssa_vars, entry->non_ssa_vars); | |
919 } | |
920 if (prev->earliest > entry->earliest) | |
921 prev->earliest = entry->earliest; | |
922 prev->overall_time += entry->overall_time; | |
923 prev->overall_size += entry->overall_size; | |
924 BITMAP_FREE (entry->set_ssa_names); | |
925 BITMAP_FREE (entry->used_ssa_names); | |
926 BITMAP_FREE (entry->bbs_visited); | |
927 BITMAP_FREE (entry->non_ssa_vars); | |
928 VEC_pop (stack_entry, stack); | |
929 } | |
930 else | |
931 VEC_pop (stack_entry, stack); | |
932 } | |
933 ENTRY_BLOCK_PTR->aux = NULL; | |
934 FOR_EACH_BB (bb) | |
935 bb->aux = NULL; | |
936 VEC_free (stack_entry, heap, stack); | |
937 BITMAP_FREE (current.ssa_names_to_pass); | |
938 } | |
939 | |
940 /* Split function at SPLIT_POINT. */ | |
941 | |
942 static void | |
943 split_function (struct split_point *split_point) | |
944 { | |
945 VEC (tree, heap) *args_to_pass = NULL; | |
946 bitmap args_to_skip = BITMAP_ALLOC (NULL); | |
947 tree parm; | |
948 int num = 0; | |
949 struct cgraph_node *node; | |
950 basic_block return_bb = find_return_bb (); | |
951 basic_block call_bb; | |
952 gimple_stmt_iterator gsi; | |
953 gimple call; | |
954 edge e; | |
955 edge_iterator ei; | |
956 tree retval = NULL, real_retval = NULL; | |
957 bool split_part_return_p = false; | |
958 gimple last_stmt = NULL; | |
959 bool conv_needed = false; | |
960 unsigned int i; | |
961 tree arg; | |
962 | |
963 if (dump_file) | |
964 { | |
965 fprintf (dump_file, "\n\nSplitting function at:\n"); | |
966 dump_split_point (dump_file, split_point); | |
967 } | |
968 | |
969 /* Collect the parameters of new function and args_to_skip bitmap. */ | |
970 for (parm = DECL_ARGUMENTS (current_function_decl); | |
971 parm; parm = DECL_CHAIN (parm), num++) | |
972 if (!is_gimple_reg (parm) | |
973 || !gimple_default_def (cfun, parm) | |
974 || !bitmap_bit_p (split_point->ssa_names_to_pass, | |
975 SSA_NAME_VERSION (gimple_default_def (cfun, parm)))) | |
976 bitmap_set_bit (args_to_skip, num); | |
977 else | |
978 { | |
979 arg = gimple_default_def (cfun, parm); | |
980 if (TYPE_MAIN_VARIANT (DECL_ARG_TYPE (parm)) | |
981 != TYPE_MAIN_VARIANT (TREE_TYPE (arg))) | |
982 { | |
983 conv_needed = true; | |
984 arg = fold_convert (DECL_ARG_TYPE (parm), arg); | |
985 } | |
986 VEC_safe_push (tree, heap, args_to_pass, arg); | |
987 } | |
988 | |
989 /* See if the split function will return. */ | |
990 FOR_EACH_EDGE (e, ei, return_bb->preds) | |
991 if (bitmap_bit_p (split_point->split_bbs, e->src->index)) | |
992 break; | |
993 if (e) | |
994 split_part_return_p = true; | |
995 | |
996 /* Add return block to what will become the split function. | |
997 We do not return; no return block is needed. */ | |
998 if (!split_part_return_p) | |
999 ; | |
1000 /* We have no return block, so nothing is needed. */ | |
1001 else if (return_bb == EXIT_BLOCK_PTR) | |
1002 ; | |
1003 /* When we do not want to return value, we need to construct | |
1004 new return block with empty return statement. | |
1005 FIXME: Once we are able to change return type, we should change function | |
1006 to return void instead of just outputting function with undefined return | |
1007 value. For structures this affects quality of codegen. */ | |
1008 else if (!split_point->split_part_set_retval | |
1009 && find_retval (return_bb)) | |
1010 { | |
1011 bool redirected = true; | |
1012 basic_block new_return_bb = create_basic_block (NULL, 0, return_bb); | |
1013 gimple_stmt_iterator gsi = gsi_start_bb (new_return_bb); | |
1014 gsi_insert_after (&gsi, gimple_build_return (NULL), GSI_NEW_STMT); | |
1015 while (redirected) | |
1016 { | |
1017 redirected = false; | |
1018 FOR_EACH_EDGE (e, ei, return_bb->preds) | |
1019 if (bitmap_bit_p (split_point->split_bbs, e->src->index)) | |
1020 { | |
1021 new_return_bb->count += e->count; | |
1022 new_return_bb->frequency += EDGE_FREQUENCY (e); | |
1023 redirect_edge_and_branch (e, new_return_bb); | |
1024 redirected = true; | |
1025 break; | |
1026 } | |
1027 } | |
1028 e = make_edge (new_return_bb, EXIT_BLOCK_PTR, 0); | |
1029 e->probability = REG_BR_PROB_BASE; | |
1030 e->count = new_return_bb->count; | |
1031 bitmap_set_bit (split_point->split_bbs, new_return_bb->index); | |
1032 } | |
1033 /* When we pass around the value, use existing return block. */ | |
1034 else | |
1035 bitmap_set_bit (split_point->split_bbs, return_bb->index); | |
1036 | |
1037 /* If RETURN_BB has virtual operand PHIs, they must be removed and the | |
1038 virtual operand marked for renaming as we change the CFG in a way that | |
1039 tree-inline is not able to compensate for. | |
1040 | |
1041 Note this can happen whether or not we have a return value. If we have | |
1042 a return value, then RETURN_BB may have PHIs for real operands too. */ | |
1043 if (return_bb != EXIT_BLOCK_PTR) | |
1044 { | |
1045 for (gsi = gsi_start_phis (return_bb); !gsi_end_p (gsi);) | |
1046 { | |
1047 gimple stmt = gsi_stmt (gsi); | |
1048 if (is_gimple_reg (gimple_phi_result (stmt))) | |
1049 { | |
1050 gsi_next (&gsi); | |
1051 continue; | |
1052 } | |
1053 mark_virtual_phi_result_for_renaming (stmt); | |
1054 remove_phi_node (&gsi, true); | |
1055 } | |
1056 } | |
1057 | |
1058 /* Now create the actual clone. */ | |
1059 rebuild_cgraph_edges (); | |
1060 node = cgraph_function_versioning (cgraph_node (current_function_decl), | |
1061 NULL, NULL, | |
1062 args_to_skip, | |
1063 split_point->split_bbs, | |
1064 split_point->entry_bb, "part"); | |
1065 /* For usual cloning it is enough to clear builtin only when signature | |
1066 changes. For partial inlining we however can not expect the part | |
1067 of builtin implementation to have same semantic as the whole. */ | |
1068 if (DECL_BUILT_IN (node->decl)) | |
1069 { | |
1070 DECL_BUILT_IN_CLASS (node->decl) = NOT_BUILT_IN; | |
1071 DECL_FUNCTION_CODE (node->decl) = (enum built_in_function) 0; | |
1072 } | |
1073 cgraph_node_remove_callees (cgraph_node (current_function_decl)); | |
1074 if (!split_part_return_p) | |
1075 TREE_THIS_VOLATILE (node->decl) = 1; | |
1076 if (dump_file) | |
1077 dump_function_to_file (node->decl, dump_file, dump_flags); | |
1078 | |
1079 /* Create the basic block we place call into. It is the entry basic block | |
1080 split after last label. */ | |
1081 call_bb = split_point->entry_bb; | |
1082 for (gsi = gsi_start_bb (call_bb); !gsi_end_p (gsi);) | |
1083 if (gimple_code (gsi_stmt (gsi)) == GIMPLE_LABEL) | |
1084 { | |
1085 last_stmt = gsi_stmt (gsi); | |
1086 gsi_next (&gsi); | |
1087 } | |
1088 else | |
1089 break; | |
1090 e = split_block (split_point->entry_bb, last_stmt); | |
1091 remove_edge (e); | |
1092 | |
1093 /* Produce the call statement. */ | |
1094 gsi = gsi_last_bb (call_bb); | |
1095 if (conv_needed) | |
1096 FOR_EACH_VEC_ELT (tree, args_to_pass, i, arg) | |
1097 if (!is_gimple_val (arg)) | |
1098 { | |
1099 arg = force_gimple_operand_gsi (&gsi, arg, true, NULL_TREE, | |
1100 false, GSI_NEW_STMT); | |
1101 VEC_replace (tree, args_to_pass, i, arg); | |
1102 } | |
1103 call = gimple_build_call_vec (node->decl, args_to_pass); | |
1104 gimple_set_block (call, DECL_INITIAL (current_function_decl)); | |
1105 | |
1106 /* We avoid address being taken on any variable used by split part, | |
1107 so return slot optimization is always possible. Moreover this is | |
1108 required to make DECL_BY_REFERENCE work. */ | |
1109 if (aggregate_value_p (DECL_RESULT (current_function_decl), | |
1110 TREE_TYPE (current_function_decl))) | |
1111 gimple_call_set_return_slot_opt (call, true); | |
1112 | |
1113 /* Update return value. This is bit tricky. When we do not return, | |
1114 do nothing. When we return we might need to update return_bb | |
1115 or produce a new return statement. */ | |
1116 if (!split_part_return_p) | |
1117 gsi_insert_after (&gsi, call, GSI_NEW_STMT); | |
1118 else | |
1119 { | |
1120 e = make_edge (call_bb, return_bb, | |
1121 return_bb == EXIT_BLOCK_PTR ? 0 : EDGE_FALLTHRU); | |
1122 e->count = call_bb->count; | |
1123 e->probability = REG_BR_PROB_BASE; | |
1124 | |
1125 /* If there is return basic block, see what value we need to store | |
1126 return value into and put call just before it. */ | |
1127 if (return_bb != EXIT_BLOCK_PTR) | |
1128 { | |
1129 real_retval = retval = find_retval (return_bb); | |
1130 | |
1131 if (real_retval && split_point->split_part_set_retval) | |
1132 { | |
1133 gimple_stmt_iterator psi; | |
1134 | |
1135 /* See if we need new SSA_NAME for the result. | |
1136 When DECL_BY_REFERENCE is true, retval is actually pointer to | |
1137 return value and it is constant in whole function. */ | |
1138 if (TREE_CODE (retval) == SSA_NAME | |
1139 && !DECL_BY_REFERENCE (DECL_RESULT (current_function_decl))) | |
1140 { | |
1141 retval = make_ssa_name (SSA_NAME_VAR (retval), call); | |
1142 | |
1143 /* See if there is PHI defining return value. */ | |
1144 for (psi = gsi_start_phis (return_bb); | |
1145 !gsi_end_p (psi); gsi_next (&psi)) | |
1146 if (is_gimple_reg (gimple_phi_result (gsi_stmt (psi)))) | |
1147 break; | |
1148 | |
1149 /* When there is PHI, just update its value. */ | |
1150 if (TREE_CODE (retval) == SSA_NAME | |
1151 && !gsi_end_p (psi)) | |
1152 add_phi_arg (gsi_stmt (psi), retval, e, UNKNOWN_LOCATION); | |
1153 /* Otherwise update the return BB itself. | |
1154 find_return_bb allows at most one assignment to return value, | |
1155 so update first statement. */ | |
1156 else | |
1157 { | |
1158 gimple_stmt_iterator bsi; | |
1159 for (bsi = gsi_start_bb (return_bb); !gsi_end_p (bsi); | |
1160 gsi_next (&bsi)) | |
1161 if (gimple_code (gsi_stmt (bsi)) == GIMPLE_RETURN) | |
1162 { | |
1163 gimple_return_set_retval (gsi_stmt (bsi), retval); | |
1164 break; | |
1165 } | |
1166 else if (gimple_code (gsi_stmt (bsi)) == GIMPLE_ASSIGN) | |
1167 { | |
1168 gimple_assign_set_rhs1 (gsi_stmt (bsi), retval); | |
1169 break; | |
1170 } | |
1171 update_stmt (gsi_stmt (bsi)); | |
1172 } | |
1173 } | |
1174 if (DECL_BY_REFERENCE (DECL_RESULT (current_function_decl))) | |
1175 gimple_call_set_lhs (call, build_simple_mem_ref (retval)); | |
1176 else | |
1177 gimple_call_set_lhs (call, retval); | |
1178 } | |
1179 gsi_insert_after (&gsi, call, GSI_NEW_STMT); | |
1180 } | |
1181 /* We don't use return block (there is either no return in function or | |
1182 multiple of them). So create new basic block with return statement. | |
1183 */ | |
1184 else | |
1185 { | |
1186 gimple ret; | |
1187 if (split_point->split_part_set_retval | |
1188 && !VOID_TYPE_P (TREE_TYPE (TREE_TYPE (current_function_decl)))) | |
1189 { | |
1190 retval = DECL_RESULT (current_function_decl); | |
1191 | |
1192 /* We use temporary register to hold value when aggregate_value_p | |
1193 is false. Similarly for DECL_BY_REFERENCE we must avoid extra | |
1194 copy. */ | |
1195 if (!aggregate_value_p (retval, TREE_TYPE (current_function_decl)) | |
1196 && !DECL_BY_REFERENCE (retval)) | |
1197 retval = create_tmp_reg (TREE_TYPE (retval), NULL); | |
1198 if (is_gimple_reg (retval)) | |
1199 { | |
1200 /* When returning by reference, there is only one SSA name | |
1201 assigned to RESULT_DECL (that is pointer to return value). | |
1202 Look it up or create new one if it is missing. */ | |
1203 if (DECL_BY_REFERENCE (retval)) | |
1204 { | |
1205 tree retval_name; | |
1206 if ((retval_name = gimple_default_def (cfun, retval)) | |
1207 != NULL) | |
1208 retval = retval_name; | |
1209 else | |
1210 { | |
1211 retval_name = make_ssa_name (retval, | |
1212 gimple_build_nop ()); | |
1213 set_default_def (retval, retval_name); | |
1214 retval = retval_name; | |
1215 } | |
1216 } | |
1217 /* Otherwise produce new SSA name for return value. */ | |
1218 else | |
1219 retval = make_ssa_name (retval, call); | |
1220 } | |
1221 if (DECL_BY_REFERENCE (DECL_RESULT (current_function_decl))) | |
1222 gimple_call_set_lhs (call, build_simple_mem_ref (retval)); | |
1223 else | |
1224 gimple_call_set_lhs (call, retval); | |
1225 } | |
1226 gsi_insert_after (&gsi, call, GSI_NEW_STMT); | |
1227 ret = gimple_build_return (retval); | |
1228 gsi_insert_after (&gsi, ret, GSI_NEW_STMT); | |
1229 } | |
1230 } | |
1231 free_dominance_info (CDI_DOMINATORS); | |
1232 free_dominance_info (CDI_POST_DOMINATORS); | |
1233 compute_inline_parameters (node); | |
1234 } | |
1235 | |
1236 /* Execute function splitting pass. */ | |
1237 | |
1238 static unsigned int | |
1239 execute_split_functions (void) | |
1240 { | |
1241 gimple_stmt_iterator bsi; | |
1242 basic_block bb; | |
1243 int overall_time = 0, overall_size = 0; | |
1244 int todo = 0; | |
1245 struct cgraph_node *node = cgraph_node (current_function_decl); | |
1246 | |
1247 if (flags_from_decl_or_type (current_function_decl) & ECF_NORETURN) | |
1248 { | |
1249 if (dump_file) | |
1250 fprintf (dump_file, "Not splitting: noreturn function.\n"); | |
1251 return 0; | |
1252 } | |
1253 if (MAIN_NAME_P (DECL_NAME (current_function_decl))) | |
1254 { | |
1255 if (dump_file) | |
1256 fprintf (dump_file, "Not splitting: main function.\n"); | |
1257 return 0; | |
1258 } | |
1259 /* This can be relaxed; function might become inlinable after splitting | |
1260 away the uninlinable part. */ | |
1261 if (!node->local.inlinable) | |
1262 { | |
1263 if (dump_file) | |
1264 fprintf (dump_file, "Not splitting: not inlinable.\n"); | |
1265 return 0; | |
1266 } | |
1267 if (node->local.disregard_inline_limits) | |
1268 { | |
1269 if (dump_file) | |
1270 fprintf (dump_file, "Not splitting: disregarding inline limits.\n"); | |
1271 return 0; | |
1272 } | |
1273 /* This can be relaxed; most of versioning tests actually prevents | |
1274 a duplication. */ | |
1275 if (!tree_versionable_function_p (current_function_decl)) | |
1276 { | |
1277 if (dump_file) | |
1278 fprintf (dump_file, "Not splitting: not versionable.\n"); | |
1279 return 0; | |
1280 } | |
1281 /* FIXME: we could support this. */ | |
1282 if (DECL_STRUCT_FUNCTION (current_function_decl)->static_chain_decl) | |
1283 { | |
1284 if (dump_file) | |
1285 fprintf (dump_file, "Not splitting: nested function.\n"); | |
1286 return 0; | |
1287 } | |
1288 | |
1289 /* See if it makes sense to try to split. | |
1290 It makes sense to split if we inline, that is if we have direct calls to | |
1291 handle or direct calls are possibly going to appear as result of indirect | |
1292 inlining or LTO. Also handle -fprofile-generate as LTO to allow non-LTO | |
1293 training for LTO -fprofile-use build. | |
1294 | |
1295 Note that we are not completely conservative about disqualifying functions | |
1296 called once. It is possible that the caller is called more then once and | |
1297 then inlining would still benefit. */ | |
1298 if ((!node->callers || !node->callers->next_caller) | |
1299 && !node->address_taken | |
1300 && (!flag_lto || !node->local.externally_visible)) | |
1301 { | |
1302 if (dump_file) | |
1303 fprintf (dump_file, "Not splitting: not called directly " | |
1304 "or called once.\n"); | |
1305 return 0; | |
1306 } | |
1307 | |
1308 /* FIXME: We can actually split if splitting reduces call overhead. */ | |
1309 if (!flag_inline_small_functions | |
1310 && !DECL_DECLARED_INLINE_P (current_function_decl)) | |
1311 { | |
1312 if (dump_file) | |
1313 fprintf (dump_file, "Not splitting: not autoinlining and function" | |
1314 " is not inline.\n"); | |
1315 return 0; | |
1316 } | |
1317 | |
1318 /* Compute local info about basic blocks and determine function size/time. */ | |
1319 VEC_safe_grow_cleared (bb_info, heap, bb_info_vec, last_basic_block + 1); | |
1320 memset (&best_split_point, 0, sizeof (best_split_point)); | |
1321 FOR_EACH_BB (bb) | |
1322 { | |
1323 int time = 0; | |
1324 int size = 0; | |
1325 int freq = compute_call_stmt_bb_frequency (current_function_decl, bb); | |
1326 | |
1327 if (dump_file && (dump_flags & TDF_DETAILS)) | |
1328 fprintf (dump_file, "Basic block %i\n", bb->index); | |
1329 | |
1330 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi)) | |
1331 { | |
1332 int this_time, this_size; | |
1333 gimple stmt = gsi_stmt (bsi); | |
1334 | |
1335 this_size = estimate_num_insns (stmt, &eni_size_weights); | |
1336 this_time = estimate_num_insns (stmt, &eni_time_weights) * freq; | |
1337 size += this_size; | |
1338 time += this_time; | |
1339 | |
1340 if (dump_file && (dump_flags & TDF_DETAILS)) | |
1341 { | |
1342 fprintf (dump_file, " freq:%6i size:%3i time:%3i ", | |
1343 freq, this_size, this_time); | |
1344 print_gimple_stmt (dump_file, stmt, 0, 0); | |
1345 } | |
1346 } | |
1347 overall_time += time; | |
1348 overall_size += size; | |
1349 VEC_index (bb_info, bb_info_vec, bb->index)->time = time; | |
1350 VEC_index (bb_info, bb_info_vec, bb->index)->size = size; | |
1351 } | |
1352 find_split_points (overall_time, overall_size); | |
1353 if (best_split_point.split_bbs) | |
1354 { | |
1355 split_function (&best_split_point); | |
1356 BITMAP_FREE (best_split_point.ssa_names_to_pass); | |
1357 BITMAP_FREE (best_split_point.split_bbs); | |
1358 todo = TODO_update_ssa | TODO_cleanup_cfg; | |
1359 } | |
1360 VEC_free (bb_info, heap, bb_info_vec); | |
1361 bb_info_vec = NULL; | |
1362 return todo; | |
1363 } | |
1364 | |
1365 /* Gate function splitting pass. When doing profile feedback, we want | |
1366 to execute the pass after profiling is read. So disable one in | |
1367 early optimization. */ | |
1368 | |
1369 static bool | |
1370 gate_split_functions (void) | |
1371 { | |
1372 return (flag_partial_inlining | |
1373 && !profile_arc_flag && !flag_branch_probabilities); | |
1374 } | |
1375 | |
1376 struct gimple_opt_pass pass_split_functions = | |
1377 { | |
1378 { | |
1379 GIMPLE_PASS, | |
1380 "fnsplit", /* name */ | |
1381 gate_split_functions, /* gate */ | |
1382 execute_split_functions, /* execute */ | |
1383 NULL, /* sub */ | |
1384 NULL, /* next */ | |
1385 0, /* static_pass_number */ | |
1386 TV_IPA_FNSPLIT, /* tv_id */ | |
1387 PROP_cfg, /* properties_required */ | |
1388 0, /* properties_provided */ | |
1389 0, /* properties_destroyed */ | |
1390 0, /* todo_flags_start */ | |
1391 TODO_dump_func /* todo_flags_finish */ | |
1392 } | |
1393 }; | |
1394 | |
1395 /* Gate feedback driven function splitting pass. | |
1396 We don't need to split when profiling at all, we are producing | |
1397 lousy code anyway. */ | |
1398 | |
1399 static bool | |
1400 gate_feedback_split_functions (void) | |
1401 { | |
1402 return (flag_partial_inlining | |
1403 && flag_branch_probabilities); | |
1404 } | |
1405 | |
1406 /* Execute function splitting pass. */ | |
1407 | |
1408 static unsigned int | |
1409 execute_feedback_split_functions (void) | |
1410 { | |
1411 unsigned int retval = execute_split_functions (); | |
1412 if (retval) | |
1413 retval |= TODO_rebuild_cgraph_edges; | |
1414 return retval; | |
1415 } | |
1416 | |
1417 struct gimple_opt_pass pass_feedback_split_functions = | |
1418 { | |
1419 { | |
1420 GIMPLE_PASS, | |
1421 "feedback_fnsplit", /* name */ | |
1422 gate_feedback_split_functions, /* gate */ | |
1423 execute_feedback_split_functions, /* execute */ | |
1424 NULL, /* sub */ | |
1425 NULL, /* next */ | |
1426 0, /* static_pass_number */ | |
1427 TV_IPA_FNSPLIT, /* tv_id */ | |
1428 PROP_cfg, /* properties_required */ | |
1429 0, /* properties_provided */ | |
1430 0, /* properties_destroyed */ | |
1431 0, /* todo_flags_start */ | |
1432 TODO_dump_func /* todo_flags_finish */ | |
1433 } | |
1434 }; |