Mercurial > hg > CbC > CbC_gcc
diff gcc/ipa-cp.c @ 0:a06113de4d67
first commit
author | kent <kent@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Fri, 17 Jul 2009 14:47:48 +0900 |
parents | |
children | 58ad6c70ea60 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/gcc/ipa-cp.c Fri Jul 17 14:47:48 2009 +0900 @@ -0,0 +1,1396 @@ +/* Interprocedural constant propagation + Copyright (C) 2005, 2006, 2007, 2008 Free Software Foundation, Inc. + Contributed by Razya Ladelsky <RAZYA@il.ibm.com> + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +/* Interprocedural constant propagation. The aim of interprocedural constant + propagation (IPCP) is to find which function's argument has the same + constant value in each invocation throughout the whole program. For example, + consider the following program: + + int g (int y) + { + printf ("value is %d",y); + } + + int f (int x) + { + g (x); + } + + int h (int y) + { + g (y); + } + + void main (void) + { + f (3); + h (3); + } + + + The IPCP algorithm will find that g's formal argument y is always called + with the value 3. + + The algorithm used is based on "Interprocedural Constant Propagation", by + Challahan David, Keith D Cooper, Ken Kennedy, Linda Torczon, Comp86, pg + 152-161 + + The optimization is divided into three stages: + + First stage - intraprocedural analysis + ======================================= + This phase computes jump_function and modification flags. + + A jump function for a callsite represents the values passed as an actual + arguments of a given callsite. 
There are three types of values: + Pass through - the caller's formal parameter is passed as an actual argument. + Constant - a constant is passed as an actual argument. + Unknown - neither of the above. + + The jump function info, ipa_jump_func, is stored in ipa_edge_args + structure (defined in ipa_prop.h and pointed to by cgraph_node->aux) + modified_flags are defined in ipa_node_params structure + (defined in ipa_prop.h and pointed to by cgraph_edge->aux). + + -ipcp_init_stage() is the first stage driver. + + Second stage - interprocedural analysis + ======================================== + This phase does the interprocedural constant propagation. + It computes lattices for all formal parameters in the program + and their value that may be: + TOP - unknown. + BOTTOM - non constant. + CONSTANT - constant value. + + Lattice describing a formal parameter p will have a constant value if all + callsites invoking this function have the same constant value passed to p. + + The lattices are stored in ipcp_lattice which is itself in ipa_node_params + structure (defined in ipa_prop.h and pointed to by cgraph_edge->aux). + + -ipcp_iterate_stage() is the second stage driver. + + Third phase - transformation of function code + ============================================ + Propagates the constant-valued formals into the function. + For each function whose parameters are constants, we create its clone. + + Then we process the clone in two ways: + 1. We insert an assignment statement 'parameter = const' at the beginning + of the cloned function. + 2. For read-only parameters that do not live in memory, we replace all their + uses with the constant. + + We also need to modify some callsites to call the cloned functions instead + of the original ones. For a callsite passing an argument found to be a + constant by IPCP, there are two different cases to handle: + 1. A constant is passed as an argument. 
In this case the callsite in the + should be redirected to call the cloned callee. + 2. A parameter (of the caller) passed as an argument (pass through + argument). In such cases both the caller and the callee have clones and + only the callsite in the cloned caller is redirected to call to the + cloned callee. + + This update is done in two steps: First all cloned functions are created + during a traversal of the call graph, during which all callsites are + redirected to call the cloned function. Then the callsites are traversed + and many calls redirected back to fit the description above. + + -ipcp_insert_stage() is the third phase driver. + +*/ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tree.h" +#include "target.h" +#include "cgraph.h" +#include "ipa-prop.h" +#include "tree-flow.h" +#include "tree-pass.h" +#include "flags.h" +#include "timevar.h" +#include "diagnostic.h" +#include "tree-dump.h" +#include "tree-inline.h" +#include "fibheap.h" +#include "params.h" + +/* Number of functions identified as candidates for cloning. When not cloning + we can simplify iterate stage not forcing it to go through the decision + on what is profitable and what not. */ +static int n_cloning_candidates; + +/* Maximal count found in program. */ +static gcov_type max_count; + +/* Cgraph nodes that has been completely replaced by cloning during iterate + * stage and will be removed after ipcp is finished. */ +static bitmap dead_nodes; + +static void ipcp_print_profile_data (FILE *); +static void ipcp_function_scale_print (FILE *); + +/* Get the original node field of ipa_node_params associated with node NODE. */ +static inline struct cgraph_node * +ipcp_get_orig_node (struct cgraph_node *node) +{ + return IPA_NODE_REF (node)->ipcp_orig_node; +} + +/* Return true if NODE describes a cloned/versioned function. 
*/ +static inline bool +ipcp_node_is_clone (struct cgraph_node *node) +{ + return (ipcp_get_orig_node (node) != NULL); +} + +/* Create ipa_node_params and its data structures for NEW_NODE. Set ORIG_NODE + as the ipcp_orig_node field in ipa_node_params. */ +static void +ipcp_init_cloned_node (struct cgraph_node *orig_node, + struct cgraph_node *new_node) +{ + ipa_check_create_node_params (); + ipa_initialize_node_params (new_node); + IPA_NODE_REF (new_node)->ipcp_orig_node = orig_node; +} + +/* Perform intraprocedrual analysis needed for ipcp. */ +static void +ipcp_analyze_node (struct cgraph_node *node) +{ + /* Unreachable nodes should have been eliminated before ipcp. */ + gcc_assert (node->needed || node->reachable); + + ipa_initialize_node_params (node); + ipa_detect_param_modifications (node); +} + +/* Recompute all local information since node might've got new + direct calls after cloning. */ +static void +ipcp_update_cloned_node (struct cgraph_node *new_node) +{ + /* We might've introduced new direct calls. */ + push_cfun (DECL_STRUCT_FUNCTION (new_node->decl)); + current_function_decl = new_node->decl; + rebuild_cgraph_edges (); + + /* Indirect inlinng rely on fact that we've already analyzed + the body.. */ + if (flag_indirect_inlining) + { + struct cgraph_edge *cs; + + ipcp_analyze_node (new_node); + + for (cs = new_node->callees; cs; cs = cs->next_callee) + { + ipa_count_arguments (cs); + ipa_compute_jump_functions (cs); + } + } + pop_cfun (); + current_function_decl = NULL; +} + +/* Return scale for NODE. */ +static inline gcov_type +ipcp_get_node_scale (struct cgraph_node *node) +{ + return IPA_NODE_REF (node)->count_scale; +} + +/* Set COUNT as scale for NODE. */ +static inline void +ipcp_set_node_scale (struct cgraph_node *node, gcov_type count) +{ + IPA_NODE_REF (node)->count_scale = count; +} + +/* Return whether LAT is a constant lattice. 
*/ +static inline bool +ipcp_lat_is_const (struct ipcp_lattice *lat) +{ + if (lat->type == IPA_CONST_VALUE) + return true; + else + return false; +} + +/* Return whether LAT is a constant lattice that ipa-cp can actually insert + into the code (i.e. constants excluding member pointers and pointers). */ +static inline bool +ipcp_lat_is_insertable (struct ipcp_lattice *lat) +{ + return lat->type == IPA_CONST_VALUE; +} + +/* Return true if LAT1 and LAT2 are equal. */ +static inline bool +ipcp_lats_are_equal (struct ipcp_lattice *lat1, struct ipcp_lattice *lat2) +{ + gcc_assert (ipcp_lat_is_const (lat1) && ipcp_lat_is_const (lat2)); + if (lat1->type != lat2->type) + return false; + + if (operand_equal_p (lat1->constant, lat2->constant, 0)) + return true; + + return false; +} + +/* Compute Meet arithmetics: + Meet (IPA_BOTTOM, x) = IPA_BOTTOM + Meet (IPA_TOP,x) = x + Meet (const_a,const_b) = IPA_BOTTOM, if const_a != const_b. + MEET (const_a,const_b) = const_a, if const_a == const_b.*/ +static void +ipa_lattice_meet (struct ipcp_lattice *res, struct ipcp_lattice *lat1, + struct ipcp_lattice *lat2) +{ + if (lat1->type == IPA_BOTTOM || lat2->type == IPA_BOTTOM) + { + res->type = IPA_BOTTOM; + return; + } + if (lat1->type == IPA_TOP) + { + res->type = lat2->type; + res->constant = lat2->constant; + return; + } + if (lat2->type == IPA_TOP) + { + res->type = lat1->type; + res->constant = lat1->constant; + return; + } + if (!ipcp_lats_are_equal (lat1, lat2)) + { + res->type = IPA_BOTTOM; + return; + } + res->type = lat1->type; + res->constant = lat1->constant; +} + +/* Return the lattice corresponding to the Ith formal parameter of the function + described by INFO. */ +static inline struct ipcp_lattice * +ipcp_get_lattice (struct ipa_node_params *info, int i) +{ + return &(info->params[i].ipcp_lattice); +} + +/* Given the jump function JFUNC, compute the lattice LAT that describes the + value coming down the callsite. 
INFO describes the caller node so that + pass-through jump functions can be evaluated. */ +static void +ipcp_lattice_from_jfunc (struct ipa_node_params *info, struct ipcp_lattice *lat, + struct ipa_jump_func *jfunc) +{ + if (jfunc->type == IPA_CONST) + { + lat->type = IPA_CONST_VALUE; + lat->constant = jfunc->value.constant; + } + else if (jfunc->type == IPA_PASS_THROUGH) + { + struct ipcp_lattice *caller_lat; + + caller_lat = ipcp_get_lattice (info, jfunc->value.formal_id); + lat->type = caller_lat->type; + lat->constant = caller_lat->constant; + } + else + lat->type = IPA_BOTTOM; +} + +/* True when OLD_LAT and NEW_LAT values are not the same. */ + +static bool +ipcp_lattice_changed (struct ipcp_lattice *old_lat, + struct ipcp_lattice *new_lat) +{ + if (old_lat->type == new_lat->type) + { + if (!ipcp_lat_is_const (old_lat)) + return false; + if (ipcp_lats_are_equal (old_lat, new_lat)) + return false; + } + return true; +} + +/* Print all ipcp_lattices of all functions to F. */ +static void +ipcp_print_all_lattices (FILE * f) +{ + struct cgraph_node *node; + int i, count; + + fprintf (f, "\nLattice:\n"); + for (node = cgraph_nodes; node; node = node->next) + { + struct ipa_node_params *info; + + if (!node->analyzed) + continue; + info = IPA_NODE_REF (node); + fprintf (f, " Node: %s:\n", cgraph_node_name (node)); + count = ipa_get_param_count (info); + for (i = 0; i < count; i++) + { + struct ipcp_lattice *lat = ipcp_get_lattice (info, i); + + fprintf (f, " param [%d]: ", i); + if (lat->type == IPA_CONST_VALUE) + { + fprintf (f, "type is CONST "); + print_generic_expr (f, lat->constant, 0); + fprintf (f, "\n"); + } + else if (lat->type == IPA_TOP) + fprintf (f, "type is TOP\n"); + else + fprintf (f, "type is BOTTOM\n"); + } + } +} + +/* Return true if this NODE is viable candidate for cloning. 
*/ +static bool +ipcp_cloning_candidate_p (struct cgraph_node *node) +{ + int n_calls = 0; + int n_hot_calls = 0; + gcov_type direct_call_sum = 0; + struct cgraph_edge *e; + + /* We never clone functions that are not visible from outside. + FIXME: in future we should clone such functions when they are called with + different constants, but current ipcp implementation is not good on this. + */ + if (!node->needed || !node->analyzed) + return false; + + if (cgraph_function_body_availability (node) <= AVAIL_OVERWRITABLE) + { + if (dump_file) + fprintf (dump_file, "Not considering %s for cloning; body is overwrittable.\n", + cgraph_node_name (node)); + return false; + } + if (!tree_versionable_function_p (node->decl)) + { + if (dump_file) + fprintf (dump_file, "Not considering %s for cloning; body is not versionable.\n", + cgraph_node_name (node)); + return false; + } + for (e = node->callers; e; e = e->next_caller) + { + direct_call_sum += e->count; + n_calls ++; + if (cgraph_maybe_hot_edge_p (e)) + n_hot_calls ++; + } + + if (!n_calls) + { + if (dump_file) + fprintf (dump_file, "Not considering %s for cloning; no direct calls.\n", + cgraph_node_name (node)); + return false; + } + if (node->local.inline_summary.self_insns < n_calls) + { + if (dump_file) + fprintf (dump_file, "Considering %s for cloning; code would shrink.\n", + cgraph_node_name (node)); + return true; + } + + if (!flag_ipa_cp_clone) + { + if (dump_file) + fprintf (dump_file, "Not considering %s for cloning; -fipa-cp-clone disabled.\n", + cgraph_node_name (node)); + return false; + } + + if (!optimize_function_for_speed_p (DECL_STRUCT_FUNCTION (node->decl))) + { + if (dump_file) + fprintf (dump_file, "Not considering %s for cloning; optimizing it for size.\n", + cgraph_node_name (node)); + return false; + } + + /* When profile is available and function is hot, propagate into it even if + calls seems cold; constant propagation can improve function's speed + significandly. 
*/ + if (max_count) + { + if (direct_call_sum > node->count * 90 / 100) + { + if (dump_file) + fprintf (dump_file, "Considering %s for cloning; usually called directly.\n", + cgraph_node_name (node)); + return true; + } + } + if (!n_hot_calls) + { + if (dump_file) + fprintf (dump_file, "Not considering %s for cloning; no hot calls.\n", + cgraph_node_name (node)); + } + if (dump_file) + fprintf (dump_file, "Considering %s for cloning.\n", + cgraph_node_name (node)); + return true; +} + +/* Initialize ipcp_lattices array. The lattices corresponding to supported + types (integers, real types and Fortran constants defined as const_decls) + are initialized to IPA_TOP, the rest of them to IPA_BOTTOM. */ +static void +ipcp_initialize_node_lattices (struct cgraph_node *node) +{ + int i; + struct ipa_node_params *info = IPA_NODE_REF (node); + enum ipa_lattice_type type; + + if (ipa_is_called_with_var_arguments (info)) + type = IPA_BOTTOM; + else if (!node->needed) + type = IPA_TOP; + /* When cloning is allowed, we can assume that externally visible functions + are not called. We will compensate this by cloning later. */ + else if (ipcp_cloning_candidate_p (node)) + type = IPA_TOP, n_cloning_candidates ++; + else + type = IPA_BOTTOM; + + for (i = 0; i < ipa_get_param_count (info) ; i++) + ipcp_get_lattice (info, i)->type = type; +} + +/* build INTEGER_CST tree with type TREE_TYPE and value according to LAT. + Return the tree. */ +static tree +build_const_val (struct ipcp_lattice *lat, tree tree_type) +{ + tree val; + + gcc_assert (ipcp_lat_is_const (lat)); + val = lat->constant; + + if (!useless_type_conversion_p (tree_type, TREE_TYPE (val))) + { + if (fold_convertible_p (tree_type, val)) + return fold_build1 (NOP_EXPR, tree_type, val); + else + return fold_build1 (VIEW_CONVERT_EXPR, tree_type, val); + } + return val; +} + +/* Compute the proper scale for NODE. 
It is the ratio between the number of + direct calls (represented on the incoming cgraph_edges) and sum of all + invocations of NODE (represented as count in cgraph_node). */ +static void +ipcp_compute_node_scale (struct cgraph_node *node) +{ + gcov_type sum; + struct cgraph_edge *cs; + + sum = 0; + /* Compute sum of all counts of callers. */ + for (cs = node->callers; cs != NULL; cs = cs->next_caller) + sum += cs->count; + if (node->count == 0) + ipcp_set_node_scale (node, 0); + else + ipcp_set_node_scale (node, sum * REG_BR_PROB_BASE / node->count); +} + +/* Initialization and computation of IPCP data structures. This is the initial + intraprocedural analysis of functions, which gathers information to be + propagated later on. */ +static void +ipcp_init_stage (void) +{ + struct cgraph_node *node; + struct cgraph_edge *cs; + + for (node = cgraph_nodes; node; node = node->next) + if (node->analyzed) + ipcp_analyze_node (node); + for (node = cgraph_nodes; node; node = node->next) + { + if (!node->analyzed) + continue; + /* building jump functions */ + for (cs = node->callees; cs; cs = cs->next_callee) + { + if (!cs->callee->analyzed) + continue; + ipa_count_arguments (cs); + if (ipa_get_cs_argument_count (IPA_EDGE_REF (cs)) + != ipa_get_param_count (IPA_NODE_REF (cs->callee))) + { + /* Handle cases of functions with + a variable number of parameters. */ + ipa_set_called_with_variable_arg (IPA_NODE_REF (cs->callee)); + if (flag_indirect_inlining) + ipa_compute_jump_functions (cs); + } + else + ipa_compute_jump_functions (cs); + } + } +} + +/* Return true if there are some formal parameters whose value is IPA_TOP (in + the whole compilation unit). Change their values to IPA_BOTTOM, since they + most probably get their values from outside of this compilation unit. 
*/ +static bool +ipcp_change_tops_to_bottom (void) +{ + int i, count; + struct cgraph_node *node; + bool prop_again; + + prop_again = false; + for (node = cgraph_nodes; node; node = node->next) + { + struct ipa_node_params *info = IPA_NODE_REF (node); + count = ipa_get_param_count (info); + for (i = 0; i < count; i++) + { + struct ipcp_lattice *lat = ipcp_get_lattice (info, i); + if (lat->type == IPA_TOP) + { + prop_again = true; + if (dump_file) + { + fprintf (dump_file, "Forcing param "); + print_generic_expr (dump_file, ipa_get_param (info, i), 0); + fprintf (dump_file, " of node %s to bottom.\n", + cgraph_node_name (node)); + } + lat->type = IPA_BOTTOM; + } + } + } + return prop_again; +} + +/* Interprocedural analysis. The algorithm propagates constants from the + caller's parameters to the callee's arguments. */ +static void +ipcp_propagate_stage (void) +{ + int i; + struct ipcp_lattice inc_lat = { IPA_BOTTOM, NULL }; + struct ipcp_lattice new_lat = { IPA_BOTTOM, NULL }; + struct ipcp_lattice *dest_lat; + struct cgraph_edge *cs; + struct ipa_jump_func *jump_func; + struct ipa_func_list *wl; + int count; + + ipa_check_create_node_params (); + ipa_check_create_edge_args (); + + /* Initialize worklist to contain all functions. 
*/ + wl = ipa_init_func_list (); + while (wl) + { + struct cgraph_node *node = ipa_pop_func_from_list (&wl); + struct ipa_node_params *info = IPA_NODE_REF (node); + + for (cs = node->callees; cs; cs = cs->next_callee) + { + struct ipa_node_params *callee_info = IPA_NODE_REF (cs->callee); + struct ipa_edge_args *args = IPA_EDGE_REF (cs); + + if (ipa_is_called_with_var_arguments (callee_info)) + continue; + + count = ipa_get_cs_argument_count (args); + for (i = 0; i < count; i++) + { + jump_func = ipa_get_ith_jump_func (args, i); + ipcp_lattice_from_jfunc (info, &inc_lat, jump_func); + dest_lat = ipcp_get_lattice (callee_info, i); + ipa_lattice_meet (&new_lat, &inc_lat, dest_lat); + if (ipcp_lattice_changed (&new_lat, dest_lat)) + { + dest_lat->type = new_lat.type; + dest_lat->constant = new_lat.constant; + ipa_push_func_to_list (&wl, cs->callee); + } + } + } + } +} + +/* Call the constant propagation algorithm and re-call it if necessary + (if there are undetermined values left). */ +static void +ipcp_iterate_stage (void) +{ + struct cgraph_node *node; + n_cloning_candidates = 0; + + if (dump_file) + fprintf (dump_file, "\nIPA iterate stage:\n\n"); + for (node = cgraph_nodes; node; node = node->next) + { + ipcp_initialize_node_lattices (node); + ipcp_compute_node_scale (node); + } + if (dump_file && (dump_flags & TDF_DETAILS)) + { + ipcp_print_all_lattices (dump_file); + ipcp_function_scale_print (dump_file); + } + + ipcp_propagate_stage (); + if (ipcp_change_tops_to_bottom ()) + /* Some lattices have changed from IPA_TOP to IPA_BOTTOM. + This change should be propagated. */ + { + gcc_assert (n_cloning_candidates); + ipcp_propagate_stage (); + } + if (dump_file) + { + fprintf (dump_file, "\nIPA lattices after propagation:\n"); + ipcp_print_all_lattices (dump_file); + if (dump_flags & TDF_DETAILS) + ipcp_print_profile_data (dump_file); + } +} + +/* Check conditions to forbid constant insertion to function described by + NODE. 
*/ +static inline bool +ipcp_node_modifiable_p (struct cgraph_node *node) +{ + /* Once we will be able to do in-place replacement, we can be more + lax here. */ + return tree_versionable_function_p (node->decl); +} + +/* Print count scale data structures. */ +static void +ipcp_function_scale_print (FILE * f) +{ + struct cgraph_node *node; + + for (node = cgraph_nodes; node; node = node->next) + { + if (!node->analyzed) + continue; + fprintf (f, "printing scale for %s: ", cgraph_node_name (node)); + fprintf (f, "value is " HOST_WIDE_INT_PRINT_DEC + " \n", (HOST_WIDE_INT) ipcp_get_node_scale (node)); + } +} + +/* Print counts of all cgraph nodes. */ +static void +ipcp_print_func_profile_counts (FILE * f) +{ + struct cgraph_node *node; + + for (node = cgraph_nodes; node; node = node->next) + { + fprintf (f, "function %s: ", cgraph_node_name (node)); + fprintf (f, "count is " HOST_WIDE_INT_PRINT_DEC + " \n", (HOST_WIDE_INT) node->count); + } +} + +/* Print counts of all cgraph edges. */ +static void +ipcp_print_call_profile_counts (FILE * f) +{ + struct cgraph_node *node; + struct cgraph_edge *cs; + + for (node = cgraph_nodes; node; node = node->next) + { + for (cs = node->callees; cs; cs = cs->next_callee) + { + fprintf (f, "%s -> %s ", cgraph_node_name (cs->caller), + cgraph_node_name (cs->callee)); + fprintf (f, "count is " HOST_WIDE_INT_PRINT_DEC " \n", + (HOST_WIDE_INT) cs->count); + } + } +} + +/* Print all counts and probabilities of cfg edges of all functions. 
*/
static void
ipcp_print_edge_profiles (FILE * f)
{
  struct cgraph_node *node;

  for (node = cgraph_nodes; node; node = node->next)
    {
      struct function *fun;
      basic_block bb, exit_bb;
      edge_iterator ei;
      edge e;

      fprintf (f, "function %s: \n", cgraph_node_name (node));
      if (!node->analyzed)
        continue;

      fun = DECL_STRUCT_FUNCTION (node->decl);
      exit_bb = EXIT_BLOCK_PTR_FOR_FUNCTION (fun);

      /* The entry block and its outgoing edges come first.  */
      bb = ENTRY_BLOCK_PTR_FOR_FUNCTION (fun);
      fprintf (f, "ENTRY: ");
      fprintf (f, " " HOST_WIDE_INT_PRINT_DEC
               " %d\n", (HOST_WIDE_INT) bb->count, bb->frequency);

      if (bb->succs)
        FOR_EACH_EDGE (e, ei, bb->succs)
          {
            if (e->dest == exit_bb)
              fprintf (f, "edge ENTRY -> EXIT, Count");
            else
              fprintf (f, "edge ENTRY -> %d, Count", e->dest->index);
            fprintf (f, " " HOST_WIDE_INT_PRINT_DEC
                     " Prob %d\n", (HOST_WIDE_INT) e->count,
                     e->probability);
          }

      /* Then every basic block visited by FOR_EACH_BB_FN, each followed by
         its outgoing edges.  */
      FOR_EACH_BB_FN (bb, fun)
        {
          fprintf (f, "bb[%d]: ", bb->index);
          fprintf (f, " " HOST_WIDE_INT_PRINT_DEC
                   " %d\n", (HOST_WIDE_INT) bb->count, bb->frequency);
          FOR_EACH_EDGE (e, ei, bb->succs)
            {
              if (e->dest == exit_bb)
                fprintf (f, "edge %d -> EXIT, Count", e->src->index);
              else
                fprintf (f, "edge %d -> %d, Count", e->src->index,
                         e->dest->index);
              fprintf (f, " " HOST_WIDE_INT_PRINT_DEC " Prob %d\n",
                       (HOST_WIDE_INT) e->count, e->probability);
            }
        }
    }
}

/* Print counts and frequencies for all basic blocks of all functions.
*/ +static void +ipcp_print_bb_profiles (FILE * f) +{ + basic_block bb; + struct cgraph_node *node; + + for (node = cgraph_nodes; node; node = node->next) + { + fprintf (f, "function %s: \n", cgraph_node_name (node)); + if (node->analyzed) + { + bb = + ENTRY_BLOCK_PTR_FOR_FUNCTION (DECL_STRUCT_FUNCTION (node->decl)); + fprintf (f, "ENTRY: Count"); + fprintf (f, " " HOST_WIDE_INT_PRINT_DEC + " Frequency %d\n", (HOST_WIDE_INT) bb->count, + bb->frequency); + + FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (node->decl)) + { + fprintf (f, "bb[%d]: Count", bb->index); + fprintf (f, " " HOST_WIDE_INT_PRINT_DEC + " Frequency %d\n", (HOST_WIDE_INT) bb->count, + bb->frequency); + } + bb = + EXIT_BLOCK_PTR_FOR_FUNCTION (DECL_STRUCT_FUNCTION (node->decl)); + fprintf (f, "EXIT: Count"); + fprintf (f, " " HOST_WIDE_INT_PRINT_DEC + " Frequency %d\n", (HOST_WIDE_INT) bb->count, + bb->frequency); + + } + } +} + +/* Print profile info for all functions. */ +static void +ipcp_print_profile_data (FILE * f) +{ + fprintf (f, "\nNODE COUNTS :\n"); + ipcp_print_func_profile_counts (f); + fprintf (f, "\nCS COUNTS stage:\n"); + ipcp_print_call_profile_counts (f); + fprintf (f, "\nBB COUNTS and FREQUENCIES :\n"); + ipcp_print_bb_profiles (f); + fprintf (f, "\nCFG EDGES COUNTS and PROBABILITIES :\n"); + ipcp_print_edge_profiles (f); +} + +/* Build and initialize ipa_replace_map struct according to LAT. This struct is + processed by versioning, which operates according to the flags set. + PARM_TREE is the formal parameter found to be constant. LAT represents the + constant. 
*/ +static struct ipa_replace_map * +ipcp_create_replace_map (tree parm_tree, struct ipcp_lattice *lat) +{ + struct ipa_replace_map *replace_map; + tree const_val; + + replace_map = XCNEW (struct ipa_replace_map); + const_val = build_const_val (lat, TREE_TYPE (parm_tree)); + if (dump_file) + { + fprintf (dump_file, " replacing param "); + print_generic_expr (dump_file, parm_tree, 0); + fprintf (dump_file, " with const "); + print_generic_expr (dump_file, const_val, 0); + fprintf (dump_file, "\n"); + } + replace_map->old_tree = parm_tree; + replace_map->new_tree = const_val; + replace_map->replace_p = true; + replace_map->ref_p = false; + + return replace_map; +} + +/* Return true if this callsite should be redirected to the original callee + (instead of the cloned one). */ +static bool +ipcp_need_redirect_p (struct cgraph_edge *cs) +{ + struct ipa_node_params *orig_callee_info; + int i, count; + struct ipa_jump_func *jump_func; + struct cgraph_node *node = cs->callee, *orig; + + if (!n_cloning_candidates) + return false; + + if ((orig = ipcp_get_orig_node (node)) != NULL) + node = orig; + if (ipcp_get_orig_node (cs->caller)) + return false; + + orig_callee_info = IPA_NODE_REF (node); + count = ipa_get_param_count (orig_callee_info); + for (i = 0; i < count; i++) + { + struct ipcp_lattice *lat = ipcp_get_lattice (orig_callee_info, i); + if (ipcp_lat_is_const (lat)) + { + jump_func = ipa_get_ith_jump_func (IPA_EDGE_REF (cs), i); + if (jump_func->type != IPA_CONST) + return true; + } + } + + return false; +} + +/* Fix the callsites and the call graph after function cloning was done. 
*/ +static void +ipcp_update_callgraph (void) +{ + struct cgraph_node *node; + + for (node = cgraph_nodes; node; node = node->next) + if (node->analyzed && ipcp_node_is_clone (node)) + { + bitmap args_to_skip = BITMAP_ALLOC (NULL); + struct cgraph_node *orig_node = ipcp_get_orig_node (node); + struct ipa_node_params *info = IPA_NODE_REF (orig_node); + int i, count = ipa_get_param_count (info); + struct cgraph_edge *cs, *next; + + for (i = 0; i < count; i++) + { + struct ipcp_lattice *lat = ipcp_get_lattice (info, i); + tree parm_tree = ipa_get_param (info, i); + + /* We can proactively remove obviously unused arguments. */ + if (is_gimple_reg (parm_tree) + && !gimple_default_def (DECL_STRUCT_FUNCTION (orig_node->decl), + parm_tree)) + { + bitmap_set_bit (args_to_skip, i); + continue; + } + + if (lat->type == IPA_CONST_VALUE) + bitmap_set_bit (args_to_skip, i); + } + for (cs = node->callers; cs; cs = next) + { + next = cs->next_caller; + if (ipcp_node_is_clone (cs->caller) || !ipcp_need_redirect_p (cs)) + { + gimple new_stmt; + gimple_stmt_iterator gsi; + + current_function_decl = cs->caller->decl; + push_cfun (DECL_STRUCT_FUNCTION (cs->caller->decl)); + + new_stmt = gimple_call_copy_skip_args (cs->call_stmt, + args_to_skip); + gsi = gsi_for_stmt (cs->call_stmt); + gsi_replace (&gsi, new_stmt, true); + cgraph_set_call_stmt (cs, new_stmt); + pop_cfun (); + current_function_decl = NULL; + } + else + { + cgraph_redirect_edge_callee (cs, orig_node); + gimple_call_set_fndecl (cs->call_stmt, orig_node->decl); + } + } + } +} + +/* Update all cfg basic blocks in NODE according to SCALE. */ +static void +ipcp_update_bb_counts (struct cgraph_node *node, gcov_type scale) +{ + basic_block bb; + + FOR_ALL_BB_FN (bb, DECL_STRUCT_FUNCTION (node->decl)) + bb->count = bb->count * scale / REG_BR_PROB_BASE; +} + +/* Update all cfg edges in NODE according to SCALE. 
*/
+static void
+ipcp_update_edges_counts (struct cgraph_node *node, gcov_type scale)
+{
+  basic_block bb;
+  edge_iterator ei;
+  edge e;
+
+  /* SCALE is applied as a fraction of REG_BR_PROB_BASE to the count of
+     every successor edge of every basic block in NODE's function.  */
+  FOR_ALL_BB_FN (bb, DECL_STRUCT_FUNCTION (node->decl))
+    FOR_EACH_EDGE (e, ei, bb->succs)
+      e->count = e->count * scale / REG_BR_PROB_BASE;
+}
+
+/* Update profiling info for versioned functions and the functions they were
+   versioned from.  For every clone found in cgraph_nodes, the scale returned
+   by ipcp_get_node_scale for its original (in units of REG_BR_PROB_BASE) is
+   the share of the original's counts handed to the clone; the original keeps
+   the complementary share.  The node counts and the outgoing call edge
+   counts are split here, then ipcp_update_bb_counts and
+   ipcp_update_edges_counts are applied to both functions.  */
+static void
+ipcp_update_profiling (void)
+{
+  struct cgraph_node *node, *orig_node;
+  gcov_type scale, scale_complement;
+  struct cgraph_edge *cs;
+
+  for (node = cgraph_nodes; node; node = node->next)
+    {
+      if (ipcp_node_is_clone (node))
+        {
+          orig_node = ipcp_get_orig_node (node);
+          scale = ipcp_get_node_scale (orig_node);
+          /* The clone's count is derived from the original's count, so it
+             has to be computed before the original's count is reduced.  */
+          node->count = orig_node->count * scale / REG_BR_PROB_BASE;
+          scale_complement = REG_BR_PROB_BASE - scale;
+          orig_node->count =
+            orig_node->count * scale_complement / REG_BR_PROB_BASE;
+          for (cs = node->callees; cs; cs = cs->next_callee)
+            cs->count = cs->count * scale / REG_BR_PROB_BASE;
+          for (cs = orig_node->callees; cs; cs = cs->next_callee)
+            cs->count = cs->count * scale_complement / REG_BR_PROB_BASE;
+          ipcp_update_bb_counts (node, scale);
+          ipcp_update_bb_counts (orig_node, scale_complement);
+          ipcp_update_edges_counts (node, scale);
+          ipcp_update_edges_counts (orig_node, scale_complement);
+        }
+    }
+}
+
+/* If NODE was cloned, how much would program grow?  Return 0 when the
+   original would not have to be kept, i.e. NODE is not marked needed and
+   every caller edge is either self-recursive or does not satisfy
+   ipcp_need_redirect_p.  Otherwise return NODE's self_insns reduced by one
+   unit per removable argument per redirectable caller, clamped at zero.  */
+static long
+ipcp_estimate_growth (struct cgraph_node *node)
+{
+  struct cgraph_edge *cs;
+  int redirectable_node_callers = 0;
+  int removable_args = 0;
+  bool need_original = node->needed;
+  struct ipa_node_params *info;
+  int i, count;
+  int growth;
+
+  for (cs = node->callers; cs != NULL; cs = cs->next_caller)
+    if (cs->caller == node || !ipcp_need_redirect_p (cs))
+      redirectable_node_callers++;
+    else
+      need_original = true;
+
+  /* If we will be able to fully replace original node, we never increase
+     program size.  */
+  if (!need_original)
+    return 0;
+
+  info = IPA_NODE_REF (node);
+  count = ipa_get_param_count (info);
+  for (i = 0; i < count; i++)
+    {
+      struct ipcp_lattice *lat = ipcp_get_lattice (info, i);
+      tree parm_tree = ipa_get_param (info, i);
+
+      /* We can proactively remove obviously unused arguments.  */
+      if (is_gimple_reg (parm_tree)
+          && !gimple_default_def (DECL_STRUCT_FUNCTION (node->decl),
+                                  parm_tree))
+        removable_args++;
+
+      /* NOTE(review): a parameter that is both unused and constant is
+         counted twice here, whereas ipcp_insert_stage handles such a
+         parameter once (it `continue's after the unused check).  Confirm
+         whether the double count is intended.  */
+      if (lat->type == IPA_CONST_VALUE)
+        removable_args++;
+    }
+
+  /* We make just very simple estimate of savings for removal of operand from
+     call site.  Precise cost is difficult to get, as our size metric counts
+     constants and moves as free.  Generally we are looking for cases that
+     small function is called very many times.  */
+  growth = node->local.inline_summary.self_insns
+           - removable_args * redirectable_node_callers;
+  if (growth < 0)
+    return 0;
+  return growth;
+}
+
+
+/* Estimate cost of cloning NODE; ipcp_insert_stage uses the result as the
+   heap key.  Return 0 when ipcp_estimate_growth reports no growth.
+   Otherwise the growth, multiplied by 1000, is divided by a weight computed
+   from the callers that are not in dead_nodes and do not satisfy
+   ipcp_need_redirect_p: their summed counts relative to max_count when
+   max_count is nonzero, else their summed frequencies relative to
+   REG_BR_PROB_BASE.  The quotient plus one is returned.  */
+static long
+ipcp_estimate_cloning_cost (struct cgraph_node *node)
+{
+  /* Both sums start at 1 rather than 0.  */
+  int freq_sum = 1;
+  gcov_type count_sum = 1;
+  struct cgraph_edge *e;
+  int cost;
+
+  cost = ipcp_estimate_growth (node) * 1000;
+  if (!cost)
+    {
+      if (dump_file)
+        fprintf (dump_file, "Versioning of %s will save code size\n",
+                 cgraph_node_name (node));
+      return 0;
+    }
+
+  for (e = node->callers; e; e = e->next_caller)
+    if (!bitmap_bit_p (dead_nodes, e->caller->uid)
+        && !ipcp_need_redirect_p (e))
+      {
+        count_sum += e->count;
+        freq_sum += e->frequency + 1;
+      }
+
+  /* The "+ 1" keeps the divisor nonzero.  */
+  if (max_count)
+    cost /= count_sum * 1000 / max_count + 1;
+  else
+    cost /= freq_sum * 1000 / REG_BR_PROB_BASE + 1;
+  if (dump_file)
+    fprintf (dump_file, "Cost of versioning %s is %i, (size: %i, freq: %i)\n",
+             cgraph_node_name (node), cost, node->local.inline_summary.self_insns,
+             freq_sum);
+  return cost + 1;
+}
+
+/* Return number of live constant parameters.  A parameter of NODE is counted
+   when its lattice satisfies ipcp_lat_is_insertable and it is not a gimple
+   register without a default definition in NODE's function.  */
+static int
+ipcp_const_param_count (struct cgraph_node *node)
+{
+  int const_param = 0;
+  struct ipa_node_params *info = IPA_NODE_REF (node);
+  int count = ipa_get_param_count (info);
+  int i;
+
+  for (i = 0; i < count; i++)
+    {
+      struct ipcp_lattice *lat = ipcp_get_lattice (info, i);
+      tree parm_tree = ipa_get_param (info, i);
+      if (ipcp_lat_is_insertable (lat)
+          /* Do not count obviously unused arguments.  */
+          && (!is_gimple_reg (parm_tree)
+              || gimple_default_def (DECL_STRUCT_FUNCTION (node->decl),
+                                     parm_tree)))
+        const_param++;
+    }
+  return const_param;
+}
+
+/* Propagate the constant parameters found by ipcp_iterate_stage()
+   to the function's code.
+
+   The work is done in these steps:
+   - compute max_count and the unit size, and from them the limit on newly
+     created instructions (max_new_insns);
+   - put every analyzed, modifiable node that is not called with variable
+     arguments and has at least one live constant parameter into a heap
+     keyed by ipcp_estimate_cloning_cost;
+   - extract nodes in heap order and version them until the heap is empty
+     or the growth limit would be exceeded, re-keying the callees of each
+     versioned node that are still in the heap;
+   - drain the nodes left in the heap, then update the callgraph and the
+     profile.  */
+static void
+ipcp_insert_stage (void)
+{
+  struct cgraph_node *node, *node1 = NULL;
+  int i;
+  VEC (cgraph_edge_p, heap) * redirect_callers;
+  varray_type replace_trees;
+  int node_callers, count;
+  tree parm_tree;
+  struct ipa_replace_map *replace_param;
+  fibheap_t heap;
+  long overall_insns = 0, new_insns = 0;
+  long max_new_insns;
+
+  ipa_check_create_node_params ();
+  ipa_check_create_edge_args ();
+  if (dump_file)
+    fprintf (dump_file, "\nIPA insert stage:\n\n");
+
+  dead_nodes = BITMAP_ALLOC (NULL);
+
+  for (node = cgraph_nodes; node; node = node->next)
+    if (node->analyzed)
+      {
+        if (node->count > max_count)
+          max_count = node->count;
+        overall_insns += node->local.inline_summary.self_insns;
+      }
+
+  /* The growth budget is PARAM_IPCP_UNIT_GROWTH percent of the unit size,
+     where units smaller than PARAM_LARGE_UNIT_INSNS are treated as having
+     that size.  */
+  max_new_insns = overall_insns;
+  if (max_new_insns < PARAM_VALUE (PARAM_LARGE_UNIT_INSNS))
+    max_new_insns = PARAM_VALUE (PARAM_LARGE_UNIT_INSNS);
+  max_new_insns = max_new_insns * PARAM_VALUE (PARAM_IPCP_UNIT_GROWTH) / 100 + 1;
+
+  /* First collect all functions we proved to have constant arguments to heap.  */
+  heap = fibheap_new ();
+  for (node = cgraph_nodes; node; node = node->next)
+    {
+      struct ipa_node_params *info;
+      /* Propagation of the constant is forbidden in certain conditions.  */
+      if (!node->analyzed || !ipcp_node_modifiable_p (node))
+        continue;
+      info = IPA_NODE_REF (node);
+      if (ipa_is_called_with_var_arguments (info))
+        continue;
+      /* node->aux remembers the heap entry so that the key can be
+         recomputed later; it is non-NULL only while NODE is in the heap.  */
+      if (ipcp_const_param_count (node))
+        node->aux = fibheap_insert (heap, ipcp_estimate_cloning_cost (node), node);
+    }
+
+  /* Now clone in priority order until code size growth limits are met or
+     heap is emptied.  */
+  while (!fibheap_empty (heap))
+    {
+      struct ipa_node_params *info;
+      int growth = 0;
+      bitmap args_to_skip;
+      struct cgraph_edge *cs;
+
+      node = (struct cgraph_node *)fibheap_extract_min (heap);
+      node->aux = NULL;
+      if (dump_file)
+        fprintf (dump_file, "considering function %s\n",
+                 cgraph_node_name (node));
+
+      growth = ipcp_estimate_growth (node);
+
+      if (new_insns + growth > max_new_insns)
+        {
+          /* NODE has already been taken off the heap, so the drain loop
+             below will not see it; report it as skipped here.  */
+          if (dump_file)
+            fprintf (dump_file, "skipping function %s\n",
+                     cgraph_node_name (node));
+          break;
+        }
+      if (growth
+          && optimize_function_for_size_p (DECL_STRUCT_FUNCTION (node->decl)))
+        {
+          if (dump_file)
+            fprintf (dump_file, "Not versioning, cold code would grow\n");
+          continue;
+        }
+
+      new_insns += growth;
+
+      /* Look if original function becomes dead after cloning.  */
+      for (cs = node->callers; cs != NULL; cs = cs->next_caller)
+        if (cs->caller == node || ipcp_need_redirect_p (cs))
+          break;
+      if (!cs && !node->needed)
+        bitmap_set_bit (dead_nodes, node->uid);
+
+      info = IPA_NODE_REF (node);
+      count = ipa_get_param_count (info);
+
+      VARRAY_GENERIC_PTR_INIT (replace_trees, ipcp_const_param_count (node),
+                               "replace_trees");
+      args_to_skip = BITMAP_ALLOC (NULL);
+      for (i = 0; i < count; i++)
+        {
+          struct ipcp_lattice *lat = ipcp_get_lattice (info, i);
+          parm_tree = ipa_get_param (info, i);
+
+          /* We can proactively remove obviously unused arguments.  */
+          if (is_gimple_reg (parm_tree)
+              && !gimple_default_def (DECL_STRUCT_FUNCTION (node->decl),
+                                      parm_tree))
+            {
+              bitmap_set_bit (args_to_skip, i);
+              continue;
+            }
+
+          /* A constant parameter gets a replace map and is dropped from
+             the argument list as well.  */
+          if (lat->type == IPA_CONST_VALUE)
+            {
+              replace_param =
+                ipcp_create_replace_map (parm_tree, lat);
+              VARRAY_PUSH_GENERIC_PTR (replace_trees, replace_param);
+              bitmap_set_bit (args_to_skip, i);
+            }
+        }
+
+      /* Compute how many callers node has.  */
+      node_callers = 0;
+      for (cs = node->callers; cs != NULL; cs = cs->next_caller)
+        node_callers++;
+      redirect_callers = VEC_alloc (cgraph_edge_p, heap, node_callers);
+      for (cs = node->callers; cs != NULL; cs = cs->next_caller)
+        VEC_quick_push (cgraph_edge_p, redirect_callers, cs);
+
+      /* Redirecting all the callers of the node to the
+         new versioned node.  */
+      node1 =
+        cgraph_function_versioning (node, redirect_callers, replace_trees,
+                                    args_to_skip);
+      BITMAP_FREE (args_to_skip);
+      VEC_free (cgraph_edge_p, heap, redirect_callers);
+      VARRAY_CLEAR (replace_trees);
+      /* NOTE(review): when versioning fails, new_insns and dead_nodes have
+         already been updated for NODE above although no clone exists.
+         Confirm this is acceptable.  */
+      if (node1 == NULL)
+        continue;
+      if (dump_file)
+        fprintf (dump_file, "versioned function %s with growth %i, overall %i\n",
+                 cgraph_node_name (node), (int)growth, (int)new_insns);
+      ipcp_init_cloned_node (node, node1);
+
+      /* We've possibly introduced direct calls.  */
+      ipcp_update_cloned_node (node1);
+
+      if (dump_file)
+        dump_function_to_file (node1->decl, dump_file, dump_flags);
+
+      /* Callees of NODE that are still queued (non-NULL aux) are removed
+         from the heap and reinserted with a freshly estimated cost.  */
+      for (cs = node->callees; cs; cs = cs->next_callee)
+        if (cs->callee->aux)
+          {
+            fibheap_delete_node (heap, (fibnode_t) cs->callee->aux);
+            cs->callee->aux = fibheap_insert (heap,
+                                              ipcp_estimate_cloning_cost (cs->callee),
+                                              cs->callee);
+          }
+    }
+
+  /* Drain whatever is left once the growth limit stopped the loop above.
+     Each node is extracted before its name is printed, so the dump names
+     the node actually being skipped, including the last one.  */
+  while (!fibheap_empty (heap))
+    {
+      node = (struct cgraph_node *) fibheap_extract_min (heap);
+      node->aux = NULL;
+      if (dump_file)
+        fprintf (dump_file, "skipping function %s\n",
+                 cgraph_node_name (node));
+    }
+  fibheap_delete (heap);
+  BITMAP_FREE (dead_nodes);
+  ipcp_update_callgraph ();
+  ipcp_update_profiling ();
+}
+
+/* The IPCP driver; installed as the execute hook of pass_ipa_cp.  Removes
+   unreachable cgraph nodes, runs the propagation stage and then the insert
+   stage, and finally frees the IPA structures.  Always returns 0.  */
+static unsigned int
+ipcp_driver (void)
+{
+  cgraph_remove_unreachable_nodes (true,dump_file);
+  if (dump_file)
+    {
+      fprintf (dump_file, "\nIPA structures before propagation:\n");
+      if (dump_flags & TDF_DETAILS)
+        ipa_print_all_params (dump_file);
+      ipa_print_all_jump_functions (dump_file);
+    }
+  /* 2. Do the interprocedural propagation.  */
+  ipcp_iterate_stage ();
+  /* 3. Insert the constants found to the functions.  */
+  ipcp_insert_stage ();
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    {
+      fprintf (dump_file, "\nProfiling info after insert stage:\n");
+      ipcp_print_profile_data (dump_file);
+    }
+  /* Free all IPCP structures.  */
+  free_all_ipa_structures_after_ipa_cp ();
+  if (dump_file)
+    fprintf (dump_file, "\nIPA constant propagation end\n");
+  return 0;
+}
+
+/* Summary generation hook of pass_ipa_cp: create the per-node and per-edge
+   IPA structures, register the cgraph hooks and run the init stage.  */
+static void
+ipcp_generate_summary (void)
+{
+  if (dump_file)
+    fprintf (dump_file, "\nIPA constant propagation start:\n");
+  ipa_check_create_node_params ();
+  ipa_check_create_edge_args ();
+  ipa_register_cgraph_hooks ();
+  /* 1. Call the init stage to initialize
+     the ipa_node_params and ipa_edge_args structures.  */
+  ipcp_init_stage ();
+}
+
+/* Gate for IPCP optimization: the pass runs when flag_ipa_cp is set.  */
+static bool
+cgraph_gate_cp (void)
+{
+  return flag_ipa_cp;
+}
+
+/* Pass descriptor for interprocedural constant propagation.  */
+struct ipa_opt_pass pass_ipa_cp =
+{
+ {
+  IPA_PASS,
+  "cp",                         /* name */
+  cgraph_gate_cp,               /* gate */
+  ipcp_driver,                  /* execute */
+  NULL,                         /* sub */
+  NULL,                         /* next */
+  0,                            /* static_pass_number */
+  TV_IPA_CONSTANT_PROP,         /* tv_id */
+  0,                            /* properties_required */
+  PROP_trees,                   /* properties_provided */
+  0,                            /* properties_destroyed */
+  0,                            /* todo_flags_start */
+  TODO_dump_cgraph | TODO_dump_func |
+  TODO_remove_functions         /* todo_flags_finish */
+ },
+ ipcp_generate_summary,                 /* generate_summary */
+ NULL,                                  /* write_summary */
+ NULL,                                  /* read_summary */
+ NULL,                                  /* function_read_summary */
+ 0,                                     /* TODOs */
+ NULL,                                  /* function_transform */
+ NULL,                                  /* variable_transform */
+};