CbC/CbC_gcc: gcc/matrix-reorg.c comparison

comparison gcc/matrix-reorg.c @ 55:77e2b8dfacca gcc-4.4.5

update it from 4.4.3 to 4.5.0

author	ryoma <e075725@ie.u-ryukyu.ac.jp>
date	Fri, 12 Feb 2010 23:39:51 +0900
parents	a06113de4d67
children	b7f97abdc517

comparison

equal deleted inserted replaced

-:c156f1bd5cd9
+:77e2b8dfacca
 /* Matrix layout transformations.
 Copyright (C) 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
 Contributed by Razya Ladelsky <razya@il.ibm.com>
 Originally written by Revital Eres and Mustafa Hagog.
 This file is part of GCC.
 GCC is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free
 Software Foundation; either version 3, or (at your option) any later
 You should have received a copy of the GNU General Public License
 along with GCC; see the file COPYING3.  If not see
 <http://www.gnu.org/licenses/>.  */
 /*
 Matrix flattening optimization tries to replace a N-dimensional
 matrix with its equivalent M-dimensional matrix, where M < N.
 This first implementation focuses on global matrices defined dynamically.
 When M==1, we actually flatten the whole matrix.
 For instance consider a two-dimensional array a [dim1] [dim2].
 The two main phases of the optimization are the analysis
 and transformation.
 The driver of the optimization is matrix_reorg ().
 Analysis phase:
 ===============
 We'll number the dimensions outside-in, meaning the most external
 is 0, then 1, and so on.
 The analysis part of the optimization determines K, the escape
 level of a N-dimensional matrix (K <= N), that allows flattening of
 the external dimensions 0,1,..., K-1. Escape level 0 means that the
 whole matrix escapes and no flattening is possible.
 The analysis part is implemented in analyze_matrix_allocation_site()
 and analyze_matrix_accesses().
 Transformation phase:
 =====================
 In this phase we define the new flattened matrices that replace the
 original matrices in the code.
 Implemented in transform_allocation_sites(),
 transform_access_sites().
 Matrix Transposing
 ==================
 The idea of Matrix Transposing is organizing the matrix in a different
 layout such that the dimensions are reordered.
 This could produce better cache behavior in some cases.
 For example, let's look at the matrix accesses in the following loop:
 for (i=0; i<N; i++)
 for (j=0; j<M; j++)
 access to a[i][j]
 This loop can produce good cache behavior because the elements of
 the inner dimension are accessed sequentially.
 However, if the accesses of the matrix were of the following form:
 for (i=0; i<N; i++)
 for (j=0; j<M; j++)
 access to a[j][i]
 In this loop we iterate the columns and not the rows.
 Therefore, swapping the rows and columns
 would have produced a layout with better (cache) locality.
 Replacing the dimensions of the matrix is called matrix transposing.
 This example, of course, could be extended to multi-dimensional matrices
 as well.
 Since a program could include all kinds of accesses, there is a decision
 mechanism, implemented in analyze_transpose(), which implements a
 heuristic that tries to determine whether to transpose the matrix or not,
 according to the form of the more dominant accesses.
 This decision is transferred to the flattening mechanism, and whether
 the matrix was transposed or not, the matrix is flattened (if possible).
 This decision making is based on profiling information and loop information.
 If profiling information is available, the decision-making mechanism will
 be applied; otherwise the matrix will only be flattened (if possible).
 Both optimizations are described in the paper "Matrix flattening and
 transposing in GCC" which was presented in GCC summit 2006.
 http://www.gccsummit.org/2006/2006-GCC-Summit-Proceedings.pdf.  */
 #include "config.h"
 #include "system.h"
 #include "coretypes.h"
 #include "tm.h"
 #include "tree.h"
 #include "rtl.h"
-#include "c-tree.h"
 #include "tree-inline.h"
 #include "tree-flow.h"
 #include "tree-flow-inline.h"
 #include "langhooks.h"
 #include "hashtab.h"
 #include "cgraph.h"
 #include "diagnostic.h"
 #include "timevar.h"
 #include "params.h"
 #include "fibheap.h"
-#include "c-common.h"
 #include "intl.h"
 #include "function.h"
 #include "basic-block.h"
 #include "cfgloop.h"
 #include "tree-iterator.h"
 #include "tree-pass.h"
 #include "opts.h"
 #include "tree-data-ref.h"
 #include "tree-chrec.h"
 #include "tree-scalar-evolution.h"
+#include "tree-ssa-sccvn.h"
 /* We need to collect a lot of data from the original malloc,
 particularly as the gimplifier has converted:
 orig_var = (struct_type *) malloc (x * sizeof (struct_type *));
 typedef struct access_site_info *access_site_info_p;
 DEF_VEC_P (access_site_info_p);
 DEF_VEC_ALLOC_P (access_site_info_p, heap);
+/* Calls to free when flattening a matrix.  */
+struct free_info
+{
+gimple stmt;	/* NOTE(review): presumably the GIMPLE statement of the call to free -- confirm.  */
+tree func;	/* NOTE(review): presumably the function associated with that call -- confirm.  */
+};
 /* Information about matrix to flatten.  */
 struct matrix_info
 {
 /* Decl tree of this matrix.  */
 tree decl;
 /* The location of the allocation sites (they must be in one
 function).  */
 tree allocation_function_decl;
 /* The calls to free for each level of indirection.  */
-struct free_info
+struct free_info *free_stmts;
-{
-gimple stmt;
-tree func;
-} *free_stmts;
 /* An array which holds for each dimension its size, where
 dimension 0 is the outermost (the one that contains all the others).
 */
 tree *dimension_size;
 /* An array which holds for each dimension its original size
 (before transposing and flattening take place).  */
 tree *dimension_size_orig;
 /* An array which holds for each dimension the size of the type of
 elements accessed in that level (in bytes).  */
 /* An array of the accesses to be flattened.
 elements are of type "struct access_site_info *".  */
 VEC (access_site_info_p, heap) * access_l;
 /* A map of how the dimensions will be organized at the end of
 the analyses.  */
 int *dim_map;
 };
 /* In each phi node we want to record the indirection level we have when we
 return true;
 return false;
 }
 /* Return false if STMT may contain a vector expression.
 In this situation, no matrix should be flattened.  */
 static bool
 may_flatten_matrices_1 (gimple stmt)
 {
 tree t;
 break;
 }
 return true;
 }
 /* Return false if there are hand-written vectors in the program.
 We disable the flattening in such a case.  */
 static bool
 may_flatten_matrices (struct cgraph_node *node)
 {
 tree decl;
 default:
 break;
 }
 }
 /* Record the access/allocation site information for matrix MI so we can
 handle it later in transformation.  */
 static void
 record_access_alloc_site_info (struct matrix_info *mi, gimple stmt, tree offset,
 			       tree index, int level, bool is_alloc)
 {
 	  mark_min_matrix_escape_level (mi, min_malloc_level, stmt);
 	}
 else
 	{
 	  mark_min_matrix_escape_level (mi, level, stmt);
 	  /* cannot be that (level == min_malloc_level)
 	     we would have returned earlier.  */
 	  return;
 	}
 }
 there is one and only one malloc site that sets this variable.  When
 we are performing the flattening we generate a new variable that
 will hold the size for each dimension; each malloc that allocates a
 dimension has the size parameter; we use that parameter to
 initialize the dimension size variable so we can use it later in
 the address calculations.  LEVEL is the dimension we're inspecting.
 Return if STMT is related to an allocation site.  */
 static void
 analyze_matrix_allocation_site (struct matrix_info *mi, gimple stmt,
 				int level, sbitmap visited)
 	  return;
 	}
 else
 	{
 	  tree malloc_fn_decl;
-	  const char *malloc_fname;
 	  malloc_fn_decl = gimple_call_fndecl (stmt);
 	  if (malloc_fn_decl == NULL_TREE)
 	    {
 	      mark_min_matrix_escape_level (mi, level, stmt);
 	      return;
 	    }
-	  malloc_fname = IDENTIFIER_POINTER (DECL_NAME (malloc_fn_decl));
 	  if (DECL_FUNCTION_CODE (malloc_fn_decl) != BUILT_IN_MALLOC)
 	    {
 	      if (dump_file)
 		fprintf (dump_file,
 			 "Matrix %s is an argument to function %s\n",
 			 get_name (mi->decl), get_name (malloc_fn_decl));
 	      mark_min_matrix_escape_level (mi, level, stmt);
 	      return;
 	    }
 	}
 /* This is a call to malloc of level 'level'.
 	 mi->max_malloced_level-1 == level  means that we've
 	 seen a malloc statement of level 'level' before.
 	 If the statement is not the same one that we've
 	 seen before, then there's another malloc statement
 	 for the same level, which means that we need to mark
 	 it escaping.  */
 if (mi->malloc_for_level
 	  && mi->max_malloced_level-1 == level
 	  && mi->malloc_for_level[level] != stmt)
 	{
 statement so be on the safe side and mark it as escaping.  */
 mark_min_matrix_escape_level (mi, level, stmt);
 }
 /* The transposing decision making.
 In order to calculate the profitability of transposing, we collect two
 types of information regarding the accesses:
 1. profiling information used to express the hotness of an access, that
 is how often the matrix is accessed by this access site (count of the
 access site).
 2. which dimension in the access site is iterated by the inner
 most loop containing this access.
 The matrix will have a calculated value of weighted hotness for each
 dimension.
 Intuitively the hotness level of a dimension is a function of how
 many times it was the most frequently accessed dimension in the
 highly executed access sites of this matrix.
 It is computed by the following equation, summing over the m access
 sites (index j) and the n dimensions (index i):
   \sum_{j}^{m} \sum_{i}^{n}  dim_hot_level[i] +=
       acc[j]->dim[i]->iter_by_inner_loop * count(j)
 Where n is the number of dims and m is the number of the matrix
 access sites. acc[j]->dim[i]->iter_by_inner_loop is 1 if acc[j]
 iterates over dim[i] in innermost loop, and is 0 otherwise.
 VEC_free (access_site_info_p, heap, mi->access_l);
 return 1;
 }
 /* Find the index which defines the OFFSET from base.
 We walk from use to def until we find how the offset was defined.  */
 static tree
 get_index_from_offset (tree offset, gimple def_stmt)
 {
 tree op1, op2, index;
 else if (mi->dimension_type_size[l] != type_size)
 	mark_min_matrix_escape_level (mi, l, stmt);
 }
 }
 /* USE_STMT represents a GIMPLE_CALL, where one of the arguments is the
 ssa var that we want to check because it came from some use of matrix
 MI.  CURRENT_INDIRECT_LEVEL is the indirection level we reached so
 far.  */
 static int
 analyze_accesses_for_call_stmt (struct matrix_info *mi, tree ssa_var,
 				gimple use_stmt, int current_indirect_level)
 	}
 }
 return current_indirect_level;
 }
 /* USE_STMT represents a phi node of the ssa var that we want to
 check  because it came from some use of matrix
 MI.
 We check all the escaping levels that get to the PHI node
 and make sure they are all the same escaping;
 if not (which is rare) we let the escaping level be the
 minimum level that gets into that PHI because starting from
 that level we cannot expect the behavior of the indirections.
 CURRENT_INDIRECT_LEVEL is the indirection level we reached so far.  */
 static void
 analyze_accesses_for_phi_node (struct matrix_info *mi, gimple use_stmt,
 			       int current_indirect_level, sbitmap visited,
 			       record_accesses);
 RESET_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt)));
 }
 }
 /* USE_STMT represents an assign statement (the rhs or lhs includes
 the ssa var that we want to check because it came from some use of matrix
 MI).  CURRENT_INDIRECT_LEVEL is the indirection level we reached so far.  */
 static int
 analyze_accesses_for_assign_stmt (struct matrix_info *mi, tree ssa_var,
 				  gimple use_stmt, int current_indirect_level,
 					   NULL_TREE, l, true);
 	  update_type_size (mi, use_stmt, NULL, l);
 	}
 return current_indirect_level;
 }
 /* Now, check the right-hand-side, to see how the SSA variable
 is used.  */
 if (rhs_acc.var_found)
 {
 if (rhs_acc.t_code != INDIRECT_REF
 	  && rhs_acc.t_code != POINTER_PLUS_EXPR && rhs_acc.t_code != SSA_NAME)
 	{
 	  int l = current_indirect_level;
 	  /* One exception is when we are storing to the matrix
 	     variable itself; this is the case of malloc, we must make
 	     sure that it's the one and only one call to malloc so
 	     we call analyze_matrix_allocation_site to check
 	     this out.  */
 	  if (TREE_CODE (lhs) != VAR_DECL || lhs != mi->decl)
 	    mark_min_matrix_escape_level (mi, current_indirect_level,
 					  use_stmt);
 	  else
 	}
 }
 return current_indirect_level;
 }
 /* Given a SSA_VAR (coming from a use statement of the matrix MI),
 follow its uses and level of indirection and find out the minimum
 indirection level it escapes in (the highest dimension) and the maximum
 level it is accessed in (this will be the actual dimension of the
 matrix).  The information is accumulated in MI.
 We look at the immediate uses, if one escapes we finish; if not,
 					  current_indirect_level, last_op,
 					  visited, record_accesses);
 }
 }
 /* Pairs a tree FN with a gimple STMT.  NOTE(review): how the two fields
    are used is not visible in this excerpt -- confirm against the users
    of this type.  */
 typedef struct
 {
 tree fn;
 gimple stmt;
 } check_var_data;
 switch (gimple_code (stmt))
 {
 case GIMPLE_ASSIGN:
 code = gimple_assign_rhs_code (stmt);
 op1 = gimple_assign_rhs1 (stmt);
 switch (code)
 	{
 	case POINTER_PLUS_EXPR:
 	case PLUS_EXPR:
 	case MINUS_EXPR:
 static int
 check_allocation_function (void **slot, void *data ATTRIBUTE_UNUSED)
 {
 int level;
-gimple_stmt_iterator gsi;
-basic_block bb_level_0;
 struct matrix_info *mi = (struct matrix_info *) *slot;
 sbitmap visited;
 if (!mi->malloc_for_level)
 return 1;
 for (level = 1; level < mi->max_malloced_level; level++)
 if (!mi->malloc_for_level[level])
 break;
 mark_min_matrix_escape_level (mi, level, NULL);
-gsi = gsi_for_stmt (mi->malloc_for_level[0]);
-bb_level_0 = gsi.bb;
 /* Check if the expression of the size passed to malloc could be
 pre-calculated before the malloc of level 0.  */
 for (level = 1; level < mi->min_indirect_level_escape; level++)
 {
 defined by the global variables pointing to the matrices of our interest.
 in each use of the SSA we calculate the offset from the base address
 according to the following equation:
 a[I1][I2]...[Ik] , where D1..Dk is the length of each dimension and the
 escaping level is m <= k, and a' is the new allocated matrix,
 will be translated to :
 b[I(m+1)]...[Ik]
 where
 b = a' + I1*D2...*Dm + I2*D3...Dm + ... + Im
 */
 static int
 transform_access_sites (void **slot, void *data ATTRIBUTE_UNUSED)
 				== INDIRECT_REF);
 		    /* Emit convert statement to convert to type of use.  */
 		    tmp = create_tmp_var (TREE_TYPE (lhs), "new");
 		    add_referenced_var (tmp);
 		    rhs = gimple_assign_rhs1 (acc_info->stmt);
-		    new_stmt = gimple_build_assign (tmp,
+		    rhs = fold_convert (TREE_TYPE (tmp),
-						    TREE_OPERAND (rhs, 0));
+					TREE_OPERAND (rhs, 0));
+		    new_stmt = gimple_build_assign (tmp, rhs);
 		    tmp = make_ssa_name (tmp, new_stmt);
 		    gimple_assign_set_lhs (new_stmt, tmp);
 		    gsi = gsi_for_stmt (acc_info->stmt);
 		    gsi_insert_after (&gsi, new_stmt, GSI_SAME_STMT);
 		    SET_USE (use_p, tmp);
 	      if (!check_transpose_p || mi->is_transposed_p == false)
 		tmp1 = offset;
 	      else
 		{
 		  tree new_offset;
-		  tree d_type_size, d_type_size_k;
-		  d_type_size = size_int (mi->dimension_type_size[min_escape_l]);
-		  d_type_size_k = size_int (mi->dimension_type_size[k + 1]);
 		  new_offset =
 		    compute_offset (mi->dimension_type_size[min_escape_l],
 				    mi->dimension_type_size[k + 1], offset);
 /* Replace multiple mallocs (one for each dimension) to one malloc
 with the size of DIM1*DIM2*...*DIMN*size_of_element
 Make sure that we hold the size in the malloc site inside a
 new global variable; this way we ensure that the size doesn't
 change and it is accessible from all the other functions that
 use the matrix.  Also, the original calls to free are deleted,
 and replaced by a new call to free the flattened matrix.  */
 static int
 transform_allocation_sites (void **slot, void *data ATTRIBUTE_UNUSED)
 {
 gate_matrix_reorg (void)
 {
 return flag_ipa_matrix_reorg && flag_whole_program;
 }
 struct simple_ipa_opt_pass pass_ipa_matrix_reorg =
 {
 {
 SIMPLE_IPA_PASS,
 "matrix-reorg",		/* name */
 gate_matrix_reorg,		/* gate */
 matrix_reorg,			/* execute */
 NULL,				/* sub */
 NULL,				/* next */
 0,				/* static_pass_number */
-0,				/* tv_id */
+TV_NONE,			/* tv_id */
 0,				/* properties_required */
-PROP_trees,			/* properties_provided */
+0,				/* properties_provided */
 0,				/* properties_destroyed */
 0,				/* todo_flags_start */
 TODO_dump_cgraph | TODO_dump_func	/* todo_flags_finish */
 }
 };

Mercurial > hg > CbC > CbC_gcc

comparison gcc/matrix-reorg.c @ 55:77e2b8dfacca gcc-4.4.5