Mercurial > hg > CbC > CbC_gcc
comparison gcc/tree-vectorizer.h @ 111:04ced10e8804
gcc 7
author | kono |
---|---|
date | Fri, 27 Oct 2017 22:46:09 +0900 |
parents | f6334be47118 |
children | 84e7813d76e9 |
comparison
equal
deleted
inserted
replaced
68:561a7518be6b | 111:04ced10e8804 |
---|---|
1 /* Vectorizer | 1 /* Vectorizer |
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 | 2 Copyright (C) 2003-2017 Free Software Foundation, Inc. |
3 Free Software Foundation, Inc. | |
4 Contributed by Dorit Naishlos <dorit@il.ibm.com> | 3 Contributed by Dorit Naishlos <dorit@il.ibm.com> |
5 | 4 |
6 This file is part of GCC. | 5 This file is part of GCC. |
7 | 6 |
8 GCC is free software; you can redistribute it and/or modify it under | 7 GCC is free software; you can redistribute it and/or modify it under |
21 | 20 |
22 #ifndef GCC_TREE_VECTORIZER_H | 21 #ifndef GCC_TREE_VECTORIZER_H |
23 #define GCC_TREE_VECTORIZER_H | 22 #define GCC_TREE_VECTORIZER_H |
24 | 23 |
25 #include "tree-data-ref.h" | 24 #include "tree-data-ref.h" |
26 | 25 #include "tree-hash-traits.h" |
27 typedef source_location LOC; | 26 #include "target.h" |
28 #define UNKNOWN_LOC UNKNOWN_LOCATION | |
29 #define EXPR_LOC(e) EXPR_LOCATION(e) | |
30 #define LOC_FILE(l) LOCATION_FILE (l) | |
31 #define LOC_LINE(l) LOCATION_LINE (l) | |
32 | 27 |
33 /* Used for naming of new temporaries. */ | 28 /* Used for naming of new temporaries. */ |
34 enum vect_var_kind { | 29 enum vect_var_kind { |
35 vect_simple_var, | 30 vect_simple_var, |
36 vect_pointer_var, | 31 vect_pointer_var, |
37 vect_scalar_var | 32 vect_scalar_var, |
33 vect_mask_var | |
38 }; | 34 }; |
39 | 35 |
40 /* Defines type of operation. */ | 36 /* Defines type of operation. */ |
41 enum operation_type { | 37 enum operation_type { |
42 unary_op = 1, | 38 unary_op = 1, |
64 vect_double_reduction_def, | 60 vect_double_reduction_def, |
65 vect_nested_cycle, | 61 vect_nested_cycle, |
66 vect_unknown_def_type | 62 vect_unknown_def_type |
67 }; | 63 }; |
68 | 64 |
65 /* Define type of reduction. */ | |
66 enum vect_reduction_type { | |
67 TREE_CODE_REDUCTION, | |
68 COND_REDUCTION, | |
69 INTEGER_INDUC_COND_REDUCTION, | |
70 CONST_COND_REDUCTION | |
71 }; | |
72 | |
69 #define VECTORIZABLE_CYCLE_DEF(D) (((D) == vect_reduction_def) \ | 73 #define VECTORIZABLE_CYCLE_DEF(D) (((D) == vect_reduction_def) \ |
70 || ((D) == vect_double_reduction_def) \ | 74 || ((D) == vect_double_reduction_def) \ |
71 || ((D) == vect_nested_cycle)) | 75 || ((D) == vect_nested_cycle)) |
72 | 76 |
77 /* Structure to encapsulate information about a group of like | |
78 instructions to be presented to the target cost model. */ | |
79 struct stmt_info_for_cost { | |
80 int count; | |
81 enum vect_cost_for_stmt kind; | |
82 gimple *stmt; | |
83 int misalign; | |
84 }; | |
85 | |
86 typedef vec<stmt_info_for_cost> stmt_vector_for_cost; | |
87 | |
88 /* Maps base addresses to an innermost_loop_behavior that gives the maximum | |
89 known alignment for that base. */ | |
90 typedef hash_map<tree_operand_hash, | |
91 innermost_loop_behavior *> vec_base_alignments; | |
92 | |
73 /************************************************************************ | 93 /************************************************************************ |
74 SLP | 94 SLP |
75 ************************************************************************/ | 95 ************************************************************************/ |
76 | 96 typedef struct _slp_tree *slp_tree; |
77 /* A computation tree of an SLP instance. Each node corresponds to a group of | 97 |
98 /* A computation tree of an SLP instance. Each node corresponds to a group of | |
78 stmts to be packed in a SIMD stmt. */ | 99 stmts to be packed in a SIMD stmt. */ |
79 typedef struct _slp_tree { | 100 struct _slp_tree { |
80 /* Only binary and unary operations are supported. LEFT child corresponds to | 101 /* Nodes that contain def-stmts of this node statements operands. */ |
81 the first operand and RIGHT child to the second if the operation is | 102 vec<slp_tree> children; |
82 binary. */ | |
83 struct _slp_tree *left; | |
84 struct _slp_tree *right; | |
85 /* A group of scalar stmts to be vectorized together. */ | 103 /* A group of scalar stmts to be vectorized together. */ |
86 VEC (gimple, heap) *stmts; | 104 vec<gimple *> stmts; |
105 /* Load permutation relative to the stores, NULL if there is no | |
106 permutation. */ | |
107 vec<unsigned> load_permutation; | |
87 /* Vectorized stmt/s. */ | 108 /* Vectorized stmt/s. */ |
88 VEC (gimple, heap) *vec_stmts; | 109 vec<gimple *> vec_stmts; |
89 /* Number of vector stmts that are created to replace the group of scalar | 110 /* Number of vector stmts that are created to replace the group of scalar |
90 stmts. It is calculated during the transformation phase as the number of | 111 stmts. It is calculated during the transformation phase as the number of |
91 scalar elements in one scalar iteration (GROUP_SIZE) multiplied by VF | 112 scalar elements in one scalar iteration (GROUP_SIZE) multiplied by VF |
92 divided by vector size. */ | 113 divided by vector size. */ |
93 unsigned int vec_stmts_size; | 114 unsigned int vec_stmts_size; |
94 /* Vectorization costs associated with SLP node. */ | 115 /* Whether the scalar computations use two different operators. */ |
95 struct | 116 bool two_operators; |
96 { | 117 /* The DEF type of this node. */ |
97 int outside_of_loop; /* Statements generated outside loop. */ | 118 enum vect_def_type def_type; |
98 int inside_of_loop; /* Statements generated inside loop. */ | 119 }; |
99 } cost; | 120 |
100 } *slp_tree; | |
101 | |
102 DEF_VEC_P(slp_tree); | |
103 DEF_VEC_ALLOC_P(slp_tree, heap); | |
104 | 121 |
105 /* SLP instance is a sequence of stmts in a loop that can be packed into | 122 /* SLP instance is a sequence of stmts in a loop that can be packed into |
106 SIMD stmts. */ | 123 SIMD stmts. */ |
107 typedef struct _slp_instance { | 124 typedef struct _slp_instance { |
108 /* The root of SLP tree. */ | 125 /* The root of SLP tree. */ |
112 unsigned int group_size; | 129 unsigned int group_size; |
113 | 130 |
114 /* The unrolling factor required to vectorize this SLP instance. */ | 131 /* The unrolling factor required to vectorize this SLP instance. */ |
115 unsigned int unrolling_factor; | 132 unsigned int unrolling_factor; |
116 | 133 |
117 /* Vectorization costs associated with SLP instance. */ | |
118 struct | |
119 { | |
120 int outside_of_loop; /* Statements generated outside loop. */ | |
121 int inside_of_loop; /* Statements generated inside loop. */ | |
122 } cost; | |
123 | |
124 /* Loads permutation relatively to the stores, NULL if there is no | |
125 permutation. */ | |
126 VEC (int, heap) *load_permutation; | |
127 | |
128 /* The group of nodes that contain loads of this SLP instance. */ | 134 /* The group of nodes that contain loads of this SLP instance. */ |
129 VEC (slp_tree, heap) *loads; | 135 vec<slp_tree> loads; |
130 | 136 |
131 /* The first scalar load of the instance. The created vector loads will be | 137 /* The SLP node containing the reduction PHIs. */ |
132 inserted before this statement. */ | 138 slp_tree reduc_phis; |
133 gimple first_load; | |
134 } *slp_instance; | 139 } *slp_instance; |
135 | 140 |
136 DEF_VEC_P(slp_instance); | |
137 DEF_VEC_ALLOC_P(slp_instance, heap); | |
138 | 141 |
139 /* Access Functions. */ | 142 /* Access Functions. */ |
140 #define SLP_INSTANCE_TREE(S) (S)->root | 143 #define SLP_INSTANCE_TREE(S) (S)->root |
141 #define SLP_INSTANCE_GROUP_SIZE(S) (S)->group_size | 144 #define SLP_INSTANCE_GROUP_SIZE(S) (S)->group_size |
142 #define SLP_INSTANCE_UNROLLING_FACTOR(S) (S)->unrolling_factor | 145 #define SLP_INSTANCE_UNROLLING_FACTOR(S) (S)->unrolling_factor |
143 #define SLP_INSTANCE_OUTSIDE_OF_LOOP_COST(S) (S)->cost.outside_of_loop | |
144 #define SLP_INSTANCE_INSIDE_OF_LOOP_COST(S) (S)->cost.inside_of_loop | |
145 #define SLP_INSTANCE_LOAD_PERMUTATION(S) (S)->load_permutation | |
146 #define SLP_INSTANCE_LOADS(S) (S)->loads | 146 #define SLP_INSTANCE_LOADS(S) (S)->loads |
147 #define SLP_INSTANCE_FIRST_LOAD_STMT(S) (S)->first_load | 147 |
148 | 148 #define SLP_TREE_CHILDREN(S) (S)->children |
149 #define SLP_TREE_LEFT(S) (S)->left | |
150 #define SLP_TREE_RIGHT(S) (S)->right | |
151 #define SLP_TREE_SCALAR_STMTS(S) (S)->stmts | 149 #define SLP_TREE_SCALAR_STMTS(S) (S)->stmts |
152 #define SLP_TREE_VEC_STMTS(S) (S)->vec_stmts | 150 #define SLP_TREE_VEC_STMTS(S) (S)->vec_stmts |
153 #define SLP_TREE_NUMBER_OF_VEC_STMTS(S) (S)->vec_stmts_size | 151 #define SLP_TREE_NUMBER_OF_VEC_STMTS(S) (S)->vec_stmts_size |
154 #define SLP_TREE_OUTSIDE_OF_LOOP_COST(S) (S)->cost.outside_of_loop | 152 #define SLP_TREE_LOAD_PERMUTATION(S) (S)->load_permutation |
155 #define SLP_TREE_INSIDE_OF_LOOP_COST(S) (S)->cost.inside_of_loop | 153 #define SLP_TREE_TWO_OPERATORS(S) (S)->two_operators |
156 | 154 #define SLP_TREE_DEF_TYPE(S) (S)->def_type |
157 | 155 |
158 typedef struct _vect_peel_info | 156 |
159 { | 157 |
160 int npeel; | 158 /* Describes two objects whose addresses must be unequal for the vectorized |
161 struct data_reference *dr; | 159 loop to be valid. */ |
162 unsigned int count; | 160 typedef std::pair<tree, tree> vec_object_pair; |
163 } *vect_peel_info; | 161 |
164 | 162 /* Vectorizer state common between loop and basic-block vectorization. */ |
165 typedef struct _vect_peel_extended_info | 163 struct vec_info { |
166 { | 164 enum vec_kind { bb, loop }; |
167 struct _vect_peel_info peel_info; | 165 |
168 unsigned int inside_cost; | 166 vec_info (vec_kind, void *); |
169 unsigned int outside_cost; | 167 ~vec_info (); |
170 } *vect_peel_extended_info; | 168 |
169 /* The type of vectorization. */ | |
170 vec_kind kind; | |
171 | |
172 /* All SLP instances. */ | |
173 auto_vec<slp_instance> slp_instances; | |
174 | |
175 /* All data references. Freed by free_data_refs, so not an auto_vec. */ | |
176 vec<data_reference_p> datarefs; | |
177 | |
178 /* Maps base addresses to an innermost_loop_behavior that gives the maximum | |
179 known alignment for that base. */ | |
180 vec_base_alignments base_alignments; | |
181 | |
182 /* All data dependences. Freed by free_dependence_relations, so not | |
183 an auto_vec. */ | |
184 vec<ddr_p> ddrs; | |
185 | |
186 /* All interleaving chains of stores, represented by the first | |
187 stmt in the chain. */ | |
188 auto_vec<gimple *> grouped_stores; | |
189 | |
190 /* Cost data used by the target cost model. */ | |
191 void *target_cost_data; | |
192 }; | |
193 | |
194 struct _loop_vec_info; | |
195 struct _bb_vec_info; | |
196 | |
197 template<> | |
198 template<> | |
199 inline bool | |
200 is_a_helper <_loop_vec_info *>::test (vec_info *i) | |
201 { | |
202 return i->kind == vec_info::loop; | |
203 } | |
204 | |
205 template<> | |
206 template<> | |
207 inline bool | |
208 is_a_helper <_bb_vec_info *>::test (vec_info *i) | |
209 { | |
210 return i->kind == vec_info::bb; | |
211 } | |
212 | |
171 | 213 |
172 /*-----------------------------------------------------------------*/ | 214 /*-----------------------------------------------------------------*/ |
173 /* Info on vectorized loops. */ | 215 /* Info on vectorized loops. */ |
174 /*-----------------------------------------------------------------*/ | 216 /*-----------------------------------------------------------------*/ |
175 typedef struct _loop_vec_info { | 217 typedef struct _loop_vec_info : public vec_info { |
218 _loop_vec_info (struct loop *); | |
219 ~_loop_vec_info (); | |
176 | 220 |
177 /* The loop to which this info struct refers. */ | 221 /* The loop to which this info struct refers. */ |
178 struct loop *loop; | 222 struct loop *loop; |
179 | 223 |
180 /* The loop basic blocks. */ | 224 /* The loop basic blocks. */ |
181 basic_block *bbs; | 225 basic_block *bbs; |
182 | 226 |
227 /* Number of latch executions. */ | |
228 tree num_itersm1; | |
183 /* Number of iterations. */ | 229 /* Number of iterations. */ |
184 tree num_iters; | 230 tree num_iters; |
231 /* Number of iterations of the original loop. */ | |
185 tree num_iters_unchanged; | 232 tree num_iters_unchanged; |
186 | 233 /* Condition under which this loop is analyzed and versioned. */ |
187 /* Minimum number of iterations below which vectorization is expected to | 234 tree num_iters_assumptions; |
188 not be profitable (as estimated by the cost model). | 235 |
189 -1 indicates that vectorization will not be profitable. | 236 /* Threshold of number of iterations below which vectorization will not be |
190 FORNOW: This field is an int. Will be a tree in the future, to represent | 237 performed. It is calculated from MIN_PROFITABLE_ITERS and |
191 values unknown at compile time. */ | 238 PARAM_MIN_VECT_LOOP_BOUND. */ |
192 int min_profitable_iters; | 239 unsigned int th; |
193 | |
194 /* Is the loop vectorizable? */ | |
195 bool vectorizable; | |
196 | 240 |
197 /* Unrolling factor */ | 241 /* Unrolling factor */ |
198 int vectorization_factor; | 242 int vectorization_factor; |
199 | 243 |
200 /* The loop location in the source. */ | 244 /* Maximum runtime vectorization factor, or MAX_VECTORIZATION_FACTOR |
201 LOC loop_line_number; | 245 if there is no particular limit. */ |
246 unsigned HOST_WIDE_INT max_vectorization_factor; | |
202 | 247 |
203 /* Unknown DRs according to which loop was peeled. */ | 248 /* Unknown DRs according to which loop was peeled. */ |
204 struct data_reference *unaligned_dr; | 249 struct data_reference *unaligned_dr; |
205 | 250 |
206 /* peeling_for_alignment indicates whether peeling for alignment will take | 251 /* peeling_for_alignment indicates whether peeling for alignment will take |
215 | 260 |
216 /* The mask used to check the alignment of pointers or arrays. */ | 261 /* The mask used to check the alignment of pointers or arrays. */ |
217 int ptr_mask; | 262 int ptr_mask; |
218 | 263 |
219 /* The loop nest in which the data dependences are computed. */ | 264 /* The loop nest in which the data dependences are computed. */ |
220 VEC (loop_p, heap) *loop_nest; | 265 auto_vec<loop_p> loop_nest; |
221 | |
222 /* All data references in the loop. */ | |
223 VEC (data_reference_p, heap) *datarefs; | |
224 | |
225 /* All data dependences in the loop. */ | |
226 VEC (ddr_p, heap) *ddrs; | |
227 | 266 |
228 /* Data Dependence Relations defining address ranges that are candidates | 267 /* Data Dependence Relations defining address ranges that are candidates |
229 for a run-time aliasing check. */ | 268 for a run-time aliasing check. */ |
230 VEC (ddr_p, heap) *may_alias_ddrs; | 269 auto_vec<ddr_p> may_alias_ddrs; |
270 | |
271 /* Data Dependence Relations defining address ranges together with segment | |
272 lengths from which the run-time aliasing check is built. */ | |
273 auto_vec<dr_with_seg_len_pair_t> comp_alias_ddrs; | |
274 | |
275 /* Check that the addresses of each pair of objects are unequal. */ | |
276 auto_vec<vec_object_pair> check_unequal_addrs; | |
231 | 277 |
232 /* Statements in the loop that have data references that are candidates for a | 278 /* Statements in the loop that have data references that are candidates for a |
233 runtime (loop versioning) misalignment check. */ | 279 runtime (loop versioning) misalignment check. */ |
234 VEC(gimple,heap) *may_misalign_stmts; | 280 auto_vec<gimple *> may_misalign_stmts; |
235 | 281 |
236 /* All interleaving chains of stores in the loop, represented by the first | 282 /* Reduction cycles detected in the loop. Used in loop-aware SLP. */ |
283 auto_vec<gimple *> reductions; | |
284 | |
285 /* All reduction chains in the loop, represented by the first | |
237 stmt in the chain. */ | 286 stmt in the chain. */ |
238 VEC(gimple, heap) *strided_stores; | 287 auto_vec<gimple *> reduction_chains; |
239 | 288 |
240 /* All SLP instances in the loop. This is a subset of the set of STRIDED_STORES | 289 /* Cost vector for a single scalar iteration. */ |
241 of the loop. */ | 290 auto_vec<stmt_info_for_cost> scalar_cost_vec; |
242 VEC(slp_instance, heap) *slp_instances; | |
243 | 291 |
244 /* The unrolling factor needed to SLP the loop. In case that pure SLP is | 292 /* The unrolling factor needed to SLP the loop. In case that pure SLP is |
245 applied to the loop, i.e., no unrolling is needed, this is 1. */ | 293 applied to the loop, i.e., no unrolling is needed, this is 1. */ |
246 unsigned slp_unrolling_factor; | 294 unsigned slp_unrolling_factor; |
247 | 295 |
248 /* Reduction cycles detected in the loop. Used in loop-aware SLP. */ | 296 /* Cost of a single scalar iteration. */ |
249 VEC (gimple, heap) *reductions; | 297 int single_scalar_iteration_cost; |
250 | 298 |
251 /* Hash table used to choose the best peeling option. */ | 299 /* Is the loop vectorizable? */ |
252 htab_t peeling_htab; | 300 bool vectorizable; |
301 | |
302 /* When we have grouped data accesses with gaps, we may introduce invalid | |
303 memory accesses. We peel the last iteration of the loop to prevent | |
304 this. */ | |
305 bool peeling_for_gaps; | |
306 | |
307 /* When the number of iterations is not a multiple of the vector size | |
308 we need to peel off iterations at the end to form an epilogue loop. */ | |
309 bool peeling_for_niter; | |
310 | |
311 /* Reductions are canonicalized so that the last operand is the reduction | |
312 operand. If this places a constant into RHS1, this decanonicalizes | |
313 GIMPLE for other phases, so we must track when this has occurred and | |
314 fix it up. */ | |
315 bool operands_swapped; | |
316 | |
317 /* True if there are no loop carried data dependencies in the loop. | |
318 If loop->safelen <= 1, then this is always true, either the loop | |
319 didn't have any loop carried data dependencies, or the loop is being | |
320 vectorized guarded with some runtime alias checks, or couldn't | |
321 be vectorized at all, but then this field shouldn't be used. | |
322 For loop->safelen >= 2, the user has asserted that there are no | |
323 backward dependencies, but there still could be loop carried forward | |
324 dependencies in such loops. This flag will be false if normal | |
325 vectorizer data dependency analysis would fail or require versioning | |
326 for alias, but because of loop->safelen >= 2 it has been vectorized | |
327 even without versioning for alias. E.g. in: | |
328 #pragma omp simd | |
329 for (int i = 0; i < m; i++) | |
330 a[i] = a[i + k] * c; | |
331 (or #pragma simd or #pragma ivdep) we can vectorize this and it will | |
332 DTRT even for k > 0 && k < m, but without safelen we would not | |
333 vectorize this, so this field would be false. */ | |
334 bool no_data_dependencies; | |
335 | |
336 /* Mark loops having masked stores. */ | |
337 bool has_mask_store; | |
338 | |
339 /* If if-conversion versioned this loop before conversion, this is the | |
340 loop version without if-conversion. */ | |
341 struct loop *scalar_loop; | |
342 | |
343 /* For loops being epilogues of already vectorized loops | |
344 this points to the original vectorized loop. Otherwise NULL. */ | |
345 _loop_vec_info *orig_loop_info; | |
253 | 346 |
254 } *loop_vec_info; | 347 } *loop_vec_info; |
255 | 348 |
256 /* Access Functions. */ | 349 /* Access Functions. */ |
257 #define LOOP_VINFO_LOOP(L) (L)->loop | 350 #define LOOP_VINFO_LOOP(L) (L)->loop |
258 #define LOOP_VINFO_BBS(L) (L)->bbs | 351 #define LOOP_VINFO_BBS(L) (L)->bbs |
352 #define LOOP_VINFO_NITERSM1(L) (L)->num_itersm1 | |
259 #define LOOP_VINFO_NITERS(L) (L)->num_iters | 353 #define LOOP_VINFO_NITERS(L) (L)->num_iters |
260 /* Since LOOP_VINFO_NITERS can change after prologue peeling | 354 /* Since LOOP_VINFO_NITERS and LOOP_VINFO_NITERSM1 can change after |
261 retain total unchanged scalar loop iterations for cost model. */ | 355 prologue peeling retain total unchanged scalar loop iterations for |
356 cost model. */ | |
262 #define LOOP_VINFO_NITERS_UNCHANGED(L) (L)->num_iters_unchanged | 357 #define LOOP_VINFO_NITERS_UNCHANGED(L) (L)->num_iters_unchanged |
263 #define LOOP_VINFO_COST_MODEL_MIN_ITERS(L) (L)->min_profitable_iters | 358 #define LOOP_VINFO_NITERS_ASSUMPTIONS(L) (L)->num_iters_assumptions |
359 #define LOOP_VINFO_COST_MODEL_THRESHOLD(L) (L)->th | |
264 #define LOOP_VINFO_VECTORIZABLE_P(L) (L)->vectorizable | 360 #define LOOP_VINFO_VECTORIZABLE_P(L) (L)->vectorizable |
265 #define LOOP_VINFO_VECT_FACTOR(L) (L)->vectorization_factor | 361 #define LOOP_VINFO_VECT_FACTOR(L) (L)->vectorization_factor |
362 #define LOOP_VINFO_MAX_VECT_FACTOR(L) (L)->max_vectorization_factor | |
266 #define LOOP_VINFO_PTR_MASK(L) (L)->ptr_mask | 363 #define LOOP_VINFO_PTR_MASK(L) (L)->ptr_mask |
267 #define LOOP_VINFO_LOOP_NEST(L) (L)->loop_nest | 364 #define LOOP_VINFO_LOOP_NEST(L) (L)->loop_nest |
268 #define LOOP_VINFO_DATAREFS(L) (L)->datarefs | 365 #define LOOP_VINFO_DATAREFS(L) (L)->datarefs |
269 #define LOOP_VINFO_DDRS(L) (L)->ddrs | 366 #define LOOP_VINFO_DDRS(L) (L)->ddrs |
270 #define LOOP_VINFO_INT_NITERS(L) (TREE_INT_CST_LOW ((L)->num_iters)) | 367 #define LOOP_VINFO_INT_NITERS(L) (TREE_INT_CST_LOW ((L)->num_iters)) |
271 #define LOOP_PEELING_FOR_ALIGNMENT(L) (L)->peeling_for_alignment | 368 #define LOOP_VINFO_PEELING_FOR_ALIGNMENT(L) (L)->peeling_for_alignment |
272 #define LOOP_VINFO_UNALIGNED_DR(L) (L)->unaligned_dr | 369 #define LOOP_VINFO_UNALIGNED_DR(L) (L)->unaligned_dr |
273 #define LOOP_VINFO_MAY_MISALIGN_STMTS(L) (L)->may_misalign_stmts | 370 #define LOOP_VINFO_MAY_MISALIGN_STMTS(L) (L)->may_misalign_stmts |
274 #define LOOP_VINFO_LOC(L) (L)->loop_line_number | |
275 #define LOOP_VINFO_MAY_ALIAS_DDRS(L) (L)->may_alias_ddrs | 371 #define LOOP_VINFO_MAY_ALIAS_DDRS(L) (L)->may_alias_ddrs |
276 #define LOOP_VINFO_STRIDED_STORES(L) (L)->strided_stores | 372 #define LOOP_VINFO_COMP_ALIAS_DDRS(L) (L)->comp_alias_ddrs |
373 #define LOOP_VINFO_CHECK_UNEQUAL_ADDRS(L) (L)->check_unequal_addrs | |
374 #define LOOP_VINFO_GROUPED_STORES(L) (L)->grouped_stores | |
277 #define LOOP_VINFO_SLP_INSTANCES(L) (L)->slp_instances | 375 #define LOOP_VINFO_SLP_INSTANCES(L) (L)->slp_instances |
278 #define LOOP_VINFO_SLP_UNROLLING_FACTOR(L) (L)->slp_unrolling_factor | 376 #define LOOP_VINFO_SLP_UNROLLING_FACTOR(L) (L)->slp_unrolling_factor |
279 #define LOOP_VINFO_REDUCTIONS(L) (L)->reductions | 377 #define LOOP_VINFO_REDUCTIONS(L) (L)->reductions |
280 #define LOOP_VINFO_PEELING_HTAB(L) (L)->peeling_htab | 378 #define LOOP_VINFO_REDUCTION_CHAINS(L) (L)->reduction_chains |
281 | 379 #define LOOP_VINFO_TARGET_COST_DATA(L) (L)->target_cost_data |
282 #define LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT(L) \ | 380 #define LOOP_VINFO_PEELING_FOR_GAPS(L) (L)->peeling_for_gaps |
283 VEC_length (gimple, (L)->may_misalign_stmts) > 0 | 381 #define LOOP_VINFO_OPERANDS_SWAPPED(L) (L)->operands_swapped |
284 #define LOOP_REQUIRES_VERSIONING_FOR_ALIAS(L) \ | 382 #define LOOP_VINFO_PEELING_FOR_NITER(L) (L)->peeling_for_niter |
285 VEC_length (ddr_p, (L)->may_alias_ddrs) > 0 | 383 #define LOOP_VINFO_NO_DATA_DEPENDENCIES(L) (L)->no_data_dependencies |
286 | 384 #define LOOP_VINFO_SCALAR_LOOP(L) (L)->scalar_loop |
287 #define NITERS_KNOWN_P(n) \ | 385 #define LOOP_VINFO_HAS_MASK_STORE(L) (L)->has_mask_store |
288 (host_integerp ((n),0) \ | 386 #define LOOP_VINFO_SCALAR_ITERATION_COST(L) (L)->scalar_cost_vec |
289 && TREE_INT_CST_LOW ((n)) > 0) | 387 #define LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST(L) (L)->single_scalar_iteration_cost |
388 #define LOOP_VINFO_ORIG_LOOP_INFO(L) (L)->orig_loop_info | |
389 | |
390 #define LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT(L) \ | |
391 ((L)->may_misalign_stmts.length () > 0) | |
392 #define LOOP_REQUIRES_VERSIONING_FOR_ALIAS(L) \ | |
393 ((L)->comp_alias_ddrs.length () > 0 \ | |
394 || (L)->check_unequal_addrs.length () > 0) | |
395 #define LOOP_REQUIRES_VERSIONING_FOR_NITERS(L) \ | |
396 (LOOP_VINFO_NITERS_ASSUMPTIONS (L)) | |
397 #define LOOP_REQUIRES_VERSIONING(L) \ | |
398 (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (L) \ | |
399 || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (L) \ | |
400 || LOOP_REQUIRES_VERSIONING_FOR_NITERS (L)) | |
290 | 401 |
291 #define LOOP_VINFO_NITERS_KNOWN_P(L) \ | 402 #define LOOP_VINFO_NITERS_KNOWN_P(L) \ |
292 NITERS_KNOWN_P((L)->num_iters) | 403 (tree_fits_shwi_p ((L)->num_iters) && tree_to_shwi ((L)->num_iters) > 0) |
404 | |
405 #define LOOP_VINFO_EPILOGUE_P(L) \ | |
406 (LOOP_VINFO_ORIG_LOOP_INFO (L) != NULL) | |
407 | |
408 #define LOOP_VINFO_ORIG_MAX_VECT_FACTOR(L) \ | |
409 (LOOP_VINFO_MAX_VECT_FACTOR (LOOP_VINFO_ORIG_LOOP_INFO (L))) | |
293 | 410 |
294 static inline loop_vec_info | 411 static inline loop_vec_info |
295 loop_vec_info_for_loop (struct loop *loop) | 412 loop_vec_info_for_loop (struct loop *loop) |
296 { | 413 { |
297 return (loop_vec_info) loop->aux; | 414 return (loop_vec_info) loop->aux; |
298 } | 415 } |
299 | 416 |
300 static inline bool | 417 static inline bool |
301 nested_in_vect_loop_p (struct loop *loop, gimple stmt) | 418 nested_in_vect_loop_p (struct loop *loop, gimple *stmt) |
302 { | 419 { |
303 return (loop->inner | 420 return (loop->inner |
304 && (loop->inner == (gimple_bb (stmt))->loop_father)); | 421 && (loop->inner == (gimple_bb (stmt))->loop_father)); |
305 } | 422 } |
306 | 423 |
307 typedef struct _bb_vec_info { | 424 typedef struct _bb_vec_info : public vec_info |
425 { | |
426 _bb_vec_info (gimple_stmt_iterator, gimple_stmt_iterator); | |
427 ~_bb_vec_info (); | |
308 | 428 |
309 basic_block bb; | 429 basic_block bb; |
310 /* All interleaving chains of stores in the basic block, represented by the | 430 gimple_stmt_iterator region_begin; |
311 first stmt in the chain. */ | 431 gimple_stmt_iterator region_end; |
312 VEC(gimple, heap) *strided_stores; | |
313 | |
314 /* All SLP instances in the basic block. This is a subset of the set of | |
315 STRIDED_STORES of the basic block. */ | |
316 VEC(slp_instance, heap) *slp_instances; | |
317 | |
318 /* All data references in the basic block. */ | |
319 VEC (data_reference_p, heap) *datarefs; | |
320 | |
321 /* All data dependences in the basic block. */ | |
322 VEC (ddr_p, heap) *ddrs; | |
323 } *bb_vec_info; | 432 } *bb_vec_info; |
324 | 433 |
325 #define BB_VINFO_BB(B) (B)->bb | 434 #define BB_VINFO_BB(B) (B)->bb |
326 #define BB_VINFO_STRIDED_STORES(B) (B)->strided_stores | 435 #define BB_VINFO_GROUPED_STORES(B) (B)->grouped_stores |
327 #define BB_VINFO_SLP_INSTANCES(B) (B)->slp_instances | 436 #define BB_VINFO_SLP_INSTANCES(B) (B)->slp_instances |
328 #define BB_VINFO_DATAREFS(B) (B)->datarefs | 437 #define BB_VINFO_DATAREFS(B) (B)->datarefs |
329 #define BB_VINFO_DDRS(B) (B)->ddrs | 438 #define BB_VINFO_DDRS(B) (B)->ddrs |
439 #define BB_VINFO_TARGET_COST_DATA(B) (B)->target_cost_data | |
330 | 440 |
331 static inline bb_vec_info | 441 static inline bb_vec_info |
332 vec_info_for_bb (basic_block bb) | 442 vec_info_for_bb (basic_block bb) |
333 { | 443 { |
334 return (bb_vec_info) bb->aux; | 444 return (bb_vec_info) bb->aux; |
342 load_vec_info_type, | 452 load_vec_info_type, |
343 store_vec_info_type, | 453 store_vec_info_type, |
344 shift_vec_info_type, | 454 shift_vec_info_type, |
345 op_vec_info_type, | 455 op_vec_info_type, |
346 call_vec_info_type, | 456 call_vec_info_type, |
457 call_simd_clone_vec_info_type, | |
347 assignment_vec_info_type, | 458 assignment_vec_info_type, |
348 condition_vec_info_type, | 459 condition_vec_info_type, |
460 comparison_vec_info_type, | |
349 reduc_vec_info_type, | 461 reduc_vec_info_type, |
350 induc_vec_info_type, | 462 induc_vec_info_type, |
351 type_promotion_vec_info_type, | 463 type_promotion_vec_info_type, |
352 type_demotion_vec_info_type, | 464 type_demotion_vec_info_type, |
353 type_conversion_vec_info_type, | 465 type_conversion_vec_info_type, |
356 | 468 |
357 /* Indicates whether/how a variable is used in the scope of loop/basic | 469 /* Indicates whether/how a variable is used in the scope of loop/basic |
358 block. */ | 470 block. */ |
359 enum vect_relevant { | 471 enum vect_relevant { |
360 vect_unused_in_scope = 0, | 472 vect_unused_in_scope = 0, |
473 | |
474 /* The def is only used outside the loop. */ | |
475 vect_used_only_live, | |
361 /* The def is in the inner loop, and the use is in the outer loop, and the | 476 /* The def is in the inner loop, and the use is in the outer loop, and the |
362 use is a reduction stmt. */ | 477 use is a reduction stmt. */ |
363 vect_used_in_outer_by_reduction, | 478 vect_used_in_outer_by_reduction, |
364 /* The def is in the inner loop, and the use is in the outer loop (and is | 479 /* The def is in the inner loop, and the use is in the outer loop (and is |
365 not part of reduction). */ | 480 not part of reduction). */ |
396 loop_vect = 0, | 511 loop_vect = 0, |
397 pure_slp, | 512 pure_slp, |
398 hybrid | 513 hybrid |
399 }; | 514 }; |
400 | 515 |
516 /* Describes how we're going to vectorize an individual load or store, | |
517 or a group of loads or stores. */ | |
518 enum vect_memory_access_type { | |
519 /* An access to an invariant address. This is used only for loads. */ | |
520 VMAT_INVARIANT, | |
521 | |
522 /* A simple contiguous access. */ | |
523 VMAT_CONTIGUOUS, | |
524 | |
525 /* A contiguous access that goes down in memory rather than up, | |
526 with no additional permutation. This is used only for stores | |
527 of invariants. */ | |
528 VMAT_CONTIGUOUS_DOWN, | |
529 | |
530 /* A simple contiguous access in which the elements need to be permuted | |
531 after loading or before storing. Only used for loop vectorization; | |
532 SLP uses separate permutes. */ | |
533 VMAT_CONTIGUOUS_PERMUTE, | |
534 | |
535 /* A simple contiguous access in which the elements need to be reversed | |
536 after loading or before storing. */ | |
537 VMAT_CONTIGUOUS_REVERSE, | |
538 | |
539 /* An access that uses IFN_LOAD_LANES or IFN_STORE_LANES. */ | |
540 VMAT_LOAD_STORE_LANES, | |
541 | |
542 /* An access in which each scalar element is loaded or stored | |
543 individually. */ | |
544 VMAT_ELEMENTWISE, | |
545 | |
546 /* A hybrid of VMAT_CONTIGUOUS and VMAT_ELEMENTWISE, used for grouped | |
547 SLP accesses. Each unrolled iteration uses a contiguous load | |
548 or store for the whole group, but the groups from separate iterations | |
549 are combined in the same way as for VMAT_ELEMENTWISE. */ | |
550 VMAT_STRIDED_SLP, | |
551 | |
552 /* The access uses gather loads or scatter stores. */ | |
553 VMAT_GATHER_SCATTER | |
554 }; | |
401 | 555 |
402 typedef struct data_reference *dr_p; | 556 typedef struct data_reference *dr_p; |
403 DEF_VEC_P(dr_p); | |
404 DEF_VEC_ALLOC_P(dr_p,heap); | |
405 | 557 |
406 typedef struct _stmt_vec_info { | 558 typedef struct _stmt_vec_info { |
407 | 559 |
408 enum stmt_vec_info_type type; | 560 enum stmt_vec_info_type type; |
409 | 561 |
412 bool live; | 564 bool live; |
413 | 565 |
414 /* Stmt is part of some pattern (computation idiom) */ | 566 /* Stmt is part of some pattern (computation idiom) */ |
415 bool in_pattern_p; | 567 bool in_pattern_p; |
416 | 568 |
417 /* For loads only, if there is a store with the same location, this field is | 569 /* Is this statement vectorizable or should it be skipped in (partial) |
418 TRUE. */ | 570 vectorization. */ |
419 bool read_write_dep; | 571 bool vectorizable; |
420 | 572 |
421 /* The stmt to which this info struct refers. */ | 573 /* The stmt to which this info struct refers. */ |
422 gimple stmt; | 574 gimple *stmt; |
423 | 575 |
424 /* The loop_vec_info with respect to which STMT is vectorized. */ | 576 /* The vec_info with respect to which STMT is vectorized. */ |
425 loop_vec_info loop_vinfo; | 577 vec_info *vinfo; |
426 | 578 |
427 /* The vector type to be used for the LHS of this statement. */ | 579 /* The vector type to be used for the LHS of this statement. */ |
428 tree vectype; | 580 tree vectype; |
429 | 581 |
430 /* The vectorized version of the stmt. */ | 582 /* The vectorized version of the stmt. */ |
431 gimple vectorized_stmt; | 583 gimple *vectorized_stmt; |
432 | 584 |
433 | 585 |
434 /** The following is relevant only for stmts that contain a non-scalar | 586 /* The following is relevant only for stmts that contain a non-scalar |
435 data-ref (array/pointer/struct access). A GIMPLE stmt is expected to have | 587 data-ref (array/pointer/struct access). A GIMPLE stmt is expected to have |
436 at most one such data-ref. **/ | 588 at most one such data-ref. */ |
437 | 589 |
438 /* Information about the data-ref (access function, etc), | 590 /* Information about the data-ref (access function, etc), |
439 relative to the inner-most containing loop. */ | 591 relative to the inner-most containing loop. */ |
440 struct data_reference *data_ref_info; | 592 struct data_reference *data_ref_info; |
441 | 593 |
442 /* Information about the data-ref relative to this loop | 594 /* Information about the data-ref relative to this loop |
443 nest (the loop that is being considered for vectorization). */ | 595 nest (the loop that is being considered for vectorization). */ |
444 tree dr_base_address; | 596 innermost_loop_behavior dr_wrt_vec_loop; |
445 tree dr_init; | 597 |
446 tree dr_offset; | 598 /* For loop PHI nodes, the base and evolution part of it. This makes sure |
447 tree dr_step; | 599 this information is still available in vect_update_ivs_after_vectorizer |
448 tree dr_aligned_to; | 600 where we may not be able to re-analyze the PHI nodes evolution as |
601 peeling for the prologue loop can make it unanalyzable. The evolution | |
602 part is still correct after peeling, but the base may have changed from | |
603 the version here. */ | |
604 tree loop_phi_evolution_base_unchanged; | |
605 tree loop_phi_evolution_part; | |
449 | 606 |
450 /* Used for various bookkeeping purposes, generally holding a pointer to | 607 /* Used for various bookkeeping purposes, generally holding a pointer to |
451 some other stmt S that is in some way "related" to this stmt. | 608 some other stmt S that is in some way "related" to this stmt. |
452 Current use of this field is: | 609 Current use of this field is: |
453 If this stmt is part of a pattern (i.e. the field 'in_pattern_p' is | 610 If this stmt is part of a pattern (i.e. the field 'in_pattern_p' is |
454 true): S is the "pattern stmt" that represents (and replaces) the | 611 true): S is the "pattern stmt" that represents (and replaces) the |
455 sequence of stmts that constitutes the pattern. Similarly, the | 612 sequence of stmts that constitutes the pattern. Similarly, the |
456 related_stmt of the "pattern stmt" points back to this stmt (which is | 613 related_stmt of the "pattern stmt" points back to this stmt (which is |
457 the last stmt in the original sequence of stmts that constitutes the | 614 the last stmt in the original sequence of stmts that constitutes the |
458 pattern). */ | 615 pattern). */ |
459 gimple related_stmt; | 616 gimple *related_stmt; |
617 | |
618 /* Used to keep a sequence of def stmts of a pattern stmt if such exists. */ | |
619 gimple_seq pattern_def_seq; | |
460 | 620 |
461 /* List of datarefs that are known to have the same alignment as the dataref | 621 /* List of datarefs that are known to have the same alignment as the dataref |
462 of this stmt. */ | 622 of this stmt. */ |
463 VEC(dr_p,heap) *same_align_refs; | 623 vec<dr_p> same_align_refs; |
624 | |
625 /* Selected SIMD clone's function info. First vector element | |
626 is SIMD clone's function decl, followed by a pair of trees (base + step) | |
627 for linear arguments (pair of NULLs for other arguments). */ | |
628 vec<tree> simd_clone_info; | |
464 | 629 |
465 /* Classify the def of this stmt. */ | 630 /* Classify the def of this stmt. */ |
466 enum vect_def_type def_type; | 631 enum vect_def_type def_type; |
467 | 632 |
468 /* Whether the stmt is SLPed, loop-based vectorized, or both. */ | 633 /* Whether the stmt is SLPed, loop-based vectorized, or both. */ |
469 enum slp_vect_type slp_type; | 634 enum slp_vect_type slp_type; |
470 | 635 |
471 /* Interleaving info. */ | 636 /* Interleaving and reduction chains info. */ |
472 /* First data-ref in the interleaving group. */ | 637 /* First element in the group. */ |
473 gimple first_dr; | 638 gimple *first_element; |
474 /* Pointer to the next data-ref in the group. */ | 639 /* Pointer to the next element in the group. */ |
475 gimple next_dr; | 640 gimple *next_element; |
476 /* In case that two or more stmts share data-ref, this is the pointer to the | 641 /* For data-refs, in case that two or more stmts share data-ref, this is the |
477 previously detected stmt with the same dr. */ | 642 pointer to the previously detected stmt with the same dr. */ |
478 gimple same_dr_stmt; | 643 gimple *same_dr_stmt; |
479 /* The size of the interleaving group. */ | 644 /* The size of the group. */ |
480 unsigned int size; | 645 unsigned int size; |
481 /* For stores, number of stores from this group seen. We vectorize the last | 646 /* For stores, number of stores from this group seen. We vectorize the last |
482 one. */ | 647 one. */ |
483 unsigned int store_count; | 648 unsigned int store_count; |
484 /* For loads only, the gap from the previous load. For consecutive loads, GAP | 649 /* For loads only, the gap from the previous load. For consecutive loads, GAP |
485 is 1. */ | 650 is 1. */ |
486 unsigned int gap; | 651 unsigned int gap; |
487 | 652 |
653 /* The minimum negative dependence distance this stmt participates in | |
654 or zero if none. */ | |
655 unsigned int min_neg_dist; | |
656 | |
488 /* Not all stmts in the loop need to be vectorized. e.g, the increment | 657 /* Not all stmts in the loop need to be vectorized. e.g, the increment |
489 of the loop induction variable and computation of array indexes. relevant | 658 of the loop induction variable and computation of array indexes. relevant |
490 indicates whether the stmt needs to be vectorized. */ | 659 indicates whether the stmt needs to be vectorized. */ |
491 enum vect_relevant relevant; | 660 enum vect_relevant relevant; |
492 | 661 |
493 /* Vectorization costs associated with statement. */ | 662 /* For loads if this is a gather, for stores if this is a scatter. */ |
494 struct | 663 bool gather_scatter_p; |
495 { | 664 |
496 int outside_of_loop; /* Statements generated outside loop. */ | 665 /* True if this is an access with loop-invariant stride. */ |
497 int inside_of_loop; /* Statements generated inside loop. */ | 666 bool strided_p; |
498 } cost; | 667 |
499 | 668 /* For both loads and stores. */ |
500 /* The bb_vec_info with respect to which STMT is vectorized. */ | 669 bool simd_lane_access_p; |
501 bb_vec_info bb_vinfo; | 670 |
502 | 671 /* Classifies how the load or store is going to be implemented |
503 /* Is this statement vectorizable or should it be skipped in (partial) | 672 for loop vectorization. */ |
504 vectorization. */ | 673 vect_memory_access_type memory_access_type; |
505 bool vectorizable; | 674 |
675 /* For reduction loops, this is the type of reduction. */ | |
676 enum vect_reduction_type v_reduc_type; | |
677 | |
678 /* For CONST_COND_REDUCTION, record the reduc code. */ | |
679 enum tree_code const_cond_reduc_code; | |
680 | |
681 /* On a reduction PHI the reduction type as detected by | |
682 vect_force_simple_reduction. */ | |
683 enum vect_reduction_type reduc_type; | |
684 | |
685 /* On a reduction PHI the def returned by vect_force_simple_reduction. | |
686 On the def returned by vect_force_simple_reduction the | |
687 corresponding PHI. */ | |
688 gimple *reduc_def; | |
689 | |
690 /* The number of scalar stmt references from active SLP instances. */ | |
691 unsigned int num_slp_uses; | |
506 } *stmt_vec_info; | 692 } *stmt_vec_info; |
693 | |
694 /* Information about a gather/scatter call. */ | |
695 struct gather_scatter_info { | |
696 /* The FUNCTION_DECL for the built-in gather/scatter function. */ | |
697 tree decl; | |
698 | |
699 /* The loop-invariant base value. */ | |
700 tree base; | |
701 | |
702 /* The original scalar offset, which is a non-loop-invariant SSA_NAME. */ | |
703 tree offset; | |
704 | |
705 /* Each offset element should be multiplied by this amount before | |
706 being added to the base. */ | |
707 int scale; | |
708 | |
709 /* The definition type for the vectorized offset. */ | |
710 enum vect_def_type offset_dt; | |
711 | |
712 /* The type of the vectorized offset. */ | |
713 tree offset_vectype; | |
714 }; | |
507 | 715 |
508 /* Access Functions. */ | 716 /* Access Functions. */ |
509 #define STMT_VINFO_TYPE(S) (S)->type | 717 #define STMT_VINFO_TYPE(S) (S)->type |
510 #define STMT_VINFO_STMT(S) (S)->stmt | 718 #define STMT_VINFO_STMT(S) (S)->stmt |
511 #define STMT_VINFO_LOOP_VINFO(S) (S)->loop_vinfo | 719 inline loop_vec_info |
512 #define STMT_VINFO_BB_VINFO(S) (S)->bb_vinfo | 720 STMT_VINFO_LOOP_VINFO (stmt_vec_info stmt_vinfo) |
721 { | |
722 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (stmt_vinfo->vinfo)) | |
723 return loop_vinfo; | |
724 return NULL; | |
725 } | |
726 inline bb_vec_info | |
727 STMT_VINFO_BB_VINFO (stmt_vec_info stmt_vinfo) | |
728 { | |
729 if (bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (stmt_vinfo->vinfo)) | |
730 return bb_vinfo; | |
731 return NULL; | |
732 } | |
513 #define STMT_VINFO_RELEVANT(S) (S)->relevant | 733 #define STMT_VINFO_RELEVANT(S) (S)->relevant |
514 #define STMT_VINFO_LIVE_P(S) (S)->live | 734 #define STMT_VINFO_LIVE_P(S) (S)->live |
515 #define STMT_VINFO_VECTYPE(S) (S)->vectype | 735 #define STMT_VINFO_VECTYPE(S) (S)->vectype |
516 #define STMT_VINFO_VEC_STMT(S) (S)->vectorized_stmt | 736 #define STMT_VINFO_VEC_STMT(S) (S)->vectorized_stmt |
517 #define STMT_VINFO_VECTORIZABLE(S) (S)->vectorizable | 737 #define STMT_VINFO_VECTORIZABLE(S) (S)->vectorizable |
518 #define STMT_VINFO_DATA_REF(S) (S)->data_ref_info | 738 #define STMT_VINFO_DATA_REF(S) (S)->data_ref_info |
519 | 739 #define STMT_VINFO_GATHER_SCATTER_P(S) (S)->gather_scatter_p |
520 #define STMT_VINFO_DR_BASE_ADDRESS(S) (S)->dr_base_address | 740 #define STMT_VINFO_STRIDED_P(S) (S)->strided_p |
521 #define STMT_VINFO_DR_INIT(S) (S)->dr_init | 741 #define STMT_VINFO_MEMORY_ACCESS_TYPE(S) (S)->memory_access_type |
522 #define STMT_VINFO_DR_OFFSET(S) (S)->dr_offset | 742 #define STMT_VINFO_SIMD_LANE_ACCESS_P(S) (S)->simd_lane_access_p |
523 #define STMT_VINFO_DR_STEP(S) (S)->dr_step | 743 #define STMT_VINFO_VEC_REDUCTION_TYPE(S) (S)->v_reduc_type |
524 #define STMT_VINFO_DR_ALIGNED_TO(S) (S)->dr_aligned_to | 744 #define STMT_VINFO_VEC_CONST_COND_REDUC_CODE(S) (S)->const_cond_reduc_code |
745 | |
746 #define STMT_VINFO_DR_WRT_VEC_LOOP(S) (S)->dr_wrt_vec_loop | |
747 #define STMT_VINFO_DR_BASE_ADDRESS(S) (S)->dr_wrt_vec_loop.base_address | |
748 #define STMT_VINFO_DR_INIT(S) (S)->dr_wrt_vec_loop.init | |
749 #define STMT_VINFO_DR_OFFSET(S) (S)->dr_wrt_vec_loop.offset | |
750 #define STMT_VINFO_DR_STEP(S) (S)->dr_wrt_vec_loop.step | |
751 #define STMT_VINFO_DR_BASE_ALIGNMENT(S) (S)->dr_wrt_vec_loop.base_alignment | |
752 #define STMT_VINFO_DR_BASE_MISALIGNMENT(S) \ | |
753 (S)->dr_wrt_vec_loop.base_misalignment | |
754 #define STMT_VINFO_DR_OFFSET_ALIGNMENT(S) \ | |
755 (S)->dr_wrt_vec_loop.offset_alignment | |
756 #define STMT_VINFO_DR_STEP_ALIGNMENT(S) \ | |
757 (S)->dr_wrt_vec_loop.step_alignment | |
525 | 758 |
526 #define STMT_VINFO_IN_PATTERN_P(S) (S)->in_pattern_p | 759 #define STMT_VINFO_IN_PATTERN_P(S) (S)->in_pattern_p |
527 #define STMT_VINFO_RELATED_STMT(S) (S)->related_stmt | 760 #define STMT_VINFO_RELATED_STMT(S) (S)->related_stmt |
761 #define STMT_VINFO_PATTERN_DEF_SEQ(S) (S)->pattern_def_seq | |
528 #define STMT_VINFO_SAME_ALIGN_REFS(S) (S)->same_align_refs | 762 #define STMT_VINFO_SAME_ALIGN_REFS(S) (S)->same_align_refs |
763 #define STMT_VINFO_SIMD_CLONE_INFO(S) (S)->simd_clone_info | |
529 #define STMT_VINFO_DEF_TYPE(S) (S)->def_type | 764 #define STMT_VINFO_DEF_TYPE(S) (S)->def_type |
530 #define STMT_VINFO_DR_GROUP_FIRST_DR(S) (S)->first_dr | 765 #define STMT_VINFO_GROUP_FIRST_ELEMENT(S) (S)->first_element |
531 #define STMT_VINFO_DR_GROUP_NEXT_DR(S) (S)->next_dr | 766 #define STMT_VINFO_GROUP_NEXT_ELEMENT(S) (S)->next_element |
532 #define STMT_VINFO_DR_GROUP_SIZE(S) (S)->size | 767 #define STMT_VINFO_GROUP_SIZE(S) (S)->size |
533 #define STMT_VINFO_DR_GROUP_STORE_COUNT(S) (S)->store_count | 768 #define STMT_VINFO_GROUP_STORE_COUNT(S) (S)->store_count |
534 #define STMT_VINFO_DR_GROUP_GAP(S) (S)->gap | 769 #define STMT_VINFO_GROUP_GAP(S) (S)->gap |
535 #define STMT_VINFO_DR_GROUP_SAME_DR_STMT(S)(S)->same_dr_stmt | 770 #define STMT_VINFO_GROUP_SAME_DR_STMT(S) (S)->same_dr_stmt |
536 #define STMT_VINFO_DR_GROUP_READ_WRITE_DEPENDENCE(S) (S)->read_write_dep | 771 #define STMT_VINFO_GROUPED_ACCESS(S) ((S)->first_element != NULL && (S)->data_ref_info) |
537 #define STMT_VINFO_STRIDED_ACCESS(S) ((S)->first_dr != NULL) | 772 #define STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED(S) (S)->loop_phi_evolution_base_unchanged |
538 | 773 #define STMT_VINFO_LOOP_PHI_EVOLUTION_PART(S) (S)->loop_phi_evolution_part |
539 #define DR_GROUP_FIRST_DR(S) (S)->first_dr | 774 #define STMT_VINFO_MIN_NEG_DIST(S) (S)->min_neg_dist |
540 #define DR_GROUP_NEXT_DR(S) (S)->next_dr | 775 #define STMT_VINFO_NUM_SLP_USES(S) (S)->num_slp_uses |
541 #define DR_GROUP_SIZE(S) (S)->size | 776 #define STMT_VINFO_REDUC_TYPE(S) (S)->reduc_type |
542 #define DR_GROUP_STORE_COUNT(S) (S)->store_count | 777 #define STMT_VINFO_REDUC_DEF(S) (S)->reduc_def |
543 #define DR_GROUP_GAP(S) (S)->gap | 778 |
544 #define DR_GROUP_SAME_DR_STMT(S) (S)->same_dr_stmt | 779 #define GROUP_FIRST_ELEMENT(S) (S)->first_element |
545 #define DR_GROUP_READ_WRITE_DEPENDENCE(S) (S)->read_write_dep | 780 #define GROUP_NEXT_ELEMENT(S) (S)->next_element |
781 #define GROUP_SIZE(S) (S)->size | |
782 #define GROUP_STORE_COUNT(S) (S)->store_count | |
783 #define GROUP_GAP(S) (S)->gap | |
784 #define GROUP_SAME_DR_STMT(S) (S)->same_dr_stmt | |
546 | 785 |
547 #define STMT_VINFO_RELEVANT_P(S) ((S)->relevant != vect_unused_in_scope) | 786 #define STMT_VINFO_RELEVANT_P(S) ((S)->relevant != vect_unused_in_scope) |
548 #define STMT_VINFO_OUTSIDE_OF_LOOP_COST(S) (S)->cost.outside_of_loop | |
549 #define STMT_VINFO_INSIDE_OF_LOOP_COST(S) (S)->cost.inside_of_loop | |
550 | 787 |
551 #define HYBRID_SLP_STMT(S) ((S)->slp_type == hybrid) | 788 #define HYBRID_SLP_STMT(S) ((S)->slp_type == hybrid) |
552 #define PURE_SLP_STMT(S) ((S)->slp_type == pure_slp) | 789 #define PURE_SLP_STMT(S) ((S)->slp_type == pure_slp) |
553 #define STMT_SLP_TYPE(S) (S)->slp_type | 790 #define STMT_SLP_TYPE(S) (S)->slp_type |
554 | 791 |
792 struct dataref_aux { | |
793 /* The misalignment in bytes of the reference, or -1 if not known. */ | |
794 int misalignment; | |
795 /* The byte alignment that we'd ideally like the reference to have, | |
796 and the value that misalignment is measured against. */ | |
797 int target_alignment; | |
798 /* If true the alignment of base_decl needs to be increased. */ | |
799 bool base_misaligned; | |
800 tree base_decl; | |
801 }; | |
802 | |
803 #define DR_VECT_AUX(dr) ((dataref_aux *)(dr)->aux) | |
804 | |
555 #define VECT_MAX_COST 1000 | 805 #define VECT_MAX_COST 1000 |
556 | 806 |
557 /* The maximum number of intermediate steps required in multi-step type | 807 /* The maximum number of intermediate steps required in multi-step type |
558 conversion. */ | 808 conversion. */ |
559 #define MAX_INTERM_CVT_STEPS 3 | 809 #define MAX_INTERM_CVT_STEPS 3 |
560 | 810 |
561 /* The maximum vectorization factor supported by any target (V32QI). */ | 811 /* The maximum vectorization factor supported by any target (V64QI). */ |
562 #define MAX_VECTORIZATION_FACTOR 32 | 812 #define MAX_VECTORIZATION_FACTOR 64 |
563 | 813 |
564 /* Avoid GTY(()) on stmt_vec_info. */ | 814 /* Nonzero if TYPE represents a (scalar) boolean type or type |
565 typedef void *vec_void_p; | 815 in the middle-end compatible with it (unsigned precision 1 integral |
566 DEF_VEC_P (vec_void_p); | 816 types). Used to determine which types should be vectorized as |
567 DEF_VEC_ALLOC_P (vec_void_p, heap); | 817 VECTOR_BOOLEAN_TYPE_P. */ |
568 | 818 |
569 extern VEC(vec_void_p,heap) *stmt_vec_info_vec; | 819 #define VECT_SCALAR_BOOLEAN_TYPE_P(TYPE) \ |
820 (TREE_CODE (TYPE) == BOOLEAN_TYPE \ | |
821 || ((TREE_CODE (TYPE) == INTEGER_TYPE \ | |
822 || TREE_CODE (TYPE) == ENUMERAL_TYPE) \ | |
823 && TYPE_PRECISION (TYPE) == 1 \ | |
824 && TYPE_UNSIGNED (TYPE))) | |
825 | |
826 extern vec<stmt_vec_info> stmt_vec_info_vec; | |
570 | 827 |
571 void init_stmt_vec_info_vec (void); | 828 void init_stmt_vec_info_vec (void); |
572 void free_stmt_vec_info_vec (void); | 829 void free_stmt_vec_info_vec (void); |
573 | 830 |
574 /* Return a stmt_vec_info corresponding to STMT. */ | 831 /* Return a stmt_vec_info corresponding to STMT. */ |
575 | 832 |
576 static inline stmt_vec_info | 833 static inline stmt_vec_info |
577 vinfo_for_stmt (gimple stmt) | 834 vinfo_for_stmt (gimple *stmt) |
578 { | 835 { |
579 unsigned int uid = gimple_uid (stmt); | 836 int uid = gimple_uid (stmt); |
580 if (uid == 0) | 837 if (uid <= 0) |
581 return NULL; | 838 return NULL; |
582 | 839 |
583 return (stmt_vec_info) VEC_index (vec_void_p, stmt_vec_info_vec, uid - 1); | 840 return stmt_vec_info_vec[uid - 1]; |
584 } | 841 } |
585 | 842 |
586 /* Set vectorizer information INFO for STMT. */ | 843 /* Set vectorizer information INFO for STMT. */ |
587 | 844 |
588 static inline void | 845 static inline void |
589 set_vinfo_for_stmt (gimple stmt, stmt_vec_info info) | 846 set_vinfo_for_stmt (gimple *stmt, stmt_vec_info info) |
590 { | 847 { |
591 unsigned int uid = gimple_uid (stmt); | 848 unsigned int uid = gimple_uid (stmt); |
592 if (uid == 0) | 849 if (uid == 0) |
593 { | 850 { |
594 gcc_checking_assert (info); | 851 gcc_checking_assert (info); |
595 uid = VEC_length (vec_void_p, stmt_vec_info_vec) + 1; | 852 uid = stmt_vec_info_vec.length () + 1; |
596 gimple_set_uid (stmt, uid); | 853 gimple_set_uid (stmt, uid); |
597 VEC_safe_push (vec_void_p, heap, stmt_vec_info_vec, (vec_void_p) info); | 854 stmt_vec_info_vec.safe_push (info); |
598 } | 855 } |
599 else | 856 else |
600 VEC_replace (vec_void_p, stmt_vec_info_vec, uid - 1, (vec_void_p) info); | 857 { |
858 gcc_checking_assert (info == NULL); | |
859 stmt_vec_info_vec[uid - 1] = info; | |
860 } | |
601 } | 861 } |
602 | 862 |
603 /* Return the earlier statement between STMT1 and STMT2. */ | 863 /* Return the earlier statement between STMT1 and STMT2. */ |
604 | 864 |
605 static inline gimple | 865 static inline gimple * |
606 get_earlier_stmt (gimple stmt1, gimple stmt2) | 866 get_earlier_stmt (gimple *stmt1, gimple *stmt2) |
607 { | 867 { |
608 unsigned int uid1, uid2; | 868 unsigned int uid1, uid2; |
609 | 869 |
610 if (stmt1 == NULL) | 870 if (stmt1 == NULL) |
611 return stmt2; | 871 return stmt2; |
617 uid2 = gimple_uid (stmt2); | 877 uid2 = gimple_uid (stmt2); |
618 | 878 |
619 if (uid1 == 0 || uid2 == 0) | 879 if (uid1 == 0 || uid2 == 0) |
620 return NULL; | 880 return NULL; |
621 | 881 |
622 gcc_checking_assert (uid1 <= VEC_length (vec_void_p, stmt_vec_info_vec) | 882 gcc_checking_assert (uid1 <= stmt_vec_info_vec.length () |
623 && uid2 <= VEC_length (vec_void_p, stmt_vec_info_vec)); | 883 && uid2 <= stmt_vec_info_vec.length ()); |
624 | 884 |
625 if (uid1 < uid2) | 885 if (uid1 < uid2) |
626 return stmt1; | 886 return stmt1; |
627 else | 887 else |
628 return stmt2; | 888 return stmt2; |
629 } | 889 } |
630 | 890 |
631 /* Return the later statement between STMT1 and STMT2. */ | 891 /* Return the later statement between STMT1 and STMT2. */ |
632 | 892 |
633 static inline gimple | 893 static inline gimple * |
634 get_later_stmt (gimple stmt1, gimple stmt2) | 894 get_later_stmt (gimple *stmt1, gimple *stmt2) |
635 { | 895 { |
636 unsigned int uid1, uid2; | 896 unsigned int uid1, uid2; |
637 | 897 |
638 if (stmt1 == NULL) | 898 if (stmt1 == NULL) |
639 return stmt2; | 899 return stmt2; |
645 uid2 = gimple_uid (stmt2); | 905 uid2 = gimple_uid (stmt2); |
646 | 906 |
647 if (uid1 == 0 || uid2 == 0) | 907 if (uid1 == 0 || uid2 == 0) |
648 return NULL; | 908 return NULL; |
649 | 909 |
650 gcc_assert (uid1 <= VEC_length (vec_void_p, stmt_vec_info_vec)); | 910 gcc_assert (uid1 <= stmt_vec_info_vec.length ()); |
651 gcc_assert (uid2 <= VEC_length (vec_void_p, stmt_vec_info_vec)); | 911 gcc_assert (uid2 <= stmt_vec_info_vec.length ()); |
652 | 912 |
653 if (uid1 > uid2) | 913 if (uid1 > uid2) |
654 return stmt1; | 914 return stmt1; |
655 else | 915 else |
656 return stmt2; | 916 return stmt2; |
660 pattern. */ | 920 pattern. */ |
661 | 921 |
662 static inline bool | 922 static inline bool |
663 is_pattern_stmt_p (stmt_vec_info stmt_info) | 923 is_pattern_stmt_p (stmt_vec_info stmt_info) |
664 { | 924 { |
665 gimple related_stmt; | 925 gimple *related_stmt; |
666 stmt_vec_info related_stmt_info; | 926 stmt_vec_info related_stmt_info; |
667 | 927 |
668 related_stmt = STMT_VINFO_RELATED_STMT (stmt_info); | 928 related_stmt = STMT_VINFO_RELATED_STMT (stmt_info); |
669 if (related_stmt | 929 if (related_stmt |
670 && (related_stmt_info = vinfo_for_stmt (related_stmt)) | 930 && (related_stmt_info = vinfo_for_stmt (related_stmt)) |
683 return true; | 943 return true; |
684 gcc_checking_assert (EDGE_COUNT (bb->preds) == 1); | 944 gcc_checking_assert (EDGE_COUNT (bb->preds) == 1); |
685 return false; | 945 return false; |
686 } | 946 } |
687 | 947 |
688 /* Set inside loop vectorization cost. */ | |
689 | |
690 static inline void | |
691 stmt_vinfo_set_inside_of_loop_cost (stmt_vec_info stmt_info, slp_tree slp_node, | |
692 int cost) | |
693 { | |
694 if (slp_node) | |
695 SLP_TREE_INSIDE_OF_LOOP_COST (slp_node) = cost; | |
696 else | |
697 STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) = cost; | |
698 } | |
699 | |
700 /* Set inside loop vectorization cost. */ | |
701 | |
702 static inline void | |
703 stmt_vinfo_set_outside_of_loop_cost (stmt_vec_info stmt_info, slp_tree slp_node, | |
704 int cost) | |
705 { | |
706 if (slp_node) | |
707 SLP_TREE_OUTSIDE_OF_LOOP_COST (slp_node) = cost; | |
708 else | |
709 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = cost; | |
710 } | |
711 | |
712 /* Return pow2 (X). */ | 948 /* Return pow2 (X). */ |
713 | 949 |
714 static inline int | 950 static inline int |
715 vect_pow2 (int x) | 951 vect_pow2 (int x) |
716 { | 952 { |
718 | 954 |
719 for (i = 0; i < x; i++) | 955 for (i = 0; i < x; i++) |
720 res *= 2; | 956 res *= 2; |
721 | 957 |
722 return res; | 958 return res; |
959 } | |
960 | |
961 /* Alias targetm.vectorize.builtin_vectorization_cost. */ | |
962 | |
963 static inline int | |
964 builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, | |
965 tree vectype, int misalign) | |
966 { | |
967 return targetm.vectorize.builtin_vectorization_cost (type_of_cost, | |
968 vectype, misalign); | |
969 } | |
970 | |
971 /* Get cost by calling cost target builtin. */ | |
972 | |
973 static inline | |
974 int vect_get_stmt_cost (enum vect_cost_for_stmt type_of_cost) | |
975 { | |
976 return builtin_vectorization_cost (type_of_cost, NULL, 0); | |
977 } | |
978 | |
979 /* Alias targetm.vectorize.init_cost. */ | |
980 | |
981 static inline void * | |
982 init_cost (struct loop *loop_info) | |
983 { | |
984 return targetm.vectorize.init_cost (loop_info); | |
985 } | |
986 | |
987 /* Alias targetm.vectorize.add_stmt_cost. */ | |
988 | |
989 static inline unsigned | |
990 add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind, | |
991 stmt_vec_info stmt_info, int misalign, | |
992 enum vect_cost_model_location where) | |
993 { | |
994 return targetm.vectorize.add_stmt_cost (data, count, kind, | |
995 stmt_info, misalign, where); | |
996 } | |
997 | |
998 /* Alias targetm.vectorize.finish_cost. */ | |
999 | |
1000 static inline void | |
1001 finish_cost (void *data, unsigned *prologue_cost, | |
1002 unsigned *body_cost, unsigned *epilogue_cost) | |
1003 { | |
1004 targetm.vectorize.finish_cost (data, prologue_cost, body_cost, epilogue_cost); | |
1005 } | |
1006 | |
1007 /* Alias targetm.vectorize.destroy_cost_data. */ | |
1008 | |
1009 static inline void | |
1010 destroy_cost_data (void *data) | |
1011 { | |
1012 targetm.vectorize.destroy_cost_data (data); | |
723 } | 1013 } |
724 | 1014 |
725 /*-----------------------------------------------------------------*/ | 1015 /*-----------------------------------------------------------------*/ |
726 /* Info on data references alignment. */ | 1016 /* Info on data references alignment. */ |
727 /*-----------------------------------------------------------------*/ | 1017 /*-----------------------------------------------------------------*/ |
1018 inline void | |
1019 set_dr_misalignment (struct data_reference *dr, int val) | |
1020 { | |
1021 dataref_aux *data_aux = DR_VECT_AUX (dr); | |
1022 | |
1023 if (!data_aux) | |
1024 { | |
1025 data_aux = XCNEW (dataref_aux); | |
1026 dr->aux = data_aux; | |
1027 } | |
1028 | |
1029 data_aux->misalignment = val; | |
1030 } | |
1031 | |
1032 inline int | |
1033 dr_misalignment (struct data_reference *dr) | |
1034 { | |
1035 return DR_VECT_AUX (dr)->misalignment; | |
1036 } | |
728 | 1037 |
729 /* Reflects actual alignment of first access in the vectorized loop, | 1038 /* Reflects actual alignment of first access in the vectorized loop, |
730 taking into account peeling/versioning if applied. */ | 1039 taking into account peeling/versioning if applied. */ |
731 #define DR_MISALIGNMENT(DR) ((int) (size_t) (DR)->aux) | 1040 #define DR_MISALIGNMENT(DR) dr_misalignment (DR) |
732 #define SET_DR_MISALIGNMENT(DR, VAL) ((DR)->aux = (void *) (size_t) (VAL)) | 1041 #define SET_DR_MISALIGNMENT(DR, VAL) set_dr_misalignment (DR, VAL) |
733 | 1042 #define DR_MISALIGNMENT_UNKNOWN (-1) |
734 /* Return TRUE if the data access is aligned, and FALSE otherwise. */ | 1043 |
1044 /* Only defined once DR_MISALIGNMENT is defined. */ | |
1045 #define DR_TARGET_ALIGNMENT(DR) DR_VECT_AUX (DR)->target_alignment | |
1046 | |
1047 /* Return true if data access DR is aligned to its target alignment | |
1048 (which may be less than a full vector). */ | |
735 | 1049 |
736 static inline bool | 1050 static inline bool |
737 aligned_access_p (struct data_reference *data_ref_info) | 1051 aligned_access_p (struct data_reference *data_ref_info) |
738 { | 1052 { |
739 return (DR_MISALIGNMENT (data_ref_info) == 0); | 1053 return (DR_MISALIGNMENT (data_ref_info) == 0); |
743 otherwise. */ | 1057 otherwise. */ |
744 | 1058 |
745 static inline bool | 1059 static inline bool |
746 known_alignment_for_access_p (struct data_reference *data_ref_info) | 1060 known_alignment_for_access_p (struct data_reference *data_ref_info) |
747 { | 1061 { |
748 return (DR_MISALIGNMENT (data_ref_info) != -1); | 1062 return (DR_MISALIGNMENT (data_ref_info) != DR_MISALIGNMENT_UNKNOWN); |
749 } | 1063 } |
750 | 1064 |
751 /* vect_dump will be set to stderr or dump_file if exist. */ | 1065 /* Return the minimum alignment in bytes that the vectorized version |
752 extern FILE *vect_dump; | 1066 of DR is guaranteed to have. */ |
753 extern LOC vect_loop_location; | 1067 |
1068 static inline unsigned int | |
1069 vect_known_alignment_in_bytes (struct data_reference *dr) | |
1070 { | |
1071 if (DR_MISALIGNMENT (dr) == DR_MISALIGNMENT_UNKNOWN) | |
1072 return TYPE_ALIGN_UNIT (TREE_TYPE (DR_REF (dr))); | |
1073 if (DR_MISALIGNMENT (dr) == 0) | |
1074 return DR_TARGET_ALIGNMENT (dr); | |
1075 return DR_MISALIGNMENT (dr) & -DR_MISALIGNMENT (dr); | |
1076 } | |
1077 | |
1078 /* Return the behavior of DR with respect to the vectorization context | |
1079 (which for outer loop vectorization might not be the behavior recorded | |
1080 in DR itself). */ | |
1081 | |
1082 static inline innermost_loop_behavior * | |
1083 vect_dr_behavior (data_reference *dr) | |
1084 { | |
1085 gimple *stmt = DR_STMT (dr); | |
1086 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); | |
1087 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); | |
1088 if (loop_vinfo == NULL | |
1089 || !nested_in_vect_loop_p (LOOP_VINFO_LOOP (loop_vinfo), stmt)) | |
1090 return &DR_INNERMOST (dr); | |
1091 else | |
1092 return &STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info); | |
1093 } | |
1094 | |
1095 /* Return true if the vect cost model is unlimited. */ | |
1096 static inline bool | |
1097 unlimited_cost_model (loop_p loop) | |
1098 { | |
1099 if (loop != NULL && loop->force_vectorize | |
1100 && flag_simd_cost_model != VECT_COST_MODEL_DEFAULT) | |
1101 return flag_simd_cost_model == VECT_COST_MODEL_UNLIMITED; | |
1102 return (flag_vect_cost_model == VECT_COST_MODEL_UNLIMITED); | |
1103 } | |
1104 | |
1105 /* Return the number of copies needed for loop vectorization when | |
1106 a statement operates on vectors of type VECTYPE. This is the | |
1107 vectorization factor divided by the number of elements in | |
1108 VECTYPE and is always known at compile time. */ | |
1109 | |
1110 static inline unsigned int | |
1111 vect_get_num_copies (loop_vec_info loop_vinfo, tree vectype) | |
1112 { | |
1113 gcc_checking_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo) | |
1114 % TYPE_VECTOR_SUBPARTS (vectype) == 0); | |
1115 return (LOOP_VINFO_VECT_FACTOR (loop_vinfo) | |
1116 / TYPE_VECTOR_SUBPARTS (vectype)); | |
1117 } | |
1118 | |
1119 /* Return the size of the value accessed by unvectorized data reference DR. | |
1120 This is only valid once STMT_VINFO_VECTYPE has been calculated for the | |
1121 associated gimple statement, since that guarantees that DR accesses | |
1122 either a scalar or a scalar equivalent. ("Scalar equivalent" here | |
1123 includes things like V1SI, which can be vectorized in the same way | |
1124 as a plain SI.) */ | |
1125 | |
1126 inline unsigned int | |
1127 vect_get_scalar_dr_size (struct data_reference *dr) | |
1128 { | |
1129 return tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)))); | |
1130 } | |
1131 | |
1132 /* Source location */ | |
1133 extern source_location vect_location; | |
754 | 1134 |
755 /*-----------------------------------------------------------------*/ | 1135 /*-----------------------------------------------------------------*/ |
756 /* Function prototypes. */ | 1136 /* Function prototypes. */ |
757 /*-----------------------------------------------------------------*/ | 1137 /*-----------------------------------------------------------------*/ |
758 | 1138 |
759 /* Simple loop peeling and versioning utilities for vectorizer's purposes - | 1139 /* Simple loop peeling and versioning utilities for vectorizer's purposes - |
760 in tree-vect-loop-manip.c. */ | 1140 in tree-vect-loop-manip.c. */ |
761 extern void slpeel_make_loop_iterate_ntimes (struct loop *, tree); | 1141 extern void slpeel_make_loop_iterate_ntimes (struct loop *, tree); |
762 extern bool slpeel_can_duplicate_loop_p (const struct loop *, const_edge); | 1142 extern bool slpeel_can_duplicate_loop_p (const struct loop *, const_edge); |
763 extern void vect_loop_versioning (loop_vec_info, bool, tree *, gimple_seq *); | 1143 struct loop *slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *, |
764 extern void vect_do_peeling_for_loop_bound (loop_vec_info, tree *, | 1144 struct loop *, edge); |
765 tree, gimple_seq); | 1145 extern void vect_loop_versioning (loop_vec_info, unsigned int, bool); |
766 extern void vect_do_peeling_for_alignment (loop_vec_info); | 1146 extern struct loop *vect_do_peeling (loop_vec_info, tree, tree, |
767 extern LOC find_loop_location (struct loop *); | 1147 tree *, int, bool, bool); |
1148 extern source_location find_loop_location (struct loop *); | |
768 extern bool vect_can_advance_ivs_p (loop_vec_info); | 1149 extern bool vect_can_advance_ivs_p (loop_vec_info); |
769 | 1150 |
770 /* In tree-vect-stmts.c. */ | 1151 /* In tree-vect-stmts.c. */ |
771 extern unsigned int current_vector_size; | 1152 extern unsigned int current_vector_size; |
772 extern tree get_vectype_for_scalar_type (tree); | 1153 extern tree get_vectype_for_scalar_type (tree); |
1154 extern tree get_mask_type_for_scalar_type (tree); | |
773 extern tree get_same_sized_vectype (tree, tree); | 1155 extern tree get_same_sized_vectype (tree, tree); |
774 extern bool vect_is_simple_use (tree, loop_vec_info, bb_vec_info, gimple *, | 1156 extern bool vect_is_simple_use (tree, vec_info *, gimple **, |
775 tree *, enum vect_def_type *); | 1157 enum vect_def_type *); |
776 extern bool vect_is_simple_use_1 (tree, loop_vec_info, bb_vec_info, gimple *, | 1158 extern bool vect_is_simple_use (tree, vec_info *, gimple **, |
777 tree *, enum vect_def_type *, tree *); | 1159 enum vect_def_type *, tree *); |
778 extern bool supportable_widening_operation (enum tree_code, gimple, tree, tree, | 1160 extern bool supportable_widening_operation (enum tree_code, gimple *, tree, |
779 tree *, tree *, enum tree_code *, | 1161 tree, enum tree_code *, |
780 enum tree_code *, int *, | 1162 enum tree_code *, int *, |
781 VEC (tree, heap) **); | 1163 vec<tree> *); |
782 extern bool supportable_narrowing_operation (enum tree_code, tree, tree, | 1164 extern bool supportable_narrowing_operation (enum tree_code, tree, tree, |
783 enum tree_code *, | 1165 enum tree_code *, |
784 int *, VEC (tree, heap) **); | 1166 int *, vec<tree> *); |
785 extern stmt_vec_info new_stmt_vec_info (gimple stmt, loop_vec_info, | 1167 extern stmt_vec_info new_stmt_vec_info (gimple *stmt, vec_info *); |
786 bb_vec_info); | 1168 extern void free_stmt_vec_info (gimple *stmt); |
787 extern void free_stmt_vec_info (gimple stmt); | |
788 extern tree vectorizable_function (gimple, tree, tree); | |
789 extern void vect_model_simple_cost (stmt_vec_info, int, enum vect_def_type *, | 1169 extern void vect_model_simple_cost (stmt_vec_info, int, enum vect_def_type *, |
790 slp_tree); | 1170 int, stmt_vector_for_cost *, |
791 extern void vect_model_store_cost (stmt_vec_info, int, enum vect_def_type, | 1171 stmt_vector_for_cost *); |
792 slp_tree); | 1172 extern void vect_model_store_cost (stmt_vec_info, int, vect_memory_access_type, |
793 extern void vect_model_load_cost (stmt_vec_info, int, slp_tree); | 1173 enum vect_def_type, slp_tree, |
794 extern void vect_finish_stmt_generation (gimple, gimple, | 1174 stmt_vector_for_cost *, |
1175 stmt_vector_for_cost *); | |
1176 extern void vect_model_load_cost (stmt_vec_info, int, vect_memory_access_type, | |
1177 slp_tree, stmt_vector_for_cost *, | |
1178 stmt_vector_for_cost *); | |
1179 extern unsigned record_stmt_cost (stmt_vector_for_cost *, int, | |
1180 enum vect_cost_for_stmt, stmt_vec_info, | |
1181 int, enum vect_cost_model_location); | |
1182 extern void vect_finish_stmt_generation (gimple *, gimple *, | |
795 gimple_stmt_iterator *); | 1183 gimple_stmt_iterator *); |
796 extern bool vect_mark_stmts_to_be_vectorized (loop_vec_info); | 1184 extern bool vect_mark_stmts_to_be_vectorized (loop_vec_info); |
797 extern int cost_for_stmt (gimple); | 1185 extern tree vect_get_vec_def_for_operand_1 (gimple *, enum vect_def_type); |
798 extern tree vect_get_vec_def_for_operand (tree, gimple, tree *); | 1186 extern tree vect_get_vec_def_for_operand (tree, gimple *, tree = NULL); |
799 extern tree vect_init_vector (gimple, tree, tree, | 1187 extern void vect_get_vec_defs (tree, tree, gimple *, vec<tree> *, |
1188 vec<tree> *, slp_tree); | |
1189 extern void vect_get_vec_defs_for_stmt_copy (enum vect_def_type *, | |
1190 vec<tree> *, vec<tree> *); | |
1191 extern tree vect_init_vector (gimple *, tree, tree, | |
800 gimple_stmt_iterator *); | 1192 gimple_stmt_iterator *); |
801 extern tree vect_get_vec_def_for_stmt_copy (enum vect_def_type, tree); | 1193 extern tree vect_get_vec_def_for_stmt_copy (enum vect_def_type, tree); |
802 extern bool vect_transform_stmt (gimple, gimple_stmt_iterator *, | 1194 extern bool vect_transform_stmt (gimple *, gimple_stmt_iterator *, |
803 bool *, slp_tree, slp_instance); | 1195 bool *, slp_tree, slp_instance); |
804 extern void vect_remove_stores (gimple); | 1196 extern void vect_remove_stores (gimple *); |
805 extern bool vect_analyze_stmt (gimple, bool *, slp_tree); | 1197 extern bool vect_analyze_stmt (gimple *, bool *, slp_tree, slp_instance); |
806 extern bool vectorizable_condition (gimple, gimple_stmt_iterator *, gimple *, | 1198 extern bool vectorizable_condition (gimple *, gimple_stmt_iterator *, |
807 tree, int); | 1199 gimple **, tree, int, slp_tree); |
808 extern void vect_get_load_cost (struct data_reference *, int, bool, | 1200 extern void vect_get_load_cost (struct data_reference *, int, bool, |
809 unsigned int *, unsigned int *); | 1201 unsigned int *, unsigned int *, |
810 extern void vect_get_store_cost (struct data_reference *, int, unsigned int *); | 1202 stmt_vector_for_cost *, |
1203 stmt_vector_for_cost *, bool); | |
1204 extern void vect_get_store_cost (struct data_reference *, int, | |
1205 unsigned int *, stmt_vector_for_cost *); | |
1206 extern bool vect_supportable_shift (enum tree_code, tree); | |
1207 extern tree vect_gen_perm_mask_any (tree, vec_perm_indices); | |
1208 extern tree vect_gen_perm_mask_checked (tree, vec_perm_indices); | |
1209 extern void optimize_mask_stores (struct loop*); | |
811 | 1210 |
812 /* In tree-vect-data-refs.c. */ | 1211 /* In tree-vect-data-refs.c. */ |
813 extern bool vect_can_force_dr_alignment_p (const_tree, unsigned int); | 1212 extern bool vect_can_force_dr_alignment_p (const_tree, unsigned int); |
814 extern enum dr_alignment_support vect_supportable_dr_alignment | 1213 extern enum dr_alignment_support vect_supportable_dr_alignment |
815 (struct data_reference *, bool); | 1214 (struct data_reference *, bool); |
816 extern tree vect_get_smallest_scalar_type (gimple, HOST_WIDE_INT *, | 1215 extern tree vect_get_smallest_scalar_type (gimple *, HOST_WIDE_INT *, |
817 HOST_WIDE_INT *); | 1216 HOST_WIDE_INT *); |
818 extern bool vect_analyze_data_ref_dependences (loop_vec_info, bb_vec_info, | 1217 extern bool vect_analyze_data_ref_dependences (loop_vec_info, int *); |
819 int *, bool *); | 1218 extern bool vect_slp_analyze_instance_dependence (slp_instance); |
820 extern bool vect_enhance_data_refs_alignment (loop_vec_info); | 1219 extern bool vect_enhance_data_refs_alignment (loop_vec_info); |
821 extern bool vect_analyze_data_refs_alignment (loop_vec_info, bb_vec_info); | 1220 extern bool vect_analyze_data_refs_alignment (loop_vec_info); |
822 extern bool vect_verify_datarefs_alignment (loop_vec_info, bb_vec_info); | 1221 extern bool vect_verify_datarefs_alignment (loop_vec_info); |
823 extern bool vect_analyze_data_ref_accesses (loop_vec_info, bb_vec_info); | 1222 extern bool vect_slp_analyze_and_verify_instance_alignment (slp_instance); |
1223 extern bool vect_analyze_data_ref_accesses (vec_info *); | |
824 extern bool vect_prune_runtime_alias_test_list (loop_vec_info); | 1224 extern bool vect_prune_runtime_alias_test_list (loop_vec_info); |
825 extern bool vect_analyze_data_refs (loop_vec_info, bb_vec_info, int *); | 1225 extern bool vect_check_gather_scatter (gimple *, loop_vec_info, |
826 extern tree vect_create_data_ref_ptr (gimple, struct loop *, tree, tree *, | 1226 gather_scatter_info *); |
827 gimple *, bool, bool *); | 1227 extern bool vect_analyze_data_refs (vec_info *, int *); |
828 extern tree bump_vector_ptr (tree, gimple, gimple_stmt_iterator *, gimple, tree); | 1228 extern void vect_record_base_alignments (vec_info *); |
1229 extern tree vect_create_data_ref_ptr (gimple *, tree, struct loop *, tree, | |
1230 tree *, gimple_stmt_iterator *, | |
1231 gimple **, bool, bool *, | |
1232 tree = NULL_TREE); | |
1233 extern tree bump_vector_ptr (tree, gimple *, gimple_stmt_iterator *, gimple *, | |
1234 tree); | |
829 extern tree vect_create_destination_var (tree, tree); | 1235 extern tree vect_create_destination_var (tree, tree); |
830 extern bool vect_strided_store_supported (tree); | 1236 extern bool vect_grouped_store_supported (tree, unsigned HOST_WIDE_INT); |
831 extern bool vect_strided_load_supported (tree); | 1237 extern bool vect_store_lanes_supported (tree, unsigned HOST_WIDE_INT); |
832 extern bool vect_permute_store_chain (VEC(tree,heap) *,unsigned int, gimple, | 1238 extern bool vect_grouped_load_supported (tree, bool, unsigned HOST_WIDE_INT); |
833 gimple_stmt_iterator *, VEC(tree,heap) **); | 1239 extern bool vect_load_lanes_supported (tree, unsigned HOST_WIDE_INT); |
834 extern tree vect_setup_realignment (gimple, gimple_stmt_iterator *, tree *, | 1240 extern void vect_permute_store_chain (vec<tree> ,unsigned int, gimple *, |
1241 gimple_stmt_iterator *, vec<tree> *); | |
1242 extern tree vect_setup_realignment (gimple *, gimple_stmt_iterator *, tree *, | |
835 enum dr_alignment_support, tree, | 1243 enum dr_alignment_support, tree, |
836 struct loop **); | 1244 struct loop **); |
837 extern bool vect_permute_load_chain (VEC(tree,heap) *,unsigned int, gimple, | 1245 extern void vect_transform_grouped_load (gimple *, vec<tree> , int, |
838 gimple_stmt_iterator *, VEC(tree,heap) **); | |
839 extern bool vect_transform_strided_load (gimple, VEC(tree,heap) *, int, | |
840 gimple_stmt_iterator *); | 1246 gimple_stmt_iterator *); |
841 extern int vect_get_place_in_interleaving_chain (gimple, gimple); | 1247 extern void vect_record_grouped_load_vectors (gimple *, vec<tree> ); |
842 extern tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *); | 1248 extern tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *); |
843 extern tree vect_create_addr_base_for_vector_ref (gimple, gimple_seq *, | 1249 extern tree vect_get_new_ssa_name (tree, enum vect_var_kind, |
844 tree, struct loop *); | 1250 const char * = NULL); |
1251 extern tree vect_create_addr_base_for_vector_ref (gimple *, gimple_seq *, | |
1252 tree, tree = NULL_TREE); | |
845 | 1253 |
846 /* In tree-vect-loop.c. */ | 1254 /* In tree-vect-loop.c. */ |
847 /* FORNOW: Used in tree-parloops.c. */ | 1255 /* FORNOW: Used in tree-parloops.c. */ |
848 extern void destroy_loop_vec_info (loop_vec_info, bool); | 1256 extern gimple *vect_force_simple_reduction (loop_vec_info, gimple *, |
849 extern gimple vect_force_simple_reduction (loop_vec_info, gimple, bool, bool *); | 1257 bool *, bool); |
850 /* Drive for loop analysis stage. */ | 1258 /* Drive for loop analysis stage. */ |
851 extern loop_vec_info vect_analyze_loop (struct loop *); | 1259 extern loop_vec_info vect_analyze_loop (struct loop *, loop_vec_info); |
1260 extern tree vect_build_loop_niters (loop_vec_info, bool * = NULL); | |
1261 extern void vect_gen_vector_loop_niters (loop_vec_info, tree, tree *, bool); | |
852 /* Drive for loop transformation stage. */ | 1262 /* Drive for loop transformation stage. */ |
853 extern void vect_transform_loop (loop_vec_info); | 1263 extern struct loop *vect_transform_loop (loop_vec_info); |
854 extern loop_vec_info vect_analyze_loop_form (struct loop *); | 1264 extern loop_vec_info vect_analyze_loop_form (struct loop *); |
855 extern bool vectorizable_live_operation (gimple, gimple_stmt_iterator *, | 1265 extern bool vectorizable_live_operation (gimple *, gimple_stmt_iterator *, |
856 gimple *); | 1266 slp_tree, int, gimple **); |
857 extern bool vectorizable_reduction (gimple, gimple_stmt_iterator *, gimple *, | 1267 extern bool vectorizable_reduction (gimple *, gimple_stmt_iterator *, |
858 slp_tree); | 1268 gimple **, slp_tree, slp_instance); |
859 extern bool vectorizable_induction (gimple, gimple_stmt_iterator *, gimple *); | 1269 extern bool vectorizable_induction (gimple *, gimple_stmt_iterator *, |
860 extern int vect_estimate_min_profitable_iters (loop_vec_info); | 1270 gimple **, slp_tree); |
861 extern tree get_initial_def_for_reduction (gimple, tree, tree *); | 1271 extern tree get_initial_def_for_reduction (gimple *, tree, tree *); |
862 extern int vect_min_worthwhile_factor (enum tree_code); | 1272 extern bool vect_worthwhile_without_simd_p (vec_info *, tree_code); |
863 extern int vect_get_known_peeling_cost (loop_vec_info, int, int *, int); | 1273 extern int vect_get_known_peeling_cost (loop_vec_info, int, int *, |
864 extern int vect_get_single_scalar_iteraion_cost (loop_vec_info); | 1274 stmt_vector_for_cost *, |
1275 stmt_vector_for_cost *, | |
1276 stmt_vector_for_cost *); | |
865 | 1277 |
866 /* In tree-vect-slp.c. */ | 1278 /* In tree-vect-slp.c. */ |
867 extern void vect_free_slp_instance (slp_instance); | 1279 extern void vect_free_slp_instance (slp_instance); |
868 extern bool vect_transform_slp_perm_load (gimple, VEC (tree, heap) *, | 1280 extern bool vect_transform_slp_perm_load (slp_tree, vec<tree> , |
869 gimple_stmt_iterator *, int, | 1281 gimple_stmt_iterator *, int, |
870 slp_instance, bool); | 1282 slp_instance, bool, unsigned *); |
871 extern bool vect_schedule_slp (loop_vec_info, bb_vec_info); | 1283 extern bool vect_slp_analyze_operations (vec_info *); |
872 extern void vect_update_slp_costs_according_to_vf (loop_vec_info); | 1284 extern bool vect_schedule_slp (vec_info *); |
873 extern bool vect_analyze_slp (loop_vec_info, bb_vec_info); | 1285 extern bool vect_analyze_slp (vec_info *, unsigned); |
874 extern void vect_make_slp_decision (loop_vec_info); | 1286 extern bool vect_make_slp_decision (loop_vec_info); |
875 extern void vect_detect_hybrid_slp (loop_vec_info); | 1287 extern void vect_detect_hybrid_slp (loop_vec_info); |
876 extern void vect_get_slp_defs (tree, tree, slp_tree, VEC (tree,heap) **, | 1288 extern void vect_get_slp_defs (vec<tree> , slp_tree, vec<vec<tree> > *); |
877 VEC (tree,heap) **, int); | 1289 extern bool vect_slp_bb (basic_block); |
878 extern LOC find_bb_location (basic_block); | 1290 extern gimple *vect_find_last_scalar_stmt_in_slp (slp_tree); |
879 extern bb_vec_info vect_slp_analyze_bb (basic_block); | 1291 extern bool is_simple_and_all_uses_invariant (gimple *, loop_vec_info); |
880 extern void vect_slp_transform_bb (basic_block); | |
881 | 1292 |
882 /* In tree-vect-patterns.c. */ | 1293 /* In tree-vect-patterns.c. */ |
883 /* Pattern recognition functions. | 1294 /* Pattern recognition functions. |
884 Additional pattern recognition functions can (and will) be added | 1295 Additional pattern recognition functions can (and will) be added |
885 in the future. */ | 1296 in the future. */ |
886 typedef gimple (* vect_recog_func_ptr) (gimple, tree *, tree *); | 1297 typedef gimple *(* vect_recog_func_ptr) (vec<gimple *> *, tree *, tree *); |
887 #define NUM_PATTERNS 4 | 1298 #define NUM_PATTERNS 14 |
888 void vect_pattern_recog (loop_vec_info); | 1299 void vect_pattern_recog (vec_info *); |
889 | 1300 |
890 /* In tree-vectorizer.c. */ | 1301 /* In tree-vectorizer.c. */ |
891 unsigned vectorize_loops (void); | 1302 unsigned vectorize_loops (void); |
892 /* Vectorization debug information */ | 1303 bool vect_stmt_in_region_p (vec_info *, gimple *); |
893 extern bool vect_print_dump_info (enum vect_verbosity_levels); | 1304 void vect_free_loop_info_assumptions (struct loop *); |
894 | 1305 |
895 #endif /* GCC_TREE_VECTORIZER_H */ | 1306 #endif /* GCC_TREE_VECTORIZER_H */ |