Mercurial > hg > CbC > CbC_gcc
annotate gcc/tree-vectorizer.h @ 145:1830386684a0
gcc-9.2.0
author | anatofuz |
---|---|
date | Thu, 13 Feb 2020 11:34:05 +0900 |
parents | 84e7813d76e9 |
children |
rev | line source |
---|---|
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1 /* Vectorizer |
145 | 2 Copyright (C) 2003-2020 Free Software Foundation, Inc. |
0 | 3 Contributed by Dorit Naishlos <dorit@il.ibm.com> |
4 | |
5 This file is part of GCC. | |
6 | |
7 GCC is free software; you can redistribute it and/or modify it under | |
8 the terms of the GNU General Public License as published by the Free | |
9 Software Foundation; either version 3, or (at your option) any later | |
10 version. | |
11 | |
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
15 for more details. | |
16 | |
17 You should have received a copy of the GNU General Public License | |
18 along with GCC; see the file COPYING3. If not see | |
19 <http://www.gnu.org/licenses/>. */ | |
20 | |
21 #ifndef GCC_TREE_VECTORIZER_H | |
22 #define GCC_TREE_VECTORIZER_H | |
23 | |
145 | 24 typedef class _stmt_vec_info *stmt_vec_info; |
131 | 25 |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
26 #include "tree-data-ref.h" |
111 | 27 #include "tree-hash-traits.h" |
28 #include "target.h" | |
145 | 29 #include <utility> |
0 | 30 |
/* Kinds of variable; consulted when naming new temporaries.  */
enum vect_var_kind {
  vect_simple_var = 0,
  vect_pointer_var = 1,
  vect_scalar_var = 2,
  vect_mask_var = 3
};
38 | |
/* Type of an operation.  Numbering starts at 1, not 0.  */
enum operation_type {
  unary_op = 1,
  binary_op = 2,
  ternary_op = 3
};
45 | |
/* Kinds of alignment support that may be available.  */
enum dr_alignment_support {
  dr_unaligned_unsupported = 0,
  dr_unaligned_supported = 1,
  dr_explicit_realign = 2,
  dr_explicit_realign_optimized = 3,
  dr_aligned = 4
};
54 | |
/* Kinds of def-use cross-iteration cycle.  The numeric values are
   spelled out; they match the implicit numbering used previously.  */
enum vect_def_type {
  vect_uninitialized_def = 0,
  vect_constant_def = 1,
  vect_external_def = 2,
  vect_internal_def = 3,
  vect_induction_def = 4,
  vect_reduction_def = 5,
  vect_double_reduction_def = 6,
  vect_nested_cycle = 7,
  vect_unknown_def_type = 8
};
67 | |
/* Kinds of reduction.  */
enum vect_reduction_type {
  TREE_CODE_REDUCTION = 0,
  COND_REDUCTION = 1,
  INTEGER_INDUC_COND_REDUCTION = 2,
  CONST_COND_REDUCTION = 3,

  /* A scalar phi is retained and FOLD_EXTRACT_LAST is used inside the
     loop, implementing:

       for (int i = 0; i < VF; ++i)
	 res = cond[i] ? val[i] : res;  */
  EXTRACT_LAST_REDUCTION = 4,

  /* A folding reduction is used inside the loop, implementing:

       for (int i = 0; i < VF; ++i)
	 res = res OP val[i];

     (with no reassociation).  */
  FOLD_LEFT_REDUCTION = 5
};
90 | |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
/* Nonzero if DEF_TYPE is vect_reduction_def, vect_double_reduction_def
   or vect_nested_cycle.  DEF_TYPE may be evaluated up to three times.  */
#define VECTORIZABLE_CYCLE_DEF(DEF_TYPE)		\
  (((DEF_TYPE) == vect_reduction_def)			\
   || ((DEF_TYPE) == vect_double_reduction_def)		\
   || ((DEF_TYPE) == vect_nested_cycle))
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
94 |
111 | 95 /* Structure to encapsulate information about a group of like |
96 instructions to be presented to the target cost model. */ | |
97 struct stmt_info_for_cost { | |
98 int count; | |
99 enum vect_cost_for_stmt kind; | |
131 | 100 enum vect_cost_model_location where; |
101 stmt_vec_info stmt_info; | |
111 | 102 int misalign; |
103 }; | |
104 | |
105 typedef vec<stmt_info_for_cost> stmt_vector_for_cost; | |
106 | |
107 /* Maps base addresses to an innermost_loop_behavior that gives the maximum | |
108 known alignment for that base. */ | |
109 typedef hash_map<tree_operand_hash, | |
110 innermost_loop_behavior *> vec_base_alignments; | |
111 | |
0 | 112 /************************************************************************ |
113 SLP | |
114 ************************************************************************/ | |
111 | 115 typedef struct _slp_tree *slp_tree; |
0 | 116 |
111 | 117 /* A computation tree of an SLP instance. Each node corresponds to a group of |
0 | 118 stmts to be packed in a SIMD stmt. */ |
111 | 119 struct _slp_tree { |
120 /* Nodes that contain def-stmts of this node statements operands. */ | |
121 vec<slp_tree> children; | |
0 | 122 /* A group of scalar stmts to be vectorized together. */ |
131 | 123 vec<stmt_vec_info> stmts; |
145 | 124 /* A group of scalar operands to be vectorized together. */ |
125 vec<tree> ops; | |
111 | 126 /* Load permutation relative to the stores, NULL if there is no |
127 permutation. */ | |
128 vec<unsigned> load_permutation; | |
0 | 129 /* Vectorized stmt/s. */ |
131 | 130 vec<stmt_vec_info> vec_stmts; |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
131 /* Number of vector stmts that are created to replace the group of scalar |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
132 stmts. It is calculated during the transformation phase as the number of |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
133 scalar elements in one scalar iteration (GROUP_SIZE) multiplied by VF |
0 | 134 divided by vector size. */ |
135 unsigned int vec_stmts_size; | |
145 | 136 /* Reference count in the SLP graph. */ |
137 unsigned int refcnt; | |
138 /* The maximum number of vector elements for the subtree rooted | |
139 at this node. */ | |
140 poly_uint64 max_nunits; | |
111 | 141 /* Whether the scalar computations use two different operators. */ |
142 bool two_operators; | |
143 /* The DEF type of this node. */ | |
144 enum vect_def_type def_type; | |
145 }; | |
0 | 146 |
147 | |
148 /* SLP instance is a sequence of stmts in a loop that can be packed into | |
149 SIMD stmts. */ | |
145 | 150 typedef class _slp_instance { |
151 public: | |
0 | 152 /* The root of SLP tree. */ |
153 slp_tree root; | |
154 | |
145 | 155 /* For vector constructors, the constructor stmt that the SLP tree is built |
156 from, NULL otherwise. */ | |
157 stmt_vec_info root_stmt; | |
158 | |
0 | 159 /* Size of groups of scalar stmts that will be replaced by SIMD stmt/s. */ |
160 unsigned int group_size; | |
161 | |
162 /* The unrolling factor required to vectorized this SLP instance. */ | |
131 | 163 poly_uint64 unrolling_factor; |
0 | 164 |
111 | 165 /* The group of nodes that contain loads of this SLP instance. */ |
166 vec<slp_tree> loads; | |
0 | 167 |
111 | 168 /* The SLP node containing the reduction PHIs. */ |
169 slp_tree reduc_phis; | |
0 | 170 } *slp_instance; |
171 | |
172 | |
/* Accessors for the fields of an slp_instance.  */
#define SLP_INSTANCE_TREE(INST)			(INST)->root
#define SLP_INSTANCE_GROUP_SIZE(INST)		(INST)->group_size
#define SLP_INSTANCE_UNROLLING_FACTOR(INST)	(INST)->unrolling_factor
#define SLP_INSTANCE_LOADS(INST)		(INST)->loads
#define SLP_INSTANCE_ROOT_STMT(INST)		(INST)->root_stmt

/* Accessors for the fields of an slp_tree node.  */
#define SLP_TREE_CHILDREN(NODE)			(NODE)->children
#define SLP_TREE_SCALAR_STMTS(NODE)		(NODE)->stmts
#define SLP_TREE_SCALAR_OPS(NODE)		(NODE)->ops
#define SLP_TREE_VEC_STMTS(NODE)		(NODE)->vec_stmts
#define SLP_TREE_NUMBER_OF_VEC_STMTS(NODE)	(NODE)->vec_stmts_size
#define SLP_TREE_LOAD_PERMUTATION(NODE)		(NODE)->load_permutation
#define SLP_TREE_TWO_OPERATORS(NODE)		(NODE)->two_operators
#define SLP_TREE_DEF_TYPE(NODE)			(NODE)->def_type
188 | |
145 | 189 /* Key for map that records association between |
190 scalar conditions and corresponding loop mask, and | |
191 is populated by vect_record_loop_mask. */ | |
0 | 192 |
145 | 193 struct scalar_cond_masked_key |
194 { | |
195 scalar_cond_masked_key (tree t, unsigned ncopies_) | |
196 : ncopies (ncopies_) | |
197 { | |
198 get_cond_ops_from_tree (t); | |
199 } | |
200 | |
201 void get_cond_ops_from_tree (tree); | |
202 | |
203 unsigned ncopies; | |
204 tree_code code; | |
205 tree op0; | |
206 tree op1; | |
207 }; | |
208 | |
209 template<> | |
210 struct default_hash_traits<scalar_cond_masked_key> | |
211 { | |
212 typedef scalar_cond_masked_key compare_type; | |
213 typedef scalar_cond_masked_key value_type; | |
214 | |
215 static inline hashval_t | |
216 hash (value_type v) | |
217 { | |
218 inchash::hash h; | |
219 h.add_int (v.code); | |
220 inchash::add_expr (v.op0, h, 0); | |
221 inchash::add_expr (v.op1, h, 0); | |
222 h.add_int (v.ncopies); | |
223 return h.end (); | |
224 } | |
225 | |
226 static inline bool | |
227 equal (value_type existing, value_type candidate) | |
228 { | |
229 return (existing.ncopies == candidate.ncopies | |
230 && existing.code == candidate.code | |
231 && operand_equal_p (existing.op0, candidate.op0, 0) | |
232 && operand_equal_p (existing.op1, candidate.op1, 0)); | |
233 } | |
234 | |
235 static const bool empty_zero_p = true; | |
236 | |
237 static inline void | |
238 mark_empty (value_type &v) | |
239 { | |
240 v.ncopies = 0; | |
241 } | |
242 | |
243 static inline bool | |
244 is_empty (value_type v) | |
245 { | |
246 return v.ncopies == 0; | |
247 } | |
248 | |
249 static inline void mark_deleted (value_type &) {} | |
250 | |
251 static inline bool is_deleted (const value_type &) | |
252 { | |
253 return false; | |
254 } | |
255 | |
256 static inline void remove (value_type &) {} | |
257 }; | |
258 | |
259 typedef hash_set<scalar_cond_masked_key> scalar_cond_masked_set_type; | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
260 |
111 | 261 /* Describes two objects whose addresses must be unequal for the vectorized |
262 loop to be valid. */ | |
263 typedef std::pair<tree, tree> vec_object_pair; | |
264 | |
131 | 265 /* Records that vectorization is only possible if abs (EXPR) >= MIN_VALUE. |
266 UNSIGNED_P is true if we can assume that abs (EXPR) == EXPR. */ | |
145 | 267 class vec_lower_bound { |
268 public: | |
131 | 269 vec_lower_bound () {} |
270 vec_lower_bound (tree e, bool u, poly_uint64 m) | |
271 : expr (e), unsigned_p (u), min_value (m) {} | |
272 | |
273 tree expr; | |
274 bool unsigned_p; | |
275 poly_uint64 min_value; | |
276 }; | |
277 | |
278 /* Vectorizer state shared between different analyses like vector sizes | |
279 of the same CFG region. */ | |
145 | 280 class vec_info_shared { |
281 public: | |
131 | 282 vec_info_shared(); |
283 ~vec_info_shared(); | |
284 | |
285 void save_datarefs(); | |
286 void check_datarefs(); | |
287 | |
288 /* All data references. Freed by free_data_refs, so not an auto_vec. */ | |
289 vec<data_reference_p> datarefs; | |
290 vec<data_reference> datarefs_copy; | |
291 | |
292 /* The loop nest in which the data dependences are computed. */ | |
293 auto_vec<loop_p> loop_nest; | |
294 | |
295 /* All data dependences. Freed by free_dependence_relations, so not | |
296 an auto_vec. */ | |
297 vec<ddr_p> ddrs; | |
298 }; | |
299 | |
111 | 300 /* Vectorizer state common between loop and basic-block vectorization. */ |
145 | 301 class vec_info { |
302 public: | |
303 typedef hash_set<int_hash<machine_mode, E_VOIDmode, E_BLKmode> > mode_set; | |
111 | 304 enum vec_kind { bb, loop }; |
305 | |
131 | 306 vec_info (vec_kind, void *, vec_info_shared *); |
111 | 307 ~vec_info (); |
308 | |
131 | 309 stmt_vec_info add_stmt (gimple *); |
310 stmt_vec_info lookup_stmt (gimple *); | |
311 stmt_vec_info lookup_def (tree); | |
312 stmt_vec_info lookup_single_use (tree); | |
145 | 313 class dr_vec_info *lookup_dr (data_reference *); |
131 | 314 void move_dr (stmt_vec_info, stmt_vec_info); |
315 void remove_stmt (stmt_vec_info); | |
316 void replace_stmt (gimple_stmt_iterator *, stmt_vec_info, gimple *); | |
317 | |
111 | 318 /* The type of vectorization. */ |
319 vec_kind kind; | |
320 | |
131 | 321 /* Shared vectorizer state. */ |
322 vec_info_shared *shared; | |
323 | |
324 /* The mapping of GIMPLE UID to stmt_vec_info. */ | |
325 vec<stmt_vec_info> stmt_vec_infos; | |
326 | |
111 | 327 /* All SLP instances. */ |
328 auto_vec<slp_instance> slp_instances; | |
329 | |
330 /* Maps base addresses to an innermost_loop_behavior that gives the maximum | |
331 known alignment for that base. */ | |
332 vec_base_alignments base_alignments; | |
333 | |
334 /* All interleaving chains of stores, represented by the first | |
335 stmt in the chain. */ | |
131 | 336 auto_vec<stmt_vec_info> grouped_stores; |
111 | 337 |
338 /* Cost data used by the target cost model. */ | |
339 void *target_cost_data; | |
131 | 340 |
145 | 341 /* The set of vector modes used in the vectorized region. */ |
342 mode_set used_vector_modes; | |
343 | |
344 /* The argument we should pass to related_vector_mode when looking up | |
345 the vector mode for a scalar mode, or VOIDmode if we haven't yet | |
346 made any decisions about which vector modes to use. */ | |
347 machine_mode vector_mode; | |
348 | |
131 | 349 private: |
350 stmt_vec_info new_stmt_vec_info (gimple *stmt); | |
351 void set_vinfo_for_stmt (gimple *, stmt_vec_info); | |
352 void free_stmt_vec_infos (); | |
353 void free_stmt_vec_info (stmt_vec_info); | |
111 | 354 }; |
355 | |
145 | 356 class _loop_vec_info; |
357 class _bb_vec_info; | |
111 | 358 |
359 template<> | |
360 template<> | |
361 inline bool | |
362 is_a_helper <_loop_vec_info *>::test (vec_info *i) | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
363 { |
111 | 364 return i->kind == vec_info::loop; |
365 } | |
366 | |
367 template<> | |
368 template<> | |
369 inline bool | |
370 is_a_helper <_bb_vec_info *>::test (vec_info *i) | |
371 { | |
372 return i->kind == vec_info::bb; | |
373 } | |
374 | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
375 |
131 | 376 /* In general, we can divide the vector statements in a vectorized loop |
377 into related groups ("rgroups") and say that for each rgroup there is | |
378 some nS such that the rgroup operates on nS values from one scalar | |
379 iteration followed by nS values from the next. That is, if VF is the | |
380 vectorization factor of the loop, the rgroup operates on a sequence: | |
381 | |
382 (1,1) (1,2) ... (1,nS) (2,1) ... (2,nS) ... (VF,1) ... (VF,nS) | |
383 | |
384 where (i,j) represents a scalar value with index j in a scalar | |
385 iteration with index i. | |
386 | |
387 [ We use the term "rgroup" to emphasise that this grouping isn't | |
388 necessarily the same as the grouping of statements used elsewhere. | |
389 For example, if we implement a group of scalar loads using gather | |
390 loads, we'll use a separate gather load for each scalar load, and | |
391 thus each gather load will belong to its own rgroup. ] | |
392 | |
393 In general this sequence will occupy nV vectors concatenated | |
394 together. If these vectors have nL lanes each, the total number | |
395 of scalar values N is given by: | |
396 | |
397 N = nS * VF = nV * nL | |
398 | |
399 None of nS, VF, nV and nL are required to be a power of 2. nS and nV | |
400 are compile-time constants but VF and nL can be variable (if the target | |
401 supports variable-length vectors). | |
402 | |
403 In classical vectorization, each iteration of the vector loop would | |
404 handle exactly VF iterations of the original scalar loop. However, | |
405 in a fully-masked loop, a particular iteration of the vector loop | |
406 might handle fewer than VF iterations of the scalar loop. The vector | |
407 lanes that correspond to iterations of the scalar loop are said to be | |
408 "active" and the other lanes are said to be "inactive". | |
409 | |
410 In a fully-masked loop, many rgroups need to be masked to ensure that | |
411 they have no effect for the inactive lanes. Each such rgroup needs a | |
412 sequence of booleans in the same order as above, but with each (i,j) | |
413 replaced by a boolean that indicates whether iteration i is active. | |
414 This sequence occupies nV vector masks that again have nL lanes each. | |
415 Thus the mask sequence as a whole consists of VF independent booleans | |
416 that are each repeated nS times. | |
417 | |
418 We make the simplifying assumption that if a sequence of nV masks is | |
419 suitable for one (nS,nL) pair, we can reuse it for (nS/2,nL/2) by | |
420 VIEW_CONVERTing it. This holds for all current targets that support | |
421 fully-masked loops. For example, suppose the scalar loop is: | |
422 | |
423 float *f; | |
424 double *d; | |
425 for (int i = 0; i < n; ++i) | |
426 { | |
427 f[i * 2 + 0] += 1.0f; | |
428 f[i * 2 + 1] += 2.0f; | |
429 d[i] += 3.0; | |
430 } | |
431 | |
432 and suppose that vectors have 256 bits. The vectorized f accesses | |
433 will belong to one rgroup and the vectorized d access to another: | |
434 | |
435 f rgroup: nS = 2, nV = 1, nL = 8 | |
436 d rgroup: nS = 1, nV = 1, nL = 4 | |
437 VF = 4 | |
438 | |
439 [ In this simple example the rgroups do correspond to the normal | |
440 SLP grouping scheme. ] | |
441 | |
442 If only the first three lanes are active, the masks we need are: | |
443 | |
444 f rgroup: 1 1 | 1 1 | 1 1 | 0 0 | |
445 d rgroup: 1 | 1 | 1 | 0 | |
446 | |
447 Here we can use a mask calculated for f's rgroup for d's, but not | |
448 vice versa. | |
449 | |
450 Thus for each value of nV, it is enough to provide nV masks, with the | |
451 mask being calculated based on the highest nL (or, equivalently, based | |
452 on the highest nS) required by any rgroup with that nV. We therefore | |
453 represent the entire collection of masks as a two-level table, with the | |
454 first level being indexed by nV - 1 (since nV == 0 doesn't exist) and | |
455 the second being indexed by the mask index 0 <= i < nV. */ | |
456 | |
457 /* The masks needed by rgroups with nV vectors, according to the | |
458 description above. */ | |
459 struct rgroup_masks { | |
460 /* The largest nS for all rgroups that use these masks. */ | |
461 unsigned int max_nscalars_per_iter; | |
462 | |
463 /* The type of mask to use, based on the highest nS recorded above. */ | |
464 tree mask_type; | |
465 | |
466 /* A vector of nV masks, in iteration order. */ | |
467 vec<tree> masks; | |
468 }; | |
469 | |
470 typedef auto_vec<rgroup_masks> vec_loop_masks; | |
471 | |
145 | 472 typedef auto_vec<std::pair<data_reference*, tree> > drs_init_vec; |
473 | |
0 | 474 /*-----------------------------------------------------------------*/ |
475 /* Info on vectorized loops. */ | |
476 /*-----------------------------------------------------------------*/ | |
145 | 477 typedef class _loop_vec_info : public vec_info { |
478 public: | |
479 _loop_vec_info (class loop *, vec_info_shared *); | |
111 | 480 ~_loop_vec_info (); |
0 | 481 |
482 /* The loop to which this info struct refers to. */ | |
145 | 483 class loop *loop; |
0 | 484 |
485 /* The loop basic blocks. */ | |
486 basic_block *bbs; | |
487 | |
111 | 488 /* Number of latch executions. */ |
489 tree num_itersm1; | |
0 | 490 /* Number of iterations. */ |
491 tree num_iters; | |
111 | 492 /* Number of iterations of the original loop. */ |
0 | 493 tree num_iters_unchanged; |
111 | 494 /* Condition under which this loop is analyzed and versioned. */ |
495 tree num_iters_assumptions; | |
0 | 496 |
145 | 497 /* Threshold of number of iterations below which vectorization will not be |
111 | 498 performed. It is calculated from MIN_PROFITABLE_ITERS and |
145 | 499 param_min_vect_loop_bound. */ |
111 | 500 unsigned int th; |
0 | 501 |
131 | 502 /* When applying loop versioning, the vector form should only be used |
503 if the number of scalar iterations is >= this value, on top of all | |
504 the other requirements. Ignored when loop versioning is not being | |
505 used. */ | |
506 poly_uint64 versioning_threshold; | |
507 | |
0 | 508 /* Unrolling factor */ |
131 | 509 poly_uint64 vectorization_factor; |
0 | 510 |
111 | 511 /* Maximum runtime vectorization factor, or MAX_VECTORIZATION_FACTOR |
512 if there is no particular limit. */ | |
513 unsigned HOST_WIDE_INT max_vectorization_factor; | |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
514 |
131 | 515 /* The masks that a fully-masked loop should use to avoid operating |
516 on inactive scalars. */ | |
517 vec_loop_masks masks; | |
518 | |
145 | 519 /* Set of scalar conditions that have loop mask applied. */ |
520 scalar_cond_masked_set_type scalar_cond_masked_set; | |
521 | |
131 | 522 /* If we are using a loop mask to align memory addresses, this variable |
523 contains the number of vector elements that we should skip in the | |
524 first iteration of the vector loop (i.e. the number of leading | |
525 elements that should be false in the first mask). */ | |
526 tree mask_skip_niters; | |
527 | |
528 /* Type of the variables to use in the WHILE_ULT call for fully-masked | |
529 loops. */ | |
530 tree mask_compare_type; | |
531 | |
145 | 532 /* For #pragma omp simd if (x) loops the x expression. If constant 0, |
533 the loop should not be vectorized, if constant non-zero, simd_if_cond | |
534 shouldn't be set and loop vectorized normally, if SSA_NAME, the loop | |
535 should be versioned on that condition, using scalar loop if the condition | |
536 is false and vectorized loop otherwise. */ | |
537 tree simd_if_cond; | |
538 | |
539 /* Type of the IV to use in the WHILE_ULT call for fully-masked | |
540 loops. */ | |
541 tree iv_type; | |
542 | |
0 | 543 /* Unknown DRs according to which loop was peeled. */ |
145 | 544 class dr_vec_info *unaligned_dr; |
0 | 545 |
546 /* peeling_for_alignment indicates whether peeling for alignment will take | |
547 place, and what the peeling factor should be: | |
548 peeling_for_alignment = X means: | |
549 If X=0: Peeling for alignment will not be applied. | |
550 If X>0: Peel first X iterations. | |
551 If X=-1: Generate a runtime test to calculate the number of iterations | |
552 to be peeled, using the dataref recorded in the field | |
553 unaligned_dr. */ | |
554 int peeling_for_alignment; | |
555 | |
556 /* The mask used to check the alignment of pointers or arrays. */ | |
557 int ptr_mask; | |
558 | |
559 /* Data Dependence Relations defining address ranges that are candidates | |
560 for a run-time aliasing check. */ | |
111 | 561 auto_vec<ddr_p> may_alias_ddrs; |
562 | |
563 /* Data Dependence Relations defining address ranges together with segment | |
564 lengths from which the run-time aliasing check is built. */ | |
565 auto_vec<dr_with_seg_len_pair_t> comp_alias_ddrs; | |
566 | |
567 /* Check that the addresses of each pair of objects is unequal. */ | |
568 auto_vec<vec_object_pair> check_unequal_addrs; | |
0 | 569 |
131 | 570 /* List of values that are required to be nonzero. This is used to check |
571 whether things like "x[i * n] += 1;" are safe and eventually gets added | |
572 to the checks for lower bounds below. */ | |
573 auto_vec<tree> check_nonzero; | |
574 | |
575 /* List of values that need to be checked for a minimum value. */ | |
576 auto_vec<vec_lower_bound> lower_bounds; | |
577 | |
0 | 578 /* Statements in the loop that have data references that are candidates for a |
579 runtime (loop versioning) misalignment check. */ | |
131 | 580 auto_vec<stmt_vec_info> may_misalign_stmts; |
0 | 581 |
111 | 582 /* Reduction cycles detected in the loop. Used in loop-aware SLP. */ |
131 | 583 auto_vec<stmt_vec_info> reductions; |
111 | 584 |
585 /* All reduction chains in the loop, represented by the first | |
0 | 586 stmt in the chain. */ |
131 | 587 auto_vec<stmt_vec_info> reduction_chains; |
0 | 588 |
111 | 589 /* Cost vector for a single scalar iteration. */ |
590 auto_vec<stmt_info_for_cost> scalar_cost_vec; | |
0 | 591 |
131 | 592 /* Map of IV base/step expressions to inserted name in the preheader. */ |
593 hash_map<tree_operand_hash, tree> *ivexpr_map; | |
594 | |
145 | 595 /* Map of OpenMP "omp simd array" scan variables to corresponding |
596 rhs of the store of the initializer. */ | |
597 hash_map<tree, tree> *scan_map; | |
598 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
599 /* The unrolling factor needed to SLP the loop. In case of that pure SLP is |
0 | 600 applied to the loop, i.e., no unrolling is needed, this is 1. */ |
131 | 601 poly_uint64 slp_unrolling_factor; |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
602 |
111 | 603 /* Cost of a single scalar iteration. */ |
604 int single_scalar_iteration_cost; | |
605 | |
145 | 606 /* The cost of the vector prologue and epilogue, including peeled |
607 iterations and set-up code. */ | |
608 int vec_outside_cost; | |
609 | |
610 /* The cost of the vector loop body. */ | |
611 int vec_inside_cost; | |
612 | |
111 | 613 /* Is the loop vectorizable? */ |
614 bool vectorizable; | |
615 | |
131 | 616 /* Records whether we still have the option of using a fully-masked loop. */ |
617 bool can_fully_mask_p; | |
618 | |
619 /* True if have decided to use a fully-masked loop. */ | |
620 bool fully_masked_p; | |
621 | |
111 | 622 /* When we have grouped data accesses with gaps, we may introduce invalid |
623 memory accesses. We peel the last iteration of the loop to prevent | |
624 this. */ | |
625 bool peeling_for_gaps; | |
626 | |
627 /* When the number of iterations is not a multiple of the vector size | |
628 we need to peel off iterations at the end to form an epilogue loop. */ | |
629 bool peeling_for_niter; | |
630 | |
631 /* True if there are no loop carried data dependencies in the loop. | |
632 If loop->safelen <= 1, then this is always true, either the loop | |
633 didn't have any loop carried data dependencies, or the loop is being | |
634 vectorized guarded with some runtime alias checks, or couldn't | |
635 be vectorized at all, but then this field shouldn't be used. | |
636 For loop->safelen >= 2, the user has asserted that there are no | |
637 backward dependencies, but there still could be loop carried forward | |
638 dependencies in such loops. This flag will be false if normal | |
639 vectorizer data dependency analysis would fail or require versioning | |
640 for alias, but because of loop->safelen >= 2 it has been vectorized | |
641 even without versioning for alias. E.g. in: | |
642 #pragma omp simd | |
643 for (int i = 0; i < m; i++) | |
644 a[i] = a[i + k] * c; | |
645 (or #pragma simd or #pragma ivdep) we can vectorize this and it will | |
646 DTRT even for k > 0 && k < m, but without safelen we would not | |
647 vectorize this, so this field would be false. */ | |
648 bool no_data_dependencies; | |
649 | |
650 /* Mark loops having masked stores. */ | |
651 bool has_mask_store; | |
652 | |
145 | 653 /* Queued scaling factor for the scalar loop. */ |
654 profile_probability scalar_loop_scaling; | |
655 | |
111 | 656 /* If if-conversion versioned this loop before conversion, this is the |
657 loop version without if-conversion. */ | |
145 | 658 class loop *scalar_loop; |
111 | 659 |
660 /* For loops being epilogues of already vectorized loops | |
661 this points to the original vectorized loop. Otherwise NULL. */ | |
662 _loop_vec_info *orig_loop_info; | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
663 |
145 | 664 /* Used to store loop_vec_infos of epilogues of this loop during |
665 analysis. */ | |
666 vec<_loop_vec_info *> epilogue_vinfos; | |
667 | |
0 | 668 } *loop_vec_info; |
669 | |
/* Accessors for the fields of a loop_vec_info.  */
#define LOOP_VINFO_LOOP(VINFO)			(VINFO)->loop
#define LOOP_VINFO_BBS(VINFO)			(VINFO)->bbs
#define LOOP_VINFO_NITERSM1(VINFO)		(VINFO)->num_itersm1
#define LOOP_VINFO_NITERS(VINFO)		(VINFO)->num_iters
/* LOOP_VINFO_NITERS and LOOP_VINFO_NITERSM1 can change after prologue
   peeling, so the total unchanged number of scalar loop iterations is
   retained for the cost model.  */
#define LOOP_VINFO_NITERS_UNCHANGED(VINFO)	(VINFO)->num_iters_unchanged
#define LOOP_VINFO_NITERS_ASSUMPTIONS(VINFO)	(VINFO)->num_iters_assumptions
#define LOOP_VINFO_COST_MODEL_THRESHOLD(VINFO)	(VINFO)->th
#define LOOP_VINFO_VERSIONING_THRESHOLD(VINFO)	(VINFO)->versioning_threshold
#define LOOP_VINFO_VECTORIZABLE_P(VINFO)	(VINFO)->vectorizable
#define LOOP_VINFO_CAN_FULLY_MASK_P(VINFO)	(VINFO)->can_fully_mask_p
#define LOOP_VINFO_FULLY_MASKED_P(VINFO)	(VINFO)->fully_masked_p
#define LOOP_VINFO_VECT_FACTOR(VINFO)		(VINFO)->vectorization_factor
#define LOOP_VINFO_MAX_VECT_FACTOR(VINFO)	(VINFO)->max_vectorization_factor
#define LOOP_VINFO_MASKS(VINFO)			(VINFO)->masks
#define LOOP_VINFO_MASK_SKIP_NITERS(VINFO)	(VINFO)->mask_skip_niters
#define LOOP_VINFO_MASK_COMPARE_TYPE(VINFO)	(VINFO)->mask_compare_type
#define LOOP_VINFO_MASK_IV_TYPE(VINFO)		(VINFO)->iv_type
#define LOOP_VINFO_PTR_MASK(VINFO)		(VINFO)->ptr_mask
#define LOOP_VINFO_LOOP_NEST(VINFO)		(VINFO)->shared->loop_nest
#define LOOP_VINFO_DATAREFS(VINFO)		(VINFO)->shared->datarefs
#define LOOP_VINFO_DDRS(VINFO)			(VINFO)->shared->ddrs
#define LOOP_VINFO_INT_NITERS(VINFO)		(TREE_INT_CST_LOW ((VINFO)->num_iters))
#define LOOP_VINFO_PEELING_FOR_ALIGNMENT(VINFO)	(VINFO)->peeling_for_alignment
#define LOOP_VINFO_UNALIGNED_DR(VINFO)		(VINFO)->unaligned_dr
#define LOOP_VINFO_MAY_MISALIGN_STMTS(VINFO)	(VINFO)->may_misalign_stmts
#define LOOP_VINFO_MAY_ALIAS_DDRS(VINFO)	(VINFO)->may_alias_ddrs
#define LOOP_VINFO_COMP_ALIAS_DDRS(VINFO)	(VINFO)->comp_alias_ddrs
#define LOOP_VINFO_CHECK_UNEQUAL_ADDRS(VINFO)	(VINFO)->check_unequal_addrs
#define LOOP_VINFO_CHECK_NONZERO(VINFO)		(VINFO)->check_nonzero
#define LOOP_VINFO_LOWER_BOUNDS(VINFO)		(VINFO)->lower_bounds
#define LOOP_VINFO_GROUPED_STORES(VINFO)	(VINFO)->grouped_stores
#define LOOP_VINFO_SLP_INSTANCES(VINFO)		(VINFO)->slp_instances
#define LOOP_VINFO_SLP_UNROLLING_FACTOR(VINFO)	(VINFO)->slp_unrolling_factor
#define LOOP_VINFO_REDUCTIONS(VINFO)		(VINFO)->reductions
#define LOOP_VINFO_REDUCTION_CHAINS(VINFO)	(VINFO)->reduction_chains
#define LOOP_VINFO_TARGET_COST_DATA(VINFO)	(VINFO)->target_cost_data
#define LOOP_VINFO_PEELING_FOR_GAPS(VINFO)	(VINFO)->peeling_for_gaps
#define LOOP_VINFO_PEELING_FOR_NITER(VINFO)	(VINFO)->peeling_for_niter
#define LOOP_VINFO_NO_DATA_DEPENDENCIES(VINFO)	(VINFO)->no_data_dependencies
#define LOOP_VINFO_SCALAR_LOOP(VINFO)		(VINFO)->scalar_loop
#define LOOP_VINFO_SCALAR_LOOP_SCALING(VINFO)	(VINFO)->scalar_loop_scaling
#define LOOP_VINFO_HAS_MASK_STORE(VINFO)	(VINFO)->has_mask_store
#define LOOP_VINFO_SCALAR_ITERATION_COST(VINFO)	(VINFO)->scalar_cost_vec
#define LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST(VINFO) \
  (VINFO)->single_scalar_iteration_cost
#define LOOP_VINFO_ORIG_LOOP_INFO(VINFO)	(VINFO)->orig_loop_info
#define LOOP_VINFO_SIMD_IF_COND(VINFO)		(VINFO)->simd_if_cond
0 | 720 |
111 | 721 #define LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT(L) \ |
722 ((L)->may_misalign_stmts.length () > 0) | |
723 #define LOOP_REQUIRES_VERSIONING_FOR_ALIAS(L) \ | |
724 ((L)->comp_alias_ddrs.length () > 0 \ | |
131 | 725 || (L)->check_unequal_addrs.length () > 0 \ |
726 || (L)->lower_bounds.length () > 0) | |
111 | 727 #define LOOP_REQUIRES_VERSIONING_FOR_NITERS(L) \ |
728 (LOOP_VINFO_NITERS_ASSUMPTIONS (L)) | |
145 | 729 #define LOOP_REQUIRES_VERSIONING_FOR_SIMD_IF_COND(L) \ |
730 (LOOP_VINFO_SIMD_IF_COND (L)) | |
111 | 731 #define LOOP_REQUIRES_VERSIONING(L) \ |
732 (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (L) \ | |
733 || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (L) \ | |
145 | 734 || LOOP_REQUIRES_VERSIONING_FOR_NITERS (L) \ |
735 || LOOP_REQUIRES_VERSIONING_FOR_SIMD_IF_COND (L)) | |
0 | 736 |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
737 #define LOOP_VINFO_NITERS_KNOWN_P(L) \ |
111 | 738 (tree_fits_shwi_p ((L)->num_iters) && tree_to_shwi ((L)->num_iters) > 0) |
739 | |
740 #define LOOP_VINFO_EPILOGUE_P(L) \ | |
741 (LOOP_VINFO_ORIG_LOOP_INFO (L) != NULL) | |
742 | |
743 #define LOOP_VINFO_ORIG_MAX_VECT_FACTOR(L) \ | |
744 (LOOP_VINFO_MAX_VECT_FACTOR (LOOP_VINFO_ORIG_LOOP_INFO (L))) | |
0 | 745 |
131 | 746 /* Wrapper for loop_vec_info, for tracking success/failure, where a non-NULL |
747 value signifies success, and a NULL value signifies failure, supporting | |
748 propagating an opt_problem * describing the failure back up the call | |
749 stack. */ | |
750 typedef opt_pointer_wrapper <loop_vec_info> opt_loop_vec_info; | |
751 | |
0 | 752 static inline loop_vec_info |
145 | 753 loop_vec_info_for_loop (class loop *loop) |
0 | 754 { |
755 return (loop_vec_info) loop->aux; | |
756 } | |
757 | |
145 | 758 typedef class _bb_vec_info : public vec_info |
111 | 759 { |
145 | 760 public: |
131 | 761 _bb_vec_info (gimple_stmt_iterator, gimple_stmt_iterator, vec_info_shared *); |
111 | 762 ~_bb_vec_info (); |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
763 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
764 basic_block bb; |
111 | 765 gimple_stmt_iterator region_begin; |
766 gimple_stmt_iterator region_end; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
767 } *bb_vec_info; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
768 |
111 | 769 #define BB_VINFO_BB(B) (B)->bb |
770 #define BB_VINFO_GROUPED_STORES(B) (B)->grouped_stores | |
771 #define BB_VINFO_SLP_INSTANCES(B) (B)->slp_instances | |
131 | 772 #define BB_VINFO_DATAREFS(B) (B)->shared->datarefs |
773 #define BB_VINFO_DDRS(B) (B)->shared->ddrs | |
111 | 774 #define BB_VINFO_TARGET_COST_DATA(B) (B)->target_cost_data |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
775 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
776 static inline bb_vec_info |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
777 vec_info_for_bb (basic_block bb) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
778 { |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
779 return (bb_vec_info) bb->aux; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
780 } |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
781 |
0 | 782 /*-----------------------------------------------------------------*/ |
783 /* Info on vectorized defs. */ | |
784 /*-----------------------------------------------------------------*/ | |
785 enum stmt_vec_info_type { | |
786 undef_vec_info_type = 0, | |
787 load_vec_info_type, | |
788 store_vec_info_type, | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
789 shift_vec_info_type, |
0 | 790 op_vec_info_type, |
791 call_vec_info_type, | |
111 | 792 call_simd_clone_vec_info_type, |
0 | 793 assignment_vec_info_type, |
794 condition_vec_info_type, | |
111 | 795 comparison_vec_info_type, |
0 | 796 reduc_vec_info_type, |
797 induc_vec_info_type, | |
798 type_promotion_vec_info_type, | |
799 type_demotion_vec_info_type, | |
800 type_conversion_vec_info_type, | |
145 | 801 cycle_phi_info_type, |
802 lc_phi_info_type, | |
0 | 803 loop_exit_ctrl_vec_info_type |
804 }; | |
805 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
806 /* Indicates whether/how a variable is used in the scope of loop/basic |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
807 block. */ |
0 | 808 enum vect_relevant { |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
809 vect_unused_in_scope = 0, |
111 | 810 |
811 /* The def is only used outside the loop. */ | |
812 vect_used_only_live, | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
813 /* The def is in the inner loop, and the use is in the outer loop, and the |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
814 use is a reduction stmt. */ |
0 | 815 vect_used_in_outer_by_reduction, |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
816 /* The def is in the inner loop, and the use is in the outer loop (and is |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
817 not part of reduction). */ |
0 | 818 vect_used_in_outer, |
819 | |
820 /* defs that feed computations that end up (only) in a reduction. These | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
821 defs may be used by non-reduction stmts, but eventually, any |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
822 computations/values that are affected by these defs are used to compute |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
823 a reduction (i.e. don't get stored to memory, for example). We use this |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
824 to identify computations that we can change the order in which they are |
0 | 825 computed. */ |
826 vect_used_by_reduction, | |
827 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
828 vect_used_in_scope |
0 | 829 }; |
830 | |
831 /* The type of vectorization that can be applied to the stmt: regular loop-based | |
832 vectorization; pure SLP - the stmt is a part of SLP instances and does not | |
833 have uses outside SLP instances; or hybrid SLP and loop-based - the stmt is | |
834 a part of SLP instance and also must be loop-based vectorized, since it has | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
835 uses outside SLP sequences. |
0 | 836 |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
837 In the loop context the meanings of pure and hybrid SLP are slightly |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
838 different. By saying that pure SLP is applied to the loop, we mean that we |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
839 exploit only intra-iteration parallelism in the loop; i.e., the loop can be |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
840 vectorized without doing any conceptual unrolling, because we don't pack |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
841 together stmts from different iterations, only within a single iteration. |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
842 Loop hybrid SLP means that we exploit both intra-iteration and |
0 | 843 inter-iteration parallelism (e.g., number of elements in the vector is 4 |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
844 and the slp-group-size is 2, in which case we don't have enough parallelism |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
845 within an iteration, so we obtain the rest of the parallelism from subsequent |
0 | 846 iterations by unrolling the loop by 2). */ |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
847 enum slp_vect_type { |
0 | 848 loop_vect = 0, |
849 pure_slp, | |
850 hybrid | |
851 }; | |
852 | |
131 | 853 /* Says whether a statement is a load, a store of a vectorized statement |
854 result, or a store of an invariant value. */ | |
855 enum vec_load_store_type { | |
856 VLS_LOAD, | |
857 VLS_STORE, | |
858 VLS_STORE_INVARIANT | |
859 }; | |
860 | |
111 | 861 /* Describes how we're going to vectorize an individual load or store, |
862 or a group of loads or stores. */ | |
863 enum vect_memory_access_type { | |
864 /* An access to an invariant address. This is used only for loads. */ | |
865 VMAT_INVARIANT, | |
866 | |
867 /* A simple contiguous access. */ | |
868 VMAT_CONTIGUOUS, | |
869 | |
870 /* A contiguous access that goes down in memory rather than up, | |
871 with no additional permutation. This is used only for stores | |
872 of invariants. */ | |
873 VMAT_CONTIGUOUS_DOWN, | |
874 | |
875 /* A simple contiguous access in which the elements need to be permuted | |
876 after loading or before storing. Only used for loop vectorization; | |
877 SLP uses separate permutes. */ | |
878 VMAT_CONTIGUOUS_PERMUTE, | |
879 | |
880 /* A simple contiguous access in which the elements need to be reversed | |
881 after loading or before storing. */ | |
882 VMAT_CONTIGUOUS_REVERSE, | |
883 | |
884 /* An access that uses IFN_LOAD_LANES or IFN_STORE_LANES. */ | |
885 VMAT_LOAD_STORE_LANES, | |
886 | |
887 /* An access in which each scalar element is loaded or stored | |
888 individually. */ | |
889 VMAT_ELEMENTWISE, | |
890 | |
891 /* A hybrid of VMAT_CONTIGUOUS and VMAT_ELEMENTWISE, used for grouped | |
892 SLP accesses. Each unrolled iteration uses a contiguous load | |
893 or store for the whole group, but the groups from separate iterations | |
894 are combined in the same way as for VMAT_ELEMENTWISE. */ | |
895 VMAT_STRIDED_SLP, | |
896 | |
897 /* The access uses gather loads or scatter stores. */ | |
898 VMAT_GATHER_SCATTER | |
899 }; | |
0 | 900 |
145 | 901 class dr_vec_info { |
902 public: | |
131 | 903 /* The data reference itself. */ |
904 data_reference *dr; | |
905 /* The statement that contains the data reference. */ | |
906 stmt_vec_info stmt; | |
907 /* The misalignment in bytes of the reference, or -1 if not known. */ | |
908 int misalignment; | |
909 /* The byte alignment that we'd ideally like the reference to have, | |
910 and the value that misalignment is measured against. */ | |
145 | 911 poly_uint64 target_alignment; |
131 | 912 /* If true the alignment of base_decl needs to be increased. */ |
913 bool base_misaligned; | |
914 tree base_decl; | |
145 | 915 |
916 /* Stores current vectorized loop's offset. To be added to the DR's | |
917 offset to calculate current offset of data reference. */ | |
918 tree offset; | |
131 | 919 }; |
920 | |
0 | 921 typedef struct data_reference *dr_p; |
922 | |
145 | 923 class _stmt_vec_info { |
924 public: | |
0 | 925 |
926 enum stmt_vec_info_type type; | |
927 | |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
928 /* Indicates whether this stmt is part of a computation whose result is |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
929 used outside the loop. */ |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
930 bool live; |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
931 |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
932 /* Stmt is part of some pattern (computation idiom) */ |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
933 bool in_pattern_p; |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
934 |
131 | 935 /* True if the statement was created during pattern recognition as |
936 part of the replacement for RELATED_STMT. This implies that the | |
937 statement isn't part of any basic block, although for convenience | |
938 its gimple_bb is the same as for RELATED_STMT. */ | |
939 bool pattern_stmt_p; | |
940 | |
111 | 941 /* Is this statement vectorizable or should it be skipped in (partial) |
942 vectorization. */ | |
943 bool vectorizable; | |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
944 |
0 | 945 /* The stmt to which this info struct refers. */ |
111 | 946 gimple *stmt; |
0 | 947 |
111 | 948 /* The vec_info with respect to which STMT is vectorized. */ |
949 vec_info *vinfo; | |
0 | 950 |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
951 /* The vector type to be used for the LHS of this statement. */ |
0 | 952 tree vectype; |
953 | |
954 /* The vectorized version of the stmt. */ | |
131 | 955 stmt_vec_info vectorized_stmt; |
0 | 956 |
957 | |
111 | 958 /* The following is relevant only for stmts that contain a non-scalar |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
959 data-ref (array/pointer/struct access). A GIMPLE stmt is expected to have |
111 | 960 at most one such data-ref. */ |
0 | 961 |
131 | 962 dr_vec_info dr_aux; |
0 | 963 |
964 /* Information about the data-ref relative to this loop | |
965 nest (the loop that is being considered for vectorization). */ | |
111 | 966 innermost_loop_behavior dr_wrt_vec_loop; |
967 | |
968 /* For loop PHI nodes, the base and evolution part of it. This makes sure | |
969 this information is still available in vect_update_ivs_after_vectorizer | |
970 where we may not be able to re-analyze the PHI nodes evolution as | |
971 peeling for the prologue loop can make it unanalyzable. The evolution | |
972 part is still correct after peeling, but the base may have changed from | |
973 the version here. */ | |
974 tree loop_phi_evolution_base_unchanged; | |
975 tree loop_phi_evolution_part; | |
0 | 976 |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
977 /* Used for various bookkeeping purposes, generally holding a pointer to |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
978 some other stmt S that is in some way "related" to this stmt. |
0 | 979 Current use of this field is: |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
980 If this stmt is part of a pattern (i.e. the field 'in_pattern_p' is |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
981 true): S is the "pattern stmt" that represents (and replaces) the |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
982 sequence of stmts that constitutes the pattern. Similarly, the |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
983 related_stmt of the "pattern stmt" points back to this stmt (which is |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
984 the last stmt in the original sequence of stmts that constitutes the |
0 | 985 pattern). */ |
131 | 986 stmt_vec_info related_stmt; |
111 | 987 |
131 | 988 /* Used to keep a sequence of def stmts of a pattern stmt if such exists. |
989 The sequence is attached to the original statement rather than the | |
990 pattern statement. */ | |
111 | 991 gimple_seq pattern_def_seq; |
0 | 992 |
993 /* List of datarefs that are known to have the same alignment as the dataref | |
994 of this stmt. */ | |
111 | 995 vec<dr_p> same_align_refs; |
996 | |
997 /* Selected SIMD clone's function info. First vector element | |
998 is SIMD clone's function decl, followed by a pair of trees (base + step) | |
999 for linear arguments (pair of NULLs for other arguments). */ | |
1000 vec<tree> simd_clone_info; | |
0 | 1001 |
1002 /* Classify the def of this stmt. */ | |
1003 enum vect_def_type def_type; | |
1004 | |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1005 /* Whether the stmt is SLPed, loop-based vectorized, or both. */ |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1006 enum slp_vect_type slp_type; |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1007 |
111 | 1008 /* Interleaving and reduction chains info. */ |
1009 /* First element in the group. */ | |
131 | 1010 stmt_vec_info first_element; |
111 | 1011 /* Pointer to the next element in the group. */ |
131 | 1012 stmt_vec_info next_element; |
111 | 1013 /* The size of the group. */ |
0 | 1014 unsigned int size; |
1015 /* For stores, number of stores from this group seen. We vectorize the last | |
1016 one. */ | |
1017 unsigned int store_count; | |
1018 /* For loads only, the gap from the previous load. For consecutive loads, GAP | |
1019 is 1. */ | |
1020 unsigned int gap; | |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1021 |
111 | 1022 /* The minimum negative dependence distance this stmt participates in |
1023 or zero if none. */ | |
1024 unsigned int min_neg_dist; | |
1025 | |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1026 /* Not all stmts in the loop need to be vectorized. e.g., the increment |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1027 of the loop induction variable and computation of array indexes. relevant |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1028 indicates whether the stmt needs to be vectorized. */ |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1029 enum vect_relevant relevant; |
0 | 1030 |
111 | 1031 /* For loads if this is a gather, for stores if this is a scatter. */ |
1032 bool gather_scatter_p; | |
1033 | |
1034 /* True if this is an access with loop-invariant stride. */ | |
1035 bool strided_p; | |
1036 | |
1037 /* For both loads and stores. */ | |
145 | 1038 unsigned simd_lane_access_p : 3; |
111 | 1039 |
1040 /* Classifies how the load or store is going to be implemented | |
1041 for loop vectorization. */ | |
1042 vect_memory_access_type memory_access_type; | |
1043 | |
145 | 1044 /* For INTEGER_INDUC_COND_REDUCTION, the initial value to be used. */ |
1045 tree induc_cond_initial_val; | |
111 | 1046 |
145 | 1047 /* If not NULL the value to be added to compute final reduction value. */ |
1048 tree reduc_epilogue_adjustment; | |
111 | 1049 |
1050 /* On a reduction PHI the reduction type as detected by | |
145 | 1051 vect_is_simple_reduction and vectorizable_reduction. */ |
111 | 1052 enum vect_reduction_type reduc_type; |
0 | 1053 |
145 | 1054 /* The original reduction code, to be used in the epilogue. */ |
1055 enum tree_code reduc_code; | |
1056 /* An internal function we should use in the epilogue. */ | |
1057 internal_fn reduc_fn; | |
1058 | |
1059 /* On a stmt participating in the reduction the index of the operand | |
1060 on the reduction SSA cycle. */ | |
1061 int reduc_idx; | |
1062 | |
111 | 1063 /* On a reduction PHI the def returned by vect_force_simple_reduction. |
1064 On the def returned by vect_force_simple_reduction the | |
1065 corresponding PHI. */ | |
131 | 1066 stmt_vec_info reduc_def; |
111 | 1067 |
145 | 1068 /* The vector input type relevant for reduction vectorization. */ |
1069 tree reduc_vectype_in; | |
1070 | |
1071 /* The vector type for performing the actual reduction. */ | |
1072 tree reduc_vectype; | |
1073 | |
1074 /* Whether we force a single cycle PHI during reduction vectorization. */ | |
1075 bool force_single_cycle; | |
1076 | |
1077 /* Whether on this stmt reduction meta is recorded. */ | |
1078 bool is_reduc_info; | |
1079 | |
111 | 1080 /* The number of scalar stmt references from active SLP instances. */ |
1081 unsigned int num_slp_uses; | |
131 | 1082 |
1083 /* If nonzero, the lhs of the statement could be truncated to this | |
1084 many bits without affecting any users of the result. */ | |
1085 unsigned int min_output_precision; | |
1086 | |
1087 /* If nonzero, all non-boolean input operands have the same precision, | |
1088 and they could each be truncated to this many bits without changing | |
1089 the result. */ | |
1090 unsigned int min_input_precision; | |
1091 | |
1092 /* If OPERATION_PRECISION is nonzero, the statement could be performed on | |
1093 an integer with the sign and number of bits given by OPERATION_SIGN | |
1094 and OPERATION_PRECISION without changing the result. */ | |
1095 unsigned int operation_precision; | |
1096 signop operation_sign; | |
145 | 1097 |
1098 /* If the statement produces a boolean result, this value describes | |
1099 how we should choose the associated vector type. The possible | |
1100 values are: | |
1101 | |
1102 - an integer precision N if we should use the vector mask type | |
1103 associated with N-bit integers. This is only used if all relevant | |
1104 input booleans also want the vector mask type for N-bit integers, | |
1105 or if we can convert them into that form by pattern-matching. | |
1106 | |
1107 - ~0U if we considered choosing a vector mask type but decided | |
1108 to treat the boolean as a normal integer type instead. | |
1109 | |
1110 - 0 otherwise. This means either that the operation isn't one that | |
1111 could have a vector mask type (and so should have a normal vector | |
1112 type instead) or that we simply haven't made a choice either way. */ | |
1113 unsigned int mask_precision; | |
1114 | |
1115 /* True if this is only suitable for SLP vectorization. */ | |
1116 bool slp_vect_only_p; | |
131 | 1117 }; |
111 | 1118 |
1119 /* Information about a gather/scatter call. */ | |
1120 struct gather_scatter_info { | |
131 | 1121 /* The internal function to use for the gather/scatter operation, |
1122 or IFN_LAST if a built-in function should be used instead. */ | |
1123 internal_fn ifn; | |
1124 | |
1125 /* The FUNCTION_DECL for the built-in gather/scatter function, | |
1126 or null if an internal function should be used instead. */ | |
111 | 1127 tree decl; |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1128 |
111 | 1129 /* The loop-invariant base value. */ |
1130 tree base; | |
1131 | |
1132 /* The original scalar offset, which is a non-loop-invariant SSA_NAME. */ | |
1133 tree offset; | |
1134 | |
1135 /* Each offset element should be multiplied by this amount before | |
1136 being added to the base. */ | |
1137 int scale; | |
1138 | |
1139 /* The definition type for the vectorized offset. */ | |
1140 enum vect_def_type offset_dt; | |
1141 | |
1142 /* The type of the vectorized offset. */ | |
1143 tree offset_vectype; | |
131 | 1144 |
1145 /* The type of the scalar elements after loading or before storing. */ | |
1146 tree element_type; | |
1147 | |
1148 /* The type of the scalar elements being loaded or stored. */ | |
1149 tree memory_type; | |
111 | 1150 }; |
0 | 1151 |
1152 /* Access Functions. */ | |
1153 #define STMT_VINFO_TYPE(S) (S)->type | |
1154 #define STMT_VINFO_STMT(S) (S)->stmt | |
111 | 1155 inline loop_vec_info |
1156 STMT_VINFO_LOOP_VINFO (stmt_vec_info stmt_vinfo) | |
1157 { | |
1158 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (stmt_vinfo->vinfo)) | |
1159 return loop_vinfo; | |
1160 return NULL; | |
1161 } | |
1162 inline bb_vec_info | |
1163 STMT_VINFO_BB_VINFO (stmt_vec_info stmt_vinfo) | |
1164 { | |
1165 if (bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (stmt_vinfo->vinfo)) | |
1166 return bb_vinfo; | |
1167 return NULL; | |
1168 } | |
0 | 1169 #define STMT_VINFO_RELEVANT(S) (S)->relevant |
1170 #define STMT_VINFO_LIVE_P(S) (S)->live | |
1171 #define STMT_VINFO_VECTYPE(S) (S)->vectype | |
1172 #define STMT_VINFO_VEC_STMT(S) (S)->vectorized_stmt | |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1173 #define STMT_VINFO_VECTORIZABLE(S) (S)->vectorizable |
131 | 1174 #define STMT_VINFO_DATA_REF(S) ((S)->dr_aux.dr + 0) |
111 | 1175 #define STMT_VINFO_GATHER_SCATTER_P(S) (S)->gather_scatter_p |
1176 #define STMT_VINFO_STRIDED_P(S) (S)->strided_p | |
1177 #define STMT_VINFO_MEMORY_ACCESS_TYPE(S) (S)->memory_access_type | |
1178 #define STMT_VINFO_SIMD_LANE_ACCESS_P(S) (S)->simd_lane_access_p | |
145 | 1179 #define STMT_VINFO_VEC_INDUC_COND_INITIAL_VAL(S) (S)->induc_cond_initial_val |
1180 #define STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT(S) (S)->reduc_epilogue_adjustment | |
1181 #define STMT_VINFO_REDUC_IDX(S) (S)->reduc_idx | |
1182 #define STMT_VINFO_FORCE_SINGLE_CYCLE(S) (S)->force_single_cycle | |
0 | 1183 |
111 | 1184 #define STMT_VINFO_DR_WRT_VEC_LOOP(S) (S)->dr_wrt_vec_loop |
1185 #define STMT_VINFO_DR_BASE_ADDRESS(S) (S)->dr_wrt_vec_loop.base_address | |
1186 #define STMT_VINFO_DR_INIT(S) (S)->dr_wrt_vec_loop.init | |
1187 #define STMT_VINFO_DR_OFFSET(S) (S)->dr_wrt_vec_loop.offset | |
1188 #define STMT_VINFO_DR_STEP(S) (S)->dr_wrt_vec_loop.step | |
1189 #define STMT_VINFO_DR_BASE_ALIGNMENT(S) (S)->dr_wrt_vec_loop.base_alignment | |
1190 #define STMT_VINFO_DR_BASE_MISALIGNMENT(S) \ | |
1191 (S)->dr_wrt_vec_loop.base_misalignment | |
1192 #define STMT_VINFO_DR_OFFSET_ALIGNMENT(S) \ | |
1193 (S)->dr_wrt_vec_loop.offset_alignment | |
1194 #define STMT_VINFO_DR_STEP_ALIGNMENT(S) \ | |
1195 (S)->dr_wrt_vec_loop.step_alignment | |
0 | 1196 |
131 | 1197 #define STMT_VINFO_DR_INFO(S) \ |
1198 (gcc_checking_assert ((S)->dr_aux.stmt == (S)), &(S)->dr_aux) | |
1199 | |
0 | 1200 #define STMT_VINFO_IN_PATTERN_P(S) (S)->in_pattern_p |
1201 #define STMT_VINFO_RELATED_STMT(S) (S)->related_stmt | |
111 | 1202 #define STMT_VINFO_PATTERN_DEF_SEQ(S) (S)->pattern_def_seq |
0 | 1203 #define STMT_VINFO_SAME_ALIGN_REFS(S) (S)->same_align_refs |
111 | 1204 #define STMT_VINFO_SIMD_CLONE_INFO(S) (S)->simd_clone_info |
0 | 1205 #define STMT_VINFO_DEF_TYPE(S) (S)->def_type |
131 | 1206 #define STMT_VINFO_GROUPED_ACCESS(S) \ |
1207 ((S)->dr_aux.dr && DR_GROUP_FIRST_ELEMENT(S)) | |
111 | 1208 #define STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED(S) (S)->loop_phi_evolution_base_unchanged |
1209 #define STMT_VINFO_LOOP_PHI_EVOLUTION_PART(S) (S)->loop_phi_evolution_part | |
1210 #define STMT_VINFO_MIN_NEG_DIST(S) (S)->min_neg_dist | |
1211 #define STMT_VINFO_NUM_SLP_USES(S) (S)->num_slp_uses | |
1212 #define STMT_VINFO_REDUC_TYPE(S) (S)->reduc_type | |
145 | 1213 #define STMT_VINFO_REDUC_CODE(S) (S)->reduc_code |
1214 #define STMT_VINFO_REDUC_FN(S) (S)->reduc_fn | |
111 | 1215 #define STMT_VINFO_REDUC_DEF(S) (S)->reduc_def |
145 | 1216 #define STMT_VINFO_REDUC_VECTYPE(S) (S)->reduc_vectype |
1217 #define STMT_VINFO_REDUC_VECTYPE_IN(S) (S)->reduc_vectype_in | |
1218 #define STMT_VINFO_SLP_VECT_ONLY(S) (S)->slp_vect_only_p | |
0 | 1219 |
131 | 1220 #define DR_GROUP_FIRST_ELEMENT(S) \ |
1221 (gcc_checking_assert ((S)->dr_aux.dr), (S)->first_element) | |
1222 #define DR_GROUP_NEXT_ELEMENT(S) \ | |
1223 (gcc_checking_assert ((S)->dr_aux.dr), (S)->next_element) | |
1224 #define DR_GROUP_SIZE(S) \ | |
1225 (gcc_checking_assert ((S)->dr_aux.dr), (S)->size) | |
1226 #define DR_GROUP_STORE_COUNT(S) \ | |
1227 (gcc_checking_assert ((S)->dr_aux.dr), (S)->store_count) | |
1228 #define DR_GROUP_GAP(S) \ | |
1229 (gcc_checking_assert ((S)->dr_aux.dr), (S)->gap) | |
1230 | |
1231 #define REDUC_GROUP_FIRST_ELEMENT(S) \ | |
1232 (gcc_checking_assert (!(S)->dr_aux.dr), (S)->first_element) | |
1233 #define REDUC_GROUP_NEXT_ELEMENT(S) \ | |
1234 (gcc_checking_assert (!(S)->dr_aux.dr), (S)->next_element) | |
1235 #define REDUC_GROUP_SIZE(S) \ | |
1236 (gcc_checking_assert (!(S)->dr_aux.dr), (S)->size) | |
0 | 1237 |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1238 #define STMT_VINFO_RELEVANT_P(S) ((S)->relevant != vect_unused_in_scope) |
0 | 1239 |
1240 #define HYBRID_SLP_STMT(S) ((S)->slp_type == hybrid) | |
1241 #define PURE_SLP_STMT(S) ((S)->slp_type == pure_slp) | |
1242 #define STMT_SLP_TYPE(S) (S)->slp_type | |
1243 | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1244 #define VECT_MAX_COST 1000 |
0 | 1245 |
1246 /* The maximum number of intermediate steps required in multi-step type | |
1247 conversion. */ | |
1248 #define MAX_INTERM_CVT_STEPS 3 | |
1249 | |
131 | 1250 #define MAX_VECTORIZATION_FACTOR INT_MAX |
111 | 1251 |
1252 /* Nonzero if TYPE represents a (scalar) boolean type or type | |
1253 in the middle-end compatible with it (unsigned precision 1 integral | |
1254 types). Used to determine which types should be vectorized as | |
1255 VECTOR_BOOLEAN_TYPE_P. */ | |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1256 |
111 | 1257 #define VECT_SCALAR_BOOLEAN_TYPE_P(TYPE) \ |
1258 (TREE_CODE (TYPE) == BOOLEAN_TYPE \ | |
1259 || ((TREE_CODE (TYPE) == INTEGER_TYPE \ | |
1260 || TREE_CODE (TYPE) == ENUMERAL_TYPE) \ | |
1261 && TYPE_PRECISION (TYPE) == 1 \ | |
1262 && TYPE_UNSIGNED (TYPE))) | |
0 | 1263 |
131 | 1264 static inline bool |
145 | 1265 nested_in_vect_loop_p (class loop *loop, stmt_vec_info stmt_info) |
0 | 1266 { |
131 | 1267 return (loop->inner |
1268 && (loop->inner == (gimple_bb (stmt_info->stmt))->loop_father)); | |
0 | 1269 } |
1270 | |
145 | 1271 /* Return true if STMT_INFO should produce a vector mask type rather than |
1272 a normal nonmask type. */ | |
0 | 1273 |
145 | 1274 static inline bool |
1275 vect_use_mask_type_p (stmt_vec_info stmt_info) | |
131 | 1276 { |
145 | 1277 return stmt_info->mask_precision && stmt_info->mask_precision != ~0U; |
0 | 1278 } |
1279 | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1280 /* Return TRUE if a statement represented by STMT_INFO is a part of a |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1281 pattern. */ |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1282 |
0 | 1283 static inline bool |
1284 is_pattern_stmt_p (stmt_vec_info stmt_info) | |
1285 { | |
131 | 1286 return stmt_info->pattern_stmt_p; |
1287 } | |
1288 | |
1289 /* If STMT_INFO is a pattern statement, return the statement that it | |
1290 replaces, otherwise return STMT_INFO itself. */ | |
0 | 1291 |
131 | 1292 inline stmt_vec_info |
1293 vect_orig_stmt (stmt_vec_info stmt_info) | |
1294 { | |
1295 if (is_pattern_stmt_p (stmt_info)) | |
1296 return STMT_VINFO_RELATED_STMT (stmt_info); | |
1297 return stmt_info; | |
1298 } | |
0 | 1299 |
145 | 1300 /* Return the later statement between STMT1_INFO and STMT2_INFO. */ |
1301 | |
1302 static inline stmt_vec_info | |
1303 get_later_stmt (stmt_vec_info stmt1_info, stmt_vec_info stmt2_info) | |
1304 { | |
1305 if (gimple_uid (vect_orig_stmt (stmt1_info)->stmt) | |
1306 > gimple_uid (vect_orig_stmt (stmt2_info)->stmt)) | |
1307 return stmt1_info; | |
1308 else | |
1309 return stmt2_info; | |
1310 } | |
1311 | |
131 | 1312 /* If STMT_INFO has been replaced by a pattern statement, return the |
1313 replacement statement, otherwise return STMT_INFO itself. */ | |
1314 | |
1315 inline stmt_vec_info | |
1316 vect_stmt_to_vectorize (stmt_vec_info stmt_info) | |
1317 { | |
1318 if (STMT_VINFO_IN_PATTERN_P (stmt_info)) | |
1319 return STMT_VINFO_RELATED_STMT (stmt_info); | |
1320 return stmt_info; | |
0 | 1321 } |
1322 | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1323 /* Return true if BB is a loop header. */ |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1324 |
0 | 1325 static inline bool |
1326 is_loop_header_bb_p (basic_block bb) | |
1327 { | |
1328 if (bb == (bb->loop_father)->header) | |
1329 return true; | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1330 gcc_checking_assert (EDGE_COUNT (bb->preds) == 1); |
0 | 1331 return false; |
1332 } | |
1333 | |
/* Return 2 raised to the power X.  A zero or negative X yields 1.  */

static inline int
vect_pow2 (int x)
{
  int result = 1;

  /* Double once per unit of X; the loop body never runs when X <= 0.  */
  for (; x > 0; x--)
    result *= 2;

  return result;
}
1346 | |
111 | 1347 /* Alias targetm.vectorize.builtin_vectorization_cost. */ |
1348 | |
1349 static inline int | |
1350 builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, | |
1351 tree vectype, int misalign) | |
1352 { | |
1353 return targetm.vectorize.builtin_vectorization_cost (type_of_cost, | |
1354 vectype, misalign); | |
1355 } | |
1356 | |
1357 /* Get cost by calling cost target builtin. */ | |
1358 | |
1359 static inline | |
1360 int vect_get_stmt_cost (enum vect_cost_for_stmt type_of_cost) | |
1361 { | |
1362 return builtin_vectorization_cost (type_of_cost, NULL, 0); | |
1363 } | |
1364 | |
1365 /* Alias targetm.vectorize.init_cost. */ | |
1366 | |
1367 static inline void * | |
145 | 1368 init_cost (class loop *loop_info) |
111 | 1369 { |
1370 return targetm.vectorize.init_cost (loop_info); | |
1371 } | |
1372 | |
131 | 1373 extern void dump_stmt_cost (FILE *, void *, int, enum vect_cost_for_stmt, |
1374 stmt_vec_info, int, unsigned, | |
1375 enum vect_cost_model_location); | |
1376 | |
111 | 1377 /* Alias targetm.vectorize.add_stmt_cost. */ |
1378 | |
1379 static inline unsigned | |
1380 add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind, | |
1381 stmt_vec_info stmt_info, int misalign, | |
1382 enum vect_cost_model_location where) | |
1383 { | |
131 | 1384 unsigned cost = targetm.vectorize.add_stmt_cost (data, count, kind, |
1385 stmt_info, misalign, where); | |
1386 if (dump_file && (dump_flags & TDF_DETAILS)) | |
1387 dump_stmt_cost (dump_file, data, count, kind, stmt_info, misalign, | |
1388 cost, where); | |
1389 return cost; | |
111 | 1390 } |
1391 | |
1392 /* Alias targetm.vectorize.finish_cost. */ | |
1393 | |
1394 static inline void | |
1395 finish_cost (void *data, unsigned *prologue_cost, | |
1396 unsigned *body_cost, unsigned *epilogue_cost) | |
1397 { | |
1398 targetm.vectorize.finish_cost (data, prologue_cost, body_cost, epilogue_cost); | |
1399 } | |
1400 | |
1401 /* Alias targetm.vectorize.destroy_cost_data. */ | |
1402 | |
1403 static inline void | |
1404 destroy_cost_data (void *data) | |
1405 { | |
1406 targetm.vectorize.destroy_cost_data (data); | |
1407 } | |
1408 | |
131 | 1409 inline void |
1410 add_stmt_costs (void *data, stmt_vector_for_cost *cost_vec) | |
1411 { | |
1412 stmt_info_for_cost *cost; | |
1413 unsigned i; | |
1414 FOR_EACH_VEC_ELT (*cost_vec, i, cost) | |
1415 add_stmt_cost (data, cost->count, cost->kind, cost->stmt_info, | |
1416 cost->misalign, cost->where); | |
1417 } | |
1418 | |
0 | 1419 /*-----------------------------------------------------------------*/ |
1420 /* Info on data references alignment. */ | |
1421 /*-----------------------------------------------------------------*/ | |
131 | 1422 #define DR_MISALIGNMENT_UNKNOWN (-1) |
1423 #define DR_MISALIGNMENT_UNINITIALIZED (-2) | |
111 | 1424 |
131 | 1425 inline void |
1426 set_dr_misalignment (dr_vec_info *dr_info, int val) | |
1427 { | |
1428 dr_info->misalignment = val; | |
111 | 1429 } |
1430 | |
1431 inline int | |
131 | 1432 dr_misalignment (dr_vec_info *dr_info) |
111 | 1433 { |
131 | 1434 int misalign = dr_info->misalignment; |
1435 gcc_assert (misalign != DR_MISALIGNMENT_UNINITIALIZED); | |
1436 return misalign; | |
111 | 1437 } |
0 | 1438 |
1439 /* Reflects actual alignment of first access in the vectorized loop, | |
1440 taking into account peeling/versioning if applied. */ | |
111 | 1441 #define DR_MISALIGNMENT(DR) dr_misalignment (DR) |
1442 #define SET_DR_MISALIGNMENT(DR, VAL) set_dr_misalignment (DR, VAL) | |
0 | 1443 |
111 | 1444 /* Only defined once DR_MISALIGNMENT is defined. */ |
131 | 1445 #define DR_TARGET_ALIGNMENT(DR) ((DR)->target_alignment) |
111 | 1446 |
131 | 1447 /* Return true if data access DR_INFO is aligned to its target alignment |
111 | 1448 (which may be less than a full vector). */ |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1449 |
0 | 1450 static inline bool |
131 | 1451 aligned_access_p (dr_vec_info *dr_info) |
0 | 1452 { |
131 | 1453 return (DR_MISALIGNMENT (dr_info) == 0); |
0 | 1454 } |
1455 | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1456 /* Return TRUE if the alignment of the data access is known, and FALSE |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1457 otherwise. */ |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1458 |
0 | 1459 static inline bool |
131 | 1460 known_alignment_for_access_p (dr_vec_info *dr_info) |
0 | 1461 { |
131 | 1462 return (DR_MISALIGNMENT (dr_info) != DR_MISALIGNMENT_UNKNOWN); |
111 | 1463 } |
1464 | |
1465 /* Return the minimum alignment in bytes that the vectorized version | |
131 | 1466 of DR_INFO is guaranteed to have. */ |
111 | 1467 |
1468 static inline unsigned int | |
131 | 1469 vect_known_alignment_in_bytes (dr_vec_info *dr_info) |
111 | 1470 { |
131 | 1471 if (DR_MISALIGNMENT (dr_info) == DR_MISALIGNMENT_UNKNOWN) |
1472 return TYPE_ALIGN_UNIT (TREE_TYPE (DR_REF (dr_info->dr))); | |
1473 if (DR_MISALIGNMENT (dr_info) == 0) | |
145 | 1474 return known_alignment (DR_TARGET_ALIGNMENT (dr_info)); |
131 | 1475 return DR_MISALIGNMENT (dr_info) & -DR_MISALIGNMENT (dr_info); |
111 | 1476 } |
1477 | |
131 | 1478 /* Return the behavior of DR_INFO with respect to the vectorization context |
111 | 1479 (which for outer loop vectorization might not be the behavior recorded |
131 | 1480 in DR_INFO itself). */ |
111 | 1481 |
1482 static inline innermost_loop_behavior * | |
131 | 1483 vect_dr_behavior (dr_vec_info *dr_info) |
111 | 1484 { |
131 | 1485 stmt_vec_info stmt_info = dr_info->stmt; |
111 | 1486 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); |
1487 if (loop_vinfo == NULL | |
131 | 1488 || !nested_in_vect_loop_p (LOOP_VINFO_LOOP (loop_vinfo), stmt_info)) |
1489 return &DR_INNERMOST (dr_info->dr); | |
111 | 1490 else |
1491 return &STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info); | |
0 | 1492 } |
1493 | |
145 | 1494 /* Return the offset calculated by adding the offset of this DR_INFO to the |
1495 corresponding data_reference's offset. If CHECK_OUTER then use | |
1496 vect_dr_behavior to select the appropriate data_reference to use. */ | |
1497 | |
1498 inline tree | |
1499 get_dr_vinfo_offset (dr_vec_info *dr_info, bool check_outer = false) | |
1500 { | |
1501 innermost_loop_behavior *base; | |
1502 if (check_outer) | |
1503 base = vect_dr_behavior (dr_info); | |
1504 else | |
1505 base = &dr_info->dr->innermost; | |
1506 | |
1507 tree offset = base->offset; | |
1508 | |
1509 if (!dr_info->offset) | |
1510 return offset; | |
1511 | |
1512 offset = fold_convert (sizetype, offset); | |
1513 return fold_build2 (PLUS_EXPR, TREE_TYPE (dr_info->offset), offset, | |
1514 dr_info->offset); | |
1515 } | |
1516 | |
1517 | |
111 | 1518 /* Return true if the vect cost model is unlimited. */ |
1519 static inline bool | |
1520 unlimited_cost_model (loop_p loop) | |
1521 { | |
1522 if (loop != NULL && loop->force_vectorize | |
1523 && flag_simd_cost_model != VECT_COST_MODEL_DEFAULT) | |
1524 return flag_simd_cost_model == VECT_COST_MODEL_UNLIMITED; | |
1525 return (flag_vect_cost_model == VECT_COST_MODEL_UNLIMITED); | |
1526 } | |
1527 | |
131 | 1528 /* Return true if the loop described by LOOP_VINFO is fully-masked and |
1529 if the first iteration should use a partial mask in order to achieve | |
1530 alignment. */ | |
1531 | |
1532 static inline bool | |
1533 vect_use_loop_mask_for_alignment_p (loop_vec_info loop_vinfo) | |
1534 { | |
1535 return (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo) | |
1536 && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)); | |
1537 } | |
1538 | |
1539 /* Return the number of vectors of type VECTYPE that are needed to get | |
1540 NUNITS elements. NUNITS should be based on the vectorization factor, | |
1541 so it is always a known multiple of the number of elements in VECTYPE. */ | |
1542 | |
1543 static inline unsigned int | |
1544 vect_get_num_vectors (poly_uint64 nunits, tree vectype) | |
1545 { | |
1546 return exact_div (nunits, TYPE_VECTOR_SUBPARTS (vectype)).to_constant (); | |
1547 } | |
1548 | |
111 | 1549 /* Return the number of copies needed for loop vectorization when |
1550 a statement operates on vectors of type VECTYPE. This is the | |
1551 vectorization factor divided by the number of elements in | |
1552 VECTYPE and is always known at compile time. */ | |
1553 | |
1554 static inline unsigned int | |
1555 vect_get_num_copies (loop_vec_info loop_vinfo, tree vectype) | |
1556 { | |
131 | 1557 return vect_get_num_vectors (LOOP_VINFO_VECT_FACTOR (loop_vinfo), vectype); |
1558 } | |
1559 | |
1560 /* Update maximum unit count *MAX_NUNITS so that it accounts for | |
145 | 1561 NUNITS. *MAX_NUNITS can be 1 if we haven't yet recorded anything. */ |
1562 | |
1563 static inline void | |
1564 vect_update_max_nunits (poly_uint64 *max_nunits, poly_uint64 nunits) | |
1565 { | |
1566 /* All unit counts have the form vec_info::vector_size * X for some | |
1567 rational X, so two unit sizes must have a common multiple. | |
1568 Everything is a multiple of the initial value of 1. */ | |
1569 *max_nunits = force_common_multiple (*max_nunits, nunits); | |
1570 } | |
1571 | |
1572 /* Update maximum unit count *MAX_NUNITS so that it accounts for | |
131 | 1573 the number of units in vector type VECTYPE. *MAX_NUNITS can be 1 |
1574 if we haven't yet recorded any vector types. */ | |
1575 | |
1576 static inline void | |
1577 vect_update_max_nunits (poly_uint64 *max_nunits, tree vectype) | |
1578 { | |
145 | 1579 vect_update_max_nunits (max_nunits, TYPE_VECTOR_SUBPARTS (vectype)); |
111 | 1580 } |
1581 | |
131 | 1582 /* Return the vectorization factor that should be used for costing |
1583 purposes while vectorizing the loop described by LOOP_VINFO. | |
1584 Pick a reasonable estimate if the vectorization factor isn't | |
1585 known at compile time. */ | |
1586 | |
1587 static inline unsigned int | |
1588 vect_vf_for_cost (loop_vec_info loop_vinfo) | |
1589 { | |
1590 return estimated_poly_value (LOOP_VINFO_VECT_FACTOR (loop_vinfo)); | |
1591 } | |
1592 | |
1593 /* Estimate the number of elements in VEC_TYPE for costing purposes. | |
1594 Pick a reasonable estimate if the exact number isn't known at | |
1595 compile time. */ | |
1596 | |
1597 static inline unsigned int | |
1598 vect_nunits_for_cost (tree vec_type) | |
1599 { | |
1600 return estimated_poly_value (TYPE_VECTOR_SUBPARTS (vec_type)); | |
1601 } | |
1602 | |
1603 /* Return the maximum possible vectorization factor for LOOP_VINFO. */ | |
1604 | |
1605 static inline unsigned HOST_WIDE_INT | |
1606 vect_max_vf (loop_vec_info loop_vinfo) | |
1607 { | |
1608 unsigned HOST_WIDE_INT vf; | |
1609 if (LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf)) | |
1610 return vf; | |
1611 return MAX_VECTORIZATION_FACTOR; | |
1612 } | |
1613 | |
1614 /* Return the size of the value accessed by unvectorized data reference | |
1615 DR_INFO. This is only valid once STMT_VINFO_VECTYPE has been calculated | |
1616 for the associated gimple statement, since that guarantees that DR_INFO | |
1617 accesses either a scalar or a scalar equivalent. ("Scalar equivalent" | |
1618 here includes things like V1SI, which can be vectorized in the same way | |
111 | 1619 as a plain SI.) */ |
1620 | |
1621 inline unsigned int | |
131 | 1622 vect_get_scalar_dr_size (dr_vec_info *dr_info) |
111 | 1623 { |
131 | 1624 return tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_info->dr)))); |
111 | 1625 } |
1626 | |
145 | 1627 /* Return true if LOOP_VINFO requires a runtime check for whether the |
1628 vector loop is profitable. */ | |
1629 | |
1630 inline bool | |
1631 vect_apply_runtime_profitability_check_p (loop_vec_info loop_vinfo) | |
1632 { | |
1633 unsigned int th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo); | |
1634 return (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) | |
1635 && th >= vect_vf_for_cost (loop_vinfo)); | |
1636 } | |
1637 | |
131 | 1638 /* Source location + hotness information. */ |
1639 extern dump_user_location_t vect_location; | |
1640 | |
1641 /* A macro for calling: | |
1642 dump_begin_scope (MSG, vect_location); | |
1643 via an RAII object, thus printing "=== MSG ===\n" to the dumpfile etc, | |
1644 and then calling | |
1645 dump_end_scope (); | |
1646 once the object goes out of scope, thus capturing the nesting of | |
1647 the scopes. | |
1648 | |
1649 These scopes affect dump messages within them: dump messages at the | |
1650 top level implicitly default to MSG_PRIORITY_USER_FACING, whereas those | |
1651 in a nested scope implicitly default to MSG_PRIORITY_INTERNALS. */ | |
1652 | |
1653 #define DUMP_VECT_SCOPE(MSG) \ | |
1654 AUTO_DUMP_SCOPE (MSG, vect_location) | |
0 | 1655 |
145 | 1656 /* A sentinel class for ensuring that the "vect_location" global gets |
1657 reset at the end of a scope. | |
1658 | |
1659 The "vect_location" global is used during dumping and contains a | |
1660 location_t, which could contain references to a tree block via the | |
1661 ad-hoc data. This data is used for tracking inlining information, | |
1662 but it's not a GC root; it's simply assumed that such locations never | |
1663 get accessed if the blocks are optimized away. | |
1664 | |
1665 Hence we need to ensure that such locations are purged at the end | |
1666 of any operations using them (e.g. via this class). */ | |
1667 | |
1668 class auto_purge_vect_location | |
1669 { | |
1670 public: | |
1671 ~auto_purge_vect_location (); | |
1672 }; | |
1673 | |
0 | 1674 /*-----------------------------------------------------------------*/ |
1675 /* Function prototypes. */ | |
1676 /*-----------------------------------------------------------------*/ | |
1677 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1678 /* Simple loop peeling and versioning utilities for vectorizer's purposes - |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1679 in tree-vect-loop-manip.c. */ |
145 | 1680 extern void vect_set_loop_condition (class loop *, loop_vec_info, |
131 | 1681 tree, tree, tree, bool); |
145 | 1682 extern bool slpeel_can_duplicate_loop_p (const class loop *, const_edge); |
1683 class loop *slpeel_tree_duplicate_loop_to_edge_cfg (class loop *, | |
1684 class loop *, edge); | |
1685 class loop *vect_loop_versioning (loop_vec_info, gimple *); | |
1686 extern class loop *vect_do_peeling (loop_vec_info, tree, tree, | |
1687 tree *, tree *, tree *, int, bool, bool, | |
1688 tree *); | |
131 | 1689 extern void vect_prepare_for_masked_peels (loop_vec_info); |
145 | 1690 extern dump_user_location_t find_loop_location (class loop *); |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1691 extern bool vect_can_advance_ivs_p (loop_vec_info); |
145 | 1692 extern void vect_update_inits_of_drs (loop_vec_info, tree, tree_code); |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1693 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1694 /* In tree-vect-stmts.c. */ |
145 | 1695 extern tree get_related_vectype_for_scalar_type (machine_mode, tree, |
1696 poly_uint64 = 0); | |
1697 extern tree get_vectype_for_scalar_type (vec_info *, tree, unsigned int = 0); | |
1698 extern tree get_vectype_for_scalar_type (vec_info *, tree, slp_tree); | |
1699 extern tree get_mask_type_for_scalar_type (vec_info *, tree, unsigned int = 0); | |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1700 extern tree get_same_sized_vectype (tree, tree); |
145 | 1701 extern bool vect_chooses_same_modes_p (vec_info *, machine_mode); |
131 | 1702 extern bool vect_get_loop_mask_type (loop_vec_info); |
1703 extern bool vect_is_simple_use (tree, vec_info *, enum vect_def_type *, | |
1704 stmt_vec_info * = NULL, gimple ** = NULL); | |
1705 extern bool vect_is_simple_use (tree, vec_info *, enum vect_def_type *, | |
1706 tree *, stmt_vec_info * = NULL, | |
1707 gimple ** = NULL); | |
1708 extern bool supportable_widening_operation (enum tree_code, stmt_vec_info, | |
1709 tree, tree, enum tree_code *, | |
111 | 1710 enum tree_code *, int *, |
1711 vec<tree> *); | |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1712 extern bool supportable_narrowing_operation (enum tree_code, tree, tree, |
145 | 1713 enum tree_code *, int *, |
1714 vec<tree> *); | |
111 | 1715 extern unsigned record_stmt_cost (stmt_vector_for_cost *, int, |
1716 enum vect_cost_for_stmt, stmt_vec_info, | |
1717 int, enum vect_cost_model_location); | |
131 | 1718 extern stmt_vec_info vect_finish_replace_stmt (stmt_vec_info, gimple *); |
1719 extern stmt_vec_info vect_finish_stmt_generation (stmt_vec_info, gimple *, | |
1720 gimple_stmt_iterator *); | |
145 | 1721 extern opt_result vect_mark_stmts_to_be_vectorized (loop_vec_info, bool *); |
131 | 1722 extern tree vect_get_store_rhs (stmt_vec_info); |
1723 extern tree vect_get_vec_def_for_operand_1 (stmt_vec_info, enum vect_def_type); | |
1724 extern tree vect_get_vec_def_for_operand (tree, stmt_vec_info, tree = NULL); | |
1725 extern void vect_get_vec_defs (tree, tree, stmt_vec_info, vec<tree> *, | |
111 | 1726 vec<tree> *, slp_tree); |
131 | 1727 extern void vect_get_vec_defs_for_stmt_copy (vec_info *, |
111 | 1728 vec<tree> *, vec<tree> *); |
131 | 1729 extern tree vect_init_vector (stmt_vec_info, tree, tree, |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1730 gimple_stmt_iterator *); |
131 | 1731 extern tree vect_get_vec_def_for_stmt_copy (vec_info *, tree); |
1732 extern bool vect_transform_stmt (stmt_vec_info, gimple_stmt_iterator *, | |
1733 slp_tree, slp_instance); | |
1734 extern void vect_remove_stores (stmt_vec_info); | |
145 | 1735 extern bool vect_nop_conversion_p (stmt_vec_info); |
131 | 1736 extern opt_result vect_analyze_stmt (stmt_vec_info, bool *, slp_tree, |
1737 slp_instance, stmt_vector_for_cost *); | |
1738 extern void vect_get_load_cost (stmt_vec_info, int, bool, | |
111 | 1739 unsigned int *, unsigned int *, |
1740 stmt_vector_for_cost *, | |
1741 stmt_vector_for_cost *, bool); | |
131 | 1742 extern void vect_get_store_cost (stmt_vec_info, int, |
111 | 1743 unsigned int *, stmt_vector_for_cost *); |
145 | 1744 extern bool vect_supportable_shift (vec_info *, enum tree_code, tree); |
131 | 1745 extern tree vect_gen_perm_mask_any (tree, const vec_perm_indices &); |
1746 extern tree vect_gen_perm_mask_checked (tree, const vec_perm_indices &); | |
145 | 1747 extern void optimize_mask_stores (class loop*); |
131 | 1748 extern gcall *vect_gen_while (tree, tree, tree); |
1749 extern tree vect_gen_while_not (gimple_seq *, tree, tree, tree); | |
1750 extern opt_result vect_get_vector_types_for_stmt (stmt_vec_info, tree *, | |
145 | 1751 tree *, unsigned int = 0); |
1752 extern opt_tree vect_get_mask_type_for_stmt (stmt_vec_info, unsigned int = 0); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1753 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1754 /* In tree-vect-data-refs.c. */ |
145 | 1755 extern bool vect_can_force_dr_alignment_p (const_tree, poly_uint64); |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1756 extern enum dr_alignment_support vect_supportable_dr_alignment |
131 | 1757 (dr_vec_info *, bool); |
1758 extern tree vect_get_smallest_scalar_type (stmt_vec_info, HOST_WIDE_INT *, | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1759 HOST_WIDE_INT *); |
131 | 1760 extern opt_result vect_analyze_data_ref_dependences (loop_vec_info, unsigned int *); |
111 | 1761 extern bool vect_slp_analyze_instance_dependence (slp_instance); |
131 | 1762 extern opt_result vect_enhance_data_refs_alignment (loop_vec_info); |
1763 extern opt_result vect_analyze_data_refs_alignment (loop_vec_info); | |
1764 extern opt_result vect_verify_datarefs_alignment (loop_vec_info); | |
111 | 1765 extern bool vect_slp_analyze_and_verify_instance_alignment (slp_instance); |
131 | 1766 extern opt_result vect_analyze_data_ref_accesses (vec_info *); |
1767 extern opt_result vect_prune_runtime_alias_test_list (loop_vec_info); | |
145 | 1768 extern bool vect_gather_scatter_fn_p (vec_info *, bool, bool, tree, tree, |
1769 tree, int, internal_fn *, tree *); | |
131 | 1770 extern bool vect_check_gather_scatter (stmt_vec_info, loop_vec_info, |
111 | 1771 gather_scatter_info *); |
131 | 1772 extern opt_result vect_find_stmt_data_reference (loop_p, gimple *, |
1773 vec<data_reference_p> *); | |
145 | 1774 extern opt_result vect_analyze_data_refs (vec_info *, poly_uint64 *, bool *); |
111 | 1775 extern void vect_record_base_alignments (vec_info *); |
145 | 1776 extern tree vect_create_data_ref_ptr (stmt_vec_info, tree, class loop *, tree, |
111 | 1777 tree *, gimple_stmt_iterator *, |
131 | 1778 gimple **, bool, |
1779 tree = NULL_TREE, tree = NULL_TREE); | |
1780 extern tree bump_vector_ptr (tree, gimple *, gimple_stmt_iterator *, | |
1781 stmt_vec_info, tree); | |
1782 extern void vect_copy_ref_info (tree, tree); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1783 extern tree vect_create_destination_var (tree, tree); |
111 | 1784 extern bool vect_grouped_store_supported (tree, unsigned HOST_WIDE_INT); |
131 | 1785 extern bool vect_store_lanes_supported (tree, unsigned HOST_WIDE_INT, bool); |
111 | 1786 extern bool vect_grouped_load_supported (tree, bool, unsigned HOST_WIDE_INT); |
131 | 1787 extern bool vect_load_lanes_supported (tree, unsigned HOST_WIDE_INT, bool); |
1788 extern void vect_permute_store_chain (vec<tree> ,unsigned int, stmt_vec_info, | |
111 | 1789 gimple_stmt_iterator *, vec<tree> *); |
131 | 1790 extern tree vect_setup_realignment (stmt_vec_info, gimple_stmt_iterator *, |
1791 tree *, enum dr_alignment_support, tree, | |
145 | 1792 class loop **); |
131 | 1793 extern void vect_transform_grouped_load (stmt_vec_info, vec<tree> , int, |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1794 gimple_stmt_iterator *); |
131 | 1795 extern void vect_record_grouped_load_vectors (stmt_vec_info, vec<tree>); |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1796 extern tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *); |
111 | 1797 extern tree vect_get_new_ssa_name (tree, enum vect_var_kind, |
1798 const char * = NULL); | |
131 | 1799 extern tree vect_create_addr_base_for_vector_ref (stmt_vec_info, gimple_seq *, |
111 | 1800 tree, tree = NULL_TREE); |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1801 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1802 /* In tree-vect-loop.c. */ |
145 | 1803 extern widest_int vect_iv_limit_for_full_masking (loop_vec_info loop_vinfo); |
1804 /* Used in tree-vect-loop-manip.c */ | |
1805 extern void determine_peel_for_niter (loop_vec_info); | |
1806 /* Used in gimple-loop-interchange.c and tree-parloops.c. */ | |
131 | 1807 extern bool check_reduction_path (dump_user_location_t, loop_p, gphi *, tree, |
1808 enum tree_code); | |
145 | 1809 extern bool needs_fold_left_reduction_p (tree, tree_code); |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1810 /* Drive for loop analysis stage. */ |
145 | 1811 extern opt_loop_vec_info vect_analyze_loop (class loop *, vec_info_shared *); |
111 | 1812 extern tree vect_build_loop_niters (loop_vec_info, bool * = NULL); |
131 | 1813 extern void vect_gen_vector_loop_niters (loop_vec_info, tree, tree *, |
1814 tree *, bool); | |
145 | 1815 extern tree vect_halve_mask_nunits (tree, machine_mode); |
1816 extern tree vect_double_mask_nunits (tree, machine_mode); | |
131 | 1817 extern void vect_record_loop_mask (loop_vec_info, vec_loop_masks *, |
145 | 1818 unsigned int, tree, tree); |
131 | 1819 extern tree vect_get_loop_mask (gimple_stmt_iterator *, vec_loop_masks *, |
1820 unsigned int, tree, unsigned int); | |
145 | 1821 extern stmt_vec_info info_for_reduction (stmt_vec_info); |
131 | 1822 |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1823 /* Drive for loop transformation stage. */ |
145 | 1824 extern class loop *vect_transform_loop (loop_vec_info, gimple *); |
1825 extern opt_loop_vec_info vect_analyze_loop_form (class loop *, | |
131 | 1826 vec_info_shared *); |
1827 extern bool vectorizable_live_operation (stmt_vec_info, gimple_stmt_iterator *, | |
145 | 1828 slp_tree, slp_instance, int, |
1829 bool, stmt_vector_for_cost *); | |
1830 extern bool vectorizable_reduction (stmt_vec_info, slp_tree, slp_instance, | |
131 | 1831 stmt_vector_for_cost *); |
1832 extern bool vectorizable_induction (stmt_vec_info, gimple_stmt_iterator *, | |
1833 stmt_vec_info *, slp_tree, | |
1834 stmt_vector_for_cost *); | |
145 | 1835 extern bool vect_transform_reduction (stmt_vec_info, gimple_stmt_iterator *, |
1836 stmt_vec_info *, slp_tree); | |
1837 extern bool vect_transform_cycle_phi (stmt_vec_info, stmt_vec_info *, | |
1838 slp_tree, slp_instance); | |
1839 extern bool vectorizable_lc_phi (stmt_vec_info, stmt_vec_info *, slp_tree); | |
111 | 1840 extern bool vect_worthwhile_without_simd_p (vec_info *, tree_code); |
1841 extern int vect_get_known_peeling_cost (loop_vec_info, int, int *, | |
1842 stmt_vector_for_cost *, | |
1843 stmt_vector_for_cost *, | |
1844 stmt_vector_for_cost *); | |
131 | 1845 extern tree cse_and_gimplify_to_preheader (loop_vec_info, tree); |
0 | 1846 |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1847 /* In tree-vect-slp.c. */ |
131 | 1848 extern void vect_free_slp_instance (slp_instance, bool); |
111 | 1849 extern bool vect_transform_slp_perm_load (slp_tree, vec<tree> , |
131 | 1850 gimple_stmt_iterator *, poly_uint64, |
1851 slp_instance, bool, unsigned *); | |
111 | 1852 extern bool vect_slp_analyze_operations (vec_info *); |
131 | 1853 extern void vect_schedule_slp (vec_info *); |
1854 extern opt_result vect_analyze_slp (vec_info *, unsigned); | |
111 | 1855 extern bool vect_make_slp_decision (loop_vec_info); |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1856 extern void vect_detect_hybrid_slp (loop_vec_info); |
145 | 1857 extern void vect_get_slp_defs (slp_tree, vec<vec<tree> > *, unsigned n = -1U); |
111 | 1858 extern bool vect_slp_bb (basic_block); |
131 | 1859 extern stmt_vec_info vect_find_last_scalar_stmt_in_slp (slp_tree); |
1860 extern bool is_simple_and_all_uses_invariant (stmt_vec_info, loop_vec_info); | |
145 | 1861 extern bool can_duplicate_and_interleave_p (vec_info *, unsigned int, tree, |
131 | 1862 unsigned int * = NULL, |
1863 tree * = NULL, tree * = NULL); | |
145 | 1864 extern void duplicate_and_interleave (vec_info *, gimple_seq *, tree, |
1865 vec<tree>, unsigned int, vec<tree> &); | |
131 | 1866 extern int vect_get_place_in_interleaving_chain (stmt_vec_info, stmt_vec_info); |
0 | 1867 |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1868 /* In tree-vect-patterns.c. */ |
0 | 1869 /* Pattern recognition functions. |
1870 Additional pattern recognition functions can (and will) be added | |
1871 in the future. */ | |
111 | 1872 void vect_pattern_recog (vec_info *); |
0 | 1873 |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1874 /* In tree-vectorizer.c. */ |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1875 unsigned vectorize_loops (void); |
145 | 1876 void vect_free_loop_info_assumptions (class loop *); |
1877 gimple *vect_loop_vectorized_call (class loop *, gcond **cond = NULL); | |
1878 | |
0 | 1879 |
1880 #endif /* GCC_TREE_VECTORIZER_H */ |