Mercurial > hg > CbC > CbC_gcc
comparison gcc/graphite.c @ 0:a06113de4d67
first commit
author | kent <kent@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Fri, 17 Jul 2009 14:47:48 +0900 |
parents | |
children | 77e2b8dfacca |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:a06113de4d67 |
---|---|
1 /* Gimple Represented as Polyhedra. | |
2 Copyright (C) 2006, 2007, 2008, 2009 Free Software Foundation, Inc. | |
3 Contributed by Sebastian Pop <sebastian.pop@inria.fr>. | |
4 | |
5 This file is part of GCC. | |
6 | |
7 GCC is free software; you can redistribute it and/or modify | |
8 it under the terms of the GNU General Public License as published by | |
9 the Free Software Foundation; either version 3, or (at your option) | |
10 any later version. | |
11 | |
12 GCC is distributed in the hope that it will be useful, | |
13 but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 GNU General Public License for more details. | |
16 | |
17 You should have received a copy of the GNU General Public License | |
18 along with GCC; see the file COPYING3. If not see | |
19 <http://www.gnu.org/licenses/>. */ | |
20 | |
21 /* This pass converts GIMPLE to GRAPHITE, performs some loop | |
22 transformations and then converts the resulting representation back | |
23 to GIMPLE. | |
24 | |
25 An early description of this pass can be found in the GCC Summit'06 | |
26 paper "GRAPHITE: Polyhedral Analyses and Optimizations for GCC". | |
27 The wiki page http://gcc.gnu.org/wiki/Graphite contains pointers to | |
28 the related work. | |
29 | |
30 One important document to read is CLooG's internal manual: | |
31 http://repo.or.cz/w/cloog-ppl.git?a=blob_plain;f=doc/cloog.texi;hb=HEAD | |
32 that describes the data structure of loops used in this file, and | |
33 the functions that are used for transforming the code. */ | |
34 | |
35 #include "config.h" | |
36 #include "system.h" | |
37 #include "coretypes.h" | |
38 #include "tm.h" | |
39 #include "ggc.h" | |
40 #include "tree.h" | |
41 #include "rtl.h" | |
42 #include "basic-block.h" | |
43 #include "diagnostic.h" | |
44 #include "tree-flow.h" | |
45 #include "toplev.h" | |
46 #include "tree-dump.h" | |
47 #include "timevar.h" | |
48 #include "cfgloop.h" | |
49 #include "tree-chrec.h" | |
50 #include "tree-data-ref.h" | |
51 #include "tree-scalar-evolution.h" | |
52 #include "tree-pass.h" | |
53 #include "domwalk.h" | |
54 #include "value-prof.h" | |
55 #include "pointer-set.h" | |
56 #include "gimple.h" | |
57 | |
58 #ifdef HAVE_cloog | |
59 #include "cloog/cloog.h" | |
60 #include "graphite.h" | |
61 | |
62 static VEC (scop_p, heap) *current_scops; | |
63 | |
64 /* Converts a GMP constant V to a tree and returns it. */ | |
65 | |
66 static tree | |
67 gmp_cst_to_tree (tree type, Value v) | |
68 { | |
69 return build_int_cst (type, value_get_si (v)); | |
70 } | |
71 | |
72 /* Returns true when BB is in REGION. */ | |
73 | |
74 static bool | |
75 bb_in_sese_p (basic_block bb, sese region) | |
76 { | |
77 return pointer_set_contains (SESE_REGION_BBS (region), bb); | |
78 } | |
79 | |
80 /* Returns true when LOOP is in the SESE region R. */ | |
81 | |
82 static inline bool | |
83 loop_in_sese_p (struct loop *loop, sese r) | |
84 { | |
85 return (bb_in_sese_p (loop->header, r) | |
86 && bb_in_sese_p (loop->latch, r)); | |
87 } | |
88 | |
89 /* For a USE in BB, if BB is outside REGION, mark the USE in the | |
90 SESE_LIVEIN and SESE_LIVEOUT sets. */ | |
91 | |
92 static void | |
93 sese_build_livein_liveouts_use (sese region, basic_block bb, tree use) | |
94 { | |
95 unsigned ver; | |
96 basic_block def_bb; | |
97 | |
98 if (TREE_CODE (use) != SSA_NAME) | |
99 return; | |
100 | |
101 ver = SSA_NAME_VERSION (use); | |
102 def_bb = gimple_bb (SSA_NAME_DEF_STMT (use)); | |
103 if (!def_bb | |
104 || !bb_in_sese_p (def_bb, region) | |
105 || bb_in_sese_p (bb, region)) | |
106 return; | |
107 | |
108 if (!SESE_LIVEIN_VER (region, ver)) | |
109 SESE_LIVEIN_VER (region, ver) = BITMAP_ALLOC (NULL); | |
110 | |
111 bitmap_set_bit (SESE_LIVEIN_VER (region, ver), bb->index); | |
112 bitmap_set_bit (SESE_LIVEOUT (region), ver); | |
113 } | |
114 | |
115 /* Marks for rewrite all the SSA_NAMES defined in REGION and that are | |
116 used in BB that is outside of the REGION. */ | |
117 | |
118 static void | |
119 sese_build_livein_liveouts_bb (sese region, basic_block bb) | |
120 { | |
121 gimple_stmt_iterator bsi; | |
122 edge e; | |
123 edge_iterator ei; | |
124 ssa_op_iter iter; | |
125 tree var; | |
126 | |
127 FOR_EACH_EDGE (e, ei, bb->succs) | |
128 for (bsi = gsi_start_phis (e->dest); !gsi_end_p (bsi); gsi_next (&bsi)) | |
129 sese_build_livein_liveouts_use (region, bb, | |
130 PHI_ARG_DEF_FROM_EDGE (gsi_stmt (bsi), e)); | |
131 | |
132 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi)) | |
133 FOR_EACH_SSA_TREE_OPERAND (var, gsi_stmt (bsi), iter, SSA_OP_ALL_USES) | |
134 sese_build_livein_liveouts_use (region, bb, var); | |
135 } | |
136 | |
137 /* Build the SESE_LIVEIN and SESE_LIVEOUT for REGION. */ | |
138 | |
139 void | |
140 sese_build_livein_liveouts (sese region) | |
141 { | |
142 basic_block bb; | |
143 | |
144 SESE_LIVEOUT (region) = BITMAP_ALLOC (NULL); | |
145 SESE_NUM_VER (region) = num_ssa_names; | |
146 SESE_LIVEIN (region) = XCNEWVEC (bitmap, SESE_NUM_VER (region)); | |
147 | |
148 FOR_EACH_BB (bb) | |
149 sese_build_livein_liveouts_bb (region, bb); | |
150 } | |
151 | |
152 /* Register basic blocks belonging to a region in a pointer set. */ | |
153 | |
154 static void | |
155 register_bb_in_sese (basic_block entry_bb, basic_block exit_bb, sese region) | |
156 { | |
157 edge_iterator ei; | |
158 edge e; | |
159 basic_block bb = entry_bb; | |
160 | |
161 FOR_EACH_EDGE (e, ei, bb->succs) | |
162 { | |
163 if (!pointer_set_contains (SESE_REGION_BBS (region), e->dest) && | |
164 e->dest->index != exit_bb->index) | |
165 { | |
166 pointer_set_insert (SESE_REGION_BBS (region), e->dest); | |
167 register_bb_in_sese (e->dest, exit_bb, region); | |
168 } | |
169 } | |
170 } | |
171 | |
172 /* Builds a new SESE region from edges ENTRY and EXIT. */ | |
173 | |
174 sese | |
175 new_sese (edge entry, edge exit) | |
176 { | |
177 sese res = XNEW (struct sese); | |
178 | |
179 SESE_ENTRY (res) = entry; | |
180 SESE_EXIT (res) = exit; | |
181 SESE_REGION_BBS (res) = pointer_set_create (); | |
182 register_bb_in_sese (entry->dest, exit->dest, res); | |
183 | |
184 SESE_LIVEOUT (res) = NULL; | |
185 SESE_NUM_VER (res) = 0; | |
186 SESE_LIVEIN (res) = NULL; | |
187 | |
188 return res; | |
189 } | |
190 | |
191 /* Deletes REGION. */ | |
192 | |
193 void | |
194 free_sese (sese region) | |
195 { | |
196 int i; | |
197 | |
198 for (i = 0; i < SESE_NUM_VER (region); i++) | |
199 BITMAP_FREE (SESE_LIVEIN_VER (region, i)); | |
200 | |
201 if (SESE_LIVEIN (region)) | |
202 free (SESE_LIVEIN (region)); | |
203 | |
204 if (SESE_LIVEOUT (region)) | |
205 BITMAP_FREE (SESE_LIVEOUT (region)); | |
206 | |
207 pointer_set_destroy (SESE_REGION_BBS (region)); | |
208 XDELETE (region); | |
209 } | |
210 | |
211 | |
212 | |
213 /* Debug the list of old induction variables for this SCOP. */ | |
214 | |
215 void | |
216 debug_oldivs (scop_p scop) | |
217 { | |
218 int i; | |
219 name_tree oldiv; | |
220 | |
221 fprintf (stderr, "Old IVs:"); | |
222 | |
223 for (i = 0; VEC_iterate (name_tree, SCOP_OLDIVS (scop), i, oldiv); i++) | |
224 { | |
225 fprintf (stderr, "("); | |
226 print_generic_expr (stderr, oldiv->t, 0); | |
227 fprintf (stderr, ", %s, %d)\n", oldiv->name, oldiv->loop->num); | |
228 } | |
229 fprintf (stderr, "\n"); | |
230 } | |
231 | |
232 /* Debug the loops around basic block GB. */ | |
233 | |
234 void | |
235 debug_loop_vec (graphite_bb_p gb) | |
236 { | |
237 int i; | |
238 loop_p loop; | |
239 | |
240 fprintf (stderr, "Loop Vec:"); | |
241 | |
242 for (i = 0; VEC_iterate (loop_p, GBB_LOOPS (gb), i, loop); i++) | |
243 fprintf (stderr, "%d: %d, ", i, loop ? loop->num : -1); | |
244 | |
245 fprintf (stderr, "\n"); | |
246 } | |
247 | |
248 /* Returns true if stack ENTRY is a constant. */ | |
249 | |
250 static bool | |
251 iv_stack_entry_is_constant (iv_stack_entry *entry) | |
252 { | |
253 return entry->kind == iv_stack_entry_const; | |
254 } | |
255 | |
256 /* Returns true if stack ENTRY is an induction variable. */ | |
257 | |
258 static bool | |
259 iv_stack_entry_is_iv (iv_stack_entry *entry) | |
260 { | |
261 return entry->kind == iv_stack_entry_iv; | |
262 } | |
263 | |
264 /* Push (IV, NAME) on STACK. */ | |
265 | |
266 static void | |
267 loop_iv_stack_push_iv (loop_iv_stack stack, tree iv, const char *name) | |
268 { | |
269 iv_stack_entry *entry = XNEW (iv_stack_entry); | |
270 name_tree named_iv = XNEW (struct name_tree); | |
271 | |
272 named_iv->t = iv; | |
273 named_iv->name = name; | |
274 | |
275 entry->kind = iv_stack_entry_iv; | |
276 entry->data.iv = named_iv; | |
277 | |
278 VEC_safe_push (iv_stack_entry_p, heap, *stack, entry); | |
279 } | |
280 | |
281 /* Inserts a CONSTANT in STACK at INDEX. */ | |
282 | |
283 static void | |
284 loop_iv_stack_insert_constant (loop_iv_stack stack, int index, | |
285 tree constant) | |
286 { | |
287 iv_stack_entry *entry = XNEW (iv_stack_entry); | |
288 | |
289 entry->kind = iv_stack_entry_const; | |
290 entry->data.constant = constant; | |
291 | |
292 VEC_safe_insert (iv_stack_entry_p, heap, *stack, index, entry); | |
293 } | |
294 | |
295 /* Pops and frees an element out of STACK. */ | |
296 | |
297 static void | |
298 loop_iv_stack_pop (loop_iv_stack stack) | |
299 { | |
300 iv_stack_entry_p entry = VEC_pop (iv_stack_entry_p, *stack); | |
301 | |
302 free (entry->data.iv); | |
303 free (entry); | |
304 } | |
305 | |
306 /* Get the IV at INDEX in STACK. */ | |
307 | |
308 static tree | |
309 loop_iv_stack_get_iv (loop_iv_stack stack, int index) | |
310 { | |
311 iv_stack_entry_p entry = VEC_index (iv_stack_entry_p, *stack, index); | |
312 iv_stack_entry_data data = entry->data; | |
313 | |
314 return iv_stack_entry_is_iv (entry) ? data.iv->t : data.constant; | |
315 } | |
316 | |
317 /* Get the IV from its NAME in STACK. */ | |
318 | |
319 static tree | |
320 loop_iv_stack_get_iv_from_name (loop_iv_stack stack, const char* name) | |
321 { | |
322 int i; | |
323 iv_stack_entry_p entry; | |
324 | |
325 for (i = 0; VEC_iterate (iv_stack_entry_p, *stack, i, entry); i++) | |
326 { | |
327 name_tree iv = entry->data.iv; | |
328 if (!strcmp (name, iv->name)) | |
329 return iv->t; | |
330 } | |
331 | |
332 return NULL; | |
333 } | |
334 | |
/* Prints on stderr the contents of STACK, as a parenthesized,
   space-separated list of "name:value" pairs (for induction
   variables) and "value:value" pairs (for constants).  */

void
debug_loop_iv_stack (loop_iv_stack stack)
{
  int i;
  iv_stack_entry_p entry;
  bool first = true;

  fprintf (stderr, "(");

  for (i = 0; VEC_iterate (iv_stack_entry_p, *stack, i, entry); i++)
    {
      /* Separate entries with a space, but not before the first.  */
      if (first)
	first = false;
      else
	fprintf (stderr, " ");

      if (iv_stack_entry_is_iv (entry))
	{
	  name_tree iv = entry->data.iv;
	  fprintf (stderr, "%s:", iv->name);
	  print_generic_expr (stderr, iv->t, 0);
	}
      else
	{
	  /* NOTE(review): the constant is printed on both sides of the
	     colon, mirroring the name:value shape of the IV branch —
	     presumably intentional for a debug dump; confirm.  */
	  tree constant = entry->data.constant;
	  print_generic_expr (stderr, constant, 0);
	  fprintf (stderr, ":");
	  print_generic_expr (stderr, constant, 0);
	}
    }

  fprintf (stderr, ")\n");
}
370 | |
371 /* Frees STACK. */ | |
372 | |
373 static void | |
374 free_loop_iv_stack (loop_iv_stack stack) | |
375 { | |
376 int i; | |
377 iv_stack_entry_p entry; | |
378 | |
379 for (i = 0; VEC_iterate (iv_stack_entry_p, *stack, i, entry); i++) | |
380 { | |
381 free (entry->data.iv); | |
382 free (entry); | |
383 } | |
384 | |
385 VEC_free (iv_stack_entry_p, heap, *stack); | |
386 } | |
387 | |
388 | |
389 | |
/* Structure containing the mapping between the CLooG's induction
   variable and the type of the old induction variable.  Instances are
   stored in the GBB_CLOOG_IV_TYPES hash table, hashed and compared by
   the CLOOG_IV pointer.  */
typedef struct ivtype_map_elt
{
  /* GCC type of the original induction variable.  */
  tree type;

  /* Name of the CLooG induction variable.  */
  const char *cloog_iv;
} *ivtype_map_elt;
397 | |
398 /* Print to stderr the element ELT. */ | |
399 | |
400 static void | |
401 debug_ivtype_elt (ivtype_map_elt elt) | |
402 { | |
403 fprintf (stderr, "(%s, ", elt->cloog_iv); | |
404 print_generic_expr (stderr, elt->type, 0); | |
405 fprintf (stderr, ")\n"); | |
406 } | |
407 | |
408 /* Helper function for debug_ivtype_map. */ | |
409 | |
410 static int | |
411 debug_ivtype_map_1 (void **slot, void *s ATTRIBUTE_UNUSED) | |
412 { | |
413 struct ivtype_map_elt *entry = (struct ivtype_map_elt *) *slot; | |
414 debug_ivtype_elt (entry); | |
415 return 1; | |
416 } | |
417 | |
418 /* Print to stderr all the elements of MAP. */ | |
419 | |
420 void | |
421 debug_ivtype_map (htab_t map) | |
422 { | |
423 htab_traverse (map, debug_ivtype_map_1, NULL); | |
424 } | |
425 | |
426 /* Constructs a new SCEV_INFO_STR structure for VAR and INSTANTIATED_BELOW. */ | |
427 | |
428 static inline ivtype_map_elt | |
429 new_ivtype_map_elt (const char *cloog_iv, tree type) | |
430 { | |
431 ivtype_map_elt res; | |
432 | |
433 res = XNEW (struct ivtype_map_elt); | |
434 res->cloog_iv = cloog_iv; | |
435 res->type = type; | |
436 | |
437 return res; | |
438 } | |
439 | |
440 /* Computes a hash function for database element ELT. */ | |
441 | |
442 static hashval_t | |
443 ivtype_map_elt_info (const void *elt) | |
444 { | |
445 return htab_hash_pointer (((const struct ivtype_map_elt *) elt)->cloog_iv); | |
446 } | |
447 | |
448 /* Compares database elements E1 and E2. */ | |
449 | |
450 static int | |
451 eq_ivtype_map_elts (const void *e1, const void *e2) | |
452 { | |
453 const struct ivtype_map_elt *elt1 = (const struct ivtype_map_elt *) e1; | |
454 const struct ivtype_map_elt *elt2 = (const struct ivtype_map_elt *) e2; | |
455 | |
456 return (elt1->cloog_iv == elt2->cloog_iv); | |
457 } | |
458 | |
459 | |
460 | |
461 /* Given a CLOOG_IV, returns the type that it should have in GCC land. | |
462 If the information is not available, i.e. in the case one of the | |
463 transforms created the loop, just return integer_type_node. */ | |
464 | |
465 static tree | |
466 gcc_type_for_cloog_iv (const char *cloog_iv, graphite_bb_p gbb) | |
467 { | |
468 struct ivtype_map_elt tmp; | |
469 PTR *slot; | |
470 | |
471 tmp.cloog_iv = cloog_iv; | |
472 slot = htab_find_slot (GBB_CLOOG_IV_TYPES (gbb), &tmp, NO_INSERT); | |
473 | |
474 if (slot && *slot) | |
475 return ((ivtype_map_elt) *slot)->type; | |
476 | |
477 return integer_type_node; | |
478 } | |
479 | |
/* Inserts constants derived from the USER_STMT argument list into the
   STACK.  This is needed to map old ivs to constants when loops have
   been eliminated.  */

static void
loop_iv_stack_patch_for_consts (loop_iv_stack stack,
				struct clast_user_stmt *user_stmt)
{
  struct clast_stmt *t;
  int index = 0;
  CloogStatement *cs = user_stmt->statement;
  graphite_bb_p gbb = (graphite_bb_p) cloog_statement_usr (cs);

  /* Walk the substitution list: entry INDEX is the expression CLooG
     substitutes for the INDEX-th induction variable of the statement.  */
  for (t = user_stmt->substitutions; t; t = t->next)
    {
      /* Each substitution is a clast_assignment whose RHS is the
	 substituted expression.  */
      struct clast_expr *expr = (struct clast_expr *)
	((struct clast_assignment *)t)->RHS;
      struct clast_term *term = (struct clast_term *) expr;

      /* FIXME: What should be done with expr_bin, expr_red?  */
      if (expr->type == expr_term
	  && !term->var)
	{
	  /* A term without a variable is a plain constant: the loop
	     at this depth was eliminated, so record the constant,
	     converted to the old IV's type when known.  */
	  loop_p loop = gbb_loop_at_index (gbb, index);
	  tree oldiv = oldiv_for_loop (GBB_SCOP (gbb), loop);
	  tree type = oldiv ? TREE_TYPE (oldiv) : integer_type_node;
	  tree value = gmp_cst_to_tree (type, term->val);
	  loop_iv_stack_insert_constant (stack, index, value);
	}
      index = index + 1;
    }
}
512 | |
513 /* Removes all constants in the iv STACK. */ | |
514 | |
515 static void | |
516 loop_iv_stack_remove_constants (loop_iv_stack stack) | |
517 { | |
518 int i; | |
519 iv_stack_entry *entry; | |
520 | |
521 for (i = 0; VEC_iterate (iv_stack_entry_p, *stack, i, entry);) | |
522 { | |
523 if (iv_stack_entry_is_constant (entry)) | |
524 { | |
525 free (VEC_index (iv_stack_entry_p, *stack, i)); | |
526 VEC_ordered_remove (iv_stack_entry_p, *stack, i); | |
527 } | |
528 else | |
529 i++; | |
530 } | |
531 } | |
532 | |
533 /* Returns a new loop_to_cloog_loop_str structure. */ | |
534 | |
535 static inline struct loop_to_cloog_loop_str * | |
536 new_loop_to_cloog_loop_str (int loop_num, | |
537 int loop_position, | |
538 CloogLoop *cloog_loop) | |
539 { | |
540 struct loop_to_cloog_loop_str *result; | |
541 | |
542 result = XNEW (struct loop_to_cloog_loop_str); | |
543 result->loop_num = loop_num; | |
544 result->cloog_loop = cloog_loop; | |
545 result->loop_position = loop_position; | |
546 | |
547 return result; | |
548 } | |
549 | |
550 /* Hash function for SCOP_LOOP2CLOOG_LOOP hash table. */ | |
551 | |
552 static hashval_t | |
553 hash_loop_to_cloog_loop (const void *elt) | |
554 { | |
555 return ((const struct loop_to_cloog_loop_str *) elt)->loop_num; | |
556 } | |
557 | |
558 /* Equality function for SCOP_LOOP2CLOOG_LOOP hash table. */ | |
559 | |
560 static int | |
561 eq_loop_to_cloog_loop (const void *el1, const void *el2) | |
562 { | |
563 const struct loop_to_cloog_loop_str *elt1, *elt2; | |
564 | |
565 elt1 = (const struct loop_to_cloog_loop_str *) el1; | |
566 elt2 = (const struct loop_to_cloog_loop_str *) el2; | |
567 return elt1->loop_num == elt2->loop_num; | |
568 } | |
569 | |
/* Compares two graphite bbs and returns an integer less than, equal to, or
   greater than zero if the first argument is considered to be respectively
   less than, equal to, or greater than the second.
   We compare using the lexicographic order of the static schedules.  */

static int
gbb_compare (const void *p_1, const void *p_2)
{
  const struct graphite_bb *const a
    = *(const struct graphite_bb *const *) p_1;
  const struct graphite_bb *const b
    = *(const struct graphite_bb *const *) p_2;

  /* A static schedule has one entry per surrounding loop plus one for
     the textual order, hence nb_loops + 1 components.  */
  return lambda_vector_compare (GBB_STATIC_SCHEDULE (a),
				gbb_nb_loops (a) + 1,
				GBB_STATIC_SCHEDULE (b),
				gbb_nb_loops (b) + 1);
}
588 | |
589 /* Sort graphite bbs in SCOP. */ | |
590 | |
591 static void | |
592 graphite_sort_gbbs (scop_p scop) | |
593 { | |
594 VEC (graphite_bb_p, heap) *bbs = SCOP_BBS (scop); | |
595 | |
596 qsort (VEC_address (graphite_bb_p, bbs), | |
597 VEC_length (graphite_bb_p, bbs), | |
598 sizeof (graphite_bb_p), gbb_compare); | |
599 } | |
600 | |
601 /* Dump conditions of a graphite basic block GBB on FILE. */ | |
602 | |
603 static void | |
604 dump_gbb_conditions (FILE *file, graphite_bb_p gbb) | |
605 { | |
606 int i; | |
607 gimple stmt; | |
608 VEC (gimple, heap) *conditions = GBB_CONDITIONS (gbb); | |
609 | |
610 if (VEC_empty (gimple, conditions)) | |
611 return; | |
612 | |
613 fprintf (file, "\tbb %d\t: cond = {", GBB_BB (gbb)->index); | |
614 | |
615 for (i = 0; VEC_iterate (gimple, conditions, i, stmt); i++) | |
616 print_gimple_stmt (file, stmt, 0, 0); | |
617 | |
618 fprintf (file, "}\n"); | |
619 } | |
620 | |
/* Converts the graphite scheduling function into a cloog scattering
   matrix.  This scattering matrix is used to limit the possible cloog
   output to valid programs in respect to the scheduling function.

   SCATTERING_DIMENSIONS specifies the dimensionality of the scattering
   matrix.  CLooG 0.14.0 and previous versions require, that all scattering
   functions of one CloogProgram have the same dimensionality, therefore we
   allow to specify it.  (Should be removed in future versions)  */

static CloogMatrix *
schedule_to_scattering (graphite_bb_p gb, int scattering_dimensions)
{
  int i;
  scop_p scop = GBB_SCOP (gb);

  int nb_iterators = gbb_nb_loops (gb);

  /* The cloog scattering matrix consists of these columns:
     1 col = Eq/Inq,
     scattering_dimensions cols = Scattering dimensions,
     nb_iterators cols = bb's iterators,
     scop_nb_params cols = Parameters,
     1 col = Constant 1.

     Example:

     scattering_dimensions = 5
     max_nb_iterators = 2
     nb_iterators = 1
     scop_nb_params = 2

     Schedule:
     ? i
     4 5

     Scattering Matrix:
     s1 s2 s3 s4 s5 i p1 p2 1
     1 0 0 0 0 0 0 0 -4 = 0
     0 1 0 0 0 -1 0 0 0 = 0
     0 0 1 0 0 0 0 0 -5 = 0 */
  int nb_params = scop_nb_params (scop);
  int nb_cols = 1 + scattering_dimensions + nb_iterators + nb_params + 1;
  int col_const = nb_cols - 1;
  int col_iter_offset = 1 + scattering_dimensions;

  CloogMatrix *scat = cloog_matrix_alloc (scattering_dimensions, nb_cols);

  /* The schedule interleaves one textual-order dimension with each
     iterator dimension, so at least 2 * nb_iterators + 1 rows are
     needed.  */
  gcc_assert (scattering_dimensions >= nb_iterators * 2 + 1);

  /* Initialize the identity matrix.  */
  for (i = 0; i < scattering_dimensions; i++)
    value_set_si (scat->p[i][i + 1], 1);

  /* Textual order outside the first loop.  */
  value_set_si (scat->p[0][col_const], -GBB_STATIC_SCHEDULE (gb)[0]);

  /* For all surrounding loops.  */
  for (i = 0; i < nb_iterators; i++)
    {
      int schedule = GBB_STATIC_SCHEDULE (gb)[i + 1];

      /* Iterations of this loop: row 2i+1 equates scattering
	 dimension i+1 with the loop iterator.  */
      value_set_si (scat->p[2 * i + 1][col_iter_offset + i], -1);

      /* Textual order inside this loop.  */
      value_set_si (scat->p[2 * i + 2][col_const], -schedule);
    }

  return scat;
}
691 | |
/* Print the schedules of GB to FILE with INDENT white spaces before.
   VERBOSITY determines how verbose the code pretty printers are.
   Prints, in order: the basic block, its domain matrix, its static
   schedule, its contained loops, data references, conditions, and
   the derived scattering matrix.  */

void
print_graphite_bb (FILE *file, graphite_bb_p gb, int indent, int verbosity)
{
  CloogMatrix *scattering;
  int i;
  loop_p loop;
  fprintf (file, "\nGBB (\n");

  print_loops_bb (file, GBB_BB (gb), indent+2, verbosity);

  if (GBB_DOMAIN (gb))
    {
      fprintf (file, " (domain: \n");
      cloog_matrix_print (file, GBB_DOMAIN (gb));
      fprintf (file, " )\n");
    }

  if (GBB_STATIC_SCHEDULE (gb))
    {
      fprintf (file, " (static schedule: ");
      print_lambda_vector (file, GBB_STATIC_SCHEDULE (gb),
			   gbb_nb_loops (gb) + 1);
      fprintf (file, " )\n");
    }

  if (GBB_LOOPS (gb))
    {
      fprintf (file, " (contained loops: \n");
      for (i = 0; VEC_iterate (loop_p, GBB_LOOPS (gb), i, loop); i++)
	if (loop == NULL)
	  fprintf (file, " iterator %d => NULL \n", i);
	else
	  fprintf (file, " iterator %d => loop %d \n", i,
		   loop->num);
      fprintf (file, " )\n");
    }

  if (GBB_DATA_REFS (gb))
    dump_data_references (file, GBB_DATA_REFS (gb));

  if (GBB_CONDITIONS (gb))
    {
      fprintf (file, " (conditions: \n");
      dump_gbb_conditions (file, gb);
      fprintf (file, " )\n");
    }

  /* The scattering matrix can only be derived when both a scop and a
     static schedule are attached to GB (see schedule_to_scattering).  */
  if (GBB_SCOP (gb)
      && GBB_STATIC_SCHEDULE (gb))
    {
      fprintf (file, " (scattering: \n");
      scattering = schedule_to_scattering (gb, 2 * gbb_nb_loops (gb) + 1);
      cloog_matrix_print (file, scattering);
      cloog_matrix_free (scattering);
      fprintf (file, " )\n");
    }

  fprintf (file, ")\n");
}
754 | |
755 /* Print to STDERR the schedules of GB with VERBOSITY level. */ | |
756 | |
757 void | |
758 debug_gbb (graphite_bb_p gb, int verbosity) | |
759 { | |
760 print_graphite_bb (stderr, gb, 0, verbosity); | |
761 } | |
762 | |
763 | |
764 /* Print SCOP to FILE. VERBOSITY determines how verbose the pretty | |
765 printers are. */ | |
766 | |
767 static void | |
768 print_scop (FILE *file, scop_p scop, int verbosity) | |
769 { | |
770 if (scop == NULL) | |
771 return; | |
772 | |
773 fprintf (file, "\nSCoP_%d_%d (\n", | |
774 SCOP_ENTRY (scop)->index, SCOP_EXIT (scop)->index); | |
775 | |
776 fprintf (file, " (cloog: \n"); | |
777 cloog_program_print (file, SCOP_PROG (scop)); | |
778 fprintf (file, " )\n"); | |
779 | |
780 if (SCOP_BBS (scop)) | |
781 { | |
782 graphite_bb_p gb; | |
783 int i; | |
784 | |
785 for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gb); i++) | |
786 print_graphite_bb (file, gb, 0, verbosity); | |
787 } | |
788 | |
789 fprintf (file, ")\n"); | |
790 } | |
791 | |
792 /* Print all the SCOPs to FILE. VERBOSITY determines how verbose the | |
793 code pretty printers are. */ | |
794 | |
795 static void | |
796 print_scops (FILE *file, int verbosity) | |
797 { | |
798 int i; | |
799 scop_p scop; | |
800 | |
801 for (i = 0; VEC_iterate (scop_p, current_scops, i, scop); i++) | |
802 print_scop (file, scop, verbosity); | |
803 } | |
804 | |
805 /* Debug SCOP. VERBOSITY determines how verbose the code pretty | |
806 printers are. */ | |
807 | |
808 void | |
809 debug_scop (scop_p scop, int verbosity) | |
810 { | |
811 print_scop (stderr, scop, verbosity); | |
812 } | |
813 | |
/* Debug all SCOPs from CURRENT_SCOPS on stderr.  VERBOSITY determines
   how verbose the code pretty printers are.  */

void
debug_scops (int verbosity)
{
  print_scops (stderr, verbosity);
}
822 | |
823 /* Pretty print to FILE the SCOP in DOT format. */ | |
824 | |
825 static void | |
826 dot_scop_1 (FILE *file, scop_p scop) | |
827 { | |
828 edge e; | |
829 edge_iterator ei; | |
830 basic_block bb; | |
831 basic_block entry = SCOP_ENTRY (scop); | |
832 basic_block exit = SCOP_EXIT (scop); | |
833 | |
834 fprintf (file, "digraph SCoP_%d_%d {\n", entry->index, | |
835 exit->index); | |
836 | |
837 FOR_ALL_BB (bb) | |
838 { | |
839 if (bb == entry) | |
840 fprintf (file, "%d [shape=triangle];\n", bb->index); | |
841 | |
842 if (bb == exit) | |
843 fprintf (file, "%d [shape=box];\n", bb->index); | |
844 | |
845 if (bb_in_sese_p (bb, SCOP_REGION (scop))) | |
846 fprintf (file, "%d [color=red];\n", bb->index); | |
847 | |
848 FOR_EACH_EDGE (e, ei, bb->succs) | |
849 fprintf (file, "%d -> %d;\n", bb->index, e->dest->index); | |
850 } | |
851 | |
852 fputs ("}\n\n", file); | |
853 } | |
854 | |
855 /* Display SCOP using dotty. */ | |
856 | |
857 void | |
858 dot_scop (scop_p scop) | |
859 { | |
860 dot_scop_1 (stderr, scop); | |
861 } | |
862 | |
/* Pretty print all SCoPs in DOT format and mark them with different colors.
   If there are not enough colors, paint later SCoPs gray.
   Special nodes:
   - "*" after the node number: entry of a SCoP,
   - "#" after the node number: exit of a SCoP,
   - "()" entry or exit not part of SCoP.  */

static void
dot_all_scops_1 (FILE *file)
{
  basic_block bb;
  edge e;
  edge_iterator ei;
  scop_p scop;
  const char* color;
  int i;

  /* Disable debugging while printing graph.  */
  int tmp_dump_flags = dump_flags;
  dump_flags = 0;

  fprintf (file, "digraph all {\n");

  FOR_ALL_BB (bb)
    {
      /* NOTE(review): plain int used as a boolean flag; bool would be
	 the idiomatic type here.  */
      int part_of_scop = false;

      /* Use HTML for every bb label.  So we are able to print bbs
	 which are part of two different SCoPs, with two different
	 background colors.  */
      fprintf (file, "%d [label=<\n <TABLE BORDER=\"0\" CELLBORDER=\"1\" ",
	       bb->index);
      fprintf (file, "CELLSPACING=\"0\">\n");

      /* Select color for SCoP: one table row per SCoP the block
	 belongs to, colored by the SCoP's index modulo 17.  */
      for (i = 0; VEC_iterate (scop_p, current_scops, i, scop); i++)
	if (bb_in_sese_p (bb, SCOP_REGION (scop))
	    || (SCOP_EXIT (scop) == bb)
	    || (SCOP_ENTRY (scop) == bb))
	  {
	    switch (i % 17)
	      {
	      case 0: /* red */
		color = "#e41a1c";
		break;
	      case 1: /* blue */
		color = "#377eb8";
		break;
	      case 2: /* green */
		color = "#4daf4a";
		break;
	      case 3: /* purple */
		color = "#984ea3";
		break;
	      case 4: /* orange */
		color = "#ff7f00";
		break;
	      case 5: /* yellow */
		color = "#ffff33";
		break;
	      case 6: /* brown */
		color = "#a65628";
		break;
	      case 7: /* rose */
		color = "#f781bf";
		break;
	      case 8:
		color = "#8dd3c7";
		break;
	      case 9:
		color = "#ffffb3";
		break;
	      case 10:
		color = "#bebada";
		break;
	      case 11:
		color = "#fb8072";
		break;
	      case 12:
		color = "#80b1d3";
		break;
	      case 13:
		color = "#fdb462";
		break;
	      case 14:
		color = "#b3de69";
		break;
	      case 15:
		color = "#fccde5";
		break;
	      case 16:
		color = "#bc80bd";
		break;
	      default: /* gray */
		color = "#999999";
	      }

	    fprintf (file, " <TR><TD WIDTH=\"50\" BGCOLOR=\"%s\">", color);

	    /* Parenthesize entry/exit nodes that are not themselves
	       part of the SCoP region.  */
	    if (!bb_in_sese_p (bb, SCOP_REGION (scop)))
	      fprintf (file, " (");

	    if (bb == SCOP_ENTRY (scop)
		&& bb == SCOP_EXIT (scop))
	      fprintf (file, " %d*# ", bb->index);
	    else if (bb == SCOP_ENTRY (scop))
	      fprintf (file, " %d* ", bb->index);
	    else if (bb == SCOP_EXIT (scop))
	      fprintf (file, " %d# ", bb->index);
	    else
	      fprintf (file, " %d ", bb->index);

	    if (!bb_in_sese_p (bb, SCOP_REGION (scop)))
	      fprintf (file, ")");

	    fprintf (file, "</TD></TR>\n");
	    part_of_scop = true;
	  }

      /* Blocks in no SCoP get a single white row.  */
      if (!part_of_scop)
	{
	  fprintf (file, " <TR><TD WIDTH=\"50\" BGCOLOR=\"#ffffff\">");
	  fprintf (file, " %d </TD></TR>\n", bb->index);
	}

      fprintf (file, " </TABLE>>, shape=box, style=\"setlinewidth(0)\"]\n");
    }

  FOR_ALL_BB (bb)
    {
      FOR_EACH_EDGE (e, ei, bb->succs)
	fprintf (file, "%d -> %d;\n", bb->index, e->dest->index);
    }

  fputs ("}\n\n", file);

  /* Enable debugging again.  */
  dump_flags = tmp_dump_flags;
}
1002 | |
/* Display all SCoPs using dotty, or, in the default build, dump the
   DOT text to stderr.  */

void
dot_all_scops (void)
{
  /* When debugging, enable the following code.  This cannot be used
     in production compilers because it calls "system".  */
#if 0
  FILE *stream = fopen ("/tmp/allscops.dot", "w");
  gcc_assert (stream);

  dot_all_scops_1 (stream);
  fclose (stream);

  system ("dotty /tmp/allscops.dot");
#else
  dot_all_scops_1 (stderr);
#endif
}
1022 | |
1023 /* Returns the outermost loop in SCOP that contains BB. */ | |
1024 | |
1025 static struct loop * | |
1026 outermost_loop_in_scop (scop_p scop, basic_block bb) | |
1027 { | |
1028 struct loop *nest; | |
1029 | |
1030 nest = bb->loop_father; | |
1031 while (loop_outer (nest) | |
1032 && loop_in_sese_p (loop_outer (nest), SCOP_REGION (scop))) | |
1033 nest = loop_outer (nest); | |
1034 | |
1035 return nest; | |
1036 } | |
1037 | |
1038 /* Returns the block preceding the entry of SCOP. */ | |
1039 | |
1040 static basic_block | |
1041 block_before_scop (scop_p scop) | |
1042 { | |
1043 return SESE_ENTRY (SCOP_REGION (scop))->src; | |
1044 } | |
1045 | |
1046 /* Return true when EXPR is an affine function in LOOP with parameters | |
1047 instantiated relative to SCOP_ENTRY. */ | |
1048 | |
1049 static bool | |
1050 loop_affine_expr (basic_block scop_entry, struct loop *loop, tree expr) | |
1051 { | |
1052 int n = loop->num; | |
1053 tree scev = analyze_scalar_evolution (loop, expr); | |
1054 | |
1055 scev = instantiate_scev (scop_entry, loop, scev); | |
1056 | |
1057 return (evolution_function_is_invariant_p (scev, n) | |
1058 || evolution_function_is_affine_multivariate_p (scev, n)); | |
1059 } | |
1060 | |
1061 /* Return true if REF or any of its subtrees contains a | |
1062 component_ref. */ | |
1063 | |
1064 static bool | |
1065 contains_component_ref_p (tree ref) | |
1066 { | |
1067 if (!ref) | |
1068 return false; | |
1069 | |
1070 while (handled_component_p (ref)) | |
1071 { | |
1072 if (TREE_CODE (ref) == COMPONENT_REF) | |
1073 return true; | |
1074 | |
1075 ref = TREE_OPERAND (ref, 0); | |
1076 } | |
1077 | |
1078 return false; | |
1079 } | |
1080 | |
1081 /* Return true if the operand OP is simple. */ | |
1082 | |
1083 static bool | |
1084 is_simple_operand (loop_p loop, gimple stmt, tree op) | |
1085 { | |
1086 /* It is not a simple operand when it is a declaration, */ | |
1087 if (DECL_P (op) | |
1088 /* or a structure, */ | |
1089 || AGGREGATE_TYPE_P (TREE_TYPE (op)) | |
1090 /* or a COMPONENT_REF, */ | |
1091 || contains_component_ref_p (op) | |
1092 /* or a memory access that cannot be analyzed by the data | |
1093 reference analysis. */ | |
1094 || ((handled_component_p (op) || INDIRECT_REF_P (op)) | |
1095 && !stmt_simple_memref_p (loop, stmt, op))) | |
1096 return false; | |
1097 | |
1098 return true; | |
1099 } | |
1100 | |
/* Return true only when STMT is simple enough for being handled by
   Graphite.  This depends on SCOP_ENTRY, as the parameters are
   initialized relatively to this basic block.  */

static bool
stmt_simple_for_scop_p (basic_block scop_entry, gimple stmt)
{
  basic_block bb = gimple_bb (stmt);
  struct loop *loop = bb->loop_father;

  /* GIMPLE_ASM and GIMPLE_CALL may embed arbitrary side effects.
     Calls have side-effects, except those to const or pure
     functions.  */
  if (gimple_has_volatile_ops (stmt)
      || (gimple_code (stmt) == GIMPLE_CALL
	  && !(gimple_call_flags (stmt) & (ECF_CONST | ECF_PURE)))
      || (gimple_code (stmt) == GIMPLE_ASM))
    return false;

  switch (gimple_code (stmt))
    {
    case GIMPLE_RETURN:
    case GIMPLE_LABEL:
      return true;

    case GIMPLE_COND:
      {
	tree op;
	ssa_op_iter op_iter;
	enum tree_code code = gimple_cond_code (stmt);

	/* We can only handle this kind of conditional expressions.
	   For inequalities like "if (i != 3 * k)" we need unions of
	   polyhedrons.  Expressions like "if (a)" or "if (a == 15)" need
	   them for the else branch.  */
	if (!(code == LT_EXPR
	      || code == GT_EXPR
	      || code == LE_EXPR
	      || code == GE_EXPR))
	  return false;

	if (!scop_entry)
	  return false;

	/* Every SSA operand of the condition has to be an affine
	   expression in the enclosing loop.  */
	FOR_EACH_SSA_TREE_OPERAND (op, stmt, op_iter, SSA_OP_ALL_USES)
	  if (!loop_affine_expr (scop_entry, loop, op))
	    return false;

	return true;
      }

    case GIMPLE_ASSIGN:
      {
	enum tree_code code = gimple_assign_rhs_code (stmt);

	switch (get_gimple_rhs_class (code))
	  {
	  case GIMPLE_UNARY_RHS:
	  case GIMPLE_SINGLE_RHS:
	    return (is_simple_operand (loop, stmt, gimple_assign_lhs (stmt))
		    && is_simple_operand (loop, stmt, gimple_assign_rhs1 (stmt)));

	  case GIMPLE_BINARY_RHS:
	    return (is_simple_operand (loop, stmt, gimple_assign_lhs (stmt))
		    && is_simple_operand (loop, stmt, gimple_assign_rhs1 (stmt))
		    && is_simple_operand (loop, stmt, gimple_assign_rhs2 (stmt)));

	  case GIMPLE_INVALID_RHS:
	  default:
	    gcc_unreachable ();
	  }
      }

    case GIMPLE_CALL:
      {
	size_t i;
	size_t n = gimple_call_num_args (stmt);
	tree lhs = gimple_call_lhs (stmt);

	/* Only const/pure calls reach this point (see the check
	   above); their lhs and arguments still have to be simple.  */
	if (lhs && !is_simple_operand (loop, stmt, lhs))
	  return false;

	for (i = 0; i < n; i++)
	  if (!is_simple_operand (loop, stmt, gimple_call_arg (stmt, i)))
	    return false;

	return true;
      }

    default:
      /* These nodes cut a new scope.  */
      return false;
    }

  return false;
}
1197 | |
1198 /* Returns the statement of BB that contains a harmful operation: that | |
1199 can be a function call with side effects, the induction variables | |
1200 are not linear with respect to SCOP_ENTRY, etc. The current open | |
1201 scop should end before this statement. */ | |
1202 | |
1203 static gimple | |
1204 harmful_stmt_in_bb (basic_block scop_entry, basic_block bb) | |
1205 { | |
1206 gimple_stmt_iterator gsi; | |
1207 gimple stmt; | |
1208 | |
1209 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) | |
1210 if (!stmt_simple_for_scop_p (scop_entry, gsi_stmt (gsi))) | |
1211 return gsi_stmt (gsi); | |
1212 | |
1213 stmt = last_stmt (bb); | |
1214 if (stmt && gimple_code (stmt) == GIMPLE_COND) | |
1215 { | |
1216 tree lhs = gimple_cond_lhs (stmt); | |
1217 tree rhs = gimple_cond_rhs (stmt); | |
1218 | |
1219 if (TREE_CODE (TREE_TYPE (lhs)) == REAL_TYPE | |
1220 || TREE_CODE (TREE_TYPE (rhs)) == REAL_TYPE) | |
1221 return stmt; | |
1222 } | |
1223 | |
1224 return NULL; | |
1225 } | |
1226 | |
/* Returns true when BB will be represented in graphite.  Return false
   for the basic blocks that contain code eliminated in the code
   generation pass: i.e. induction variables and exit conditions.  */

static bool
graphite_stmt_p (scop_p scop, basic_block bb,
		 VEC (data_reference_p, heap) *drs)
{
  gimple_stmt_iterator gsi;
  loop_p loop = bb->loop_father;

  /* A bb carrying data references is always kept.  */
  if (VEC_length (data_reference_p, drs) > 0)
    return true;

  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);

      switch (gimple_code (stmt))
	{
	  /* Control flow expressions can be ignored, as they are
	     represented in the iteration domains and will be
	     regenerated by graphite.  */
	case GIMPLE_COND:
	case GIMPLE_GOTO:
	case GIMPLE_SWITCH:
	  break;

	case GIMPLE_ASSIGN:
	  {
	    tree var = gimple_assign_lhs (stmt);
	    var = analyze_scalar_evolution (loop, var);
	    var = instantiate_scev (block_before_scop (scop), loop, var);

	    /* An assignment whose scalar evolution cannot be
	       determined must be kept: it cannot be regenerated from
	       the polyhedral representation.  */
	    if (chrec_contains_undetermined (var))
	      return true;

	    break;
	  }

	default:
	  /* Any other kind of statement has to be kept.  */
	  return true;
	}
    }

  return false;
}
1274 | |
/* Store the GRAPHITE representation of BB.  */

static void
new_graphite_bb (scop_p scop, basic_block bb)
{
  struct graphite_bb *gbb;
  VEC (data_reference_p, heap) *drs = VEC_alloc (data_reference_p, heap, 5);
  struct loop *nest = outermost_loop_in_scop (scop, bb);
  gimple_stmt_iterator gsi;

  /* Collect the data references of BB, analyzed relative to the
     outermost loop of the SCoP containing BB.  */
  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    find_data_references_in_stmt (nest, gsi_stmt (gsi), &drs);

  /* Basic blocks that will not be represented in graphite do not get
     a graphite_bb.  */
  if (!graphite_stmt_p (scop, bb, drs))
    {
      free_data_refs (drs);
      return;
    }

  gbb = XNEW (struct graphite_bb);
  /* bb->aux links the basic block back to its graphite_bb.  */
  bb->aux = gbb;
  GBB_BB (gbb) = bb;
  GBB_SCOP (gbb) = scop;
  GBB_DATA_REFS (gbb) = drs;
  GBB_DOMAIN (gbb) = NULL;
  GBB_CONDITIONS (gbb) = NULL;
  GBB_CONDITION_CASES (gbb) = NULL;
  GBB_LOOPS (gbb) = NULL;
  GBB_STATIC_SCHEDULE (gbb) = NULL;
  GBB_CLOOG_IV_TYPES (gbb) = NULL;
  VEC_safe_push (graphite_bb_p, heap, SCOP_BBS (scop), gbb);
}
1307 | |
/* Frees GBB.  */

static void
free_graphite_bb (struct graphite_bb *gbb)
{
  if (GBB_DOMAIN (gbb))
    cloog_matrix_free (GBB_DOMAIN (gbb));

  if (GBB_CLOOG_IV_TYPES (gbb))
    htab_delete (GBB_CLOOG_IV_TYPES (gbb));

  /* FIXME: free_data_refs is disabled for the moment, but should be
     enabled.

     free_data_refs (GBB_DATA_REFS (gbb)); */

  VEC_free (gimple, heap, GBB_CONDITIONS (gbb));
  VEC_free (gimple, heap, GBB_CONDITION_CASES (gbb));
  VEC_free (loop_p, heap, GBB_LOOPS (gbb));
  /* Clear the back-pointer installed by new_graphite_bb.  */
  GBB_BB (gbb)->aux = 0;
  XDELETE (gbb);
}
1330 | |
1331 | |
1332 | |
/* Structure containing the mapping between the old names and the new
   names used after block copy in the new loop context.  */
typedef struct rename_map_elt
{
  /* OLD_NAME is the hash key; NEW_NAME the associated value.  */
  tree old_name, new_name;
} *rename_map_elt;
1339 | |
1340 | |
1341 /* Print to stderr the element ELT. */ | |
1342 | |
1343 static void | |
1344 debug_rename_elt (rename_map_elt elt) | |
1345 { | |
1346 fprintf (stderr, "("); | |
1347 print_generic_expr (stderr, elt->old_name, 0); | |
1348 fprintf (stderr, ", "); | |
1349 print_generic_expr (stderr, elt->new_name, 0); | |
1350 fprintf (stderr, ")\n"); | |
1351 } | |
1352 | |
1353 /* Helper function for debug_rename_map. */ | |
1354 | |
1355 static int | |
1356 debug_rename_map_1 (void **slot, void *s ATTRIBUTE_UNUSED) | |
1357 { | |
1358 struct rename_map_elt *entry = (struct rename_map_elt *) *slot; | |
1359 debug_rename_elt (entry); | |
1360 return 1; | |
1361 } | |
1362 | |
/* Print to stderr all the elements of MAP.  Intended to be callable
   from the debugger.  */

void
debug_rename_map (htab_t map)
{
  htab_traverse (map, debug_rename_map_1, NULL);
}
1370 | |
/* Constructs a new rename_map_elt mapping OLD_NAME to NEW_NAME.  The
   element is heap allocated; the rename map hash tables are created
   with `free' as their delete function (see new_scop), so ownership
   passes to the table on insertion.  */

static inline rename_map_elt
new_rename_map_elt (tree old_name, tree new_name)
{
  rename_map_elt res;

  res = XNEW (struct rename_map_elt);
  res->old_name = old_name;
  res->new_name = new_name;

  return res;
}
1384 | |
1385 /* Computes a hash function for database element ELT. */ | |
1386 | |
1387 static hashval_t | |
1388 rename_map_elt_info (const void *elt) | |
1389 { | |
1390 return htab_hash_pointer (((const struct rename_map_elt *) elt)->old_name); | |
1391 } | |
1392 | |
1393 /* Compares database elements E1 and E2. */ | |
1394 | |
1395 static int | |
1396 eq_rename_map_elts (const void *e1, const void *e2) | |
1397 { | |
1398 const struct rename_map_elt *elt1 = (const struct rename_map_elt *) e1; | |
1399 const struct rename_map_elt *elt2 = (const struct rename_map_elt *) e2; | |
1400 | |
1401 return (elt1->old_name == elt2->old_name); | |
1402 } | |
1403 | |
1404 /* Returns the new name associated to OLD_NAME in MAP. */ | |
1405 | |
1406 static tree | |
1407 get_new_name_from_old_name (htab_t map, tree old_name) | |
1408 { | |
1409 struct rename_map_elt tmp; | |
1410 PTR *slot; | |
1411 | |
1412 tmp.old_name = old_name; | |
1413 slot = htab_find_slot (map, &tmp, NO_INSERT); | |
1414 | |
1415 if (slot && *slot) | |
1416 return ((rename_map_elt) *slot)->new_name; | |
1417 | |
1418 return old_name; | |
1419 } | |
1420 | |
1421 | |
1422 | |
/* Creates a new scop delimited by the edges ENTRY and EXIT.  All the
   substructures are allocated empty; the result has to be released
   with free_scop.  */

static scop_p
new_scop (edge entry, edge exit)
{
  scop_p scop = XNEW (struct scop);

  gcc_assert (entry && exit);

  SCOP_REGION (scop) = new_sese (entry, exit);
  SCOP_BBS (scop) = VEC_alloc (graphite_bb_p, heap, 3);
  SCOP_OLDIVS (scop) = VEC_alloc (name_tree, heap, 3);
  SCOP_LOOPS (scop) = BITMAP_ALLOC (NULL);
  SCOP_LOOP_NEST (scop) = VEC_alloc (loop_p, heap, 3);
  SCOP_ADD_PARAMS (scop) = true;
  SCOP_PARAMS (scop) = VEC_alloc (name_tree, heap, 3);
  SCOP_PROG (scop) = cloog_program_malloc ();
  cloog_program_set_names (SCOP_PROG (scop), cloog_names_malloc ());
  /* Both hash tables own their elements: `free' is the delete
     function.  */
  SCOP_LOOP2CLOOG_LOOP (scop) = htab_create (10, hash_loop_to_cloog_loop,
					     eq_loop_to_cloog_loop,
					     free);
  SCOP_LIVEOUT_RENAMES (scop) = htab_create (10, rename_map_elt_info,
					     eq_rename_map_elts, free);
  return scop;
}
1448 | |
/* Deletes SCOP: releases every substructure allocated by new_scop and
   filled during the SCoP construction, then SCOP itself.  */

static void
free_scop (scop_p scop)
{
  int i;
  name_tree p;
  struct graphite_bb *gb;
  name_tree iv;

  for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gb); i++)
    free_graphite_bb (gb);

  VEC_free (graphite_bb_p, heap, SCOP_BBS (scop));
  BITMAP_FREE (SCOP_LOOPS (scop));
  VEC_free (loop_p, heap, SCOP_LOOP_NEST (scop));

  /* The old induction variables and the parameters are owned by the
     scop.  */
  for (i = 0; VEC_iterate (name_tree, SCOP_OLDIVS (scop), i, iv); i++)
    free (iv);
  VEC_free (name_tree, heap, SCOP_OLDIVS (scop));

  for (i = 0; VEC_iterate (name_tree, SCOP_PARAMS (scop), i, p); i++)
    free (p);

  VEC_free (name_tree, heap, SCOP_PARAMS (scop));
  cloog_program_free (SCOP_PROG (scop));
  /* The hash tables free their elements themselves (see new_scop).  */
  htab_delete (SCOP_LOOP2CLOOG_LOOP (scop));
  htab_delete (SCOP_LIVEOUT_RENAMES (scop));
  free_sese (SCOP_REGION (scop));
  XDELETE (scop);
}
1480 | |
1481 /* Deletes all scops in SCOPS. */ | |
1482 | |
1483 static void | |
1484 free_scops (VEC (scop_p, heap) *scops) | |
1485 { | |
1486 int i; | |
1487 scop_p scop; | |
1488 | |
1489 for (i = 0; VEC_iterate (scop_p, scops, i, scop); i++) | |
1490 free_scop (scop); | |
1491 | |
1492 VEC_free (scop_p, heap, scops); | |
1493 } | |
1494 | |
/* Classification of basic blocks used by the SCoP detection (see
   get_bb_type).  */
typedef enum gbb_type {
  GBB_UNKNOWN,			/* Not classified.  */
  GBB_LOOP_SING_EXIT_HEADER,	/* Header of a loop with a single exit.  */
  GBB_LOOP_MULT_EXIT_HEADER,	/* Header of a loop with several exits.  */
  GBB_LOOP_EXIT,		/* A loop exit block.  */
  GBB_COND_HEADER,		/* Start of a condition.  */
  GBB_SIMPLE,			/* Single successor, dominates one bb.  */
  GBB_LAST			/* Dominates no bb: ends the region.  */
} gbb_type;
1504 | |
/* Detect the type of BB.  Loop headers are only marked, if they are
   new.  This means their loop_father is different to LAST_LOOP.
   Otherwise they are treated like any other bb and their type can be
   any other type.  */

static gbb_type
get_bb_type (basic_block bb, struct loop *last_loop)
{
  VEC (basic_block, heap) *dom;
  int nb_dom, nb_suc;
  struct loop *loop = bb->loop_father;

  /* Check if we enter a new loop.  */
  if (loop != last_loop)
    {
      if (single_exit (loop) != NULL)
	return GBB_LOOP_SING_EXIT_HEADER;
      else if (loop->num != 0)
	return GBB_LOOP_MULT_EXIT_HEADER;
      else
	/* loop->num == 0 is presumably the function's pseudo-loop, so
	   this header really starts a condition -- TODO confirm.  */
	return GBB_COND_HEADER;
    }

  dom = get_dominated_by (CDI_DOMINATORS, bb);
  nb_dom = VEC_length (basic_block, dom);
  VEC_free (basic_block, heap, dom);

  /* A bb that dominates nothing terminates the region.  */
  if (nb_dom == 0)
    return GBB_LAST;

  nb_suc = VEC_length (edge, bb->succs);

  if (nb_dom == 1 && nb_suc == 1)
    return GBB_SIMPLE;

  return GBB_COND_HEADER;
}
1542 | |
/* A SCoP detection region, defined using bbs as borders.
   All control flow touching this region comes in passing basic_block ENTRY
   and leaves passing basic_block EXIT.  By using bbs instead of edges for
   the borders we are able to represent also regions that do not have a
   single entry or exit edge.
   But as they have a single entry basic_block and a single exit basic_block,
   we are able to generate for every sd_region a single entry and exit edge.

   1   2
    \ /
     3	<- entry
     |
     4
    / \			This region contains: {3, 4, 5, 6, 7, 8}
   5   6
   |   |
   7   8
    \ /
     9	<- exit  */


typedef struct sd_region_p
{
  /* The entry bb dominates all bbs in the sd_region.  It is part of the
     region.  */
  basic_block entry;

  /* The exit bb postdominates all bbs in the sd_region, but is not
     part of the region.  */
  basic_block exit;
} sd_region;

DEF_VEC_O(sd_region);
DEF_VEC_ALLOC_O(sd_region, heap);
1577 | |
1578 | |
1579 /* Moves the scops from SOURCE to TARGET and clean up SOURCE. */ | |
1580 | |
1581 static void | |
1582 move_sd_regions (VEC (sd_region, heap) **source, VEC (sd_region, heap) **target) | |
1583 { | |
1584 sd_region *s; | |
1585 int i; | |
1586 | |
1587 for (i = 0; VEC_iterate (sd_region, *source, i, s); i++) | |
1588 VEC_safe_push (sd_region, heap, *target, s); | |
1589 | |
1590 VEC_free (sd_region, heap, *source); | |
1591 } | |
1592 | |
1593 /* Return true when it is not possible to represent the upper bound of | |
1594 LOOP in the polyhedral representation. */ | |
1595 | |
1596 static bool | |
1597 graphite_cannot_represent_loop_niter (loop_p loop) | |
1598 { | |
1599 tree niter = number_of_latch_executions (loop); | |
1600 | |
1601 return chrec_contains_undetermined (niter) | |
1602 || !scev_is_linear_expression (niter); | |
1603 } | |
/* Store information needed by scopdet_* functions.  */

struct scopdet_info
{
  /* Where the last open scop would stop if the current BB is harmful.  */
  basic_block last;

  /* Where the next scop would start if the current BB is harmful.  */
  basic_block next;

  /* The bb or one of its children contains open loop exits.  That means
     loop exit nodes that are not surrounded by a loop dominated by bb.  */
  bool exits;

  /* The bb or one of its children contains structures that we cannot
     handle (harmful statements, unrepresentable control flow).  */
  bool difficult;
};
1621 | |
1622 | |
1623 static struct scopdet_info build_scops_1 (basic_block, VEC (sd_region, heap) **, | |
1624 loop_p); | |
1625 | |
/* Calculates BB infos.  If bb is difficult we add valid SCoPs dominated by BB
   to SCOPS.  TYPE is the gbb_type of BB.  */

static struct scopdet_info
scopdet_basic_block_info (basic_block bb, VEC (sd_region, heap) **scops,
			  gbb_type type)
{
  struct loop *loop = bb->loop_father;
  struct scopdet_info result;
  gimple stmt;

  /* XXX: ENTRY_BLOCK_PTR could be optimized in later steps.  */
  stmt = harmful_stmt_in_bb (ENTRY_BLOCK_PTR, bb);
  result.difficult = (stmt != NULL);
  result.last = NULL;

  switch (type)
    {
    case GBB_LAST:
      result.next = NULL;
      result.exits = false;
      result.last = bb;

      /* Mark bbs terminating a SESE region difficult, if they start
	 a condition.  */
      if (VEC_length (edge, bb->succs) > 1)
	result.difficult = true;

      break;

    case GBB_SIMPLE:
      result.next = single_succ (bb);
      result.exits = false;
      result.last = bb;
      break;

    case GBB_LOOP_SING_EXIT_HEADER:
      {
	VEC (sd_region, heap) *tmp_scops = VEC_alloc (sd_region, heap,3);
	struct scopdet_info sinfo;

	/* Scan the loop body first.  */
	sinfo = build_scops_1 (bb, &tmp_scops, loop);

	result.last = single_exit (bb->loop_father)->src;
	result.next = single_exit (bb->loop_father)->dest;

	/* If we do not dominate result.next, remove it.  It's either
	   the EXIT_BLOCK_PTR, or another bb dominates it and will
	   call the scop detection for this bb.  */
	if (!dominated_by_p (CDI_DOMINATORS, result.next, bb))
	  result.next = NULL;

	if (result.last->loop_father != loop)
	  result.next = NULL;

	/* A loop whose iteration count is not representable cannot be
	   part of a SCoP.  */
	if (graphite_cannot_represent_loop_niter (loop))
	  result.difficult = true;

	/* Keep the inner SCoPs only when the whole loop cannot be
	   joined into the enclosing region.  */
	if (sinfo.difficult)
	  move_sd_regions (&tmp_scops, scops);
	else
	  VEC_free (sd_region, heap, tmp_scops);

	result.exits = false;
	result.difficult |= sinfo.difficult;
	break;
      }

    case GBB_LOOP_MULT_EXIT_HEADER:
      {
	/* XXX: For now we just do not join loops with multiple exits.  If the
	   exits lead to the same bb it may be possible to join the loop.  */
	VEC (sd_region, heap) *tmp_scops = VEC_alloc (sd_region, heap, 3);
	VEC (edge, heap) *exits = get_loop_exit_edges (loop);
	edge e;
	int i;
	build_scops_1 (bb, &tmp_scops, loop);

	/* Scan the code dominated by this loop.  This means all bbs, that
	   are dominated by a bb in this loop, but are not part of this loop.

	   The easiest case:
	     - The loop exit destination is dominated by the exit sources.

	   TODO: We miss here the more complex cases:
		  - The exit destinations are dominated by another bb inside
		    the loop.
		  - The loop dominates bbs, that are not exit destinations.  */
	for (i = 0; VEC_iterate (edge, exits, i, e); i++)
	  if (e->src->loop_father == loop
	      && dominated_by_p (CDI_DOMINATORS, e->dest, e->src))
	    {
	      /* Pass loop_outer to recognize e->dest as loop header in
		 build_scops_1.  */
	      if (e->dest->loop_father->header == e->dest)
		build_scops_1 (e->dest, &tmp_scops,
			       loop_outer (e->dest->loop_father));
	      else
		build_scops_1 (e->dest, &tmp_scops, e->dest->loop_father);
	    }

	result.next = NULL;
	result.last = NULL;
	result.difficult = true;
	result.exits = false;
	move_sd_regions (&tmp_scops, scops);
	VEC_free (edge, heap, exits);
	break;
      }
    case GBB_COND_HEADER:
      {
	VEC (sd_region, heap) *tmp_scops = VEC_alloc (sd_region, heap, 3);
	struct scopdet_info sinfo;
	VEC (basic_block, heap) *dominated;
	int i;
	basic_block dom_bb;
	basic_block last_bb = NULL;
	edge e;
	result.exits = false;

	/* First check the successors of BB, and check if it is possible to join
	   the different branches.  */
	for (i = 0; VEC_iterate (edge, bb->succs, i, e); i++)
	  {
	    /* Ignore loop exits.  They will be handled after the loop body.  */
	    if (is_loop_exit (loop, e->dest))
	      {
		result.exits = true;
		continue;
	      }

	    /* Do not follow edges that lead to the end of the
	       conditions block.  For example, in

	       |   0
	       |  /|\
	       | 1 2 |
	       | | | |
	       | 3 4 |
	       |  \|/
	       |   6

	       the edge from 0 => 6.  Only check if all paths lead to
	       the same node 6.  */

	    if (!single_pred_p (e->dest))
	      {
		/* Check, if edge leads directly to the end of this
		   condition.  */
		if (!last_bb)
		  {
		    last_bb = e->dest;
		  }

		if (e->dest != last_bb)
		  result.difficult = true;

		continue;
	      }

	    if (!dominated_by_p (CDI_DOMINATORS, e->dest, bb))
	      {
		result.difficult = true;
		continue;
	      }

	    sinfo = build_scops_1 (e->dest, &tmp_scops, loop);

	    result.exits |= sinfo.exits;
	    result.last = sinfo.last;
	    result.difficult |= sinfo.difficult;

	    /* Checks, if all branches end at the same point.
	       If that is true, the condition stays joinable.
	       Have a look at the example above.  */
	    if (sinfo.last && single_succ_p (sinfo.last))
	      {
		basic_block next_tmp = single_succ (sinfo.last);

		if (!last_bb)
		    last_bb = next_tmp;

		if (next_tmp != last_bb)
		  result.difficult = true;
	      }
	    else
	      result.difficult = true;
	  }

	/* If the condition is joinable.  */
	if (!result.exits && !result.difficult)
	  {
	    /* Only return a next pointer if we dominate this pointer.
	       Otherwise it will be handled by the bb dominating it.  */
	    if (dominated_by_p (CDI_DOMINATORS, last_bb, bb) && last_bb != bb)
	      result.next = last_bb;
	    else
	      result.next = NULL;

	    VEC_free (sd_region, heap, tmp_scops);
	    break;
	  }

	/* Scan remaining bbs dominated by BB.  */
	dominated = get_dominated_by (CDI_DOMINATORS, bb);

	for (i = 0; VEC_iterate (basic_block, dominated, i, dom_bb); i++)
	  {
	    /* Ignore loop exits: they will be handled after the loop body.  */
	    if (loop_depth (find_common_loop (loop, dom_bb->loop_father))
		< loop_depth (loop))
	      {
		result.exits = true;
		continue;
	      }

	    /* Ignore the bbs processed above.  */
	    if (single_pred_p (dom_bb) && single_pred (dom_bb) == bb)
	      continue;

	    if (loop_depth (loop) > loop_depth (dom_bb->loop_father))
	      sinfo = build_scops_1 (dom_bb, &tmp_scops, loop_outer (loop));
	    else
	      sinfo = build_scops_1 (dom_bb, &tmp_scops, loop);


	    result.exits |= sinfo.exits;
	    result.difficult = true;
	    result.last = NULL;
	  }

	VEC_free (basic_block, heap, dominated);

	result.next = NULL;
	move_sd_regions (&tmp_scops, scops);

	break;
      }

    default:
      gcc_unreachable ();
    }

  return result;
}
1871 | |
/* Creates the SCoPs and writes entry and exit points for every SCoP.  */

static struct scopdet_info
build_scops_1 (basic_block current, VEC (sd_region, heap) **scops, loop_p loop)
{
  bool in_scop = false;
  sd_region open_scop;
  struct scopdet_info sinfo;

  /* Initialize result.  */
  struct scopdet_info result;
  result.exits = false;
  result.difficult = false;
  result.next = NULL;
  result.last = NULL;
  open_scop.entry = NULL;
  open_scop.exit = NULL;
  sinfo.last = NULL;

  /* Loop over the dominance tree.  If we meet a difficult bb, close
     the current SCoP.  Loop and condition header start a new layer,
     and can only be added if all bbs in deeper layers are simple.  */
  while (current != NULL)
    {
      sinfo = scopdet_basic_block_info (current, scops, get_bb_type (current,
								     loop));

      if (!in_scop && !(sinfo.exits || sinfo.difficult))
        {
	  /* First harmless bb: open a new region here.  */
	  open_scop.entry = current;
	  open_scop.exit = NULL;
	  in_scop = true;
        }
      else if (in_scop && (sinfo.exits || sinfo.difficult))
        {
	  /* Harmful code or open loop exits: close the region just
	     before the current bb.  */
	  open_scop.exit = current;
	  VEC_safe_push (sd_region, heap, *scops, &open_scop);
	  in_scop = false;
        }

      result.difficult |= sinfo.difficult;
      result.exits |= sinfo.exits;

      current = sinfo.next;
    }

  /* Try to close open_scop, if we are still in an open SCoP.  */
  if (in_scop)
    {
      int i;
      edge e;

      /* Prefer as exit a successor that post-dominates the last
	 visited bb.  */
      for (i = 0; VEC_iterate (edge, sinfo.last->succs, i, e); i++)
	if (dominated_by_p (CDI_POST_DOMINATORS, sinfo.last, e->dest))
	  open_scop.exit = e->dest;

      if (!open_scop.exit && open_scop.entry != sinfo.last)
	open_scop.exit = sinfo.last;

      if (open_scop.exit)
	VEC_safe_push (sd_region, heap, *scops, &open_scop);

    }

  result.last = sinfo.last;
  return result;
}
1939 | |
1940 /* Checks if a bb is contained in REGION. */ | |
1941 | |
1942 static bool | |
1943 bb_in_sd_region (basic_block bb, sd_region *region) | |
1944 { | |
1945 return dominated_by_p (CDI_DOMINATORS, bb, region->entry) | |
1946 && !(dominated_by_p (CDI_DOMINATORS, bb, region->exit) | |
1947 && !dominated_by_p (CDI_DOMINATORS, region->entry, | |
1948 region->exit)); | |
1949 } | |
1950 | |
1951 /* Returns the single entry edge of REGION, if it does not exits NULL. */ | |
1952 | |
1953 static edge | |
1954 find_single_entry_edge (sd_region *region) | |
1955 { | |
1956 edge e; | |
1957 edge_iterator ei; | |
1958 edge entry = NULL; | |
1959 | |
1960 FOR_EACH_EDGE (e, ei, region->entry->preds) | |
1961 if (!bb_in_sd_region (e->src, region)) | |
1962 { | |
1963 if (entry) | |
1964 { | |
1965 entry = NULL; | |
1966 break; | |
1967 } | |
1968 | |
1969 else | |
1970 entry = e; | |
1971 } | |
1972 | |
1973 return entry; | |
1974 } | |
1975 | |
1976 /* Returns the single exit edge of REGION, if it does not exits NULL. */ | |
1977 | |
1978 static edge | |
1979 find_single_exit_edge (sd_region *region) | |
1980 { | |
1981 edge e; | |
1982 edge_iterator ei; | |
1983 edge exit = NULL; | |
1984 | |
1985 FOR_EACH_EDGE (e, ei, region->exit->preds) | |
1986 if (bb_in_sd_region (e->src, region)) | |
1987 { | |
1988 if (exit) | |
1989 { | |
1990 exit = NULL; | |
1991 break; | |
1992 } | |
1993 | |
1994 else | |
1995 exit = e; | |
1996 } | |
1997 | |
1998 return exit; | |
1999 } | |
2000 | |
/* Create a single entry edge for REGION.  */

static void
create_single_entry_edge (sd_region *region)
{
  if (find_single_entry_edge (region))
    return;

  /* There are multiple predecessors for bb_3

     |  1  2
     |  | /
     |  |/
     |  3	<- entry
     |  |\
     |  | |
     |  4 ^
     |  | |
     |  |/
     |  5

     There are two edges (1->3, 2->3), that point from outside into the region,
     and another one (5->3), a loop latch, lead to bb_3.

     We split bb_3.

     |  1  2
     |  | /
     |  |/
     |3.0
     |  |\     (3.0 -> 3.1) = single entry edge
     |3.1 |	<- entry
     |  | |
     |  | |
     |  4 ^
     |  | |
     |  |/
     |  5

     If the loop is part of the SCoP, we have to redirect the loop latches.

     |  1  2
     |  | /
     |  |/
     |3.0
     |  |      (3.0 -> 3.1) = entry edge
     |3.1	<- entry
     |  |\
     |  | |
     |  4 ^
     |  | |
     |  |/
     |  5  */

  if (region->entry->loop_father->header != region->entry
      || dominated_by_p (CDI_DOMINATORS,
			 loop_latch_edge (region->entry->loop_father)->src,
			 region->exit))
    {
      /* Split the entry block; the new forwarder edge becomes the
	 single entry of the region.  */
      edge forwarder = split_block_after_labels (region->entry);
      region->entry = forwarder->dest;
    }
  else
    /* This case is never executed, as the loop headers seem always to have a
       single edge pointing from outside into the loop.  */
    gcc_unreachable ();

#ifdef ENABLE_CHECKING
  gcc_assert (find_single_entry_edge (region));
#endif
}
2072 | |
2073 /* Check if the sd_region, mentioned in EDGE, has no exit bb. */ | |
2074 | |
2075 static bool | |
2076 sd_region_without_exit (edge e) | |
2077 { | |
2078 sd_region *r = (sd_region *) e->aux; | |
2079 | |
2080 if (r) | |
2081 return r->exit == NULL; | |
2082 else | |
2083 return false; | |
2084 } | |
2085 | |
/* Create a single exit edge for REGION.  */

static void
create_single_exit_edge (sd_region *region)
{
  edge e;
  edge_iterator ei;
  edge forwarder = NULL;
  basic_block exit;

  if (find_single_exit_edge (region))
    return;

  /* We create a forwarder bb (5) for all edges leaving this region
     (3->5, 4->5).  All other edges leading to the same bb, are moved
     to a new bb (6).  If these edges where part of another region (2->5)
     we update the region->exit pointer, of this region.

     To identify which edge belongs to which region we depend on the e->aux
     pointer in every edge.  It points to the region of the edge or to NULL,
     if the edge is not part of any region.

     1 2 3 4	1->5 no region,		2->5 region->exit = 5,
      \| |/	3->5 region->exit = NULL,	4->5 region->exit = NULL
       5	<- exit

     changes to

     1 2 3 4	1->6 no region,			2->6 region->exit = 6,
     | | \/	3->5 no region,			4->5 no region,
     | |  5
      \| /	5->6 region->exit = 6
	6

     Now there is only a single exit edge (5->6).  */
  exit = region->exit;
  region->exit = NULL;
  forwarder = make_forwarder_block (exit, &sd_region_without_exit, NULL);

  /* Unmark the edges, that are no longer exit edges.  */
  FOR_EACH_EDGE (e, ei, forwarder->src->preds)
    if (e->aux)
      e->aux = NULL;

  /* Mark the new exit edge.  */
  single_succ_edge (forwarder->src)->aux = region;

  /* Update the exit bb of all regions, where exit edges lead to
     forwarder->dest.  */
  FOR_EACH_EDGE (e, ei, forwarder->dest->preds)
    if (e->aux)
      ((sd_region *) e->aux)->exit = forwarder->dest;

#ifdef ENABLE_CHECKING
  gcc_assert (find_single_exit_edge (region));
#endif
}
2143 | |
2144 /* Unmark the exit edges of all REGIONS. | |
2145 See comment in "create_single_exit_edge". */ | |
2146 | |
2147 static void | |
2148 unmark_exit_edges (VEC (sd_region, heap) *regions) | |
2149 { | |
2150 int i; | |
2151 sd_region *s; | |
2152 edge e; | |
2153 edge_iterator ei; | |
2154 | |
2155 for (i = 0; VEC_iterate (sd_region, regions, i, s); i++) | |
2156 FOR_EACH_EDGE (e, ei, s->exit->preds) | |
2157 e->aux = NULL; | |
2158 } | |
2159 | |
2160 | |
2161 /* Mark the exit edges of all REGIONS. | |
2162 See comment in "create_single_exit_edge". */ | |
2163 | |
2164 static void | |
2165 mark_exit_edges (VEC (sd_region, heap) *regions) | |
2166 { | |
2167 int i; | |
2168 sd_region *s; | |
2169 edge e; | |
2170 edge_iterator ei; | |
2171 | |
2172 for (i = 0; VEC_iterate (sd_region, regions, i, s); i++) | |
2173 FOR_EACH_EDGE (e, ei, s->exit->preds) | |
2174 if (bb_in_sd_region (e->src, s)) | |
2175 e->aux = s; | |
2176 } | |
2177 | |
2178 /* Free and compute again all the dominators information. */ | |
2179 | |
2180 static inline void | |
2181 recompute_all_dominators (void) | |
2182 { | |
2183 mark_irreducible_loops (); | |
2184 free_dominance_info (CDI_DOMINATORS); | |
2185 free_dominance_info (CDI_POST_DOMINATORS); | |
2186 calculate_dominance_info (CDI_DOMINATORS); | |
2187 calculate_dominance_info (CDI_POST_DOMINATORS); | |
2188 } | |
2189 | |
/* Verifies properties that GRAPHITE should maintain during translation:
   loop structure, (post-)dominators, SSA form and loop-closed SSA.
   All checks compile away unless ENABLE_CHECKING is defined, so this
   is free in release builds.  */

static inline void
graphite_verify (void)
{
#ifdef ENABLE_CHECKING
  verify_loop_structure ();
  verify_dominators (CDI_DOMINATORS);
  verify_dominators (CDI_POST_DOMINATORS);
  verify_ssa (false);
  verify_loop_closed_ssa ();
#endif
}
2203 | |
2204 /* Create for all scop regions a single entry and a single exit edge. */ | |
2205 | |
2206 static void | |
2207 create_sese_edges (VEC (sd_region, heap) *regions) | |
2208 { | |
2209 int i; | |
2210 sd_region *s; | |
2211 | |
2212 for (i = 0; VEC_iterate (sd_region, regions, i, s); i++) | |
2213 create_single_entry_edge (s); | |
2214 | |
2215 mark_exit_edges (regions); | |
2216 | |
2217 for (i = 0; VEC_iterate (sd_region, regions, i, s); i++) | |
2218 create_single_exit_edge (s); | |
2219 | |
2220 unmark_exit_edges (regions); | |
2221 | |
2222 fix_loop_structure (NULL); | |
2223 | |
2224 #ifdef ENABLE_CHECKING | |
2225 verify_loop_structure (); | |
2226 verify_dominators (CDI_DOMINATORS); | |
2227 verify_ssa (false); | |
2228 #endif | |
2229 } | |
2230 | |
2231 /* Create graphite SCoPs from an array of scop detection regions. */ | |
2232 | |
2233 static void | |
2234 build_graphite_scops (VEC (sd_region, heap) *scop_regions) | |
2235 { | |
2236 int i; | |
2237 sd_region *s; | |
2238 | |
2239 for (i = 0; VEC_iterate (sd_region, scop_regions, i, s); i++) | |
2240 { | |
2241 edge entry = find_single_entry_edge (s); | |
2242 edge exit = find_single_exit_edge (s); | |
2243 scop_p scop = new_scop (entry, exit); | |
2244 VEC_safe_push (scop_p, heap, current_scops, scop); | |
2245 | |
2246 /* Are there overlapping SCoPs? */ | |
2247 #ifdef ENABLE_CHECKING | |
2248 { | |
2249 int j; | |
2250 sd_region *s2; | |
2251 | |
2252 for (j = 0; VEC_iterate (sd_region, scop_regions, j, s2); j++) | |
2253 if (s != s2) | |
2254 gcc_assert (!bb_in_sd_region (s->entry, s2)); | |
2255 } | |
2256 #endif | |
2257 } | |
2258 } | |
2259 | |
2260 /* Find static control parts. */ | |
2261 | |
2262 static void | |
2263 build_scops (void) | |
2264 { | |
2265 struct loop *loop = current_loops->tree_root; | |
2266 VEC (sd_region, heap) *tmp_scops = VEC_alloc (sd_region, heap, 3); | |
2267 | |
2268 build_scops_1 (single_succ (ENTRY_BLOCK_PTR), &tmp_scops, loop); | |
2269 create_sese_edges (tmp_scops); | |
2270 build_graphite_scops (tmp_scops); | |
2271 VEC_free (sd_region, heap, tmp_scops); | |
2272 } | |
2273 | |
/* Gather the basic blocks belonging to the SCOP, creating a
   graphite_bb for each of them.  The blocks are visited by an
   iterative DFS whose push order determines the order of the
   resulting SCOP_BBS vector.  */

static void
build_scop_bbs (scop_p scop)
{
  /* Worklist of blocks still to visit; at most one entry per basic
     block plus one, so n_basic_blocks + 1 is a safe upper bound.  */
  basic_block *stack = XNEWVEC (basic_block, n_basic_blocks + 1);
  sbitmap visited = sbitmap_alloc (last_basic_block);
  int sp = 0;

  sbitmap_zero (visited);
  stack[sp++] = SCOP_ENTRY (scop);

  while (sp)
    {
      basic_block bb = stack[--sp];
      int depth = loop_depth (bb->loop_father);
      int num = bb->loop_father->num;
      edge_iterator ei;
      edge e;

      /* Scop's exit is not in the scop.  Exclude also bbs, which are
	 dominated by the SCoP exit.  These are e.g. loop latches.  */
      if (TEST_BIT (visited, bb->index)
	  || dominated_by_p (CDI_DOMINATORS, bb, SCOP_EXIT (scop))
	  /* Every block in the scop is dominated by scop's entry.  */
	  || !dominated_by_p (CDI_DOMINATORS, bb, SCOP_ENTRY (scop)))
	continue;

      new_graphite_bb (scop, bb);
      SET_BIT (visited, bb->index);

      /* First push the blocks that have to be processed last.  Note
	 that this means that the order in which the code is organized
	 below is important: do not reorder the following code.  */

      /* Successors leaving the current loop nest (shallower depth).  */
      FOR_EACH_EDGE (e, ei, bb->succs)
	if (! TEST_BIT (visited, e->dest->index)
	    && (int) loop_depth (e->dest->loop_father) < depth)
	  stack[sp++] = e->dest;

      /* Successors at the same depth but in a different loop.  */
      FOR_EACH_EDGE (e, ei, bb->succs)
	if (! TEST_BIT (visited, e->dest->index)
	    && (int) loop_depth (e->dest->loop_father) == depth
	    && e->dest->loop_father->num != num)
	  stack[sp++] = e->dest;

      /* Join points of the same loop (several predecessors).  */
      FOR_EACH_EDGE (e, ei, bb->succs)
	if (! TEST_BIT (visited, e->dest->index)
	    && (int) loop_depth (e->dest->loop_father) == depth
	    && e->dest->loop_father->num == num
	    && EDGE_COUNT (e->dest->preds) > 1)
	  stack[sp++] = e->dest;

      /* Straight-line successors of the same loop.  */
      FOR_EACH_EDGE (e, ei, bb->succs)
	if (! TEST_BIT (visited, e->dest->index)
	    && (int) loop_depth (e->dest->loop_father) == depth
	    && e->dest->loop_father->num == num
	    && EDGE_COUNT (e->dest->preds) == 1)
	  stack[sp++] = e->dest;

      /* Successors entering an inner loop are processed first.  */
      FOR_EACH_EDGE (e, ei, bb->succs)
	if (! TEST_BIT (visited, e->dest->index)
	    && (int) loop_depth (e->dest->loop_father) > depth)
	  stack[sp++] = e->dest;
    }

  free (stack);
  sbitmap_free (visited);
}
2342 | |
/* Returns the number of reduction phi nodes in LOOP, i.e. the number
   of scalar phis in the loop header that are not simple affine
   induction variables.  */

static int
nb_reductions_in_loop (loop_p loop)
{
  int res = 0;
  gimple_stmt_iterator gsi;

  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple phi = gsi_stmt (gsi);
      tree scev;
      affine_iv iv;

      /* Only scalar (register) phis can be inductions/reductions.  */
      if (!is_gimple_reg (PHI_RESULT (phi)))
	continue;

      /* NOTE(review): SCEV is not read afterwards; presumably these two
	 calls are kept for their side effect of populating the scalar
	 evolution cache before simple_iv — confirm before removing.  */
      scev = analyze_scalar_evolution (loop, PHI_RESULT (phi));
      scev = instantiate_parameters (loop, scev);
      if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
	res++;
    }

  return res;
}
2368 | |
2369 /* A LOOP is in normal form when it contains only one scalar phi node | |
2370 that defines the main induction variable of the loop, only one | |
2371 increment of the IV, and only one exit condition. */ | |
2372 | |
2373 static tree | |
2374 graphite_loop_normal_form (loop_p loop) | |
2375 { | |
2376 struct tree_niter_desc niter; | |
2377 tree nit; | |
2378 gimple_seq stmts; | |
2379 edge exit = single_dom_exit (loop); | |
2380 bool known_niter = number_of_iterations_exit (loop, exit, &niter, false); | |
2381 | |
2382 gcc_assert (known_niter); | |
2383 | |
2384 nit = force_gimple_operand (unshare_expr (niter.niter), &stmts, true, | |
2385 NULL_TREE); | |
2386 if (stmts) | |
2387 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts); | |
2388 | |
2389 /* One IV per loop. */ | |
2390 if (nb_reductions_in_loop (loop) > 0) | |
2391 return NULL_TREE; | |
2392 | |
2393 return canonicalize_loop_ivs (loop, NULL, &nit); | |
2394 } | |
2395 | |
2396 /* Record LOOP as occuring in SCOP. Returns true when the operation | |
2397 was successful. */ | |
2398 | |
2399 static bool | |
2400 scop_record_loop (scop_p scop, loop_p loop) | |
2401 { | |
2402 tree induction_var; | |
2403 name_tree oldiv; | |
2404 | |
2405 if (bitmap_bit_p (SCOP_LOOPS (scop), loop->num)) | |
2406 return true; | |
2407 | |
2408 bitmap_set_bit (SCOP_LOOPS (scop), loop->num); | |
2409 VEC_safe_push (loop_p, heap, SCOP_LOOP_NEST (scop), loop); | |
2410 | |
2411 induction_var = graphite_loop_normal_form (loop); | |
2412 if (!induction_var) | |
2413 return false; | |
2414 | |
2415 oldiv = XNEW (struct name_tree); | |
2416 oldiv->t = induction_var; | |
2417 oldiv->name = get_name (SSA_NAME_VAR (oldiv->t)); | |
2418 oldiv->loop = loop; | |
2419 VEC_safe_push (name_tree, heap, SCOP_OLDIVS (scop), oldiv); | |
2420 return true; | |
2421 } | |
2422 | |
/* Build the loop nests contained in SCOP.  Returns true when the
   operation was successful, false when some loop could not be brought
   into normal form.  */

static bool
build_scop_loop_nests (scop_p scop)
{
  unsigned i;
  basic_block bb;
  struct loop *loop0, *loop1;

  FOR_EACH_BB (bb)
    if (bb_in_sese_p (bb, SCOP_REGION (scop)))
      {
	struct loop *loop = bb->loop_father;

	/* Only add loops if they are completely contained in the SCoP.  */
	if (loop->header == bb
	    && bb_in_sese_p (loop->latch, SCOP_REGION (scop)))
	  {
	    if (!scop_record_loop (scop, loop))
	      return false;
	  }
      }

  /* Make sure that the loops in the SCOP_LOOP_NEST are ordered.  It
     can be the case that an inner loop is inserted before an outer
     loop.  To avoid this, semi-sort once: a single bubble pass that
     swaps adjacent entries whose loop numbers are out of order.
     NOTE(review): this relies on loop numbers increasing from outer
     to inner, and a single pass fixing at most one inversion per
     position — presumably sufficient for the insertion order produced
     above.  */
  for (i = 0; VEC_iterate (loop_p, SCOP_LOOP_NEST (scop), i, loop0); i++)
    {
      if (VEC_length (loop_p, SCOP_LOOP_NEST (scop)) == i + 1)
	break;

      loop1 = VEC_index (loop_p, SCOP_LOOP_NEST (scop), i + 1);
      if (loop0->num > loop1->num)
	{
	  VEC_replace (loop_p, SCOP_LOOP_NEST (scop), i, loop1);
	  VEC_replace (loop_p, SCOP_LOOP_NEST (scop), i + 1, loop0);
	}
    }

  return true;
}
2465 | |
2466 /* Calculate the number of loops around LOOP in the SCOP. */ | |
2467 | |
2468 static inline int | |
2469 nb_loops_around_loop_in_scop (struct loop *l, scop_p scop) | |
2470 { | |
2471 int d = 0; | |
2472 | |
2473 for (; loop_in_sese_p (l, SCOP_REGION (scop)); d++, l = loop_outer (l)); | |
2474 | |
2475 return d; | |
2476 } | |
2477 | |
2478 /* Calculate the number of loops around GB in the current SCOP. */ | |
2479 | |
2480 int | |
2481 nb_loops_around_gb (graphite_bb_p gb) | |
2482 { | |
2483 return nb_loops_around_loop_in_scop (gbb_loop (gb), GBB_SCOP (gb)); | |
2484 } | |
2485 | |
2486 /* Returns the dimensionality of an enclosing loop iteration domain | |
2487 with respect to enclosing SCoP for a given data reference REF. The | |
2488 returned dimensionality is homogeneous (depth of loop nest + number | |
2489 of SCoP parameters + const). */ | |
2490 | |
2491 int | |
2492 ref_nb_loops (data_reference_p ref) | |
2493 { | |
2494 loop_p loop = loop_containing_stmt (DR_STMT (ref)); | |
2495 scop_p scop = DR_SCOP (ref); | |
2496 | |
2497 return nb_loops_around_loop_in_scop (loop, scop) + scop_nb_params (scop) + 2; | |
2498 } | |
2499 | |
2500 /* Build dynamic schedules for all the BBs. */ | |
2501 | |
2502 static void | |
2503 build_scop_dynamic_schedules (scop_p scop) | |
2504 { | |
2505 int i, dim, loop_num, row, col; | |
2506 graphite_bb_p gb; | |
2507 | |
2508 for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gb); i++) | |
2509 { | |
2510 loop_num = GBB_BB (gb)->loop_father->num; | |
2511 | |
2512 if (loop_num != 0) | |
2513 { | |
2514 dim = nb_loops_around_gb (gb); | |
2515 GBB_DYNAMIC_SCHEDULE (gb) = cloog_matrix_alloc (dim, dim); | |
2516 | |
2517 for (row = 0; row < GBB_DYNAMIC_SCHEDULE (gb)->NbRows; row++) | |
2518 for (col = 0; col < GBB_DYNAMIC_SCHEDULE (gb)->NbColumns; col++) | |
2519 if (row == col) | |
2520 value_set_si (GBB_DYNAMIC_SCHEDULE (gb)->p[row][col], 1); | |
2521 else | |
2522 value_set_si (GBB_DYNAMIC_SCHEDULE (gb)->p[row][col], 0); | |
2523 } | |
2524 else | |
2525 GBB_DYNAMIC_SCHEDULE (gb) = NULL; | |
2526 } | |
2527 } | |
2528 | |
2529 /* Returns the number of loops that are identical at the beginning of | |
2530 the vectors A and B. */ | |
2531 | |
2532 static int | |
2533 compare_prefix_loops (VEC (loop_p, heap) *a, VEC (loop_p, heap) *b) | |
2534 { | |
2535 int i; | |
2536 loop_p ea; | |
2537 int lb; | |
2538 | |
2539 if (!a || !b) | |
2540 return 0; | |
2541 | |
2542 lb = VEC_length (loop_p, b); | |
2543 | |
2544 for (i = 0; VEC_iterate (loop_p, a, i, ea); i++) | |
2545 if (i >= lb | |
2546 || ea != VEC_index (loop_p, b, i)) | |
2547 return i; | |
2548 | |
2549 return 0; | |
2550 } | |
2551 | |
/* Build for each BB in SCOP the static schedule.

   The STATIC_SCHEDULE is defined like this:

   A
   for (i: ...)
     {
       for (j: ...)
         {
           B
           C
         }

       for (k: ...)
         {
           D
           E
         }
     }
   F

   Static schedules for A to F:

     DEPTH
     0 1 2
   A 0
   B 1 0 0
   C 1 0 1
   D 1 1 0
   E 1 1 1
   F 2
*/

static void
build_scop_canonical_schedules (scop_p scop)
{
  int i;
  graphite_bb_p gb;
  int nb_loops = scop_nb_loops (scop);
  /* Running schedule vector; one slot per loop depth plus the
     top-level sequence counter at index 0.  */
  lambda_vector static_schedule = lambda_vector_new (nb_loops + 1);
  VEC (loop_p, heap) *loops_previous = NULL;

  /* We have to start schedules at 0 on the first component and
     because we cannot compare_prefix_loops against a previous loop,
     prefix will be equal to zero, and that index will be
     incremented before copying.  */
  static_schedule[0] = -1;

  for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gb); i++)
    {
      /* Number of leading loops shared with the previous bb: the
	 schedule components up to PREFIX stay, deeper ones restart.  */
      int prefix = compare_prefix_loops (loops_previous, GBB_LOOPS (gb));
      int nb = gbb_nb_loops (gb);

      loops_previous = GBB_LOOPS (gb);
      /* Reset the components below the common prefix ...  */
      memset (&(static_schedule[prefix + 1]), 0, sizeof (int) * (nb_loops - prefix));
      /* ... and advance the sequence counter at the prefix depth.  */
      ++static_schedule[prefix];
      GBB_STATIC_SCHEDULE (gb) = lambda_vector_new (nb + 1);
      lambda_vector_copy (static_schedule,
			  GBB_STATIC_SCHEDULE (gb), nb + 1);
    }
}
2613 | |
2614 /* Build the LOOPS vector for all bbs in SCOP. */ | |
2615 | |
2616 static void | |
2617 build_bb_loops (scop_p scop) | |
2618 { | |
2619 graphite_bb_p gb; | |
2620 int i; | |
2621 | |
2622 for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gb); i++) | |
2623 { | |
2624 loop_p loop; | |
2625 int depth; | |
2626 | |
2627 depth = nb_loops_around_gb (gb) - 1; | |
2628 | |
2629 GBB_LOOPS (gb) = VEC_alloc (loop_p, heap, 3); | |
2630 VEC_safe_grow_cleared (loop_p, heap, GBB_LOOPS (gb), depth + 1); | |
2631 | |
2632 loop = GBB_BB (gb)->loop_father; | |
2633 | |
2634 while (scop_contains_loop (scop, loop)) | |
2635 { | |
2636 VEC_replace (loop_p, GBB_LOOPS (gb), depth, loop); | |
2637 loop = loop_outer (loop); | |
2638 depth--; | |
2639 } | |
2640 } | |
2641 } | |
2642 | |
2643 /* Get the index for parameter VAR in SCOP. */ | |
2644 | |
2645 static int | |
2646 param_index (tree var, scop_p scop) | |
2647 { | |
2648 int i; | |
2649 name_tree p; | |
2650 name_tree nvar; | |
2651 | |
2652 gcc_assert (TREE_CODE (var) == SSA_NAME); | |
2653 | |
2654 for (i = 0; VEC_iterate (name_tree, SCOP_PARAMS (scop), i, p); i++) | |
2655 if (p->t == var) | |
2656 return i; | |
2657 | |
2658 gcc_assert (SCOP_ADD_PARAMS (scop)); | |
2659 | |
2660 nvar = XNEW (struct name_tree); | |
2661 nvar->t = var; | |
2662 nvar->name = NULL; | |
2663 VEC_safe_push (name_tree, heap, SCOP_PARAMS (scop), nvar); | |
2664 return VEC_length (name_tree, SCOP_PARAMS (scop)) - 1; | |
2665 } | |
2666 | |
/* Scan the chrec/affine expression E and translate it into the
   constraint domain matrix C at row R.  Coefficients are added to the
   row, or subtracted when SUBTRACT is true.  K is the multiplier
   accumulated for the parameter coefficients (it is scaled by
   MULT_EXPR factors along the way).  When C is NULL, no matrix is
   filled in and the scan only records newly discovered parameters via
   param_index.  */

static void
scan_tree_for_params (scop_p s, tree e, CloogMatrix *c, int r, Value k,
		      bool subtract)
{
  int cst_col, param_col;

  if (e == chrec_dont_know)
    return;

  switch (TREE_CODE (e))
    {
    case POLYNOMIAL_CHREC:
      {
	tree left = CHREC_LEFT (e);
	tree right = CHREC_RIGHT (e);
	int var = CHREC_VARIABLE (e);

	/* Only chrecs with a constant step can be represented.  */
	if (TREE_CODE (right) != INTEGER_CST)
	  return;

	if (c)
	  {
	    /* The step becomes the coefficient of the loop iterator
	       column corresponding to the chrec's loop.  */
	    int loop_col = scop_gimple_loop_depth (s, get_loop (var)) + 1;

	    if (subtract)
	      value_sub_int (c->p[r][loop_col], c->p[r][loop_col],
			     int_cst_value (right));
	    else
	      value_add_int (c->p[r][loop_col], c->p[r][loop_col],
			     int_cst_value (right));
	  }

	switch (TREE_CODE (left))
	  {
	  case POLYNOMIAL_CHREC:
	    scan_tree_for_params (s, left, c, r, k, subtract);
	    return;

	  case INTEGER_CST:
	    /* Constant part.  */
	    if (c)
	      {
		int v = int_cst_value (left);
		cst_col = c->NbColumns - 1;

		/* Fold the sign of the constant into SUBTRACT.  */
		if (v < 0)
		  {
		    v = -v;
		    subtract = subtract ? false : true;
		  }

		if (subtract)
		  value_sub_int (c->p[r][cst_col], c->p[r][cst_col], v);
		else
		  value_add_int (c->p[r][cst_col], c->p[r][cst_col], v);
	      }
	    return;

	  default:
	    scan_tree_for_params (s, left, c, r, k, subtract);
	    return;
	  }
      }
      break;

    case MULT_EXPR:
      /* Exactly one operand is symbolic; scale the multiplier K by
	 the constant operand and recurse into the symbolic one.  */
      if (chrec_contains_symbols (TREE_OPERAND (e, 0)))
	{
	  if (c)
	    {
	      Value val;
	      gcc_assert (host_integerp (TREE_OPERAND (e, 1), 0));
	      value_init (val);
	      value_set_si (val, int_cst_value (TREE_OPERAND (e, 1)));
	      value_multiply (k, k, val);
	      value_clear (val);
	    }
	  scan_tree_for_params (s, TREE_OPERAND (e, 0), c, r, k, subtract);
	}
      else
	{
	  if (c)
	    {
	      Value val;
	      gcc_assert (host_integerp (TREE_OPERAND (e, 0), 0));
	      value_init (val);
	      value_set_si (val, int_cst_value (TREE_OPERAND (e, 0)));
	      value_multiply (k, k, val);
	      value_clear (val);
	    }
	  scan_tree_for_params (s, TREE_OPERAND (e, 1), c, r, k, subtract);
	}
      break;

    case PLUS_EXPR:
    case POINTER_PLUS_EXPR:
      scan_tree_for_params (s, TREE_OPERAND (e, 0), c, r, k, subtract);
      scan_tree_for_params (s, TREE_OPERAND (e, 1), c, r, k, subtract);
      break;

    case MINUS_EXPR:
      /* The second operand contributes with the opposite sign.  */
      scan_tree_for_params (s, TREE_OPERAND (e, 0), c, r, k, subtract);
      scan_tree_for_params (s, TREE_OPERAND (e, 1), c, r, k, !subtract);
      break;

    case NEGATE_EXPR:
      scan_tree_for_params (s, TREE_OPERAND (e, 0), c, r, k, !subtract);
      break;

    case SSA_NAME:
      /* A parameter of the SCoP: record it (possibly discovering it)
	 and accumulate K into its column.  */
      param_col = param_index (e, s);

      if (c)
	{
	  /* Parameter columns come right before the constant column.  */
	  param_col += c->NbColumns - scop_nb_params (s) - 1;

	  if (subtract)
	    value_subtract (c->p[r][param_col], c->p[r][param_col], k);
	  else
	    value_addto (c->p[r][param_col], c->p[r][param_col], k);
	}
      break;

    case INTEGER_CST:
      if (c)
	{
	  int v = int_cst_value (e);
	  cst_col = c->NbColumns - 1;

	  /* Fold the sign of the constant into SUBTRACT.  */
	  if (v < 0)
	    {
	      v = -v;
	      subtract = subtract ? false : true;
	    }

	  if (subtract)
	    value_sub_int (c->p[r][cst_col], c->p[r][cst_col], v);
	  else
	    value_add_int (c->p[r][cst_col], c->p[r][cst_col], v);
	}
      break;

    CASE_CONVERT:
    case NON_LVALUE_EXPR:
      /* Conversions do not change the affine value; look through.  */
      scan_tree_for_params (s, TREE_OPERAND (e, 0), c, r, k, subtract);
      break;

    default:
      gcc_unreachable ();
      break;
    }
}
2823 | |
/* Data structure for idx_record_params: bundles the context that
   for_each_index passes through its opaque DTA pointer.  */

struct irp_data
{
  struct loop *loop;  /* Loop enclosing the analyzed data reference.  */
  scop_p scop;        /* SCoP in which the parameters get recorded.  */
};
2831 | |
2832 /* For a data reference with an ARRAY_REF as its BASE, record the | |
2833 parameters occurring in IDX. DTA is passed in as complementary | |
2834 information, and is used by the automatic walker function. This | |
2835 function is a callback for for_each_index. */ | |
2836 | |
2837 static bool | |
2838 idx_record_params (tree base, tree *idx, void *dta) | |
2839 { | |
2840 struct irp_data *data = (struct irp_data *) dta; | |
2841 | |
2842 if (TREE_CODE (base) != ARRAY_REF) | |
2843 return true; | |
2844 | |
2845 if (TREE_CODE (*idx) == SSA_NAME) | |
2846 { | |
2847 tree scev; | |
2848 scop_p scop = data->scop; | |
2849 struct loop *loop = data->loop; | |
2850 Value one; | |
2851 | |
2852 scev = analyze_scalar_evolution (loop, *idx); | |
2853 scev = instantiate_scev (block_before_scop (scop), loop, scev); | |
2854 | |
2855 value_init (one); | |
2856 value_set_si (one, 1); | |
2857 scan_tree_for_params (scop, scev, NULL, 0, one, false); | |
2858 value_clear (one); | |
2859 } | |
2860 | |
2861 return true; | |
2862 } | |
2863 | |
2864 /* Find parameters with respect to SCOP in BB. We are looking in memory | |
2865 access functions, conditions and loop bounds. */ | |
2866 | |
2867 static void | |
2868 find_params_in_bb (scop_p scop, graphite_bb_p gb) | |
2869 { | |
2870 int i; | |
2871 data_reference_p dr; | |
2872 gimple stmt; | |
2873 loop_p father = GBB_BB (gb)->loop_father; | |
2874 | |
2875 for (i = 0; VEC_iterate (data_reference_p, GBB_DATA_REFS (gb), i, dr); i++) | |
2876 { | |
2877 struct irp_data irp; | |
2878 | |
2879 irp.loop = father; | |
2880 irp.scop = scop; | |
2881 for_each_index (&dr->ref, idx_record_params, &irp); | |
2882 } | |
2883 | |
2884 /* Find parameters in conditional statements. */ | |
2885 for (i = 0; VEC_iterate (gimple, GBB_CONDITIONS (gb), i, stmt); i++) | |
2886 { | |
2887 Value one; | |
2888 loop_p loop = father; | |
2889 | |
2890 tree lhs, rhs; | |
2891 | |
2892 lhs = gimple_cond_lhs (stmt); | |
2893 lhs = analyze_scalar_evolution (loop, lhs); | |
2894 lhs = instantiate_scev (block_before_scop (scop), loop, lhs); | |
2895 | |
2896 rhs = gimple_cond_rhs (stmt); | |
2897 rhs = analyze_scalar_evolution (loop, rhs); | |
2898 rhs = instantiate_scev (block_before_scop (scop), loop, rhs); | |
2899 | |
2900 value_init (one); | |
2901 scan_tree_for_params (scop, lhs, NULL, 0, one, false); | |
2902 value_set_si (one, 1); | |
2903 scan_tree_for_params (scop, rhs, NULL, 0, one, false); | |
2904 value_clear (one); | |
2905 } | |
2906 } | |
2907 | |
2908 /* Saves in NV the name of variable P->T. */ | |
2909 | |
2910 static void | |
2911 save_var_name (char **nv, int i, name_tree p) | |
2912 { | |
2913 const char *name = get_name (SSA_NAME_VAR (p->t)); | |
2914 | |
2915 if (name) | |
2916 { | |
2917 int len = strlen (name) + 16; | |
2918 nv[i] = XNEWVEC (char, len); | |
2919 snprintf (nv[i], len, "%s_%d", name, SSA_NAME_VERSION (p->t)); | |
2920 } | |
2921 else | |
2922 { | |
2923 nv[i] = XNEWVEC (char, 16); | |
2924 snprintf (nv[i], 2 + 16, "T_%d", SSA_NAME_VERSION (p->t)); | |
2925 } | |
2926 | |
2927 p->name = nv[i]; | |
2928 } | |
2929 | |
2930 /* Return the maximal loop depth in SCOP. */ | |
2931 | |
2932 static int | |
2933 scop_max_loop_depth (scop_p scop) | |
2934 { | |
2935 int i; | |
2936 graphite_bb_p gbb; | |
2937 int max_nb_loops = 0; | |
2938 | |
2939 for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gbb); i++) | |
2940 { | |
2941 int nb_loops = gbb_nb_loops (gbb); | |
2942 if (max_nb_loops < nb_loops) | |
2943 max_nb_loops = nb_loops; | |
2944 } | |
2945 | |
2946 return max_nb_loops; | |
2947 } | |
2948 | |
2949 /* Initialize Cloog's parameter names from the names used in GIMPLE. | |
2950 Initialize Cloog's iterator names, using 'graphite_iterator_%d' | |
2951 from 0 to scop_nb_loops (scop). */ | |
2952 | |
2953 static void | |
2954 initialize_cloog_names (scop_p scop) | |
2955 { | |
2956 int i, nb_params = VEC_length (name_tree, SCOP_PARAMS (scop)); | |
2957 char **params = XNEWVEC (char *, nb_params); | |
2958 int nb_iterators = scop_max_loop_depth (scop); | |
2959 int nb_scattering= cloog_program_nb_scattdims (SCOP_PROG (scop)); | |
2960 char **iterators = XNEWVEC (char *, nb_iterators * 2); | |
2961 char **scattering = XNEWVEC (char *, nb_scattering); | |
2962 name_tree p; | |
2963 | |
2964 for (i = 0; VEC_iterate (name_tree, SCOP_PARAMS (scop), i, p); i++) | |
2965 save_var_name (params, i, p); | |
2966 | |
2967 cloog_names_set_nb_parameters (cloog_program_names (SCOP_PROG (scop)), | |
2968 nb_params); | |
2969 cloog_names_set_parameters (cloog_program_names (SCOP_PROG (scop)), | |
2970 params); | |
2971 | |
2972 for (i = 0; i < nb_iterators; i++) | |
2973 { | |
2974 int len = 18 + 16; | |
2975 iterators[i] = XNEWVEC (char, len); | |
2976 snprintf (iterators[i], len, "graphite_iterator_%d", i); | |
2977 } | |
2978 | |
2979 cloog_names_set_nb_iterators (cloog_program_names (SCOP_PROG (scop)), | |
2980 nb_iterators); | |
2981 cloog_names_set_iterators (cloog_program_names (SCOP_PROG (scop)), | |
2982 iterators); | |
2983 | |
2984 for (i = 0; i < nb_scattering; i++) | |
2985 { | |
2986 int len = 2 + 16; | |
2987 scattering[i] = XNEWVEC (char, len); | |
2988 snprintf (scattering[i], len, "s_%d", i); | |
2989 } | |
2990 | |
2991 cloog_names_set_nb_scattering (cloog_program_names (SCOP_PROG (scop)), | |
2992 nb_scattering); | |
2993 cloog_names_set_scattering (cloog_program_names (SCOP_PROG (scop)), | |
2994 scattering); | |
2995 } | |
2996 | |
2997 /* Record the parameters used in the SCOP. A variable is a parameter | |
2998 in a scop if it does not vary during the execution of that scop. */ | |
2999 | |
3000 static void | |
3001 find_scop_parameters (scop_p scop) | |
3002 { | |
3003 graphite_bb_p gb; | |
3004 unsigned i; | |
3005 struct loop *loop; | |
3006 Value one; | |
3007 | |
3008 value_init (one); | |
3009 value_set_si (one, 1); | |
3010 | |
3011 /* Find the parameters used in the loop bounds. */ | |
3012 for (i = 0; VEC_iterate (loop_p, SCOP_LOOP_NEST (scop), i, loop); i++) | |
3013 { | |
3014 tree nb_iters = number_of_latch_executions (loop); | |
3015 | |
3016 if (!chrec_contains_symbols (nb_iters)) | |
3017 continue; | |
3018 | |
3019 nb_iters = analyze_scalar_evolution (loop, nb_iters); | |
3020 nb_iters = instantiate_scev (block_before_scop (scop), loop, nb_iters); | |
3021 scan_tree_for_params (scop, nb_iters, NULL, 0, one, false); | |
3022 } | |
3023 | |
3024 value_clear (one); | |
3025 | |
3026 /* Find the parameters used in data accesses. */ | |
3027 for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gb); i++) | |
3028 find_params_in_bb (scop, gb); | |
3029 | |
3030 SCOP_ADD_PARAMS (scop) = false; | |
3031 } | |
3032 | |
3033 /* Build the context constraints for SCOP: constraints and relations | |
3034 on parameters. */ | |
3035 | |
3036 static void | |
3037 build_scop_context (scop_p scop) | |
3038 { | |
3039 int nb_params = scop_nb_params (scop); | |
3040 CloogMatrix *matrix = cloog_matrix_alloc (1, nb_params + 2); | |
3041 | |
3042 /* Insert '0 >= 0' in the context matrix, as it is not allowed to be | |
3043 empty. */ | |
3044 | |
3045 value_set_si (matrix->p[0][0], 1); | |
3046 | |
3047 value_set_si (matrix->p[0][nb_params + 1], 0); | |
3048 | |
3049 cloog_program_set_context (SCOP_PROG (scop), | |
3050 cloog_domain_matrix2domain (matrix)); | |
3051 cloog_matrix_free (matrix); | |
3052 } | |
3053 | |
/* Returns the graphite_bb stashed in BB->aux.
   NOTE(review): presumably NULL when BB was never added to a SCoP —
   confirm against new_graphite_bb's aux handling.  */

static inline graphite_bb_p
gbb_from_bb (basic_block bb)
{
  return (graphite_bb_p) bb->aux;
}
3061 | |
/* Builds the constraint matrix for LOOP in SCOP.  NB_OUTER_LOOPS is the
   number of loops surrounding LOOP in SCOP.  OUTER_CSTR gives the
   constraints matrix for the surrounding loops.  The resulting system
   is copied into GBB_DOMAIN of every basic block belonging to LOOP,
   and the function recurses on nested and sibling loops of the SCoP.  */

static void
build_loop_iteration_domains (scop_p scop, struct loop *loop,
                              CloogMatrix *outer_cstr, int nb_outer_loops)
{
  int i, j, row;
  CloogMatrix *cstr;
  graphite_bb_p gb;

  /* One extra row for the "0 <= loop_i" constraint, one extra column
     for the induction variable of this loop.  */
  int nb_rows = outer_cstr->NbRows + 1;
  int nb_cols = outer_cstr->NbColumns + 1;

  /* Last column of CSTR is the column of constants.  */
  int cst_col = nb_cols - 1;

  /* The column for the current loop is just after the columns of
     other outer loops.  */
  int loop_col = nb_outer_loops + 1;

  tree nb_iters = number_of_latch_executions (loop);

  /* When the number of iterations is a constant or a parameter, we
     add a constraint for the upper bound of the loop.  So add a row
     to the constraint matrix before allocating it.  */
  if (TREE_CODE (nb_iters) == INTEGER_CST
      || !chrec_contains_undetermined (nb_iters))
    nb_rows++;

  cstr = cloog_matrix_alloc (nb_rows, nb_cols);

  /* Copy the outer constraints.  */
  for (i = 0; i < outer_cstr->NbRows; i++)
    {
      /* Copy the eq/ineq and loops columns.  */
      for (j = 0; j < loop_col; j++)
        value_assign (cstr->p[i][j], outer_cstr->p[i][j]);

      /* Leave an empty column in CSTR for the current loop, and then
         copy the parameter columns.  */
      for (j = loop_col; j < outer_cstr->NbColumns; j++)
        value_assign (cstr->p[i][j + 1], outer_cstr->p[i][j]);
    }

  /* 0 <= loop_i */
  row = outer_cstr->NbRows;
  value_set_si (cstr->p[row][0], 1);
  value_set_si (cstr->p[row][loop_col], 1);

  /* loop_i <= nb_iters */
  if (TREE_CODE (nb_iters) == INTEGER_CST)
    {
      row++;
      value_set_si (cstr->p[row][0], 1);
      value_set_si (cstr->p[row][loop_col], -1);

      value_set_si (cstr->p[row][cst_col],
                    int_cst_value (nb_iters));
    }
  else if (!chrec_contains_undetermined (nb_iters))
    {
      /* Otherwise nb_iters contains parameters: scan the nb_iters
         expression and build its matrix representation.  */
      Value one;

      row++;
      value_set_si (cstr->p[row][0], 1);
      value_set_si (cstr->p[row][loop_col], -1);

      nb_iters = analyze_scalar_evolution (loop, nb_iters);
      nb_iters = instantiate_scev (block_before_scop (scop), loop, nb_iters);

      value_init (one);
      value_set_si (one, 1);
      scan_tree_for_params (scop, nb_iters, cstr, row, one, false);
      value_clear (one);
    }
  else
    gcc_unreachable ();

  /* Recurse on the nested loops of the SCoP with one more outer loop.  */
  if (loop->inner && loop_in_sese_p (loop->inner, SCOP_REGION (scop)))
    build_loop_iteration_domains (scop, loop->inner, cstr, nb_outer_loops + 1);

  /* Only go to the next loops, if we are not at the outermost layer.  These
     have to be handled separately, as we can be sure, that the chain at this
     layer will be connected.  */
  if (nb_outer_loops != 0 && loop->next && loop_in_sese_p (loop->next,
                                                           SCOP_REGION (scop)))
    build_loop_iteration_domains (scop, loop->next, outer_cstr, nb_outer_loops);

  /* Every basic block of LOOP receives its own copy of the finished
     constraint system.  */
  for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gb); i++)
    if (gbb_loop (gb) == loop)
      GBB_DOMAIN (gb) = cloog_matrix_copy (cstr);

  cloog_matrix_free (cstr);
}
3160 | |
3161 /* Add conditions to the domain of GB. */ | |
3162 | |
3163 static void | |
3164 add_conditions_to_domain (graphite_bb_p gb) | |
3165 { | |
3166 unsigned int i,j; | |
3167 gimple stmt; | |
3168 VEC (gimple, heap) *conditions = GBB_CONDITIONS (gb); | |
3169 CloogMatrix *domain = GBB_DOMAIN (gb); | |
3170 scop_p scop = GBB_SCOP (gb); | |
3171 | |
3172 unsigned nb_rows; | |
3173 unsigned nb_cols; | |
3174 unsigned nb_new_rows = 0; | |
3175 unsigned row; | |
3176 | |
3177 if (VEC_empty (gimple, conditions)) | |
3178 return; | |
3179 | |
3180 if (domain) | |
3181 { | |
3182 nb_rows = domain->NbRows; | |
3183 nb_cols = domain->NbColumns; | |
3184 } | |
3185 else | |
3186 { | |
3187 nb_rows = 0; | |
3188 nb_cols = nb_loops_around_gb (gb) + scop_nb_params (scop) + 2; | |
3189 } | |
3190 | |
3191 /* Count number of necessary new rows to add the conditions to the | |
3192 domain. */ | |
3193 for (i = 0; VEC_iterate (gimple, conditions, i, stmt); i++) | |
3194 { | |
3195 switch (gimple_code (stmt)) | |
3196 { | |
3197 case GIMPLE_COND: | |
3198 { | |
3199 enum tree_code code = gimple_cond_code (stmt); | |
3200 | |
3201 switch (code) | |
3202 { | |
3203 case NE_EXPR: | |
3204 case EQ_EXPR: | |
3205 /* NE and EQ statements are not supported right know. */ | |
3206 gcc_unreachable (); | |
3207 break; | |
3208 case LT_EXPR: | |
3209 case GT_EXPR: | |
3210 case LE_EXPR: | |
3211 case GE_EXPR: | |
3212 nb_new_rows++; | |
3213 break; | |
3214 default: | |
3215 gcc_unreachable (); | |
3216 break; | |
3217 } | |
3218 break; | |
3219 } | |
3220 case SWITCH_EXPR: | |
3221 /* Switch statements are not supported right know. */ | |
3222 gcc_unreachable (); | |
3223 break; | |
3224 | |
3225 default: | |
3226 gcc_unreachable (); | |
3227 break; | |
3228 } | |
3229 } | |
3230 | |
3231 | |
3232 /* Enlarge the matrix. */ | |
3233 { | |
3234 CloogMatrix *new_domain; | |
3235 new_domain = cloog_matrix_alloc (nb_rows + nb_new_rows, nb_cols); | |
3236 | |
3237 if (domain) | |
3238 { | |
3239 for (i = 0; i < nb_rows; i++) | |
3240 for (j = 0; j < nb_cols; j++) | |
3241 value_assign (new_domain->p[i][j], domain->p[i][j]); | |
3242 | |
3243 cloog_matrix_free (domain); | |
3244 } | |
3245 | |
3246 domain = new_domain; | |
3247 GBB_DOMAIN (gb) = new_domain; | |
3248 } | |
3249 | |
3250 /* Add the conditions to the new enlarged domain matrix. */ | |
3251 row = nb_rows; | |
3252 for (i = 0; VEC_iterate (gimple, conditions, i, stmt); i++) | |
3253 { | |
3254 switch (gimple_code (stmt)) | |
3255 { | |
3256 case GIMPLE_COND: | |
3257 { | |
3258 Value one; | |
3259 enum tree_code code; | |
3260 tree left; | |
3261 tree right; | |
3262 loop_p loop = GBB_BB (gb)->loop_father; | |
3263 | |
3264 left = gimple_cond_lhs (stmt); | |
3265 right = gimple_cond_rhs (stmt); | |
3266 | |
3267 left = analyze_scalar_evolution (loop, left); | |
3268 right = analyze_scalar_evolution (loop, right); | |
3269 | |
3270 left = instantiate_scev (block_before_scop (scop), loop, left); | |
3271 right = instantiate_scev (block_before_scop (scop), loop, right); | |
3272 | |
3273 code = gimple_cond_code (stmt); | |
3274 | |
3275 /* The conditions for ELSE-branches are inverted. */ | |
3276 if (VEC_index (gimple, gb->condition_cases, i) == NULL) | |
3277 code = invert_tree_comparison (code, false); | |
3278 | |
3279 switch (code) | |
3280 { | |
3281 case NE_EXPR: | |
3282 /* NE statements are not supported right know. */ | |
3283 gcc_unreachable (); | |
3284 break; | |
3285 case EQ_EXPR: | |
3286 value_set_si (domain->p[row][0], 1); | |
3287 value_init (one); | |
3288 value_set_si (one, 1); | |
3289 scan_tree_for_params (scop, left, domain, row, one, true); | |
3290 value_set_si (one, 1); | |
3291 scan_tree_for_params (scop, right, domain, row, one, false); | |
3292 row++; | |
3293 value_set_si (domain->p[row][0], 1); | |
3294 value_set_si (one, 1); | |
3295 scan_tree_for_params (scop, left, domain, row, one, false); | |
3296 value_set_si (one, 1); | |
3297 scan_tree_for_params (scop, right, domain, row, one, true); | |
3298 value_clear (one); | |
3299 row++; | |
3300 break; | |
3301 case LT_EXPR: | |
3302 value_set_si (domain->p[row][0], 1); | |
3303 value_init (one); | |
3304 value_set_si (one, 1); | |
3305 scan_tree_for_params (scop, left, domain, row, one, true); | |
3306 value_set_si (one, 1); | |
3307 scan_tree_for_params (scop, right, domain, row, one, false); | |
3308 value_sub_int (domain->p[row][nb_cols - 1], | |
3309 domain->p[row][nb_cols - 1], 1); | |
3310 value_clear (one); | |
3311 row++; | |
3312 break; | |
3313 case GT_EXPR: | |
3314 value_set_si (domain->p[row][0], 1); | |
3315 value_init (one); | |
3316 value_set_si (one, 1); | |
3317 scan_tree_for_params (scop, left, domain, row, one, false); | |
3318 value_set_si (one, 1); | |
3319 scan_tree_for_params (scop, right, domain, row, one, true); | |
3320 value_sub_int (domain->p[row][nb_cols - 1], | |
3321 domain->p[row][nb_cols - 1], 1); | |
3322 value_clear (one); | |
3323 row++; | |
3324 break; | |
3325 case LE_EXPR: | |
3326 value_set_si (domain->p[row][0], 1); | |
3327 value_init (one); | |
3328 value_set_si (one, 1); | |
3329 scan_tree_for_params (scop, left, domain, row, one, true); | |
3330 value_set_si (one, 1); | |
3331 scan_tree_for_params (scop, right, domain, row, one, false); | |
3332 value_clear (one); | |
3333 row++; | |
3334 break; | |
3335 case GE_EXPR: | |
3336 value_set_si (domain->p[row][0], 1); | |
3337 value_init (one); | |
3338 value_set_si (one, 1); | |
3339 scan_tree_for_params (scop, left, domain, row, one, false); | |
3340 value_set_si (one, 1); | |
3341 scan_tree_for_params (scop, right, domain, row, one, true); | |
3342 value_clear (one); | |
3343 row++; | |
3344 break; | |
3345 default: | |
3346 gcc_unreachable (); | |
3347 break; | |
3348 } | |
3349 break; | |
3350 } | |
3351 case GIMPLE_SWITCH: | |
3352 /* Switch statements are not supported right know. */ | |
3353 gcc_unreachable (); | |
3354 break; | |
3355 | |
3356 default: | |
3357 gcc_unreachable (); | |
3358 break; | |
3359 } | |
3360 } | |
3361 } | |
3362 | |
3363 /* Returns true when PHI defines an induction variable in the loop | |
3364 containing the PHI node. */ | |
3365 | |
3366 static bool | |
3367 phi_node_is_iv (gimple phi) | |
3368 { | |
3369 loop_p loop = gimple_bb (phi)->loop_father; | |
3370 tree scev = analyze_scalar_evolution (loop, gimple_phi_result (phi)); | |
3371 | |
3372 return tree_contains_chrecs (scev, NULL); | |
3373 } | |
3374 | |
3375 /* Returns true when BB contains scalar phi nodes that are not an | |
3376 induction variable of a loop. */ | |
3377 | |
3378 static bool | |
3379 bb_contains_non_iv_scalar_phi_nodes (basic_block bb) | |
3380 { | |
3381 gimple phi = NULL; | |
3382 gimple_stmt_iterator si; | |
3383 | |
3384 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si)) | |
3385 if (is_gimple_reg (gimple_phi_result (gsi_stmt (si)))) | |
3386 { | |
3387 /* Store the unique scalar PHI node: at this point, loops | |
3388 should be in cannonical form, so we expect to see at most | |
3389 one scalar phi node in the loop header. */ | |
3390 if (phi | |
3391 || bb != bb->loop_father->header) | |
3392 return true; | |
3393 | |
3394 phi = gsi_stmt (si); | |
3395 } | |
3396 | |
3397 if (!phi | |
3398 || phi_node_is_iv (phi)) | |
3399 return false; | |
3400 | |
3401 return true; | |
3402 } | |
3403 | |
/* Helper recursive function.  Record in CONDITIONS and CASES all
   conditions from 'if's and 'switch'es occurring in BB from SCOP.

   Returns false when the conditions contain scalar computations that
   depend on the condition, i.e. when there are scalar phi nodes on
   the junction after the condition.  Only the computations occurring
   on memory can be handled in the polyhedral model: operations that
   define scalar evolutions in conditions, that can potentially be
   used to index memory, can't be handled by the polyhedral model.  */

static bool
build_scop_conditions_1 (VEC (gimple, heap) **conditions,
                         VEC (gimple, heap) **cases, basic_block bb,
                         scop_p scop)
{
  bool res = true;
  int i, j;
  graphite_bb_p gbb;
  basic_block bb_child, bb_iter;
  VEC (basic_block, heap) *dom;
  gimple stmt;

  /* Make sure we are in the SCoP.  */
  if (!bb_in_sese_p (bb, SCOP_REGION (scop)))
    return true;

  if (bb_contains_non_iv_scalar_phi_nodes (bb))
    return false;

  /* Record on BB's graphite_bb the conditions accumulated along the
     dominator walk so far.  */
  gbb = gbb_from_bb (bb);
  if (gbb)
    {
      GBB_CONDITIONS (gbb) = VEC_copy (gimple, heap, *conditions);
      GBB_CONDITION_CASES (gbb) = VEC_copy (gimple, heap, *cases);
    }

  dom = get_dominated_by (CDI_DOMINATORS, bb);

  stmt = last_stmt (bb);
  if (stmt)
    {
      VEC (edge, gc) *edges;
      edge e;

      switch (gimple_code (stmt))
        {
        case GIMPLE_COND:
          edges = bb->succs;
          for (i = 0; VEC_iterate (edge, edges, i, e); i++)
            /* Only branch targets dominated by BB and with a single
               predecessor carry the condition unconditionally.  */
            if ((dominated_by_p (CDI_DOMINATORS, e->dest, bb))
                && VEC_length (edge, e->dest->preds) == 1)
              {
                /* Remove the scanned block from the dominator successors.  */
                for (j = 0; VEC_iterate (basic_block, dom, j, bb_iter); j++)
                  if (bb_iter == e->dest)
                    {
                      VEC_unordered_remove (basic_block, dom, j);
                      break;
                    }

                /* Recursively scan the then or else part.  A non-NULL
                   entry in CASES marks the THEN branch; NULL marks
                   ELSE.  */
                if (e->flags & EDGE_TRUE_VALUE)
                  VEC_safe_push (gimple, heap, *cases, stmt);
                else
                  {
                    gcc_assert (e->flags & EDGE_FALSE_VALUE);
                    VEC_safe_push (gimple, heap, *cases, NULL);
                  }

                VEC_safe_push (gimple, heap, *conditions, stmt);
                if (!build_scop_conditions_1 (conditions, cases, e->dest, scop))
                  {
                    res = false;
                    goto done;
                  }
                VEC_pop (gimple, *conditions);
                VEC_pop (gimple, *cases);
              }
          break;

        case GIMPLE_SWITCH:
          {
            unsigned i;
            gimple_stmt_iterator gsi_search_gimple_label;

            for (i = 0; i < gimple_switch_num_labels (stmt); ++i)
              {
                basic_block bb_iter;
                size_t k;
                size_t n_cases = VEC_length (gimple, *conditions);
                unsigned n = gimple_switch_num_labels (stmt);

                bb_child = label_to_block
                  (CASE_LABEL (gimple_switch_label (stmt, i)));

                /* Look for another case label mapping to the same
                   block.  */
                for (k = 0; k < n; k++)
                  if (i != k
                      && label_to_block
                      (CASE_LABEL (gimple_switch_label (stmt, k))) == bb_child)
                    break;

                /* Switches with multiple case values for the same
                   block are not handled.  */
                if (k != n
                    /* Switch cases with more than one predecessor are
                       not handled.  */
                    || VEC_length (edge, bb_child->preds) != 1)
                  {
                    res = false;
                    goto done;
                  }

                /* Recursively scan the corresponding 'case' block.  */
                for (gsi_search_gimple_label = gsi_start_bb (bb_child);
                     !gsi_end_p (gsi_search_gimple_label);
                     gsi_next (&gsi_search_gimple_label))
                  {
                    gimple label = gsi_stmt (gsi_search_gimple_label);

                    if (gimple_code (label) == GIMPLE_LABEL)
                      {
                        tree t = gimple_label_label (label);

                        gcc_assert (t == gimple_switch_label (stmt, i));
                        /* NOTE(review): VEC_replace at index n_cases
                           without a preceding push looks suspicious —
                           confirm the conditions/cases stacks are
                           grown elsewhere before this point.  */
                        VEC_replace (gimple, *cases, n_cases, label);
                        break;
                      }
                  }

                if (!build_scop_conditions_1 (conditions, cases, bb_child, scop))
                  {
                    res = false;
                    goto done;
                  }

                /* Remove the scanned block from the dominator successors.  */
                for (j = 0; VEC_iterate (basic_block, dom, j, bb_iter); j++)
                  if (bb_iter == bb_child)
                    {
                      VEC_unordered_remove (basic_block, dom, j);
                      break;
                    }
              }

            VEC_pop (gimple, *conditions);
            VEC_pop (gimple, *cases);
            break;
          }

        default:
          break;
        }
    }

  /* Scan all immediate dominated successors.  */
  for (i = 0; VEC_iterate (basic_block, dom, i, bb_child); i++)
    if (!build_scop_conditions_1 (conditions, cases, bb_child, scop))
      {
        res = false;
        goto done;
      }

 done:
  VEC_free (basic_block, heap, dom);
  return res;
}
3570 | |
3571 /* Record all conditions from SCOP. | |
3572 | |
3573 Returns false when the conditions contain scalar computations that | |
3574 depend on the condition, i.e. when there are scalar phi nodes on | |
3575 the junction after the condition. Only the computations occurring | |
3576 on memory can be handled in the polyhedral model: operations that | |
3577 define scalar evolutions in conditions, that can potentially be | |
3578 used to index memory, can't be handled by the polyhedral model. */ | |
3579 | |
3580 static bool | |
3581 build_scop_conditions (scop_p scop) | |
3582 { | |
3583 bool res; | |
3584 VEC (gimple, heap) *conditions = NULL; | |
3585 VEC (gimple, heap) *cases = NULL; | |
3586 | |
3587 res = build_scop_conditions_1 (&conditions, &cases, SCOP_ENTRY (scop), scop); | |
3588 | |
3589 VEC_free (gimple, heap, conditions); | |
3590 VEC_free (gimple, heap, cases); | |
3591 return res; | |
3592 } | |
3593 | |
3594 /* Traverses all the GBBs of the SCOP and add their constraints to the | |
3595 iteration domains. */ | |
3596 | |
3597 static void | |
3598 add_conditions_to_constraints (scop_p scop) | |
3599 { | |
3600 int i; | |
3601 graphite_bb_p gbb; | |
3602 | |
3603 for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gbb); i++) | |
3604 add_conditions_to_domain (gbb); | |
3605 } | |
3606 | |
3607 /* Build the current domain matrix: the loops belonging to the current | |
3608 SCOP, and that vary for the execution of the current basic block. | |
3609 Returns false if there is no loop in SCOP. */ | |
3610 | |
3611 static bool | |
3612 build_scop_iteration_domain (scop_p scop) | |
3613 { | |
3614 struct loop *loop; | |
3615 CloogMatrix *outer_cstr; | |
3616 int i; | |
3617 | |
3618 /* Build cloog loop for all loops, that are in the uppermost loop layer of | |
3619 this SCoP. */ | |
3620 for (i = 0; VEC_iterate (loop_p, SCOP_LOOP_NEST (scop), i, loop); i++) | |
3621 if (!loop_in_sese_p (loop_outer (loop), SCOP_REGION (scop))) | |
3622 { | |
3623 /* The outermost constraints is a matrix that has: | |
3624 -first column: eq/ineq boolean | |
3625 -last column: a constant | |
3626 -scop_nb_params columns for the parameters used in the scop. */ | |
3627 outer_cstr = cloog_matrix_alloc (0, scop_nb_params (scop) + 2); | |
3628 build_loop_iteration_domains (scop, loop, outer_cstr, 0); | |
3629 cloog_matrix_free (outer_cstr); | |
3630 } | |
3631 | |
3632 return (i != 0); | |
3633 } | |
3634 | |
/* Initializes an equation CY of the access matrix using the
   information for a subscript from AF, relatively to the loop
   indexes from LOOP_NEST and parameter indexes from PARAMS.  NDIM is
   the dimension of the array access, i.e. the number of
   subscripts.  Returns true when the operation succeeds.  */

static bool
build_access_matrix_with_af (tree af, lambda_vector cy,
                             scop_p scop, int ndim)
{
  int param_col;

  switch (TREE_CODE (af))
    {
    case POLYNOMIAL_CHREC:
      {
        struct loop *outer_loop;
        tree left = CHREC_LEFT (af);
        tree right = CHREC_RIGHT (af);
        int var;

        /* Only constant strides are representable in the matrix.  */
        if (TREE_CODE (right) != INTEGER_CST)
          return false;

        /* The coefficient column for the chrec's loop is its nesting
           depth inside the SCoP.  */
        outer_loop = get_loop (CHREC_VARIABLE (af));
        var = nb_loops_around_loop_in_scop (outer_loop, scop);
        cy[var] = int_cst_value (right);

        switch (TREE_CODE (left))
          {
          case POLYNOMIAL_CHREC:
            return build_access_matrix_with_af (left, cy, scop, ndim);

          case INTEGER_CST:
            /* Constant part: last column.  */
            cy[ndim - 1] = int_cst_value (left);
            return true;

          default:
            return build_access_matrix_with_af (left, cy, scop, ndim);
          }
      }

    case PLUS_EXPR:
      build_access_matrix_with_af (TREE_OPERAND (af, 0), cy, scop, ndim);
      build_access_matrix_with_af (TREE_OPERAND (af, 1), cy, scop, ndim);
      return true;

    case MINUS_EXPR:
      /* NOTE(review): the second operand is recorded exactly like in
         the PLUS_EXPR case, without negating its coefficients —
         confirm this is intentional.  */
      build_access_matrix_with_af (TREE_OPERAND (af, 0), cy, scop, ndim);
      build_access_matrix_with_af (TREE_OPERAND (af, 1), cy, scop, ndim);
      return true;

    case INTEGER_CST:
      cy[ndim - 1] = int_cst_value (af);
      return true;

    case SSA_NAME:
      /* A parameter: its column lies between the loop columns and the
         constant column.  */
      param_col = param_index (af, scop);
      cy [ndim - scop_nb_params (scop) + param_col - 1] = 1;
      return true;

    default:
      /* FIXME: access_fn can have parameters.  */
      return false;
    }
}
3701 | |
3702 /* Initialize the access matrix in the data reference REF with respect | |
3703 to the loop nesting LOOP_NEST. Return true when the operation | |
3704 succeeded. */ | |
3705 | |
3706 static bool | |
3707 build_access_matrix (data_reference_p ref, graphite_bb_p gb) | |
3708 { | |
3709 int i, ndim = DR_NUM_DIMENSIONS (ref); | |
3710 struct access_matrix *am = GGC_NEW (struct access_matrix); | |
3711 | |
3712 AM_MATRIX (am) = VEC_alloc (lambda_vector, gc, ndim); | |
3713 DR_SCOP (ref) = GBB_SCOP (gb); | |
3714 | |
3715 for (i = 0; i < ndim; i++) | |
3716 { | |
3717 lambda_vector v = lambda_vector_new (ref_nb_loops (ref)); | |
3718 scop_p scop = GBB_SCOP (gb); | |
3719 tree af = DR_ACCESS_FN (ref, i); | |
3720 | |
3721 if (!build_access_matrix_with_af (af, v, scop, ref_nb_loops (ref))) | |
3722 return false; | |
3723 | |
3724 VEC_quick_push (lambda_vector, AM_MATRIX (am), v); | |
3725 } | |
3726 | |
3727 DR_ACCESS_MATRIX (ref) = am; | |
3728 return true; | |
3729 } | |
3730 | |
/* Build the access matrices for the data references in the SCOP.  */

static void
build_scop_data_accesses (scop_p scop)
{
  int i;
  graphite_bb_p gb;

  /* FIXME: Construction of access matrix is disabled until some
     pass, like the data dependence analysis, is using it.  */
  return;

  /* The code below is intentionally unreachable while the FIXME above
     stands; it is kept as the future implementation.  */
  for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gb); i++)
    {
      int j;
      data_reference_p dr;

      /* Construct the access matrix for each data ref, with respect to
         the loop nest of the current BB in the considered SCOP.  */
      for (j = 0;
           VEC_iterate (data_reference_p, GBB_DATA_REFS (gb), j, dr);
           j++)
        {
          bool res = build_access_matrix (dr, gb);

          /* FIXME: At this point the DRs should always have an affine
             form.  For the moment this fails as build_access_matrix
             does not build matrices with parameters.  */
          gcc_assert (res);
        }
    }
}
3763 | |
3764 /* Returns the tree variable from the name NAME that was given in | |
3765 Cloog representation. All the parameters are stored in PARAMS, and | |
3766 all the loop induction variables are stored in IVSTACK. | |
3767 | |
3768 FIXME: This is a hack, and Cloog should be fixed to not work with | |
3769 variable names represented as "char *string", but with void | |
3770 pointers that could be casted back to a tree. The only problem in | |
3771 doing that is that Cloog's pretty printer still assumes that | |
3772 variable names are char *strings. The solution would be to have a | |
3773 function pointer for pretty-printing that can be redirected to be | |
3774 print_generic_stmt in our case, or fprintf by default. | |
3775 ??? Too ugly to live. */ | |
3776 | |
3777 static tree | |
3778 clast_name_to_gcc (const char *name, VEC (name_tree, heap) *params, | |
3779 loop_iv_stack ivstack) | |
3780 { | |
3781 int i; | |
3782 name_tree t; | |
3783 tree iv; | |
3784 | |
3785 if (params) | |
3786 for (i = 0; VEC_iterate (name_tree, params, i, t); i++) | |
3787 if (!strcmp (name, t->name)) | |
3788 return t->t; | |
3789 | |
3790 iv = loop_iv_stack_get_iv_from_name (ivstack, name); | |
3791 if (iv) | |
3792 return iv; | |
3793 | |
3794 gcc_unreachable (); | |
3795 } | |
3796 | |
3797 /* Returns the maximal precision type for expressions E1 and E2. */ | |
3798 | |
3799 static inline tree | |
3800 max_precision_type (tree e1, tree e2) | |
3801 { | |
3802 tree type1 = TREE_TYPE (e1); | |
3803 tree type2 = TREE_TYPE (e2); | |
3804 return TYPE_PRECISION (type1) > TYPE_PRECISION (type2) ? type1 : type2; | |
3805 } | |
3806 | |
3807 static tree | |
3808 clast_to_gcc_expression (tree, struct clast_expr *, VEC (name_tree, heap) *, | |
3809 loop_iv_stack); | |
3810 | |
3811 /* Converts a Cloog reduction expression R with reduction operation OP | |
3812 to a GCC expression tree of type TYPE. PARAMS is a vector of | |
3813 parameters of the scop, and IVSTACK contains the stack of induction | |
3814 variables. */ | |
3815 | |
3816 static tree | |
3817 clast_to_gcc_expression_red (tree type, enum tree_code op, | |
3818 struct clast_reduction *r, | |
3819 VEC (name_tree, heap) *params, | |
3820 loop_iv_stack ivstack) | |
3821 { | |
3822 int i; | |
3823 tree res = clast_to_gcc_expression (type, r->elts[0], params, ivstack); | |
3824 | |
3825 for (i = 1; i < r->n; i++) | |
3826 { | |
3827 tree t = clast_to_gcc_expression (type, r->elts[i], params, ivstack); | |
3828 res = fold_build2 (op, type, res, t); | |
3829 } | |
3830 return res; | |
3831 } | |
3832 | |
/* Converts a Cloog AST expression E back to a GCC expression tree of
   type TYPE.  PARAMS is a vector of parameters of the scop, and
   IVSTACK contains the stack of induction variables.  */

static tree
clast_to_gcc_expression (tree type, struct clast_expr *e,
                         VEC (name_tree, heap) *params,
                         loop_iv_stack ivstack)
{
  switch (e->type)
    {
    case expr_term:
      {
        /* A term is VAL * VAR, or a plain constant when VAR is NULL.  */
        struct clast_term *t = (struct clast_term *) e;

        if (t->var)
          {
            /* 1 * VAR: just the variable.  */
            if (value_one_p (t->val))
              {
                tree name = clast_name_to_gcc (t->var, params, ivstack);
                return fold_convert (type, name);
              }

            /* -1 * VAR: negate the variable.  */
            else if (value_mone_p (t->val))
              {
                tree name = clast_name_to_gcc (t->var, params, ivstack);
                name = fold_convert (type, name);
                return fold_build1 (NEGATE_EXPR, type, name);
              }
            /* General case: CST * VAR.  */
            else
              {
                tree name = clast_name_to_gcc (t->var, params, ivstack);
                tree cst = gmp_cst_to_tree (type, t->val);
                name = fold_convert (type, name);
                return fold_build2 (MULT_EXPR, type, cst, name);
              }
          }
        else
          return gmp_cst_to_tree (type, t->val);
      }

    case expr_red:
      {
        /* A reduction: sum, min or max over a list of elements.  */
        struct clast_reduction *r = (struct clast_reduction *) e;

        switch (r->type)
          {
          case clast_red_sum:
            return clast_to_gcc_expression_red (type, PLUS_EXPR, r, params, ivstack);

          case clast_red_min:
            return clast_to_gcc_expression_red (type, MIN_EXPR, r, params, ivstack);

          case clast_red_max:
            return clast_to_gcc_expression_red (type, MAX_EXPR, r, params, ivstack);

          default:
            gcc_unreachable ();
          }
        break;
      }

    case expr_bin:
      {
        /* A binary operation with a constant right-hand side.  */
        struct clast_binary *b = (struct clast_binary *) e;
        struct clast_expr *lhs = (struct clast_expr *) b->LHS;
        tree tl = clast_to_gcc_expression (type, lhs, params, ivstack);
        tree tr = gmp_cst_to_tree (type, b->RHS);

        switch (b->type)
          {
          case clast_bin_fdiv:
            return fold_build2 (FLOOR_DIV_EXPR, type, tl, tr);

          case clast_bin_cdiv:
            return fold_build2 (CEIL_DIV_EXPR, type, tl, tr);

          case clast_bin_div:
            return fold_build2 (EXACT_DIV_EXPR, type, tl, tr);

          case clast_bin_mod:
            return fold_build2 (TRUNC_MOD_EXPR, type, tl, tr);

          default:
            gcc_unreachable ();
          }
      }

    default:
      gcc_unreachable ();
    }

  return NULL_TREE;
}
3927 | |
3928 /* Returns the type for the expression E. */ | |
3929 | |
3930 static tree | |
3931 gcc_type_for_clast_expr (struct clast_expr *e, | |
3932 VEC (name_tree, heap) *params, | |
3933 loop_iv_stack ivstack) | |
3934 { | |
3935 switch (e->type) | |
3936 { | |
3937 case expr_term: | |
3938 { | |
3939 struct clast_term *t = (struct clast_term *) e; | |
3940 | |
3941 if (t->var) | |
3942 return TREE_TYPE (clast_name_to_gcc (t->var, params, ivstack)); | |
3943 else | |
3944 return NULL_TREE; | |
3945 } | |
3946 | |
3947 case expr_red: | |
3948 { | |
3949 struct clast_reduction *r = (struct clast_reduction *) e; | |
3950 | |
3951 if (r->n == 1) | |
3952 return gcc_type_for_clast_expr (r->elts[0], params, ivstack); | |
3953 else | |
3954 { | |
3955 int i; | |
3956 for (i = 0; i < r->n; i++) | |
3957 { | |
3958 tree type = gcc_type_for_clast_expr (r->elts[i], params, ivstack); | |
3959 if (type) | |
3960 return type; | |
3961 } | |
3962 return NULL_TREE; | |
3963 } | |
3964 } | |
3965 | |
3966 case expr_bin: | |
3967 { | |
3968 struct clast_binary *b = (struct clast_binary *) e; | |
3969 struct clast_expr *lhs = (struct clast_expr *) b->LHS; | |
3970 return gcc_type_for_clast_expr (lhs, params, ivstack); | |
3971 } | |
3972 | |
3973 default: | |
3974 gcc_unreachable (); | |
3975 } | |
3976 | |
3977 return NULL_TREE; | |
3978 } | |
3979 | |
3980 /* Returns the type for the equation CLEQ. */ | |
3981 | |
3982 static tree | |
3983 gcc_type_for_clast_eq (struct clast_equation *cleq, | |
3984 VEC (name_tree, heap) *params, | |
3985 loop_iv_stack ivstack) | |
3986 { | |
3987 tree type = gcc_type_for_clast_expr (cleq->LHS, params, ivstack); | |
3988 if (type) | |
3989 return type; | |
3990 | |
3991 return gcc_type_for_clast_expr (cleq->RHS, params, ivstack); | |
3992 } | |
3993 | |
3994 /* Translates a clast equation CLEQ to a tree. */ | |
3995 | |
3996 static tree | |
3997 graphite_translate_clast_equation (scop_p scop, | |
3998 struct clast_equation *cleq, | |
3999 loop_iv_stack ivstack) | |
4000 { | |
4001 enum tree_code comp; | |
4002 tree type = gcc_type_for_clast_eq (cleq, SCOP_PARAMS (scop), ivstack); | |
4003 tree lhs = clast_to_gcc_expression (type, cleq->LHS, SCOP_PARAMS (scop), ivstack); | |
4004 tree rhs = clast_to_gcc_expression (type, cleq->RHS, SCOP_PARAMS (scop), ivstack); | |
4005 | |
4006 if (cleq->sign == 0) | |
4007 comp = EQ_EXPR; | |
4008 | |
4009 else if (cleq->sign > 0) | |
4010 comp = GE_EXPR; | |
4011 | |
4012 else | |
4013 comp = LE_EXPR; | |
4014 | |
4015 return fold_build2 (comp, type, lhs, rhs); | |
4016 } | |
4017 | |
4018 /* Creates the test for the condition in STMT. */ | |
4019 | |
4020 static tree | |
4021 graphite_create_guard_cond_expr (scop_p scop, struct clast_guard *stmt, | |
4022 loop_iv_stack ivstack) | |
4023 { | |
4024 tree cond = NULL; | |
4025 int i; | |
4026 | |
4027 for (i = 0; i < stmt->n; i++) | |
4028 { | |
4029 tree eq = graphite_translate_clast_equation (scop, &stmt->eq[i], ivstack); | |
4030 | |
4031 if (cond) | |
4032 cond = fold_build2 (TRUTH_AND_EXPR, TREE_TYPE (eq), cond, eq); | |
4033 else | |
4034 cond = eq; | |
4035 } | |
4036 | |
4037 return cond; | |
4038 } | |
4039 | |
4040 /* Creates a new if region corresponding to Cloog's guard. */ | |
4041 | |
4042 static edge | |
4043 graphite_create_new_guard (scop_p scop, edge entry_edge, | |
4044 struct clast_guard *stmt, | |
4045 loop_iv_stack ivstack) | |
4046 { | |
4047 tree cond_expr = graphite_create_guard_cond_expr (scop, stmt, ivstack); | |
4048 edge exit_edge = create_empty_if_region_on_edge (entry_edge, cond_expr); | |
4049 return exit_edge; | |
4050 } | |
4051 | |
4052 /* Walks a CLAST and returns the first statement in the body of a | |
4053 loop. */ | |
4054 | |
4055 static struct clast_user_stmt * | |
4056 clast_get_body_of_loop (struct clast_stmt *stmt) | |
4057 { | |
4058 if (!stmt | |
4059 || CLAST_STMT_IS_A (stmt, stmt_user)) | |
4060 return (struct clast_user_stmt *) stmt; | |
4061 | |
4062 if (CLAST_STMT_IS_A (stmt, stmt_for)) | |
4063 return clast_get_body_of_loop (((struct clast_for *) stmt)->body); | |
4064 | |
4065 if (CLAST_STMT_IS_A (stmt, stmt_guard)) | |
4066 return clast_get_body_of_loop (((struct clast_guard *) stmt)->then); | |
4067 | |
4068 if (CLAST_STMT_IS_A (stmt, stmt_block)) | |
4069 return clast_get_body_of_loop (((struct clast_block *) stmt)->body); | |
4070 | |
4071 gcc_unreachable (); | |
4072 } | |
4073 | |
4074 /* Returns the induction variable for the loop that gets translated to | |
4075 STMT. */ | |
4076 | |
4077 static tree | |
4078 gcc_type_for_iv_of_clast_loop (struct clast_for *stmt_for) | |
4079 { | |
4080 struct clast_user_stmt *stmt = clast_get_body_of_loop ((struct clast_stmt *) stmt_for); | |
4081 const char *cloog_iv = stmt_for->iterator; | |
4082 CloogStatement *cs = stmt->statement; | |
4083 graphite_bb_p gbb = (graphite_bb_p) cloog_statement_usr (cs); | |
4084 | |
4085 return gcc_type_for_cloog_iv (cloog_iv, gbb); | |
4086 } | |
4087 | |
/* Creates a new LOOP corresponding to Cloog's STMT.  Inserts an induction
   variable for the new LOOP.  New LOOP is attached to CFG starting at
   ENTRY_EDGE.  LOOP is inserted into the loop tree and becomes the child
   loop of the OUTER_LOOP.  */

static struct loop *
graphite_create_new_loop (scop_p scop, edge entry_edge,
			  struct clast_for *stmt, loop_iv_stack ivstack,
			  loop_p outer)
{
  /* The IV type is deduced from the CLAST loop body.  */
  tree type = gcc_type_for_iv_of_clast_loop (stmt);
  VEC (name_tree, heap) *params = SCOP_PARAMS (scop);
  /* Lower bound, upper bound and stride come from the CLAST "for"
     statement; LB/UB may reference SCoP parameters and outer IVs.  */
  tree lb = clast_to_gcc_expression (type, stmt->LB, params, ivstack);
  tree ub = clast_to_gcc_expression (type, stmt->UB, params, ivstack);
  tree stride = gmp_cst_to_tree (type, stmt->stride);
  tree ivvar = create_tmp_var (type, "graphiteIV");
  tree iv_before;
  /* When OUTER is NULL, attach the new loop to the loop father of the
     source block of ENTRY_EDGE.  */
  loop_p loop = create_empty_loop_on_edge
    (entry_edge, lb, stride, ub, ivvar, &iv_before,
     outer ? outer : entry_edge->src->loop_father);

  add_referenced_var (ivvar);
  /* Make the new IV visible while translating the loop body; popped
     by the caller (see translate_clast).  */
  loop_iv_stack_push_iv (ivstack, iv_before, stmt->iterator);
  return loop;
}
4113 | |
/* Rename the SSA_NAMEs used in STMT according to the renaming MAP.
   Uses with no entry in MAP are left unchanged (see also
   default_liveout_before_guard, which relies on
   get_new_name_from_old_name returning the old name in that case).  */

static void
rename_variables_in_stmt (gimple stmt, htab_t map)
{
  ssa_op_iter iter;
  use_operand_p use_p;

  FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
    {
      tree use = USE_FROM_PTR (use_p);
      tree new_name = get_new_name_from_old_name (map, use);

      replace_exp (use_p, new_name);
    }

  /* The operands changed: let the SSA machinery recompute them.  */
  update_stmt (stmt);
}
4132 | |
4133 /* Returns true if SSA_NAME is a parameter of SCOP. */ | |
4134 | |
4135 static bool | |
4136 is_parameter (scop_p scop, tree ssa_name) | |
4137 { | |
4138 int i; | |
4139 VEC (name_tree, heap) *params = SCOP_PARAMS (scop); | |
4140 name_tree param; | |
4141 | |
4142 for (i = 0; VEC_iterate (name_tree, params, i, param); i++) | |
4143 if (param->t == ssa_name) | |
4144 return true; | |
4145 | |
4146 return false; | |
4147 } | |
4148 | |
4149 /* Returns true if NAME is an induction variable. */ | |
4150 | |
4151 static bool | |
4152 is_iv (tree name) | |
4153 { | |
4154 return gimple_code (SSA_NAME_DEF_STMT (name)) == GIMPLE_PHI; | |
4155 } | |
4156 | |
4157 static void expand_scalar_variables_stmt (gimple, basic_block, scop_p, | |
4158 htab_t); | |
4159 static tree | |
4160 expand_scalar_variables_expr (tree, tree, enum tree_code, tree, basic_block, | |
4161 scop_p, htab_t, gimple_stmt_iterator *); | |
4162 | |
/* Copies at GSI all the scalar computations on which the ssa_name OP0
   depends on in the SCOP: these are all the scalar variables used in
   the definition of OP0, that are defined outside BB and still in the
   SCOP, i.e. not a parameter of the SCOP.  The expression that is
   returned contains only induction variables from the generated code:
   MAP contains the induction variables renaming mapping, and is used
   to translate the names of induction variables.

   Mutually recursive with expand_scalar_variables_expr and
   expand_scalar_variables_stmt.  */

static tree
expand_scalar_variables_ssa_name (tree op0, basic_block bb,
				  scop_p scop, htab_t map,
				  gimple_stmt_iterator *gsi)
{
  tree var0, var1, type;
  gimple def_stmt;
  enum tree_code subcode;

  /* SCoP parameters and IVs are not expanded; just rename them.  */
  if (is_parameter (scop, op0)
      || is_iv (op0))
    return get_new_name_from_old_name (map, op0);

  def_stmt = SSA_NAME_DEF_STMT (op0);

  if (gimple_bb (def_stmt) == bb)
    {
      /* If the defining statement is in the basic block already
	 we do not need to create a new expression for it, we
	 only need to ensure its operands are expanded.  */
      expand_scalar_variables_stmt (def_stmt, bb, scop, map);
      return get_new_name_from_old_name (map, op0);
    }
  else
    {
      /* Definitions that are not simple assignments, or that live
	 outside the SCoP region, cannot be re-expanded: rename only.  */
      if (gimple_code (def_stmt) != GIMPLE_ASSIGN
	  || !bb_in_sese_p (gimple_bb (def_stmt), SCOP_REGION (scop)))
	return get_new_name_from_old_name (map, op0);

      /* Recurse into the RHS of the defining assignment to rebuild
	 the computation at GSI.  */
      var0 = gimple_assign_rhs1 (def_stmt);
      subcode = gimple_assign_rhs_code (def_stmt);
      var1 = gimple_assign_rhs2 (def_stmt);
      type = gimple_expr_type (def_stmt);

      return expand_scalar_variables_expr (type, var0, subcode, var1, bb, scop,
					   map, gsi);
    }
}
4209 | |
/* Copies at GSI all the scalar computations on which the expression
   OP0 CODE OP1 depends on in the SCOP: these are all the scalar
   variables used in OP0 and OP1, defined outside BB and still defined
   in the SCOP, i.e. not a parameter of the SCOP.  The expression that
   is returned contains only induction variables from the generated
   code: MAP contains the induction variables renaming mapping, and is
   used to translate the names of induction variables.  OP1 is only
   read for binary CODEs.  */

static tree
expand_scalar_variables_expr (tree type, tree op0, enum tree_code code,
			      tree op1, basic_block bb, scop_p scop,
			      htab_t map, gimple_stmt_iterator *gsi)
{
  /* Constants and declarations need no expansion.  */
  if (TREE_CODE_CLASS (code) == tcc_constant
      || TREE_CODE_CLASS (code) == tcc_declaration)
    return op0;

  /* For data references we have to duplicate also its memory
     indexing.  */
  if (TREE_CODE_CLASS (code) == tcc_reference)
    {
      switch (code)
	{
	case INDIRECT_REF:
	  {
	    /* Expand the pointer SSA_NAME, gimplify it into a new
	       name, and rebuild the dereference on top of it.  */
	    tree old_name = TREE_OPERAND (op0, 0);
	    tree expr = expand_scalar_variables_ssa_name
	      (old_name, bb, scop, map, gsi);
	    tree new_name = force_gimple_operand_gsi (gsi, expr, true, NULL,
						      true, GSI_SAME_STMT);

	    /* Keep the aliasing information of the original pointer.  */
	    set_symbol_mem_tag (SSA_NAME_VAR (new_name),
				symbol_mem_tag (SSA_NAME_VAR (old_name)));
	    return fold_build1 (code, type, new_name);
	  }

	case ARRAY_REF:
	  {
	    /* Expand base and subscript; operands 2 and 3 (element
	       size/alignment info) are copied unchanged.  */
	    tree op00 = TREE_OPERAND (op0, 0);
	    tree op01 = TREE_OPERAND (op0, 1);
	    tree op02 = TREE_OPERAND (op0, 2);
	    tree op03 = TREE_OPERAND (op0, 3);
	    tree base = expand_scalar_variables_expr
	      (TREE_TYPE (op00), op00, TREE_CODE (op00), NULL, bb, scop,
	       map, gsi);
	    tree subscript = expand_scalar_variables_expr
	      (TREE_TYPE (op01), op01, TREE_CODE (op01), NULL, bb, scop,
	       map, gsi);

	    return build4 (ARRAY_REF, type, base, subscript, op02, op03);
	  }

	default:
	  /* The above cases should catch everything.  */
	  gcc_unreachable ();
	}
    }

  if (TREE_CODE_CLASS (code) == tcc_unary)
    {
      tree op0_type = TREE_TYPE (op0);
      enum tree_code op0_code = TREE_CODE (op0);
      tree op0_expr = expand_scalar_variables_expr (op0_type, op0, op0_code,
						    NULL, bb, scop, map, gsi);

      return fold_build1 (code, type, op0_expr);
    }

  if (TREE_CODE_CLASS (code) == tcc_binary)
    {
      /* Expand both operands, then rebuild the binary expression.  */
      tree op0_type = TREE_TYPE (op0);
      enum tree_code op0_code = TREE_CODE (op0);
      tree op0_expr = expand_scalar_variables_expr (op0_type, op0, op0_code,
						    NULL, bb, scop, map, gsi);
      tree op1_type = TREE_TYPE (op1);
      enum tree_code op1_code = TREE_CODE (op1);
      tree op1_expr = expand_scalar_variables_expr (op1_type, op1, op1_code,
						    NULL, bb, scop, map, gsi);

      return fold_build2 (code, type, op0_expr, op1_expr);
    }

  if (code == SSA_NAME)
    return expand_scalar_variables_ssa_name (op0, bb, scop, map, gsi);

  /* Any other tree code is unexpected here.  */
  gcc_unreachable ();
  return NULL;
}
4298 | |
/* Copies at the beginning of BB all the scalar computations on which
   STMT depends on in the SCOP: these are all the scalar variables used
   in STMT, defined outside BB and still defined in the SCOP, i.e. not a
   parameter of the SCOP.  The expression that is returned contains
   only induction variables from the generated code: MAP contains the
   induction variables renaming mapping, and is used to translate the
   names of induction variables.  */

static void
expand_scalar_variables_stmt (gimple stmt, basic_block bb, scop_p scop,
			      htab_t map)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  /* New computations are inserted right after the labels of BB.  */
  gimple_stmt_iterator gsi = gsi_after_labels (bb);

  FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
    {
      tree use = USE_FROM_PTR (use_p);
      tree type = TREE_TYPE (use);
      enum tree_code code = TREE_CODE (use);
      tree use_expr = expand_scalar_variables_expr (type, use, code, NULL, bb,
						    scop, map, &gsi);
      /* When the expansion produced a different expression, gimplify
	 it into a fresh name and substitute it for the old use.  */
      if (use_expr != use)
	{
	  tree new_use =
	    force_gimple_operand_gsi (&gsi, use_expr, true, NULL,
				      true, GSI_NEW_STMT);
	  replace_exp (use_p, new_use);
	}
    }

  update_stmt (stmt);
}
4333 | |
4334 /* Copies at the beginning of BB all the scalar computations on which | |
4335 BB depends on in the SCOP: these are all the scalar variables used | |
4336 in BB, defined outside BB and still defined in the SCOP, i.e. not a | |
4337 parameter of the SCOP. The expression that is returned contains | |
4338 only induction variables from the generated code: MAP contains the | |
4339 induction variables renaming mapping, and is used to translate the | |
4340 names of induction variables. */ | |
4341 | |
4342 static void | |
4343 expand_scalar_variables (basic_block bb, scop_p scop, htab_t map) | |
4344 { | |
4345 gimple_stmt_iterator gsi; | |
4346 | |
4347 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);) | |
4348 { | |
4349 gimple stmt = gsi_stmt (gsi); | |
4350 expand_scalar_variables_stmt (stmt, bb, scop, map); | |
4351 gsi_next (&gsi); | |
4352 } | |
4353 } | |
4354 | |
4355 /* Rename all the SSA_NAMEs from block BB according to the MAP. */ | |
4356 | |
4357 static void | |
4358 rename_variables (basic_block bb, htab_t map) | |
4359 { | |
4360 gimple_stmt_iterator gsi; | |
4361 | |
4362 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi)) | |
4363 rename_variables_in_stmt (gsi_stmt (gsi), map); | |
4364 } | |
4365 | |
4366 /* Remove condition from BB. */ | |
4367 | |
4368 static void | |
4369 remove_condition (basic_block bb) | |
4370 { | |
4371 gimple last = last_stmt (bb); | |
4372 | |
4373 if (last && gimple_code (last) == GIMPLE_COND) | |
4374 { | |
4375 gimple_stmt_iterator gsi = gsi_last_bb (bb); | |
4376 gsi_remove (&gsi, true); | |
4377 } | |
4378 } | |
4379 | |
4380 /* Returns the first successor edge of BB with EDGE_TRUE_VALUE flag set. */ | |
4381 | |
4382 static edge | |
4383 get_true_edge_from_guard_bb (basic_block bb) | |
4384 { | |
4385 edge e; | |
4386 edge_iterator ei; | |
4387 | |
4388 FOR_EACH_EDGE (e, ei, bb->succs) | |
4389 if (e->flags & EDGE_TRUE_VALUE) | |
4390 return e; | |
4391 | |
4392 gcc_unreachable (); | |
4393 return NULL; | |
4394 } | |
4395 | |
4396 /* Returns the first successor edge of BB with EDGE_TRUE_VALUE flag cleared. */ | |
4397 | |
4398 static edge | |
4399 get_false_edge_from_guard_bb (basic_block bb) | |
4400 { | |
4401 edge e; | |
4402 edge_iterator ei; | |
4403 | |
4404 FOR_EACH_EDGE (e, ei, bb->succs) | |
4405 if (!(e->flags & EDGE_TRUE_VALUE)) | |
4406 return e; | |
4407 | |
4408 gcc_unreachable (); | |
4409 return NULL; | |
4410 } | |
4411 | |
/* Inserts in MAP a tuple (OLD_NAME, NEW_NAME) for the induction
   variables of the loops around GBB in SCOP, i.e. GBB_LOOPS.
   NEW_NAME is obtained from IVSTACK.  IVSTACK has the same stack
   ordering as GBB_LOOPS.  */

static void
build_iv_mapping (loop_iv_stack ivstack, htab_t map, gbb_p gbb, scop_p scop)
{
  int i;
  name_tree iv;
  PTR *slot;

  for (i = 0; VEC_iterate (name_tree, SCOP_OLDIVS (scop), i, iv); i++)
    {
      struct rename_map_elt tmp;

      /* Only IVs of loops that contain GBB are relevant.  */
      if (!flow_bb_inside_loop_p (iv->loop, GBB_BB (gbb)))
	continue;

      tmp.old_name = iv->t;
      slot = htab_find_slot (map, &tmp, INSERT);

      /* First mapping wins; do not overwrite an existing entry.  */
      if (!*slot)
	{
	  tree new_name = loop_iv_stack_get_iv (ivstack,
						gbb_loop_index (gbb, iv->loop));
	  *slot = new_rename_map_elt (iv->t, new_name);
	}
    }
}
4442 | |
4443 /* Register in MAP the tuple (old_name, new_name). */ | |
4444 | |
4445 static void | |
4446 register_old_and_new_names (htab_t map, tree old_name, tree new_name) | |
4447 { | |
4448 struct rename_map_elt tmp; | |
4449 PTR *slot; | |
4450 | |
4451 tmp.old_name = old_name; | |
4452 slot = htab_find_slot (map, &tmp, INSERT); | |
4453 | |
4454 if (!*slot) | |
4455 *slot = new_rename_map_elt (old_name, new_name); | |
4456 } | |
4457 | |
/* Create a duplicate of the basic block BB into NEW_BB, recording in
   MAP the (old SSA name, new SSA name) pairs for all definitions.
   NOTE: This does not preserve SSA form.  */

static void
graphite_copy_stmts_from_block (basic_block bb, basic_block new_bb, htab_t map)
{
  gimple_stmt_iterator gsi, gsi_tgt;

  gsi_tgt = gsi_start_bb (new_bb);
  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      def_operand_p def_p;
      ssa_op_iter op_iter;
      int region;
      gimple stmt = gsi_stmt (gsi);
      gimple copy;

      /* Labels belong to the original block; do not copy them.  */
      if (gimple_code (stmt) == GIMPLE_LABEL)
	continue;

      /* Create a new copy of STMT and duplicate STMT's virtual
	 operands.  */
      copy = gimple_copy (stmt);
      gsi_insert_after (&gsi_tgt, copy, GSI_NEW_STMT);
      mark_symbols_for_renaming (copy);

      /* Keep the copy in the same exception-handling region as the
	 original statement.  */
      region = lookup_stmt_eh_region (stmt);
      if (region >= 0)
	add_stmt_to_eh_region (copy, region);
      gimple_duplicate_stmt_histograms (cfun, copy, cfun, stmt);

      /* Create new names for all the definitions created by COPY and
	 add replacement mappings for each new name.  */
      FOR_EACH_SSA_DEF_OPERAND (def_p, copy, op_iter, SSA_OP_DEF)
	{
	  tree old_name = DEF_FROM_PTR (def_p);
	  tree new_name = create_new_def_for (old_name, copy, def_p);
	  register_old_and_new_names (map, old_name, new_name);
	}
    }
}
4499 | |
/* Records in SCOP_LIVEOUT_RENAMES the names that are live out of
   the SCOP and that appear in the RENAME_MAP.  */

static void
register_scop_liveout_renames (scop_p scop, htab_t rename_map)
{
  int i;
  sese region = SCOP_REGION (scop);

  /* Walk all SSA versions of the region; only scalar (gimple reg)
     liveouts are registered.  */
  for (i = 0; i < SESE_NUM_VER (region); i++)
    if (bitmap_bit_p (SESE_LIVEOUT (region), i)
	&& is_gimple_reg (ssa_name (i)))
      {
	tree old_name = ssa_name (i);
	tree new_name = get_new_name_from_old_name (rename_map, old_name);

	register_old_and_new_names (SCOP_LIVEOUT_RENAMES (scop),
				    old_name, new_name);
      }
}
4520 | |
/* Copies BB and includes in the copied BB all the statements that can
   be reached following the use-def chains from the memory accesses,
   and returns the next edge following this new block.  The order of
   the calls below matters: statements are copied first, then the
   guard condition and PHIs of the copy are dropped, and finally the
   scalar dependences are expanded in the renamed block.  */

static edge
copy_bb_and_scalar_dependences (basic_block bb, scop_p scop,
				edge next_e, htab_t map)
{
  /* The copy lives in a fresh block split out of NEXT_E.  */
  basic_block new_bb = split_edge (next_e);

  next_e = single_succ_edge (new_bb);
  graphite_copy_stmts_from_block (bb, new_bb, map);
  remove_condition (new_bb);
  rename_variables (new_bb, map);
  remove_phi_nodes (new_bb);
  expand_scalar_variables (new_bb, scop, map);
  /* Remember renamings for names that outlive the SCoP.  */
  register_scop_liveout_renames (scop, map);

  return next_e;
}
4541 | |
/* Helper function for htab_traverse in insert_loop_close_phis.
   SLOT is a (old_name, new_name) rename entry, S is the basic block
   where the loop-close PHI is created.  The entry's new_name is
   replaced by the PHI result, so later consumers see the closed
   value.  Returns 1 to continue the traversal.  */

static int
add_loop_exit_phis (void **slot, void *s)
{
  struct rename_map_elt *entry = (struct rename_map_elt *) *slot;
  tree new_name = entry->new_name;
  basic_block bb = (basic_block) s;
  gimple phi = create_phi_node (new_name, bb);
  tree res = create_new_def_for (gimple_phi_result (phi), phi,
				 gimple_phi_result_ptr (phi));

  /* BB has a single predecessor here (it is a loop-close block).  */
  add_phi_arg (phi, new_name, single_pred_edge (bb));

  entry->new_name = res;
  *slot = entry;
  return 1;
}
4560 | |
/* Iterate over the SCOP_LIVEOUT_RENAMES (SCOP) and get tuples of the
   form (OLD_NAME, NEW_NAME).  Insert in BB "RES = phi (NEW_NAME)",
   and finally register in SCOP_LIVEOUT_RENAMES (scop) the tuple
   (OLD_NAME, RES).  SSA form is updated before and after inserting
   the PHIs so that the new definitions are consistent.  */

static void
insert_loop_close_phis (scop_p scop, basic_block bb)
{
  update_ssa (TODO_update_ssa);
  htab_traverse (SCOP_LIVEOUT_RENAMES (scop), add_loop_exit_phis, bb);
  update_ssa (TODO_update_ssa);
}
4573 | |
/* Helper structure for htab_traverse in insert_guard_phis: carries the
   context needed by add_guard_exit_phis.  */

struct igp {
  basic_block bb;		  /* Block receiving the guard PHIs.  */
  edge true_edge, false_edge;	  /* Incoming edges of the PHI args.  */
  htab_t liveout_before_guard;	  /* Renames valid before the guard.  */
};
4581 | |
/* Return the name that OLD_NAME had before the guard, looked up in
   LIVEOUT_BEFORE_GUARD.  When there is no rename recorded (the lookup
   returns OLD_NAME itself): for a scalar (gimple reg), a zero constant
   of its type is used as the "undefined before the guard" value;
   otherwise the default SSA definition of the variable is returned.
   NOTE(review): the zero constant for scalars looks like a placeholder
   for an undefined value -- confirm against the guard-phi callers.  */

static tree
default_liveout_before_guard (htab_t liveout_before_guard, tree old_name)
{
  tree res = get_new_name_from_old_name (liveout_before_guard, old_name);

  if (res == old_name)
    {
      if (is_gimple_reg (res))
	return fold_convert (TREE_TYPE (res), integer_zero_node);
      return gimple_default_def (cfun, res);
    }

  return res;
}
4598 | |
/* Helper function for htab_traverse in insert_guard_phis.  SLOT is a
   (old_name, new_name) rename entry and S is a struct igp describing
   the guard.  Creates in the guard's join block a PHI merging the
   value produced under the guard (on the true edge) with the value
   that was live before the guard (on the false edge), and records the
   PHI result as the new rename.  Returns 1 to continue traversal.  */

static int
add_guard_exit_phis (void **slot, void *s)
{
  struct rename_map_elt *entry = (struct rename_map_elt *) *slot;
  struct igp *i = (struct igp *) s;
  basic_block bb = i->bb;
  edge true_edge = i->true_edge;
  edge false_edge = i->false_edge;
  tree name1 = entry->new_name;
  /* Fallback value when the guard was not executed.  */
  tree name2 = default_liveout_before_guard (i->liveout_before_guard,
					     entry->old_name);
  gimple phi = create_phi_node (name1, bb);
  tree res = create_new_def_for (gimple_phi_result (phi), phi,
				 gimple_phi_result_ptr (phi));

  add_phi_arg (phi, name1, true_edge);
  add_phi_arg (phi, name2, false_edge);

  entry->new_name = res;
  *slot = entry;
  return 1;
}
4623 | |
/* Iterate over the SCOP_LIVEOUT_RENAMES (SCOP) and get tuples of the
   form (OLD_NAME, NAME1).  If there is a correspondent tuple of
   OLD_NAME in LIVEOUT_BEFORE_GUARD, i.e. (OLD_NAME, NAME2) then
   insert in BB

   | RES = phi (NAME1 (on TRUE_EDGE), NAME2 (on FALSE_EDGE))"

   if there is no tuple for OLD_NAME in LIVEOUT_BEFORE_GUARD, insert

   | RES = phi (NAME1 (on TRUE_EDGE),
   |	       DEFAULT_DEFINITION of NAME1 (on FALSE_EDGE))".

   Finally register in SCOP_LIVEOUT_RENAMES (scop) the tuple
   (OLD_NAME, RES).  */

static void
insert_guard_phis (scop_p scop, basic_block bb, edge true_edge,
		   edge false_edge, htab_t liveout_before_guard)
{
  /* Pack the context for the hash-table traversal callback.  */
  struct igp i;
  i.bb = bb;
  i.true_edge = true_edge;
  i.false_edge = false_edge;
  i.liveout_before_guard = liveout_before_guard;

  update_ssa (TODO_update_ssa);
  htab_traverse (SCOP_LIVEOUT_RENAMES (scop), add_guard_exit_phis, &i);
  update_ssa (TODO_update_ssa);
}
4653 | |
4654 /* Helper function for htab_traverse. */ | |
4655 | |
4656 static int | |
4657 copy_renames (void **slot, void *s) | |
4658 { | |
4659 struct rename_map_elt *entry = (struct rename_map_elt *) *slot; | |
4660 htab_t res = (htab_t) s; | |
4661 tree old_name = entry->old_name; | |
4662 tree new_name = entry->new_name; | |
4663 struct rename_map_elt tmp; | |
4664 PTR *x; | |
4665 | |
4666 tmp.old_name = old_name; | |
4667 x = htab_find_slot (res, &tmp, INSERT); | |
4668 | |
4669 if (!*x) | |
4670 *x = new_rename_map_elt (old_name, new_name); | |
4671 | |
4672 return 1; | |
4673 } | |
4674 | |
/* Translates a CLAST statement STMT to GCC representation in the
   context of a SCOP.

   - NEXT_E is the edge where new generated code should be attached.
   - CONTEXT_LOOP is the loop in which the generated code will be placed
     (might be NULL).
   - IVSTACK contains the surrounding loops around the statement to be
     translated.

   Recursively dispatches on the kind of CLAST node, generating the
   corresponding CFG (basic block copy, loop, guard, block), then
   continues with STMT->next.  Returns the edge after the generated
   code.  */

static edge
translate_clast (scop_p scop, struct loop *context_loop,
		 struct clast_stmt *stmt, edge next_e, loop_iv_stack ivstack)
{
  if (!stmt)
    return next_e;

  /* The root node generates nothing by itself.  */
  if (CLAST_STMT_IS_A (stmt, stmt_root))
    return translate_clast (scop, context_loop, stmt->next, next_e, ivstack);

  if (CLAST_STMT_IS_A (stmt, stmt_user))
    {
      htab_t map;
      CloogStatement *cs = ((struct clast_user_stmt *) stmt)->statement;
      graphite_bb_p gbb = (graphite_bb_p) cloog_statement_usr (cs);

      /* The entry block is never copied.  */
      if (GBB_BB (gbb) == ENTRY_BLOCK_PTR)
	return next_e;

      /* Per-statement rename map (old IV names -> generated IVs).  */
      map = htab_create (10, rename_map_elt_info, eq_rename_map_elts, free);
      loop_iv_stack_patch_for_consts (ivstack, (struct clast_user_stmt *) stmt);
      build_iv_mapping (ivstack, map, gbb, scop);
      next_e = copy_bb_and_scalar_dependences (GBB_BB (gbb), scop,
					       next_e, map);
      htab_delete (map);
      loop_iv_stack_remove_constants (ivstack);
      update_ssa (TODO_update_ssa);
      recompute_all_dominators ();
      graphite_verify ();
      return translate_clast (scop, context_loop, stmt->next, next_e, ivstack);
    }

  if (CLAST_STMT_IS_A (stmt, stmt_for))
    {
      /* Build the loop skeleton, translate the body into it, then
	 close the loop with loop-close PHIs after its exit.  */
      struct loop *loop
	= graphite_create_new_loop (scop, next_e, (struct clast_for *) stmt,
				    ivstack, context_loop ? context_loop
				    : get_loop (0));
      edge last_e = single_exit (loop);

      next_e = translate_clast (scop, loop, ((struct clast_for *) stmt)->body,
				single_pred_edge (loop->latch), ivstack);
      /* Connect the end of the generated body back to the latch.  */
      redirect_edge_succ_nodup (next_e, loop->latch);

      set_immediate_dominator (CDI_DOMINATORS, next_e->dest, next_e->src);
      /* The IV pushed by graphite_create_new_loop is no longer in
	 scope.  */
      loop_iv_stack_pop (ivstack);
      last_e = single_succ_edge (split_edge (last_e));
      insert_loop_close_phis (scop, last_e->src);

      recompute_all_dominators ();
      graphite_verify ();
      return translate_clast (scop, context_loop, stmt->next, last_e, ivstack);
    }

  if (CLAST_STMT_IS_A (stmt, stmt_guard))
    {
      /* Snapshot the liveout renames before translating the guarded
	 code, so guard PHIs can merge with the pre-guard values.  */
      htab_t liveout_before_guard = htab_create (10, rename_map_elt_info,
						 eq_rename_map_elts, free);
      edge last_e = graphite_create_new_guard (scop, next_e,
					       ((struct clast_guard *) stmt),
					       ivstack);
      edge true_e = get_true_edge_from_guard_bb (next_e->dest);
      edge false_e = get_false_edge_from_guard_bb (next_e->dest);
      edge exit_true_e = single_succ_edge (true_e->dest);
      edge exit_false_e = single_succ_edge (false_e->dest);

      htab_traverse (SCOP_LIVEOUT_RENAMES (scop), copy_renames,
		     liveout_before_guard);

      next_e = translate_clast (scop, context_loop,
				((struct clast_guard *) stmt)->then,
				true_e, ivstack);
      insert_guard_phis (scop, last_e->src, exit_true_e, exit_false_e,
			 liveout_before_guard);
      htab_delete (liveout_before_guard);
      recompute_all_dominators ();
      graphite_verify ();

      return translate_clast (scop, context_loop, stmt->next, last_e, ivstack);
    }

  if (CLAST_STMT_IS_A (stmt, stmt_block))
    {
      /* A block only groups statements: translate its body in place.  */
      next_e = translate_clast (scop, context_loop,
				((struct clast_block *) stmt)->body,
				next_e, ivstack);
      recompute_all_dominators ();
      graphite_verify ();
      return translate_clast (scop, context_loop, stmt->next, next_e, ivstack);
    }

  gcc_unreachable ();
}
4778 | |
4779 /* Free the SCATTERING domain list. */ | |
4780 | |
4781 static void | |
4782 free_scattering (CloogDomainList *scattering) | |
4783 { | |
4784 while (scattering) | |
4785 { | |
4786 CloogDomain *dom = cloog_domain (scattering); | |
4787 CloogDomainList *next = cloog_next_domain (scattering); | |
4788 | |
4789 cloog_domain_free (dom); | |
4790 free (scattering); | |
4791 scattering = next; | |
4792 } | |
4793 } | |
4794 | |
/* Build cloog program for SCoP: translate every graphite_bb of SCOP
   into a CloogBlock/CloogLoop with its iteration domain and
   scattering (schedule), then attach everything to SCOP_PROG.  */

static void
build_cloog_prog (scop_p scop)
{
  int i;
  int max_nb_loops = scop_max_loop_depth (scop);
  graphite_bb_p gbb;
  CloogLoop *loop_list = NULL;
  CloogBlockList *block_list = NULL;
  CloogDomainList *scattering = NULL;
  CloogProgram *prog = SCOP_PROG (scop);
  /* 2 * d + 1 scattering dimensions encode the interleaving of d
     nested loops (text order between loops at each depth).  */
  int nbs = 2 * max_nb_loops + 1;
  int *scaldims = (int *) xmalloc (nbs * (sizeof (int)));

  cloog_program_set_nb_scattdims (prog, nbs);
  initialize_cloog_names (scop);

  for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gbb); i++)
    {
      /* Build new block.  */
      CloogMatrix *domain = GBB_DOMAIN (gbb);
      CloogStatement *stmt = cloog_statement_alloc (GBB_BB (gbb)->index);
      CloogBlock *block = cloog_block_alloc (stmt, 0, NULL,
					     nb_loops_around_gb (gbb));
      /* Link the CLooG statement back to its graphite_bb, so that
	 translate_clast can recover it.  */
      cloog_statement_set_usr (stmt, gbb);

      /* Add empty domain to all bbs, which do not yet have a domain, as they
	 are not part of any loop.  */
      if (domain == NULL)
      	{
          domain = cloog_matrix_alloc (0, scop_nb_params (scop) + 2);
          GBB_DOMAIN (gbb) = domain;
	}

      /* Build loop list.  */
      {
        CloogLoop *new_loop_list = cloog_loop_malloc ();
        cloog_loop_set_next (new_loop_list, loop_list);
        cloog_loop_set_domain (new_loop_list,
			       cloog_domain_matrix2domain (domain));
        cloog_loop_set_block (new_loop_list, block);
        loop_list = new_loop_list;
      }

      /* Build block list.  */
      {
        CloogBlockList *new_block_list = cloog_block_list_malloc ();

        cloog_block_list_set_next (new_block_list, block_list);
        cloog_block_list_set_block (new_block_list, block);
        block_list = new_block_list;
      }

      /* Build scattering list.  */
      {
        /* XXX: Replace with cloog_domain_list_alloc(), when available.  */
        CloogDomainList *new_scattering
	  = (CloogDomainList *) xmalloc (sizeof (CloogDomainList));
        CloogMatrix *scat_mat = schedule_to_scattering (gbb, nbs);

        cloog_set_next_domain (new_scattering, scattering);
        cloog_set_domain (new_scattering,
			  cloog_domain_matrix2domain (scat_mat));
        scattering = new_scattering;
        cloog_matrix_free (scat_mat);
      }
    }

  cloog_program_set_loop (prog, loop_list);
  cloog_program_set_blocklist (prog, block_list);

  for (i = 0; i < nbs; i++)
    scaldims[i] = 0 ;

  cloog_program_set_scaldims (prog, scaldims);

  /* Extract scalar dimensions to simplify the code generation problem.  */
  cloog_program_extract_scalars (prog, scattering);

  /* Apply scattering.  */
  cloog_program_scatter (prog, scattering);
  free_scattering (scattering);

  /* Iterators corresponding to scalar dimensions have to be extracted.  */
  cloog_names_scalarize (cloog_program_names (prog), nbs,
			 cloog_program_scaldims (prog));

  /* Free blocklist.  */
  {
    CloogBlockList *next = cloog_program_blocklist (prog);

    while (next)
      {
        CloogBlockList *toDelete = next;
        next = cloog_block_list_next (next);
        cloog_block_list_set_next (toDelete, NULL);
        cloog_block_list_set_block (toDelete, NULL);
        cloog_block_list_free (toDelete);
      }
    cloog_program_set_blocklist (prog, NULL);
  }
}
4898 | |
/* Return the options that will be used in GLOOG.  The caller is
   responsible for releasing the result with cloog_options_free.  */

static CloogOptions *
set_cloog_options (void)
{
  CloogOptions *options = cloog_options_malloc ();

  /* Change cloog output language to C.  If we do use FORTRAN instead, cloog
     will stop e.g. with "ERROR: unbounded loops not allowed in FORTRAN.", if
     we pass an incomplete program to cloog.  */
  options->language = LANGUAGE_C;

  /* Enable complex equality spreading: removes dummy statements
     (assignments) in the generated code which repeats the
     substitution equations for statements.  This is useless for
     GLooG.  */
  options->esp = 1;

  /* Enable C pretty-printing mode: normalizes the substitution
     equations for statements.  */
  options->cpp = 1;

  /* Allow cloog to build strides with a stride width different to one.
     This example has stride = 4:

     for (i = 0; i < 20; i += 4)
       A  */
  options->strides = 1;

  /* Disable optimizations and make cloog generate source code closer to the
     input.  This is useful for debugging,  but later we want the optimized
     code.

     XXX: We can not disable optimizations, as loop blocking is not working
     without them.  */
  if (0)
    {
      options->f = -1;
      options->l = INT_MAX;
    }

  return options;
}
4942 | |
4943 /* Prints STMT to STDERR. */ | |
4944 | |
4945 void | |
4946 debug_clast_stmt (struct clast_stmt *stmt) | |
4947 { | |
4948 CloogOptions *options = set_cloog_options (); | |
4949 | |
4950 pprint (stderr, stmt, 0, options); | |
4951 } | |
4952 | |
/* Find the right transform for the SCOP, and return a Cloog AST
   representing the new form of the program.  Runs CLooG code
   generation on the program built by build_cloog_prog, optionally
   dumping input and output when TDF_DETAILS dumping is enabled.  */

static struct clast_stmt *
find_transform (scop_p scop)
{
  struct clast_stmt *stmt;
  CloogOptions *options = set_cloog_options ();

  /* Connect new cloog prog generation to graphite.  */
  build_cloog_prog (scop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Cloog Input [\n");
      cloog_program_print (dump_file, SCOP_PROG(scop));
      fprintf (dump_file, "]\n");
    }

  /* Let CLooG generate the scanning code, then build its AST.  */
  SCOP_PROG (scop) = cloog_program_generate (SCOP_PROG (scop), options);
  stmt = cloog_clast_create (SCOP_PROG (scop), options);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Cloog Output[\n");
      pprint (dump_file, stmt, 0, options);
      cloog_program_dump_cloog (dump_file, SCOP_PROG (scop));
      fprintf (dump_file, "]\n");
    }

  cloog_options_free (options);
  return stmt;
}
4986 | |
4987 /* Remove from the CFG the REGION. */ | |
4988 | |
4989 static inline void | |
4990 remove_sese_region (sese region) | |
4991 { | |
4992 VEC (basic_block, heap) *bbs = NULL; | |
4993 basic_block entry_bb = SESE_ENTRY (region)->dest; | |
4994 basic_block exit_bb = SESE_EXIT (region)->dest; | |
4995 basic_block bb; | |
4996 int i; | |
4997 | |
4998 VEC_safe_push (basic_block, heap, bbs, entry_bb); | |
4999 gather_blocks_in_sese_region (entry_bb, exit_bb, &bbs); | |
5000 | |
5001 for (i = 0; VEC_iterate (basic_block, bbs, i, bb); i++) | |
5002 delete_basic_block (bb); | |
5003 | |
5004 VEC_free (basic_block, heap, bbs); | |
5005 } | |
5006 | |
/* An if-region: REGION is the outer single-entry single-exit region
   whose entry leads to the condition block; TRUE_REGION and
   FALSE_REGION are the regions hanging off the true and false edges
   of that condition.  */

typedef struct ifsese {
  sese region;
  sese true_region;
  sese false_region;
} *ifsese;
5012 | |
/* Returns the entry edge of the outer region of IF_REGION.  */

static inline edge
if_region_entry (ifsese if_region)
{
  return SESE_ENTRY (if_region->region);
}
5018 | |
/* Returns the exit edge of the outer region of IF_REGION.  */

static inline edge
if_region_exit (ifsese if_region)
{
  return SESE_EXIT (if_region->region);
}
5024 | |
/* Returns the block holding the condition of IF_REGION (the
   destination of its entry edge).  */

static inline basic_block
if_region_get_condition_block (ifsese if_region)
{
  return if_region_entry (if_region)->dest;
}
5030 | |
/* Sets REGION as the false (else) branch of IF_REGION: the dummy
   block standing on the false edge of the condition is deleted, and
   the edges are redirected so that REGION hangs off the false edge.
   If REGION's old exit edge was recorded as a loop exit, that record
   is moved to the new exit edge.  */

static inline void
if_region_set_false_region (ifsese if_region, sese region)
{
  basic_block condition = if_region_get_condition_block (if_region);
  edge false_edge = get_false_edge_from_guard_bb (condition);
  basic_block dummy = false_edge->dest;
  edge entry_region = SESE_ENTRY (region);
  edge exit_region = SESE_EXIT (region);
  basic_block before_region = entry_region->src;
  basic_block last_in_region = exit_region->src;
  /* Look up (before redirection) whether the region's exit edge was
     registered in the loop exits hash table.  */
  void **slot = htab_find_slot_with_hash (current_loops->exits, exit_region,
					  htab_hash_pointer (exit_region),
					  NO_INSERT);

  entry_region->flags = false_edge->flags;
  false_edge->flags = exit_region->flags;

  /* Splice REGION between CONDITION and the successor of DUMMY:
     condition -> region entry ... region exit -> single_succ (dummy).  */
  redirect_edge_pred (entry_region, condition);
  redirect_edge_pred (exit_region, before_region);
  redirect_edge_pred (false_edge, last_in_region);
  redirect_edge_succ (false_edge, single_succ (dummy));
  delete_basic_block (dummy);

  exit_region->flags = EDGE_FALLTHRU;
  recompute_all_dominators ();

  /* FALSE_EDGE is now the exit edge of the spliced region.  */
  SESE_EXIT (region) = false_edge;
  if_region->false_region = region;

  if (slot)
    {
      /* Move the loop exit record from the old exit edge to
	 FALSE_EDGE, the new exit edge of REGION.  */
      struct loop_exit *loop_exit = GGC_CNEW (struct loop_exit);

      memcpy (loop_exit, *((struct loop_exit **) slot), sizeof (struct loop_exit));
      htab_clear_slot (current_loops->exits, slot);

      slot = htab_find_slot_with_hash (current_loops->exits, false_edge,
				       htab_hash_pointer (false_edge),
				       INSERT);
      loop_exit->e = false_edge;
      *slot = loop_exit;
      /* NOTE(review): this links the new exit at the head of the loop
	 exits list without updating the previous head's prev pointer —
	 confirm against the loop exits list invariants in cfgloop.c.  */
      false_edge->src->loop_father->exits->next = loop_exit;
    }
}
5075 | |
5076 static ifsese | |
5077 create_if_region_on_edge (edge entry, tree condition) | |
5078 { | |
5079 edge e; | |
5080 edge_iterator ei; | |
5081 sese sese_region = GGC_NEW (struct sese); | |
5082 sese true_region = GGC_NEW (struct sese); | |
5083 sese false_region = GGC_NEW (struct sese); | |
5084 ifsese if_region = GGC_NEW (struct ifsese); | |
5085 edge exit = create_empty_if_region_on_edge (entry, condition); | |
5086 | |
5087 if_region->region = sese_region; | |
5088 if_region->region->entry = entry; | |
5089 if_region->region->exit = exit; | |
5090 | |
5091 FOR_EACH_EDGE (e, ei, entry->dest->succs) | |
5092 { | |
5093 if (e->flags & EDGE_TRUE_VALUE) | |
5094 { | |
5095 true_region->entry = e; | |
5096 true_region->exit = single_succ_edge (e->dest); | |
5097 if_region->true_region = true_region; | |
5098 } | |
5099 else if (e->flags & EDGE_FALSE_VALUE) | |
5100 { | |
5101 false_region->entry = e; | |
5102 false_region->exit = single_succ_edge (e->dest); | |
5103 if_region->false_region = false_region; | |
5104 } | |
5105 } | |
5106 | |
5107 return if_region; | |
5108 } | |
5109 | |
5110 /* Moves REGION in a condition expression: | |
5111 | if (1) | |
5112 | ; | |
5113 | else | |
5114 | REGION; | |
5115 */ | |
5116 | |
5117 static ifsese | |
5118 move_sese_in_condition (sese region) | |
5119 { | |
5120 basic_block pred_block = split_edge (SESE_ENTRY (region)); | |
5121 ifsese if_region = NULL; | |
5122 | |
5123 SESE_ENTRY (region) = single_succ_edge (pred_block); | |
5124 if_region = create_if_region_on_edge (single_pred_edge (pred_block), integer_one_node); | |
5125 if_region_set_false_region (if_region, region); | |
5126 | |
5127 return if_region; | |
5128 } | |
5129 | |
/* Add exit phis for USE on EXIT.  */

static void
scop_add_exit_phis_edge (basic_block exit, tree use, edge false_e, edge true_e)
{
  /* Create a phi node in EXIT merging USE coming along both incoming
     edges, and give the phi result a fresh SSA name.  */
  gimple phi = create_phi_node (use, exit);

  create_new_def_for (gimple_phi_result (phi), phi,
		      gimple_phi_result_ptr (phi));
  add_phi_arg (phi, use, false_e);
  add_phi_arg (phi, use, true_e);
}
5142 | |
/* Add phi nodes for VAR that is used in LIVEIN.  Phi nodes are
   inserted in block BB.  */

static void
scop_add_exit_phis_var (basic_block bb, tree var, bitmap livein,
			edge false_e, edge true_e)
{
  bitmap def;
  basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (var));

  /* NOTE(review): real operands clear the definition block's bit in
     LIVEIN while virtual operands set it — confirm this asymmetry is
     intended before changing.  */
  if (is_gimple_reg (var))
    bitmap_clear_bit (livein, def_bb->index);
  else
    bitmap_set_bit (livein, def_bb->index);

  /* Propagate liveness from the blocks in LIVEIN up to the
     definition block.  */
  def = BITMAP_ALLOC (NULL);
  bitmap_set_bit (def, def_bb->index);
  compute_global_livein (livein, def);
  BITMAP_FREE (def);

  scop_add_exit_phis_edge (bb, var, false_e, true_e);
}
5165 | |
/* Insert in the block BB phi nodes for variables defined in REGION
   and used outside the REGION.  The code generation moves REGION in
   the else clause of an "if (1)" and generates code in the then
   clause that is at this point empty:

   | if (1)
   |   empty;
   | else
   |   REGION;
*/

static void
scop_insert_phis_for_liveouts (sese region, basic_block bb,
			       edge false_e, edge true_e)
{
  unsigned i;
  bitmap_iterator bi;

  /* Keep the SSA form consistent before and after inserting the new
     exit phis.  */
  update_ssa (TODO_update_ssa);

  /* One exit phi per SSA name live out of the region.  */
  EXECUTE_IF_SET_IN_BITMAP (SESE_LIVEOUT (region), 0, i, bi)
    scop_add_exit_phis_var (bb, ssa_name (i), SESE_LIVEIN_VER (region, i),
			    false_e, true_e);

  update_ssa (TODO_update_ssa);
}
5192 | |
5193 /* Get the definition of NAME before the SCOP. Keep track of the | |
5194 basic blocks that have been VISITED in a bitmap. */ | |
5195 | |
5196 static tree | |
5197 get_vdef_before_scop (scop_p scop, tree name, sbitmap visited) | |
5198 { | |
5199 unsigned i; | |
5200 gimple def_stmt = SSA_NAME_DEF_STMT (name); | |
5201 basic_block def_bb = gimple_bb (def_stmt); | |
5202 | |
5203 if (!def_bb | |
5204 || !bb_in_sese_p (def_bb, SCOP_REGION (scop))) | |
5205 return name; | |
5206 | |
5207 if (TEST_BIT (visited, def_bb->index)) | |
5208 return NULL_TREE; | |
5209 | |
5210 SET_BIT (visited, def_bb->index); | |
5211 | |
5212 switch (gimple_code (def_stmt)) | |
5213 { | |
5214 case GIMPLE_PHI: | |
5215 for (i = 0; i < gimple_phi_num_args (def_stmt); i++) | |
5216 { | |
5217 tree arg = gimple_phi_arg_def (def_stmt, i); | |
5218 tree res = get_vdef_before_scop (scop, arg, visited); | |
5219 if (res) | |
5220 return res; | |
5221 } | |
5222 return NULL_TREE; | |
5223 | |
5224 default: | |
5225 return NULL_TREE; | |
5226 } | |
5227 } | |
5228 | |
/* Adjust a virtual phi node PHI that is placed at the end of the
   generated code for SCOP:

   | if (1)
   |   generated code from REGION;
   | else
   |   REGION;

   The FALSE_E edge comes from the original code, TRUE_E edge comes
   from the code generated for the SCOP.  */

static void
scop_adjust_vphi (scop_p scop, gimple phi, edge true_e)
{
  unsigned i;

  gcc_assert (gimple_phi_num_args (phi) == 2);

  for (i = 0; i < gimple_phi_num_args (phi); i++)
    if (gimple_phi_arg_edge (phi, i) == true_e)
      {
	tree true_arg, false_arg, before_scop_arg;
	sbitmap visited;

	/* Nothing to fix unless the generated-code argument is the
	   default definition while the original-code argument is
	   not.  */
	true_arg = gimple_phi_arg_def (phi, i);
	if (!SSA_NAME_IS_DEFAULT_DEF (true_arg))
	  return;

	false_arg = gimple_phi_arg_def (phi, i == 0 ? 1 : 0);
	if (SSA_NAME_IS_DEFAULT_DEF (false_arg))
	  return;

	/* Replace the default definition with the virtual definition
	   that reaches the region from above.  */
	visited = sbitmap_alloc (last_basic_block);
	sbitmap_zero (visited);
	before_scop_arg = get_vdef_before_scop (scop, false_arg, visited);
	gcc_assert (before_scop_arg != NULL_TREE);
	SET_PHI_ARG_DEF (phi, i, before_scop_arg);
	sbitmap_free (visited);
      }
}
5269 | |
/* Adjusts the phi nodes in the block BB for variables defined in
   SCOP_REGION and used outside the SCOP_REGION.  The code generation
   moves SCOP_REGION in the else clause of an "if (1)" and generates
   code in the then clause:

   | if (1)
   |   generated code from REGION;
   | else
   |   REGION;

   To adjust the phi nodes after the condition, SCOP_LIVEOUT_RENAMES
   hash table is used: this stores for a name that is part of the
   LIVEOUT of SCOP_REGION its new name in the generated code.  */

static void
scop_adjust_phis_for_liveouts (scop_p scop, basic_block bb, edge false_e,
			       edge true_e)
{
  gimple_stmt_iterator si;

  for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
    {
      unsigned i;
      unsigned false_i = 0;
      gimple phi = gsi_stmt (si);

      /* Virtual phi nodes are handled separately.  */
      if (!is_gimple_reg (PHI_RESULT (phi)))
	{
	  scop_adjust_vphi (scop, phi, true_e);
	  continue;
	}

      /* Find the argument index coming from the original code.  */
      for (i = 0; i < gimple_phi_num_args (phi); i++)
	if (gimple_phi_arg_edge (phi, i) == false_e)
	  {
	    false_i = i;
	    break;
	  }

      /* On the generated-code edge, substitute the generated code's
	 new name for the original-code argument, as recorded in the
	 liveout renames table.  */
      for (i = 0; i < gimple_phi_num_args (phi); i++)
	if (gimple_phi_arg_edge (phi, i) == true_e)
	  {
	    tree old_name = gimple_phi_arg_def (phi, false_i);
	    tree new_name = get_new_name_from_old_name
	      (SCOP_LIVEOUT_RENAMES (scop), old_name);

	    gcc_assert (old_name != new_name);
	    SET_PHI_ARG_DEF (phi, i, new_name);
	  }
    }
}
5321 | |
5322 /* Returns the first cloog name used in EXPR. */ | |
5323 | |
5324 static const char * | |
5325 find_cloog_iv_in_expr (struct clast_expr *expr) | |
5326 { | |
5327 struct clast_term *term = (struct clast_term *) expr; | |
5328 | |
5329 if (expr->type == expr_term | |
5330 && !term->var) | |
5331 return NULL; | |
5332 | |
5333 if (expr->type == expr_term) | |
5334 return term->var; | |
5335 | |
5336 if (expr->type == expr_red) | |
5337 { | |
5338 int i; | |
5339 struct clast_reduction *red = (struct clast_reduction *) expr; | |
5340 | |
5341 for (i = 0; i < red->n; i++) | |
5342 { | |
5343 const char *res = find_cloog_iv_in_expr ((red)->elts[i]); | |
5344 | |
5345 if (res) | |
5346 return res; | |
5347 } | |
5348 } | |
5349 | |
5350 return NULL; | |
5351 } | |
5352 | |
/* Build for a clast_user_stmt USER_STMT a map between the CLAST
   induction variables and the corresponding GCC old induction
   variables.  This information is stored on each GRAPHITE_BB.  */

static void
compute_cloog_iv_types_1 (graphite_bb_p gbb,
			  struct clast_user_stmt *user_stmt)
{
  struct clast_stmt *t;
  int index = 0;

  /* The substitutions of USER_STMT correspond, in order, to the loop
     levels of GBB.  */
  for (t = user_stmt->substitutions; t; t = t->next, index++)
    {
      PTR *slot;
      struct ivtype_map_elt tmp;
      struct clast_expr *expr = (struct clast_expr *)
	((struct clast_assignment *)t)->RHS;

      /* Create an entry (clast_var, type).  */
      tmp.cloog_iv = find_cloog_iv_in_expr (expr);
      if (!tmp.cloog_iv)
	continue;

      slot = htab_find_slot (GBB_CLOOG_IV_TYPES (gbb), &tmp, INSERT);

      if (!*slot)
	{
	  /* Use the type of the old induction variable of this loop
	     level; fall back to integer_type_node when none is
	     recorded.  */
	  loop_p loop = gbb_loop_at_index (gbb, index);
	  tree oldiv = oldiv_for_loop (GBB_SCOP (gbb), loop);
	  tree type = oldiv ? TREE_TYPE (oldiv) : integer_type_node;
	  *slot = new_ivtype_map_elt (tmp.cloog_iv, type);
	}
    }
}
5387 | |
5388 /* Walk the CLAST tree starting from STMT and build for each | |
5389 clast_user_stmt a map between the CLAST induction variables and the | |
5390 corresponding GCC old induction variables. This information is | |
5391 stored on each GRAPHITE_BB. */ | |
5392 | |
5393 static void | |
5394 compute_cloog_iv_types (struct clast_stmt *stmt) | |
5395 { | |
5396 if (!stmt) | |
5397 return; | |
5398 | |
5399 if (CLAST_STMT_IS_A (stmt, stmt_root)) | |
5400 goto next; | |
5401 | |
5402 if (CLAST_STMT_IS_A (stmt, stmt_user)) | |
5403 { | |
5404 CloogStatement *cs = ((struct clast_user_stmt *) stmt)->statement; | |
5405 graphite_bb_p gbb = (graphite_bb_p) cloog_statement_usr (cs); | |
5406 GBB_CLOOG_IV_TYPES (gbb) = htab_create (10, ivtype_map_elt_info, | |
5407 eq_ivtype_map_elts, free); | |
5408 compute_cloog_iv_types_1 (gbb, (struct clast_user_stmt *) stmt); | |
5409 goto next; | |
5410 } | |
5411 | |
5412 if (CLAST_STMT_IS_A (stmt, stmt_for)) | |
5413 { | |
5414 struct clast_stmt *s = ((struct clast_for *) stmt)->body; | |
5415 compute_cloog_iv_types (s); | |
5416 goto next; | |
5417 } | |
5418 | |
5419 if (CLAST_STMT_IS_A (stmt, stmt_guard)) | |
5420 { | |
5421 struct clast_stmt *s = ((struct clast_guard *) stmt)->then; | |
5422 compute_cloog_iv_types (s); | |
5423 goto next; | |
5424 } | |
5425 | |
5426 if (CLAST_STMT_IS_A (stmt, stmt_block)) | |
5427 { | |
5428 struct clast_stmt *s = ((struct clast_block *) stmt)->body; | |
5429 compute_cloog_iv_types (s); | |
5430 goto next; | |
5431 } | |
5432 | |
5433 gcc_unreachable (); | |
5434 | |
5435 next: | |
5436 compute_cloog_iv_types (stmt->next); | |
5437 } | |
5438 | |
/* GIMPLE Loop Generator: generates loops from STMT in GIMPLE form for
   the given SCOP.  Return true if code generation succeeded.  */

static bool
gloog (scop_p scop, struct clast_stmt *stmt)
{
  edge new_scop_exit_edge = NULL;
  VEC (iv_stack_entry_p, heap) *ivstack = VEC_alloc (iv_stack_entry_p, heap,
						     10);
  loop_p context_loop;
  ifsese if_region = NULL;

  recompute_all_dominators ();
  graphite_verify ();
  /* Move the SCOP into the else branch of an "if (1)"; the then
     branch will receive the generated code while the original region
     is kept in place.  */
  if_region = move_sese_in_condition (SCOP_REGION (scop));
  sese_build_livein_liveouts (SCOP_REGION (scop));
  scop_insert_phis_for_liveouts (SCOP_REGION (scop),
				 if_region->region->exit->src,
				 if_region->false_region->exit,
				 if_region->true_region->exit);
  recompute_all_dominators ();
  graphite_verify ();
  context_loop = SESE_ENTRY (SCOP_REGION (scop))->src->loop_father;
  compute_cloog_iv_types (stmt);

  /* Translate the clast to GIMPLE on the then branch; the clast is
     consumed and freed here.  */
  new_scop_exit_edge = translate_clast (scop, context_loop, stmt,
					if_region->true_region->entry,
					&ivstack);
  free_loop_iv_stack (&ivstack);
  cloog_clast_free (stmt);

  /* Fix the phi nodes after the condition to use the names produced
     by the generated code.  */
  graphite_verify ();
  scop_adjust_phis_for_liveouts (scop,
				 if_region->region->exit->src,
				 if_region->false_region->exit,
				 if_region->true_region->exit);

  recompute_all_dominators ();
  graphite_verify ();
  return true;
}
5480 | |
5481 /* Returns the number of data references in SCOP. */ | |
5482 | |
5483 static int | |
5484 nb_data_refs_in_scop (scop_p scop) | |
5485 { | |
5486 int i; | |
5487 graphite_bb_p gbb; | |
5488 int res = 0; | |
5489 | |
5490 for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gbb); i++) | |
5491 res += VEC_length (data_reference_p, GBB_DATA_REFS (gbb)); | |
5492 | |
5493 return res; | |
5494 } | |
5495 | |
/* Move the loop at index LOOP and insert it before index NEW_LOOP_POS.
   This transformation is only valid, if the loop nest between i and k is
   perfectly nested.  Therefore we do not need to change the static schedule.

   Example:

   for (i = 0; i < 50; i++)
     for (j ...)
       for (k = 5; k < 100; k++)
         A

   To move k before i use:

   graphite_trans_bb_move_loop (A, 2, 0)

   for (k = 5; k < 100; k++)
     for (i = 0; i < 50; i++)
       for (j ...)
         A

   And to move k back:

   graphite_trans_bb_move_loop (A, 0, 2)

   This function does not check the validity of interchanging loops.
   This should be checked before calling this function.  */

static void
graphite_trans_bb_move_loop (graphite_bb_p gb, int loop,
			     int new_loop_pos)
{
  CloogMatrix *domain = GBB_DOMAIN (gb);
  int row, j;
  loop_p tmp_loop_p;

  gcc_assert (loop < gbb_nb_loops (gb)
	      && new_loop_pos < gbb_nb_loops (gb));

  /* Update LOOPS vector.  */
  tmp_loop_p = VEC_index (loop_p, GBB_LOOPS (gb), loop);
  VEC_ordered_remove (loop_p, GBB_LOOPS (gb), loop);
  VEC_safe_insert (loop_p, heap, GBB_LOOPS (gb), new_loop_pos, tmp_loop_p);

  /* Move the domain columns.  Column j + 1 of the domain holds the
     coefficient of loop index j (column 0 is the eq/ineq marker).
     NOTE(review): the forward branch writes the saved column into
     NEW_LOOP_POS while the backward branch writes NEW_LOOP_POS + 1 —
     verify the forward move for an off-by-one; the only caller in
     this file (graphite_trans_bb_block) performs backward moves.  */
  if (loop < new_loop_pos)
    for (row = 0; row < domain->NbRows; row++)
      {
	Value tmp;
	value_init (tmp);
	value_assign (tmp, domain->p[row][loop + 1]);

	for (j = loop ; j < new_loop_pos - 1; j++)
	  value_assign (domain->p[row][j + 1], domain->p[row][j + 2]);

	value_assign (domain->p[row][new_loop_pos], tmp);
	value_clear (tmp);
      }
  else
    for (row = 0; row < domain->NbRows; row++)
      {
	Value tmp;
	value_init (tmp);
	value_assign (tmp, domain->p[row][loop + 1]);

	for (j = loop ; j > new_loop_pos; j--)
	  value_assign (domain->p[row][j + 1], domain->p[row][j]);

	value_assign (domain->p[row][new_loop_pos + 1], tmp);
	value_clear (tmp);
      }
}
5567 | |
/* Get the index of the column representing constants in the DOMAIN
   matrix.  */

static int
const_column_index (CloogMatrix *domain)
{
  /* The constant term is always the last column of a CloogMatrix
     constraint row.  */
  return domain->NbColumns - 1;
}
5576 | |
5577 | |
5578 /* Get the first index that is positive or negative, determined | |
5579 following the value of POSITIVE, in matrix DOMAIN in COLUMN. */ | |
5580 | |
5581 static int | |
5582 get_first_matching_sign_row_index (CloogMatrix *domain, int column, | |
5583 bool positive) | |
5584 { | |
5585 int row; | |
5586 | |
5587 for (row = 0; row < domain->NbRows; row++) | |
5588 { | |
5589 int val = value_get_si (domain->p[row][column]); | |
5590 | |
5591 if (val > 0 && positive) | |
5592 return row; | |
5593 | |
5594 else if (val < 0 && !positive) | |
5595 return row; | |
5596 } | |
5597 | |
5598 gcc_unreachable (); | |
5599 } | |
5600 | |
/* Get the lower bound of COLUMN in matrix DOMAIN.  */

static int
get_lower_bound_row (CloogMatrix *domain, int column)
{
  /* A positive coefficient on COLUMN bounds the variable from
     below.  */
  return get_first_matching_sign_row_index (domain, column, true);
}
5608 | |
/* Get the upper bound of COLUMN in matrix DOMAIN.  */

static int
get_upper_bound_row (CloogMatrix *domain, int column)
{
  /* A negative coefficient on COLUMN bounds the variable from
     above.  */
  return get_first_matching_sign_row_index (domain, column, false);
}
5616 | |
5617 /* Copies the OLD_ROW constraint from OLD_DOMAIN to the NEW_DOMAIN at | |
5618 row NEW_ROW. */ | |
5619 | |
5620 static void | |
5621 copy_constraint (CloogMatrix *old_domain, CloogMatrix *new_domain, | |
5622 int old_row, int new_row) | |
5623 { | |
5624 int i; | |
5625 | |
5626 gcc_assert (old_domain->NbColumns == new_domain->NbColumns | |
5627 && old_row < old_domain->NbRows | |
5628 && new_row < new_domain->NbRows); | |
5629 | |
5630 for (i = 0; i < old_domain->NbColumns; i++) | |
5631 value_assign (new_domain->p[new_row][i], old_domain->p[old_row][i]); | |
5632 } | |
5633 | |
/* Swap coefficients of variables X and Y on row R.  */

static void
swap_constraint_variables (CloogMatrix *domain,
			   int r, int x, int y)
{
  value_swap (domain->p[r][x], domain->p[r][y]);
}
5642 | |
5643 /* Scale by X the coefficient C of constraint at row R in DOMAIN. */ | |
5644 | |
5645 static void | |
5646 scale_constraint_variable (CloogMatrix *domain, | |
5647 int r, int c, int x) | |
5648 { | |
5649 Value strip_size_value; | |
5650 value_init (strip_size_value); | |
5651 value_set_si (strip_size_value, x); | |
5652 value_multiply (domain->p[r][c], domain->p[r][c], strip_size_value); | |
5653 value_clear (strip_size_value); | |
5654 } | |
5655 | |
/* Strip mines the loop of BB at the position LOOP_DEPTH with STRIDE.
   Always valid, but not always a performance improvement.  */

static void
graphite_trans_bb_strip_mine (graphite_bb_p gb, int loop_depth, int stride)
{
  int row, col;

  /* The new domain has one extra column for the strip loop, inserted
     right before the column of the stripped loop, and three extra
     constraint rows.  */
  CloogMatrix *domain = GBB_DOMAIN (gb);
  CloogMatrix *new_domain = cloog_matrix_alloc (domain->NbRows + 3,
						domain->NbColumns + 1);

  /* Column of the stripped (inner) loop, and of the new strip
     (outer) loop, in the new domain.  */
  int col_loop_old = loop_depth + 2;
  int col_loop_strip = col_loop_old - 1;

  gcc_assert (loop_depth <= gbb_nb_loops (gb) - 1);

  /* The strip loop has no GCC loop yet; reserve its slot.  */
  VEC_safe_insert (loop_p, heap, GBB_LOOPS (gb), loop_depth, NULL);

  GBB_DOMAIN (gb) = new_domain;

  /* Copy the old constraints, shifting by one the columns at and
     after the inserted strip loop.  */
  for (row = 0; row < domain->NbRows; row++)
    for (col = 0; col < domain->NbColumns; col++)
      if (col <= loop_depth)
	value_assign (new_domain->p[row][col], domain->p[row][col]);
      else
	value_assign (new_domain->p[row][col + 1], domain->p[row][col]);

  row = domain->NbRows;

  /* Lower bound of the outer stripped loop.  */
  copy_constraint (new_domain, new_domain,
		   get_lower_bound_row (new_domain, col_loop_old), row);
  swap_constraint_variables (new_domain, row, col_loop_old, col_loop_strip);
  row++;

  /* Upper bound of the outer stripped loop.  */
  copy_constraint (new_domain, new_domain,
		   get_upper_bound_row (new_domain, col_loop_old), row);
  swap_constraint_variables (new_domain, row, col_loop_old, col_loop_strip);
  scale_constraint_variable (new_domain, row, col_loop_strip, stride);
  row++;

  /* Lower bound of a tile starts at "stride * outer_iv".  */
  row = get_lower_bound_row (new_domain, col_loop_old);
  value_set_si (new_domain->p[row][0], 1);
  value_set_si (new_domain->p[row][const_column_index (new_domain)], 0);
  value_set_si (new_domain->p[row][col_loop_old], 1);
  value_set_si (new_domain->p[row][col_loop_strip], -1 * stride);

  /* Upper bound of a tile stops at "stride * outer_iv + stride - 1",
     or at the old upper bound that is not modified.  */
  row = new_domain->NbRows - 1;
  value_set_si (new_domain->p[row][0], 1);
  value_set_si (new_domain->p[row][col_loop_old], -1);
  value_set_si (new_domain->p[row][col_loop_strip], stride);
  value_set_si (new_domain->p[row][const_column_index (new_domain)],
		stride - 1);

  cloog_matrix_free (domain);

  /* Update static schedule: make room for the new loop level.  */
  {
    int i;
    int nb_loops = gbb_nb_loops (gb);
    lambda_vector new_schedule = lambda_vector_new (nb_loops + 1);

    for (i = 0; i <= loop_depth; i++)
      new_schedule[i] = GBB_STATIC_SCHEDULE (gb)[i];

    for (i = loop_depth + 1; i <= nb_loops - 2; i++)
      new_schedule[i + 2] = GBB_STATIC_SCHEDULE (gb)[i];

    GBB_STATIC_SCHEDULE (gb) = new_schedule;
  }
}
5732 | |
5733 /* Returns true when the strip mining of LOOP_INDEX by STRIDE is | |
5734 profitable or undecidable. GB is the statement around which the | |
5735 loops will be strip mined. */ | |
5736 | |
5737 static bool | |
5738 strip_mine_profitable_p (graphite_bb_p gb, int stride, | |
5739 int loop_index) | |
5740 { | |
5741 bool res = true; | |
5742 edge exit = NULL; | |
5743 tree niter; | |
5744 loop_p loop; | |
5745 long niter_val; | |
5746 | |
5747 loop = VEC_index (loop_p, GBB_LOOPS (gb), loop_index); | |
5748 exit = single_exit (loop); | |
5749 | |
5750 niter = find_loop_niter (loop, &exit); | |
5751 if (niter == chrec_dont_know | |
5752 || TREE_CODE (niter) != INTEGER_CST) | |
5753 return true; | |
5754 | |
5755 niter_val = int_cst_value (niter); | |
5756 | |
5757 if (niter_val < stride) | |
5758 { | |
5759 res = false; | |
5760 if (dump_file && (dump_flags & TDF_DETAILS)) | |
5761 { | |
5762 fprintf (dump_file, "\nStrip Mining is not profitable for loop %d:", | |
5763 loop->num); | |
5764 fprintf (dump_file, "number of iterations is too low.\n"); | |
5765 } | |
5766 } | |
5767 | |
5768 return res; | |
5769 } | |
5770 | |
5771 /* Determines when the interchange of LOOP_A and LOOP_B belonging to | |
5772 SCOP is legal. DEPTH is the number of loops around. */ | |
5773 | |
5774 static bool | |
5775 is_interchange_valid (scop_p scop, int loop_a, int loop_b, int depth) | |
5776 { | |
5777 bool res; | |
5778 VEC (ddr_p, heap) *dependence_relations; | |
5779 VEC (data_reference_p, heap) *datarefs; | |
5780 | |
5781 struct loop *nest = VEC_index (loop_p, SCOP_LOOP_NEST (scop), loop_a); | |
5782 lambda_trans_matrix trans; | |
5783 | |
5784 gcc_assert (loop_a < loop_b); | |
5785 | |
5786 dependence_relations = VEC_alloc (ddr_p, heap, 10 * 10); | |
5787 datarefs = VEC_alloc (data_reference_p, heap, 10); | |
5788 | |
5789 if (!compute_data_dependences_for_loop (nest, true, &datarefs, | |
5790 &dependence_relations)) | |
5791 return false; | |
5792 | |
5793 if (dump_file && (dump_flags & TDF_DETAILS)) | |
5794 dump_ddrs (dump_file, dependence_relations); | |
5795 | |
5796 trans = lambda_trans_matrix_new (depth, depth); | |
5797 lambda_matrix_id (LTM_MATRIX (trans), depth); | |
5798 | |
5799 lambda_matrix_row_exchange (LTM_MATRIX (trans), 0, loop_b - loop_a); | |
5800 | |
5801 if (!lambda_transform_legal_p (trans, depth, dependence_relations)) | |
5802 { | |
5803 lambda_matrix_row_exchange (LTM_MATRIX (trans), 0, loop_b - loop_a); | |
5804 res = false; | |
5805 } | |
5806 else | |
5807 res = true; | |
5808 | |
5809 free_dependence_relations (dependence_relations); | |
5810 free_data_refs (datarefs); | |
5811 return res; | |
5812 } | |
5813 | |
/* Loop block the LOOPS innermost loops of GB with stride size STRIDE.

   Example

   for (i = 0; i <= 50; i++)
     for (k = 0; k <= 100; k++)
       for (l = 0; l <= 200; l++)
         A

   To strip mine the two inner most loops with stride = 4 call:

   graphite_trans_bb_block (A, 4, 2)

   for (i = 0; i <= 50; i++)
     for (kk = 0; kk <= 100; kk+=4)
       for (ll = 0; ll <= 200; ll+=4)
         for (k = kk; k <= min (100, kk + 3); k++)
           for (l = ll; l <= min (200, ll + 3); l++)
             A
*/

static bool
graphite_trans_bb_block (graphite_bb_p gb, int stride, int loops)
{
  int i, j;
  int nb_loops = gbb_nb_loops (gb);
  int start = nb_loops - loops;
  scop_p scop = GBB_SCOP (gb);

  gcc_assert (scop_contains_loop (scop, gbb_loop (gb)));

  /* Every pair of loops in the blocked band must be legally
     interchangeable, otherwise give up.  */
  for (i = start ; i < nb_loops; i++)
    for (j = i + 1; j < nb_loops; j++)
      if (!is_interchange_valid (scop, i, j, nb_loops))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file,
		     "\nInterchange not valid for loops %d and %d:\n", i, j);
	  return false;
	}
      else if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "\nInterchange valid for loops %d and %d:\n", i, j);

  /* Check if strip mining is profitable for every loop.  */
  for (i = 0; i < nb_loops - start; i++)
    if (!strip_mine_profitable_p (gb, stride, start + i))
      return false;

  /* Strip mine loops.  Each strip mine inserts one loop level in
     front of the stripped loop, hence the index "start + 2 * i".  */
  for (i = 0; i < nb_loops - start; i++)
    graphite_trans_bb_strip_mine (gb, start + 2 * i, stride);

  /* Interchange loops: move each stripped point loop backward, below
     the strip loops.  */
  for (i = 1; i < nb_loops - start; i++)
    graphite_trans_bb_move_loop (gb, start + 2 * i, start + i);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\nLoops containing BB %d will be loop blocked.\n",
	     GBB_BB (gb)->index);

  return true;
}
5877 | |
5878 /* Loop block LOOPS innermost loops of a loop nest. BBS represent the | |
5879 basic blocks that belong to the loop nest to be blocked. */ | |
5880 | |
5881 static bool | |
5882 graphite_trans_loop_block (VEC (graphite_bb_p, heap) *bbs, int loops) | |
5883 { | |
5884 graphite_bb_p gb; | |
5885 int i; | |
5886 bool transform_done = false; | |
5887 | |
5888 /* TODO: - Calculate the stride size automatically. */ | |
5889 int stride_size = 51; | |
5890 | |
5891 for (i = 0; VEC_iterate (graphite_bb_p, bbs, i, gb); i++) | |
5892 transform_done |= graphite_trans_bb_block (gb, stride_size, loops); | |
5893 | |
5894 return transform_done; | |
5895 } | |
5896 | |
5897 /* Loop block all basic blocks of SCOP. Return false when the | |
5898 transform is not performed. */ | |
5899 | |
5900 static bool | |
5901 graphite_trans_scop_block (scop_p scop) | |
5902 { | |
5903 graphite_bb_p gb; | |
5904 int i, j; | |
5905 int last_nb_loops; | |
5906 int nb_loops; | |
5907 bool perfect = true; | |
5908 bool transform_done = false; | |
5909 | |
5910 VEC (graphite_bb_p, heap) *bbs = VEC_alloc (graphite_bb_p, heap, 3); | |
5911 int max_schedule = scop_max_loop_depth (scop) + 1; | |
5912 lambda_vector last_schedule = lambda_vector_new (max_schedule); | |
5913 | |
5914 if (VEC_length (graphite_bb_p, SCOP_BBS (scop)) == 0) | |
5915 return false; | |
5916 | |
5917 /* Get the data of the first bb. */ | |
5918 gb = VEC_index (graphite_bb_p, SCOP_BBS (scop), 0); | |
5919 last_nb_loops = gbb_nb_loops (gb); | |
5920 lambda_vector_copy (GBB_STATIC_SCHEDULE (gb), last_schedule, | |
5921 last_nb_loops + 1); | |
5922 VEC_safe_push (graphite_bb_p, heap, bbs, gb); | |
5923 | |
5924 for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gb); i++) | |
5925 { | |
5926 /* We did the first bb before. */ | |
5927 if (i == 0) | |
5928 continue; | |
5929 | |
5930 nb_loops = gbb_nb_loops (gb); | |
5931 | |
5932 /* If the number of loops is unchanged and only the last element of the | |
5933 schedule changes, we stay in the loop nest. */ | |
5934 if (nb_loops == last_nb_loops | |
5935 && (last_schedule [nb_loops + 1] | |
5936 != GBB_STATIC_SCHEDULE (gb)[nb_loops + 1])) | |
5937 { | |
5938 VEC_safe_push (graphite_bb_p, heap, bbs, gb); | |
5939 continue; | |
5940 } | |
5941 | |
5942 /* Otherwise, we left the innermost loop. So check, if the last bb was in | |
5943 a perfect loop nest and how many loops are contained in this perfect | |
5944 loop nest. | |
5945 | |
5946 Count the number of zeros from the end of the schedule. They are the | |
5947 number of surrounding loops. | |
5948 | |
5949 Example: | |
5950 last_bb 2 3 2 0 0 0 0 3 | |
5951 bb 2 4 0 | |
5952 <------ j = 4 | |
5953 | |
5954 last_bb 2 3 2 0 0 0 0 3 | |
5955 bb 2 3 2 0 1 | |
5956 <-- j = 2 | |
5957 | |
5958 If there is no zero, there were other bbs in outer loops and the loop | |
5959 nest is not perfect. */ | |
5960 for (j = last_nb_loops - 1; j >= 0; j--) | |
5961 { | |
5962 if (last_schedule [j] != 0 | |
5963 || (j <= nb_loops && GBB_STATIC_SCHEDULE (gb)[j] == 1)) | |
5964 { | |
5965 j--; | |
5966 break; | |
5967 } | |
5968 } | |
5969 | |
5970 j++; | |
5971 | |
5972 /* Found perfect loop nest. */ | |
5973 if (perfect && last_nb_loops - j >= 2) | |
5974 transform_done |= graphite_trans_loop_block (bbs, last_nb_loops - j); | |
5975 | |
5976 /* Check if we start with a new loop. | |
5977 | |
5978 Example: | |
5979 | |
5980 last_bb 2 3 2 0 0 0 0 3 | |
5981 bb 2 3 2 0 0 1 0 | |
5982 | |
5983 Here we start with the loop "2 3 2 0 0 1" | |
5984 | |
5985 last_bb 2 3 2 0 0 0 0 3 | |
5986 bb 2 3 2 0 0 1 | |
5987 | |
5988 But here not, so the loop nest can never be perfect. */ | |
5989 | |
5990 perfect = (GBB_STATIC_SCHEDULE (gb)[nb_loops] == 0); | |
5991 | |
5992 /* Update the last_bb infos. We do not do that for the bbs in the same | |
5993 loop, as the data we use is not changed. */ | |
5994 last_nb_loops = nb_loops; | |
5995 lambda_vector_copy (GBB_STATIC_SCHEDULE (gb), last_schedule, | |
5996 nb_loops + 1); | |
5997 VEC_truncate (graphite_bb_p, bbs, 0); | |
5998 VEC_safe_push (graphite_bb_p, heap, bbs, gb); | |
5999 } | |
6000 | |
6001 /* Check if the last loop nest was perfect. It is the same check as above, | |
6002 but the comparison with the next bb is missing. */ | |
6003 for (j = last_nb_loops - 1; j >= 0; j--) | |
6004 if (last_schedule [j] != 0) | |
6005 { | |
6006 j--; | |
6007 break; | |
6008 } | |
6009 | |
6010 j++; | |
6011 | |
6012 /* Found perfect loop nest. */ | |
6013 if (last_nb_loops - j >= 2) | |
6014 transform_done |= graphite_trans_loop_block (bbs, last_nb_loops - j); | |
6015 VEC_free (graphite_bb_p, heap, bbs); | |
6016 | |
6017 return transform_done; | |
6018 } | |
6019 | |
6020 /* Apply graphite transformations to all the basic blocks of SCOP. */ | |
6021 | |
6022 static bool | |
6023 graphite_apply_transformations (scop_p scop) | |
6024 { | |
6025 bool transform_done = false; | |
6026 | |
6027 /* Sort the list of bbs. Keep them always sorted. */ | |
6028 graphite_sort_gbbs (scop); | |
6029 | |
6030 if (flag_loop_block) | |
6031 transform_done = graphite_trans_scop_block (scop); | |
6032 | |
6033 /* Generate code even if we did not apply any real transformation. | |
6034 This also allows to check the performance for the identity | |
6035 transformation: GIMPLE -> GRAPHITE -> GIMPLE | |
6036 Keep in mind that CLooG optimizes in control, so the loop structure | |
6037 may change, even if we only use -fgraphite-identity. */ | |
6038 if (flag_graphite_identity) | |
6039 transform_done = true; | |
6040 | |
6041 return transform_done; | |
6042 } | |
6043 | |
6044 /* We limit all SCoPs to SCoPs, that are completely surrounded by a loop. | |
6045 | |
6046 Example: | |
6047 | |
6048 for (i | | |
6049 { | | |
6050 for (j | SCoP 1 | |
6051 for (k | | |
6052 } | | |
6053 | |
6054 * SCoP frontier, as this line is not surrounded by any loop. * | |
6055 | |
6056 for (l | SCoP 2 | |
6057 | |
   This is necessary as scalar evolution and parameter detection need an
   outermost loop to initialize parameters correctly.
6060 | |
6061 TODO: FIX scalar evolution and parameter detection to allow more flexible | |
6062 SCoP frontiers. */ | |
6063 | |
static void
limit_scops (void)
{
  /* Collected single-entry/single-exit regions, one per outermost loop
     of every current SCoP.  */
  VEC (sd_region, heap) *tmp_scops = VEC_alloc (sd_region, heap, 3);

  int i;
  scop_p scop;

  for (i = 0; VEC_iterate (scop_p, current_scops, i, scop); i++)
    {
      int j;
      loop_p loop;
      build_scop_bbs (scop);

      /* Skip SCoPs whose loop nests cannot be built.  */
      if (!build_scop_loop_nests (scop))
	continue;

      /* For each loop of the SCoP whose parent loop lies outside the
	 region (i.e. each outermost loop of the SCoP), record a new
	 region spanning exactly that loop.  */
      for (j = 0; VEC_iterate (loop_p, SCOP_LOOP_NEST (scop), j, loop); j++)
	if (!loop_in_sese_p (loop_outer (loop), SCOP_REGION (scop)))
	  {
	    sd_region open_scop;
	    open_scop.entry = loop->header;
	    /* NOTE(review): single_exit may return NULL for loops with
	       several exits; presumably SCoP detection guarantees a
	       single exit here -- confirm against build_scops.  */
	    open_scop.exit = single_exit (loop)->dest;
	    VEC_safe_push (sd_region, heap, tmp_scops, &open_scop);
	  }
    }

  /* Replace the old SCoPs with SCoPs rebuilt from the per-loop
     regions collected above.  */
  free_scops (current_scops);
  current_scops = VEC_alloc (scop_p, heap, 3);

  create_sese_edges (tmp_scops);
  build_graphite_scops (tmp_scops);
  VEC_free (sd_region, heap, tmp_scops);
}
6098 | |
/* Perform a set of linear transforms on the loops of the current
   function.  Entry point of the Graphite pass: builds the polyhedral
   representation of each SCoP, applies the transformations, and
   regenerates GIMPLE through CLooG.  */

void
graphite_transform_loops (void)
{
  int i;
  scop_p scop;
  bool transform_done = false;

  /* Nothing to do when the function has no loops (loop 0 is the
     function body itself).  */
  if (number_of_loops () <= 1)
    return;

  current_scops = VEC_alloc (scop_p, heap, 3);
  recompute_all_dominators ();

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Graphite loop transformations \n");

  /* CLooG must be initialized before any clast is built, and the
     original/copy tables before any code is duplicated; both are torn
     down at the end of this function.  */
  initialize_original_copy_tables ();
  cloog_initialize ();
  build_scops ();
  limit_scops ();

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\nnumber of SCoPs: %d\n",
	     VEC_length (scop_p, current_scops));

  for (i = 0; VEC_iterate (scop_p, current_scops, i, scop); i++)
    {
      /* Each build_* step below can fail; on failure the SCoP is
	 skipped and left untransformed.  */
      build_scop_bbs (scop);
      if (!build_scop_loop_nests (scop))
	continue;

      build_bb_loops (scop);

      if (!build_scop_conditions (scop))
	continue;

      find_scop_parameters (scop);
      build_scop_context (scop);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "\n(In SCoP %d:\n", i);
	  fprintf (dump_file, "\nnumber of bbs: %d\n",
		   VEC_length (graphite_bb_p, SCOP_BBS (scop)));
	  fprintf (dump_file, "\nnumber of loops: %d)\n",
		   VEC_length (loop_p, SCOP_LOOP_NEST (scop)));
	}

      if (!build_scop_iteration_domain (scop))
	continue;

      add_conditions_to_constraints (scop);
      build_scop_canonical_schedules (scop);

      build_scop_data_accesses (scop);
      build_scop_dynamic_schedules (scop);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  int nbrefs = nb_data_refs_in_scop (scop);
	  fprintf (dump_file, "\nnumber of data refs: %d\n", nbrefs);
	}

      /* Regenerate GIMPLE only when a transformation was applied.  */
      if (graphite_apply_transformations (scop))
	transform_done = gloog (scop, find_transform (scop));
#ifdef ENABLE_CHECKING
      else
	{
	  /* Exercise clast generation even for untransformed SCoPs so
	     checking builds catch CLooG interfacing bugs; the clast is
	     discarded immediately.  */
	  struct clast_stmt *stmt = find_transform (scop);
	  cloog_clast_free (stmt);
	}
#endif
    }

  /* Cleanup.  */
  if (transform_done)
    cleanup_tree_cfg ();

  free_scops (current_scops);
  cloog_finalize ();
  free_original_copy_tables ();
}
6184 | |
6185 #else /* If Cloog is not available: #ifndef HAVE_cloog. */ | |
6186 | |
/* Stub used when GCC is built without CLooG (HAVE_cloog undefined):
   report that Graphite optimizations are unavailable.  */
void
graphite_transform_loops (void)
{
  sorry ("Graphite loop optimizations cannot be used");
}
6192 | |
6193 #endif |