/* Lowering and expansion of OpenMP directives for HSA GPU agents.

   Copyright (C) 2013-2017 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "tree-pass.h"
#include "ssa.h"
#include "cgraph.h"
#include "pretty-print.h"
#include "fold-const.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimple-walk.h"
#include "tree-inline.h"
#include "langhooks.h"
#include "omp-general.h"
#include "omp-low.h"
#include "omp-grid.h"
#include "gimple-pretty-print.h"

/* Return the lastprivate predicate for a given gridified loop described by
   FD.  */

tree
omp_grid_lastprivate_predicate (struct omp_for_data *fd)
{
  /* When dealing with a gridified loop, we need to check up to three collapsed
     iteration variables but they are not actually captured in this fd.
     Fortunately, we can easily rely on HSA builtins to get this
     information.  */

  tree id, size;
  if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP
      && gimple_omp_for_grid_intra_group (fd->for_stmt))
    {
      id = builtin_decl_explicit (BUILT_IN_HSA_WORKITEMID);
      size = builtin_decl_explicit (BUILT_IN_HSA_CURRENTWORKGROUPSIZE);
    }
  else
    {
      id = builtin_decl_explicit (BUILT_IN_HSA_WORKITEMABSID);
      size = builtin_decl_explicit (BUILT_IN_HSA_GRIDSIZE);
    }
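  /* The predicate is a conjunction over all collapsed dimensions of
     ID (dim) + 1 == SIZE (dim), i.e. it only holds for the work-item with the
     highest index in every dimension.  */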
  tree cond = NULL;
  for (int dim = 0; dim < fd->collapse; dim++)
    {
      tree dim_tree = build_int_cstu (unsigned_type_node, dim);
      tree u1 = build_int_cstu (unsigned_type_node, 1);
      tree c2
        = build2 (EQ_EXPR, boolean_type_node,
                  build2 (PLUS_EXPR, unsigned_type_node,
                          build_call_expr (id, 1, dim_tree), u1),
                  build_call_expr (size, 1, dim_tree));
      if (cond)
        cond = build2 (TRUTH_AND_EXPR, boolean_type_node, cond, c2);
      else
        cond = c2;
    }
  return cond;
}

/* Structure describing the basic properties of the loop we are analyzing
   whether it can be gridified and when it is gridified.  */

struct grid_prop
{
  /* True when we are doing tiling gridification, i.e. when there is a distinct
     distribute loop over groups and a loop construct over work-items.  False
     when distribute and parallel for loops form a combined construct.  */
  bool tiling;
  /* Location of the target construct for optimization information
     messages.  */
  location_t target_loc;
  /* The collapse clause of the involved loops.  Collapse value of all of them
     must be the same for gridification to take place.  */
  size_t collapse;
  /* Group sizes, if requested by the user or NULL if not requested.  */
  tree group_sizes[3];
};
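
/* For orientation, the simple (combined) pattern this pass looks for is, in
   source form, roughly

     #pragma omp target
     #pragma omp teams
     #pragma omp distribute parallel for
     for (...)
       ...

   whereas in the tiling pattern the distribute and the parallel for are
   separate loops, with the distribute stepping over tiles whose size matches
   the bounds of the inner loop.  */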

#define GRID_MISSED_MSG_PREFIX "Will not turn target construct into a " \
  "gridified HSA kernel because "

/* Return true if STMT is an assignment of a register-type into a local
   VAR_DECL.  If GRID is non-NULL, the assignment additionally must not be to
   any of the trees specifying group sizes there.  */

static bool
grid_safe_assignment_p (gimple *stmt, grid_prop *grid)
{
  gassign *assign = dyn_cast <gassign *> (stmt);
  if (!assign)
    return false;
  if (gimple_clobber_p (assign))
    return true;
  tree lhs = gimple_assign_lhs (assign);
  if (!VAR_P (lhs)
      || !is_gimple_reg_type (TREE_TYPE (lhs))
      || is_global_var (lhs))
    return false;
  if (grid)
    for (unsigned i = 0; i < grid->collapse; i++)
      if (lhs == grid->group_sizes[i])
        return false;
  return true;
}

/* Return true if all statements in SEQ are assignments to local register-type
   variables that do not hold group size information.  */

static bool
grid_seq_only_contains_local_assignments (gimple_seq seq, grid_prop *grid)
{
  if (!seq)
    return true;

  gimple_stmt_iterator gsi;
  for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi))
    if (!grid_safe_assignment_p (gsi_stmt (gsi), grid))
      return false;
  return true;
}

/* Scan statements in SEQ and call itself recursively on any bind.  GRID
   describes hitherto discovered properties of the loop that is evaluated for
   possible gridification.  If during the whole search only assignments to
   register-type local variables (that do not overwrite group size
   information) and a single OMP statement are encountered, return true,
   otherwise return false.  RET is where we store any OMP statement
   encountered.  */

static bool
grid_find_single_omp_among_assignments_1 (gimple_seq seq, grid_prop *grid,
                                          const char *name, gimple **ret)
{
  gimple_stmt_iterator gsi;
  for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);

      if (grid_safe_assignment_p (stmt, grid))
        continue;
      if (gbind *bind = dyn_cast <gbind *> (stmt))
        {
          gimple_seq bind_body = gimple_bind_body (bind);
          if (!grid_find_single_omp_among_assignments_1 (bind_body, grid, name,
                                                         ret))
            return false;
        }
      else if (is_gimple_omp (stmt))
        {
          if (*ret)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                                   GRID_MISSED_MSG_PREFIX "%s construct "
                                   "contains multiple OpenMP constructs\n",
                                   name);
                  dump_printf_loc (MSG_NOTE, gimple_location (*ret),
                                   "The first OpenMP construct within "
                                   "a parallel\n");
                  dump_printf_loc (MSG_NOTE, gimple_location (stmt),
                                   "The second OpenMP construct within "
                                   "a parallel\n");
                }
              return false;
            }
          *ret = stmt;
        }
      else
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                               GRID_MISSED_MSG_PREFIX "%s construct contains "
                               "a complex statement\n", name);
              dump_printf_loc (MSG_NOTE, gimple_location (stmt),
                               "This statement cannot be analyzed for "
                               "gridification\n");
            }
          return false;
        }
    }
  return true;
}

/* Scan statements in SEQ and make sure that it and any binds in it contain
   only assignments to local register-type variables (that do not overwrite
   group size information) and one OMP construct.  If so, return that
   construct, otherwise return NULL.  GRID describes hitherto discovered
   properties of the loop that is evaluated for possible gridification.  If
   dumping is enabled and the function fails, use NAME to dump a note with the
   reason for failure.  */

static gimple *
grid_find_single_omp_among_assignments (gimple_seq seq, grid_prop *grid,
                                        const char *name)
{
  if (!seq)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                         GRID_MISSED_MSG_PREFIX "%s construct has empty body\n",
                         name);
      return NULL;
    }

  gimple *ret = NULL;
  if (grid_find_single_omp_among_assignments_1 (seq, grid, name, &ret))
    {
      if (!ret && dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                         GRID_MISSED_MSG_PREFIX "%s construct does not contain"
                         " any OpenMP construct\n", name);
      return ret;
    }
  else
    return NULL;
}

/* Walker function looking for statements that there is no point in gridifying
   (and for noreturn function calls, which we cannot handle).  Return non-NULL
   if such a statement is found.  */

static tree
grid_find_ungridifiable_statement (gimple_stmt_iterator *gsi,
                                   bool *handled_ops_p,
                                   struct walk_stmt_info *wi)
{
  *handled_ops_p = false;
  gimple *stmt = gsi_stmt (*gsi);
  switch (gimple_code (stmt))
    {
    case GIMPLE_CALL:
      if (gimple_call_noreturn_p (as_a <gcall *> (stmt)))
        {
          *handled_ops_p = true;
          wi->info = stmt;
          return error_mark_node;
        }
      break;

      /* We may reduce the following list if we find a way to implement the
         clauses, but for now there is no point in trying further.  */
    case GIMPLE_OMP_CRITICAL:
    case GIMPLE_OMP_TASKGROUP:
    case GIMPLE_OMP_TASK:
    case GIMPLE_OMP_SECTION:
    case GIMPLE_OMP_SECTIONS:
    case GIMPLE_OMP_SECTIONS_SWITCH:
    case GIMPLE_OMP_TARGET:
    case GIMPLE_OMP_ORDERED:
      *handled_ops_p = true;
      wi->info = stmt;
      return error_mark_node;
    default:
      break;
    }
  return NULL;
}

/* Examine clauses of omp parallel statement PAR and if any prevents
   gridification, issue a missed-optimization diagnostic and return false,
   otherwise return true.  TLOC is the location of the enclosing target
   construct, used for diagnostics.  */

static bool
grid_parallel_clauses_gridifiable (gomp_parallel *par, location_t tloc)
{
  tree clauses = gimple_omp_parallel_clauses (par);
  while (clauses)
    {
      switch (OMP_CLAUSE_CODE (clauses))
        {
        case OMP_CLAUSE_NUM_THREADS:
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
                               GRID_MISSED_MSG_PREFIX "there is "
                               "a num_threads clause of the parallel "
                               "construct\n");
              dump_printf_loc (MSG_NOTE, gimple_location (par),
                               "Parallel construct has a num_threads clause\n");
            }
          return false;

        case OMP_CLAUSE_REDUCTION:
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
                               GRID_MISSED_MSG_PREFIX "a reduction clause "
                               "is present\n");
              dump_printf_loc (MSG_NOTE, gimple_location (par),
                               "Parallel construct has a reduction clause\n");
            }
          return false;

        default:
          break;
        }
      clauses = OMP_CLAUSE_CHAIN (clauses);
    }
  return true;
}

/* Examine clauses and the body of omp loop statement GFOR and if something
   prevents gridification, issue a missed-optimization diagnostic and return
   false, otherwise return true.  GRID describes hitherto discovered properties
   of the loop that is evaluated for possible gridification.  */

static bool
grid_inner_loop_gridifiable_p (gomp_for *gfor, grid_prop *grid)
{
  if (!grid_seq_only_contains_local_assignments (gimple_omp_for_pre_body (gfor),
                                                 grid))
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                           GRID_MISSED_MSG_PREFIX "the inner loop "
                           "bounds computation contains a complex "
                           "statement\n");
          dump_printf_loc (MSG_NOTE, gimple_location (gfor),
                           "Loop construct cannot be analyzed for "
                           "gridification\n");
        }
      return false;
    }

  tree clauses = gimple_omp_for_clauses (gfor);
  while (clauses)
    {
      switch (OMP_CLAUSE_CODE (clauses))
        {
        case OMP_CLAUSE_SCHEDULE:
          if (OMP_CLAUSE_SCHEDULE_KIND (clauses) != OMP_CLAUSE_SCHEDULE_AUTO)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                                   GRID_MISSED_MSG_PREFIX "the inner loop "
                                   "has a non-automatic schedule clause\n");
                  dump_printf_loc (MSG_NOTE, gimple_location (gfor),
                                   "Loop construct has a non-automatic "
                                   "schedule clause\n");
                }
              return false;
            }
          break;

        case OMP_CLAUSE_REDUCTION:
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                               GRID_MISSED_MSG_PREFIX "a reduction "
                               "clause is present\n");
              dump_printf_loc (MSG_NOTE, gimple_location (gfor),
                               "Loop construct has a reduction "
                               "clause\n");
            }
          return false;

        default:
          break;
        }
      clauses = OMP_CLAUSE_CHAIN (clauses);
    }
  struct walk_stmt_info wi;
  memset (&wi, 0, sizeof (wi));
  if (walk_gimple_seq (gimple_omp_body (gfor),
                       grid_find_ungridifiable_statement,
                       NULL, &wi))
    {
      gimple *bad = (gimple *) wi.info;
      if (dump_enabled_p ())
        {
          if (is_gimple_call (bad))
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                             GRID_MISSED_MSG_PREFIX "the inner loop contains "
                             "a call to a noreturn function\n");
          else
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                             GRID_MISSED_MSG_PREFIX "the inner loop contains "
                             "statement %s which cannot be transformed\n",
                             gimple_code_name[(int) gimple_code (bad)]);
          dump_printf_loc (MSG_NOTE, gimple_location (bad),
                           "This statement cannot be analyzed for "
                           "gridification\n");
        }
      return false;
    }
  return true;
}

/* Given a distribute omp construct represented by DIST, which in the original
   source forms a compound construct with a looping construct, return true if
   it can be turned into a gridified HSA kernel.  Otherwise return false.  GRID
   describes hitherto discovered properties of the loop that is evaluated for
   possible gridification.  */

static bool
grid_dist_follows_simple_pattern (gomp_for *dist, grid_prop *grid)
{
  location_t tloc = grid->target_loc;
  gimple *stmt = grid_find_single_omp_among_assignments (gimple_omp_body (dist),
                                                         grid, "distribute");
  gomp_parallel *par;
  if (!stmt
      || !(par = dyn_cast <gomp_parallel *> (stmt))
      || !grid_parallel_clauses_gridifiable (par, tloc))
    return false;

  stmt = grid_find_single_omp_among_assignments (gimple_omp_body (par), grid,
                                                 "parallel");
  gomp_for *gfor;
  if (!stmt || !(gfor = dyn_cast <gomp_for *> (stmt)))
    return false;

  if (gimple_omp_for_kind (gfor) != GF_OMP_FOR_KIND_FOR)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
                         GRID_MISSED_MSG_PREFIX "the inner loop is not "
                         "a simple for loop\n");
      return false;
    }
  gcc_assert (gimple_omp_for_collapse (gfor) == grid->collapse);

  if (!grid_inner_loop_gridifiable_p (gfor, grid))
    return false;

  return true;
}

/* Given an omp loop statement GFOR, return true if it can participate in
   tiling gridification, i.e. in one where the distribute and parallel for
   loops do not form a compound statement.  GRID describes hitherto discovered
   properties of the loop that is evaluated for possible gridification.  */

static bool
grid_gfor_follows_tiling_pattern (gomp_for *gfor, grid_prop *grid)
{
  if (gimple_omp_for_kind (gfor) != GF_OMP_FOR_KIND_FOR)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                           GRID_MISSED_MSG_PREFIX "an inner loop is not "
                           "a simple for loop\n");
          dump_printf_loc (MSG_NOTE, gimple_location (gfor),
                           "This statement is not a simple for loop\n");
        }
      return false;
    }

  if (!grid_inner_loop_gridifiable_p (gfor, grid))
    return false;

  if (gimple_omp_for_collapse (gfor) != grid->collapse)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                           GRID_MISSED_MSG_PREFIX "an inner loop does not "
                           "use the same collapse clause\n");
          dump_printf_loc (MSG_NOTE, gimple_location (gfor),
                           "Loop construct uses a different collapse clause\n");
        }
      return false;
    }
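
  /* Compute the number of iterations of each collapsed dimension of the inner
     loop (the usual (n2 - n1 + step +/- 1) / step calculation) and require
     that it matches the tile size recorded from the distribute construct.  */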
  struct omp_for_data fd;
  struct omp_for_data_loop *loops
    = (struct omp_for_data_loop *)alloca (grid->collapse
                                          * sizeof (struct omp_for_data_loop));
  omp_extract_for_data (gfor, &fd, loops);
  for (unsigned i = 0; i < grid->collapse; i++)
    {
      tree itype, type = TREE_TYPE (fd.loops[i].v);
      if (POINTER_TYPE_P (type))
        itype = signed_type_for (type);
      else
        itype = type;

      tree n1 = fold_convert (itype, fd.loops[i].n1);
      tree n2 = fold_convert (itype, fd.loops[i].n2);
      tree t = build_int_cst (itype,
                              (fd.loops[i].cond_code == LT_EXPR ? -1 : 1));
      t = fold_build2 (PLUS_EXPR, itype, fd.loops[i].step, t);
      t = fold_build2 (PLUS_EXPR, itype, t, n2);
      t = fold_build2 (MINUS_EXPR, itype, t, n1);
      if (TYPE_UNSIGNED (itype) && fd.loops[i].cond_code == GT_EXPR)
        t = fold_build2 (TRUNC_DIV_EXPR, itype,
                         fold_build1 (NEGATE_EXPR, itype, t),
                         fold_build1 (NEGATE_EXPR, itype, fd.loops[i].step));
      else
        t = fold_build2 (TRUNC_DIV_EXPR, itype, t, fd.loops[i].step);

      if (!operand_equal_p (grid->group_sizes[i], t, 0))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                               GRID_MISSED_MSG_PREFIX "the distribute and "
                               "an internal loop do not agree on tile size\n");
              dump_printf_loc (MSG_NOTE, gimple_location (gfor),
                               "Loop construct does not seem to loop over "
                               "a tile size\n");
            }
          return false;
        }
    }
  return true;
}

/* Facing a call to FNDECL in the body of a distribute construct, return true
   if we can handle it or false if it precludes gridification.  */

static bool
grid_call_permissible_in_distribute_p (tree fndecl)
{
  if (DECL_PURE_P (fndecl) || TREE_READONLY (fndecl))
    return true;

  const char *name = IDENTIFIER_POINTER (DECL_NAME (fndecl));
  if (strstr (name, "omp_") != name)
    return false;

  if ((strcmp (name, "omp_get_thread_num") == 0)
      || (strcmp (name, "omp_get_num_threads") == 0)
      || (strcmp (name, "omp_get_num_teams") == 0)
      || (strcmp (name, "omp_get_team_num") == 0)
      || (strcmp (name, "omp_get_level") == 0)
      || (strcmp (name, "omp_get_active_level") == 0)
      || (strcmp (name, "omp_in_parallel") == 0))
    return true;

  return false;
}

/* Facing a call satisfying grid_call_permissible_in_distribute_p in the body
   of a distribute construct that is pointed at by GSI, modify it as necessary
   for gridification.  If the statement itself got removed, return true.  */

static bool
grid_handle_call_in_distribute (gimple_stmt_iterator *gsi)
{
  gimple *stmt = gsi_stmt (*gsi);
  tree fndecl = gimple_call_fndecl (stmt);
  gcc_checking_assert (stmt);
  if (DECL_PURE_P (fndecl) || TREE_READONLY (fndecl))
    return false;

  const char *name = IDENTIFIER_POINTER (DECL_NAME (fndecl));
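  /* Outside of any parallel construct these queries evaluate to zero (false),
     so replace the call with a zero constant and drop it.  */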
  if ((strcmp (name, "omp_get_thread_num") == 0)
      || (strcmp (name, "omp_get_level") == 0)
      || (strcmp (name, "omp_get_active_level") == 0)
      || (strcmp (name, "omp_in_parallel") == 0))
    {
      tree lhs = gimple_call_lhs (stmt);
      if (lhs)
        {
          gassign *assign
            = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
          gsi_insert_before (gsi, assign, GSI_SAME_STMT);
        }
      gsi_remove (gsi, true);
      return true;
    }

  /* The rest of the omp functions can stay as they are, HSA back-end will
     handle them correctly.  */
  gcc_checking_assert ((strcmp (name, "omp_get_num_threads") == 0)
                       || (strcmp (name, "omp_get_num_teams") == 0)
                       || (strcmp (name, "omp_get_team_num") == 0));
  return false;
}

/* Given a sequence of statements within a distribute omp construct or a
   parallel construct, which in the original source does not form a compound
   construct with a looping construct, return true if it does not prevent us
   from turning it into a gridified HSA kernel.  Otherwise return false.  GRID
   describes hitherto discovered properties of the loop that is evaluated for
   possible gridification.  IN_PARALLEL must be true if SEQ is within a
   parallel construct and false if it is only within a distribute
   construct.  */

static bool
grid_dist_follows_tiling_pattern (gimple_seq seq, grid_prop *grid,
                                  bool in_parallel)
{
  gimple_stmt_iterator gsi;
  for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);

      if (grid_safe_assignment_p (stmt, grid)
          || gimple_code (stmt) == GIMPLE_GOTO
          || gimple_code (stmt) == GIMPLE_LABEL
          || gimple_code (stmt) == GIMPLE_COND)
        continue;
      else if (gbind *bind = dyn_cast <gbind *> (stmt))
        {
          if (!grid_dist_follows_tiling_pattern (gimple_bind_body (bind),
                                                 grid, in_parallel))
            return false;
          continue;
        }
      else if (gtry *try_stmt = dyn_cast <gtry *> (stmt))
        {
          if (gimple_try_kind (try_stmt) == GIMPLE_TRY_CATCH)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                                   GRID_MISSED_MSG_PREFIX "the distribute "
                                   "construct contains a try..catch region\n");
                  dump_printf_loc (MSG_NOTE, gimple_location (try_stmt),
                                   "This statement cannot be analyzed for "
                                   "tiled gridification\n");
                }
              return false;
            }
          if (!grid_dist_follows_tiling_pattern (gimple_try_eval (try_stmt),
                                                 grid, in_parallel))
            return false;
          if (!grid_dist_follows_tiling_pattern (gimple_try_cleanup (try_stmt),
                                                 grid, in_parallel))
            return false;
          continue;
        }
      else if (is_gimple_call (stmt))
        {
          tree fndecl = gimple_call_fndecl (stmt);
          if (fndecl && grid_call_permissible_in_distribute_p (fndecl))
            continue;

          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                               GRID_MISSED_MSG_PREFIX "the distribute "
                               "construct contains a call\n");
              dump_printf_loc (MSG_NOTE, gimple_location (stmt),
                               "This statement cannot be analyzed for "
                               "tiled gridification\n");
            }
          return false;
        }
      else if (gomp_parallel *par = dyn_cast <gomp_parallel *> (stmt))
        {
          if (in_parallel)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                                   GRID_MISSED_MSG_PREFIX "a parallel "
                                   "construct contains another parallel "
                                   "construct\n");
                  dump_printf_loc (MSG_NOTE, gimple_location (stmt),
                                   "This parallel construct is nested in "
                                   "another one\n");
                }
              return false;
            }
          if (!grid_parallel_clauses_gridifiable (par, grid->target_loc)
              || !grid_dist_follows_tiling_pattern (gimple_omp_body (par),
                                                    grid, true))
            return false;
        }
      else if (gomp_for *gfor = dyn_cast <gomp_for *> (stmt))
        {
          if (!in_parallel)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                                   GRID_MISSED_MSG_PREFIX "a loop "
                                   "construct is not nested within a parallel "
                                   "construct\n");
                  dump_printf_loc (MSG_NOTE, gimple_location (stmt),
                                   "This loop construct is not nested in "
                                   "a parallel construct\n");
                }
              return false;
            }
          if (!grid_gfor_follows_tiling_pattern (gfor, grid))
            return false;
        }
      else
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                               GRID_MISSED_MSG_PREFIX "the distribute "
                               "construct contains a complex statement\n");
              dump_printf_loc (MSG_NOTE, gimple_location (stmt),
                               "This statement cannot be analyzed for "
                               "tiled gridification\n");
            }
          return false;
        }
    }
  return true;
}

/* If TARGET follows a pattern that can be turned into a gridified HSA kernel,
   return true, otherwise return false.  In the case of success, also fill in
   GRID with information describing the kernel grid.  */

static bool
grid_target_follows_gridifiable_pattern (gomp_target *target, grid_prop *grid)
{
  if (gimple_omp_target_kind (target) != GF_OMP_TARGET_KIND_REGION)
    return false;

  location_t tloc = gimple_location (target);
  grid->target_loc = tloc;
  gimple *stmt
    = grid_find_single_omp_among_assignments (gimple_omp_body (target),
                                              grid, "target");
  if (!stmt)
    return false;
  gomp_teams *teams = dyn_cast <gomp_teams *> (stmt);
  tree group_size = NULL;
  if (!teams)
    {
      dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
                       GRID_MISSED_MSG_PREFIX "it does not have a sole teams "
                       "construct in it.\n");
      return false;
    }

  tree clauses = gimple_omp_teams_clauses (teams);
  while (clauses)
    {
      switch (OMP_CLAUSE_CODE (clauses))
        {
        case OMP_CLAUSE_NUM_TEAMS:
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
                             GRID_MISSED_MSG_PREFIX "the teams construct "
                             "contains a num_teams clause\n");
          return false;

        case OMP_CLAUSE_REDUCTION:
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
                             GRID_MISSED_MSG_PREFIX "a reduction "
                             "clause is present\n");
          return false;

        case OMP_CLAUSE_THREAD_LIMIT:
          if (!integer_zerop (OMP_CLAUSE_OPERAND (clauses, 0)))
            group_size = OMP_CLAUSE_OPERAND (clauses, 0);
          break;

        default:
          break;
        }
      clauses = OMP_CLAUSE_CHAIN (clauses);
    }

  stmt = grid_find_single_omp_among_assignments (gimple_omp_body (teams), grid,
                                                 "teams");
  if (!stmt)
    return false;
  gomp_for *dist = dyn_cast <gomp_for *> (stmt);
  if (!dist)
    {
      dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
                       GRID_MISSED_MSG_PREFIX "the teams construct does not "
                       "have a single distribute construct in it.\n");
      return false;
    }

  gcc_assert (gimple_omp_for_kind (dist) == GF_OMP_FOR_KIND_DISTRIBUTE);

  grid->collapse = gimple_omp_for_collapse (dist);
  if (grid->collapse > 3)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
                         GRID_MISSED_MSG_PREFIX "the distribute construct "
                         "contains a collapse clause with a parameter greater "
                         "than 3\n");
      return false;
    }

  struct omp_for_data fd;
  struct omp_for_data_loop *dist_loops
    = (struct omp_for_data_loop *)alloca (grid->collapse
                                          * sizeof (struct omp_for_data_loop));
  omp_extract_for_data (dist, &fd, dist_loops);
  if (fd.chunk_size)
    {
      if (group_size && !operand_equal_p (group_size, fd.chunk_size, 0))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
                             GRID_MISSED_MSG_PREFIX "the teams "
                             "thread limit is different from the distribute "
                             "schedule chunk\n");
          return false;
        }
      group_size = fd.chunk_size;
    }
  if (group_size && grid->collapse > 1)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
                         GRID_MISSED_MSG_PREFIX "group size cannot be "
                         "set using thread_limit or schedule clauses "
                         "when also using a collapse clause greater than 1\n");
      return false;
    }
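
  /* A combined distribute parallel for loop means the simple pattern;
     otherwise we expect the tiling pattern, in which the step of the
     distribute loop determines the group (tile) sizes.  */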
  if (gimple_omp_for_combined_p (dist))
    {
      grid->tiling = false;
      grid->group_sizes[0] = group_size;
      for (unsigned i = 1; i < grid->collapse; i++)
        grid->group_sizes[i] = NULL;
      return grid_dist_follows_simple_pattern (dist, grid);
    }
  else
    {
      grid->tiling = true;
      if (group_size)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
                             GRID_MISSED_MSG_PREFIX "group size cannot be set "
                             "using thread_limit or schedule clauses when "
                             "distribute and loop constructs do not form "
                             "one combined construct\n");
          return false;
        }
      for (unsigned i = 0; i < grid->collapse; i++)
        {
          if (fd.loops[i].cond_code == GT_EXPR)
            grid->group_sizes[i] = fold_build1 (NEGATE_EXPR,
                                                TREE_TYPE (fd.loops[i].step),
                                                fd.loops[i].step);
          else
            grid->group_sizes[i] = fd.loops[i].step;
        }
      return grid_dist_follows_tiling_pattern (gimple_omp_body (dist), grid,
                                               false);
    }
}

/* Operand walker, used to remap pre-body declarations according to a hash map
   provided in DATA.  */

static tree
grid_remap_prebody_decls (tree *tp, int *walk_subtrees, void *data)
{
  tree t = *tp;

  if (DECL_P (t) || TYPE_P (t))
    *walk_subtrees = 0;
  else
    *walk_subtrees = 1;

  if (VAR_P (t))
    {
      struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
      hash_map<tree, tree> *declmap = (hash_map<tree, tree> *) wi->info;
      tree *repl = declmap->get (t);
      if (repl)
        *tp = *repl;
    }
  return NULL_TREE;
}

/* Identifiers of segments into which a particular variable should be placed
   when gridifying.  */

enum grid_var_segment {GRID_SEGMENT_PRIVATE, GRID_SEGMENT_GROUP,
                       GRID_SEGMENT_GLOBAL};

/* Mark VAR so that it is eventually placed into SEGMENT.  */

static void
grid_mark_variable_segment (tree var, enum grid_var_segment segment)
{
  /* Making non-addressable variables static would require that we re-gimplify
     all their uses.  Fortunately, we do not have to do this because if they
     are not addressable, it means they are not used in atomic or parallel
     statements and so relaxed GPU consistency rules mean we can just keep them
     private.  */
  if (!TREE_ADDRESSABLE (var))
    return;

  switch (segment)
    {
    case GRID_SEGMENT_GROUP:
      DECL_ATTRIBUTES (var) = tree_cons (get_identifier ("hsa_group_segment"),
                                         NULL, DECL_ATTRIBUTES (var));
      break;
    case GRID_SEGMENT_GLOBAL:
      DECL_ATTRIBUTES (var) = tree_cons (get_identifier ("hsa_global_segment"),
                                         NULL, DECL_ATTRIBUTES (var));
      break;
    default:
      gcc_unreachable ();
    }

  if (!TREE_STATIC (var))
    {
      TREE_STATIC (var) = 1;
      varpool_node::finalize_decl (var);
    }
}

/* Copy leading register-type assignments to local variables in SRC to just
   before DST, creating temporaries, adjusting mapping of operands in WI and
   remapping operands as necessary.  Add any new temporaries to TGT_BIND.
   Return the first statement that does not conform to grid_safe_assignment_p
   or NULL.  If VAR_SEGMENT is not GRID_SEGMENT_PRIVATE, also mark all
   variables in traversed bind statements so that they are put into the
   appropriate segment.  */

static gimple *
grid_copy_leading_local_assignments (gimple_seq src, gimple_stmt_iterator *dst,
                                     gbind *tgt_bind,
                                     enum grid_var_segment var_segment,
                                     struct walk_stmt_info *wi)
{
  hash_map<tree, tree> *declmap = (hash_map<tree, tree> *) wi->info;
  gimple_stmt_iterator gsi;
  for (gsi = gsi_start (src); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);
      if (gbind *bind = dyn_cast <gbind *> (stmt))
        {
          gimple *r = grid_copy_leading_local_assignments
            (gimple_bind_body (bind), dst, tgt_bind, var_segment, wi);

          if (var_segment != GRID_SEGMENT_PRIVATE)
            for (tree var = gimple_bind_vars (bind);
                 var;
                 var = DECL_CHAIN (var))
              grid_mark_variable_segment (var, var_segment);
          if (r)
            return r;
          else
            continue;
        }
      if (!grid_safe_assignment_p (stmt, NULL))
        return stmt;
      tree lhs = gimple_assign_lhs (as_a <gassign *> (stmt));
      tree repl = copy_var_decl (lhs, create_tmp_var_name (NULL),
                                 TREE_TYPE (lhs));
      DECL_CONTEXT (repl) = current_function_decl;
      gimple_bind_append_vars (tgt_bind, repl);

      declmap->put (lhs, repl);
      gassign *copy = as_a <gassign *> (gimple_copy (stmt));
      walk_gimple_op (copy, grid_remap_prebody_decls, wi);
      gsi_insert_before (dst, copy, GSI_SAME_STMT);
    }
  return NULL;
}

/* Statement walker function to make adjustments to statements within the
   gridified kernel copy.  */

static tree
grid_process_grid_body (gimple_stmt_iterator *gsi, bool *handled_ops_p,
                        struct walk_stmt_info *)
{
  *handled_ops_p = false;
  gimple *stmt = gsi_stmt (*gsi);
  if (gimple_code (stmt) == GIMPLE_OMP_FOR
      && (gimple_omp_for_kind (stmt) & GF_OMP_FOR_SIMD))
    {
      gomp_for *loop = as_a <gomp_for *> (stmt);
      tree clauses = gimple_omp_for_clauses (loop);
      tree cl = omp_find_clause (clauses, OMP_CLAUSE_SAFELEN);
      if (cl)
        OMP_CLAUSE_SAFELEN_EXPR (cl) = integer_one_node;
      else
        {
          tree c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_SAFELEN);
          OMP_CLAUSE_SAFELEN_EXPR (c) = integer_one_node;
          OMP_CLAUSE_CHAIN (c) = clauses;
          gimple_omp_for_set_clauses (loop, c);
        }
    }
  return NULL_TREE;
}

/* Given a PARLOOP that is a normal for looping construct but also a part of a
   combined construct with a simd loop, eliminate the simd loop.  */

static void
grid_eliminate_combined_simd_part (gomp_for *parloop)
{
  struct walk_stmt_info wi;

  memset (&wi, 0, sizeof (wi));
  wi.val_only = true;
  enum gf_mask msk = GF_OMP_FOR_SIMD;
  wi.info = (void *) &msk;
  walk_gimple_seq (gimple_omp_body (parloop), omp_find_combined_for, NULL, &wi);
  gimple *stmt = (gimple *) wi.info;
  /* We expect that the SIMD is the only statement in the parallel loop.  */
  gcc_assert (stmt
              && gimple_code (stmt) == GIMPLE_OMP_FOR
              && (gimple_omp_for_kind (stmt) == GF_OMP_FOR_SIMD)
              && gimple_omp_for_combined_into_p (stmt)
              && !gimple_omp_for_combined_p (stmt));
  gomp_for *simd = as_a <gomp_for *> (stmt);

  /* Copy over the iteration properties because the body refers to the index in
     the bottom-most loop.  */
  unsigned i, collapse = gimple_omp_for_collapse (parloop);
  gcc_checking_assert (collapse == gimple_omp_for_collapse (simd));
  for (i = 0; i < collapse; i++)
    {
      gimple_omp_for_set_index (parloop, i, gimple_omp_for_index (simd, i));
      gimple_omp_for_set_initial (parloop, i, gimple_omp_for_initial (simd, i));
      gimple_omp_for_set_final (parloop, i, gimple_omp_for_final (simd, i));
      gimple_omp_for_set_incr (parloop, i, gimple_omp_for_incr (simd, i));
    }

  tree *tgt = gimple_omp_for_clauses_ptr (parloop);
  while (*tgt)
    tgt = &OMP_CLAUSE_CHAIN (*tgt);

  /* Copy over all clauses, except for linear clauses, which are turned into
     private clauses, and all other simd-specific clauses, which are
     ignored.  */
  tree *pc = gimple_omp_for_clauses_ptr (simd);
  while (*pc)
    {
      tree c = *pc;
      switch (OMP_CLAUSE_CODE (c))
        {
        case OMP_CLAUSE_LINEAR:
          {
            tree priv = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_PRIVATE);
            OMP_CLAUSE_DECL (priv) = OMP_CLAUSE_DECL (c);
            OMP_CLAUSE_CHAIN (priv) = NULL;
            *tgt = priv;
            tgt = &OMP_CLAUSE_CHAIN (priv);
            pc = &OMP_CLAUSE_CHAIN (c);
            break;
          }

        case OMP_CLAUSE_SAFELEN:
        case OMP_CLAUSE_SIMDLEN:
        case OMP_CLAUSE_ALIGNED:
          pc = &OMP_CLAUSE_CHAIN (c);
          break;

        default:
          *pc = OMP_CLAUSE_CHAIN (c);
          OMP_CLAUSE_CHAIN (c) = NULL;
          *tgt = c;
          tgt = &OMP_CLAUSE_CHAIN (c);
          break;
        }
    }

  /* Finally, throw away the simd and mark the parallel loop as not
     combined.  */
  gimple_omp_set_body (parloop, gimple_omp_body (simd));
  gimple_omp_for_set_combined_p (parloop, false);
}

/* Statement walker function marking loops as grid ones representing threads of
   a particular thread group.  */

static tree
grid_mark_tiling_loops (gimple_stmt_iterator *gsi, bool *handled_ops_p,
                        struct walk_stmt_info *wi_in)
{
  *handled_ops_p = false;
  if (gomp_for *loop = dyn_cast <gomp_for *> (gsi_stmt (*gsi)))
    {
      *handled_ops_p = true;
      gimple_omp_for_set_kind (loop, GF_OMP_FOR_KIND_GRID_LOOP);
      gimple_omp_for_set_grid_intra_group (loop, true);
      if (gimple_omp_for_combined_p (loop))
        grid_eliminate_combined_simd_part (loop);

      struct walk_stmt_info body_wi;
      memset (&body_wi, 0, sizeof (body_wi));
      walk_gimple_seq_mod (gimple_omp_body_ptr (loop),
                           grid_process_grid_body, NULL, &body_wi);

      gbind *bind = (gbind *) wi_in->info;
      tree c;
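      /* For each lastprivate clause, create a copy of the variable in the
         group segment; the clause's sequence stores the last value into that
         copy and the value is copied back into the original variable right
         after the loop.  */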
      for (c = gimple_omp_for_clauses (loop); c; c = OMP_CLAUSE_CHAIN (c))
        if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE)
          {
            push_gimplify_context ();
            tree ov = OMP_CLAUSE_DECL (c);
            tree gv = copy_var_decl (ov, create_tmp_var_name (NULL),
                                     TREE_TYPE (ov));

            grid_mark_variable_segment (gv, GRID_SEGMENT_GROUP);
            DECL_CONTEXT (gv) = current_function_decl;
            gimple_bind_append_vars (bind, gv);
            tree x = lang_hooks.decls.omp_clause_assign_op (c, gv, ov);
            gimplify_and_add (x, &OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ (c));
            x = lang_hooks.decls.omp_clause_copy_ctor (c, ov, gv);
            gimple_seq l = NULL;
            gimplify_and_add (x, &l);
            gsi_insert_seq_after (gsi, l, GSI_SAME_STMT);
            pop_gimplify_context (bind);
          }
    }
  return NULL_TREE;
}

/* Statement walker function marking all parallels as grid_phony and loops as
   grid ones representing threads of a particular thread group.  */

static tree
grid_mark_tiling_parallels_and_loops (gimple_stmt_iterator *gsi,
                                      bool *handled_ops_p,
                                      struct walk_stmt_info *wi_in)
{
  *handled_ops_p = false;
  wi_in->removed_stmt = false;
  gimple *stmt = gsi_stmt (*gsi);
  if (gbind *bind = dyn_cast <gbind *> (stmt))
    {
      for (tree var = gimple_bind_vars (bind); var; var = DECL_CHAIN (var))
        grid_mark_variable_segment (var, GRID_SEGMENT_GROUP);
    }
  else if (gomp_parallel *parallel = dyn_cast <gomp_parallel *> (stmt))
    {
      *handled_ops_p = true;
      gimple_omp_parallel_set_grid_phony (parallel, true);

      gbind *new_bind = gimple_build_bind (NULL, NULL, make_node (BLOCK));
      gimple_bind_set_body (new_bind, gimple_omp_body (parallel));
      gimple_seq s = NULL;
      gimple_seq_add_stmt (&s, new_bind);
      gimple_omp_set_body (parallel, s);

      struct walk_stmt_info wi_par;
      memset (&wi_par, 0, sizeof (wi_par));
      wi_par.info = new_bind;
      walk_gimple_seq_mod (gimple_bind_body_ptr (new_bind),
                           grid_mark_tiling_loops, NULL, &wi_par);
    }
  else if (is_a <gcall *> (stmt))
    wi_in->removed_stmt = grid_handle_call_in_distribute (gsi);
  return NULL_TREE;
}

/* Given freshly copied top level kernel SEQ, identify the individual OMP
   components, mark them as part of the kernel, copy assignments leading to
   them just before DST, remapping them using WI and adding new temporaries to
   TGT_BIND, and return the loop that will be used for kernel dispatch.  */

static gomp_for *
grid_process_kernel_body_copy (grid_prop *grid, gimple_seq seq,
                               gimple_stmt_iterator *dst,
                               gbind *tgt_bind, struct walk_stmt_info *wi)
{
  gimple *stmt = grid_copy_leading_local_assignments (seq, dst, tgt_bind,
                                                      GRID_SEGMENT_GLOBAL, wi);
  gomp_teams *teams = dyn_cast <gomp_teams *> (stmt);
  gcc_assert (teams);
  gimple_omp_teams_set_grid_phony (teams, true);
  stmt = grid_copy_leading_local_assignments (gimple_omp_body (teams), dst,
                                              tgt_bind, GRID_SEGMENT_GLOBAL,
                                              wi);
  gcc_checking_assert (stmt);
  gomp_for *dist = dyn_cast <gomp_for *> (stmt);
  gcc_assert (dist);
  gimple_seq prebody = gimple_omp_for_pre_body (dist);
  if (prebody)
    grid_copy_leading_local_assignments (prebody, dst, tgt_bind,
                                         GRID_SEGMENT_GROUP, wi);
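
  /* In the tiling case the distribute loop itself becomes the group iteration
     loop, while in the combined case it is made phony and the innermost for
     loop becomes the grid loop used for dispatch.  */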
  if (grid->tiling)
    {
      gimple_omp_for_set_kind (dist, GF_OMP_FOR_KIND_GRID_LOOP);
      gimple_omp_for_set_grid_group_iter (dist, true);

      struct walk_stmt_info wi_tiled;
      memset (&wi_tiled, 0, sizeof (wi_tiled));
      walk_gimple_seq_mod (gimple_omp_body_ptr (dist),
                           grid_mark_tiling_parallels_and_loops, NULL,
                           &wi_tiled);
      return dist;
    }
  else
    {
      gimple_omp_for_set_grid_phony (dist, true);
      stmt = grid_copy_leading_local_assignments (gimple_omp_body (dist), dst,
                                                  tgt_bind,
                                                  GRID_SEGMENT_PRIVATE, wi);
      gcc_checking_assert (stmt);
      gomp_parallel *parallel = as_a <gomp_parallel *> (stmt);
      gimple_omp_parallel_set_grid_phony (parallel, true);
      stmt = grid_copy_leading_local_assignments (gimple_omp_body (parallel),
                                                  dst, tgt_bind,
                                                  GRID_SEGMENT_PRIVATE, wi);
      gomp_for *inner_loop = as_a <gomp_for *> (stmt);
      gimple_omp_for_set_kind (inner_loop, GF_OMP_FOR_KIND_GRID_LOOP);
      prebody = gimple_omp_for_pre_body (inner_loop);
      if (prebody)
        grid_copy_leading_local_assignments (prebody, dst, tgt_bind,
                                             GRID_SEGMENT_PRIVATE, wi);

      if (gimple_omp_for_combined_p (inner_loop))
        grid_eliminate_combined_simd_part (inner_loop);
      struct walk_stmt_info body_wi;
      memset (&body_wi, 0, sizeof (body_wi));
      walk_gimple_seq_mod (gimple_omp_body_ptr (inner_loop),
                           grid_process_grid_body, NULL, &body_wi);

      return inner_loop;
    }
}

/* If TARGET points to a GOMP_TARGET which follows a gridifiable pattern,
   create a GPU kernel for it.  GSI must point to the same statement, TGT_BIND
   is the bind into which temporaries inserted before TARGET should be
   added.  */

static void
grid_attempt_target_gridification (gomp_target *target,
                                   gimple_stmt_iterator *gsi,
                                   gbind *tgt_bind)
{
  /* removed group_size */
  grid_prop grid;
  memset (&grid, 0, sizeof (grid));
  if (!target || !grid_target_follows_gridifiable_pattern (target, &grid))
    return;

  location_t loc = gimple_location (target);
  if (dump_enabled_p ())
    dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc,
                     "Target construct will be turned into a gridified HSA "
                     "kernel\n");

  /* Copy target body to a GPUKERNEL construct: */
  gimple_seq kernel_seq = copy_gimple_seq_and_replace_locals
    (gimple_omp_body (target));

  hash_map<tree, tree> *declmap = new hash_map<tree, tree>;
  struct walk_stmt_info wi;
  memset (&wi, 0, sizeof (struct walk_stmt_info));
  wi.info = declmap;

  /* Copy assignments in between OMP statements before target, mark OMP
     statements within copy appropriately.  */
  gomp_for *inner_loop = grid_process_kernel_body_copy (&grid, kernel_seq, gsi,
                                                        tgt_bind, &wi);

  gbind *old_bind
    = as_a <gbind *> (gimple_seq_first (gimple_omp_body (target)));
  gbind *new_bind = as_a <gbind *> (gimple_seq_first (kernel_seq));
  tree new_block = gimple_bind_block (new_bind);
  tree enc_block = BLOCK_SUPERCONTEXT (gimple_bind_block (old_bind));
  BLOCK_CHAIN (new_block) = BLOCK_SUBBLOCKS (enc_block);
  BLOCK_SUBBLOCKS (enc_block) = new_block;
  BLOCK_SUPERCONTEXT (new_block) = enc_block;
  gimple *gpukernel = gimple_build_omp_grid_body (kernel_seq);
  gimple_seq_add_stmt
    (gimple_bind_body_ptr (as_a <gbind *> (gimple_omp_body (target))),
     gpukernel);

  for (size_t i = 0; i < grid.collapse; i++)
    walk_tree (&grid.group_sizes[i], grid_remap_prebody_decls, &wi, NULL);
  push_gimplify_context ();
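  /* For each collapsed dimension of the inner loop, compute the grid size
     (the total number of iterations) and the work-group size (zero when not
     specified by the user), and attach them to the target statement as
     _griddim_ clauses.  */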
  for (size_t i = 0; i < grid.collapse; i++)
    {
      tree itype, type = TREE_TYPE (gimple_omp_for_index (inner_loop, i));
      if (POINTER_TYPE_P (type))
        itype = signed_type_for (type);
      else
        itype = type;

      enum tree_code cond_code = gimple_omp_for_cond (inner_loop, i);
      tree n1 = unshare_expr (gimple_omp_for_initial (inner_loop, i));
      walk_tree (&n1, grid_remap_prebody_decls, &wi, NULL);
      tree n2 = unshare_expr (gimple_omp_for_final (inner_loop, i));
      walk_tree (&n2, grid_remap_prebody_decls, &wi, NULL);
      omp_adjust_for_condition (loc, &cond_code, &n2);
      n1 = fold_convert (itype, n1);
      n2 = fold_convert (itype, n2);

      tree step
        = omp_get_for_step_from_incr (loc, gimple_omp_for_incr (inner_loop, i));

      tree t = build_int_cst (itype, (cond_code == LT_EXPR ? -1 : 1));
      t = fold_build2 (PLUS_EXPR, itype, step, t);
      t = fold_build2 (PLUS_EXPR, itype, t, n2);
      t = fold_build2 (MINUS_EXPR, itype, t, n1);
      if (TYPE_UNSIGNED (itype) && cond_code == GT_EXPR)
        t = fold_build2 (TRUNC_DIV_EXPR, itype,
                         fold_build1 (NEGATE_EXPR, itype, t),
                         fold_build1 (NEGATE_EXPR, itype, step));
      else
        t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
      if (grid.tiling)
        {
          if (cond_code == GT_EXPR)
            step = fold_build1 (NEGATE_EXPR, itype, step);
          t = fold_build2 (MULT_EXPR, itype, t, step);
        }

      tree gs = fold_convert (uint32_type_node, t);
      gimple_seq tmpseq = NULL;
      gimplify_expr (&gs, &tmpseq, NULL, is_gimple_val, fb_rvalue);
      if (!gimple_seq_empty_p (tmpseq))
        gsi_insert_seq_before (gsi, tmpseq, GSI_SAME_STMT);

      tree ws;
      if (grid.group_sizes[i])
        {
          ws = fold_convert (uint32_type_node, grid.group_sizes[i]);
          tmpseq = NULL;
          gimplify_expr (&ws, &tmpseq, NULL, is_gimple_val, fb_rvalue);
          if (!gimple_seq_empty_p (tmpseq))
            gsi_insert_seq_before (gsi, tmpseq, GSI_SAME_STMT);
        }
      else
        ws = build_zero_cst (uint32_type_node);

      tree c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__GRIDDIM_);
      OMP_CLAUSE__GRIDDIM__DIMENSION (c) = i;
      OMP_CLAUSE__GRIDDIM__SIZE (c) = gs;
      OMP_CLAUSE__GRIDDIM__GROUP (c) = ws;
      OMP_CLAUSE_CHAIN (c) = gimple_omp_target_clauses (target);
      gimple_omp_target_set_clauses (target, c);
    }
  pop_gimplify_context (tgt_bind);
  delete declmap;
  return;
}

/* Walker function doing all the work for omp_grid_gridify_all_targets.  */

static tree
grid_gridify_all_targets_stmt (gimple_stmt_iterator *gsi,
                               bool *handled_ops_p,
                               struct walk_stmt_info *incoming)
{
  *handled_ops_p = false;

  gimple *stmt = gsi_stmt (*gsi);
  gomp_target *target = dyn_cast <gomp_target *> (stmt);
  if (target)
    {
      gbind *tgt_bind = (gbind *) incoming->info;
      gcc_checking_assert (tgt_bind);
      grid_attempt_target_gridification (target, gsi, tgt_bind);
      return NULL_TREE;
    }
  gbind *bind = dyn_cast <gbind *> (stmt);
  if (bind)
    {
      *handled_ops_p = true;
      struct walk_stmt_info wi;
      memset (&wi, 0, sizeof (wi));
      wi.info = bind;
      walk_gimple_seq_mod (gimple_bind_body_ptr (bind),
                           grid_gridify_all_targets_stmt, NULL, &wi);
    }
  return NULL_TREE;
}

/* Attempt to gridify all target constructs in BODY_P.  All such targets will
   have their bodies duplicated, with the new copy being put into a
   gimple_omp_grid_body statement.  All kernel-related constructs within the
   grid_body will be marked with phony flags or kernel kinds.  Moreover, some
   re-structuring is often needed, such as copying pre-bodies before the target
   construct so that kernel grid sizes can be computed.  */
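
/* This entry point is expected to be invoked early during OpenMP lowering, on
   function bodies that may contain target constructs, when HSA offloading is
   requested.  */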

void
omp_grid_gridify_all_targets (gimple_seq *body_p)
{
  struct walk_stmt_info wi;
  memset (&wi, 0, sizeof (wi));
  walk_gimple_seq_mod (body_p, grid_gridify_all_targets_stmt, NULL, &wi);
}
|