111
|
1 /* OMP constructs' SIMD clone supporting code.
|
|
2
|
145
|
3 Copyright (C) 2005-2020 Free Software Foundation, Inc.
|
111
|
4
|
|
5 This file is part of GCC.
|
|
6
|
|
7 GCC is free software; you can redistribute it and/or modify it under
|
|
8 the terms of the GNU General Public License as published by the Free
|
|
9 Software Foundation; either version 3, or (at your option) any later
|
|
10 version.
|
|
11
|
|
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
|
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
15 for more details.
|
|
16
|
|
17 You should have received a copy of the GNU General Public License
|
|
18 along with GCC; see the file COPYING3. If not see
|
|
19 <http://www.gnu.org/licenses/>. */
|
|
20
|
|
21 #include "config.h"
|
|
22 #include "system.h"
|
|
23 #include "coretypes.h"
|
|
24 #include "backend.h"
|
|
25 #include "target.h"
|
|
26 #include "tree.h"
|
|
27 #include "gimple.h"
|
|
28 #include "cfghooks.h"
|
|
29 #include "alloc-pool.h"
|
|
30 #include "tree-pass.h"
|
|
31 #include "ssa.h"
|
|
32 #include "cgraph.h"
|
|
33 #include "pretty-print.h"
|
|
34 #include "diagnostic-core.h"
|
|
35 #include "fold-const.h"
|
|
36 #include "stor-layout.h"
|
|
37 #include "cfganal.h"
|
|
38 #include "gimplify.h"
|
|
39 #include "gimple-iterator.h"
|
|
40 #include "gimplify-me.h"
|
|
41 #include "gimple-walk.h"
|
|
42 #include "langhooks.h"
|
|
43 #include "tree-cfg.h"
|
|
44 #include "tree-into-ssa.h"
|
|
45 #include "tree-dfa.h"
|
|
46 #include "cfgloop.h"
|
|
47 #include "symbol-summary.h"
|
131
|
48 #include "ipa-param-manipulation.h"
|
111
|
49 #include "tree-eh.h"
|
|
50 #include "varasm.h"
|
|
51 #include "stringpool.h"
|
|
52 #include "attribs.h"
|
131
|
53 #include "omp-simd-clone.h"
|
|
54
|
|
55 /* Return the number of elements in vector type VECTYPE, which is associated
|
|
56 with a SIMD clone. At present these always have a constant length. */
|
|
57
|
|
58 static unsigned HOST_WIDE_INT
|
|
59 simd_clone_subparts (tree vectype)
|
|
60 {
|
|
61 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
|
|
62 }
|
111
|
63
|
|
64 /* Allocate a fresh `simd_clone' and return it. NARGS is the number
|
|
65 of arguments to reserve space for. */
|
|
66
|
|
67 static struct cgraph_simd_clone *
|
|
68 simd_clone_struct_alloc (int nargs)
|
|
69 {
|
|
70 struct cgraph_simd_clone *clone_info;
|
|
71 size_t len = (sizeof (struct cgraph_simd_clone)
|
|
72 + nargs * sizeof (struct cgraph_simd_clone_arg));
|
|
73 clone_info = (struct cgraph_simd_clone *)
|
|
74 ggc_internal_cleared_alloc (len);
|
|
75 return clone_info;
|
|
76 }
|
|
77
|
|
78 /* Make a copy of the `struct cgraph_simd_clone' in FROM to TO. */
|
|
79
|
|
80 static inline void
|
|
81 simd_clone_struct_copy (struct cgraph_simd_clone *to,
|
|
82 struct cgraph_simd_clone *from)
|
|
83 {
|
|
84 memcpy (to, from, (sizeof (struct cgraph_simd_clone)
|
|
85 + ((from->nargs - from->inbranch)
|
|
86 * sizeof (struct cgraph_simd_clone_arg))));
|
|
87 }
|
|
88
|
145
|
89 /* Fill an empty vector ARGS with parameter types of function FNDECL. This
|
|
90 uses TYPE_ARG_TYPES if available, otherwise falls back to types of
|
111
|
91 DECL_ARGUMENTS types. */
|
|
92
|
145
|
93 static void
|
|
94 simd_clone_vector_of_formal_parm_types (vec<tree> *args, tree fndecl)
|
111
|
95 {
|
|
96 if (TYPE_ARG_TYPES (TREE_TYPE (fndecl)))
|
145
|
97 {
|
|
98 push_function_arg_types (args, TREE_TYPE (fndecl));
|
|
99 return;
|
|
100 }
|
|
101 push_function_arg_decls (args, fndecl);
|
111
|
102 unsigned int i;
|
|
103 tree arg;
|
145
|
104 FOR_EACH_VEC_ELT (*args, i, arg)
|
|
105 (*args)[i] = TREE_TYPE ((*args)[i]);
|
111
|
106 }
|
|
107
|
|
108 /* Given a simd function in NODE, extract the simd specific
|
|
109 information from the OMP clauses passed in CLAUSES, and return
|
|
110 the struct cgraph_simd_clone * if it should be cloned. *INBRANCH_SPECIFIED
|
|
111 is set to TRUE if the `inbranch' or `notinbranch' clause specified,
|
|
112 otherwise set to FALSE. */
|
|
113
|
|
114 static struct cgraph_simd_clone *
|
|
115 simd_clone_clauses_extract (struct cgraph_node *node, tree clauses,
|
|
116 bool *inbranch_specified)
|
|
117 {
|
145
|
118 auto_vec<tree> args;
|
|
119 simd_clone_vector_of_formal_parm_types (&args, node->decl);
|
111
|
120 tree t;
|
|
121 int n;
|
|
122 *inbranch_specified = false;
|
|
123
|
|
124 n = args.length ();
|
|
125 if (n > 0 && args.last () == void_type_node)
|
|
126 n--;
|
|
127
|
|
128 /* Allocate one more than needed just in case this is an in-branch
|
|
129 clone which will require a mask argument. */
|
|
130 struct cgraph_simd_clone *clone_info = simd_clone_struct_alloc (n + 1);
|
|
131 clone_info->nargs = n;
|
|
132
|
|
133 if (!clauses)
|
|
134 goto out;
|
|
135
|
|
136 clauses = TREE_VALUE (clauses);
|
|
137 if (!clauses || TREE_CODE (clauses) != OMP_CLAUSE)
|
|
138 goto out;
|
|
139
|
|
140 for (t = clauses; t; t = OMP_CLAUSE_CHAIN (t))
|
|
141 {
|
|
142 switch (OMP_CLAUSE_CODE (t))
|
|
143 {
|
|
144 case OMP_CLAUSE_INBRANCH:
|
|
145 clone_info->inbranch = 1;
|
|
146 *inbranch_specified = true;
|
|
147 break;
|
|
148 case OMP_CLAUSE_NOTINBRANCH:
|
|
149 clone_info->inbranch = 0;
|
|
150 *inbranch_specified = true;
|
|
151 break;
|
|
152 case OMP_CLAUSE_SIMDLEN:
|
|
153 clone_info->simdlen
|
|
154 = TREE_INT_CST_LOW (OMP_CLAUSE_SIMDLEN_EXPR (t));
|
|
155 break;
|
|
156 case OMP_CLAUSE_LINEAR:
|
|
157 {
|
|
158 tree decl = OMP_CLAUSE_DECL (t);
|
|
159 tree step = OMP_CLAUSE_LINEAR_STEP (t);
|
|
160 int argno = TREE_INT_CST_LOW (decl);
|
|
161 if (OMP_CLAUSE_LINEAR_VARIABLE_STRIDE (t))
|
|
162 {
|
|
163 enum cgraph_simd_clone_arg_type arg_type;
|
|
164 if (TREE_CODE (args[argno]) == REFERENCE_TYPE)
|
|
165 switch (OMP_CLAUSE_LINEAR_KIND (t))
|
|
166 {
|
|
167 case OMP_CLAUSE_LINEAR_REF:
|
|
168 arg_type
|
|
169 = SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP;
|
|
170 break;
|
|
171 case OMP_CLAUSE_LINEAR_UVAL:
|
|
172 arg_type
|
|
173 = SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP;
|
|
174 break;
|
|
175 case OMP_CLAUSE_LINEAR_VAL:
|
|
176 case OMP_CLAUSE_LINEAR_DEFAULT:
|
|
177 arg_type
|
|
178 = SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP;
|
|
179 break;
|
|
180 default:
|
|
181 gcc_unreachable ();
|
|
182 }
|
|
183 else
|
|
184 arg_type = SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP;
|
|
185 clone_info->args[argno].arg_type = arg_type;
|
|
186 clone_info->args[argno].linear_step = tree_to_shwi (step);
|
|
187 gcc_assert (clone_info->args[argno].linear_step >= 0
|
|
188 && clone_info->args[argno].linear_step < n);
|
|
189 }
|
|
190 else
|
|
191 {
|
|
192 if (POINTER_TYPE_P (args[argno]))
|
|
193 step = fold_convert (ssizetype, step);
|
|
194 if (!tree_fits_shwi_p (step))
|
|
195 {
|
|
196 warning_at (OMP_CLAUSE_LOCATION (t), 0,
|
|
197 "ignoring large linear step");
|
|
198 return NULL;
|
|
199 }
|
|
200 else if (integer_zerop (step))
|
|
201 {
|
|
202 warning_at (OMP_CLAUSE_LOCATION (t), 0,
|
|
203 "ignoring zero linear step");
|
|
204 return NULL;
|
|
205 }
|
|
206 else
|
|
207 {
|
|
208 enum cgraph_simd_clone_arg_type arg_type;
|
|
209 if (TREE_CODE (args[argno]) == REFERENCE_TYPE)
|
|
210 switch (OMP_CLAUSE_LINEAR_KIND (t))
|
|
211 {
|
|
212 case OMP_CLAUSE_LINEAR_REF:
|
|
213 arg_type
|
|
214 = SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP;
|
|
215 break;
|
|
216 case OMP_CLAUSE_LINEAR_UVAL:
|
|
217 arg_type
|
|
218 = SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP;
|
|
219 break;
|
|
220 case OMP_CLAUSE_LINEAR_VAL:
|
|
221 case OMP_CLAUSE_LINEAR_DEFAULT:
|
|
222 arg_type
|
|
223 = SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP;
|
|
224 break;
|
|
225 default:
|
|
226 gcc_unreachable ();
|
|
227 }
|
|
228 else
|
|
229 arg_type = SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP;
|
|
230 clone_info->args[argno].arg_type = arg_type;
|
|
231 clone_info->args[argno].linear_step = tree_to_shwi (step);
|
|
232 }
|
|
233 }
|
|
234 break;
|
|
235 }
|
|
236 case OMP_CLAUSE_UNIFORM:
|
|
237 {
|
|
238 tree decl = OMP_CLAUSE_DECL (t);
|
|
239 int argno = tree_to_uhwi (decl);
|
|
240 clone_info->args[argno].arg_type
|
|
241 = SIMD_CLONE_ARG_TYPE_UNIFORM;
|
|
242 break;
|
|
243 }
|
|
244 case OMP_CLAUSE_ALIGNED:
|
|
245 {
|
145
|
246 /* Ignore aligned (x) for declare simd, for the ABI we really
|
|
247 need an alignment specified. */
|
|
248 if (OMP_CLAUSE_ALIGNED_ALIGNMENT (t) == NULL_TREE)
|
|
249 break;
|
111
|
250 tree decl = OMP_CLAUSE_DECL (t);
|
|
251 int argno = tree_to_uhwi (decl);
|
|
252 clone_info->args[argno].alignment
|
|
253 = TREE_INT_CST_LOW (OMP_CLAUSE_ALIGNED_ALIGNMENT (t));
|
|
254 break;
|
|
255 }
|
|
256 default:
|
|
257 break;
|
|
258 }
|
|
259 }
|
|
260
|
|
261 out:
|
|
262 if (TYPE_ATOMIC (TREE_TYPE (TREE_TYPE (node->decl))))
|
|
263 {
|
|
264 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
|
|
265 "ignoring %<#pragma omp declare simd%> on function "
|
|
266 "with %<_Atomic%> qualified return type");
|
|
267 return NULL;
|
|
268 }
|
|
269
|
|
270 for (unsigned int argno = 0; argno < clone_info->nargs; argno++)
|
|
271 if (TYPE_ATOMIC (args[argno])
|
|
272 && clone_info->args[argno].arg_type != SIMD_CLONE_ARG_TYPE_UNIFORM)
|
|
273 {
|
|
274 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
|
|
275 "ignoring %<#pragma omp declare simd%> on function "
|
|
276 "with %<_Atomic%> qualified non-%<uniform%> argument");
|
|
277 args.release ();
|
|
278 return NULL;
|
|
279 }
|
|
280
|
|
281 return clone_info;
|
|
282 }
|
|
283
|
|
284 /* Given a SIMD clone in NODE, calculate the characteristic data
|
|
285 type and return the coresponding type. The characteristic data
|
|
286 type is computed as described in the Intel Vector ABI. */
|
|
287
|
|
288 static tree
|
|
289 simd_clone_compute_base_data_type (struct cgraph_node *node,
|
|
290 struct cgraph_simd_clone *clone_info)
|
|
291 {
|
|
292 tree type = integer_type_node;
|
|
293 tree fndecl = node->decl;
|
|
294
|
|
295 /* a) For non-void function, the characteristic data type is the
|
|
296 return type. */
|
|
297 if (TREE_CODE (TREE_TYPE (TREE_TYPE (fndecl))) != VOID_TYPE)
|
|
298 type = TREE_TYPE (TREE_TYPE (fndecl));
|
|
299
|
|
300 /* b) If the function has any non-uniform, non-linear parameters,
|
|
301 then the characteristic data type is the type of the first
|
|
302 such parameter. */
|
|
303 else
|
|
304 {
|
145
|
305 auto_vec<tree> map;
|
|
306 simd_clone_vector_of_formal_parm_types (&map, fndecl);
|
111
|
307 for (unsigned int i = 0; i < clone_info->nargs; ++i)
|
|
308 if (clone_info->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
|
|
309 {
|
|
310 type = map[i];
|
|
311 break;
|
|
312 }
|
|
313 }
|
|
314
|
|
315 /* c) If the characteristic data type determined by a) or b) above
|
|
316 is struct, union, or class type which is pass-by-value (except
|
|
317 for the type that maps to the built-in complex data type), the
|
|
318 characteristic data type is int. */
|
|
319 if (RECORD_OR_UNION_TYPE_P (type)
|
|
320 && !aggregate_value_p (type, NULL)
|
|
321 && TREE_CODE (type) != COMPLEX_TYPE)
|
|
322 return integer_type_node;
|
|
323
|
|
324 /* d) If none of the above three classes is applicable, the
|
|
325 characteristic data type is int. */
|
|
326
|
|
327 return type;
|
|
328
|
|
329 /* e) For Intel Xeon Phi native and offload compilation, if the
|
|
330 resulting characteristic data type is 8-bit or 16-bit integer
|
|
331 data type, the characteristic data type is int. */
|
|
332 /* Well, we don't handle Xeon Phi yet. */
|
|
333 }
|
|
334
|
|
335 static tree
|
|
336 simd_clone_mangle (struct cgraph_node *node,
|
|
337 struct cgraph_simd_clone *clone_info)
|
|
338 {
|
|
339 char vecsize_mangle = clone_info->vecsize_mangle;
|
|
340 char mask = clone_info->inbranch ? 'M' : 'N';
|
|
341 unsigned int simdlen = clone_info->simdlen;
|
|
342 unsigned int n;
|
|
343 pretty_printer pp;
|
|
344
|
|
345 gcc_assert (vecsize_mangle && simdlen);
|
|
346
|
|
347 pp_string (&pp, "_ZGV");
|
|
348 pp_character (&pp, vecsize_mangle);
|
|
349 pp_character (&pp, mask);
|
|
350 pp_decimal_int (&pp, simdlen);
|
|
351
|
|
352 for (n = 0; n < clone_info->nargs; ++n)
|
|
353 {
|
|
354 struct cgraph_simd_clone_arg arg = clone_info->args[n];
|
|
355
|
|
356 switch (arg.arg_type)
|
|
357 {
|
|
358 case SIMD_CLONE_ARG_TYPE_UNIFORM:
|
|
359 pp_character (&pp, 'u');
|
|
360 break;
|
|
361 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
|
|
362 pp_character (&pp, 'l');
|
|
363 goto mangle_linear;
|
|
364 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
|
|
365 pp_character (&pp, 'R');
|
|
366 goto mangle_linear;
|
|
367 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
|
|
368 pp_character (&pp, 'L');
|
|
369 goto mangle_linear;
|
|
370 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
|
|
371 pp_character (&pp, 'U');
|
|
372 goto mangle_linear;
|
|
373 mangle_linear:
|
|
374 gcc_assert (arg.linear_step != 0);
|
|
375 if (arg.linear_step > 1)
|
|
376 pp_unsigned_wide_integer (&pp, arg.linear_step);
|
|
377 else if (arg.linear_step < 0)
|
|
378 {
|
|
379 pp_character (&pp, 'n');
|
|
380 pp_unsigned_wide_integer (&pp, (-(unsigned HOST_WIDE_INT)
|
|
381 arg.linear_step));
|
|
382 }
|
|
383 break;
|
|
384 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
|
|
385 pp_string (&pp, "ls");
|
|
386 pp_unsigned_wide_integer (&pp, arg.linear_step);
|
|
387 break;
|
|
388 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
|
|
389 pp_string (&pp, "Rs");
|
|
390 pp_unsigned_wide_integer (&pp, arg.linear_step);
|
|
391 break;
|
|
392 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
|
|
393 pp_string (&pp, "Ls");
|
|
394 pp_unsigned_wide_integer (&pp, arg.linear_step);
|
|
395 break;
|
|
396 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
|
|
397 pp_string (&pp, "Us");
|
|
398 pp_unsigned_wide_integer (&pp, arg.linear_step);
|
|
399 break;
|
|
400 default:
|
|
401 pp_character (&pp, 'v');
|
|
402 }
|
|
403 if (arg.alignment)
|
|
404 {
|
|
405 pp_character (&pp, 'a');
|
|
406 pp_decimal_int (&pp, arg.alignment);
|
|
407 }
|
|
408 }
|
|
409
|
|
410 pp_underscore (&pp);
|
|
411 const char *str = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (node->decl));
|
|
412 if (*str == '*')
|
|
413 ++str;
|
|
414 pp_string (&pp, str);
|
|
415 str = pp_formatted_text (&pp);
|
|
416
|
|
417 /* If there already is a SIMD clone with the same mangled name, don't
|
|
418 add another one. This can happen e.g. for
|
|
419 #pragma omp declare simd
|
|
420 #pragma omp declare simd simdlen(8)
|
|
421 int foo (int, int);
|
|
422 if the simdlen is assumed to be 8 for the first one, etc. */
|
|
423 for (struct cgraph_node *clone = node->simd_clones; clone;
|
|
424 clone = clone->simdclone->next_clone)
|
|
425 if (id_equal (DECL_ASSEMBLER_NAME (clone->decl), str))
|
|
426 return NULL_TREE;
|
|
427
|
|
428 return get_identifier (str);
|
|
429 }
|
|
430
|
|
431 /* Create a simd clone of OLD_NODE and return it. */
|
|
432
|
|
433 static struct cgraph_node *
|
|
434 simd_clone_create (struct cgraph_node *old_node)
|
|
435 {
|
|
436 struct cgraph_node *new_node;
|
|
437 if (old_node->definition)
|
|
438 {
|
|
439 if (!old_node->has_gimple_body_p ())
|
|
440 return NULL;
|
|
441 old_node->get_body ();
|
|
442 new_node = old_node->create_version_clone_with_body (vNULL, NULL, NULL,
|
145
|
443 NULL, NULL,
|
111
|
444 "simdclone");
|
|
445 }
|
|
446 else
|
|
447 {
|
|
448 tree old_decl = old_node->decl;
|
|
449 tree new_decl = copy_node (old_node->decl);
|
145
|
450 DECL_NAME (new_decl) = clone_function_name_numbered (old_decl,
|
|
451 "simdclone");
|
111
|
452 SET_DECL_ASSEMBLER_NAME (new_decl, DECL_NAME (new_decl));
|
|
453 SET_DECL_RTL (new_decl, NULL);
|
|
454 DECL_STATIC_CONSTRUCTOR (new_decl) = 0;
|
|
455 DECL_STATIC_DESTRUCTOR (new_decl) = 0;
|
|
456 new_node = old_node->create_version_clone (new_decl, vNULL, NULL);
|
|
457 if (old_node->in_other_partition)
|
|
458 new_node->in_other_partition = 1;
|
|
459 }
|
|
460 if (new_node == NULL)
|
|
461 return new_node;
|
|
462
|
145
|
463 set_decl_built_in_function (new_node->decl, NOT_BUILT_IN, 0);
|
111
|
464 TREE_PUBLIC (new_node->decl) = TREE_PUBLIC (old_node->decl);
|
|
465 DECL_COMDAT (new_node->decl) = DECL_COMDAT (old_node->decl);
|
|
466 DECL_WEAK (new_node->decl) = DECL_WEAK (old_node->decl);
|
|
467 DECL_EXTERNAL (new_node->decl) = DECL_EXTERNAL (old_node->decl);
|
|
468 DECL_VISIBILITY_SPECIFIED (new_node->decl)
|
|
469 = DECL_VISIBILITY_SPECIFIED (old_node->decl);
|
|
470 DECL_VISIBILITY (new_node->decl) = DECL_VISIBILITY (old_node->decl);
|
|
471 DECL_DLLIMPORT_P (new_node->decl) = DECL_DLLIMPORT_P (old_node->decl);
|
|
472 if (DECL_ONE_ONLY (old_node->decl))
|
|
473 make_decl_one_only (new_node->decl, DECL_ASSEMBLER_NAME (new_node->decl));
|
|
474
|
|
475 /* The method cgraph_version_clone_with_body () will force the new
|
|
476 symbol local. Undo this, and inherit external visibility from
|
|
477 the old node. */
|
145
|
478 new_node->local = old_node->local;
|
111
|
479 new_node->externally_visible = old_node->externally_visible;
|
|
480
|
|
481 return new_node;
|
|
482 }
|
|
483
|
|
484 /* Adjust the return type of the given function to its appropriate
|
|
485 vector counterpart. Returns a simd array to be used throughout the
|
|
486 function as a return value. */
|
|
487
|
|
488 static tree
|
|
489 simd_clone_adjust_return_type (struct cgraph_node *node)
|
|
490 {
|
|
491 tree fndecl = node->decl;
|
|
492 tree orig_rettype = TREE_TYPE (TREE_TYPE (fndecl));
|
|
493 unsigned int veclen;
|
|
494 tree t;
|
|
495
|
|
496 /* Adjust the function return type. */
|
|
497 if (orig_rettype == void_type_node)
|
|
498 return NULL_TREE;
|
|
499 t = TREE_TYPE (TREE_TYPE (fndecl));
|
|
500 if (INTEGRAL_TYPE_P (t) || POINTER_TYPE_P (t))
|
|
501 veclen = node->simdclone->vecsize_int;
|
|
502 else
|
|
503 veclen = node->simdclone->vecsize_float;
|
|
504 veclen /= GET_MODE_BITSIZE (SCALAR_TYPE_MODE (t));
|
|
505 if (veclen > node->simdclone->simdlen)
|
|
506 veclen = node->simdclone->simdlen;
|
|
507 if (POINTER_TYPE_P (t))
|
|
508 t = pointer_sized_int_node;
|
|
509 if (veclen == node->simdclone->simdlen)
|
|
510 t = build_vector_type (t, node->simdclone->simdlen);
|
|
511 else
|
|
512 {
|
|
513 t = build_vector_type (t, veclen);
|
|
514 t = build_array_type_nelts (t, node->simdclone->simdlen / veclen);
|
|
515 }
|
|
516 TREE_TYPE (TREE_TYPE (fndecl)) = t;
|
|
517 if (!node->definition)
|
|
518 return NULL_TREE;
|
|
519
|
|
520 t = DECL_RESULT (fndecl);
|
|
521 /* Adjust the DECL_RESULT. */
|
|
522 gcc_assert (TREE_TYPE (t) != void_type_node);
|
|
523 TREE_TYPE (t) = TREE_TYPE (TREE_TYPE (fndecl));
|
|
524 relayout_decl (t);
|
|
525
|
|
526 tree atype = build_array_type_nelts (orig_rettype,
|
|
527 node->simdclone->simdlen);
|
|
528 if (veclen != node->simdclone->simdlen)
|
|
529 return build1 (VIEW_CONVERT_EXPR, atype, t);
|
|
530
|
|
531 /* Set up a SIMD array to use as the return value. */
|
|
532 tree retval = create_tmp_var_raw (atype, "retval");
|
|
533 gimple_add_tmp_var (retval);
|
|
534 return retval;
|
|
535 }
|
|
536
|
|
537 /* Each vector argument has a corresponding array to be used locally
|
|
538 as part of the eventual loop. Create such temporary array and
|
|
539 return it.
|
|
540
|
|
541 PREFIX is the prefix to be used for the temporary.
|
|
542
|
|
543 TYPE is the inner element type.
|
|
544
|
|
545 SIMDLEN is the number of elements. */
|
|
546
|
|
547 static tree
|
|
548 create_tmp_simd_array (const char *prefix, tree type, int simdlen)
|
|
549 {
|
|
550 tree atype = build_array_type_nelts (type, simdlen);
|
|
551 tree avar = create_tmp_var_raw (atype, prefix);
|
|
552 gimple_add_tmp_var (avar);
|
|
553 return avar;
|
|
554 }
|
|
555
|
|
556 /* Modify the function argument types to their corresponding vector
|
|
557 counterparts if appropriate. Also, create one array for each simd
|
|
558 argument to be used locally when using the function arguments as
|
|
559 part of the loop.
|
|
560
|
|
561 NODE is the function whose arguments are to be adjusted.
|
|
562
|
145
|
563 If NODE does not represent function definition, returns NULL. Otherwise
|
|
564 returns an adjustment class that will be filled describing how the argument
|
|
565 declarations will be remapped. New arguments which are not to be remapped
|
|
566 are marked with USER_FLAG. */
|
111
|
567
|
145
|
568 static ipa_param_body_adjustments *
|
111
|
569 simd_clone_adjust_argument_types (struct cgraph_node *node)
|
|
570 {
|
145
|
571 auto_vec<tree> args;
|
111
|
572
|
|
573 if (node->definition)
|
145
|
574 push_function_arg_decls (&args, node->decl);
|
111
|
575 else
|
145
|
576 simd_clone_vector_of_formal_parm_types (&args, node->decl);
|
|
577 struct cgraph_simd_clone *sc = node->simdclone;
|
|
578 vec<ipa_adjusted_param, va_gc> *new_params = NULL;
|
|
579 vec_safe_reserve (new_params, sc->nargs);
|
111
|
580 unsigned i, j, veclen;
|
|
581
|
|
582 for (i = 0; i < sc->nargs; ++i)
|
|
583 {
|
145
|
584 ipa_adjusted_param adj;
|
111
|
585 memset (&adj, 0, sizeof (adj));
|
|
586 tree parm = args[i];
|
|
587 tree parm_type = node->definition ? TREE_TYPE (parm) : parm;
|
|
588 adj.base_index = i;
|
145
|
589 adj.prev_clone_index = i;
|
111
|
590
|
|
591 sc->args[i].orig_arg = node->definition ? parm : NULL_TREE;
|
|
592 sc->args[i].orig_type = parm_type;
|
|
593
|
|
594 switch (sc->args[i].arg_type)
|
|
595 {
|
|
596 default:
|
|
597 /* No adjustment necessary for scalar arguments. */
|
145
|
598 adj.op = IPA_PARAM_OP_COPY;
|
111
|
599 break;
|
|
600 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
|
|
601 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
|
|
602 if (node->definition)
|
|
603 sc->args[i].simd_array
|
|
604 = create_tmp_simd_array (IDENTIFIER_POINTER (DECL_NAME (parm)),
|
|
605 TREE_TYPE (parm_type),
|
|
606 sc->simdlen);
|
145
|
607 adj.op = IPA_PARAM_OP_COPY;
|
111
|
608 break;
|
|
609 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
|
|
610 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
|
|
611 case SIMD_CLONE_ARG_TYPE_VECTOR:
|
|
612 if (INTEGRAL_TYPE_P (parm_type) || POINTER_TYPE_P (parm_type))
|
|
613 veclen = sc->vecsize_int;
|
|
614 else
|
|
615 veclen = sc->vecsize_float;
|
|
616 veclen /= GET_MODE_BITSIZE (SCALAR_TYPE_MODE (parm_type));
|
|
617 if (veclen > sc->simdlen)
|
|
618 veclen = sc->simdlen;
|
145
|
619 adj.op = IPA_PARAM_OP_NEW;
|
|
620 adj.param_prefix_index = IPA_PARAM_PREFIX_SIMD;
|
111
|
621 if (POINTER_TYPE_P (parm_type))
|
|
622 adj.type = build_vector_type (pointer_sized_int_node, veclen);
|
|
623 else
|
|
624 adj.type = build_vector_type (parm_type, veclen);
|
|
625 sc->args[i].vector_type = adj.type;
|
|
626 for (j = veclen; j < sc->simdlen; j += veclen)
|
|
627 {
|
145
|
628 vec_safe_push (new_params, adj);
|
111
|
629 if (j == veclen)
|
|
630 {
|
|
631 memset (&adj, 0, sizeof (adj));
|
145
|
632 adj.op = IPA_PARAM_OP_NEW;
|
|
633 adj.user_flag = 1;
|
|
634 adj.param_prefix_index = IPA_PARAM_PREFIX_SIMD;
|
111
|
635 adj.base_index = i;
|
145
|
636 adj.prev_clone_index = i;
|
111
|
637 adj.type = sc->args[i].vector_type;
|
|
638 }
|
|
639 }
|
|
640
|
|
641 if (node->definition)
|
|
642 sc->args[i].simd_array
|
|
643 = create_tmp_simd_array (DECL_NAME (parm)
|
|
644 ? IDENTIFIER_POINTER (DECL_NAME (parm))
|
|
645 : NULL, parm_type, sc->simdlen);
|
|
646 }
|
145
|
647 vec_safe_push (new_params, adj);
|
111
|
648 }
|
|
649
|
|
650 if (sc->inbranch)
|
|
651 {
|
|
652 tree base_type = simd_clone_compute_base_data_type (sc->origin, sc);
|
145
|
653 ipa_adjusted_param adj;
|
111
|
654 memset (&adj, 0, sizeof (adj));
|
145
|
655 adj.op = IPA_PARAM_OP_NEW;
|
|
656 adj.user_flag = 1;
|
|
657 adj.param_prefix_index = IPA_PARAM_PREFIX_MASK;
|
111
|
658
|
|
659 adj.base_index = i;
|
145
|
660 adj.prev_clone_index = i;
|
111
|
661 if (INTEGRAL_TYPE_P (base_type) || POINTER_TYPE_P (base_type))
|
|
662 veclen = sc->vecsize_int;
|
|
663 else
|
|
664 veclen = sc->vecsize_float;
|
|
665 veclen /= GET_MODE_BITSIZE (SCALAR_TYPE_MODE (base_type));
|
|
666 if (veclen > sc->simdlen)
|
|
667 veclen = sc->simdlen;
|
|
668 if (sc->mask_mode != VOIDmode)
|
|
669 adj.type
|
|
670 = lang_hooks.types.type_for_mode (sc->mask_mode, 1);
|
|
671 else if (POINTER_TYPE_P (base_type))
|
|
672 adj.type = build_vector_type (pointer_sized_int_node, veclen);
|
|
673 else
|
|
674 adj.type = build_vector_type (base_type, veclen);
|
145
|
675 vec_safe_push (new_params, adj);
|
111
|
676
|
|
677 for (j = veclen; j < sc->simdlen; j += veclen)
|
145
|
678 vec_safe_push (new_params, adj);
|
111
|
679
|
|
680 /* We have previously allocated one extra entry for the mask. Use
|
|
681 it and fill it. */
|
|
682 sc->nargs++;
|
|
683 if (sc->mask_mode != VOIDmode)
|
|
684 base_type = boolean_type_node;
|
|
685 if (node->definition)
|
|
686 {
|
|
687 sc->args[i].orig_arg
|
|
688 = build_decl (UNKNOWN_LOCATION, PARM_DECL, NULL, base_type);
|
|
689 if (sc->mask_mode == VOIDmode)
|
|
690 sc->args[i].simd_array
|
|
691 = create_tmp_simd_array ("mask", base_type, sc->simdlen);
|
|
692 else if (veclen < sc->simdlen)
|
|
693 sc->args[i].simd_array
|
|
694 = create_tmp_simd_array ("mask", adj.type, sc->simdlen / veclen);
|
|
695 else
|
|
696 sc->args[i].simd_array = NULL_TREE;
|
|
697 }
|
|
698 sc->args[i].orig_type = base_type;
|
|
699 sc->args[i].arg_type = SIMD_CLONE_ARG_TYPE_MASK;
|
|
700 }
|
|
701
|
|
702 if (node->definition)
|
145
|
703 {
|
|
704 ipa_param_body_adjustments *adjustments
|
|
705 = new ipa_param_body_adjustments (new_params, node->decl);
|
|
706
|
|
707 adjustments->modify_formal_parameters ();
|
|
708 return adjustments;
|
|
709 }
|
111
|
710 else
|
|
711 {
|
|
712 tree new_arg_types = NULL_TREE, new_reversed;
|
|
713 bool last_parm_void = false;
|
|
714 if (args.length () > 0 && args.last () == void_type_node)
|
|
715 last_parm_void = true;
|
|
716
|
|
717 gcc_assert (TYPE_ARG_TYPES (TREE_TYPE (node->decl)));
|
145
|
718 j = vec_safe_length (new_params);
|
111
|
719 for (i = 0; i < j; i++)
|
|
720 {
|
145
|
721 struct ipa_adjusted_param *adj = &(*new_params)[i];
|
111
|
722 tree ptype;
|
145
|
723 if (adj->op == IPA_PARAM_OP_COPY)
|
111
|
724 ptype = args[adj->base_index];
|
|
725 else
|
|
726 ptype = adj->type;
|
|
727 new_arg_types = tree_cons (NULL_TREE, ptype, new_arg_types);
|
|
728 }
|
|
729 new_reversed = nreverse (new_arg_types);
|
|
730 if (last_parm_void)
|
|
731 {
|
|
732 if (new_reversed)
|
|
733 TREE_CHAIN (new_arg_types) = void_list_node;
|
|
734 else
|
|
735 new_reversed = void_list_node;
|
|
736 }
|
145
|
737 TYPE_ARG_TYPES (TREE_TYPE (node->decl)) = new_reversed;
|
|
738 return NULL;
|
111
|
739 }
|
|
740 }
|
|
741
|
|
742 /* Initialize and copy the function arguments in NODE to their
|
|
743 corresponding local simd arrays. Returns a fresh gimple_seq with
|
|
744 the instruction sequence generated. */
|
|
745
|
|
746 static gimple_seq
|
|
747 simd_clone_init_simd_arrays (struct cgraph_node *node,
|
145
|
748 ipa_param_body_adjustments *adjustments)
|
111
|
749 {
|
|
750 gimple_seq seq = NULL;
|
|
751 unsigned i = 0, j = 0, k;
|
|
752
|
|
753 for (tree arg = DECL_ARGUMENTS (node->decl);
|
|
754 arg;
|
|
755 arg = DECL_CHAIN (arg), i++, j++)
|
|
756 {
|
145
|
757 if ((*adjustments->m_adj_params)[j].op == IPA_PARAM_OP_COPY
|
111
|
758 || POINTER_TYPE_P (TREE_TYPE (arg)))
|
|
759 continue;
|
|
760
|
|
761 node->simdclone->args[i].vector_arg = arg;
|
|
762
|
|
763 tree array = node->simdclone->args[i].simd_array;
|
|
764 if (node->simdclone->mask_mode != VOIDmode
|
|
765 && node->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK)
|
|
766 {
|
|
767 if (array == NULL_TREE)
|
|
768 continue;
|
|
769 unsigned int l
|
|
770 = tree_to_uhwi (TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (array))));
|
|
771 for (k = 0; k <= l; k++)
|
|
772 {
|
|
773 if (k)
|
|
774 {
|
|
775 arg = DECL_CHAIN (arg);
|
|
776 j++;
|
|
777 }
|
|
778 tree t = build4 (ARRAY_REF, TREE_TYPE (TREE_TYPE (array)),
|
|
779 array, size_int (k), NULL, NULL);
|
|
780 t = build2 (MODIFY_EXPR, TREE_TYPE (t), t, arg);
|
|
781 gimplify_and_add (t, &seq);
|
|
782 }
|
|
783 continue;
|
|
784 }
|
131
|
785 if (simd_clone_subparts (TREE_TYPE (arg)) == node->simdclone->simdlen)
|
111
|
786 {
|
|
787 tree ptype = build_pointer_type (TREE_TYPE (TREE_TYPE (array)));
|
|
788 tree ptr = build_fold_addr_expr (array);
|
|
789 tree t = build2 (MEM_REF, TREE_TYPE (arg), ptr,
|
|
790 build_int_cst (ptype, 0));
|
|
791 t = build2 (MODIFY_EXPR, TREE_TYPE (t), t, arg);
|
|
792 gimplify_and_add (t, &seq);
|
|
793 }
|
|
794 else
|
|
795 {
|
131
|
796 unsigned int simdlen = simd_clone_subparts (TREE_TYPE (arg));
|
111
|
797 tree ptype = build_pointer_type (TREE_TYPE (TREE_TYPE (array)));
|
|
798 for (k = 0; k < node->simdclone->simdlen; k += simdlen)
|
|
799 {
|
|
800 tree ptr = build_fold_addr_expr (array);
|
|
801 int elemsize;
|
|
802 if (k)
|
|
803 {
|
|
804 arg = DECL_CHAIN (arg);
|
|
805 j++;
|
|
806 }
|
|
807 tree elemtype = TREE_TYPE (TREE_TYPE (arg));
|
|
808 elemsize = GET_MODE_SIZE (SCALAR_TYPE_MODE (elemtype));
|
|
809 tree t = build2 (MEM_REF, TREE_TYPE (arg), ptr,
|
|
810 build_int_cst (ptype, k * elemsize));
|
|
811 t = build2 (MODIFY_EXPR, TREE_TYPE (t), t, arg);
|
|
812 gimplify_and_add (t, &seq);
|
|
813 }
|
|
814 }
|
|
815 }
|
|
816 return seq;
|
|
817 }
|
|
818
|
|
819 /* Callback info for ipa_simd_modify_stmt_ops below. */
|
|
820
|
|
821 struct modify_stmt_info {
|
145
|
822 ipa_param_body_adjustments *adjustments;
|
111
|
823 gimple *stmt;
|
|
824 /* True if the parent statement was modified by
|
|
825 ipa_simd_modify_stmt_ops. */
|
|
826 bool modified;
|
|
827 };
|
|
828
|
|
829 /* Callback for walk_gimple_op.
|
|
830
|
|
831 Adjust operands from a given statement as specified in the
|
|
832 adjustments vector in the callback data. */
|
|
833
|
|
834 static tree
|
|
835 ipa_simd_modify_stmt_ops (tree *tp, int *walk_subtrees, void *data)
|
|
836 {
|
|
837 struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
|
|
838 struct modify_stmt_info *info = (struct modify_stmt_info *) wi->info;
|
|
839 tree *orig_tp = tp;
|
|
840 if (TREE_CODE (*tp) == ADDR_EXPR)
|
|
841 tp = &TREE_OPERAND (*tp, 0);
|
145
|
842
|
|
843 if (TREE_CODE (*tp) == BIT_FIELD_REF
|
|
844 || TREE_CODE (*tp) == IMAGPART_EXPR
|
|
845 || TREE_CODE (*tp) == REALPART_EXPR)
|
|
846 tp = &TREE_OPERAND (*tp, 0);
|
111
|
847
|
|
848 tree repl = NULL_TREE;
|
145
|
849 ipa_param_body_replacement *pbr = NULL;
|
|
850
|
|
851 if (TREE_CODE (*tp) == PARM_DECL)
|
|
852 {
|
|
853 pbr = info->adjustments->get_expr_replacement (*tp, true);
|
|
854 if (pbr)
|
|
855 repl = pbr->repl;
|
|
856 }
|
|
857 else if (TYPE_P (*tp))
|
|
858 *walk_subtrees = 0;
|
|
859
|
|
860 if (repl)
|
|
861 repl = unshare_expr (repl);
|
111
|
862 else
|
|
863 {
|
|
864 if (tp != orig_tp)
|
|
865 {
|
|
866 *walk_subtrees = 0;
|
|
867 bool modified = info->modified;
|
|
868 info->modified = false;
|
|
869 walk_tree (tp, ipa_simd_modify_stmt_ops, wi, wi->pset);
|
|
870 if (!info->modified)
|
|
871 {
|
|
872 info->modified = modified;
|
|
873 return NULL_TREE;
|
|
874 }
|
|
875 info->modified = modified;
|
|
876 repl = *tp;
|
|
877 }
|
|
878 else
|
|
879 return NULL_TREE;
|
|
880 }
|
|
881
|
|
882 if (tp != orig_tp)
|
|
883 {
|
145
|
884 if (gimple_code (info->stmt) == GIMPLE_PHI
|
|
885 && pbr
|
|
886 && TREE_CODE (*orig_tp) == ADDR_EXPR
|
|
887 && TREE_CODE (TREE_OPERAND (*orig_tp, 0)) == PARM_DECL
|
|
888 && pbr->dummy)
|
|
889 {
|
|
890 gcc_assert (TREE_CODE (pbr->dummy) == SSA_NAME);
|
|
891 *orig_tp = pbr->dummy;
|
|
892 info->modified = true;
|
|
893 return NULL_TREE;
|
|
894 }
|
|
895
|
111
|
896 repl = build_fold_addr_expr (repl);
|
|
897 gimple *stmt;
|
|
898 if (is_gimple_debug (info->stmt))
|
|
899 {
|
|
900 tree vexpr = make_node (DEBUG_EXPR_DECL);
|
|
901 stmt = gimple_build_debug_source_bind (vexpr, repl, NULL);
|
|
902 DECL_ARTIFICIAL (vexpr) = 1;
|
|
903 TREE_TYPE (vexpr) = TREE_TYPE (repl);
|
|
904 SET_DECL_MODE (vexpr, TYPE_MODE (TREE_TYPE (repl)));
|
|
905 repl = vexpr;
|
|
906 }
|
|
907 else
|
|
908 {
|
|
909 stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (repl)), repl);
|
|
910 repl = gimple_assign_lhs (stmt);
|
|
911 }
|
145
|
912 gimple_stmt_iterator gsi;
|
|
913 if (gimple_code (info->stmt) == GIMPLE_PHI)
|
|
914 {
|
|
915 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
|
|
916 /* Cache SSA_NAME for next time. */
|
|
917 if (pbr
|
|
918 && TREE_CODE (*orig_tp) == ADDR_EXPR
|
|
919 && TREE_CODE (TREE_OPERAND (*orig_tp, 0)) == PARM_DECL)
|
|
920 {
|
|
921 gcc_assert (!pbr->dummy);
|
|
922 pbr->dummy = repl;
|
|
923 }
|
|
924 }
|
|
925 else
|
|
926 gsi = gsi_for_stmt (info->stmt);
|
111
|
927 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
|
|
928 *orig_tp = repl;
|
|
929 }
|
|
930 else if (!useless_type_conversion_p (TREE_TYPE (*tp), TREE_TYPE (repl)))
|
|
931 {
|
|
932 tree vce = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (*tp), repl);
|
|
933 *tp = vce;
|
|
934 }
|
|
935 else
|
|
936 *tp = repl;
|
|
937
|
|
938 info->modified = true;
|
|
939 return NULL_TREE;
|
|
940 }
|
|
941
|
|
942 /* Traverse the function body and perform all modifications as
|
|
943 described in ADJUSTMENTS. At function return, ADJUSTMENTS will be
|
|
944 modified such that the replacement/reduction value will now be an
|
|
945 offset into the corresponding simd_array.
|
|
946
|
|
947 This function will replace all function argument uses with their
|
|
948 corresponding simd array elements, and ajust the return values
|
|
949 accordingly. */
|
|
950
|
|
951 static void
|
|
952 ipa_simd_modify_function_body (struct cgraph_node *node,
|
145
|
953 ipa_param_body_adjustments *adjustments,
|
111
|
954 tree retval_array, tree iter)
|
|
955 {
|
|
956 basic_block bb;
|
145
|
957 unsigned int i, j;
|
|
958
|
111
|
959
|
145
|
960 /* Register replacements for every function argument use to an offset into
|
|
961 the corresponding simd_array. */
|
111
|
962 for (i = 0, j = 0; i < node->simdclone->nargs; ++i, ++j)
|
|
963 {
|
145
|
964 if (!node->simdclone->args[i].vector_arg
|
|
965 || (*adjustments->m_adj_params)[j].user_flag)
|
111
|
966 continue;
|
|
967
|
|
968 tree basetype = TREE_TYPE (node->simdclone->args[i].orig_arg);
|
|
969 tree vectype = TREE_TYPE (node->simdclone->args[i].vector_arg);
|
145
|
970 tree r = build4 (ARRAY_REF, basetype, node->simdclone->args[i].simd_array,
|
|
971 iter, NULL_TREE, NULL_TREE);
|
|
972 adjustments->register_replacement (&(*adjustments->m_adj_params)[j], r);
|
|
973
|
|
974 if (simd_clone_subparts (vectype) < node->simdclone->simdlen)
|
131
|
975 j += node->simdclone->simdlen / simd_clone_subparts (vectype) - 1;
|
111
|
976 }
|
|
977
|
|
978 tree name;
|
|
979 FOR_EACH_SSA_NAME (i, name, cfun)
|
|
980 {
|
145
|
981 tree base_var;
|
111
|
982 if (SSA_NAME_VAR (name)
|
145
|
983 && TREE_CODE (SSA_NAME_VAR (name)) == PARM_DECL
|
|
984 && (base_var
|
|
985 = adjustments->get_replacement_ssa_base (SSA_NAME_VAR (name))))
|
111
|
986 {
|
145
|
987 if (SSA_NAME_IS_DEFAULT_DEF (name))
|
|
988 {
|
|
989 tree old_decl = SSA_NAME_VAR (name);
|
|
990 bb = single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun));
|
|
991 gimple_stmt_iterator gsi = gsi_after_labels (bb);
|
|
992 tree repl = adjustments->lookup_replacement (old_decl, 0);
|
|
993 gcc_checking_assert (repl);
|
|
994 repl = unshare_expr (repl);
|
|
995 set_ssa_default_def (cfun, old_decl, NULL_TREE);
|
|
996 SET_SSA_NAME_VAR_OR_IDENTIFIER (name, base_var);
|
|
997 SSA_NAME_IS_DEFAULT_DEF (name) = 0;
|
|
998 gimple *stmt = gimple_build_assign (name, repl);
|
|
999 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
|
|
1000 }
|
|
1001 else
|
|
1002 SET_SSA_NAME_VAR_OR_IDENTIFIER (name, base_var);
|
111
|
1003 }
|
|
1004 }
|
|
1005
|
|
1006 struct modify_stmt_info info;
|
|
1007 info.adjustments = adjustments;
|
|
1008
|
|
1009 FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (node->decl))
|
|
1010 {
|
|
1011 gimple_stmt_iterator gsi;
|
|
1012
|
145
|
1013 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
|
|
1014 {
|
|
1015 gphi *phi = as_a <gphi *> (gsi_stmt (gsi));
|
|
1016 int i, n = gimple_phi_num_args (phi);
|
|
1017 info.stmt = phi;
|
|
1018 struct walk_stmt_info wi;
|
|
1019 memset (&wi, 0, sizeof (wi));
|
|
1020 info.modified = false;
|
|
1021 wi.info = &info;
|
|
1022 for (i = 0; i < n; ++i)
|
|
1023 {
|
|
1024 int walk_subtrees = 1;
|
|
1025 tree arg = gimple_phi_arg_def (phi, i);
|
|
1026 tree op = arg;
|
|
1027 ipa_simd_modify_stmt_ops (&op, &walk_subtrees, &wi);
|
|
1028 if (op != arg)
|
|
1029 {
|
|
1030 SET_PHI_ARG_DEF (phi, i, op);
|
|
1031 gcc_assert (TREE_CODE (op) == SSA_NAME);
|
|
1032 if (gimple_phi_arg_edge (phi, i)->flags & EDGE_ABNORMAL)
|
|
1033 SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op) = 1;
|
|
1034 }
|
|
1035 }
|
|
1036 }
|
|
1037
|
111
|
1038 gsi = gsi_start_bb (bb);
|
|
1039 while (!gsi_end_p (gsi))
|
|
1040 {
|
|
1041 gimple *stmt = gsi_stmt (gsi);
|
|
1042 info.stmt = stmt;
|
|
1043 struct walk_stmt_info wi;
|
|
1044
|
|
1045 memset (&wi, 0, sizeof (wi));
|
|
1046 info.modified = false;
|
|
1047 wi.info = &info;
|
|
1048 walk_gimple_op (stmt, ipa_simd_modify_stmt_ops, &wi);
|
|
1049
|
|
1050 if (greturn *return_stmt = dyn_cast <greturn *> (stmt))
|
|
1051 {
|
|
1052 tree retval = gimple_return_retval (return_stmt);
|
145
|
1053 edge e = find_edge (bb, EXIT_BLOCK_PTR_FOR_FN (cfun));
|
|
1054 e->flags |= EDGE_FALLTHRU;
|
111
|
1055 if (!retval)
|
|
1056 {
|
|
1057 gsi_remove (&gsi, true);
|
|
1058 continue;
|
|
1059 }
|
|
1060
|
|
1061 /* Replace `return foo' with `retval_array[iter] = foo'. */
|
|
1062 tree ref = build4 (ARRAY_REF, TREE_TYPE (retval),
|
|
1063 retval_array, iter, NULL, NULL);
|
|
1064 stmt = gimple_build_assign (ref, retval);
|
|
1065 gsi_replace (&gsi, stmt, true);
|
|
1066 info.modified = true;
|
|
1067 }
|
|
1068
|
|
1069 if (info.modified)
|
|
1070 {
|
|
1071 update_stmt (stmt);
|
145
|
1072 /* If the above changed the var of a debug bind into something
|
|
1073 different, remove the debug stmt. We could also for all the
|
|
1074 replaced parameters add VAR_DECLs for debug info purposes,
|
|
1075 add debug stmts for those to be the simd array accesses and
|
|
1076 replace debug stmt var operand with that var. Debugging of
|
|
1077 vectorized loops doesn't work too well, so don't bother for
|
|
1078 now. */
|
|
1079 if ((gimple_debug_bind_p (stmt)
|
|
1080 && !DECL_P (gimple_debug_bind_get_var (stmt)))
|
|
1081 || (gimple_debug_source_bind_p (stmt)
|
|
1082 && !DECL_P (gimple_debug_source_bind_get_var (stmt))))
|
|
1083 {
|
|
1084 gsi_remove (&gsi, true);
|
|
1085 continue;
|
|
1086 }
|
111
|
1087 if (maybe_clean_eh_stmt (stmt))
|
|
1088 gimple_purge_dead_eh_edges (gimple_bb (stmt));
|
|
1089 }
|
|
1090 gsi_next (&gsi);
|
|
1091 }
|
|
1092 }
|
|
1093 }
|
|
1094
|
|
1095 /* Helper function of simd_clone_adjust, return linear step addend
|
|
1096 of Ith argument. */
|
|
1097
|
|
1098 static tree
|
|
1099 simd_clone_linear_addend (struct cgraph_node *node, unsigned int i,
|
|
1100 tree addtype, basic_block entry_bb)
|
|
1101 {
|
|
1102 tree ptype = NULL_TREE;
|
|
1103 switch (node->simdclone->args[i].arg_type)
|
|
1104 {
|
|
1105 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
|
|
1106 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
|
|
1107 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
|
|
1108 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
|
|
1109 return build_int_cst (addtype, node->simdclone->args[i].linear_step);
|
|
1110 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
|
|
1111 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
|
|
1112 ptype = TREE_TYPE (node->simdclone->args[i].orig_arg);
|
|
1113 break;
|
|
1114 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
|
|
1115 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
|
|
1116 ptype = TREE_TYPE (TREE_TYPE (node->simdclone->args[i].orig_arg));
|
|
1117 break;
|
|
1118 default:
|
|
1119 gcc_unreachable ();
|
|
1120 }
|
|
1121
|
|
1122 unsigned int idx = node->simdclone->args[i].linear_step;
|
|
1123 tree arg = node->simdclone->args[idx].orig_arg;
|
|
1124 gcc_assert (is_gimple_reg_type (TREE_TYPE (arg)));
|
|
1125 gimple_stmt_iterator gsi = gsi_after_labels (entry_bb);
|
|
1126 gimple *g;
|
|
1127 tree ret;
|
|
1128 if (is_gimple_reg (arg))
|
|
1129 ret = get_or_create_ssa_default_def (cfun, arg);
|
|
1130 else
|
|
1131 {
|
|
1132 g = gimple_build_assign (make_ssa_name (TREE_TYPE (arg)), arg);
|
|
1133 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
|
|
1134 ret = gimple_assign_lhs (g);
|
|
1135 }
|
|
1136 if (TREE_CODE (TREE_TYPE (arg)) == REFERENCE_TYPE)
|
|
1137 {
|
|
1138 g = gimple_build_assign (make_ssa_name (TREE_TYPE (TREE_TYPE (arg))),
|
|
1139 build_simple_mem_ref (ret));
|
|
1140 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
|
|
1141 ret = gimple_assign_lhs (g);
|
|
1142 }
|
|
1143 if (!useless_type_conversion_p (addtype, TREE_TYPE (ret)))
|
|
1144 {
|
|
1145 g = gimple_build_assign (make_ssa_name (addtype), NOP_EXPR, ret);
|
|
1146 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
|
|
1147 ret = gimple_assign_lhs (g);
|
|
1148 }
|
|
1149 if (POINTER_TYPE_P (ptype))
|
|
1150 {
|
|
1151 tree size = TYPE_SIZE_UNIT (TREE_TYPE (ptype));
|
|
1152 if (size && TREE_CODE (size) == INTEGER_CST)
|
|
1153 {
|
|
1154 g = gimple_build_assign (make_ssa_name (addtype), MULT_EXPR,
|
|
1155 ret, fold_convert (addtype, size));
|
|
1156 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
|
|
1157 ret = gimple_assign_lhs (g);
|
|
1158 }
|
|
1159 }
|
|
1160 return ret;
|
|
1161 }
|
|
1162
|
|
1163 /* Adjust the argument types in NODE to their appropriate vector
|
|
1164 counterparts. */
|
|
1165
|
|
1166 static void
|
|
1167 simd_clone_adjust (struct cgraph_node *node)
|
|
1168 {
|
|
1169 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
|
|
1170
|
145
|
1171 TREE_TYPE (node->decl) = build_distinct_type_copy (TREE_TYPE (node->decl));
|
111
|
1172 targetm.simd_clone.adjust (node);
|
|
1173
|
|
1174 tree retval = simd_clone_adjust_return_type (node);
|
145
|
1175 ipa_param_body_adjustments *adjustments
|
111
|
1176 = simd_clone_adjust_argument_types (node);
|
145
|
1177 gcc_assert (adjustments);
|
111
|
1178
|
|
1179 push_gimplify_context ();
|
|
1180
|
|
1181 gimple_seq seq = simd_clone_init_simd_arrays (node, adjustments);
|
|
1182
|
|
1183 /* Adjust all uses of vector arguments accordingly. Adjust all
|
|
1184 return values accordingly. */
|
|
1185 tree iter = create_tmp_var (unsigned_type_node, "iter");
|
|
1186 tree iter1 = make_ssa_name (iter);
|
|
1187 tree iter2 = NULL_TREE;
|
|
1188 ipa_simd_modify_function_body (node, adjustments, retval, iter1);
|
145
|
1189 delete adjustments;
|
111
|
1190
|
|
1191 /* Initialize the iteration variable. */
|
|
1192 basic_block entry_bb = single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun));
|
|
1193 basic_block body_bb = split_block_after_labels (entry_bb)->dest;
|
|
1194 gimple_stmt_iterator gsi = gsi_after_labels (entry_bb);
|
|
1195 /* Insert the SIMD array and iv initialization at function
|
|
1196 entry. */
|
|
1197 gsi_insert_seq_before (&gsi, seq, GSI_NEW_STMT);
|
|
1198
|
|
1199 pop_gimplify_context (NULL);
|
|
1200
|
|
1201 gimple *g;
|
|
1202 basic_block incr_bb = NULL;
|
145
|
1203 class loop *loop = NULL;
|
111
|
1204
|
|
1205 /* Create a new BB right before the original exit BB, to hold the
|
|
1206 iteration increment and the condition/branch. */
|
|
1207 if (EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds))
|
|
1208 {
|
|
1209 basic_block orig_exit = EDGE_PRED (EXIT_BLOCK_PTR_FOR_FN (cfun), 0)->src;
|
|
1210 incr_bb = create_empty_bb (orig_exit);
|
131
|
1211 incr_bb->count = profile_count::zero ();
|
111
|
1212 add_bb_to_loop (incr_bb, body_bb->loop_father);
|
145
|
1213 while (EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds))
|
111
|
1214 {
|
145
|
1215 edge e = EDGE_PRED (EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
|
111
|
1216 redirect_edge_succ (e, incr_bb);
|
131
|
1217 incr_bb->count += e->count ();
|
111
|
1218 }
|
|
1219 }
|
|
1220 else if (node->simdclone->inbranch)
|
|
1221 {
|
|
1222 incr_bb = create_empty_bb (entry_bb);
|
131
|
1223 incr_bb->count = profile_count::zero ();
|
111
|
1224 add_bb_to_loop (incr_bb, body_bb->loop_father);
|
|
1225 }
|
|
1226
|
|
1227 if (incr_bb)
|
|
1228 {
|
|
1229 make_single_succ_edge (incr_bb, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
|
|
1230 gsi = gsi_last_bb (incr_bb);
|
|
1231 iter2 = make_ssa_name (iter);
|
|
1232 g = gimple_build_assign (iter2, PLUS_EXPR, iter1,
|
|
1233 build_int_cst (unsigned_type_node, 1));
|
|
1234 gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
|
|
1235
|
|
1236 /* Mostly annotate the loop for the vectorizer (the rest is done
|
|
1237 below). */
|
|
1238 loop = alloc_loop ();
|
|
1239 cfun->has_force_vectorize_loops = true;
|
|
1240 loop->safelen = node->simdclone->simdlen;
|
|
1241 loop->force_vectorize = true;
|
|
1242 loop->header = body_bb;
|
|
1243 }
|
|
1244
|
|
1245 /* Branch around the body if the mask applies. */
|
|
1246 if (node->simdclone->inbranch)
|
|
1247 {
|
|
1248 gsi = gsi_last_bb (loop->header);
|
|
1249 tree mask_array
|
|
1250 = node->simdclone->args[node->simdclone->nargs - 1].simd_array;
|
|
1251 tree mask;
|
|
1252 if (node->simdclone->mask_mode != VOIDmode)
|
|
1253 {
|
|
1254 tree shift_cnt;
|
|
1255 if (mask_array == NULL_TREE)
|
|
1256 {
|
|
1257 tree arg = node->simdclone->args[node->simdclone->nargs
|
|
1258 - 1].vector_arg;
|
|
1259 mask = get_or_create_ssa_default_def (cfun, arg);
|
|
1260 shift_cnt = iter1;
|
|
1261 }
|
|
1262 else
|
|
1263 {
|
|
1264 tree maskt = TREE_TYPE (mask_array);
|
|
1265 int c = tree_to_uhwi (TYPE_MAX_VALUE (TYPE_DOMAIN (maskt)));
|
|
1266 c = node->simdclone->simdlen / (c + 1);
|
|
1267 int s = exact_log2 (c);
|
|
1268 gcc_assert (s > 0);
|
|
1269 c--;
|
|
1270 tree idx = make_ssa_name (TREE_TYPE (iter1));
|
|
1271 g = gimple_build_assign (idx, RSHIFT_EXPR, iter1,
|
|
1272 build_int_cst (NULL_TREE, s));
|
|
1273 gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
|
|
1274 mask = make_ssa_name (TREE_TYPE (TREE_TYPE (mask_array)));
|
|
1275 tree aref = build4 (ARRAY_REF,
|
|
1276 TREE_TYPE (TREE_TYPE (mask_array)),
|
|
1277 mask_array, idx, NULL, NULL);
|
|
1278 g = gimple_build_assign (mask, aref);
|
|
1279 gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
|
|
1280 shift_cnt = make_ssa_name (TREE_TYPE (iter1));
|
|
1281 g = gimple_build_assign (shift_cnt, BIT_AND_EXPR, iter1,
|
|
1282 build_int_cst (TREE_TYPE (iter1), c));
|
|
1283 gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
|
|
1284 }
|
|
1285 g = gimple_build_assign (make_ssa_name (TREE_TYPE (mask)),
|
|
1286 RSHIFT_EXPR, mask, shift_cnt);
|
|
1287 gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
|
|
1288 mask = gimple_assign_lhs (g);
|
|
1289 g = gimple_build_assign (make_ssa_name (TREE_TYPE (mask)),
|
|
1290 BIT_AND_EXPR, mask,
|
|
1291 build_int_cst (TREE_TYPE (mask), 1));
|
|
1292 gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
|
|
1293 mask = gimple_assign_lhs (g);
|
|
1294 }
|
|
1295 else
|
|
1296 {
|
|
1297 mask = make_ssa_name (TREE_TYPE (TREE_TYPE (mask_array)));
|
|
1298 tree aref = build4 (ARRAY_REF,
|
|
1299 TREE_TYPE (TREE_TYPE (mask_array)),
|
|
1300 mask_array, iter1, NULL, NULL);
|
|
1301 g = gimple_build_assign (mask, aref);
|
|
1302 gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
|
|
1303 int bitsize = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (aref)));
|
|
1304 if (!INTEGRAL_TYPE_P (TREE_TYPE (aref)))
|
|
1305 {
|
|
1306 aref = build1 (VIEW_CONVERT_EXPR,
|
|
1307 build_nonstandard_integer_type (bitsize, 0),
|
|
1308 mask);
|
|
1309 mask = make_ssa_name (TREE_TYPE (aref));
|
|
1310 g = gimple_build_assign (mask, aref);
|
|
1311 gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
|
|
1312 }
|
|
1313 }
|
|
1314
|
|
1315 g = gimple_build_cond (EQ_EXPR, mask, build_zero_cst (TREE_TYPE (mask)),
|
|
1316 NULL, NULL);
|
|
1317 gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
|
|
1318 edge e = make_edge (loop->header, incr_bb, EDGE_TRUE_VALUE);
|
|
1319 e->probability = profile_probability::unlikely ().guessed ();
|
131
|
1320 incr_bb->count += e->count ();
|
111
|
1321 edge fallthru = FALLTHRU_EDGE (loop->header);
|
|
1322 fallthru->flags = EDGE_FALSE_VALUE;
|
|
1323 fallthru->probability = profile_probability::likely ().guessed ();
|
|
1324 }
|
|
1325
|
|
1326 basic_block latch_bb = NULL;
|
|
1327 basic_block new_exit_bb = NULL;
|
|
1328
|
|
1329 /* Generate the condition. */
|
|
1330 if (incr_bb)
|
|
1331 {
|
|
1332 gsi = gsi_last_bb (incr_bb);
|
|
1333 g = gimple_build_cond (LT_EXPR, iter2,
|
|
1334 build_int_cst (unsigned_type_node,
|
|
1335 node->simdclone->simdlen),
|
|
1336 NULL, NULL);
|
|
1337 gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
|
|
1338 edge e = split_block (incr_bb, gsi_stmt (gsi));
|
|
1339 latch_bb = e->dest;
|
|
1340 new_exit_bb = split_block_after_labels (latch_bb)->dest;
|
|
1341 loop->latch = latch_bb;
|
|
1342
|
|
1343 redirect_edge_succ (FALLTHRU_EDGE (latch_bb), body_bb);
|
|
1344
|
|
1345 edge new_e = make_edge (incr_bb, new_exit_bb, EDGE_FALSE_VALUE);
|
|
1346
|
|
1347 /* FIXME: Do we need to distribute probabilities for the conditional? */
|
|
1348 new_e->probability = profile_probability::guessed_never ();
|
|
1349 /* The successor of incr_bb is already pointing to latch_bb; just
|
|
1350 change the flags.
|
|
1351 make_edge (incr_bb, latch_bb, EDGE_TRUE_VALUE); */
|
|
1352 FALLTHRU_EDGE (incr_bb)->flags = EDGE_TRUE_VALUE;
|
|
1353 }
|
|
1354
|
|
1355 gphi *phi = create_phi_node (iter1, body_bb);
|
|
1356 edge preheader_edge = find_edge (entry_bb, body_bb);
|
|
1357 edge latch_edge = NULL;
|
|
1358 add_phi_arg (phi, build_zero_cst (unsigned_type_node), preheader_edge,
|
|
1359 UNKNOWN_LOCATION);
|
|
1360 if (incr_bb)
|
|
1361 {
|
|
1362 latch_edge = single_succ_edge (latch_bb);
|
|
1363 add_phi_arg (phi, iter2, latch_edge, UNKNOWN_LOCATION);
|
|
1364
|
|
1365 /* Generate the new return. */
|
|
1366 gsi = gsi_last_bb (new_exit_bb);
|
|
1367 if (retval
|
|
1368 && TREE_CODE (retval) == VIEW_CONVERT_EXPR
|
|
1369 && TREE_CODE (TREE_OPERAND (retval, 0)) == RESULT_DECL)
|
|
1370 retval = TREE_OPERAND (retval, 0);
|
|
1371 else if (retval)
|
|
1372 {
|
|
1373 retval = build1 (VIEW_CONVERT_EXPR,
|
|
1374 TREE_TYPE (TREE_TYPE (node->decl)),
|
|
1375 retval);
|
|
1376 retval = force_gimple_operand_gsi (&gsi, retval, true, NULL,
|
|
1377 false, GSI_CONTINUE_LINKING);
|
|
1378 }
|
|
1379 g = gimple_build_return (retval);
|
|
1380 gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
|
|
1381 }
|
|
1382
|
|
1383 /* Handle aligned clauses by replacing default defs of the aligned
|
|
1384 uniform args with __builtin_assume_aligned (arg_N(D), alignment)
|
|
1385 lhs. Handle linear by adding PHIs. */
|
|
1386 for (unsigned i = 0; i < node->simdclone->nargs; i++)
|
|
1387 if (node->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_UNIFORM
|
|
1388 && (TREE_ADDRESSABLE (node->simdclone->args[i].orig_arg)
|
|
1389 || !is_gimple_reg_type
|
|
1390 (TREE_TYPE (node->simdclone->args[i].orig_arg))))
|
|
1391 {
|
|
1392 tree orig_arg = node->simdclone->args[i].orig_arg;
|
|
1393 if (is_gimple_reg_type (TREE_TYPE (orig_arg)))
|
|
1394 iter1 = make_ssa_name (TREE_TYPE (orig_arg));
|
|
1395 else
|
|
1396 {
|
|
1397 iter1 = create_tmp_var_raw (TREE_TYPE (orig_arg));
|
|
1398 gimple_add_tmp_var (iter1);
|
|
1399 }
|
|
1400 gsi = gsi_after_labels (entry_bb);
|
|
1401 g = gimple_build_assign (iter1, orig_arg);
|
|
1402 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
|
|
1403 gsi = gsi_after_labels (body_bb);
|
|
1404 g = gimple_build_assign (orig_arg, iter1);
|
|
1405 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
|
|
1406 }
|
|
1407 else if (node->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_UNIFORM
|
|
1408 && DECL_BY_REFERENCE (node->simdclone->args[i].orig_arg)
|
|
1409 && TREE_CODE (TREE_TYPE (node->simdclone->args[i].orig_arg))
|
|
1410 == REFERENCE_TYPE
|
|
1411 && TREE_ADDRESSABLE
|
|
1412 (TREE_TYPE (TREE_TYPE (node->simdclone->args[i].orig_arg))))
|
|
1413 {
|
|
1414 tree orig_arg = node->simdclone->args[i].orig_arg;
|
|
1415 tree def = ssa_default_def (cfun, orig_arg);
|
|
1416 if (def && !has_zero_uses (def))
|
|
1417 {
|
|
1418 iter1 = create_tmp_var_raw (TREE_TYPE (TREE_TYPE (orig_arg)));
|
|
1419 gimple_add_tmp_var (iter1);
|
|
1420 gsi = gsi_after_labels (entry_bb);
|
|
1421 g = gimple_build_assign (iter1, build_simple_mem_ref (def));
|
|
1422 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
|
|
1423 gsi = gsi_after_labels (body_bb);
|
|
1424 g = gimple_build_assign (build_simple_mem_ref (def), iter1);
|
|
1425 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
|
|
1426 }
|
|
1427 }
|
|
1428 else if (node->simdclone->args[i].alignment
|
|
1429 && node->simdclone->args[i].arg_type
|
|
1430 == SIMD_CLONE_ARG_TYPE_UNIFORM
|
|
1431 && (node->simdclone->args[i].alignment
|
|
1432 & (node->simdclone->args[i].alignment - 1)) == 0
|
|
1433 && TREE_CODE (TREE_TYPE (node->simdclone->args[i].orig_arg))
|
|
1434 == POINTER_TYPE)
|
|
1435 {
|
|
1436 unsigned int alignment = node->simdclone->args[i].alignment;
|
|
1437 tree orig_arg = node->simdclone->args[i].orig_arg;
|
|
1438 tree def = ssa_default_def (cfun, orig_arg);
|
|
1439 if (def && !has_zero_uses (def))
|
|
1440 {
|
|
1441 tree fn = builtin_decl_explicit (BUILT_IN_ASSUME_ALIGNED);
|
|
1442 gimple_seq seq = NULL;
|
|
1443 bool need_cvt = false;
|
|
1444 gcall *call
|
|
1445 = gimple_build_call (fn, 2, def, size_int (alignment));
|
|
1446 g = call;
|
|
1447 if (!useless_type_conversion_p (TREE_TYPE (orig_arg),
|
|
1448 ptr_type_node))
|
|
1449 need_cvt = true;
|
|
1450 tree t = make_ssa_name (need_cvt ? ptr_type_node : orig_arg);
|
|
1451 gimple_call_set_lhs (g, t);
|
|
1452 gimple_seq_add_stmt_without_update (&seq, g);
|
|
1453 if (need_cvt)
|
|
1454 {
|
|
1455 t = make_ssa_name (orig_arg);
|
|
1456 g = gimple_build_assign (t, NOP_EXPR, gimple_call_lhs (g));
|
|
1457 gimple_seq_add_stmt_without_update (&seq, g);
|
|
1458 }
|
|
1459 gsi_insert_seq_on_edge_immediate
|
|
1460 (single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)), seq);
|
|
1461
|
|
1462 entry_bb = single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun));
|
|
1463 node->create_edge (cgraph_node::get_create (fn),
|
131
|
1464 call, entry_bb->count);
|
111
|
1465
|
|
1466 imm_use_iterator iter;
|
|
1467 use_operand_p use_p;
|
|
1468 gimple *use_stmt;
|
|
1469 tree repl = gimple_get_lhs (g);
|
|
1470 FOR_EACH_IMM_USE_STMT (use_stmt, iter, def)
|
|
1471 if (is_gimple_debug (use_stmt) || use_stmt == call)
|
|
1472 continue;
|
|
1473 else
|
|
1474 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
|
|
1475 SET_USE (use_p, repl);
|
|
1476 }
|
|
1477 }
|
|
1478 else if ((node->simdclone->args[i].arg_type
|
|
1479 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
|
|
1480 || (node->simdclone->args[i].arg_type
|
|
1481 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP)
|
|
1482 || (node->simdclone->args[i].arg_type
|
|
1483 == SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP)
|
|
1484 || (node->simdclone->args[i].arg_type
|
|
1485 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP))
|
|
1486 {
|
|
1487 tree orig_arg = node->simdclone->args[i].orig_arg;
|
|
1488 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (orig_arg))
|
|
1489 || POINTER_TYPE_P (TREE_TYPE (orig_arg)));
|
|
1490 tree def = NULL_TREE;
|
|
1491 if (TREE_ADDRESSABLE (orig_arg))
|
|
1492 {
|
|
1493 def = make_ssa_name (TREE_TYPE (orig_arg));
|
|
1494 iter1 = make_ssa_name (TREE_TYPE (orig_arg));
|
|
1495 if (incr_bb)
|
|
1496 iter2 = make_ssa_name (TREE_TYPE (orig_arg));
|
|
1497 gsi = gsi_after_labels (entry_bb);
|
|
1498 g = gimple_build_assign (def, orig_arg);
|
|
1499 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
|
|
1500 }
|
|
1501 else
|
|
1502 {
|
|
1503 def = ssa_default_def (cfun, orig_arg);
|
|
1504 if (!def || has_zero_uses (def))
|
|
1505 def = NULL_TREE;
|
|
1506 else
|
|
1507 {
|
|
1508 iter1 = make_ssa_name (orig_arg);
|
|
1509 if (incr_bb)
|
|
1510 iter2 = make_ssa_name (orig_arg);
|
|
1511 }
|
|
1512 }
|
|
1513 if (def)
|
|
1514 {
|
|
1515 phi = create_phi_node (iter1, body_bb);
|
|
1516 add_phi_arg (phi, def, preheader_edge, UNKNOWN_LOCATION);
|
|
1517 if (incr_bb)
|
|
1518 {
|
|
1519 add_phi_arg (phi, iter2, latch_edge, UNKNOWN_LOCATION);
|
|
1520 enum tree_code code = INTEGRAL_TYPE_P (TREE_TYPE (orig_arg))
|
|
1521 ? PLUS_EXPR : POINTER_PLUS_EXPR;
|
|
1522 tree addtype = INTEGRAL_TYPE_P (TREE_TYPE (orig_arg))
|
|
1523 ? TREE_TYPE (orig_arg) : sizetype;
|
|
1524 tree addcst = simd_clone_linear_addend (node, i, addtype,
|
|
1525 entry_bb);
|
|
1526 gsi = gsi_last_bb (incr_bb);
|
|
1527 g = gimple_build_assign (iter2, code, iter1, addcst);
|
|
1528 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
|
|
1529 }
|
|
1530
|
|
1531 imm_use_iterator iter;
|
|
1532 use_operand_p use_p;
|
|
1533 gimple *use_stmt;
|
|
1534 if (TREE_ADDRESSABLE (orig_arg))
|
|
1535 {
|
|
1536 gsi = gsi_after_labels (body_bb);
|
|
1537 g = gimple_build_assign (orig_arg, iter1);
|
|
1538 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
|
|
1539 }
|
|
1540 else
|
|
1541 FOR_EACH_IMM_USE_STMT (use_stmt, iter, def)
|
|
1542 if (use_stmt == phi)
|
|
1543 continue;
|
|
1544 else
|
|
1545 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
|
|
1546 SET_USE (use_p, iter1);
|
|
1547 }
|
|
1548 }
|
|
1549 else if (node->simdclone->args[i].arg_type
|
|
1550 == SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
|
|
1551 || (node->simdclone->args[i].arg_type
|
|
1552 == SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP))
|
|
1553 {
|
|
1554 tree orig_arg = node->simdclone->args[i].orig_arg;
|
|
1555 tree def = ssa_default_def (cfun, orig_arg);
|
|
1556 gcc_assert (!TREE_ADDRESSABLE (orig_arg)
|
|
1557 && TREE_CODE (TREE_TYPE (orig_arg)) == REFERENCE_TYPE);
|
|
1558 if (def && !has_zero_uses (def))
|
|
1559 {
|
|
1560 tree rtype = TREE_TYPE (TREE_TYPE (orig_arg));
|
|
1561 iter1 = make_ssa_name (orig_arg);
|
|
1562 if (incr_bb)
|
|
1563 iter2 = make_ssa_name (orig_arg);
|
|
1564 tree iter3 = make_ssa_name (rtype);
|
|
1565 tree iter4 = make_ssa_name (rtype);
|
|
1566 tree iter5 = incr_bb ? make_ssa_name (rtype) : NULL_TREE;
|
|
1567 gsi = gsi_after_labels (entry_bb);
|
|
1568 gimple *load
|
|
1569 = gimple_build_assign (iter3, build_simple_mem_ref (def));
|
|
1570 gsi_insert_before (&gsi, load, GSI_NEW_STMT);
|
|
1571
|
|
1572 tree array = node->simdclone->args[i].simd_array;
|
|
1573 TREE_ADDRESSABLE (array) = 1;
|
|
1574 tree ptr = build_fold_addr_expr (array);
|
|
1575 phi = create_phi_node (iter1, body_bb);
|
|
1576 add_phi_arg (phi, ptr, preheader_edge, UNKNOWN_LOCATION);
|
|
1577 if (incr_bb)
|
|
1578 {
|
|
1579 add_phi_arg (phi, iter2, latch_edge, UNKNOWN_LOCATION);
|
|
1580 g = gimple_build_assign (iter2, POINTER_PLUS_EXPR, iter1,
|
|
1581 TYPE_SIZE_UNIT (TREE_TYPE (iter3)));
|
|
1582 gsi = gsi_last_bb (incr_bb);
|
|
1583 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
|
|
1584 }
|
|
1585
|
|
1586 phi = create_phi_node (iter4, body_bb);
|
|
1587 add_phi_arg (phi, iter3, preheader_edge, UNKNOWN_LOCATION);
|
|
1588 if (incr_bb)
|
|
1589 {
|
|
1590 add_phi_arg (phi, iter5, latch_edge, UNKNOWN_LOCATION);
|
|
1591 enum tree_code code = INTEGRAL_TYPE_P (TREE_TYPE (iter3))
|
|
1592 ? PLUS_EXPR : POINTER_PLUS_EXPR;
|
|
1593 tree addtype = INTEGRAL_TYPE_P (TREE_TYPE (iter3))
|
|
1594 ? TREE_TYPE (iter3) : sizetype;
|
|
1595 tree addcst = simd_clone_linear_addend (node, i, addtype,
|
|
1596 entry_bb);
|
|
1597 g = gimple_build_assign (iter5, code, iter4, addcst);
|
|
1598 gsi = gsi_last_bb (incr_bb);
|
|
1599 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
|
|
1600 }
|
|
1601
|
|
1602 g = gimple_build_assign (build_simple_mem_ref (iter1), iter4);
|
|
1603 gsi = gsi_after_labels (body_bb);
|
|
1604 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
|
|
1605
|
|
1606 imm_use_iterator iter;
|
|
1607 use_operand_p use_p;
|
|
1608 gimple *use_stmt;
|
|
1609 FOR_EACH_IMM_USE_STMT (use_stmt, iter, def)
|
|
1610 if (use_stmt == load)
|
|
1611 continue;
|
|
1612 else
|
|
1613 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
|
|
1614 SET_USE (use_p, iter1);
|
|
1615
|
|
1616 if (!TYPE_READONLY (rtype) && incr_bb)
|
|
1617 {
|
|
1618 tree v = make_ssa_name (rtype);
|
|
1619 tree aref = build4 (ARRAY_REF, rtype, array,
|
|
1620 size_zero_node, NULL_TREE,
|
|
1621 NULL_TREE);
|
|
1622 gsi = gsi_after_labels (new_exit_bb);
|
|
1623 g = gimple_build_assign (v, aref);
|
|
1624 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
|
|
1625 g = gimple_build_assign (build_simple_mem_ref (def), v);
|
|
1626 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
|
|
1627 }
|
|
1628 }
|
|
1629 }
|
|
1630
|
|
1631 calculate_dominance_info (CDI_DOMINATORS);
|
|
1632 if (loop)
|
|
1633 add_loop (loop, loop->header->loop_father);
|
|
1634 update_ssa (TODO_update_ssa);
|
|
1635
|
|
1636 pop_cfun ();
|
|
1637 }
|
|
1638
|
|
1639 /* If the function in NODE is tagged as an elemental SIMD function,
|
|
1640 create the appropriate SIMD clones. */
|
|
1641
|
131
|
1642 void
|
111
|
1643 expand_simd_clones (struct cgraph_node *node)
|
|
1644 {
|
|
1645 tree attr = lookup_attribute ("omp declare simd",
|
|
1646 DECL_ATTRIBUTES (node->decl));
|
|
1647 if (attr == NULL_TREE
|
145
|
1648 || node->inlined_to
|
111
|
1649 || lookup_attribute ("noclone", DECL_ATTRIBUTES (node->decl)))
|
|
1650 return;
|
|
1651
|
|
1652 /* Ignore
|
|
1653 #pragma omp declare simd
|
|
1654 extern int foo ();
|
|
1655 in C, there we don't know the argument types at all. */
|
|
1656 if (!node->definition
|
|
1657 && TYPE_ARG_TYPES (TREE_TYPE (node->decl)) == NULL_TREE)
|
|
1658 return;
|
|
1659
|
|
1660 /* Call this before creating clone_info, as it might ggc_collect. */
|
|
1661 if (node->definition && node->has_gimple_body_p ())
|
|
1662 node->get_body ();
|
|
1663
|
|
1664 do
|
|
1665 {
|
|
1666 /* Start with parsing the "omp declare simd" attribute(s). */
|
|
1667 bool inbranch_clause_specified;
|
|
1668 struct cgraph_simd_clone *clone_info
|
|
1669 = simd_clone_clauses_extract (node, TREE_VALUE (attr),
|
|
1670 &inbranch_clause_specified);
|
|
1671 if (clone_info == NULL)
|
|
1672 continue;
|
|
1673
|
|
1674 int orig_simdlen = clone_info->simdlen;
|
|
1675 tree base_type = simd_clone_compute_base_data_type (node, clone_info);
|
|
1676 /* The target can return 0 (no simd clones should be created),
|
|
1677 1 (just one ISA of simd clones should be created) or higher
|
|
1678 count of ISA variants. In that case, clone_info is initialized
|
|
1679 for the first ISA variant. */
|
|
1680 int count
|
|
1681 = targetm.simd_clone.compute_vecsize_and_simdlen (node, clone_info,
|
|
1682 base_type, 0);
|
|
1683 if (count == 0)
|
|
1684 continue;
|
|
1685
|
|
1686 /* Loop over all COUNT ISA variants, and if !INBRANCH_CLAUSE_SPECIFIED,
|
|
1687 also create one inbranch and one !inbranch clone of it. */
|
|
1688 for (int i = 0; i < count * 2; i++)
|
|
1689 {
|
|
1690 struct cgraph_simd_clone *clone = clone_info;
|
|
1691 if (inbranch_clause_specified && (i & 1) != 0)
|
|
1692 continue;
|
|
1693
|
|
1694 if (i != 0)
|
|
1695 {
|
|
1696 clone = simd_clone_struct_alloc (clone_info->nargs
|
|
1697 + ((i & 1) != 0));
|
|
1698 simd_clone_struct_copy (clone, clone_info);
|
|
1699 /* Undo changes targetm.simd_clone.compute_vecsize_and_simdlen
|
|
1700 and simd_clone_adjust_argument_types did to the first
|
|
1701 clone's info. */
|
|
1702 clone->nargs -= clone_info->inbranch;
|
|
1703 clone->simdlen = orig_simdlen;
|
|
1704 /* And call the target hook again to get the right ISA. */
|
|
1705 targetm.simd_clone.compute_vecsize_and_simdlen (node, clone,
|
|
1706 base_type,
|
|
1707 i / 2);
|
|
1708 if ((i & 1) != 0)
|
|
1709 clone->inbranch = 1;
|
|
1710 }
|
|
1711
|
|
1712 /* simd_clone_mangle might fail if such a clone has been created
|
|
1713 already. */
|
|
1714 tree id = simd_clone_mangle (node, clone);
|
|
1715 if (id == NULL_TREE)
|
145
|
1716 {
|
|
1717 if (i == 0)
|
|
1718 clone->nargs += clone->inbranch;
|
|
1719 continue;
|
|
1720 }
|
111
|
1721
|
|
1722 /* Only when we are sure we want to create the clone actually
|
|
1723 clone the function (or definitions) or create another
|
|
1724 extern FUNCTION_DECL (for prototypes without definitions). */
|
|
1725 struct cgraph_node *n = simd_clone_create (node);
|
|
1726 if (n == NULL)
|
145
|
1727 {
|
|
1728 if (i == 0)
|
|
1729 clone->nargs += clone->inbranch;
|
|
1730 continue;
|
|
1731 }
|
111
|
1732
|
|
1733 n->simdclone = clone;
|
|
1734 clone->origin = node;
|
|
1735 clone->next_clone = NULL;
|
|
1736 if (node->simd_clones == NULL)
|
|
1737 {
|
|
1738 clone->prev_clone = n;
|
|
1739 node->simd_clones = n;
|
|
1740 }
|
|
1741 else
|
|
1742 {
|
|
1743 clone->prev_clone = node->simd_clones->simdclone->prev_clone;
|
|
1744 clone->prev_clone->simdclone->next_clone = n;
|
|
1745 node->simd_clones->simdclone->prev_clone = n;
|
|
1746 }
|
|
1747 symtab->change_decl_assembler_name (n->decl, id);
|
|
1748 /* And finally adjust the return type, parameters and for
|
|
1749 definitions also function body. */
|
|
1750 if (node->definition)
|
|
1751 simd_clone_adjust (n);
|
|
1752 else
|
|
1753 {
|
145
|
1754 TREE_TYPE (n->decl)
|
|
1755 = build_distinct_type_copy (TREE_TYPE (n->decl));
|
|
1756 targetm.simd_clone.adjust (n);
|
111
|
1757 simd_clone_adjust_return_type (n);
|
|
1758 simd_clone_adjust_argument_types (n);
|
|
1759 }
|
|
1760 }
|
|
1761 }
|
|
1762 while ((attr = lookup_attribute ("omp declare simd", TREE_CHAIN (attr))));
|
|
1763 }
|
|
1764
|
|
1765 /* Entry point for IPA simd clone creation pass. */
|
|
1766
|
|
1767 static unsigned int
|
|
1768 ipa_omp_simd_clone (void)
|
|
1769 {
|
|
1770 struct cgraph_node *node;
|
|
1771 FOR_EACH_FUNCTION (node)
|
|
1772 expand_simd_clones (node);
|
|
1773 return 0;
|
|
1774 }
|
|
1775
|
|
1776 namespace {
|
|
1777
|
|
1778 const pass_data pass_data_omp_simd_clone =
|
|
1779 {
|
|
1780 SIMPLE_IPA_PASS, /* type */
|
|
1781 "simdclone", /* name */
|
|
1782 OPTGROUP_OMP, /* optinfo_flags */
|
|
1783 TV_NONE, /* tv_id */
|
|
1784 ( PROP_ssa | PROP_cfg ), /* properties_required */
|
|
1785 0, /* properties_provided */
|
|
1786 0, /* properties_destroyed */
|
|
1787 0, /* todo_flags_start */
|
|
1788 0, /* todo_flags_finish */
|
|
1789 };
|
|
1790
|
|
1791 class pass_omp_simd_clone : public simple_ipa_opt_pass
|
|
1792 {
|
|
1793 public:
|
|
1794 pass_omp_simd_clone(gcc::context *ctxt)
|
|
1795 : simple_ipa_opt_pass(pass_data_omp_simd_clone, ctxt)
|
|
1796 {}
|
|
1797
|
|
1798 /* opt_pass methods: */
|
|
1799 virtual bool gate (function *);
|
|
1800 virtual unsigned int execute (function *) { return ipa_omp_simd_clone (); }
|
|
1801 };
|
|
1802
|
|
1803 bool
|
|
1804 pass_omp_simd_clone::gate (function *)
|
|
1805 {
|
|
1806 return targetm.simd_clone.compute_vecsize_and_simdlen != NULL;
|
|
1807 }
|
|
1808
|
|
1809 } // anon namespace
|
|
1810
|
|
1811 simple_ipa_opt_pass *
|
|
1812 make_pass_omp_simd_clone (gcc::context *ctxt)
|
|
1813 {
|
|
1814 return new pass_omp_simd_clone (ctxt);
|
|
1815 }
|