111
|
1 /* Copyright (C) 2013-2017 Free Software Foundation, Inc.
|
|
2
|
|
3 Contributed by Mentor Embedded.
|
|
4
|
|
5 This file is part of the GNU Offloading and Multi Processing Library
|
|
6 (libgomp).
|
|
7
|
|
8 Libgomp is free software; you can redistribute it and/or modify it
|
|
9 under the terms of the GNU General Public License as published by
|
|
10 the Free Software Foundation; either version 3, or (at your option)
|
|
11 any later version.
|
|
12
|
|
13 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
|
|
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
|
15 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
|
16 more details.
|
|
17
|
|
18 Under Section 7 of GPL version 3, you are granted additional
|
|
19 permissions described in the GCC Runtime Library Exception, version
|
|
20 3.1, as published by the Free Software Foundation.
|
|
21
|
|
22 You should have received a copy of the GNU General Public License and
|
|
23 a copy of the GCC Runtime Library Exception along with this program;
|
|
24 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
|
25 <http://www.gnu.org/licenses/>. */
|
|
26
|
|
27 /* This file handles OpenACC constructs. */
|
|
28
|
|
29 #include "openacc.h"
|
|
30 #include "libgomp.h"
|
|
31 #include "libgomp_g.h"
|
|
32 #include "gomp-constants.h"
|
|
33 #include "oacc-int.h"
|
|
34 #ifdef HAVE_INTTYPES_H
|
|
35 # include <inttypes.h> /* For PRIu64. */
|
|
36 #endif
|
|
37 #include <string.h>
|
|
38 #include <stdarg.h>
|
|
39 #include <assert.h>
|
|
40
|
|
41 static int
|
|
42 find_pset (int pos, size_t mapnum, unsigned short *kinds)
|
|
43 {
|
|
44 if (pos + 1 >= mapnum)
|
|
45 return 0;
|
|
46
|
|
47 unsigned char kind = kinds[pos+1] & 0xff;
|
|
48
|
|
49 return kind == GOMP_MAP_TO_PSET;
|
|
50 }
|
|
51
|
|
52 static void goacc_wait (int async, int num_waits, va_list *ap);
|
|
53
|
|
54
|
|
55 /* Launch a possibly offloaded function on DEVICE. FN is the host fn
|
|
56 address. MAPNUM, HOSTADDRS, SIZES & KINDS describe the memory
|
|
57 blocks to be copied to/from the device. Varadic arguments are
|
|
58 keyed optional parameters terminated with a zero. */
|
|
59
|
|
60 void
|
|
61 GOACC_parallel_keyed (int device, void (*fn) (void *),
|
|
62 size_t mapnum, void **hostaddrs, size_t *sizes,
|
|
63 unsigned short *kinds, ...)
|
|
64 {
|
|
65 bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
|
|
66 va_list ap;
|
|
67 struct goacc_thread *thr;
|
|
68 struct gomp_device_descr *acc_dev;
|
|
69 struct target_mem_desc *tgt;
|
|
70 void **devaddrs;
|
|
71 unsigned int i;
|
|
72 struct splay_tree_key_s k;
|
|
73 splay_tree_key tgt_fn_key;
|
|
74 void (*tgt_fn);
|
|
75 int async = GOMP_ASYNC_SYNC;
|
|
76 unsigned dims[GOMP_DIM_MAX];
|
|
77 unsigned tag;
|
|
78
|
|
79 #ifdef HAVE_INTTYPES_H
|
|
80 gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n",
|
|
81 __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds);
|
|
82 #else
|
|
83 gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n",
|
|
84 __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds);
|
|
85 #endif
|
|
86 goacc_lazy_initialize ();
|
|
87
|
|
88 thr = goacc_thread ();
|
|
89 acc_dev = thr->dev;
|
|
90
|
|
91 /* Host fallback if "if" clause is false or if the current device is set to
|
|
92 the host. */
|
|
93 if (host_fallback)
|
|
94 {
|
|
95 goacc_save_and_set_bind (acc_device_host);
|
|
96 fn (hostaddrs);
|
|
97 goacc_restore_bind ();
|
|
98 return;
|
|
99 }
|
|
100 else if (acc_device_type (acc_dev->type) == acc_device_host)
|
|
101 {
|
|
102 fn (hostaddrs);
|
|
103 return;
|
|
104 }
|
|
105
|
|
106 /* Default: let the runtime choose. */
|
|
107 for (i = 0; i != GOMP_DIM_MAX; i++)
|
|
108 dims[i] = 0;
|
|
109
|
|
110 va_start (ap, kinds);
|
|
111 /* TODO: This will need amending when device_type is implemented. */
|
|
112 while ((tag = va_arg (ap, unsigned)) != 0)
|
|
113 {
|
|
114 if (GOMP_LAUNCH_DEVICE (tag))
|
|
115 gomp_fatal ("device_type '%d' offload parameters, libgomp is too old",
|
|
116 GOMP_LAUNCH_DEVICE (tag));
|
|
117
|
|
118 switch (GOMP_LAUNCH_CODE (tag))
|
|
119 {
|
|
120 case GOMP_LAUNCH_DIM:
|
|
121 {
|
|
122 unsigned mask = GOMP_LAUNCH_OP (tag);
|
|
123
|
|
124 for (i = 0; i != GOMP_DIM_MAX; i++)
|
|
125 if (mask & GOMP_DIM_MASK (i))
|
|
126 dims[i] = va_arg (ap, unsigned);
|
|
127 }
|
|
128 break;
|
|
129
|
|
130 case GOMP_LAUNCH_ASYNC:
|
|
131 {
|
|
132 /* Small constant values are encoded in the operand. */
|
|
133 async = GOMP_LAUNCH_OP (tag);
|
|
134
|
|
135 if (async == GOMP_LAUNCH_OP_MAX)
|
|
136 async = va_arg (ap, unsigned);
|
|
137 break;
|
|
138 }
|
|
139
|
|
140 case GOMP_LAUNCH_WAIT:
|
|
141 {
|
|
142 unsigned num_waits = GOMP_LAUNCH_OP (tag);
|
|
143
|
|
144 if (num_waits)
|
|
145 goacc_wait (async, num_waits, &ap);
|
|
146 break;
|
|
147 }
|
|
148
|
|
149 default:
|
|
150 gomp_fatal ("unrecognized offload code '%d',"
|
|
151 " libgomp is too old", GOMP_LAUNCH_CODE (tag));
|
|
152 }
|
|
153 }
|
|
154 va_end (ap);
|
|
155
|
|
156 acc_dev->openacc.async_set_async_func (async);
|
|
157
|
|
158 if (!(acc_dev->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC))
|
|
159 {
|
|
160 k.host_start = (uintptr_t) fn;
|
|
161 k.host_end = k.host_start + 1;
|
|
162 gomp_mutex_lock (&acc_dev->lock);
|
|
163 tgt_fn_key = splay_tree_lookup (&acc_dev->mem_map, &k);
|
|
164 gomp_mutex_unlock (&acc_dev->lock);
|
|
165
|
|
166 if (tgt_fn_key == NULL)
|
|
167 gomp_fatal ("target function wasn't mapped");
|
|
168
|
|
169 tgt_fn = (void (*)) tgt_fn_key->tgt_offset;
|
|
170 }
|
|
171 else
|
|
172 tgt_fn = (void (*)) fn;
|
|
173
|
|
174 tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
|
|
175 GOMP_MAP_VARS_OPENACC);
|
|
176
|
|
177 devaddrs = gomp_alloca (sizeof (void *) * mapnum);
|
|
178 for (i = 0; i < mapnum; i++)
|
|
179 devaddrs[i] = (void *) (tgt->list[i].key->tgt->tgt_start
|
|
180 + tgt->list[i].key->tgt_offset);
|
|
181
|
|
182 acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs,
|
|
183 async, dims, tgt);
|
|
184
|
|
185 /* If running synchronously, unmap immediately. */
|
|
186 if (async < acc_async_noval)
|
|
187 gomp_unmap_vars (tgt, true);
|
|
188 else
|
|
189 tgt->device_descr->openacc.register_async_cleanup_func (tgt, async);
|
|
190
|
|
191 acc_dev->openacc.async_set_async_func (acc_async_sync);
|
|
192 }
|
|
193
|
|
194 /* Legacy entry point, only provide host execution. */
|
|
195
|
|
196 void
|
|
197 GOACC_parallel (int device, void (*fn) (void *),
|
|
198 size_t mapnum, void **hostaddrs, size_t *sizes,
|
|
199 unsigned short *kinds,
|
|
200 int num_gangs, int num_workers, int vector_length,
|
|
201 int async, int num_waits, ...)
|
|
202 {
|
|
203 goacc_save_and_set_bind (acc_device_host);
|
|
204 fn (hostaddrs);
|
|
205 goacc_restore_bind ();
|
|
206 }
|
|
207
|
|
208 void
|
|
209 GOACC_data_start (int device, size_t mapnum,
|
|
210 void **hostaddrs, size_t *sizes, unsigned short *kinds)
|
|
211 {
|
|
212 bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
|
|
213 struct target_mem_desc *tgt;
|
|
214
|
|
215 #ifdef HAVE_INTTYPES_H
|
|
216 gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n",
|
|
217 __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds);
|
|
218 #else
|
|
219 gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n",
|
|
220 __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds);
|
|
221 #endif
|
|
222
|
|
223 goacc_lazy_initialize ();
|
|
224
|
|
225 struct goacc_thread *thr = goacc_thread ();
|
|
226 struct gomp_device_descr *acc_dev = thr->dev;
|
|
227
|
|
228 /* Host fallback or 'do nothing'. */
|
|
229 if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
|
|
230 || host_fallback)
|
|
231 {
|
|
232 tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true,
|
|
233 GOMP_MAP_VARS_OPENACC);
|
|
234 tgt->prev = thr->mapped_data;
|
|
235 thr->mapped_data = tgt;
|
|
236
|
|
237 return;
|
|
238 }
|
|
239
|
|
240 gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__);
|
|
241 tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
|
|
242 GOMP_MAP_VARS_OPENACC);
|
|
243 gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__);
|
|
244 tgt->prev = thr->mapped_data;
|
|
245 thr->mapped_data = tgt;
|
|
246 }
|
|
247
|
|
248 void
|
|
249 GOACC_data_end (void)
|
|
250 {
|
|
251 struct goacc_thread *thr = goacc_thread ();
|
|
252 struct target_mem_desc *tgt = thr->mapped_data;
|
|
253
|
|
254 gomp_debug (0, " %s: restore mappings\n", __FUNCTION__);
|
|
255 thr->mapped_data = tgt->prev;
|
|
256 gomp_unmap_vars (tgt, true);
|
|
257 gomp_debug (0, " %s: mappings restored\n", __FUNCTION__);
|
|
258 }
|
|
259
|
|
260 void
|
|
261 GOACC_enter_exit_data (int device, size_t mapnum,
|
|
262 void **hostaddrs, size_t *sizes, unsigned short *kinds,
|
|
263 int async, int num_waits, ...)
|
|
264 {
|
|
265 struct goacc_thread *thr;
|
|
266 struct gomp_device_descr *acc_dev;
|
|
267 bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
|
|
268 bool data_enter = false;
|
|
269 size_t i;
|
|
270
|
|
271 goacc_lazy_initialize ();
|
|
272
|
|
273 thr = goacc_thread ();
|
|
274 acc_dev = thr->dev;
|
|
275
|
|
276 if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
|
|
277 || host_fallback)
|
|
278 return;
|
|
279
|
|
280 if (num_waits)
|
|
281 {
|
|
282 va_list ap;
|
|
283
|
|
284 va_start (ap, num_waits);
|
|
285 goacc_wait (async, num_waits, &ap);
|
|
286 va_end (ap);
|
|
287 }
|
|
288
|
|
289 acc_dev->openacc.async_set_async_func (async);
|
|
290
|
|
291 /* Determine if this is an "acc enter data". */
|
|
292 for (i = 0; i < mapnum; ++i)
|
|
293 {
|
|
294 unsigned char kind = kinds[i] & 0xff;
|
|
295
|
|
296 if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET)
|
|
297 continue;
|
|
298
|
|
299 if (kind == GOMP_MAP_FORCE_ALLOC
|
|
300 || kind == GOMP_MAP_FORCE_PRESENT
|
|
301 || kind == GOMP_MAP_FORCE_TO)
|
|
302 {
|
|
303 data_enter = true;
|
|
304 break;
|
|
305 }
|
|
306
|
|
307 if (kind == GOMP_MAP_DELETE
|
|
308 || kind == GOMP_MAP_FORCE_FROM)
|
|
309 break;
|
|
310
|
|
311 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
|
|
312 kind);
|
|
313 }
|
|
314
|
|
315 if (data_enter)
|
|
316 {
|
|
317 for (i = 0; i < mapnum; i++)
|
|
318 {
|
|
319 unsigned char kind = kinds[i] & 0xff;
|
|
320
|
|
321 /* Scan for PSETs. */
|
|
322 int psets = find_pset (i, mapnum, kinds);
|
|
323
|
|
324 if (!psets)
|
|
325 {
|
|
326 switch (kind)
|
|
327 {
|
|
328 case GOMP_MAP_POINTER:
|
|
329 gomp_acc_insert_pointer (1, &hostaddrs[i], &sizes[i],
|
|
330 &kinds[i]);
|
|
331 break;
|
|
332 case GOMP_MAP_FORCE_ALLOC:
|
|
333 acc_create (hostaddrs[i], sizes[i]);
|
|
334 break;
|
|
335 case GOMP_MAP_FORCE_PRESENT:
|
|
336 acc_present_or_copyin (hostaddrs[i], sizes[i]);
|
|
337 break;
|
|
338 case GOMP_MAP_FORCE_TO:
|
|
339 acc_present_or_copyin (hostaddrs[i], sizes[i]);
|
|
340 break;
|
|
341 default:
|
|
342 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
|
|
343 kind);
|
|
344 break;
|
|
345 }
|
|
346 }
|
|
347 else
|
|
348 {
|
|
349 gomp_acc_insert_pointer (3, &hostaddrs[i], &sizes[i], &kinds[i]);
|
|
350 /* Increment 'i' by two because OpenACC requires fortran
|
|
351 arrays to be contiguous, so each PSET is associated with
|
|
352 one of MAP_FORCE_ALLOC/MAP_FORCE_PRESET/MAP_FORCE_TO, and
|
|
353 one MAP_POINTER. */
|
|
354 i += 2;
|
|
355 }
|
|
356 }
|
|
357 }
|
|
358 else
|
|
359 for (i = 0; i < mapnum; ++i)
|
|
360 {
|
|
361 unsigned char kind = kinds[i] & 0xff;
|
|
362
|
|
363 int psets = find_pset (i, mapnum, kinds);
|
|
364
|
|
365 if (!psets)
|
|
366 {
|
|
367 switch (kind)
|
|
368 {
|
|
369 case GOMP_MAP_POINTER:
|
|
370 gomp_acc_remove_pointer (hostaddrs[i], (kinds[i] & 0xff)
|
|
371 == GOMP_MAP_FORCE_FROM,
|
|
372 async, 1);
|
|
373 break;
|
|
374 case GOMP_MAP_DELETE:
|
|
375 acc_delete (hostaddrs[i], sizes[i]);
|
|
376 break;
|
|
377 case GOMP_MAP_FORCE_FROM:
|
|
378 acc_copyout (hostaddrs[i], sizes[i]);
|
|
379 break;
|
|
380 default:
|
|
381 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
|
|
382 kind);
|
|
383 break;
|
|
384 }
|
|
385 }
|
|
386 else
|
|
387 {
|
|
388 gomp_acc_remove_pointer (hostaddrs[i], (kinds[i] & 0xff)
|
|
389 == GOMP_MAP_FORCE_FROM, async, 3);
|
|
390 /* See the above comment. */
|
|
391 i += 2;
|
|
392 }
|
|
393 }
|
|
394
|
|
395 acc_dev->openacc.async_set_async_func (acc_async_sync);
|
|
396 }
|
|
397
|
|
398 static void
|
|
399 goacc_wait (int async, int num_waits, va_list *ap)
|
|
400 {
|
|
401 struct goacc_thread *thr = goacc_thread ();
|
|
402 struct gomp_device_descr *acc_dev = thr->dev;
|
|
403
|
|
404 while (num_waits--)
|
|
405 {
|
|
406 int qid = va_arg (*ap, int);
|
|
407
|
|
408 if (acc_async_test (qid))
|
|
409 continue;
|
|
410
|
|
411 if (async == acc_async_sync)
|
|
412 acc_wait (qid);
|
|
413 else if (qid == async)
|
|
414 ;/* If we're waiting on the same asynchronous queue as we're
|
|
415 launching on, the queue itself will order work as
|
|
416 required, so there's no need to wait explicitly. */
|
|
417 else
|
|
418 acc_dev->openacc.async_wait_async_func (qid, async);
|
|
419 }
|
|
420 }
|
|
421
|
|
422 void
|
|
423 GOACC_update (int device, size_t mapnum,
|
|
424 void **hostaddrs, size_t *sizes, unsigned short *kinds,
|
|
425 int async, int num_waits, ...)
|
|
426 {
|
|
427 bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
|
|
428 size_t i;
|
|
429
|
|
430 goacc_lazy_initialize ();
|
|
431
|
|
432 struct goacc_thread *thr = goacc_thread ();
|
|
433 struct gomp_device_descr *acc_dev = thr->dev;
|
|
434
|
|
435 if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
|
|
436 || host_fallback)
|
|
437 return;
|
|
438
|
|
439 if (num_waits)
|
|
440 {
|
|
441 va_list ap;
|
|
442
|
|
443 va_start (ap, num_waits);
|
|
444 goacc_wait (async, num_waits, &ap);
|
|
445 va_end (ap);
|
|
446 }
|
|
447
|
|
448 acc_dev->openacc.async_set_async_func (async);
|
|
449
|
|
450 for (i = 0; i < mapnum; ++i)
|
|
451 {
|
|
452 unsigned char kind = kinds[i] & 0xff;
|
|
453
|
|
454 switch (kind)
|
|
455 {
|
|
456 case GOMP_MAP_POINTER:
|
|
457 case GOMP_MAP_TO_PSET:
|
|
458 break;
|
|
459
|
|
460 case GOMP_MAP_FORCE_TO:
|
|
461 acc_update_device (hostaddrs[i], sizes[i]);
|
|
462 break;
|
|
463
|
|
464 case GOMP_MAP_FORCE_FROM:
|
|
465 acc_update_self (hostaddrs[i], sizes[i]);
|
|
466 break;
|
|
467
|
|
468 default:
|
|
469 gomp_fatal (">>>> GOACC_update UNHANDLED kind 0x%.2x", kind);
|
|
470 break;
|
|
471 }
|
|
472 }
|
|
473
|
|
474 acc_dev->openacc.async_set_async_func (acc_async_sync);
|
|
475 }
|
|
476
|
|
477 void
|
|
478 GOACC_wait (int async, int num_waits, ...)
|
|
479 {
|
|
480 if (num_waits)
|
|
481 {
|
|
482 va_list ap;
|
|
483
|
|
484 va_start (ap, num_waits);
|
|
485 goacc_wait (async, num_waits, &ap);
|
|
486 va_end (ap);
|
|
487 }
|
|
488 else if (async == acc_async_sync)
|
|
489 acc_wait_all ();
|
|
490 else if (async == acc_async_noval)
|
|
491 goacc_thread ()->dev->openacc.async_wait_all_async_func (acc_async_noval);
|
|
492 }
|
|
493
|
|
/* Return the thread count; this implementation always reports one.  */

int
GOACC_get_num_threads (void)
{
  enum { reported_thread_count = 1 };

  return reported_thread_count;
}
|
|
499
|
|
/* Return the calling thread's number; this implementation always reports
   zero.  */

int
GOACC_get_thread_num (void)
{
  enum { reported_thread_index = 0 };

  return reported_thread_index;
}
|
|
505
|
|
506 void
|
|
507 GOACC_declare (int device, size_t mapnum,
|
|
508 void **hostaddrs, size_t *sizes, unsigned short *kinds)
|
|
509 {
|
|
510 int i;
|
|
511
|
|
512 for (i = 0; i < mapnum; i++)
|
|
513 {
|
|
514 unsigned char kind = kinds[i] & 0xff;
|
|
515
|
|
516 if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET)
|
|
517 continue;
|
|
518
|
|
519 switch (kind)
|
|
520 {
|
|
521 case GOMP_MAP_FORCE_ALLOC:
|
|
522 case GOMP_MAP_FORCE_FROM:
|
|
523 case GOMP_MAP_FORCE_TO:
|
|
524 case GOMP_MAP_POINTER:
|
|
525 case GOMP_MAP_DELETE:
|
|
526 GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
|
|
527 &kinds[i], 0, 0);
|
|
528 break;
|
|
529
|
|
530 case GOMP_MAP_FORCE_DEVICEPTR:
|
|
531 break;
|
|
532
|
|
533 case GOMP_MAP_ALLOC:
|
|
534 if (!acc_is_present (hostaddrs[i], sizes[i]))
|
|
535 GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
|
|
536 &kinds[i], 0, 0);
|
|
537 break;
|
|
538
|
|
539 case GOMP_MAP_TO:
|
|
540 GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
|
|
541 &kinds[i], 0, 0);
|
|
542
|
|
543 break;
|
|
544
|
|
545 case GOMP_MAP_FROM:
|
|
546 kinds[i] = GOMP_MAP_FORCE_FROM;
|
|
547 GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
|
|
548 &kinds[i], 0, 0);
|
|
549 break;
|
|
550
|
|
551 case GOMP_MAP_FORCE_PRESENT:
|
|
552 if (!acc_is_present (hostaddrs[i], sizes[i]))
|
|
553 gomp_fatal ("[%p,%ld] is not mapped", hostaddrs[i],
|
|
554 (unsigned long) sizes[i]);
|
|
555 break;
|
|
556
|
|
557 default:
|
|
558 assert (0);
|
|
559 break;
|
|
560 }
|
|
561 }
|
|
562 }
|