diff libgomp/loop_ull.c @ 145:1830386684a0 (gcc-9.2.0)
| author   | anatofuz                        |
|----------|---------------------------------|
| date     | Thu, 13 Feb 2020 11:34:05 +0900 |
| parents  | 84e7813d76e9                    |
| children |                                 |
--- a/libgomp/loop_ull.c	Thu Oct 25 07:37:49 2018 +0900
+++ b/libgomp/loop_ull.c	Thu Feb 13 11:34:05 2020 +0900
@@ -1,4 +1,4 @@
-/* Copyright (C) 2005-2018 Free Software Foundation, Inc.
+/* Copyright (C) 2005-2020 Free Software Foundation, Inc.
    Contributed by Richard Henderson <rth@redhat.com>.
 
    This file is part of the GNU Offloading and Multi Processing Library
@@ -27,8 +27,12 @@
 
 #include <limits.h>
 #include <stdlib.h>
+#include <string.h>
 #include "libgomp.h"
 
+ialias (GOMP_loop_ull_runtime_next)
+ialias_redirect (GOMP_taskgroup_reduction_register)
+
 typedef unsigned long long gomp_ull;
 
 /* Initialize the given work share construct from the given arguments.  */
@@ -104,7 +108,7 @@
   struct gomp_thread *thr = gomp_thread ();
 
   thr->ts.static_trip = 0;
-  if (gomp_work_share_start (false))
+  if (gomp_work_share_start (0))
     {
       gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
                           GFS_STATIC, chunk_size);
@@ -122,7 +126,7 @@
   struct gomp_thread *thr = gomp_thread ();
   bool ret;
 
-  if (gomp_work_share_start (false))
+  if (gomp_work_share_start (0))
     {
       gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
                           GFS_DYNAMIC, chunk_size);
@@ -148,7 +152,7 @@
   struct gomp_thread *thr = gomp_thread ();
   bool ret;
 
-  if (gomp_work_share_start (false))
+  if (gomp_work_share_start (0))
     {
       gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
                           GFS_GUIDED, chunk_size);
@@ -171,7 +175,7 @@
                             gomp_ull incr, gomp_ull *istart, gomp_ull *iend)
 {
   struct gomp_task_icv *icv = gomp_icv (false);
-  switch (icv->run_sched_var)
+  switch (icv->run_sched_var & ~GFS_MONOTONIC)
     {
     case GFS_STATIC:
       return gomp_loop_ull_static_start (up, start, end, incr,
@@ -195,6 +199,113 @@
     }
 }
 
+static long
+gomp_adjust_sched (long sched, gomp_ull *chunk_size)
+{
+  sched &= ~GFS_MONOTONIC;
+  switch (sched)
+    {
+    case GFS_STATIC:
+    case GFS_DYNAMIC:
+    case GFS_GUIDED:
+      return sched;
+    /* GFS_RUNTIME is used for runtime schedule without monotonic
+       or nonmonotonic modifiers on the clause.
+       GFS_RUNTIME|GFS_MONOTONIC for runtime schedule with monotonic
+       modifier.  */
+    case GFS_RUNTIME:
+    /* GFS_AUTO is used for runtime schedule with nonmonotonic
+       modifier.  */
+    case GFS_AUTO:
+      {
+        struct gomp_task_icv *icv = gomp_icv (false);
+        sched = icv->run_sched_var & ~GFS_MONOTONIC;
+        switch (sched)
+          {
+          case GFS_STATIC:
+          case GFS_DYNAMIC:
+          case GFS_GUIDED:
+            *chunk_size = icv->run_sched_chunk_size;
+            break;
+          case GFS_AUTO:
+            sched = GFS_STATIC;
+            *chunk_size = 0;
+            break;
+          default:
+            abort ();
+          }
+        return sched;
+      }
+    default:
+      abort ();
+    }
+}
+
+bool
+GOMP_loop_ull_start (bool up, gomp_ull start, gomp_ull end,
+                     gomp_ull incr, long sched, gomp_ull chunk_size,
+                     gomp_ull *istart, gomp_ull *iend,
+                     uintptr_t *reductions, void **mem)
+{
+  struct gomp_thread *thr = gomp_thread ();
+
+  thr->ts.static_trip = 0;
+  if (reductions)
+    gomp_workshare_taskgroup_start ();
+  if (gomp_work_share_start (0))
+    {
+      sched = gomp_adjust_sched (sched, &chunk_size);
+      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
+                          sched, chunk_size);
+      if (reductions)
+        {
+          GOMP_taskgroup_reduction_register (reductions);
+          thr->task->taskgroup->workshare = true;
+          thr->ts.work_share->task_reductions = reductions;
+        }
+      if (mem)
+        {
+          uintptr_t size = (uintptr_t) *mem;
+#define INLINE_ORDERED_TEAM_IDS_OFF \
+  ((offsetof (struct gomp_work_share, inline_ordered_team_ids) \
+    + __alignof__ (long long) - 1) & ~(__alignof__ (long long) - 1))
+          if (size > (sizeof (struct gomp_work_share)
+                      - INLINE_ORDERED_TEAM_IDS_OFF))
+            *mem
+              = (void *) (thr->ts.work_share->ordered_team_ids
+                          = gomp_malloc_cleared (size));
+          else
+            *mem = memset (((char *) thr->ts.work_share)
+                           + INLINE_ORDERED_TEAM_IDS_OFF, '\0', size);
+        }
+      gomp_work_share_init_done ();
+    }
+  else
+    {
+      if (reductions)
+        {
+          uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
+          gomp_workshare_task_reduction_register (reductions,
+                                                  first_reductions);
+        }
+      if (mem)
+        {
+          if ((offsetof (struct gomp_work_share, inline_ordered_team_ids)
+               & (__alignof__ (long long) - 1)) == 0)
+            *mem = (void *) thr->ts.work_share->ordered_team_ids;
+          else
+            {
+              uintptr_t p = (uintptr_t) thr->ts.work_share->ordered_team_ids;
+              p += __alignof__ (long long) - 1;
+              p &= ~(__alignof__ (long long) - 1);
+              *mem = (void *) p;
+            }
+        }
+    }
+
+  return ialias_call (GOMP_loop_ull_runtime_next) (istart, iend);
+}
+
 /* The *_ordered_*_start routines are similar.  The only difference is that
    this work-share construct is initialized to expect an ORDERED section.  */
 
@@ -206,7 +317,7 @@
   struct gomp_thread *thr = gomp_thread ();
 
   thr->ts.static_trip = 0;
-  if (gomp_work_share_start (true))
+  if (gomp_work_share_start (1))
     {
       gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
                           GFS_STATIC, chunk_size);
@@ -225,7 +336,7 @@
   struct gomp_thread *thr = gomp_thread ();
   bool ret;
 
-  if (gomp_work_share_start (true))
+  if (gomp_work_share_start (1))
     {
       gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
                           GFS_DYNAMIC, chunk_size);
@@ -251,7 +362,7 @@
   struct gomp_thread *thr = gomp_thread ();
   bool ret;
 
-  if (gomp_work_share_start (true))
+  if (gomp_work_share_start (1))
     {
       gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
                           GFS_GUIDED, chunk_size);
@@ -275,7 +386,7 @@
                                      gomp_ull *iend)
 {
   struct gomp_task_icv *icv = gomp_icv (false);
-  switch (icv->run_sched_var)
+  switch (icv->run_sched_var & ~GFS_MONOTONIC)
     {
     case GFS_STATIC:
       return gomp_loop_ull_ordered_static_start (up, start, end, incr,
@@ -299,6 +410,82 @@
     }
 }
 
+bool
+GOMP_loop_ull_ordered_start (bool up, gomp_ull start, gomp_ull end,
+                             gomp_ull incr, long sched, gomp_ull chunk_size,
+                             gomp_ull *istart, gomp_ull *iend,
+                             uintptr_t *reductions, void **mem)
+{
+  struct gomp_thread *thr = gomp_thread ();
+  size_t ordered = 1;
+  bool ret;
+
+  thr->ts.static_trip = 0;
+  if (reductions)
+    gomp_workshare_taskgroup_start ();
+  if (mem)
+    ordered += (uintptr_t) *mem;
+  if (gomp_work_share_start (ordered))
+    {
+      sched = gomp_adjust_sched (sched, &chunk_size);
+      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
+                          sched, chunk_size);
+      if (reductions)
+        {
+          GOMP_taskgroup_reduction_register (reductions);
+          thr->task->taskgroup->workshare = true;
+          thr->ts.work_share->task_reductions = reductions;
+        }
+      if (sched == GFS_STATIC)
+        gomp_ordered_static_init ();
+      else
+        gomp_mutex_lock (&thr->ts.work_share->lock);
+      gomp_work_share_init_done ();
+    }
+  else
+    {
+      if (reductions)
+        {
+          uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
+          gomp_workshare_task_reduction_register (reductions,
+                                                  first_reductions);
+        }
+      sched = thr->ts.work_share->sched;
+      if (sched != GFS_STATIC)
+        gomp_mutex_lock (&thr->ts.work_share->lock);
+    }
+
+  if (mem)
+    {
+      uintptr_t p
+        = (uintptr_t) (thr->ts.work_share->ordered_team_ids
+                       + (thr->ts.team ? thr->ts.team->nthreads : 1));
+      p += __alignof__ (long long) - 1;
+      p &= ~(__alignof__ (long long) - 1);
+      *mem = (void *) p;
+    }
+
+  switch (sched)
+    {
+    case GFS_STATIC:
+    case GFS_AUTO:
+      return !gomp_iter_ull_static_next (istart, iend);
+    case GFS_DYNAMIC:
+      ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
+      break;
+    case GFS_GUIDED:
+      ret = gomp_iter_ull_guided_next_locked (istart, iend);
+      break;
+    default:
+      abort ();
+    }
+
+  if (ret)
+    gomp_ordered_first ();
+  gomp_mutex_unlock (&thr->ts.work_share->lock);
+  return ret;
+}
+
 /* The *_doacross_*_start routines are similar.  The only difference is that
    this work-share construct is initialized to expect an ORDERED(N) - DOACROSS
    section, and the worksharing loop iterates always from 0 to COUNTS[0] - 1
@@ -313,11 +500,11 @@
   struct gomp_thread *thr = gomp_thread ();
 
   thr->ts.static_trip = 0;
-  if (gomp_work_share_start (false))
+  if (gomp_work_share_start (0))
     {
       gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
                           GFS_STATIC, chunk_size);
-      gomp_doacross_ull_init (ncounts, counts, chunk_size);
+      gomp_doacross_ull_init (ncounts, counts, chunk_size, 0);
       gomp_work_share_init_done ();
     }
 
@@ -332,11 +519,11 @@
   struct gomp_thread *thr = gomp_thread ();
   bool ret;
 
-  if (gomp_work_share_start (false))
+  if (gomp_work_share_start (0))
     {
       gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
                           GFS_DYNAMIC, chunk_size);
-      gomp_doacross_ull_init (ncounts, counts, chunk_size);
+      gomp_doacross_ull_init (ncounts, counts, chunk_size, 0);
       gomp_work_share_init_done ();
     }
 
@@ -359,11 +546,11 @@
   struct gomp_thread *thr = gomp_thread ();
   bool ret;
 
-  if (gomp_work_share_start (false))
+  if (gomp_work_share_start (0))
     {
       gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
                           GFS_GUIDED, chunk_size);
-      gomp_doacross_ull_init (ncounts, counts, chunk_size);
+      gomp_doacross_ull_init (ncounts, counts, chunk_size, 0);
       gomp_work_share_init_done ();
     }
 
@@ -383,7 +570,7 @@
                                      gomp_ull *istart, gomp_ull *iend)
 {
   struct gomp_task_icv *icv = gomp_icv (false);
-  switch (icv->run_sched_var)
+  switch (icv->run_sched_var & ~GFS_MONOTONIC)
     {
     case GFS_STATIC:
       return gomp_loop_ull_doacross_static_start (ncounts, counts,
@@ -407,6 +594,51 @@
     }
 }
 
+bool
+GOMP_loop_ull_doacross_start (unsigned ncounts, gomp_ull *counts,
+                              long sched, gomp_ull chunk_size,
+                              gomp_ull *istart, gomp_ull *iend,
+                              uintptr_t *reductions, void **mem)
+{
+  struct gomp_thread *thr = gomp_thread ();
+
+  thr->ts.static_trip = 0;
+  if (reductions)
+    gomp_workshare_taskgroup_start ();
+  if (gomp_work_share_start (0))
+    {
+      size_t extra = 0;
+      if (mem)
+        extra = (uintptr_t) *mem;
+      sched = gomp_adjust_sched (sched, &chunk_size);
+      gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
+                          sched, chunk_size);
+      gomp_doacross_ull_init (ncounts, counts, chunk_size, extra);
+      if (reductions)
+        {
+          GOMP_taskgroup_reduction_register (reductions);
+          thr->task->taskgroup->workshare = true;
+          thr->ts.work_share->task_reductions = reductions;
+        }
+      gomp_work_share_init_done ();
+    }
+  else
+    {
+      if (reductions)
+        {
+          uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
+          gomp_workshare_task_reduction_register (reductions,
+                                                  first_reductions);
+        }
+      sched = thr->ts.work_share->sched;
+    }
+
+  if (mem)
+    *mem = thr->ts.work_share->doacross->extra;
+
+  return ialias_call (GOMP_loop_ull_runtime_next) (istart, iend);
+}
+
 /* The *_next routines are called when the thread completes processing of
    the iteration block currently assigned to it.  If the work-share
    construct is bound directly to a parallel construct, then the iteration
@@ -570,6 +802,10 @@
         __attribute__((alias ("gomp_loop_ull_dynamic_start")));
 extern __typeof(gomp_loop_ull_guided_start) GOMP_loop_ull_nonmonotonic_guided_start
         __attribute__((alias ("gomp_loop_ull_guided_start")));
+extern __typeof(GOMP_loop_ull_runtime_start) GOMP_loop_ull_nonmonotonic_runtime_start
+        __attribute__((alias ("GOMP_loop_ull_runtime_start")));
+extern __typeof(GOMP_loop_ull_runtime_start) GOMP_loop_ull_maybe_nonmonotonic_runtime_start
+        __attribute__((alias ("GOMP_loop_ull_runtime_start")));
 
 extern __typeof(gomp_loop_ull_ordered_static_start) GOMP_loop_ull_ordered_static_start
         __attribute__((alias ("gomp_loop_ull_ordered_static_start")));
@@ -595,6 +831,10 @@
         __attribute__((alias ("gomp_loop_ull_dynamic_next")));
 extern __typeof(gomp_loop_ull_guided_next) GOMP_loop_ull_nonmonotonic_guided_next
         __attribute__((alias ("gomp_loop_ull_guided_next")));
+extern __typeof(GOMP_loop_ull_runtime_next) GOMP_loop_ull_nonmonotonic_runtime_next
+        __attribute__((alias ("GOMP_loop_ull_runtime_next")));
+extern __typeof(GOMP_loop_ull_runtime_next) GOMP_loop_ull_maybe_nonmonotonic_runtime_next
+        __attribute__((alias ("GOMP_loop_ull_runtime_next")));
 
 extern __typeof(gomp_loop_ull_ordered_static_next) GOMP_loop_ull_ordered_static_next
         __attribute__((alias ("gomp_loop_ull_ordered_static_next")));
@@ -650,6 +890,23 @@
 }
 
 bool
+GOMP_loop_ull_nonmonotonic_runtime_start (bool up, gomp_ull start,
+                                          gomp_ull end, gomp_ull incr,
+                                          gomp_ull *istart, gomp_ull *iend)
+{
+  return GOMP_loop_ull_runtime_start (up, start, end, incr, istart, iend);
+}
+
+bool
+GOMP_loop_ull_maybe_nonmonotonic_runtime_start (bool up, gomp_ull start,
+                                                gomp_ull end, gomp_ull incr,
+                                                gomp_ull *istart,
+                                                gomp_ull *iend)
+{
+  return GOMP_loop_ull_runtime_start (up, start, end, incr, istart, iend);
+}
+
+bool
 GOMP_loop_ull_ordered_static_start (bool up, gomp_ull start, gomp_ull end,
                                     gomp_ull incr, gomp_ull chunk_size,
                                     gomp_ull *istart, gomp_ull *iend)
@@ -734,6 +991,19 @@
 }
 
 bool
+GOMP_loop_ull_nonmonotonic_runtime_next (gomp_ull *istart, gomp_ull *iend)
+{
+  return GOMP_loop_ull_runtime_next (istart, iend);
+}
+
+bool
+GOMP_loop_ull_maybe_nonmonotonic_runtime_next (gomp_ull *istart,
+                                               gomp_ull *iend)
+{
+  return GOMP_loop_ull_runtime_next (istart, iend);
+}
+
+bool
 GOMP_loop_ull_ordered_static_next (gomp_ull *istart, gomp_ull *iend)
 {
   return gomp_loop_ull_ordered_static_next (istart, iend);
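The new GOMP_loop_ull_start, GOMP_loop_ull_ordered_start and GOMP_loop_ull_doacross_start entry points added above fold schedule selection (gomp_adjust_sched), task-reduction registration and the optional *mem scratch-space handshake into a single call, and then hand back the first iteration block by calling GOMP_loop_ull_runtime_next. For orientation, here is a minimal sketch of how compiler-generated code drives the plain start/next pair for a schedule(runtime) unsigned-long-long loop. The two GOMP_loop_ull_runtime_* prototypes are taken from this file; GOMP_loop_end_nowait is assumed to come from libgomp's loop.c, and the loop shape is illustrative rather than the compiler's actual lowering.

```c
#include <stdbool.h>

typedef unsigned long long gomp_ull;

/* Prototypes as they appear in this file.  */
extern bool GOMP_loop_ull_runtime_start (bool up, gomp_ull start,
                                         gomp_ull end, gomp_ull incr,
                                         gomp_ull *istart, gomp_ull *iend);
extern bool GOMP_loop_ull_runtime_next (gomp_ull *istart, gomp_ull *iend);
/* Assumed: work-share release call defined elsewhere in libgomp (loop.c).  */
extern void GOMP_loop_end_nowait (void);

/* Placeholder for one iteration of the original user loop.  */
static void loop_body (gomp_ull i) { (void) i; }

/* Conceptually, each thread of the team runs this inside the parallel
   region: grab a block of iterations, run it, ask for the next one.  */
void
run_ull_loop (gomp_ull n, gomp_ull step)
{
  gomp_ull istart, iend;

  if (GOMP_loop_ull_runtime_start (true, 0, n, step, &istart, &iend))
    {
      do
        {
          for (gomp_ull i = istart; i < iend; i += step)
            loop_body (i);
        }
      while (GOMP_loop_ull_runtime_next (&istart, &iend));
    }
  GOMP_loop_end_nowait ();
}
```

The combined *_start functions in this changeset behave like the start call plus the first next call in one step, which is why they finish by returning ialias_call (GOMP_loop_ull_runtime_next) (istart, iend).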