Mercurial > hg > CbC > CbC_gcc
diff libgomp/loop_ull.c @ 0:a06113de4d67
first commit
author | kent <kent@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Fri, 17 Jul 2009 14:47:48 +0900 |
parents | |
children | 04ced10e8804 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libgomp/loop_ull.c Fri Jul 17 14:47:48 2009 +0900 @@ -0,0 +1,571 @@ +/* Copyright (C) 2005, 2008, 2009 Free Software Foundation, Inc. + Contributed by Richard Henderson <rth@redhat.com>. + + This file is part of the GNU OpenMP Library (libgomp). + + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +/* This file handles the LOOP (FOR/DO) construct. */ + +#include <limits.h> +#include <stdlib.h> +#include "libgomp.h" + +typedef unsigned long long gomp_ull; + +/* Initialize the given work share construct from the given arguments. */ + +static inline void +gomp_loop_ull_init (struct gomp_work_share *ws, bool up, gomp_ull start, + gomp_ull end, gomp_ull incr, enum gomp_schedule_type sched, + gomp_ull chunk_size) +{ + ws->sched = sched; + ws->chunk_size_ull = chunk_size; + /* Canonicalize loops that have zero iterations to ->next == ->end. */ + ws->end_ull = ((up && start > end) || (!up && start < end)) + ? start : end; + ws->incr_ull = incr; + ws->next_ull = start; + ws->mode = 0; + if (sched == GFS_DYNAMIC) + { + ws->chunk_size_ull *= incr; + +#if defined HAVE_SYNC_BUILTINS && defined __LP64__ + { + /* For dynamic scheduling prepare things to make each iteration + faster. */ + struct gomp_thread *thr = gomp_thread (); + struct gomp_team *team = thr->ts.team; + long nthreads = team ? team->nthreads : 1; + + if (__builtin_expect (up, 1)) + { + /* Cheap overflow protection. */ + if (__builtin_expect ((nthreads | ws->chunk_size_ull) + < 1ULL << (sizeof (gomp_ull) + * __CHAR_BIT__ / 2 - 1), 1)) + ws->mode = ws->end_ull < (__LONG_LONG_MAX__ * 2ULL + 1 + - (nthreads + 1) * ws->chunk_size_ull); + } + /* Cheap overflow protection. */ + else if (__builtin_expect ((nthreads | -ws->chunk_size_ull) + < 1ULL << (sizeof (gomp_ull) + * __CHAR_BIT__ / 2 - 1), 1)) + ws->mode = ws->end_ull > ((nthreads + 1) * -ws->chunk_size_ull + - (__LONG_LONG_MAX__ * 2ULL + 1)); + } +#endif + } + if (!up) + ws->mode |= 2; +} + +/* The *_start routines are called when first encountering a loop construct + that is not bound directly to a parallel construct. The first thread + that arrives will create the work-share construct; subsequent threads + will see the construct exists and allocate work from it. + + START, END, INCR are the bounds of the loop; due to the restrictions of + OpenMP, these values must be the same in every thread. This is not + verified (nor is it entirely verifiable, since START is not necessarily + retained intact in the work-share data structure). CHUNK_SIZE is the + scheduling parameter; again this must be identical in all threads. + + Returns true if there's any work for this thread to perform. If so, + *ISTART and *IEND are filled with the bounds of the iteration block + allocated to this thread. Returns false if all work was assigned to + other threads prior to this thread's arrival. */ + +static bool +gomp_loop_ull_static_start (bool up, gomp_ull start, gomp_ull end, + gomp_ull incr, gomp_ull chunk_size, + gomp_ull *istart, gomp_ull *iend) +{ + struct gomp_thread *thr = gomp_thread (); + + thr->ts.static_trip = 0; + if (gomp_work_share_start (false)) + { + gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr, + GFS_STATIC, chunk_size); + gomp_work_share_init_done (); + } + + return !gomp_iter_ull_static_next (istart, iend); +} + +static bool +gomp_loop_ull_dynamic_start (bool up, gomp_ull start, gomp_ull end, + gomp_ull incr, gomp_ull chunk_size, + gomp_ull *istart, gomp_ull *iend) +{ + struct gomp_thread *thr = gomp_thread (); + bool ret; + + if (gomp_work_share_start (false)) + { + gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr, + GFS_DYNAMIC, chunk_size); + gomp_work_share_init_done (); + } + +#if defined HAVE_SYNC_BUILTINS && defined __LP64__ + ret = gomp_iter_ull_dynamic_next (istart, iend); +#else + gomp_mutex_lock (&thr->ts.work_share->lock); + ret = gomp_iter_ull_dynamic_next_locked (istart, iend); + gomp_mutex_unlock (&thr->ts.work_share->lock); +#endif + + return ret; +} + +static bool +gomp_loop_ull_guided_start (bool up, gomp_ull start, gomp_ull end, + gomp_ull incr, gomp_ull chunk_size, + gomp_ull *istart, gomp_ull *iend) +{ + struct gomp_thread *thr = gomp_thread (); + bool ret; + + if (gomp_work_share_start (false)) + { + gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr, + GFS_GUIDED, chunk_size); + gomp_work_share_init_done (); + } + +#if defined HAVE_SYNC_BUILTINS && defined __LP64__ + ret = gomp_iter_ull_guided_next (istart, iend); +#else + gomp_mutex_lock (&thr->ts.work_share->lock); + ret = gomp_iter_ull_guided_next_locked (istart, iend); + gomp_mutex_unlock (&thr->ts.work_share->lock); +#endif + + return ret; +} + +bool +GOMP_loop_ull_runtime_start (bool up, gomp_ull start, gomp_ull end, + gomp_ull incr, gomp_ull *istart, gomp_ull *iend) +{ + struct gomp_task_icv *icv = gomp_icv (false); + switch (icv->run_sched_var) + { + case GFS_STATIC: + return gomp_loop_ull_static_start (up, start, end, incr, + icv->run_sched_modifier, + istart, iend); + case GFS_DYNAMIC: + return gomp_loop_ull_dynamic_start (up, start, end, incr, + icv->run_sched_modifier, + istart, iend); + case GFS_GUIDED: + return gomp_loop_ull_guided_start (up, start, end, incr, + icv->run_sched_modifier, + istart, iend); + case GFS_AUTO: + /* For now map to schedule(static), later on we could play with feedback + driven choice. */ + return gomp_loop_ull_static_start (up, start, end, incr, + 0, istart, iend); + default: + abort (); + } +} + +/* The *_ordered_*_start routines are similar. The only difference is that + this work-share construct is initialized to expect an ORDERED section. */ + +static bool +gomp_loop_ull_ordered_static_start (bool up, gomp_ull start, gomp_ull end, + gomp_ull incr, gomp_ull chunk_size, + gomp_ull *istart, gomp_ull *iend) +{ + struct gomp_thread *thr = gomp_thread (); + + thr->ts.static_trip = 0; + if (gomp_work_share_start (true)) + { + gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr, + GFS_STATIC, chunk_size); + gomp_ordered_static_init (); + gomp_work_share_init_done (); + } + + return !gomp_iter_ull_static_next (istart, iend); +} + +static bool +gomp_loop_ull_ordered_dynamic_start (bool up, gomp_ull start, gomp_ull end, + gomp_ull incr, gomp_ull chunk_size, + gomp_ull *istart, gomp_ull *iend) +{ + struct gomp_thread *thr = gomp_thread (); + bool ret; + + if (gomp_work_share_start (true)) + { + gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr, + GFS_DYNAMIC, chunk_size); + gomp_mutex_lock (&thr->ts.work_share->lock); + gomp_work_share_init_done (); + } + else + gomp_mutex_lock (&thr->ts.work_share->lock); + + ret = gomp_iter_ull_dynamic_next_locked (istart, iend); + if (ret) + gomp_ordered_first (); + gomp_mutex_unlock (&thr->ts.work_share->lock); + + return ret; +} + +static bool +gomp_loop_ull_ordered_guided_start (bool up, gomp_ull start, gomp_ull end, + gomp_ull incr, gomp_ull chunk_size, + gomp_ull *istart, gomp_ull *iend) +{ + struct gomp_thread *thr = gomp_thread (); + bool ret; + + if (gomp_work_share_start (true)) + { + gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr, + GFS_GUIDED, chunk_size); + gomp_mutex_lock (&thr->ts.work_share->lock); + gomp_work_share_init_done (); + } + else + gomp_mutex_lock (&thr->ts.work_share->lock); + + ret = gomp_iter_ull_guided_next_locked (istart, iend); + if (ret) + gomp_ordered_first (); + gomp_mutex_unlock (&thr->ts.work_share->lock); + + return ret; +} + +bool +GOMP_loop_ull_ordered_runtime_start (bool up, gomp_ull start, gomp_ull end, + gomp_ull incr, gomp_ull *istart, + gomp_ull *iend) +{ + struct gomp_task_icv *icv = gomp_icv (false); + switch (icv->run_sched_var) + { + case GFS_STATIC: + return gomp_loop_ull_ordered_static_start (up, start, end, incr, + icv->run_sched_modifier, + istart, iend); + case GFS_DYNAMIC: + return gomp_loop_ull_ordered_dynamic_start (up, start, end, incr, + icv->run_sched_modifier, + istart, iend); + case GFS_GUIDED: + return gomp_loop_ull_ordered_guided_start (up, start, end, incr, + icv->run_sched_modifier, + istart, iend); + case GFS_AUTO: + /* For now map to schedule(static), later on we could play with feedback + driven choice. */ + return gomp_loop_ull_ordered_static_start (up, start, end, incr, + 0, istart, iend); + default: + abort (); + } +} + +/* The *_next routines are called when the thread completes processing of + the iteration block currently assigned to it. If the work-share + construct is bound directly to a parallel construct, then the iteration + bounds may have been set up before the parallel. In which case, this + may be the first iteration for the thread. + + Returns true if there is work remaining to be performed; *ISTART and + *IEND are filled with a new iteration block. Returns false if all work + has been assigned. */ + +static bool +gomp_loop_ull_static_next (gomp_ull *istart, gomp_ull *iend) +{ + return !gomp_iter_ull_static_next (istart, iend); +} + +static bool +gomp_loop_ull_dynamic_next (gomp_ull *istart, gomp_ull *iend) +{ + bool ret; + +#if defined HAVE_SYNC_BUILTINS && defined __LP64__ + ret = gomp_iter_ull_dynamic_next (istart, iend); +#else + struct gomp_thread *thr = gomp_thread (); + gomp_mutex_lock (&thr->ts.work_share->lock); + ret = gomp_iter_ull_dynamic_next_locked (istart, iend); + gomp_mutex_unlock (&thr->ts.work_share->lock); +#endif + + return ret; +} + +static bool +gomp_loop_ull_guided_next (gomp_ull *istart, gomp_ull *iend) +{ + bool ret; + +#if defined HAVE_SYNC_BUILTINS && defined __LP64__ + ret = gomp_iter_ull_guided_next (istart, iend); +#else + struct gomp_thread *thr = gomp_thread (); + gomp_mutex_lock (&thr->ts.work_share->lock); + ret = gomp_iter_ull_guided_next_locked (istart, iend); + gomp_mutex_unlock (&thr->ts.work_share->lock); +#endif + + return ret; +} + +bool +GOMP_loop_ull_runtime_next (gomp_ull *istart, gomp_ull *iend) +{ + struct gomp_thread *thr = gomp_thread (); + + switch (thr->ts.work_share->sched) + { + case GFS_STATIC: + case GFS_AUTO: + return gomp_loop_ull_static_next (istart, iend); + case GFS_DYNAMIC: + return gomp_loop_ull_dynamic_next (istart, iend); + case GFS_GUIDED: + return gomp_loop_ull_guided_next (istart, iend); + default: + abort (); + } +} + +/* The *_ordered_*_next routines are called when the thread completes + processing of the iteration block currently assigned to it. + + Returns true if there is work remaining to be performed; *ISTART and + *IEND are filled with a new iteration block. Returns false if all work + has been assigned. */ + +static bool +gomp_loop_ull_ordered_static_next (gomp_ull *istart, gomp_ull *iend) +{ + struct gomp_thread *thr = gomp_thread (); + int test; + + gomp_ordered_sync (); + gomp_mutex_lock (&thr->ts.work_share->lock); + test = gomp_iter_ull_static_next (istart, iend); + if (test >= 0) + gomp_ordered_static_next (); + gomp_mutex_unlock (&thr->ts.work_share->lock); + + return test == 0; +} + +static bool +gomp_loop_ull_ordered_dynamic_next (gomp_ull *istart, gomp_ull *iend) +{ + struct gomp_thread *thr = gomp_thread (); + bool ret; + + gomp_ordered_sync (); + gomp_mutex_lock (&thr->ts.work_share->lock); + ret = gomp_iter_ull_dynamic_next_locked (istart, iend); + if (ret) + gomp_ordered_next (); + else + gomp_ordered_last (); + gomp_mutex_unlock (&thr->ts.work_share->lock); + + return ret; +} + +static bool +gomp_loop_ull_ordered_guided_next (gomp_ull *istart, gomp_ull *iend) +{ + struct gomp_thread *thr = gomp_thread (); + bool ret; + + gomp_ordered_sync (); + gomp_mutex_lock (&thr->ts.work_share->lock); + ret = gomp_iter_ull_guided_next_locked (istart, iend); + if (ret) + gomp_ordered_next (); + else + gomp_ordered_last (); + gomp_mutex_unlock (&thr->ts.work_share->lock); + + return ret; +} + +bool +GOMP_loop_ull_ordered_runtime_next (gomp_ull *istart, gomp_ull *iend) +{ + struct gomp_thread *thr = gomp_thread (); + + switch (thr->ts.work_share->sched) + { + case GFS_STATIC: + case GFS_AUTO: + return gomp_loop_ull_ordered_static_next (istart, iend); + case GFS_DYNAMIC: + return gomp_loop_ull_ordered_dynamic_next (istart, iend); + case GFS_GUIDED: + return gomp_loop_ull_ordered_guided_next (istart, iend); + default: + abort (); + } +} + +/* We use static functions above so that we're sure that the "runtime" + function can defer to the proper routine without interposition. We + export the static function with a strong alias when possible, or with + a wrapper function otherwise. */ + +#ifdef HAVE_ATTRIBUTE_ALIAS +extern __typeof(gomp_loop_ull_static_start) GOMP_loop_ull_static_start + __attribute__((alias ("gomp_loop_ull_static_start"))); +extern __typeof(gomp_loop_ull_dynamic_start) GOMP_loop_ull_dynamic_start + __attribute__((alias ("gomp_loop_ull_dynamic_start"))); +extern __typeof(gomp_loop_ull_guided_start) GOMP_loop_ull_guided_start + __attribute__((alias ("gomp_loop_ull_guided_start"))); + +extern __typeof(gomp_loop_ull_ordered_static_start) GOMP_loop_ull_ordered_static_start + __attribute__((alias ("gomp_loop_ull_ordered_static_start"))); +extern __typeof(gomp_loop_ull_ordered_dynamic_start) GOMP_loop_ull_ordered_dynamic_start + __attribute__((alias ("gomp_loop_ull_ordered_dynamic_start"))); +extern __typeof(gomp_loop_ull_ordered_guided_start) GOMP_loop_ull_ordered_guided_start + __attribute__((alias ("gomp_loop_ull_ordered_guided_start"))); + +extern __typeof(gomp_loop_ull_static_next) GOMP_loop_ull_static_next + __attribute__((alias ("gomp_loop_ull_static_next"))); +extern __typeof(gomp_loop_ull_dynamic_next) GOMP_loop_ull_dynamic_next + __attribute__((alias ("gomp_loop_ull_dynamic_next"))); +extern __typeof(gomp_loop_ull_guided_next) GOMP_loop_ull_guided_next + __attribute__((alias ("gomp_loop_ull_guided_next"))); + +extern __typeof(gomp_loop_ull_ordered_static_next) GOMP_loop_ull_ordered_static_next + __attribute__((alias ("gomp_loop_ull_ordered_static_next"))); +extern __typeof(gomp_loop_ull_ordered_dynamic_next) GOMP_loop_ull_ordered_dynamic_next + __attribute__((alias ("gomp_loop_ull_ordered_dynamic_next"))); +extern __typeof(gomp_loop_ull_ordered_guided_next) GOMP_loop_ull_ordered_guided_next + __attribute__((alias ("gomp_loop_ull_ordered_guided_next"))); +#else +bool +GOMP_loop_ull_static_start (bool up, gomp_ull start, gomp_ull end, + gomp_ull incr, gomp_ull chunk_size, + gomp_ull *istart, gomp_ull *iend) +{ + return gomp_loop_ull_static_start (up, start, end, incr, chunk_size, istart, + iend); +} + +bool +GOMP_loop_ull_dynamic_start (bool up, gomp_ull start, gomp_ull end, + gomp_ull incr, gomp_ull chunk_size, + gomp_ull *istart, gomp_ull *iend) +{ + return gomp_loop_ull_dynamic_start (up, start, end, incr, chunk_size, istart, + iend); +} + +bool +GOMP_loop_ull_guided_start (bool up, gomp_ull start, gomp_ull end, + gomp_ull incr, gomp_ull chunk_size, + gomp_ull *istart, gomp_ull *iend) +{ + return gomp_loop_ull_guided_start (up, start, end, incr, chunk_size, istart, + iend); +} + +bool +GOMP_loop_ull_ordered_static_start (bool up, gomp_ull start, gomp_ull end, + gomp_ull incr, gomp_ull chunk_size, + gomp_ull *istart, gomp_ull *iend) +{ + return gomp_loop_ull_ordered_static_start (up, start, end, incr, chunk_size, + istart, iend); +} + +bool +GOMP_loop_ull_ordered_dynamic_start (bool up, gomp_ull start, gomp_ull end, + gomp_ull incr, gomp_ull chunk_size, + gomp_ull *istart, gomp_ull *iend) +{ + return gomp_loop_ull_ordered_dynamic_start (up, start, end, incr, chunk_size, + istart, iend); +} + +bool +GOMP_loop_ull_ordered_guided_start (bool up, gomp_ull start, gomp_ull end, + gomp_ull incr, gomp_ull chunk_size, + gomp_ull *istart, gomp_ull *iend) +{ + return gomp_loop_ull_ordered_guided_start (up, start, end, incr, chunk_size, + istart, iend); +} + +bool +GOMP_loop_ull_static_next (gomp_ull *istart, gomp_ull *iend) +{ + return gomp_loop_ull_static_next (istart, iend); +} + +bool +GOMP_loop_ull_dynamic_next (gomp_ull *istart, gomp_ull *iend) +{ + return gomp_loop_ull_dynamic_next (istart, iend); +} + +bool +GOMP_loop_ull_guided_next (gomp_ull *istart, gomp_ull *iend) +{ + return gomp_loop_ull_guided_next (istart, iend); +} + +bool +GOMP_loop_ull_ordered_static_next (gomp_ull *istart, gomp_ull *iend) +{ + return gomp_loop_ull_ordered_static_next (istart, iend); +} + +bool +GOMP_loop_ull_ordered_dynamic_next (gomp_ull *istart, gomp_ull *iend) +{ + return gomp_loop_ull_ordered_dynamic_next (istart, iend); +} + +bool +GOMP_loop_ull_ordered_guided_next (gomp_ull *istart, gomp_ull *iend) +{ + return gomp_loop_ull_ordered_guided_next (istart, iend); +} +#endif