diff libgomp/loop_ull.c @ 145:1830386684a0

gcc-9.2.0
author anatofuz
date Thu, 13 Feb 2020 11:34:05 +0900
parents 84e7813d76e9
--- a/libgomp/loop_ull.c	Thu Oct 25 07:37:49 2018 +0900
+++ b/libgomp/loop_ull.c	Thu Feb 13 11:34:05 2020 +0900
@@ -1,4 +1,4 @@
-/* Copyright (C) 2005-2018 Free Software Foundation, Inc.
+/* Copyright (C) 2005-2020 Free Software Foundation, Inc.
    Contributed by Richard Henderson <rth@redhat.com>.
 
    This file is part of the GNU Offloading and Multi Processing Library
@@ -27,8 +27,12 @@
 
 #include <limits.h>
 #include <stdlib.h>
+#include <string.h>
 #include "libgomp.h"
 
+ialias (GOMP_loop_ull_runtime_next)
+ialias_redirect (GOMP_taskgroup_reduction_register)
+
 typedef unsigned long long gomp_ull;
 
 /* Initialize the given work share construct from the given arguments.  */
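Note on the new ialias lines above: ialias (GOMP_loop_ull_runtime_next) creates a hidden, library-internal alias so the new *_start entry points below can call that function through ialias_call without going through the exported (and therefore interposable) symbol, while ialias_redirect (GOMP_taskgroup_reduction_register) redirects this file's calls to that function to its internal alias. A minimal sketch of how such macros are commonly built on GCC's alias and visibility attributes (illustrative only; the real definitions live in libgomp.h and may differ in detail):

/* Sketch, not the libgomp definitions: an "internal alias" pattern.  */
#define ialias(fn) \
  extern __typeof (fn) gomp_ialias_##fn \
    __attribute__ ((alias (#fn), visibility ("hidden")));
#define ialias_call(fn) gomp_ialias_##fn
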
@@ -104,7 +108,7 @@
   struct gomp_thread *thr = gomp_thread ();
 
   thr->ts.static_trip = 0;
-  if (gomp_work_share_start (false))
+  if (gomp_work_share_start (0))
     {
       gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
 			  GFS_STATIC, chunk_size);
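The false -> 0 and true -> 1 changes to gomp_work_share_start throughout this file reflect an internal interface change: the parameter is no longer a bool "ordered" flag but a count, which lets GOMP_loop_ull_ordered_start below request additional ordered allocation beyond the default slot. A sketch of the assumed new declaration (the real one is in libgomp.h):

/* Assumed new signature; previously "bool gomp_work_share_start (bool ordered)".  */
extern bool gomp_work_share_start (size_t ordered);
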
@@ -122,7 +126,7 @@
   struct gomp_thread *thr = gomp_thread ();
   bool ret;
 
-  if (gomp_work_share_start (false))
+  if (gomp_work_share_start (0))
     {
       gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
 			  GFS_DYNAMIC, chunk_size);
@@ -148,7 +152,7 @@
   struct gomp_thread *thr = gomp_thread ();
   bool ret;
 
-  if (gomp_work_share_start (false))
+  if (gomp_work_share_start (0))
     {
       gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
 			  GFS_GUIDED, chunk_size);
@@ -171,7 +175,7 @@
 			     gomp_ull incr, gomp_ull *istart, gomp_ull *iend)
 {
   struct gomp_task_icv *icv = gomp_icv (false);
-  switch (icv->run_sched_var)
+  switch (icv->run_sched_var & ~GFS_MONOTONIC)
     {
     case GFS_STATIC:
       return gomp_loop_ull_static_start (up, start, end, incr,
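The & ~GFS_MONOTONIC masks added here (and in the other *_runtime_start dispatchers below) matter because run_sched_var can now carry the monotonic modifier as a flag bit ORed into the schedule kind, and the dispatch only cares about the base kind. A sketch of the assumed encoding (the real enum lives in libgomp.h; the exact values are an assumption made for illustration):

/* Assumed encoding: the schedule kind sits in the low bits and
   GFS_MONOTONIC is a single high flag bit, so the base kind is
   recovered with "sched & ~GFS_MONOTONIC".  */
enum gomp_schedule_type
{
  GFS_RUNTIME,
  GFS_STATIC,
  GFS_DYNAMIC,
  GFS_GUIDED,
  GFS_AUTO,
  GFS_MONOTONIC = 0x80000000U
};

/* Example: schedule(monotonic:dynamic) arrives as GFS_DYNAMIC | GFS_MONOTONIC,
   and (GFS_DYNAMIC | GFS_MONOTONIC) & ~GFS_MONOTONIC == GFS_DYNAMIC.  */
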
@@ -195,6 +199,113 @@
     }
 }
 
+static long
+gomp_adjust_sched (long sched, gomp_ull *chunk_size)
+{
+  sched &= ~GFS_MONOTONIC;
+  switch (sched)
+    {
+    case GFS_STATIC:
+    case GFS_DYNAMIC:
+    case GFS_GUIDED:
+      return sched;
+    /* GFS_RUNTIME is used for runtime schedule without monotonic
+       or nonmonotonic modifiers on the clause.
+       GFS_RUNTIME|GFS_MONOTONIC for runtime schedule with monotonic
+       modifier.  */
+    case GFS_RUNTIME:
+    /* GFS_AUTO is used for runtime schedule with nonmonotonic
+       modifier.  */
+    case GFS_AUTO:
+      {
+	struct gomp_task_icv *icv = gomp_icv (false);
+	sched = icv->run_sched_var & ~GFS_MONOTONIC;
+	switch (sched)
+	  {
+	  case GFS_STATIC:
+	  case GFS_DYNAMIC:
+	  case GFS_GUIDED:
+	    *chunk_size = icv->run_sched_chunk_size;
+	    break;
+	  case GFS_AUTO:
+	    sched = GFS_STATIC;
+	    *chunk_size = 0;
+	    break;
+	  default:
+	    abort ();
+	  }
+	return sched;
+      }
+    default:
+      abort ();
+    }
+}
+
+bool
+GOMP_loop_ull_start (bool up, gomp_ull start, gomp_ull end,
+		     gomp_ull incr, long sched, gomp_ull chunk_size,
+		     gomp_ull *istart, gomp_ull *iend,
+		     uintptr_t *reductions, void **mem)
+{
+  struct gomp_thread *thr = gomp_thread ();
+
+  thr->ts.static_trip = 0;
+  if (reductions)
+    gomp_workshare_taskgroup_start ();
+  if (gomp_work_share_start (0))
+    {
+      sched = gomp_adjust_sched (sched, &chunk_size);
+      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
+      			  sched, chunk_size);
+      if (reductions)
+	{
+	  GOMP_taskgroup_reduction_register (reductions);
+	  thr->task->taskgroup->workshare = true;
+	  thr->ts.work_share->task_reductions = reductions;
+	}
+      if (mem)
+	{
+	  uintptr_t size = (uintptr_t) *mem;
+#define INLINE_ORDERED_TEAM_IDS_OFF \
+  ((offsetof (struct gomp_work_share, inline_ordered_team_ids)		\
+    + __alignof__ (long long) - 1) & ~(__alignof__ (long long) - 1))
+	  if (size > (sizeof (struct gomp_work_share)
+		      - INLINE_ORDERED_TEAM_IDS_OFF))
+	    *mem
+	      = (void *) (thr->ts.work_share->ordered_team_ids
+			  = gomp_malloc_cleared (size));
+	  else
+	    *mem = memset (((char *) thr->ts.work_share)
+			   + INLINE_ORDERED_TEAM_IDS_OFF, '\0', size);
+	}
+      gomp_work_share_init_done ();
+    }
+  else
+    {
+      if (reductions)
+	{
+	  uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
+	  gomp_workshare_task_reduction_register (reductions,
+						  first_reductions);
+	}
+      if (mem)
+	{
+	  if ((offsetof (struct gomp_work_share, inline_ordered_team_ids)
+	       & (__alignof__ (long long) - 1)) == 0)
+	    *mem = (void *) thr->ts.work_share->ordered_team_ids;
+	  else
+	    {
+	      uintptr_t p = (uintptr_t) thr->ts.work_share->ordered_team_ids;
+	      p += __alignof__ (long long) - 1;
+	      p &= ~(__alignof__ (long long) - 1);
+	      *mem = (void *) p;
+	    }
+	}
+    }
+
+  return ialias_call (GOMP_loop_ull_runtime_next) (istart, iend);
+}
+
 /* The *_ordered_*_start routines are similar.  The only difference is that
    this work-share construct is initialized to expect an ORDERED section.  */
 
@@ -206,7 +317,7 @@
   struct gomp_thread *thr = gomp_thread ();
 
   thr->ts.static_trip = 0;
-  if (gomp_work_share_start (true))
+  if (gomp_work_share_start (1))
     {
       gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
 			  GFS_STATIC, chunk_size);
@@ -225,7 +336,7 @@
   struct gomp_thread *thr = gomp_thread ();
   bool ret;
 
-  if (gomp_work_share_start (true))
+  if (gomp_work_share_start (1))
     {
       gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
 			  GFS_DYNAMIC, chunk_size);
@@ -251,7 +362,7 @@
   struct gomp_thread *thr = gomp_thread ();
   bool ret;
 
-  if (gomp_work_share_start (true))
+  if (gomp_work_share_start (1))
     {
       gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
 			  GFS_GUIDED, chunk_size);
@@ -275,7 +386,7 @@
 				     gomp_ull *iend)
 {
   struct gomp_task_icv *icv = gomp_icv (false);
-  switch (icv->run_sched_var)
+  switch (icv->run_sched_var & ~GFS_MONOTONIC)
     {
     case GFS_STATIC:
       return gomp_loop_ull_ordered_static_start (up, start, end, incr,
@@ -299,6 +410,82 @@
     }
 }
 
+bool
+GOMP_loop_ull_ordered_start (bool up, gomp_ull start, gomp_ull end,
+			     gomp_ull incr, long sched, gomp_ull chunk_size,
+			     gomp_ull *istart, gomp_ull *iend,
+			     uintptr_t *reductions, void **mem)
+{
+  struct gomp_thread *thr = gomp_thread ();
+  size_t ordered = 1;
+  bool ret;
+
+  thr->ts.static_trip = 0;
+  if (reductions)
+    gomp_workshare_taskgroup_start ();
+  if (mem)
+    ordered += (uintptr_t) *mem;
+  if (gomp_work_share_start (ordered))
+    {
+      sched = gomp_adjust_sched (sched, &chunk_size);
+      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
+			  sched, chunk_size);
+      if (reductions)
+	{
+	  GOMP_taskgroup_reduction_register (reductions);
+	  thr->task->taskgroup->workshare = true;
+	  thr->ts.work_share->task_reductions = reductions;
+	}
+      if (sched == GFS_STATIC)
+	gomp_ordered_static_init ();
+      else
+	gomp_mutex_lock (&thr->ts.work_share->lock);
+      gomp_work_share_init_done ();
+    }
+  else
+    {
+      if (reductions)
+	{
+	  uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
+	  gomp_workshare_task_reduction_register (reductions,
+						  first_reductions);
+	}
+      sched = thr->ts.work_share->sched;
+      if (sched != GFS_STATIC)
+	gomp_mutex_lock (&thr->ts.work_share->lock);
+    }
+
+  if (mem)
+    {
+      uintptr_t p
+	= (uintptr_t) (thr->ts.work_share->ordered_team_ids
+		       + (thr->ts.team ? thr->ts.team->nthreads : 1));
+      p += __alignof__ (long long) - 1;
+      p &= ~(__alignof__ (long long) - 1);
+      *mem = (void *) p;
+    }
+
+  switch (sched)
+    {
+    case GFS_STATIC:
+    case GFS_AUTO:
+      return !gomp_iter_ull_static_next (istart, iend);
+    case GFS_DYNAMIC:
+      ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
+      break;
+    case GFS_GUIDED:
+      ret = gomp_iter_ull_guided_next_locked (istart, iend);
+      break;
+    default:
+      abort ();
+    }
+
+  if (ret)
+    gomp_ordered_first ();
+  gomp_mutex_unlock (&thr->ts.work_share->lock);
+  return ret;
+}
+
 /* The *_doacross_*_start routines are similar.  The only difference is that
    this work-share construct is initialized to expect an ORDERED(N) - DOACROSS
    section, and the worksharing loop iterates always from 0 to COUNTS[0] - 1
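Both GOMP_loop_ull_start and GOMP_loop_ull_ordered_start round the pointer they return through *mem up to __alignof__ (long long), using the usual "add alignment minus one, then mask" idiom that also appears in the INLINE_ORDERED_TEAM_IDS_OFF macro. A self-contained restatement with a worked example (assuming an 8-byte alignment for long long on the target):

#include <stdint.h>

/* ALIGN must be a power of two.  */
static uintptr_t
align_up (uintptr_t p, uintptr_t align)
{
  return (p + align - 1) & ~(align - 1);
}

/* With __alignof__ (long long) == 8:
   align_up (0x1003, 8) == 0x1008 and align_up (0x1008, 8) == 0x1008.  */
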
@@ -313,11 +500,11 @@
   struct gomp_thread *thr = gomp_thread ();
 
   thr->ts.static_trip = 0;
-  if (gomp_work_share_start (false))
+  if (gomp_work_share_start (0))
     {
       gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
 			  GFS_STATIC, chunk_size);
-      gomp_doacross_ull_init (ncounts, counts, chunk_size);
+      gomp_doacross_ull_init (ncounts, counts, chunk_size, 0);
       gomp_work_share_init_done ();
     }
 
@@ -332,11 +519,11 @@
   struct gomp_thread *thr = gomp_thread ();
   bool ret;
 
-  if (gomp_work_share_start (false))
+  if (gomp_work_share_start (0))
     {
       gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
 			  GFS_DYNAMIC, chunk_size);
-      gomp_doacross_ull_init (ncounts, counts, chunk_size);
+      gomp_doacross_ull_init (ncounts, counts, chunk_size, 0);
       gomp_work_share_init_done ();
     }
 
@@ -359,11 +546,11 @@
   struct gomp_thread *thr = gomp_thread ();
   bool ret;
 
-  if (gomp_work_share_start (false))
+  if (gomp_work_share_start (0))
     {
       gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
 			  GFS_GUIDED, chunk_size);
-      gomp_doacross_ull_init (ncounts, counts, chunk_size);
+      gomp_doacross_ull_init (ncounts, counts, chunk_size, 0);
       gomp_work_share_init_done ();
     }
 
@@ -383,7 +570,7 @@
 				      gomp_ull *istart, gomp_ull *iend)
 {
   struct gomp_task_icv *icv = gomp_icv (false);
-  switch (icv->run_sched_var)
+  switch (icv->run_sched_var & ~GFS_MONOTONIC)
     {
     case GFS_STATIC:
       return gomp_loop_ull_doacross_static_start (ncounts, counts,
@@ -407,6 +594,51 @@
     }
 }
 
+bool
+GOMP_loop_ull_doacross_start (unsigned ncounts, gomp_ull *counts,
+			      long sched, gomp_ull chunk_size,
+			      gomp_ull *istart, gomp_ull *iend,
+			      uintptr_t *reductions, void **mem)
+{
+  struct gomp_thread *thr = gomp_thread ();
+
+  thr->ts.static_trip = 0;
+  if (reductions)
+    gomp_workshare_taskgroup_start ();
+  if (gomp_work_share_start (0))
+    {
+      size_t extra = 0;
+      if (mem)
+	extra = (uintptr_t) *mem;
+      sched = gomp_adjust_sched (sched, &chunk_size);
+      gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
+			  sched, chunk_size);
+      gomp_doacross_ull_init (ncounts, counts, chunk_size, extra);
+      if (reductions)
+	{
+	  GOMP_taskgroup_reduction_register (reductions);
+	  thr->task->taskgroup->workshare = true;
+	  thr->ts.work_share->task_reductions = reductions;
+	}
+      gomp_work_share_init_done ();
+    }
+  else
+    {
+      if (reductions)
+	{
+	  uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
+	  gomp_workshare_task_reduction_register (reductions,
+						  first_reductions);
+	}
+      sched = thr->ts.work_share->sched;
+    }
+
+  if (mem)
+    *mem = thr->ts.work_share->doacross->extra;
+
+  return ialias_call (GOMP_loop_ull_runtime_next) (istart, iend);
+}
+
 /* The *_next routines are called when the thread completes processing of
    the iteration block currently assigned to it.  If the work-share
    construct is bound directly to a parallel construct, then the iteration
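GOMP_loop_ull_doacross_start in the hunk above is the doacross counterpart of GOMP_loop_ull_start: counts[] describes the collapsed ordered(N) iteration space, any scratch request in *mem is forwarded to gomp_doacross_ull_init as its new extra argument, and the resulting per-work-share area is handed back through *mem as doacross->extra. A rough calling sketch for a collapsed doacross loop with no reductions and no scratch memory; the prototypes and the GFS_STATIC value are assumptions, and real compiler output additionally emits the doacross post/wait calls inside the body:

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

typedef unsigned long long gomp_ull;
enum { GFS_STATIC = 1 };	/* assumed value, see the encoding sketch above */

/* Prototypes as assumed from libgomp's internal libgomp_g.h (sketch).  */
extern bool GOMP_loop_ull_doacross_start (unsigned, gomp_ull *, long,
					  gomp_ull, gomp_ull *, gomp_ull *,
					  uintptr_t *, void **);
extern bool GOMP_loop_ull_runtime_next (gomp_ull *, gomp_ull *);
extern void GOMP_loop_end (void);

static void
doacross_sketch (gomp_ull n, gomp_ull m)
{
  gomp_ull counts[2] = { n, m };	/* ordered(2) loop nest, sizes n and m */
  gomp_ull istart, iend;
  if (GOMP_loop_ull_doacross_start (2, counts, GFS_STATIC, 0,
				    &istart, &iend, NULL, NULL))
    do
      {
	for (gomp_ull i = istart; i < iend; i++)
	  ;	/* ... body; cross-iteration dependences post/wait here ... */
      }
    while (GOMP_loop_ull_runtime_next (&istart, &iend));
  GOMP_loop_end ();
}
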
@@ -570,6 +802,10 @@
 	__attribute__((alias ("gomp_loop_ull_dynamic_start")));
 extern __typeof(gomp_loop_ull_guided_start) GOMP_loop_ull_nonmonotonic_guided_start
 	__attribute__((alias ("gomp_loop_ull_guided_start")));
+extern __typeof(GOMP_loop_ull_runtime_start) GOMP_loop_ull_nonmonotonic_runtime_start
+	__attribute__((alias ("GOMP_loop_ull_runtime_start")));
+extern __typeof(GOMP_loop_ull_runtime_start) GOMP_loop_ull_maybe_nonmonotonic_runtime_start
+	__attribute__((alias ("GOMP_loop_ull_runtime_start")));
 
 extern __typeof(gomp_loop_ull_ordered_static_start) GOMP_loop_ull_ordered_static_start
 	__attribute__((alias ("gomp_loop_ull_ordered_static_start")));
@@ -595,6 +831,10 @@
 	__attribute__((alias ("gomp_loop_ull_dynamic_next")));
 extern __typeof(gomp_loop_ull_guided_next) GOMP_loop_ull_nonmonotonic_guided_next
 	__attribute__((alias ("gomp_loop_ull_guided_next")));
+extern __typeof(GOMP_loop_ull_runtime_next) GOMP_loop_ull_nonmonotonic_runtime_next
+	__attribute__((alias ("GOMP_loop_ull_runtime_next")));
+extern __typeof(GOMP_loop_ull_runtime_next) GOMP_loop_ull_maybe_nonmonotonic_runtime_next
+	__attribute__((alias ("GOMP_loop_ull_runtime_next")));
 
 extern __typeof(gomp_loop_ull_ordered_static_next) GOMP_loop_ull_ordered_static_next
 	__attribute__((alias ("gomp_loop_ull_ordered_static_next")));
@@ -650,6 +890,23 @@
 }
 
 bool
+GOMP_loop_ull_nonmonotonic_runtime_start (bool up, gomp_ull start,
+					  gomp_ull end, gomp_ull incr,
+					  gomp_ull *istart, gomp_ull *iend)
+{
+  return GOMP_loop_ull_runtime_start (up, start, end, incr, istart, iend);
+}
+
+bool
+GOMP_loop_ull_maybe_nonmonotonic_runtime_start (bool up, gomp_ull start,
+						gomp_ull end, gomp_ull incr,
+						gomp_ull *istart,
+						gomp_ull *iend)
+{
+  return GOMP_loop_ull_runtime_start (up, start, end, incr, istart, iend);
+}
+
+bool
 GOMP_loop_ull_ordered_static_start (bool up, gomp_ull start, gomp_ull end,
 				    gomp_ull incr, gomp_ull chunk_size,
 				    gomp_ull *istart, gomp_ull *iend)
@@ -734,6 +991,19 @@
 }
 
 bool
+GOMP_loop_ull_nonmonotonic_runtime_next (gomp_ull *istart, gomp_ull *iend)
+{
+  return GOMP_loop_ull_runtime_next (istart, iend);
+}
+
+bool
+GOMP_loop_ull_maybe_nonmonotonic_runtime_next (gomp_ull *istart,
+					       gomp_ull *iend)
+{
+  return GOMP_loop_ull_runtime_next (istart, iend);
+}
+
+bool
 GOMP_loop_ull_ordered_static_next (gomp_ull *istart, gomp_ull *iend)
 {
   return gomp_loop_ull_ordered_static_next (istart, iend);