/* Copyright (C) 2005, 2008, 2009 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU OpenMP Library (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file contains routines for managing work-share iteration, both
   for loops and sections.  */

#include "libgomp.h"
#include <stdlib.h>

/* This function implements the STATIC scheduling method.  The caller should
   iterate *pstart <= x < *pend.  Return zero if there are more iterations
   to perform; nonzero if not.  Return less than 0 if this thread had
   received the absolutely last iteration.

   Per-thread progress is tracked in thr->ts.static_trip:
     0  = this thread has not yet taken a chunk,
     >0 = number of chunks already taken (chunked mode),
     -1 = this thread handed out the final iterations of the loop.  */

int
gomp_iter_static_next (long *pstart, long *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_work_share *ws = thr->ts.work_share;
  unsigned long nthreads = team ? team->nthreads : 1;

  /* This thread already produced the last iterations; nothing more.  */
  if (thr->ts.static_trip == -1)
    return -1;

  /* Quick test for degenerate teams and orphaned constructs.  */
  if (nthreads == 1)
    {
      /* Hand the single thread the whole remaining range in one trip.  */
      *pstart = ws->next;
      *pend = ws->end;
      thr->ts.static_trip = -1;
      /* Nonzero (no work) exactly when the range is already empty.  */
      return ws->next == ws->end;
    }

  /* We interpret chunk_size zero as "unspecified", which means that we
     should break up the iterations such that each thread makes only one
     trip through the outer loop.  */
  if (ws->chunk_size == 0)
    {
      unsigned long n, q, i;
      unsigned long s0, e0;
      long s, e;

      /* In the unchunked scheme each thread gets exactly one trip.  */
      if (thr->ts.static_trip > 0)
	return 1;

      /* Compute the total number of iterations.  Adding (incr - 1) for
	 positive increments (resp. incr + 1 for negative) makes the
	 division round the magnitude of the trip count upward.  */
      s = ws->incr + (ws->incr > 0 ? -1 : 1);
      n = (ws->end - ws->next + s) / ws->incr;
      i = thr->ts.team_id;

      /* Compute the "zero-based" start and end points.  That is, as
         if the loop began at zero and incremented by one.  */
      q = n / nthreads;
      /* Round the per-thread share up when nthreads doesn't divide n.  */
      q += (q * nthreads != n);
      s0 = q * i;
      e0 = s0 + q;
      if (e0 > n)
        e0 = n;

      /* Notice when no iterations allocated for this thread.  */
      if (s0 >= e0)
	{
	  thr->ts.static_trip = 1;
	  return 1;
	}

      /* Transform these to the actual start and end numbers.  */
      s = (long)s0 * ws->incr + ws->next;
      e = (long)e0 * ws->incr + ws->next;

      *pstart = s;
      *pend = e;
      /* If this share reaches n, this thread owns the very last
	 iteration; record that with the -1 sentinel.  */
      thr->ts.static_trip = (e0 == n ? -1 : 1);
      return 0;
    }
  else
    {
      unsigned long n, s0, e0, i, c;
      long s, e;

      /* Otherwise, each thread gets exactly chunk_size iterations
	 (if available) each time through the loop.  */

      /* Total trip count, rounding the magnitude upward as above.  */
      s = ws->incr + (ws->incr > 0 ? -1 : 1);
      n = (ws->end - ws->next + s) / ws->incr;
      i = thr->ts.team_id;
      c = ws->chunk_size;

      /* Initial guess is a C sized chunk positioned nthreads iterations
	 in, offset by our thread number.  */
      s0 = (thr->ts.static_trip * nthreads + i) * c;
      e0 = s0 + c;

      /* Detect overflow.  */
      if (s0 >= n)
	return 1;
      if (e0 > n)
	e0 = n;

      /* Transform these to the actual start and end numbers.  */
      s = (long)s0 * ws->incr + ws->next;
      e = (long)e0 * ws->incr + ws->next;

      *pstart = s;
      *pend = e;

      /* Mark completion when this chunk contains the last iteration;
	 otherwise advance to the next round-robin trip.  */
      if (e0 == n)
	thr->ts.static_trip = -1;
      else
	thr->ts.static_trip++;
      return 0;
    }
}
139 /* This function implements the DYNAMIC scheduling method. Arguments are
|
|
140 as for gomp_iter_static_next. This function must be called with ws->lock
|
|
141 held. */
|
|
142
|
|
143 bool
|
|
144 gomp_iter_dynamic_next_locked (long *pstart, long *pend)
|
|
145 {
|
|
146 struct gomp_thread *thr = gomp_thread ();
|
|
147 struct gomp_work_share *ws = thr->ts.work_share;
|
|
148 long start, end, chunk, left;
|
|
149
|
|
150 start = ws->next;
|
|
151 if (start == ws->end)
|
|
152 return false;
|
|
153
|
|
154 chunk = ws->chunk_size;
|
|
155 left = ws->end - start;
|
|
156 if (ws->incr < 0)
|
|
157 {
|
|
158 if (chunk < left)
|
|
159 chunk = left;
|
|
160 }
|
|
161 else
|
|
162 {
|
|
163 if (chunk > left)
|
|
164 chunk = left;
|
|
165 }
|
|
166 end = start + chunk;
|
|
167
|
|
168 ws->next = end;
|
|
169 *pstart = start;
|
|
170 *pend = end;
|
|
171 return true;
|
|
172 }

#ifdef HAVE_SYNC_BUILTINS
/* Similar, but doesn't require the lock held, and uses compare-and-swap
   instead.  Note that the only memory value that changes is ws->next.  */

bool
gomp_iter_dynamic_next (long *pstart, long *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_work_share *ws = thr->ts.work_share;
  long start, end, nend, chunk, incr;

  /* end, incr and chunk_size are invariant for the lifetime of the
     work share, so unsynchronized reads are safe here.  */
  end = ws->end;
  incr = ws->incr;
  chunk = ws->chunk_size;

  /* Fast path: when ws->mode is nonzero the setup code determined that
     ws->next can be advanced past ws->end without wrapping, so a plain
     atomic fetch-and-add suffices (no CAS retry loop needed).  */
  if (__builtin_expect (ws->mode, 1))
    {
      long tmp = __sync_fetch_and_add (&ws->next, chunk);
      if (incr > 0)
	{
	  /* A start at or past end means the range was exhausted.  */
	  if (tmp >= end)
	    return false;
	  nend = tmp + chunk;
	  if (nend > end)
	    nend = end;       /* Trim the final partial chunk.  */
	  *pstart = tmp;
	  *pend = nend;
	  return true;
	}
      else
	{
	  /* Mirror image for a downward-counting loop.  */
	  if (tmp <= end)
	    return false;
	  nend = tmp + chunk;
	  if (nend < end)
	    nend = end;       /* Trim the final partial chunk.  */
	  *pstart = tmp;
	  *pend = nend;
	  return true;
	}
    }

  /* Slow path: claim [start, nend) via compare-and-swap, retrying with
     the observed value whenever another thread moved ws->next first.  */
  start = ws->next;
  while (1)
    {
      long left = end - start;
      long tmp;

      if (start == end)
	return false;

      /* Clamp the chunk to the remaining iterations; the comparison
	 flips for negative increments where both values are negative.  */
      if (incr < 0)
	{
	  if (chunk < left)
	    chunk = left;
	}
      else
	{
	  if (chunk > left)
	    chunk = left;
	}
      nend = start + chunk;

      tmp = __sync_val_compare_and_swap (&ws->next, start, nend);
      if (__builtin_expect (tmp == start, 1))
	break;

      /* Lost the race; retry from the value another thread installed.  */
      start = tmp;
    }

  *pstart = start;
  *pend = nend;
  return true;
}
#endif /* HAVE_SYNC_BUILTINS */

252 /* This function implements the GUIDED scheduling method. Arguments are
|
|
253 as for gomp_iter_static_next. This function must be called with the
|
|
254 work share lock held. */
|
|
255
|
|
256 bool
|
|
257 gomp_iter_guided_next_locked (long *pstart, long *pend)
|
|
258 {
|
|
259 struct gomp_thread *thr = gomp_thread ();
|
|
260 struct gomp_work_share *ws = thr->ts.work_share;
|
|
261 struct gomp_team *team = thr->ts.team;
|
|
262 unsigned long nthreads = team ? team->nthreads : 1;
|
|
263 unsigned long n, q;
|
|
264 long start, end;
|
|
265
|
|
266 if (ws->next == ws->end)
|
|
267 return false;
|
|
268
|
|
269 start = ws->next;
|
|
270 n = (ws->end - start) / ws->incr;
|
|
271 q = (n + nthreads - 1) / nthreads;
|
|
272
|
|
273 if (q < ws->chunk_size)
|
|
274 q = ws->chunk_size;
|
|
275 if (q <= n)
|
|
276 end = start + q * ws->incr;
|
|
277 else
|
|
278 end = ws->end;
|
|
279
|
|
280 ws->next = end;
|
|
281 *pstart = start;
|
|
282 *pend = end;
|
|
283 return true;
|
|
284 }

#ifdef HAVE_SYNC_BUILTINS
/* Similar, but doesn't require the lock held, and uses compare-and-swap
   instead.  Note that the only memory value that changes is ws->next.  */

bool
gomp_iter_guided_next (long *pstart, long *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_work_share *ws = thr->ts.work_share;
  struct gomp_team *team = thr->ts.team;
  unsigned long nthreads = team ? team->nthreads : 1;
  long start, end, nend, incr;
  unsigned long chunk_size;

  /* Only ws->next changes concurrently; end, incr and chunk_size are
     invariant for the lifetime of the work share.  The initial read of
     ws->next may be stale, but the CAS below detects that and retries.  */
  start = ws->next;
  end = ws->end;
  incr = ws->incr;
  chunk_size = ws->chunk_size;

  while (1)
    {
      unsigned long n, q;
      long tmp;

      if (start == end)
	return false;

      /* Guided schedule: claim 1/nthreads of the remaining trip count
	 (rounded up), but never less than chunk_size iterations.  */
      n = (end - start) / incr;
      q = (n + nthreads - 1) / nthreads;

      if (q < chunk_size)
	q = chunk_size;
      if (__builtin_expect (q <= n, 1))
	nend = start + q * incr;
      else
	nend = end;       /* Fewer than q iterations remain; take them all.  */

      tmp = __sync_val_compare_and_swap (&ws->next, start, nend);
      if (__builtin_expect (tmp == start, 1))
	break;

      /* Lost the race; retry from the value another thread installed.  */
      start = tmp;
    }

  *pstart = start;
  *pend = nend;
  return true;
}
#endif /* HAVE_SYNC_BUILTINS */