/* Copyright (C) 2005, 2008, 2009 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU OpenMP Library (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file contains routines for managing work-share iteration, both
   for loops and sections.  */
#include "libgomp.h"
#include <stdlib.h>

/* Shorthand for the unsigned 64-bit iteration type used by all of the
   "_ull" work-share iterators in this file.  */
typedef unsigned long long gomp_ull;
|
|
32
|
|
/* This function implements the STATIC scheduling method.  The caller should
   iterate *pstart <= x < *pend.  Return zero if there are more iterations
   to perform; nonzero if not.  Return less than 0 if this thread had
   received the absolutely last iteration.  */

int
gomp_iter_ull_static_next (gomp_ull *pstart, gomp_ull *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_work_share *ws = thr->ts.work_share;
  unsigned long nthreads = team ? team->nthreads : 1;

  /* static_trip == -1 records that this thread already handed out the
     absolutely last chunk; nothing further to do.  */
  if (thr->ts.static_trip == -1)
    return -1;

  /* Quick test for degenerate teams and orphaned constructs.  */
  if (nthreads == 1)
    {
      *pstart = ws->next_ull;
      *pend = ws->end_ull;
      thr->ts.static_trip = -1;
      /* Nonzero (no more iterations) only when the range is empty.  */
      return ws->next_ull == ws->end_ull;
    }

  /* We interpret chunk_size zero as "unspecified", which means that we
     should break up the iterations such that each thread makes only one
     trip through the outer loop.  */
  if (ws->chunk_size_ull == 0)
    {
      gomp_ull n, q, i, s0, e0, s, e;

      /* Each thread gets at most one block here, so any second trip is
	 already finished.  */
      if (thr->ts.static_trip > 0)
	return 1;

      /* Compute the total number of iterations.  NOTE(review): a nonzero
	 ws->mode appears to select the downward-counting (negative
	 increment) form; ws->mode is set by loop-init code not visible in
	 this file — confirm against gomp_loop_ull_init.  */
      if (__builtin_expect (ws->mode, 0) == 0)
	n = (ws->end_ull - ws->next_ull + ws->incr_ull - 1) / ws->incr_ull;
      else
	n = (ws->next_ull - ws->end_ull - ws->incr_ull - 1) / -ws->incr_ull;
      i = thr->ts.team_id;

      /* Compute the "zero-based" start and end points.  That is, as
	 if the loop began at zero and incremented by one.  */
      q = n / nthreads;
      q += (q * nthreads != n);	/* round block size up when not exact */
      s0 = q * i;
      e0 = s0 + q;
      if (e0 > n)
	e0 = n;

      /* Notice when no iterations allocated for this thread.  */
      if (s0 >= e0)
	{
	  thr->ts.static_trip = 1;
	  return 1;
	}

      /* Transform these to the actual start and end numbers.  */
      s = s0 * ws->incr_ull + ws->next_ull;
      e = e0 * ws->incr_ull + ws->next_ull;

      *pstart = s;
      *pend = e;
      /* -1 marks that this thread received the final block.  */
      thr->ts.static_trip = (e0 == n ? -1 : 1);
      return 0;
    }
  else
    {
      gomp_ull n, s0, e0, i, c, s, e;

      /* Otherwise, each thread gets exactly chunk_size iterations
	 (if available) each time through the loop.  */

      /* Total iteration count; see the NOTE on ws->mode above.  */
      if (__builtin_expect (ws->mode, 0) == 0)
	n = (ws->end_ull - ws->next_ull + ws->incr_ull - 1) / ws->incr_ull;
      else
	n = (ws->next_ull - ws->end_ull - ws->incr_ull - 1) / -ws->incr_ull;
      i = thr->ts.team_id;
      c = ws->chunk_size_ull;

      /* Initial guess is a C sized chunk positioned nthreads iterations
	 in, offset by our thread number.  static_trip counts how many
	 rounds this thread has already taken.  */
      s0 = (thr->ts.static_trip * (gomp_ull) nthreads + i) * c;
      e0 = s0 + c;

      /* Detect overflow.  */
      if (s0 >= n)
	return 1;
      if (e0 > n)
	e0 = n;

      /* Transform these to the actual start and end numbers.  */
      s = s0 * ws->incr_ull + ws->next_ull;
      e = e0 * ws->incr_ull + ws->next_ull;

      *pstart = s;
      *pend = e;

      /* Remember whether this was the final chunk, or advance the
	 round counter for the next trip.  */
      if (e0 == n)
	thr->ts.static_trip = -1;
      else
	thr->ts.static_trip++;
      return 0;
    }
}
|
|
139
|
|
140
|
|
141 /* This function implements the DYNAMIC scheduling method. Arguments are
|
|
142 as for gomp_iter_ull_static_next. This function must be called with
|
|
143 ws->lock held. */
|
|
144
|
|
145 bool
|
|
146 gomp_iter_ull_dynamic_next_locked (gomp_ull *pstart, gomp_ull *pend)
|
|
147 {
|
|
148 struct gomp_thread *thr = gomp_thread ();
|
|
149 struct gomp_work_share *ws = thr->ts.work_share;
|
|
150 gomp_ull start, end, chunk, left;
|
|
151
|
|
152 start = ws->next_ull;
|
|
153 if (start == ws->end_ull)
|
|
154 return false;
|
|
155
|
|
156 chunk = ws->chunk_size_ull;
|
|
157 left = ws->end_ull - start;
|
|
158 if (__builtin_expect (ws->mode & 2, 0))
|
|
159 {
|
|
160 if (chunk < left)
|
|
161 chunk = left;
|
|
162 }
|
|
163 else
|
|
164 {
|
|
165 if (chunk > left)
|
|
166 chunk = left;
|
|
167 }
|
|
168 end = start + chunk;
|
|
169
|
|
170 ws->next_ull = end;
|
|
171 *pstart = start;
|
|
172 *pend = end;
|
|
173 return true;
|
|
174 }
|
|
175
|
|
176
|
|
#if defined HAVE_SYNC_BUILTINS && defined __LP64__
/* Similar, but doesn't require the lock held, and uses compare-and-swap
   instead.  Note that the only memory value that changes is ws->next_ull.  */

bool
gomp_iter_ull_dynamic_next (gomp_ull *pstart, gomp_ull *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_work_share *ws = thr->ts.work_share;
  gomp_ull start, end, nend, chunk;

  end = ws->end_ull;
  chunk = ws->chunk_size_ull;

  /* NOTE(review): mode bit 0 presumably means the loop-init code proved an
     unconditional fetch-and-add on next_ull cannot wrap past the usable
     range, so no CAS retry loop is needed — confirm in gomp_loop_ull_init.  */
  if (__builtin_expect (ws->mode & 1, 1))
    {
      gomp_ull tmp = __sync_fetch_and_add (&ws->next_ull, chunk);
      if (__builtin_expect (ws->mode & 2, 0) == 0)
	{
	  /* Upward-counting loop.  */
	  if (tmp >= end)
	    return false;
	  nend = tmp + chunk;
	  if (nend > end)
	    nend = end;
	  *pstart = tmp;
	  *pend = nend;
	  return true;
	}
      else
	{
	  /* Downward-counting loop: comparisons reversed.  */
	  if (tmp <= end)
	    return false;
	  nend = tmp + chunk;
	  if (nend < end)
	    nend = end;
	  *pstart = tmp;
	  *pend = nend;
	  return true;
	}
    }

  /* Slow path: claim a chunk with a compare-and-swap retry loop.  */
  start = ws->next_ull;
  while (1)
    {
      gomp_ull left = end - start;
      gomp_ull tmp;

      if (start == end)
	return false;

      /* Clamp the chunk to what remains; with mode & 2 (downward loop)
	 the unsigned comparison direction is reversed.  */
      if (__builtin_expect (ws->mode & 2, 0))
	{
	  if (chunk < left)
	    chunk = left;
	}
      else
	{
	  if (chunk > left)
	    chunk = left;
	}
      nend = start + chunk;

      /* Try to claim [start, nend).  On failure tmp holds the value some
	 other thread installed; retry from there.  */
      tmp = __sync_val_compare_and_swap (&ws->next_ull, start, nend);
      if (__builtin_expect (tmp == start, 1))
	break;

      start = tmp;
    }

  *pstart = start;
  *pend = nend;
  return true;
}
#endif /* HAVE_SYNC_BUILTINS */
|
|
251
|
|
252
|
|
253 /* This function implements the GUIDED scheduling method. Arguments are
|
|
254 as for gomp_iter_ull_static_next. This function must be called with the
|
|
255 work share lock held. */
|
|
256
|
|
257 bool
|
|
258 gomp_iter_ull_guided_next_locked (gomp_ull *pstart, gomp_ull *pend)
|
|
259 {
|
|
260 struct gomp_thread *thr = gomp_thread ();
|
|
261 struct gomp_work_share *ws = thr->ts.work_share;
|
|
262 struct gomp_team *team = thr->ts.team;
|
|
263 gomp_ull nthreads = team ? team->nthreads : 1;
|
|
264 gomp_ull n, q;
|
|
265 gomp_ull start, end;
|
|
266
|
|
267 if (ws->next_ull == ws->end_ull)
|
|
268 return false;
|
|
269
|
|
270 start = ws->next_ull;
|
|
271 if (__builtin_expect (ws->mode, 0) == 0)
|
|
272 n = (ws->end_ull - start) / ws->incr_ull;
|
|
273 else
|
|
274 n = (start - ws->end_ull) / -ws->incr_ull;
|
|
275 q = (n + nthreads - 1) / nthreads;
|
|
276
|
|
277 if (q < ws->chunk_size_ull)
|
|
278 q = ws->chunk_size_ull;
|
|
279 if (q <= n)
|
|
280 end = start + q * ws->incr_ull;
|
|
281 else
|
|
282 end = ws->end_ull;
|
|
283
|
|
284 ws->next_ull = end;
|
|
285 *pstart = start;
|
|
286 *pend = end;
|
|
287 return true;
|
|
288 }
|
|
289
|
|
#if defined HAVE_SYNC_BUILTINS && defined __LP64__
/* Similar, but doesn't require the lock held, and uses compare-and-swap
   instead.  Note that the only memory value that changes is ws->next_ull.  */

bool
gomp_iter_ull_guided_next (gomp_ull *pstart, gomp_ull *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_work_share *ws = thr->ts.work_share;
  struct gomp_team *team = thr->ts.team;
  gomp_ull nthreads = team ? team->nthreads : 1;
  gomp_ull start, end, nend, incr;
  gomp_ull chunk_size;

  /* Snapshot the work-share fields; only next_ull changes concurrently.  */
  start = ws->next_ull;
  end = ws->end_ull;
  incr = ws->incr_ull;
  chunk_size = ws->chunk_size_ull;

  while (1)
    {
      gomp_ull n, q;
      gomp_ull tmp;

      if (start == end)
	return false;

      /* Iterations remaining.  NOTE(review): a nonzero ws->mode appears
	 to select the downward (negative increment) form; mode is set by
	 loop-init code not visible in this file.  */
      if (__builtin_expect (ws->mode, 0) == 0)
	n = (end - start) / incr;
      else
	n = (start - end) / -incr;
      /* Guided schedule: a 1/nthreads share of the remainder, rounded
	 up, but never less than the requested chunk size.  */
      q = (n + nthreads - 1) / nthreads;

      if (q < chunk_size)
	q = chunk_size;
      if (__builtin_expect (q <= n, 1))
	nend = start + q * incr;
      else
	nend = end;

      /* Try to claim [start, nend).  On contention tmp holds the value
	 another thread installed; retry from there.  */
      tmp = __sync_val_compare_and_swap (&ws->next_ull, start, nend);
      if (__builtin_expect (tmp == start, 1))
	break;

      start = tmp;
    }

  *pstart = start;
  *pend = nend;
  return true;
}
#endif /* HAVE_SYNC_BUILTINS */
|