libgomp/iter.c @ 0:a06113de4d67 -- "first commit"

author:   kent <kent@cr.ie.u-ryukyu.ac.jp>
date:     Fri, 17 Jul 2009 14:47:48 +0900
parents:  (none)
children: 04ced10e8804
/* Copyright (C) 2005, 2008, 2009 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU OpenMP Library (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file contains routines for managing work-share iteration, both
   for loops and sections.  */

#include "libgomp.h"
#include <stdlib.h>

/* This function implements the STATIC scheduling method.  The caller should
   iterate *pstart <= x < *pend.  Return zero if there are more iterations
   to perform; nonzero if not.  Return less than 0 if this thread had
   received the absolutely last iteration.  */

int
gomp_iter_static_next (long *pstart, long *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_work_share *ws = thr->ts.work_share;
  unsigned long nthreads = team ? team->nthreads : 1;

  if (thr->ts.static_trip == -1)
    return -1;

  /* Quick test for degenerate teams and orphaned constructs.  */
  if (nthreads == 1)
    {
      *pstart = ws->next;
      *pend = ws->end;
      thr->ts.static_trip = -1;
      return ws->next == ws->end;
    }

  /* We interpret chunk_size zero as "unspecified", which means that we
     should break up the iterations such that each thread makes only one
     trip through the outer loop.  */
  if (ws->chunk_size == 0)
    {
      unsigned long n, q, i;
      unsigned long s0, e0;
      long s, e;

      if (thr->ts.static_trip > 0)
        return 1;

      /* Compute the total number of iterations.  */
      s = ws->incr + (ws->incr > 0 ? -1 : 1);
      n = (ws->end - ws->next + s) / ws->incr;
      i = thr->ts.team_id;

      /* Compute the "zero-based" start and end points.  That is, as
         if the loop began at zero and incremented by one.  */
      q = n / nthreads;
      q += (q * nthreads != n);
      s0 = q * i;
      e0 = s0 + q;
      if (e0 > n)
        e0 = n;

      /* Notice when no iterations allocated for this thread.  */
      if (s0 >= e0)
        {
          thr->ts.static_trip = 1;
          return 1;
        }

      /* Transform these to the actual start and end numbers.  */
      s = (long)s0 * ws->incr + ws->next;
      e = (long)e0 * ws->incr + ws->next;

      *pstart = s;
      *pend = e;
      thr->ts.static_trip = (e0 == n ? -1 : 1);
      return 0;
    }
  else
    {
      unsigned long n, s0, e0, i, c;
      long s, e;

      /* Otherwise, each thread gets exactly chunk_size iterations
         (if available) each time through the loop.  */

      s = ws->incr + (ws->incr > 0 ? -1 : 1);
      n = (ws->end - ws->next + s) / ws->incr;
      i = thr->ts.team_id;
      c = ws->chunk_size;

      /* Initial guess is a C sized chunk positioned nthreads iterations
         in, offset by our thread number.  */
      s0 = (thr->ts.static_trip * nthreads + i) * c;
      e0 = s0 + c;

      /* Detect overflow.  */
      if (s0 >= n)
        return 1;
      if (e0 > n)
        e0 = n;

      /* Transform these to the actual start and end numbers.  */
      s = (long)s0 * ws->incr + ws->next;
      e = (long)e0 * ws->incr + ws->next;

      *pstart = s;
      *pend = e;

      if (e0 == n)
        thr->ts.static_trip = -1;
      else
        thr->ts.static_trip++;
      return 0;
    }
}
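
/* Editorial worked example (not part of the upstream source): for a loop
   with ws->next = 0, ws->end = 10, ws->incr = 1 and nthreads = 4, the
   unspecified-chunk branch computes n = 10 and q = ceil(10/4) = 3, so
   thread i is handed [3*i, min(3*i + 3, 10)): thread 0 gets [0,3),
   thread 1 [3,6), thread 2 [6,9) and thread 3 the short tail [9,10).
   With chunk_size = 2 instead, trip t gives thread i the window
   s0 = (t*4 + i)*2: thread 1 takes [2,4) on its first call, and on the
   second call s0 = 10 >= n, so it is done; thread 0 takes [0,2) then
   [8,10), and since e0 == n its static_trip is set to -1.  */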

/* This function implements the DYNAMIC scheduling method.  Arguments are
   as for gomp_iter_static_next.  This function must be called with ws->lock
   held.  */

bool
gomp_iter_dynamic_next_locked (long *pstart, long *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_work_share *ws = thr->ts.work_share;
  long start, end, chunk, left;

  start = ws->next;
  if (start == ws->end)
    return false;

  chunk = ws->chunk_size;
  left = ws->end - start;
  if (ws->incr < 0)
    {
      if (chunk < left)
        chunk = left;
    }
  else
    {
      if (chunk > left)
        chunk = left;
    }
  end = start + chunk;

  ws->next = end;
  *pstart = start;
  *pend = end;
  return true;
}
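
/* Editorial worked example (not part of the upstream source): for an
   ascending loop with ws->next = 6, ws->end = 10, ws->incr = 1 and a
   requested chunk of 8 iterations, left = 4 clamps the chunk, so the
   caller receives [6,10), ws->next advances to 10, and the next call
   returns false.  Note that chunk is treated as a signed distance with
   the same sign as incr (the work-share setup code is expected to have
   pre-scaled it for dynamic schedules), which is why descending loops
   use the inverted comparison above.  */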

#ifdef HAVE_SYNC_BUILTINS
/* Similar, but doesn't require the lock held, and uses compare-and-swap
   instead.  Note that the only memory value that changes is ws->next.  */

bool
gomp_iter_dynamic_next (long *pstart, long *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_work_share *ws = thr->ts.work_share;
  long start, end, nend, chunk, incr;

  end = ws->end;
  incr = ws->incr;
  chunk = ws->chunk_size;

  if (__builtin_expect (ws->mode, 1))
    {
      long tmp = __sync_fetch_and_add (&ws->next, chunk);
      if (incr > 0)
        {
          if (tmp >= end)
            return false;
          nend = tmp + chunk;
          if (nend > end)
            nend = end;
          *pstart = tmp;
          *pend = nend;
          return true;
        }
      else
        {
          if (tmp <= end)
            return false;
          nend = tmp + chunk;
          if (nend < end)
            nend = end;
          *pstart = tmp;
          *pend = nend;
          return true;
        }
    }

  start = ws->next;
  while (1)
    {
      long left = end - start;
      long tmp;

      if (start == end)
        return false;

      if (incr < 0)
        {
          if (chunk < left)
            chunk = left;
        }
      else
        {
          if (chunk > left)
            chunk = left;
        }
      nend = start + chunk;

      tmp = __sync_val_compare_and_swap (&ws->next, start, nend);
      if (__builtin_expect (tmp == start, 1))
        break;

      start = tmp;
    }

  *pstart = start;
  *pend = nend;
  return true;
}
#endif /* HAVE_SYNC_BUILTINS */
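
/* Editorial note (not part of the upstream source): the ws->mode fast
   path trades precision for speed.  __sync_fetch_and_add may publish a
   ws->next beyond ws->end -- e.g. next = 9, end = 10, chunk = 4 leaves
   ws->next at 13 -- which is harmless, since every later caller observes
   tmp >= end and returns false; presumably mode is only set when such
   overshoot cannot wrap the iteration space.  The CAS path instead
   clamps the chunk before publishing and, when another thread wins the
   race, retries from the value it saw in ws->next.  */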

/* This function implements the GUIDED scheduling method.  Arguments are
   as for gomp_iter_static_next.  This function must be called with the
   work share lock held.  */

bool
gomp_iter_guided_next_locked (long *pstart, long *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_work_share *ws = thr->ts.work_share;
  struct gomp_team *team = thr->ts.team;
  unsigned long nthreads = team ? team->nthreads : 1;
  unsigned long n, q;
  long start, end;

  if (ws->next == ws->end)
    return false;

  start = ws->next;
  n = (ws->end - start) / ws->incr;
  q = (n + nthreads - 1) / nthreads;

  if (q < ws->chunk_size)
    q = ws->chunk_size;
  if (q <= n)
    end = start + q * ws->incr;
  else
    end = ws->end;

  ws->next = end;
  *pstart = start;
  *pend = end;
  return true;
}
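
/* Editorial worked example (not part of the upstream source): guided
   scheduling hands out shrinking chunks.  With ws->next = 0,
   ws->end = 100, ws->incr = 1, chunk_size = 1 and nthreads = 4,
   successive calls carve off q = ceil(remaining / 4) iterations:
   25, 19, 14, 11, 8, ... down to the chunk_size floor, so early chunks
   are coarse and the tail stays fine-grained for load balancing.  */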

#ifdef HAVE_SYNC_BUILTINS
/* Similar, but doesn't require the lock held, and uses compare-and-swap
   instead.  Note that the only memory value that changes is ws->next.  */

bool
gomp_iter_guided_next (long *pstart, long *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_work_share *ws = thr->ts.work_share;
  struct gomp_team *team = thr->ts.team;
  unsigned long nthreads = team ? team->nthreads : 1;
  long start, end, nend, incr;
  unsigned long chunk_size;

  start = ws->next;
  end = ws->end;
  incr = ws->incr;
  chunk_size = ws->chunk_size;

  while (1)
    {
      unsigned long n, q;
      long tmp;

      if (start == end)
        return false;

      n = (end - start) / incr;
      q = (n + nthreads - 1) / nthreads;

      if (q < chunk_size)
        q = chunk_size;
      if (__builtin_expect (q <= n, 1))
        nend = start + q * incr;
      else
        nend = end;

      tmp = __sync_val_compare_and_swap (&ws->next, start, nend);
      if (__builtin_expect (tmp == start, 1))
        break;

      start = tmp;
    }

  *pstart = start;
  *pend = nend;
  return true;
}
#endif /* HAVE_SYNC_BUILTINS */
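
/* Editorial sketch (hypothetical, not part of the upstream source): a
   caller -- in libgomp, the GOMP_loop_* entry points in loop.c -- is
   expected to drive one of these iterators in a fetch/execute loop
   along these lines, where run_body stands in for user code:

     long start, end;
     while (gomp_iter_dynamic_next (&start, &end))
       for (long i = start; i != end; i++)   (assuming incr == 1)
         run_body (i);

   Each *pstart / *pend pair is half-open and already expressed in terms
   of the original iteration variable, so the caller applies no further
   scaling.  */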