comparison libcilkrts/include/internal/cilk_fake.h @ 111:04ced10e8804

gcc 7
author kono
date Fri, 27 Oct 2017 22:46:09 +0900
parents
children
comparison
equal deleted inserted replaced
68:561a7518be6b 111:04ced10e8804
1 /* cilk_fake.h -*-C++-*-
2 *
3 *************************************************************************
4 *
5 * Copyright (C) 2011-2016, Intel Corporation
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * * Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * * Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in
16 * the documentation and/or other materials provided with the
17 * distribution.
18 * * Neither the name of Intel Corporation nor the names of its
19 * contributors may be used to endorse or promote products derived
20 * from this software without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
28 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
29 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
30 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
32 * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33 * POSSIBILITY OF SUCH DAMAGE.
34 *
35 * *********************************************************************
36 *
37 * PLEASE NOTE: This file is a downstream copy of a file maintained in
38 * a repository at cilkplus.org. Changes made to this file that are not
39 * submitted through the contribution process detailed at
40 * http://www.cilkplus.org/submit-cilk-contribution will be lost the next
41 * time that a new version is released. Changes only submitted to the
42 * GNU compiler collection or posted to the git repository at
43 * https://bitbucket.org/intelcilkruntime/intel-cilk-runtime.git are
44 * not tracked.
45 *
46 * We welcome your contributions to this open source project. Thank you
47 * for your assistance in helping us improve Cilk Plus.
48 **************************************************************************/
49
50 /**
51 * @file cilk_fake.h
52 *
53 * @brief Macros to simulate a compiled Cilk program.
54 *
55 * Used carefully, these macros can be used to create a Cilk program with a
56 * non-Cilk compiler by manually inserting the code necessary for interacting
57 * with the Cilk runtime library. They are not intended to be pretty (you
58 * wouldn't want to write a whole program using these macros), but they are
59 * useful for experiments. They also work well as an illustration of what the
60 * compiler generates.
61 *
62 * Details of the mechanisms used in these macros are described in
63 * design-notes/CilkPlusABI.docx
64 *
65 * Example 1: fib in C++
66 * ---------------------
67 *
68 * #include <internal/cilk_fake.h>
69 *
70 * int fib(int n)
71 * {
72 * CILK_FAKE_PROLOG();
73 *
74 * if (n < 2)
75 * return n;
76 *
77 * int a, b;
78 * CILK_FAKE_SPAWN_R(a, fib(n - 1));
79 * b = fib(n - 2);
80 * CILK_FAKE_SYNC();
81 *
82 * return a + b;
83 * }
84 *
85 *
86 * Example 2: fib in C
87 * -------------------
88 *
89 * #include <internal/cilk_fake.h>
90 *
91 * int fib(int n);
92 *
93 * void fib_spawn_helper(__cilkrts_stack_frame* parent_sf, int* a, int n)
94 * {
95 * CILK_FAKE_SPAWN_HELPER_PROLOG(*parent_sf);
* __cilk_fake_detach(&__cilk_sf);
96 * *a = fib(n - 1);
97 * CILK_FAKE_SPAWN_HELPER_EPILOG();
98 * }
99 *
100 * int fib(int n)
101 * {
102 * CILK_FAKE_PROLOG();
103 *
104 * if (n < 2)
105 * return n;
106 *
107 * int a, b;
108 * CILK_FAKE_CALL_SPAWN_HELPER(fib_spawn_helper(&__cilk_sf, &a, n));
109 * b = fib(n - 2);
110 * CILK_FAKE_SYNC();
111 *
112 * CILK_FAKE_EPILOG();
113 * return a + b;
114 * }
115 */
116
117 #ifndef INCLUDED_CILK_FAKE_DOT_H
118 #define INCLUDED_CILK_FAKE_DOT_H
119
// This header implements ABI version 1.  If __CILKRTS_ABI_VERSION is already
// defined but is less than 1, the data structures in <internal/abi.h> will
// not match what the facilities in this header expect.  Successful
// compilation therefore requires __CILKRTS_ABI_VERSION to be either
// undefined (in which case it is set to 1 below) or defined as 1 or greater.
// This check must stay ahead of the inclusion of <internal/abi.h>.
#ifndef __CILKRTS_ABI_VERSION
    // ABI version was not specified.  Set it to 1.
#   define __CILKRTS_ABI_VERSION 1
#elif __CILKRTS_ABI_VERSION < 1
    // ABI version was specified but is too old.  Fail compilation.
#   error cilk_fake.h requires an ABI version of 1 or greater
#endif
132
133 #include <internal/abi.h>
134
135 // alloca is defined in malloc.h on Windows, alloca.h on Linux
136 #ifndef _MSC_VER
137 #include <alloca.h>
138 #else
139 #include <malloc.h>
140 // Define offsetof
141 #include <stddef.h>
142 #endif
143
// ABI version tag, shifted into bits 24 and above, that the frame-entry code
// in this header stores in a stack frame's flags word.  Defined here so that
// this header can use a different version than the one defined in abi.h.
#define CILK_FAKE_VERSION_FLAG ((__CILKRTS_ABI_VERSION) << 24)
146
147 /* Initialize frame. To be called when worker is known */
148 __CILKRTS_INLINE void __cilk_fake_enter_frame_fast(__cilkrts_stack_frame *sf,
149 __cilkrts_worker *w)
150 {
151 sf->call_parent = w->current_stack_frame;
152 sf->worker = w;
153 sf->flags = CILK_FAKE_VERSION_FLAG;
154 w->current_stack_frame = sf;
155 }
156
157 /* Initialize frame. To be called when worker is not known */
158 __CILKRTS_INLINE void __cilk_fake_enter_frame(__cilkrts_stack_frame *sf)
159 {
160 __cilkrts_worker* w = __cilkrts_get_tls_worker();
161 uint32_t last_flag = 0;
162 if (! w) {
163 w = __cilkrts_bind_thread_1();
164 last_flag = CILK_FRAME_LAST;
165 }
166 __cilk_fake_enter_frame_fast(sf, w);
167 sf->flags |= last_flag;
168 }
169
170 /* Initialize frame. To be called within the spawn helper */
171 __CILKRTS_INLINE void __cilk_fake_helper_enter_frame(
172 __cilkrts_stack_frame *sf,
173 __cilkrts_stack_frame *parent_sf)
174 {
175 sf->worker = 0;
176 sf->call_parent = parent_sf;
177 }
178
179 /* Called from the spawn helper to push the parent continuation on the task
180 * deque so that it can be stolen.
181 */
182 __CILKRTS_INLINE void __cilk_fake_detach(__cilkrts_stack_frame *sf)
183 {
184 /* Initialize spawn helper frame.
185 * call_parent was saved in __cilk_fake_helper_enter_frame */
186 __cilkrts_stack_frame *parent = sf->call_parent;
187 __cilkrts_worker *w = parent->worker;
188 __cilk_fake_enter_frame_fast(sf, w);
189
190 /* Append a node to the pedigree */
191 sf->spawn_helper_pedigree = w->pedigree;
192 parent->parent_pedigree = w->pedigree;
193 w->pedigree.rank = 0;
194 w->pedigree.parent = &sf->spawn_helper_pedigree;
195
196 /* Push parent onto the task deque */
197 __cilkrts_stack_frame *volatile *tail = w->tail;
198 *tail++ = sf->call_parent;
199 /* The stores must be separated by a store fence (noop on x86)
200 * or the second store is a release (st8.rel on Itanium) */
201 w->tail = tail;
202 sf->flags |= CILK_FRAME_DETACHED;
203 }
204
/* Sentinel read by CILK_FAKE_FORCE_FRAME_PTR(), below.  Its value (8) has a
 * clear low bit and is never modified. */
static int __cilk_fake_dummy = 8;

/* Force the compiler to generate a frame pointer for the enclosing function.
 *
 * The macro contains a call to alloca() guarded by a test of the low bit of
 * __cilk_fake_dummy.  Because that variable is never changed, the alloca() is
 * never actually executed, but the 'if' statement and the variable are needed
 * so that the compiler does not attempt to optimize the call away.
 *
 * sf  Stack frame object; only named on the never-taken branch.
 */
#define CILK_FAKE_FORCE_FRAME_PTR(sf) do { \
        if (__builtin_expect(1 & __cilk_fake_dummy, 0)) { \
            (sf).worker = (__cilkrts_worker*) alloca(__cilk_fake_dummy); \
        } \
    } while (0)
217
/* Frame-entry policy, selected by CILK_FAKE_NO_SHRINKWRAP.
 *
 *   CILK_FAKE_INITIAL_ENTER_FRAME(sf)   used on function entry
 *   CILK_FAKE_DEFERRED_ENTER_FRAME(sf)  used just before a spawn
 */
#ifdef CILK_FAKE_NO_SHRINKWRAP
    /* "shrink-wrap" optimization disabled.  Initialize the frame immediately
     * on entry.  Do not initialize the frame on spawn (the deferred macro
     * only references sf so the argument is still used).
     */
#   define CILK_FAKE_INITIAL_ENTER_FRAME(sf) __cilk_fake_enter_frame(&(sf))
#   define CILK_FAKE_DEFERRED_ENTER_FRAME(sf) ((void) &(sf))
#else
    /* "shrink-wrap" optimization enabled.  Do not initialize the frame on
     * entry, except to clear the worker pointer.  Instead, defer the
     * initialization until the first spawn; a still-null worker pointer is
     * what identifies a frame that has not been entered yet.
     */
#   define CILK_FAKE_INITIAL_ENTER_FRAME(sf) ((void) ((sf).worker = 0))
#   define CILK_FAKE_DEFERRED_ENTER_FRAME(sf) do { \
        if ((sf).worker == 0) { \
            __cilk_fake_enter_frame(&(sf)); \
        } \
    } while (0)
#endif
235
/* Prologue of a spawning function.
 *
 * Declares the stack frame variable __cilk_sf in the enclosing scope, forces
 * a frame pointer, and performs the initial frame entry (which, depending on
 * CILK_FAKE_NO_SHRINKWRAP, may merely clear the worker pointer).  Because the
 * expansion starts with a declaration it is not wrapped in do/while.
 */
#define CILK_FAKE_PROLOG() \
    __cilk_fake_stack_frame __cilk_sf; \
    CILK_FAKE_FORCE_FRAME_PTR(__cilk_sf); \
    CILK_FAKE_INITIAL_ENTER_FRAME(__cilk_sf)
243
/* Prologue of a spawning function where the current worker is already known.
 *
 * Declares the stack frame variable __cilk_sf, forces a frame pointer, and
 * initializes the frame immediately with __cilk_fake_enter_frame_fast(), so
 * the worker is not looked up from TLS.
 *
 * w  Pointer to the current worker.
 */
#define CILK_FAKE_PROLOG_FAST(w) \
    __cilk_fake_stack_frame __cilk_sf; \
    CILK_FAKE_FORCE_FRAME_PTR(__cilk_sf); \
    __cilk_fake_enter_frame_fast(&__cilk_sf, (w))
252
/* Simulate a cilk_sync on the enclosing function's frame (__cilk_sf, as
 * declared by CILK_FAKE_PROLOG or CILK_FAKE_PROLOG_FAST). */
#define CILK_FAKE_SYNC() \
    CILK_FAKE_SYNC_IMP(__cilk_sf)
255
/* Epilog at the end of a spawning function.  Does a sync and calls the
 * runtime for leaving the frame.
 *
 * In C the cleanup is expanded in place.  In C++ the macro only references
 * __cilk_sf, because the epilogue is run automatically by the
 * __cilk_fake_stack_frame destructor.
 */
#ifndef __cplusplus
#   define CILK_FAKE_EPILOG() CILK_FAKE_CLEANUP_FRAME(__cilk_sf)
#else
#   define CILK_FAKE_EPILOG() ((void) __cilk_sf)
#endif
265
/* Implementation of the spawning function epilog.  See the CILK_FAKE_EPILOG
 * macro and the __cilk_fake_stack_frame destructor body.
 *
 * A frame whose worker pointer is still null was never entered, so nothing is
 * done for it.  Otherwise the frame is synced and popped off the call chain,
 * and __cilkrts_leave_frame() is called unless the flags word holds exactly
 * CILK_FAKE_VERSION_FLAG (i.e. no other flag bit is set).
 *
 * sf  Stack frame object (an lvalue, not a pointer).
 */
#define CILK_FAKE_CLEANUP_FRAME(sf) do { \
        if ((sf).worker) { \
            CILK_FAKE_SYNC_IMP(sf); \
            CILK_FAKE_POP_FRAME(sf); \
            if ((sf).flags != CILK_FAKE_VERSION_FLAG) { \
                __cilkrts_leave_frame(&(sf)); \
            } \
        } \
    } while (0)
276
/* Implementation of CILK_FAKE_SYNC with an explicit frame argument.
 *
 * When the frame is flagged CILK_FRAME_UNSYNCHED (treated as the unlikely
 * case), the worker's pedigree is saved into the frame, the floating-point
 * control state and a jump context are captured, and __cilkrts_sync() is
 * called on the direct return of CILK_SETJMP.  The worker's pedigree rank is
 * incremented in every case.
 *
 * sf  Stack frame object (an lvalue); it must have a non-null worker.
 */
#define CILK_FAKE_SYNC_IMP(sf) do { \
        if (__builtin_expect((sf).flags & CILK_FRAME_UNSYNCHED, 0)) { \
            (sf).parent_pedigree = (sf).worker->pedigree; \
            CILK_FAKE_SAVE_FP(sf); \
            if (CILK_SETJMP((sf).ctx) == 0) { \
                __cilkrts_sync(&(sf)); \
            } \
        } \
        ++(sf).worker->pedigree.rank; \
    } while (0)
287
/* Save the floating-point control registers (MXCSR and the x87 control word)
 * into the mxcsr and fpcsr members of a stack frame.
 *
 * The definition of CILK_FAKE_SAVE_FP is compiler specific (and architecture
 * specific on Windows).
 *
 * sf  Stack frame object (an lvalue) whose mxcsr/fpcsr members are written.
 */
#ifdef _MSC_VER
#   define MXCSR_OFFSET offsetof(struct __cilkrts_stack_frame, mxcsr)
#   define FPCSR_OFFSET offsetof(struct __cilkrts_stack_frame, fpcsr)
#   if defined(_M_IX86)
        /* Windows x86.
         * NOTE(review): 'mov eax, sf' is used with sf naming a frame object
         * rather than a pointer; confirm that this yields the frame's address
         * (an 'lea' may have been intended).  Left unchanged here. */
#       define CILK_FAKE_SAVE_FP(sf) do { \
            __asm \
            { \
                mov eax, sf \
                stmxcsr [eax+MXCSR_OFFSET] \
                fnstcw [eax+FPCSR_OFFSET] \
            } \
        } while (0)
#   elif defined(_M_X64)
        /* Windows Intel64 - Not needed - saved by setjmp call */
#       define CILK_FAKE_SAVE_FP(sf) ((void) sf)
#   else
#       error "Unknown architecture"
#   endif /* Microsoft architecture specifics */
#else
    /* Non-Windows.
     * Both instructions store INTO memory, so the two members are declared
     * as output ("=m") operands; declaring them as inputs would let the
     * compiler assume they are unchanged by the asm.  __volatile__ keeps the
     * statement from being dropped or merged now that it has outputs. */
#   define CILK_FAKE_SAVE_FP(sf) do { \
        __asm__ __volatile__ ("stmxcsr %0\n\t" \
                              "fnstcw %1" \
                              : "=m" ((sf).mxcsr), "=m" ((sf).fpcsr)); \
    } while (0)
#endif
318
/* Call the spawn helper as part of a fake spawn.
 *
 * Makes sure the enclosing function's frame (__cilk_sf) has been entered,
 * saves the floating-point control state, and captures a jump context in
 * __cilk_sf.ctx.  The helper call is evaluated only on the direct (zero)
 * return of CILK_SETJMP, which is hinted as the likely outcome.
 *
 * helper  Complete call expression invoking the spawn helper.
 */
#define CILK_FAKE_CALL_SPAWN_HELPER(helper) do { \
        CILK_FAKE_DEFERRED_ENTER_FRAME(__cilk_sf); \
        CILK_FAKE_SAVE_FP(__cilk_sf); \
        if (__builtin_expect(CILK_SETJMP(__cilk_sf.ctx) == 0, 1)) { \
            helper; \
        } \
    } while (0)
327
/* Body of a spawn helper function.
 *
 * Expands, in order, to: the helper prolog, the caller-supplied statements,
 * the detach, the spawned expression, and the helper epilog.
 *
 * parent_sf  Stack frame object of the spawning function.
 * expr       Expression to spawn; evaluated after the detach.
 * ...        Any number of statements to be executed before detaching.
 */
#define CILK_FAKE_SPAWN_HELPER_BODY(parent_sf, expr, ...) \
    CILK_FAKE_SPAWN_HELPER_PROLOG(parent_sf); \
    __VA_ARGS__; \
    __cilk_fake_detach(&__cilk_sf); \
    expr; \
    CILK_FAKE_SPAWN_HELPER_EPILOG()
338
/* Prolog for a spawn helper function.
 *
 * Declares the helper's own frame as __cilk_sf and records parent_sf as its
 * call parent.  The frame is not attached to a worker until
 * __cilk_fake_detach() is called.
 *
 * parent_sf  Stack frame object (an lvalue) of the spawning function.
 */
#define CILK_FAKE_SPAWN_HELPER_PROLOG(parent_sf) \
    __cilk_fake_spawn_helper_stack_frame __cilk_sf; \
    __cilk_fake_helper_enter_frame(&__cilk_sf, &(parent_sf))
343
/* Implementation of the spawn helper epilog.  See
 * CILK_FAKE_SPAWN_HELPER_EPILOG and the __cilk_fake_spawn_helper_stack_frame
 * destructor.
 *
 * Does nothing when the frame's worker pointer is null (the helper never
 * detached).  Otherwise pops the frame off the call chain and calls
 * __cilkrts_leave_frame() unconditionally.
 *
 * sf  Stack frame object (an lvalue) of the spawn helper.
 */
#define CILK_FAKE_SPAWN_HELPER_CLEANUP_FRAME(sf) do { \
        if ((sf).worker) { \
            CILK_FAKE_POP_FRAME(sf); \
            __cilkrts_leave_frame(&(sf)); \
        } \
    } while (0)
352
/* Epilog to execute at the end of a spawn helper.
 *
 * In C the cleanup is expanded in place.  In C++ the macro only references
 * __cilk_sf, because the epilog is handled by the
 * __cilk_fake_spawn_helper_stack_frame destructor.
 */
#ifndef __cplusplus
#   define CILK_FAKE_SPAWN_HELPER_EPILOG() \
        CILK_FAKE_SPAWN_HELPER_CLEANUP_FRAME(__cilk_sf)
#else
#   define CILK_FAKE_SPAWN_HELPER_EPILOG() ((void) __cilk_sf)
#endif
361
/* Pop the current frame off of the call chain.
 *
 * The frame's call parent becomes the worker's current frame again, and the
 * frame's own call_parent link is cleared.
 *
 * sf  Stack frame object (an lvalue) with a non-null worker.
 */
#define CILK_FAKE_POP_FRAME(sf) do { \
        (sf).worker->current_stack_frame = (sf).call_parent; \
        (sf).call_parent = 0; \
    } while (0)
367
/* CILK_FAKE_EXCEPT_BEGIN / CILK_FAKE_EXCEPT_END bracket a region of a
 * spawning function.
 *
 * On Windows they define macros for synching functions before allowing
 * exceptions to propagate: the region runs on the direct return of
 * CILK_SETJMP on __cilk_sf.except_ctx; on the other return the frame is
 * asserted to be excepting and synched, and the exception is rethrown via
 * __cilkrts_rethrow().
 *
 * Elsewhere they expand to a plain brace pair.  That branch used to define
 * only CILK_EXCEPT_BEGIN / CILK_EXCEPT_END, leaving the CILK_FAKE_-prefixed
 * names undefined off Windows; both spellings are now provided there, with
 * the old ones kept as aliases for existing users.
 */
#ifdef _WIN32
#   define CILK_FAKE_EXCEPT_BEGIN \
        if (0 == CILK_SETJMP(__cilk_sf.except_ctx)) {

#   define CILK_FAKE_EXCEPT_END \
        } else { \
            assert((__cilk_sf.flags & (CILK_FRAME_UNSYNCHED|CILK_FRAME_EXCEPTING))\
                   == CILK_FRAME_EXCEPTING); \
            __cilkrts_rethrow(&__cilk_sf); \
            exit(0); \
        }
#else
#   define CILK_FAKE_EXCEPT_BEGIN {
#   define CILK_FAKE_EXCEPT_END }
    /* Legacy spellings, retained for backward compatibility. */
#   define CILK_EXCEPT_BEGIN CILK_FAKE_EXCEPT_BEGIN
#   define CILK_EXCEPT_END CILK_FAKE_EXCEPT_END
#endif
384
385 #ifdef __cplusplus
386 // The following definitions depend on C++ features.
387
388 // Wrap a functor (probably a lambda), so that a call to it cannot be
389 // inlined.
390 template <typename F>
391 class __cilk_fake_noinline_wrapper
392 {
393 F&& m_fn;
394 public:
395 __cilk_fake_noinline_wrapper(F&& fn) : m_fn(static_cast<F&&>(fn)) { }
396
397 #ifdef _WIN32
398 __declspec(noinline) void operator()(__cilkrts_stack_frame *sf);
399 #else
400 void operator()(__cilkrts_stack_frame *sf) __attribute__((noinline));
401 #endif
402
403 };
404
405 template <typename F>
406 void __cilk_fake_noinline_wrapper<F>::operator()(__cilkrts_stack_frame *sf)
407 {
408 m_fn(sf);
409 }
410
411 template <typename F>
412 inline
413 __cilk_fake_noinline_wrapper<F> __cilk_fake_make_noinline_wrapper(F&& fn)
414 {
415 return __cilk_fake_noinline_wrapper<F>(static_cast<F&&>(fn));
416 }
417
// Simulate "_Cilk_spawn expr", where expr must be a function call.
//
// Note: this macro does not correctly construct function arguments.
// According to the ABI specification, function arguments should be evaluated
// before the detach and destroyed after the detach.  This macro both
// evaluates and destroys them after the detach.  Consequently, if any part of
// the function argument expression depends on a value that is modified in the
// continuation of the spawn, a race will occur between the continuation and
// the argument evaluation.
//
// To work around this problem, the macro accepts an arbitrary list of
// declarations and statements (separated by semicolons) that are evaluated
// before the detach.  Thus, to simulate:
//
//     _Cilk_spawn f(expr);
//
// one would write:
//
//     CILK_FAKE_SPAWN(f(arg), auto arg = expr);
//
// Despite appearing in the reverse order, the 'arg' variable is created and
// initialized before the detach, and the call to f(arg) occurs after the
// detach.
//
// The expansion builds a lambda-based helper with CILK_FAKE_SPAWN_HELPER and
// invokes it on the enclosing function's frame (&__cilk_sf) through
// CILK_FAKE_CALL_SPAWN_HELPER.
#define CILK_FAKE_SPAWN(expr, ...) \
    CILK_FAKE_CALL_SPAWN_HELPER( \
        CILK_FAKE_SPAWN_HELPER(expr, __VA_ARGS__)(&__cilk_sf))
444
// Simulate "ret = cilk_spawn expr" by spawning the assignment
// "(ret) = (expr)".  See CILK_FAKE_SPAWN for constraints and for the meaning
// of the trailing statements.
#define CILK_FAKE_SPAWN_R(ret, expr, ...) \
    CILK_FAKE_SPAWN(((ret) = (expr)), __VA_ARGS__)
448
// Create a spawn helper as a C++11 lambda function.  In addition to the
// expression to spawn, this macro takes any number of statements to be
// executed before detaching.
//
// The lambda captures by reference, receives the spawning function's frame
// as 'parent_sf', and is wrapped so that the call to it cannot be inlined.
#define CILK_FAKE_SPAWN_HELPER(expr, ...) \
    __cilk_fake_make_noinline_wrapper( \
        [&](__cilkrts_stack_frame *parent_sf) { \
            CILK_FAKE_SPAWN_HELPER_BODY(*parent_sf, expr, __VA_ARGS__); \
        })
456
457 // C++ version of a __cilkrts_stack_frame for a spawning function.
458 // This struct is identical to __cilkrts_stack_frame except that the
459 // destructor automatically does frame cleanup.
460 struct __cilk_fake_stack_frame : __cilkrts_stack_frame
461 {
462 // Extension of __cilkrts_stack_frame with constructor and destructor
463 __cilk_fake_stack_frame() { }
464 __forceinline ~__cilk_fake_stack_frame() {
465 CILK_FAKE_CLEANUP_FRAME(*this);
466 }
467 };
468
469 // C++ version of a __cilkrts_stack_frame for a spawn helper.
470 // This struct is identical to __cilkrts_stack_frame except that the
471 // destructor automatically does frame cleanup.
472 struct __cilk_fake_spawn_helper_stack_frame : __cilkrts_stack_frame
473 {
474 // Extension of __cilkrts_stack_frame with constructor and destructor
475 __cilk_fake_spawn_helper_stack_frame() { worker = 0; }
476 __forceinline ~__cilk_fake_spawn_helper_stack_frame() {
477 CILK_FAKE_SPAWN_HELPER_CLEANUP_FRAME(*this);
478 }
479 };
480 #else
481 // For C, __cilk_fake_stack_frame and __cilk_fake_spawn_helper_stack_frame are
482 // identical to __cilkrts_stack_frame. Frame cleanup must be performed
483 // excplicitly (in CILK_FAKE_EPILOG and CILK_FAKE_SPAWN_HELPER_EPILOG)
484 typedef __cilkrts_stack_frame __cilk_fake_stack_frame;
485 typedef __cilkrts_stack_frame __cilk_fake_spawn_helper_stack_frame;
486 #endif
487
488 #endif // ! defined(INCLUDED_CILK_FAKE_DOT_H)