Mercurial > hg > Members > Moririn
annotate src/parallel_execution/CUDAWorker.cbc @ 316:54d203daf06b
CUDAtwice.cbc is called.
author | Shinji KONO <kono@ie.u-ryukyu.ac.jp> |
---|---|
date | Wed, 15 Feb 2017 16:25:23 +0900 |
parents | 1839586f5b41 |
children | 51aa65676e37 |
rev | line source |
---|---|
303 | 1 #include <stdio.h> |
2 #include <sys/time.h> | |
3 #include <string.h> | |
4 #include <stdlib.h> | |
304
9755206813cb
helper_string.h for ANSI C
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
303
diff
changeset
|
5 #include <libkern/OSAtomic.h> |
305 | 6 |
7 // includes, project | |
8 #include <driver_types.h> | |
9 #include <cuda_runtime.h> | |
303 | 10 #include <cuda.h> |
11 #include "helper_cuda.h" | |
12 | |
302 | 13 #include "../context.h" |
95
3e28ee215c0e
modify twice, use OSAtomiceCompareAndSwap
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
14 |
303 | 15 static void start_CUDAworker(Worker* worker); |
316
54d203daf06b
CUDAtwice.cbc is called.
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
314
diff
changeset
|
16 static void cudaInit(struct CUDAWorker *cudaWorker,int phase) ; |
233 | 17 |
316
54d203daf06b
CUDAtwice.cbc is called.
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
314
diff
changeset
|
18 volatile int cuda_initialized = 0; |
313 | 19 |
316
54d203daf06b
CUDAtwice.cbc is called.
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
314
diff
changeset
|
20 Worker* createCUDAWorker(struct Context* context, int id, Queue* queue, TaskManagerImpl *im) { |
233 | 21 struct Worker* worker = ALLOC(context, Worker); |
305 | 22 struct CUDAWorker* cudaWorker = new CUDAWorker(); |
316
54d203daf06b
CUDAtwice.cbc is called.
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
314
diff
changeset
|
23 |
54d203daf06b
CUDAtwice.cbc is called.
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
314
diff
changeset
|
24 cudaInit(cudaWorker,0); |
54d203daf06b
CUDAtwice.cbc is called.
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
314
diff
changeset
|
25 |
305 | 26 worker->worker = (union Data*)cudaWorker; |
244 | 27 worker->tasks = queue; |
316
54d203daf06b
CUDAtwice.cbc is called.
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
314
diff
changeset
|
28 printf("createCUDAWorker %p\n",queue); |
305 | 29 cudaWorker->id = id; |
313 | 30 worker->shutdown = C_shutdownCUDAWorker; |
316
54d203daf06b
CUDAtwice.cbc is called.
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
314
diff
changeset
|
31 // pthread_create(&worker->worker->CUDAWorker.thread, NULL, (void*)&start_CUDAworker, worker); |
54d203daf06b
CUDAtwice.cbc is called.
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
314
diff
changeset
|
32 if (im) { |
54d203daf06b
CUDAtwice.cbc is called.
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
314
diff
changeset
|
33 printf("im->worker %p\n",im->workers); |
54d203daf06b
CUDAtwice.cbc is called.
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
314
diff
changeset
|
34 im->workers[0] = worker; |
54d203daf06b
CUDAtwice.cbc is called.
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
314
diff
changeset
|
35 } |
54d203daf06b
CUDAtwice.cbc is called.
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
314
diff
changeset
|
36 cuda_initialized = 1; |
54d203daf06b
CUDAtwice.cbc is called.
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
314
diff
changeset
|
37 start_CUDAworker(worker); |
313 | 38 return worker; |
39 } | |
40 | |
316
54d203daf06b
CUDAtwice.cbc is called.
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
314
diff
changeset
|
41 static void cudaInit(struct CUDAWorker *cudaWorker,int phase) { |
312
7dd5a7d52a67
USE_CUDAWorker flag only for CUDAtwice
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
305
diff
changeset
|
42 // initialize and load kernel |
7dd5a7d52a67
USE_CUDAWorker flag only for CUDAtwice
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
305
diff
changeset
|
43 cudaWorker->num_stream = 1; // number of stream |
314 | 44 // cudaWorker->stream = NEWN(cudaWorker->num_stream, CUstream ); |
45 printf("cudaInit 1\n"); | |
316
54d203daf06b
CUDAtwice.cbc is called.
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
314
diff
changeset
|
46 if (phase==0) |
312
7dd5a7d52a67
USE_CUDAWorker flag only for CUDAtwice
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
305
diff
changeset
|
47 checkCudaErrors(cuInit(0)); |
316
54d203daf06b
CUDAtwice.cbc is called.
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
314
diff
changeset
|
48 if (phase==0) |
312
7dd5a7d52a67
USE_CUDAWorker flag only for CUDAtwice
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
305
diff
changeset
|
49 checkCudaErrors(cuDeviceGet(&cudaWorker->device, 0)); |
314 | 50 printf("cudaInit 2\n"); |
316
54d203daf06b
CUDAtwice.cbc is called.
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
314
diff
changeset
|
51 if (phase==0) |
312
7dd5a7d52a67
USE_CUDAWorker flag only for CUDAtwice
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
305
diff
changeset
|
52 checkCudaErrors(cuCtxCreate(&cudaWorker->cuCtx, CU_CTX_SCHED_SPIN, cudaWorker->device)); |
314 | 53 printf("cudaInit 3\n"); |
54 // if (cudaWorker->num_stream) { | |
55 // for (int i=0;i<cudaWorker->num_stream;i++) | |
56 // checkCudaErrors(cuStreamCreate(&cudaWorker->stream[i],0)); | |
57 // } | |
316
54d203daf06b
CUDAtwice.cbc is called.
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
314
diff
changeset
|
58 CUdeviceptr devA; |
54d203daf06b
CUDAtwice.cbc is called.
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
314
diff
changeset
|
59 checkCudaErrors(cuMemAlloc(&devA, 16)); |
54d203daf06b
CUDAtwice.cbc is called.
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
314
diff
changeset
|
60 |
314 | 61 printf("cudaInit done\n"); |
183 | 62 } |
63 | |
303 | 64 static void start_CUDAworker(Worker* worker) { |
305 | 65 CUDAWorker* cudaWorker = (CUDAWorker*)worker->worker; |
66 cudaWorker->context = NEW(struct Context); | |
67 initContext(cudaWorker->context); | |
68 Gearef(cudaWorker->context, Worker)->worker = (union Data*)worker; | |
303 | 69 |
305 | 70 goto meta(cudaWorker->context, C_taskReceiveCUDAWorker); |
233 | 71 } |
72 | |
305 | 73 __code taskReceiveCUDAWorker(struct Worker* worker,struct Queue* queue) { |
247
ce262b2c1daf
Fix createTask for main
Tatsuki IHA <e125716@ie.u-ryukyu.ac.jp>
parents:
244
diff
changeset
|
74 queue->queue = (union Data*)worker->tasks; |
305 | 75 queue->next = C_getTaskCUDA; |
248
1ede5390cda2
Fix segmentation fault but not multi thread running
Tatsuki IHA <e125716@ie.u-ryukyu.ac.jp>
parents:
247
diff
changeset
|
76 goto meta(context, worker->tasks->take); |
227 | 77 } |
78 | |
302 | 79 __code taskReceiveCUDAWorker_stub(struct Context* context) { |
80 goto taskReceiveCUDAWorker(context, &Gearef(context, Worker)->worker->Worker, Gearef(context, Queue)); | |
222
77faa28128b4
Add taskSend for TaskManager
Tatsuki IHA <e125716@ie.u-ryukyu.ac.jp>
parents:
217
diff
changeset
|
81 } |
77faa28128b4
Add taskSend for TaskManager
Tatsuki IHA <e125716@ie.u-ryukyu.ac.jp>
parents:
217
diff
changeset
|
82 |
305 | 83 __code getTaskCUDA(struct Worker* worker, struct Context* task) { |
240 | 84 if (!task) |
236 | 85 return; // end thread |
316
54d203daf06b
CUDAtwice.cbc is called.
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
314
diff
changeset
|
86 // if (cuda_initialized==0 || 1) { |
54d203daf06b
CUDAtwice.cbc is called.
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
314
diff
changeset
|
87 // CUDAWorker* cudaWorker = (CUDAWorker*)worker->worker; |
54d203daf06b
CUDAtwice.cbc is called.
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
314
diff
changeset
|
88 // cudaInit(cudaWorker,1); |
54d203daf06b
CUDAtwice.cbc is called.
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
314
diff
changeset
|
89 // } |
314 | 90 worker->taskReceive = C_taskReceiveCUDAWorker; |
230 | 91 task->worker = worker; |
305 | 92 enum Code taskCg = task->next; |
93 task->next = C_odgCommitCUDA; // set CG after task exec | |
94 goto meta(task, taskCg); | |
169
ea7b11f3e717
Using Queue Interface
Tatsuki IHA <e125716@ie.u-ryukyu.ac.jp>
parents:
149
diff
changeset
|
95 } |
109
059b26a250cc
Change put_queue process
Tatsuki IHA <e125716@ie.u-ryukyu.ac.jp>
parents:
103
diff
changeset
|
96 |
305 | 97 __code getTaskCUDA_stub(struct Context* context) { |
247
ce262b2c1daf
Fix createTask for main
Tatsuki IHA <e125716@ie.u-ryukyu.ac.jp>
parents:
244
diff
changeset
|
98 Worker* worker = &Gearef(context,Worker)->worker->Worker; |
260
6b5444bbea8a
generated stub no comiple errors
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
257
diff
changeset
|
99 struct Context* task = &Gearef(context, Queue)->data->Context; |
305 | 100 goto getTaskCUDA(context, worker, task); |
101 } | |
102 | |
103 __code odgCommitCUDA(struct LoopCounter* loopCounter, struct Queue* queue, struct Context* task) { | |
104 int i = loopCounter->i ; | |
105 if(task->odg + i < task->maxOdg) { | |
106 queue->queue = (union Data*)GET_WAIT_LIST(task->data[task->odg+i]); | |
107 queue->next = C_odgCommitCUDA1; | |
108 goto meta(context, queue->queue->Queue.take); | |
109 } | |
110 loopCounter->i = 0; | |
111 goto meta(context, C_taskReceiveCUDAWorker); | |
95
3e28ee215c0e
modify twice, use OSAtomiceCompareAndSwap
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
112 } |
101 | 113 |
305 | 114 __code odgCommitCUDA_stub(struct Context* context) { |
115 struct Context* workerContext = context->worker->worker->CUDAWorker.context; | |
116 goto odgCommitCUDA(workerContext, | |
117 Gearef(workerContext, LoopCounter), | |
118 Gearef(workerContext, Queue), | |
119 context); | |
98 | 120 } |
305 | 121 |
122 __code odgCommitCUDA1(struct TaskManager* taskManager, struct Context* task) { | |
123 if(__sync_fetch_and_sub(&task->idgCount, 1)) { | |
124 if(task->idgCount == 0) { | |
125 taskManager->taskManager = (union Data*)task->taskManager; | |
126 taskManager->context = task; | |
127 taskManager->next = C_odgCommitCUDA; | |
128 goto meta(context, task->taskManager->spawn); | |
129 } | |
130 } else { | |
131 goto meta(context, C_odgCommitCUDA1); | |
132 } | |
133 } | |
227 | 134 |
305 | 135 __code odgCommitCUDA1_stub(struct Context* context) { |
136 struct Context* task = &Gearef(context, Queue)->data->Context; | |
137 goto odgCommitCUDA1(context, | |
138 Gearef(context, TaskManager), | |
139 task); | |
140 | |
141 } | |
142 | |
143 | |
144 __code shutdownCUDAWorker(struct Context* context, CUDAWorker* worker) { | |
314 | 145 // for (int i=0;i<worker->num_stream;i++) |
146 // checkCudaErrors(cuStreamDestroy(worker->stream[i])); | |
305 | 147 checkCudaErrors(cuCtxDestroy(worker->cuCtx)); |
230 | 148 } |
227 | 149 |
303 | 150 __code shutdownCUDAWorker_stub(struct Context* context) { |
305 | 151 CUDAWorker* worker = (CUDAWorker *)GearImpl(context, Worker, worker); |
303 | 152 goto shutdownCUDAWorker(context,worker); |
227 | 153 } |