Mercurial > hg > GearsTemplate
diff src/parallel_execution/CUDAWorker.cbc @ 410:85b0ddbf458e
Fix CudaWorker
author | Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Thu, 14 Sep 2017 02:35:20 +0900 |
parents | f730761bb044 |
children | 0eba9a04633f |
line wrap: on
line diff
--- a/src/parallel_execution/CUDAWorker.cbc Wed Sep 06 22:01:27 2017 +0900 +++ b/src/parallel_execution/CUDAWorker.cbc Thu Sep 14 02:35:20 2017 +0900 @@ -1,14 +1,8 @@ -#include <stdio.h> -#include <sys/time.h> -#include <string.h> -#include <stdlib.h> -#include <libkern/OSAtomic.h> - #include "../context.h" extern void cudaInit(struct CUDAWorker *cudaWorker,int phase) ; -static void start_CUDAworker(Worker* worker); +static void startCUDAWorker(Worker* worker); #ifndef USE_CUDA_MAIN_THREAD volatile @@ -19,32 +13,31 @@ struct Worker* worker = ALLOC(context, Worker); struct CUDAWorker* cudaWorker = new CUDAWorker(); - cudaInit(cudaWorker,0); + cudaInit(cudaWorker, 0); worker->worker = (union Data*)cudaWorker; worker->tasks = queue; cudaWorker->id = id; + worker->taskReceive = C_taskReceiveWorker; worker->shutdown = C_shutdownCUDAWorker; #ifndef USE_CUDA_MAIN_THREAD - pthread_create(&worker->worker->CUDAWorker.thread, NULL, (void*)&start_CUDAworker, worker); + pthread_create(&worker->worker->CUDAWorker.thread, NULL, (void*)&startCUDAWorker, worker); #else if (im) { im->workers[0] = worker; } cuda_initialized = 1; - start_CUDAworker(worker); + startCUDAWorker(worker); #endif return worker; } - -static void start_CUDAworker(Worker* worker) { +static void startCUDAWorker(Worker* worker) { CUDAWorker* cudaWorker = (CUDAWorker*)worker->worker; cudaWorker->context = NEW(struct Context); initContext(cudaWorker->context); Gearef(cudaWorker->context, Worker)->worker = (union Data*)worker; - - goto meta(cudaWorker->context, C_taskReceiveCUDAWorker); + goto meta(cudaWorker->context, worker->taskReceive); } __code taskReceiveCUDAWorker(struct Worker* worker,struct Queue* queue) { @@ -60,10 +53,13 @@ __code getTaskCUDA(struct Worker* worker, struct Context* task) { if (!task) return; // end thread - worker->taskReceive = C_taskReceiveCUDAWorker; task->worker = worker; enum Code taskCg = task->next; - task->next = C_odgCommitCUDA; // set CG after task exec + if (task->iterate) { + task->next = C_iterateCommitCUDA; + } else { + task->next = C_odgCommitCUDA; // set CG after task exec + } goto meta(task, taskCg); } @@ -73,50 +69,97 @@ goto getTaskCUDA(context, worker, task); } -__code odgCommitCUDA(struct LoopCounter* loopCounter, struct Queue* queue, struct Context* task) { +__code iterateCommitCUDA(struct Iterator* iterator) { + iterator->iterator = (union Data*)context->iterator; + iterator->task = context; + iterator->next = C_odgCommitCUDA; + iterator->whenWait = C_iterateCommitCUDA1; + goto meta(context, context->iterator->barrier); +} + +__code iterateCommitCUDA1(struct Context* task) { + goto meta(context, C_taskReceiveWorker); +} + +__code iterateCommitCUDA1_stub(struct Context* context) { + struct Context* workerContext = context->worker->worker->CUDAWorker.context; + goto iterateCommitCUDA1(workerContext, context); +} + +__code odgCommitCUDA(struct LoopCounter* loopCounter, struct Context* task, struct TaskManager* taskManager) { int i = loopCounter->i ; - if(task->odg + i < task->maxOdg) { - queue->queue = (union Data*)GET_WAIT_LIST(task->data[task->odg+i]); - queue->next = C_odgCommitCUDA1; - goto meta(context, queue->queue->Queue.take); + if (task->odg+i < task->maxOdg) { + goto meta(task, C_odgCommitCUDA1); } loopCounter->i = 0; - goto meta(context, C_taskReceiveCUDAWorker); + taskManager->taskManager = (union Data*)task->taskManager; + taskManager->next = C_taskReceiveWorker; + goto meta(context, task->taskManager->decrementTaskCount); } __code odgCommitCUDA_stub(struct Context* context) { struct Context* workerContext = context->worker->worker->CUDAWorker.context; goto odgCommitCUDA(workerContext, - Gearef(workerContext, LoopCounter), - Gearef(workerContext, Queue), - context); + Gearef(context, LoopCounter), + context, + Gearef(workerContext, TaskManager)); } -__code odgCommitCUDA1(struct TaskManager* taskManager, struct Context* task) { - if(__sync_fetch_and_sub(&task->idgCount, 1)) { - if(task->idgCount == 0) { - taskManager->taskManager = (union Data*)task->taskManager; - taskManager->context = task; - taskManager->next = C_odgCommitCUDA; - goto meta(context, task->taskManager->spawn); - } - } else { - goto meta(context, C_odgCommitCUDA1); - } +__code odgCommitCUDA1(struct LoopCounter* loopCounter, struct Queue* queue) { + int i = loopCounter->i ; + queue->queue = (union Data*)GET_WAIT_LIST(context->data[context->odg+i]); + queue->whenEmpty = C_odgCommitCUDA4; + queue->next = C_odgCommitCUDA2; + goto meta(context, queue->queue->Queue.isEmpty); } __code odgCommitCUDA1_stub(struct Context* context) { + goto odgCommitCUDA1(context, + Gearef(context, LoopCounter), + Gearef(context, Queue)); +} + +__code odgCommitCUDA2(struct Queue* queue) { + queue->next = C_odgCommitCUDA3; + goto meta(context, queue->queue->Queue.take); +} + +__code odgCommitCUDA2_stub(struct Context* context) { + goto odgCommitCUDA2(context, + Gearef(context, Queue)); +} + +__code odgCommitCUDA3(struct TaskManager* taskManager, struct Context* task) { + if (__sync_fetch_and_sub(&task->idgCount, 1) == 1) { // atomic decrement idg counter(__sync_fetch_and_sub function return initial value of task->idgCount point) + taskManager->taskManager = (union Data*)task->taskManager; + taskManager->context = task; + taskManager->next = C_odgCommitCUDA1; + goto meta(context, task->taskManager->spawn); + } + goto meta(context, C_odgCommitCUDA1); +} + +__code odgCommitCUDA3_stub(struct Context* context) { struct Context* task = &Gearef(context, Queue)->data->Context; - goto odgCommitCUDA1(context, - Gearef(context, TaskManager), - task); - + goto odgCommitCUDA3(context, + Gearef(context, TaskManager), + task); +} + +__code odgCommitCUDA4(struct LoopCounter* loopCounter) { + loopCounter->i++; + goto meta(context, C_odgCommitCUDA); +} + +__code odgCommitCUDA4_stub(struct Context* context) { + goto odgCommitCUDA4(context, + Gearef(context, LoopCounter)); } extern void cudaShutdown( CUDAWorker *cudaWorker) ; __code shutdownCUDAWorker(struct Context* context, CUDAWorker* worker) { - cudaShutdown( worker) ; + cudaShutdown(worker) ; } __code shutdownCUDAWorker_stub(struct Context* context) {