diff src/parallel_execution/CUDAWorker.cbc @ 410:85b0ddbf458e

Fix CudaWorker
author Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
date Thu, 14 Sep 2017 02:35:20 +0900
parents f730761bb044
children 0eba9a04633f
line wrap: on
line diff
--- a/src/parallel_execution/CUDAWorker.cbc	Wed Sep 06 22:01:27 2017 +0900
+++ b/src/parallel_execution/CUDAWorker.cbc	Thu Sep 14 02:35:20 2017 +0900
@@ -1,14 +1,8 @@
-#include <stdio.h>
-#include <sys/time.h>
-#include <string.h>
-#include <stdlib.h>
-#include <libkern/OSAtomic.h>
-
 #include "../context.h"
 
 extern void cudaInit(struct CUDAWorker *cudaWorker,int phase) ;
 
-static void start_CUDAworker(Worker* worker);
+static void startCUDAWorker(Worker* worker);
 
 #ifndef USE_CUDA_MAIN_THREAD
 volatile 
@@ -19,32 +13,31 @@
     struct Worker* worker = ALLOC(context, Worker);
     struct CUDAWorker* cudaWorker = new CUDAWorker();
 
-    cudaInit(cudaWorker,0);
+    cudaInit(cudaWorker, 0);
 
     worker->worker = (union Data*)cudaWorker;
     worker->tasks = queue;
     cudaWorker->id = id;
+    worker->taskReceive = C_taskReceiveWorker;
     worker->shutdown = C_shutdownCUDAWorker;
 #ifndef USE_CUDA_MAIN_THREAD
-    pthread_create(&worker->worker->CUDAWorker.thread, NULL, (void*)&start_CUDAworker, worker);
+    pthread_create(&worker->worker->CUDAWorker.thread, NULL, (void*)&startCUDAWorker, worker);
 #else
     if (im) {
         im->workers[0] = worker;
     }
     cuda_initialized = 1;
-    start_CUDAworker(worker);
+    startCUDAWorker(worker);
 #endif
     return worker;
 }
 
-
-static void start_CUDAworker(Worker* worker) {
+static void startCUDAWorker(Worker* worker) {
     CUDAWorker* cudaWorker = (CUDAWorker*)worker->worker;
     cudaWorker->context = NEW(struct Context);
     initContext(cudaWorker->context);
     Gearef(cudaWorker->context, Worker)->worker = (union Data*)worker;
-
-    goto meta(cudaWorker->context, C_taskReceiveCUDAWorker);
+    goto meta(cudaWorker->context, worker->taskReceive);
 }
 
 __code taskReceiveCUDAWorker(struct Worker* worker,struct Queue* queue) {
@@ -60,10 +53,13 @@
 __code getTaskCUDA(struct Worker* worker, struct Context* task) {
     if (!task)
         return; // end thread
-    worker->taskReceive = C_taskReceiveCUDAWorker;
     task->worker = worker;
     enum Code taskCg = task->next;
-    task->next = C_odgCommitCUDA; // set CG after task exec
+    if (task->iterate) {
+        task->next = C_iterateCommitCUDA;
+    } else {
+        task->next = C_odgCommitCUDA; // set CG after task exec
+    }
     goto meta(task, taskCg);
 }
 
@@ -73,50 +69,97 @@
     goto getTaskCUDA(context, worker, task);
 }
 
-__code odgCommitCUDA(struct LoopCounter* loopCounter, struct Queue* queue, struct Context* task) {
+__code iterateCommitCUDA(struct Iterator* iterator) {
+    iterator->iterator = (union Data*)context->iterator;
+    iterator->task = context;
+    iterator->next = C_odgCommitCUDA;
+    iterator->whenWait = C_iterateCommitCUDA1;
+    goto meta(context, context->iterator->barrier);
+}
+
+__code iterateCommitCUDA1(struct Context* task) {
+    goto meta(context, C_taskReceiveWorker);
+}
+
+__code iterateCommitCUDA1_stub(struct Context* context) {
+    struct Context* workerContext = context->worker->worker->CUDAWorker.context;
+    goto iterateCommitCUDA1(workerContext, context);
+}
+
+__code odgCommitCUDA(struct LoopCounter* loopCounter, struct Context* task, struct TaskManager* taskManager) {
     int i = loopCounter->i ;
-    if(task->odg + i < task->maxOdg) {
-        queue->queue = (union Data*)GET_WAIT_LIST(task->data[task->odg+i]);
-        queue->next = C_odgCommitCUDA1;
-        goto meta(context, queue->queue->Queue.take);
+    if (task->odg+i < task->maxOdg) {
+        goto meta(task, C_odgCommitCUDA1);
     }
     loopCounter->i = 0;
-    goto meta(context, C_taskReceiveCUDAWorker);
+    taskManager->taskManager = (union Data*)task->taskManager;
+    taskManager->next = C_taskReceiveWorker;
+    goto meta(context, task->taskManager->decrementTaskCount);
 }
 
 __code odgCommitCUDA_stub(struct Context* context) {
     struct Context* workerContext = context->worker->worker->CUDAWorker.context;
     goto odgCommitCUDA(workerContext,
-                   Gearef(workerContext, LoopCounter),
-                   Gearef(workerContext, Queue),
-                   context);
+            Gearef(context, LoopCounter),
+            context,
+            Gearef(workerContext, TaskManager));
 }
 
-__code odgCommitCUDA1(struct TaskManager* taskManager, struct Context* task) {
-    if(__sync_fetch_and_sub(&task->idgCount, 1)) {
-        if(task->idgCount == 0) {
-            taskManager->taskManager = (union Data*)task->taskManager;
-            taskManager->context = task;
-            taskManager->next = C_odgCommitCUDA;
-            goto meta(context, task->taskManager->spawn);
-        }
-    } else {
-        goto meta(context, C_odgCommitCUDA1);
-    }
+__code odgCommitCUDA1(struct LoopCounter* loopCounter, struct Queue* queue) {
+    int i = loopCounter->i ;
+    queue->queue = (union Data*)GET_WAIT_LIST(context->data[context->odg+i]);
+    queue->whenEmpty = C_odgCommitCUDA4;
+    queue->next = C_odgCommitCUDA2;
+    goto meta(context, queue->queue->Queue.isEmpty);
 }
 
 __code odgCommitCUDA1_stub(struct Context* context) {
+    goto odgCommitCUDA1(context,
+            Gearef(context, LoopCounter),
+            Gearef(context, Queue));
+}
+
+__code odgCommitCUDA2(struct Queue* queue) {
+    queue->next = C_odgCommitCUDA3;
+    goto meta(context, queue->queue->Queue.take);
+}
+
+__code odgCommitCUDA2_stub(struct Context* context) {
+    goto odgCommitCUDA2(context,
+            Gearef(context, Queue));
+}
+
+__code odgCommitCUDA3(struct TaskManager* taskManager, struct Context* task) {
+    if (__sync_fetch_and_sub(&task->idgCount, 1) == 1) { // atomic decrement idg counter(__sync_fetch_and_sub function return initial value of task->idgCount point)
+        taskManager->taskManager = (union Data*)task->taskManager;
+        taskManager->context = task;
+        taskManager->next = C_odgCommitCUDA1;
+        goto meta(context, task->taskManager->spawn);
+    }
+    goto meta(context, C_odgCommitCUDA1);
+}
+
+__code odgCommitCUDA3_stub(struct Context* context) {
     struct Context* task = &Gearef(context, Queue)->data->Context;
-    goto odgCommitCUDA1(context,
-                    Gearef(context, TaskManager),
-                    task);
-                 
+    goto odgCommitCUDA3(context,
+            Gearef(context, TaskManager),
+            task);
+}
+
+__code odgCommitCUDA4(struct LoopCounter* loopCounter) {
+    loopCounter->i++;
+    goto meta(context, C_odgCommitCUDA);
+}
+
+__code odgCommitCUDA4_stub(struct Context* context) {
+    goto odgCommitCUDA4(context,
+            Gearef(context, LoopCounter));
 }
 
 extern void cudaShutdown( CUDAWorker *cudaWorker) ;
 
 __code shutdownCUDAWorker(struct Context* context, CUDAWorker* worker) {
-    cudaShutdown( worker) ;
+    cudaShutdown(worker) ;
 }
 
 __code shutdownCUDAWorker_stub(struct Context* context) {