Mercurial > hg > GearsTemplate

diff src/parallel_execution/CUDAWorker.cbc @ 319:a15511b1a6e0
separate out cuda.c, and add USE_CUDA_MAIN_THREAD flag
author: Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date: Wed, 15 Feb 2017 20:43:55 +0900
parents: 51aa65676e37
children: f730761bb044
--- a/src/parallel_execution/CUDAWorker.cbc	Wed Feb 15 16:45:10 2017 +0900
+++ b/src/parallel_execution/CUDAWorker.cbc	Wed Feb 15 20:43:55 2017 +0900
@@ -4,16 +4,11 @@
 #include <stdlib.h>
 #include <libkern/OSAtomic.h>
 
-// includes, project
-#include <driver_types.h>
-#include <cuda_runtime.h>
-#include <cuda.h>
-#include "helper_cuda.h"
-
 #include "../context.h"
 
+extern void cudaInit(struct CUDAWorker *cudaWorker,int phase) ;
+
 static void start_CUDAworker(Worker* worker);
-static void cudaInit(struct CUDAWorker *cudaWorker,int phase) ;
 
 volatile int cuda_initialized = 0;
 
@@ -27,33 +22,18 @@
     worker->tasks = queue;
     cudaWorker->id = id;
     worker->shutdown = C_shutdownCUDAWorker;
-    // pthread_create(&worker->worker->CUDAWorker.thread, NULL, (void*)&start_CUDAworker, worker);
+#ifndef USE_CUDA_MAIN_THREAD
+    pthread_create(&worker->worker->CUDAWorker.thread, NULL, (void*)&start_CUDAworker, worker);
+#else
     if (im) {
         im->workers[0] = worker;
     }
     cuda_initialized = 1;
     start_CUDAworker(worker);
+#endif
     return worker;
 }
 
-static void cudaInit(struct CUDAWorker *cudaWorker,int phase) {
-    // initialize and load kernel
-    cudaWorker->num_stream = 1; // number of stream
-//    cudaWorker->stream = NEWN(cudaWorker->num_stream, CUstream );
-   if (phase==0)
-    checkCudaErrors(cuInit(0));
-   if (phase==0)
-    checkCudaErrors(cuDeviceGet(&cudaWorker->device, 0));
-   if (phase==0)
-    checkCudaErrors(cuCtxCreate(&cudaWorker->cuCtx, CU_CTX_SCHED_SPIN, cudaWorker->device));
-//    if (cudaWorker->num_stream) {
-//        for (int i=0;i<cudaWorker->num_stream;i++)
-//            checkCudaErrors(cuStreamCreate(&cudaWorker->stream[i],0));
-//    }
-    CUdeviceptr devA;
-    checkCudaErrors(cuMemAlloc(&devA, 16));
-
-}
 
 static void start_CUDAworker(Worker* worker) {
     CUDAWorker* cudaWorker = (CUDAWorker*)worker->worker;
@@ -77,10 +57,6 @@
 __code getTaskCUDA(struct Worker* worker, struct Context* task) {
     if (!task)
         return; // end thread
-//    if (cuda_initialized==0 || 1) {
-//        CUDAWorker* cudaWorker = (CUDAWorker*)worker->worker;
-//        cudaInit(cudaWorker,1);
-//    }
     worker->taskReceive = C_taskReceiveCUDAWorker;
     task->worker = worker;
     enum Code taskCg = task->next;
@@ -134,11 +110,10 @@
                  
 }
 
+extern void cudaShutdown( CUDAWorker *cudaWorker) ;
 
 __code shutdownCUDAWorker(struct Context* context, CUDAWorker* worker) {
-//    for (int i=0;i<worker->num_stream;i++)
-//        checkCudaErrors(cuStreamDestroy(worker->stream[i]));
-    checkCudaErrors(cuCtxDestroy(worker->cuCtx));
+    cudaShutdown( worker) ;
 }
 
 __code shutdownCUDAWorker_stub(struct Context* context) {
author	Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date	Wed, 15 Feb 2017 20:43:55 +0900
parents	51aa65676e37
children	f730761bb044