# HG changeset patch # User Shinji KONO # Date 1487129659 -32400 # Node ID 1839586f5b41d36a5f5af1e248153dbb931e8466 # Parent 4addbc7469ee32333686f7b46010fb5bfd863c65 pthread CUDA test diff -r 4addbc7469ee -r 1839586f5b41 src/parallel_execution/CUDAWorker.cbc --- a/src/parallel_execution/CUDAWorker.cbc Wed Feb 15 11:36:10 2017 +0900 +++ b/src/parallel_execution/CUDAWorker.cbc Wed Feb 15 12:34:19 2017 +0900 @@ -13,6 +13,7 @@ #include "../context.h" static void start_CUDAworker(Worker* worker); +static void cudaInit(struct CUDAWorker *cudaWorker) ; static int cuda_initialized = 0; @@ -22,8 +23,6 @@ worker->worker = (union Data*)cudaWorker; worker->tasks = queue; cudaWorker->id = id; - - worker->taskReceive = C_taskReceiveCUDAWorker; worker->shutdown = C_shutdownCUDAWorker; pthread_create(&worker->worker->CUDAWorker.thread, NULL, (void*)&start_CUDAworker, worker); return worker; @@ -32,16 +31,19 @@ static void cudaInit(struct CUDAWorker *cudaWorker) { // initialize and load kernel cudaWorker->num_stream = 1; // number of stream - cudaWorker->stream = NEWN(cudaWorker->num_stream, CUstream ); +// cudaWorker->stream = NEWN(cudaWorker->num_stream, CUstream ); +printf("cudaInit 1\n"); checkCudaErrors(cuInit(0)); checkCudaErrors(cuDeviceGet(&cudaWorker->device, 0)); +printf("cudaInit 2\n"); checkCudaErrors(cuCtxCreate(&cudaWorker->cuCtx, CU_CTX_SCHED_SPIN, cudaWorker->device)); - - if (cudaWorker->num_stream) { - for (int i=0;inum_stream;i++) - checkCudaErrors(cuStreamCreate(&cudaWorker->stream[i],0)); - } +printf("cudaInit 3\n"); +// if (cudaWorker->num_stream) { +// for (int i=0;inum_stream;i++) +// checkCudaErrors(cuStreamCreate(&cudaWorker->stream[i],0)); +// } cuda_initialized = 1; +printf("cudaInit done\n"); } static void start_CUDAworker(Worker* worker) { @@ -54,10 +56,6 @@ } __code taskReceiveCUDAWorker(struct Worker* worker,struct Queue* queue) { - if (cuda_initialized==0) { - CUDAWorker* cudaWorker = (CUDAWorker*)worker->worker; - cudaInit(cudaWorker); - } queue->queue = (union Data*)worker->tasks; queue->next = C_getTaskCUDA; goto meta(context, worker->tasks->take); @@ -70,6 +68,11 @@ __code getTaskCUDA(struct Worker* worker, struct Context* task) { if (!task) return; // end thread + if (cuda_initialized==0) { + CUDAWorker* cudaWorker = (CUDAWorker*)worker->worker; + cudaInit(cudaWorker); + } + worker->taskReceive = C_taskReceiveCUDAWorker; task->worker = worker; enum Code taskCg = task->next; task->next = C_odgCommitCUDA; // set CG after task exec @@ -124,8 +127,8 @@ __code shutdownCUDAWorker(struct Context* context, CUDAWorker* worker) { - for (int i=0;inum_stream;i++) - checkCudaErrors(cuStreamDestroy(worker->stream[i])); +// for (int i=0;inum_stream;i++) +// checkCudaErrors(cuStreamDestroy(worker->stream[i])); checkCudaErrors(cuCtxDestroy(worker->cuCtx)); } diff -r 4addbc7469ee -r 1839586f5b41 src/parallel_execution/CUDAtwice.cbc --- a/src/parallel_execution/CUDAtwice.cbc Wed Feb 15 11:36:10 2017 +0900 +++ b/src/parallel_execution/CUDAtwice.cbc Wed Feb 15 12:34:19 2017 +0900 @@ -12,6 +12,7 @@ // memory allocate CUdeviceptr devA; CUdeviceptr devLoopCounter; +printf("CUdA Exe 1\n"); checkCudaErrors(cuMemAlloc(&devA, array->size)); checkCudaErrors(cuMemAlloc(&devLoopCounter, sizeof(LoopCounter))); @@ -19,6 +20,7 @@ //twiceカーネルが定義されてなければそれをロードする checkCudaErrors(cuModuleLoad(&context->module, "CUDAtwice.ptx")); checkCudaErrors(cuModuleGetFunction(&context->function, context->module, "twice")); +printf("CUdA Exe 2\n"); //入力のDataGearをGPUにbuffer経由で送る // Synchronous data transfer(host to device) @@ -53,6 +55,7 @@ } __code CUDAtwice_stub(struct Context* context) { +printf("CUdAtwice stub\n"); struct LoopCounter* loopCounter = &context->data[context->dataNum]->LoopCounter; struct Array* array = &context->data[context->dataNum+1]->Array; CUDAExec(context,array,loopCounter); diff -r 4addbc7469ee -r 1839586f5b41 src/parallel_execution/main.cbc --- a/src/parallel_execution/main.cbc Wed Feb 15 11:36:10 2017 +0900 +++ b/src/parallel_execution/main.cbc Wed Feb 15 12:34:19 2017 +0900 @@ -97,8 +97,12 @@ loopCounter2->i = 0; task->idgCount = 0; if (gpu_num) { +#ifdef USE_CUDAWorker task->next = C_CUDAtwice; task->workerId = CPU_CUDA; +#else + task->next = C_twice; +#endif } else { task->next = C_twice; } diff -r 4addbc7469ee -r 1839586f5b41 src/test/twice.cc --- a/src/test/twice.cc Wed Feb 15 11:36:10 2017 +0900 +++ b/src/test/twice.cc Wed Feb 15 12:34:19 2017 +0900 @@ -2,6 +2,9 @@ #include #include #include +extern "C" { +#include +} #include @@ -35,9 +38,12 @@ } } +int num_stream = 1; // number of stream +int num_exec = 16; // number of executed kernel + +static void *start_cuda(void *) ; + int main(int args, char* argv[]) { - int num_stream = 1; // number of stream - int num_exec = 16; // number of executed kernel for (int i=1;argv[i];i++) { if (strcmp(argv[i], "--stream") == 0 || strcmp(argv[i], "-s") == 0) { @@ -47,7 +53,17 @@ num_exec = atoi(argv[++i]); } } +#if 0 + start_cuda(NULL); +#else + pthread_t thread; + pthread_create(&thread, NULL, start_cuda, NULL); + pthread_join(thread,NULL); +#endif + return 0; +} +static void *start_cuda(void *args) { // initialize and load kernel CUdevice device; CUcontext context; @@ -161,7 +177,6 @@ for (int i=0;i