Mercurial > hg > Gears > GearsAgda
changeset 302:8e7926f3e271
fix CUDAWorker
author | ikkun |
---|---|
date | Mon, 13 Feb 2017 17:58:04 +0900 |
parents | 609bf62768b9 |
children | 1dbaef86593b |
files | src/parallel_execution/CMakeLists.txt src/parallel_execution/CUDAWorker.cbc src/parallel_execution/CUDAtwice.cbc src/parallel_execution/CUDAtwice.cu src/parallel_execution/GPUWorker.cbc src/parallel_execution/GPUtwice.cbc src/parallel_execution/GPUtwice.cu src/parallel_execution/context.h |
diffstat | 8 files changed, 112 insertions(+), 110 deletions(-) [+] |
line wrap: on
line diff
--- a/src/parallel_execution/CMakeLists.txt Sun Feb 12 12:35:11 2017 +0900 +++ b/src/parallel_execution/CMakeLists.txt Mon Feb 13 17:58:04 2017 +0900 @@ -4,7 +4,6 @@ # -DUSE_CUDA # add_definitions("-Wall -g -O") -add_definitions("-Wall -g") set(CMAKE_C_COMPILER $ENV{CBC_COMPILER}) @@ -12,8 +11,12 @@ set(NVCCFLAG "-std=c++11" "-g" "-O0" ) set(CUDA_LINK_FLAGS "-framework CUDA -lc++ -Wl,-search_paths_first -Wl,-headerpad_max_install_names /Developer/NVIDIA/CUDA-8.0/lib/libcudart_static.a -Wl,-rpath,/usr/local/cuda/lib") find_package(CUDA REQUIRED) + add_definitions("-Wall -g -DUSE_CUDAWorker") +else() + add_definitions("-Wall -g") endif() + macro( GearsCommand ) set( _OPTIONS_ARGS ) set( _ONE_VALUE_ARGS TARGET ) @@ -30,7 +33,7 @@ COMMAND "perl" "generate_stub.pl" "-o" ${j} ${i} ) elseif (${i} MATCHES "\\.cu") - string(REGEX REPLACE "(.*).cbc" "c/\\1.ptx" j ${i}) + string(REGEX REPLACE "(.*).cu" "c/\\1.ptx" j ${i}) add_custom_command ( OUTPUT ${j} DEPENDS ${i} @@ -61,9 +64,9 @@ if (${USE_CUDA}) GearsCommand( TARGET - GPUtwice + CUDAtwice SOURCES - main.cbc RedBlackTree.cbc compare.c SingleLinkedStack.cbc CPUWorker.cbc time.cbc twice.cbc TaskManagerImpl.cbc SingleLinkedQueue.cbc SynchronizedQueue.cbc SemaphoreImpl.cbc GPUWorker.cbc GPUtwice.cu + main.cbc RedBlackTree.cbc compare.c SingleLinkedStack.cbc CPUWorker.cbc time.cbc twice.cbc TaskManagerImpl.cbc SingleLinkedQueue.cbc SynchronizedQueue.cbc SemaphoreImpl.cbc CUDAWorker.cbc CUDAtwice.cu ) endif()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/parallel_execution/CUDAWorker.cbc Mon Feb 13 17:58:04 2017 +0900 @@ -0,0 +1,67 @@ +#include <libkern/OSAtomic.h> + +#include "../context.h" + +static void start_worker(Worker* worker); + +union Data* createCUDAWorker(struct Context* context, int id, Queue* queue) { + struct Worker* worker = ALLOC(context, Worker); + struct CUDAWorker* CUDAWorker = ALLOC(context, CUDAWorker); + worker->worker = (union Data*)CUDAWorker; + worker->tasks = queue; + cpuWorker->id = id; + worker->taskReceive = C_taskReceiveCUDAWorker; + worker->shutdown = C_shutdownCUDAWorker; + pthread_create(&worker->worker->CUDAWorker.thread, NULL, (void*)&start_CUDAworker, worker); + return (union Data*)(worker); +} + +static void start_worker(Worker* worker) { + CUDAWorker* CUDAWorker = (CUDAWorker*)worker->worker; + CUDAWorker->context = NEW(struct Context); + initContext(CUDAWorker->context); + Gearef(CUDAWorker->context, Worker)->worker = (union Data*)worker; + goto meta(CUDAWorker->context, C_taskReceiveCUDAWorker); +} + +__code taskReceiveCUDAWorker(struct Context* context, Worker* worker, Queue* queue) { + queue->queue = (union Data*)worker->tasks; + queue->next = C_getTask; + goto meta(context, worker->tasks->take); +} + +__code taskReceiveCUDAWorker_stub(struct Context* context) { + CUDAWorker* CUDAWorker = (CUDAWorker *)GearImpl(context, CUDAWorker, CUDAworker); + pthread_cond_wait(&CUDAWorker->cond, &CUDAWorker->mutex); + goto taskReceiveCUDAWorker(context, &Gearef(context, Worker)->worker->Worker, Gearef(context, Queue)); +} + +__code getCUDATask(struct Context* context, Worker* worker, struct Context* task) { + if (!task) + return; // end thread + task->worker = worker; + context->next = C_taskReceiveCUDAWorker; // set CG after task exec + goto meta(task, task->next); +} + +__code getCUDATask_stub(struct Context* context) { + Worker* worker = &Gearef(context,Worker)->worker->Worker; + struct Context* task = &Gearef(context, Queue)->data->Context; + goto getCUDATask(context, worker, task); +} + +#ifdef USE_CUDA +__code twiceCUDA(struct Context* context) { + cuMemcpyHtoDAsync(context,context,context,context->stream); + cuLaunchkanel(); + cuMemcpyDtoHAsync(); +} +#endif + +__code shutdownWorker(struct Context* context, CPUWorker* worker) { +} + +__code shutdownWorker_stub(struct Context* context) { + CPUWorker* worker = (CPUWorker *)GearImpl(context, Worker, worker); + goto shutdownWorker(context,worker); +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/parallel_execution/CUDAtwice.cbc Mon Feb 13 17:58:04 2017 +0900 @@ -0,0 +1,27 @@ +#include <stdio.h> + +#include "context.h" +#include "origin_cs.h" + +__code twice(struct Context* context, struct LoopCounter* loopCounter, int index, int prefix, int* array, struct Context* workerContext) { + int i = loopCounter->i; + if (i < prefix) { + array[i+index*prefix] = array[i+index*prefix]*2; + loopCounter->i++; + + goto meta(context, C_twice); + } + + loopCounter->i = 0; + goto meta(workerContext, workerContext->next); +} + +__code twice_stub(struct Context* context) { + struct Context* workerContext = context->worker->worker->CPUWorker.context; + //入力のDataGearをGPUにbuffer経由で送る + //twiceカーネルが定義されてなければそれをロードする + //結果を取ってくるコマンドを入力する + //コマンドの終了待ちを行う + //continationにそってGPUworkerに戻る + goto twice(context, Gearef(context, LoopCounter), 0, 0, NULL, workerContext); +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/parallel_execution/CUDAtwice.cu Mon Feb 13 17:58:04 2017 +0900 @@ -0,0 +1,8 @@ +extern "C" { + __global__ void multiply(float* A, float* B, float* C) { +// printf("%d %d\n",i[0],i[1]); + int index = blockIdx.x * blockDim.x + threadIdx.x; + C[index] = A[index] * B[0]; + } + +}
--- a/src/parallel_execution/GPUWorker.cbc Sun Feb 12 12:35:11 2017 +0900 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,68 +0,0 @@ -#include <libkern/OSAtomic.h> - -#include "context.h" -#include "origin_cs.h" - -static void start_worker(Worker* worker); - -union Data* createGPUWorker(struct Context* context, int id, Queue* queue) { - struct Worker* worker = ALLOC(context, Worker); - struct GPUWorker* gpuWorker = ALLOC(context, GPUWorker); - worker->worker = (union Data*)gpuWorker; - worker->tasks = queue; - cpuWorker->id = id; - worker->taskReceive = C_taskReceiveGPUWorker; - worker->shutdown = C_shutdownGPUWorker; - pthread_create(&worker->worker->GPUWorker.thread, NULL, (void*)&start_GPUworker, worker); - return (union Data*)(worker); -} - -static void start_worker(Worker* worker) { - GPUWorker* gpuWorker = (GPUWorker*)worker->worker; - gpuWorker->context = NEW(struct Context); - initContext(gpuWorker->context); - Gearef(gpuWorker->context, Worker)->worker = (union Data*)worker; - goto meta(gpuWorker->context, C_taskReceiveGPUWorker); -} - -__code taskReceiveGPUWorker(struct Context* context, Worker* worker, Queue* queue) { - queue->queue = (union Data*)worker->tasks; - queue->next = C_getTask; - goto meta(context, worker->tasks->take); -} - -__code taskReceiveGPUWorker_stub(struct Context* context) { - GPUWorker* gpuWorker = (GPUWorker *)GearImpl(context, GPUWorker, gPUworker); - pthread_cond_wait(&gpuWorker->cond, &gpuWorker->mutex); - goto taskReceiveGPUWorker(context, &Gearef(context, Worker)->worker->Worker, Gearef(context, Queue)); -} - -__code getGPUTask(struct Context* context, Worker* worker, struct Context* task) { - if (!task) - return; // end thread - task->worker = worker; - context->next = C_taskReceiveGPUWorker; // set CG after task exec - goto meta(task, task->next); -} - -__code getGPUTask_stub(struct Context* context) { - Worker* worker = &Gearef(context,Worker)->worker->Worker; - struct Context* task = &Gearef(context, Queue)->data->Context; - goto getGPUTask(context, worker, task); -} - -#ifdef USE_CUDA -__code twiceGpu(struct Context* context) { - cuMemcpyHtoDAsync(context,context,context,context->stream); - cuLaunchkanel(); - cuMemcpyDtoHAsync(); -} -#endif - -__code shutdownWorker(struct Context* context, CPUWorker* worker) { -} - -__code shutdownWorker_stub(struct Context* context) { - CPUWorker* worker = (CPUWorker *)GearImpl(context, Worker, worker); - goto shutdownWorker(context,worker); -}
--- a/src/parallel_execution/GPUtwice.cbc Sun Feb 12 12:35:11 2017 +0900 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,27 +0,0 @@ -#include <stdio.h> - -#include "context.h" -#include "origin_cs.h" - -__code twice(struct Context* context, struct LoopCounter* loopCounter, int index, int prefix, int* array, struct Context* workerContext) { - int i = loopCounter->i; - if (i < prefix) { - array[i+index*prefix] = array[i+index*prefix]*2; - loopCounter->i++; - - goto meta(context, C_twice); - } - - loopCounter->i = 0; - goto meta(workerContext, workerContext->next); -} - -__code twice_stub(struct Context* context) { - struct Context* workerContext = context->worker->worker->CPUWorker.context; - //入力のDataGearをGPUにbuffer経由で送る - //twiceカーネルが定義されてなければそれをロードする - //結果を取ってくるコマンドを入力する - //コマンドの終了待ちを行う - //continationにそってGPUworkerに戻る - goto twice(context, Gearef(context, LoopCounter), 0, 0, NULL, workerContext); -}
--- a/src/parallel_execution/GPUtwice.cu Sun Feb 12 12:35:11 2017 +0900 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,8 +0,0 @@ -extern "C" { - __global__ void multiply(float* A, float* B, float* C) { -// printf("%d %d\n",i[0],i[1]); - int index = blockIdx.x * blockDim.x + threadIdx.x; - C[index] = A[index] * B[0]; - } - -}
--- a/src/parallel_execution/context.h Sun Feb 12 12:35:11 2017 +0900 +++ b/src/parallel_execution/context.h Mon Feb 13 17:58:04 2017 +0900 @@ -135,8 +135,8 @@ struct Context* context; int id; } CPUWorker; -#ifdef USE_CUDA - struct CudaWorker { +#ifdef USE_CUDAWorker + struct CUDAWorker { pthread_t thread; struct Context* context; int id; @@ -150,7 +150,7 @@ CUstream stream; } CudaWorker; #else - struct CudaWorker { + struct CUDAWorker { } CudaWorker; #endif struct Main {