Mercurial > hg > GearsTemplate
changeset 411:0eba9a04633f
Work CUDAtwice
author | Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Thu, 14 Sep 2017 18:26:52 +0900 |
parents | 85b0ddbf458e |
children | 409e6b5fb775 |
files | src/parallel_execution/CPUWorker.cbc src/parallel_execution/CUDAWorker.cbc src/parallel_execution/MultiDimIterator.cbc src/parallel_execution/TaskManagerImpl.cbc src/parallel_execution/context.h src/parallel_execution/cuda.c src/parallel_execution/examples/twice/CUDAtwice.cbc src/parallel_execution/examples/twice/CUDAtwice.cu src/parallel_execution/examples/twice/main.cbc |
diffstat | 9 files changed, 44 insertions(+), 35 deletions(-) [+] |
line wrap: on
line diff
--- a/src/parallel_execution/CPUWorker.cbc Thu Sep 14 02:35:20 2017 +0900 +++ b/src/parallel_execution/CPUWorker.cbc Thu Sep 14 18:26:52 2017 +0900 @@ -10,7 +10,7 @@ cpuWorker->id = id; worker->taskReceive = C_taskReceiveWorker; worker->shutdown = C_shutdownWorker; - pthread_create(&worker->worker->CPUWorker.thread, NULL, (void*)&startWorker, worker); + pthread_create(&worker->thread, NULL, (void*)&startWorker, worker); return worker; } @@ -34,7 +34,7 @@ __code getTask(struct Worker* worker, struct Context* task) { if (!task) - return; // end thread + goto meta(context, worker->shutdown); // end thread task->worker = worker; enum Code taskCg = task->next; if (task->iterate) {
--- a/src/parallel_execution/CUDAWorker.cbc Thu Sep 14 02:35:20 2017 +0900 +++ b/src/parallel_execution/CUDAWorker.cbc Thu Sep 14 18:26:52 2017 +0900 @@ -10,15 +10,12 @@ int cuda_initialized = 0; Worker* createCUDAWorker(struct Context* context, int id, Queue* queue, TaskManagerImpl *im) { - struct Worker* worker = ALLOC(context, Worker); + struct Worker* worker = new Worker(); struct CUDAWorker* cudaWorker = new CUDAWorker(); - - cudaInit(cudaWorker, 0); - worker->worker = (union Data*)cudaWorker; worker->tasks = queue; cudaWorker->id = id; - worker->taskReceive = C_taskReceiveWorker; + worker->taskReceive = C_taskReceiveCUDAWorker; worker->shutdown = C_shutdownCUDAWorker; #ifndef USE_CUDA_MAIN_THREAD pthread_create(&worker->worker->CUDAWorker.thread, NULL, (void*)&startCUDAWorker, worker); @@ -33,7 +30,8 @@ } static void startCUDAWorker(Worker* worker) { - CUDAWorker* cudaWorker = (CUDAWorker*)worker->worker; + struct CUDAWorker* cudaWorker = &worker->worker->CUDAWorker; + cudaInit(cudaWorker, 0); cudaWorker->context = NEW(struct Context); initContext(cudaWorker->context); Gearef(cudaWorker->context, Worker)->worker = (union Data*)worker; @@ -52,7 +50,7 @@ __code getTaskCUDA(struct Worker* worker, struct Context* task) { if (!task) - return; // end thread + goto meta(context, worker->shutdown); // end thread task->worker = worker; enum Code taskCg = task->next; if (task->iterate) { @@ -156,10 +154,10 @@ Gearef(context, LoopCounter)); } -extern void cudaShutdown( CUDAWorker *cudaWorker) ; +extern void cudaShutdown( CUDAWorker *cudaWorker); __code shutdownCUDAWorker(struct Context* context, CUDAWorker* worker) { - cudaShutdown(worker) ; + cudaShutdown(worker); } __code shutdownCUDAWorker_stub(struct Context* context) {
--- a/src/parallel_execution/MultiDimIterator.cbc Thu Sep 14 02:35:20 2017 +0900 +++ b/src/parallel_execution/MultiDimIterator.cbc Thu Sep 14 18:26:52 2017 +0900 @@ -109,7 +109,7 @@ } __code barrierMultiDimIterator(struct MultiDimIterator* iterator, struct Context* task, __code next(...), __code whenWait(...)) { - if (__sync_fetch_and_sub(&iterator->count, 1) == 1) { + if (task->gpu || __sync_fetch_and_sub(&iterator->count, 1) == 1) { goto next(...); } goto whenWait(...);
--- a/src/parallel_execution/TaskManagerImpl.cbc Thu Sep 14 02:35:20 2017 +0900 +++ b/src/parallel_execution/TaskManagerImpl.cbc Thu Sep 14 18:26:52 2017 +0900 @@ -221,36 +221,35 @@ Gearef(context, TaskManager)->next); } -__code shutdownTaskManagerImpl(struct TaskManagerImpl* taskManagerImpl, __code next(...), struct TaskManager* taskManager, struct Queue* queue) { - if (taskManagerImpl->taskCount != 0) { - sleep(1); - goto meta(context, taskManager->shutdown); +__code shutdownTaskManagerImpl(struct TaskManagerImpl* taskManager, __code next(...), struct Queue* queue) { + if (taskManager->taskCount != 0) { + usleep(1000); + goto meta(context, C_shutdownTaskManagerImpl); } - int i = taskManagerImpl->loopCounter->i; - if (taskManager->cpu <= i && i < taskManager->maxCPU) { - struct Queue* tasks = taskManagerImpl->workers[i]->tasks; + int i = taskManager->loopCounter->i; + if (i < taskManager->numWorker) { + struct Queue* tasks = taskManager->workers[i]->tasks; queue->queue = (union Data*)tasks; queue->data = NULL; queue->next = C_shutdownTaskManagerImpl1; goto meta(context, tasks->put); } - taskManagerImpl->loopCounter->i = 0; + taskManager->loopCounter->i = 0; goto meta(context, next); } __code shutdownTaskManagerImpl_stub(struct Context* context) { TaskManagerImpl* taskManagerImpl = (TaskManagerImpl*)GearImpl(context, TaskManager, taskManager); goto shutdownTaskManagerImpl(context, - taskManagerImpl, - Gearef(context, TaskManager)->next, - &Gearef(context, TaskManager)->taskManager->TaskManager, - Gearef(context, Queue)); + taskManagerImpl, + Gearef(context, TaskManager)->next, + Gearef(context, Queue)); } __code shutdownTaskManagerImpl1(TaskManagerImpl* taskManager) { int i = taskManager->loopCounter->i; - pthread_join(taskManager->workers[i]->worker->CPUWorker.thread, NULL); + pthread_join(taskManager->workers[i]->thread, NULL); taskManager->loopCounter->i++; goto meta(context, C_shutdownTaskManagerImpl); }
--- a/src/parallel_execution/context.h Thu Sep 14 02:35:20 2017 +0900 +++ b/src/parallel_execution/context.h Thu Sep 14 18:26:52 2017 +0900 @@ -171,10 +171,10 @@ enum Code shutdown; enum Code next; struct Queue* tasks; + pthread_t thread; struct TaskManager* taskManager; } Worker; struct CPUWorker { - pthread_t thread; pthread_mutex_t mutex; pthread_cond_t cond; struct Context* context;
--- a/src/parallel_execution/cuda.c Thu Sep 14 02:35:20 2017 +0900 +++ b/src/parallel_execution/cuda.c Thu Sep 14 18:26:52 2017 +0900 @@ -91,8 +91,10 @@ // CUDAWorker* cudaWorker = (CUDAWorker*)worker->worker; // memory allocate CUdeviceptr devA; + CUdeviceptr devB; checkCudaErrors(cuMemAlloc(&devA, sizeof(int)*array->size)); + checkCudaErrors(cuMemAlloc(&devB, sizeof(int))); //twiceカーネルが定義されてなければそれをロードする checkCudaErrors(cuModuleLoad(&context->module, "c/examples/twice/CUDAtwice.ptx")); @@ -100,11 +102,12 @@ //入力のDataGearをGPUにbuffer経由で送る // Synchronous data transfer(host to device) - checkCudaErrors(cuMemcpyHtoD(devA, array->array, array->size)); + checkCudaErrors(cuMemcpyHtoD(devA, array->array, sizeof(int)*array->size)); + checkCudaErrors(cuMemcpyHtoD(devB, &array->prefix, sizeof(int))); // Asynchronous launch kernel context->num_exec = 1; - void* args[] = {&devA}; + void* args[] = {&devA, &devB}; if (context->iterate) { struct MultiDimIterator* iterator = &context->iterator->iterator->MultiDimIterator; checkCudaErrors(cuLaunchKernel(context->function, @@ -120,7 +123,7 @@ } //結果を取ってくるコマンドを入力する //コマンドの終了待ちを行う - checkCudaErrors(cuMemcpyDtoH(array->array, devA, array->size)); + checkCudaErrors(cuMemcpyDtoH(array->array, devA, sizeof(int)*array->size)); // wait for stream checkCudaErrors(cuCtxSynchronize());
--- a/src/parallel_execution/examples/twice/CUDAtwice.cbc Thu Sep 14 02:35:20 2017 +0900 +++ b/src/parallel_execution/examples/twice/CUDAtwice.cbc Thu Sep 14 18:26:52 2017 +0900 @@ -19,7 +19,7 @@ __code CUDAtwice_stub(struct Context* context) { printf("CUDAtwice stub\n"); - struct Array* array = &context->data[context->dataNum+1]->Array; + struct Array* array = &context->data[context->idg]->Array; CUDAExec(context,array); //continuationにそってGPUworkerに戻る
--- a/src/parallel_execution/examples/twice/CUDAtwice.cu Thu Sep 14 02:35:20 2017 +0900 +++ b/src/parallel_execution/examples/twice/CUDAtwice.cu Thu Sep 14 18:26:52 2017 +0900 @@ -1,8 +1,14 @@ extern "C" { #include <stdio.h> - __global__ void twice(int* array) { - printf("array %p",array); - array[blockIdx.x] = array[blockIdx.x]*2; + __global__ void twice(int* array, int* prefixPtr) { + int i = 0; + int prefix = *prefixPtr; +C_twice: + if (i < prefix) { + array[i+blockIdx.x*prefix] = array[i+blockIdx.x*prefix]*2; + i++; + goto C_twice; + } } }
--- a/src/parallel_execution/examples/twice/main.cbc Thu Sep 14 02:35:20 2017 +0900 +++ b/src/parallel_execution/examples/twice/main.cbc Thu Sep 14 18:26:52 2017 +0900 @@ -87,7 +87,6 @@ goto meta(context, C_code3); } else puts("wrong result"); - } goto meta(context, C_exit_code); @@ -98,7 +97,11 @@ par goto createArray(array, __exit); - par goto CUDAtwice(array, iterate(split), __exit); + if(gpu_num) { + par goto CUDAtwice(array, iterate(split), __exit); + } else { + par goto twice(array, iterate(split), __exit); + } goto code2(); }