# HG changeset patch # User ikkun # Date 1487058922 -32400 # Node ID aeddca6860075284d6ae0af196195fbea6b7803b # Parent 700f247f32a11f7f978397574ccd8a88505a2ed3 CUDAtwice diff -r 700f247f32a1 -r aeddca686007 src/parallel_execution/CMakeLists.txt --- a/src/parallel_execution/CMakeLists.txt Tue Feb 14 12:31:58 2017 +0900 +++ b/src/parallel_execution/CMakeLists.txt Tue Feb 14 16:55:22 2017 +0900 @@ -63,7 +63,7 @@ main.cbc RedBlackTree.cbc compare.c SingleLinkedStack.cbc CPUWorker.cbc time.cbc twice.cbc TaskManagerImpl.cbc SingleLinkedQueue.cbc SynchronizedQueue.cbc SemaphoreImpl.cbc ) -if (${USE_CUDA}==1) +if (${USE_CUDA}) GearsCommand( TARGET CUDAtwice diff -r 700f247f32a1 -r aeddca686007 src/parallel_execution/CUDAtwice.cbc --- a/src/parallel_execution/CUDAtwice.cbc Tue Feb 14 12:31:58 2017 +0900 +++ b/src/parallel_execution/CUDAtwice.cbc Tue Feb 14 16:55:22 2017 +0900 @@ -15,73 +15,38 @@ } __code twice_stub(struct Context* context) { + struct LoopCounter* loopCounter = &context->data[context->dataNum]->LoopCounter; + struct Array* array = &context->data[context->dataNum+1]->Array; + Worker *worker = context->worker; + CUDAWorker* cudaWorker = (CUDAWorker*)worker->worker; // memory allocate CUdeviceptr devA; - CUdeviceptr devB[num_exec]; CUdeviceptr devOut[num_exec]; - checkCudaErrors(cuMemAlloc(&devA, LENGTH*THREAD*sizeof(float))); - for (int i=0;isize)); //twiceカーネルが定義されてなければそれをロードする - checkCudaErrors(cuModuleLoad(&module, "multiply.ptx")); - checkCudaErrors(cuModuleGetFunction(&function, module, "multiply")); + checkCudaErrors(cuModuleLoad(&context->module, "CUDAtwice.ptx")); + checkCudaErrors(cuModuleGetFunction(context->&function, module, "twice")); //入力のDataGearをGPUにbuffer経由で送る // Synchronous data transfer(host to device) - checkCudaErrors(cuMemcpyHtoD(devA, A, LENGTH*THREAD*sizeof(float))); + checkCudaErrors(cuMemcpyHtoD(devA, array->array, array->size)); // Asynchronous launch kernel - for (int i=0;inum_exec = 1; + void* args[] = {&devA}; + checkCudaErrors(cuLaunchKernel(function, + array->prefix, 1, 1, + context->num_exec, 1, 1, + 0, NULL , args, NULL)); //結果を取ってくるコマンドを入力する - //コマンドの終了待ちを行う - // Asynchronous data transfer(device to host) - for (int i=0;iarray, devA, array->size)); // wait for stream - for (int i=0;iworker->worker->CUDAWorker.context; - goto twice(context, Gearef(context, LoopCounter), 0, 0, NULL, workerContext); + goto meta(context, context->next); } diff -r 700f247f32a1 -r aeddca686007 src/parallel_execution/CUDAtwice.cu --- a/src/parallel_execution/CUDAtwice.cu Tue Feb 14 12:31:58 2017 +0900 +++ b/src/parallel_execution/CUDAtwice.cu Tue Feb 14 16:55:22 2017 +0900 @@ -1,8 +1,11 @@ extern "C" { - __global__ void multiply(float* A, float* B, float* C) { -// printf("%d %d\n",i[0],i[1]); - int index = blockIdx.x * blockDim.x + threadIdx.x; - C[index] = A[index] * B[0]; +#include + __global__ void twice(strct LoopCounter* loopCounter, int prefix ,int* array) { + int index = blockIdx.x * blockDim.x + threadIdx.x; + printf("array %p, blockIdx.x = %d, blockDim.x = %d, threadIdx.x = %d\n"); + int i = 0; + while (i < prefix) { + array[i+index*prefix] = array[i+index*prefix]*2; + } } - } diff -r 700f247f32a1 -r aeddca686007 src/parallel_execution/context.h --- a/src/parallel_execution/context.h Tue Feb 14 12:31:58 2017 +0900 +++ b/src/parallel_execution/context.h Tue Feb 14 16:55:22 2017 +0900 @@ -79,6 +79,11 @@ int odg; int maxOdg; int workerId; +#ifdef USE_CUDAWorker + int num_exec; + CUmodule module; + CUfunction function; +#endef union Data **data; }; @@ -95,7 +100,6 @@ } Time; struct LoopCounter { int i; - struct Tree* tree; } LoopCounter; struct TaskManager { union Data* taskManager; @@ -212,6 +216,7 @@ struct Element* next; } Element; struct Array { + int size; int index; int prefix; int* array;