Mercurial > hg > Members > Moririn
diff src/parallel_execution/cuda.c @ 414:49159fbdd1fb
Work CUDAbitonicSort
author | Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Fri, 15 Sep 2017 22:49:45 +0900 |
parents | 409e6b5fb775 |
children | 35b37fe8d3a7 |
line wrap: on
line diff
--- a/src/parallel_execution/cuda.c Thu Sep 14 22:28:52 2017 +0900
+++ b/src/parallel_execution/cuda.c Fri Sep 15 22:49:45 2017 +0900
@@ -85,29 +85,35 @@
 }

-void CUDAExec(struct Context* context, struct Array* array) {
-    printf("cuda exec start\n");
+void CUDAExec(struct Context* context, struct SortArray* inputSortArray, struct SortArray* outputSortArray) {
+    //printf("cuda exec start\n");
     // Worker *worker = context->worker;
     // CUDAWorker* cudaWorker = (CUDAWorker*)worker->worker;
     // memory allocate
     CUdeviceptr devA;
     CUdeviceptr devB;
+    CUdeviceptr devC;
+    CUdeviceptr devD;

-    checkCudaErrors(cuMemAlloc(&devA, sizeof(int)*array->size));
+    checkCudaErrors(cuMemAlloc(&devA, sizeof(struct Integer)*GET_SIZE(inputSortArray->array)));
     checkCudaErrors(cuMemAlloc(&devB, sizeof(int)));
+    checkCudaErrors(cuMemAlloc(&devC, sizeof(int)));
+    checkCudaErrors(cuMemAlloc(&devD, sizeof(int)));

     //twiceカーネルが定義されてなければそれをロードする
-    checkCudaErrors(cuModuleLoad(&context->module, "c/examples/twice/CUDAtwice.ptx"));
-    checkCudaErrors(cuModuleGetFunction(&context->function, context->module, "twice"));
+    checkCudaErrors(cuModuleLoad(&context->module, "c/examples/bitonicSort/CUDAbitonicSwap.ptx"));
+    checkCudaErrors(cuModuleGetFunction(&context->function, context->module, "bitonicSwap"));

     //入力のDataGearをGPUにbuffer経由で送る
     // Synchronous data transfer(host to device)
-    checkCudaErrors(cuMemcpyHtoD(devA, array->array, sizeof(int)*array->size));
-    checkCudaErrors(cuMemcpyHtoD(devB, &array->prefix, sizeof(int)));
+    checkCudaErrors(cuMemcpyHtoD(devA, inputSortArray->array, sizeof(struct Integer)*GET_SIZE(inputSortArray->array)));
+    checkCudaErrors(cuMemcpyHtoD(devB, &inputSortArray->block, sizeof(int)));
+    checkCudaErrors(cuMemcpyHtoD(devC, &inputSortArray->first, sizeof(int)));
+    checkCudaErrors(cuMemcpyHtoD(devD, &inputSortArray->prefix, sizeof(int)));

     // Asynchronous launch kernel
     context->num_exec = 1;
-    void* args[] = {&devA, &devB};
+    void* args[] = {&devA, &devB, &devC, &devD};
     if (context->iterate) {
         struct MultiDimIterator* iterator = &context->iterator->iterator->MultiDimIterator;
         checkCudaErrors(cuLaunchKernel(context->function,
@@ -123,12 +129,14 @@
     }
     //結果を取ってくるコマンドを入力する
     //コマンドの終了待ちを行う
-    checkCudaErrors(cuMemcpyDtoH(array->array, devA, sizeof(int)*array->size));
-
+    checkCudaErrors(cuMemcpyDtoH(inputSortArray->array, devA, sizeof(struct Integer)*GET_SIZE(inputSortArray->array)));
+    outputSortArray->array = inputSortArray->array;
     // wait for stream
     checkCudaErrors(cuCtxSynchronize());
     cuMemFree(devA);
     cuMemFree(devB);
+    cuMemFree(devC);
+    cuMemFree(devD);
 }

 void cudaShutdown( struct CUDAWorker *worker) {