diff src/parallel_execution/cuda.c @ 431:b3359544adbb
Edit cudaExec, but it does not work yet
| author   | Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp> |
|----------|----------------------------------------------|
| date     | Tue, 17 Oct 2017 01:50:12 +0900              |
| parents  | 35b37fe8d3a7                                 |
| children | d920f3a3f037                                 |
--- a/src/parallel_execution/cuda.c  Mon Oct 09 17:46:42 2017 +0900
+++ b/src/parallel_execution/cuda.c  Tue Oct 17 01:50:12 2017 +0900
@@ -85,58 +85,66 @@
 }
 
-void CUDAExec(struct Context* context, struct SortArray* inputSortArray, struct SortArray* outputSortArray) {
-    //printf("cuda exec start\n");
-    // Worker *worker = context->worker;
-    // CUDAWorker* cudaWorker = (CUDAWorker*)worker->worker;
-    // memory allocate
-    CUdeviceptr devA;
-    CUdeviceptr devB;
-    CUdeviceptr devC;
-    CUdeviceptr devD;
+void cudaExec(struct Context* context, struct CudaBuffer* buffer, char* filename, char* function) {
+    buffer->kernelParams = (void **)calloc(buffer->inputLen + buffer->outputLen, sizeof(void *));
+    int paramCount = 0;
+    for (int i = 0; i < buffer->inputLen; i++) {
+        CUdeviceptr* deviceptr = (CUdeviceptr *)calloc(1, sizeof(CUdeviceptr));
+        // memory allocate
+        checkCudaErrors(cuMemAlloc(deviceptr, GET_SIZE(buffer->inputData[i])));
+        // Synchronous data transfer (host to device)
+        checkCudaErrors(cuMemcpyHtoD(*deviceptr, buffer->inputData[i], GET_SIZE(buffer->inputData[i])));
+        buffer->kernelParams[paramCount++] = deviceptr;
+    }
 
-    checkCudaErrors(cuMemAlloc(&devA, sizeof(struct Integer)*GET_LEN(inputSortArray->array)));
-    checkCudaErrors(cuMemAlloc(&devB, sizeof(int)));
-    checkCudaErrors(cuMemAlloc(&devC, sizeof(int)));
-    checkCudaErrors(cuMemAlloc(&devD, sizeof(int)));
+    for (int i = 0; i < buffer->outputLen; i++) {
+        CUdeviceptr* deviceptr = (CUdeviceptr *)calloc(1, sizeof(CUdeviceptr));
+        // memory allocate
+        checkCudaErrors(cuMemAlloc(deviceptr, GET_SIZE(buffer->outputData[i])));
+        // Synchronous data transfer (host to device)
+        checkCudaErrors(cuMemcpyHtoD(*deviceptr, buffer->outputData[i], GET_SIZE(buffer->outputData[i])));
+        buffer->kernelParams[paramCount++] = deviceptr;
+    }
 
-    // Load the twice kernel if it has not been loaded yet
-    checkCudaErrors(cuModuleLoad(&context->module, "c/examples/bitonicSort/CUDAbitonicSwap.ptx"));
-    checkCudaErrors(cuModuleGetFunction(&context->function, context->module, "bitonicSwap"));
-
-    // Send the input DataGears to the GPU via the buffer
-    // Synchronous data transfer (host to device)
-    checkCudaErrors(cuMemcpyHtoD(devA, inputSortArray->array, sizeof(struct Integer)*GET_LEN(inputSortArray->array)));
-    checkCudaErrors(cuMemcpyHtoD(devB, &inputSortArray->block, sizeof(int)));
-    checkCudaErrors(cuMemcpyHtoD(devC, &inputSortArray->first, sizeof(int)));
-    checkCudaErrors(cuMemcpyHtoD(devD, &inputSortArray->prefix, sizeof(int)));
+    // Load the kernel if it has not been loaded yet
+    checkCudaErrors(cuModuleLoad(&context->module, filename));
+    checkCudaErrors(cuModuleGetFunction(&context->function, context->module, function));
 
     // Asynchronous launch kernel
     context->num_exec = 1;
-    void* args[] = {&devA, &devB, &devC, &devD};
     if (context->iterate) {
         struct MultiDimIterator* iterator = &context->iterator->iterator->MultiDimIterator;
         checkCudaErrors(cuLaunchKernel(context->function,
                                        iterator->x, iterator->y, iterator->z,
                                        1, 1, 1,
-                                       0, NULL, args, NULL));
+                                       0, NULL, buffer->kernelParams, NULL));
     } else {
         checkCudaErrors(cuLaunchKernel(context->function,
                                        1, 1, 1,
                                        1, 1, 1,
-                                       0, NULL, args, NULL));
+                                       0, NULL, buffer->kernelParams, NULL));
     }
+
     // Enqueue the command that fetches the results
     // Wait for the command to finish
-    checkCudaErrors(cuMemcpyDtoH(inputSortArray->array, devA, sizeof(struct Integer)*GET_LEN(inputSortArray->array)));
-    outputSortArray->array = inputSortArray->array;
+    paramCount = 0;
+    for (int i = 0; i < buffer->inputLen; i++) {
+        CUdeviceptr* deviceptr = buffer->kernelParams[paramCount++];
+        checkCudaErrors(cuMemcpyDtoH(buffer->inputData[i], *deviceptr, GET_SIZE(buffer->inputData[i])));
+        cuMemFree(*deviceptr);
+        free(deviceptr);
+    }
+
+    for (int i = 0; i < buffer->outputLen; i++) {
+        CUdeviceptr* deviceptr = buffer->kernelParams[paramCount++];
+        checkCudaErrors(cuMemcpyDtoH(buffer->outputData[i], *deviceptr, GET_SIZE(buffer->outputData[i])));
+        cuMemFree(*deviceptr);
+        free(deviceptr);
+    }
+    free(buffer->kernelParams);
     // wait for stream
     checkCudaErrors(cuCtxSynchronize());
-
-    cuMemFree(devA);
-    cuMemFree(devB);
-    cuMemFree(devC);
-    cuMemFree(devD);
 }
 
 void cudaShutdown( struct CUDAWorker *worker) {
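To make the new calling convention concrete, the sketch below exercises the same kernelParams marshaling pattern with nothing but the CUDA driver API: one heap-allocated CUdeviceptr per kernel argument, a host-to-device copy into it, its address stored in kernelParams, a launch, and a device-to-host copy back. This is a minimal, self-contained example, not GearsOS code: the PTX file name `vecTwice.ptx`, the kernel name `twice`, and the `checkCudaErrors` definition are hypothetical stand-ins, and the CudaBuffer/Context plumbing from the changeset is left out.

```c
#include <cuda.h>
#include <stdio.h>
#include <stdlib.h>

/* Stand-in for the project's error-checking macro. */
#define checkCudaErrors(call) do {                              \
        CUresult _status = (call);                              \
        if (_status != CUDA_SUCCESS) {                          \
            fprintf(stderr, "CUDA error %d at %s:%d\n",         \
                    (int)_status, __FILE__, __LINE__);          \
            exit(EXIT_FAILURE);                                 \
        }                                                       \
    } while (0)

int main(void) {
    CUdevice device;
    CUcontext ctx;
    CUmodule module;
    CUfunction function;
    checkCudaErrors(cuInit(0));
    checkCudaErrors(cuDeviceGet(&device, 0));
    checkCudaErrors(cuCtxCreate(&ctx, 0, device));
    checkCudaErrors(cuModuleLoad(&module, "vecTwice.ptx"));           /* hypothetical PTX file */
    checkCudaErrors(cuModuleGetFunction(&function, module, "twice")); /* hypothetical kernel   */

    /* A single "input" argument, standing in for one entry of buffer->inputData. */
    int hostData[4] = {1, 2, 3, 4};
    size_t size = sizeof(hostData);

    /* One slot per kernel argument; each slot points at a heap-allocated CUdeviceptr,
     * which is what the per-input/per-output loops in cudaExec build up. */
    void** kernelParams = calloc(1, sizeof(void*));
    CUdeviceptr* deviceptr = calloc(1, sizeof(CUdeviceptr));
    checkCudaErrors(cuMemAlloc(deviceptr, size));                     /* device allocation   */
    checkCudaErrors(cuMemcpyHtoD(*deviceptr, hostData, size));        /* host -> device copy */
    kernelParams[0] = deviceptr;                                      /* cuLaunchKernel dereferences this */

    checkCudaErrors(cuLaunchKernel(function, 1, 1, 1,                 /* grid  */
                                   4, 1, 1,                           /* block */
                                   0, NULL, kernelParams, NULL));
    checkCudaErrors(cuCtxSynchronize());

    checkCudaErrors(cuMemcpyDtoH(hostData, *deviceptr, size));        /* device -> host copy */
    checkCudaErrors(cuMemFree(*deviceptr));
    free(deviceptr);
    free(kernelParams);

    for (int i = 0; i < 4; i++) printf("%d\n", hostData[i]);
    checkCudaErrors(cuCtxDestroy(ctx));
    return 0;
}
```

The important detail is that cuLaunchKernel expects kernelParams[i] to be the address of the i-th argument's value, so a device pointer argument is passed as a CUdeviceptr*; that is why cudaExec keeps each CUdeviceptr alive on the heap until after the launch, when the result is copied back and the allocation is freed.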