Mercurial > hg > GearsTemplate
diff src/parallel_execution/cuda.c @ 436:08a93fc2f0d3
Fix CudaExecutor (still not working)
author | Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Sat, 04 Nov 2017 06:52:32 +0900 |
parents | d920f3a3f037 |
children | dcc42f3e7e97 |
line wrap: on
line diff
--- a/src/parallel_execution/cuda.c Sat Nov 04 04:14:36 2017 +0900 +++ b/src/parallel_execution/cuda.c Sat Nov 04 06:52:32 2017 +0900 @@ -84,82 +84,12 @@ printf("cuda Init: Done\n"); } -void cudaRead(struct CudaBuffer* buffer) { - buffer->kernelParams = (void **)calloc(buffer->inputLen + buffer->outputLen, sizeof(void *)); - int paramCount = 0; - for (int i = 0; i < buffer->inputLen; i++) { - CUdeviceptr* deviceptr = (CUdeviceptr *)calloc(1, sizeof(CUdeviceptr)); - // memory allocate - checkCudaErrors(cuMemAlloc(deviceptr, GET_SIZE(buffer->inputData[i]))); - // Synchronous data transfer(host to device) - checkCudaErrors(cuMemcpyHtoD(*deviceptr, buffer->inputData[i], GET_SIZE(buffer->inputData[i]))); - buffer->kernelParams[paramCount++] = deviceptr; - } - - for (int i = 0; i < buffer->outputLen; i++) { - CUdeviceptr* deviceptr = (CUdeviceptr *)calloc(1, sizeof(CUdeviceptr)); - // memory allocate - checkCudaErrors(cuMemAlloc(deviceptr, GET_SIZE(buffer->outputData[i]))); - // Synchronous data transfer(host to device) - checkCudaErrors(cuMemcpyHtoD(*deviceptr, buffer->outputData[i], GET_SIZE(buffer->outputData[i]))); - buffer->kernelParams[paramCount++] = deviceptr; - } -} - void cudaLoadFunction(struct Context* context, char* filename, char* function) { checkCudaErrors(cuModuleLoad(&context->module, filename)); checkCudaErrors(cuModuleGetFunction(&context->function, context->module, function)); } -void cudaExec2(struct Context* context, struct CudaBuffer* buffer) { - // Asynchronous launch kernel - context->num_exec = 1; - if (context->iterate) { - struct MultiDimIterator* iterator = &context->iterator->iterator->MultiDimIterator; - checkCudaErrors(cuLaunchKernel(context->function, - iterator->x/1024, iterator->y, iterator->z, - 1024, 1, 1, - 0, NULL, buffer->kernelParams, NULL)); - - } else { - checkCudaErrors(cuLaunchKernel(context->function, - 1, 1, 1, - 1, 1, 1, - 0, NULL, buffer->kernelParams, NULL)); - } -} - -void cudaWrite(struct CudaBuffer* buffer) { - 
//結果を取ってくるコマンドを入力する - //コマンドの終了待ちを行う - int paramCount = 0; - for (int i = 0; i < buffer->inputLen; i++) { - CUdeviceptr* deviceptr = buffer->kernelParams[paramCount++]; - checkCudaErrors(cuMemcpyDtoH(buffer->inputData[i], *deviceptr, GET_SIZE(buffer->inputData[i]))); - cuMemFree(*deviceptr); - free(deviceptr); - } - - for (int i = 0; i < buffer->outputLen; i++) { - CUdeviceptr* deviceptr = buffer->kernelParams[paramCount++]; - checkCudaErrors(cuMemcpyDtoH(buffer->outputData[i], *deviceptr, GET_SIZE(buffer->outputData[i]))); - cuMemFree(*deviceptr); - free(deviceptr); - } - free(buffer->kernelParams); - // wait for stream - checkCudaErrors(cuCtxSynchronize()); -} - -void cudaExec(struct Context* context, struct CudaBuffer* buffer, char* filename, char* function) { - // カーネルが定義されてなければそれをロードする - cudaLoadFunction(context, filename, function); - cudaRead(buffer); - cudaExec2(context, buffer); - cudaWrite(buffer); -} - -void cudaShutdown( struct CUDAWorker *worker) { +void cudaShutdown(struct CUDAWorker *worker) { // for (int i=0;i<worker->num_stream;i++) // checkCudaErrors(cuStreamDestroy(worker->stream[i])); checkCudaErrors(cuCtxDestroy(worker->cuCtx));