# HG changeset patch # User Tatsuki IHA # Date 1508222853 -32400 # Node ID d920f3a3f0374161fd648960cf4b82e90468afc3 # Parent 6bb391fc9e1217d5578558136092eff972575f4c Refactoring cuda.c diff -r 6bb391fc9e12 -r d920f3a3f037 src/parallel_execution/cuda.c --- a/src/parallel_execution/cuda.c Tue Oct 17 02:09:14 2017 +0900 +++ b/src/parallel_execution/cuda.c Tue Oct 17 15:47:33 2017 +0900 @@ -84,8 +84,7 @@ printf("cuda Init: Done\n"); } - -void cudaExec(struct Context* context, struct CudaBuffer* buffer, char* filename, char* function) { +void cudaRead(struct CudaBuffer* buffer) { buffer->kernelParams = (void **)calloc(buffer->inputLen + buffer->outputLen, sizeof(void *)); int paramCount = 0; for (int i = 0; i < buffer->inputLen; i++) { @@ -105,18 +104,21 @@ checkCudaErrors(cuMemcpyHtoD(*deviceptr, buffer->outputData[i], GET_SIZE(buffer->outputData[i]))); buffer->kernelParams[paramCount++] = deviceptr; } +} - // カーネルが定義されてなければそれをロードする +void cudaLoadFunction(struct Context* context, char* filename, char* function) { checkCudaErrors(cuModuleLoad(&context->module, filename)); checkCudaErrors(cuModuleGetFunction(&context->function, context->module, function)); +} +void cudaExec2(struct Context* context, struct CudaBuffer* buffer) { // Asynchronous launch kernel context->num_exec = 1; if (context->iterate) { struct MultiDimIterator* iterator = &context->iterator->iterator->MultiDimIterator; checkCudaErrors(cuLaunchKernel(context->function, - iterator->x, iterator->y, iterator->z, - 1, 1, 1, + iterator->x/1024, iterator->y, iterator->z, + 1024, 1, 1, 0, NULL, buffer->kernelParams, NULL)); } else { @@ -125,10 +127,12 @@ 1, 1, 1, 0, NULL, buffer->kernelParams, NULL)); } +} +void cudaWrite(struct CudaBuffer* buffer) { //結果を取ってくるコマンドを入力する //コマンドの終了待ちを行う - paramCount = 0; + int paramCount = 0; for (int i = 0; i < buffer->inputLen; i++) { CUdeviceptr* deviceptr = buffer->kernelParams[paramCount++]; checkCudaErrors(cuMemcpyDtoH(buffer->inputData[i], *deviceptr, GET_SIZE(buffer->inputData[i]))); @@ -147,6 +151,14 @@ checkCudaErrors(cuCtxSynchronize()); } +void cudaExec(struct Context* context, struct CudaBuffer* buffer, char* filename, char* function) { + // カーネルが定義されてなければそれをロードする + cudaLoadFunction(context, filename, function); + cudaRead(buffer); + cudaExec2(context, buffer); + cudaWrite(buffer); +} + void cudaShutdown( struct CUDAWorker *worker) { // for (int i=0;inum_stream;i++) // checkCudaErrors(cuStreamDestroy(worker->stream[i]));