Mercurial > hg > GearsTemplate
changeset 433:d920f3a3f037
Refactoring cuda.c
author | Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Tue, 17 Oct 2017 15:47:33 +0900 |
parents | 6bb391fc9e12 |
children | b75badf42701 |
files | src/parallel_execution/cuda.c |
diffstat | 1 files changed, 18 insertions(+), 6 deletions(-) [+] |
line wrap: on
line diff
--- a/src/parallel_execution/cuda.c Tue Oct 17 02:09:14 2017 +0900
+++ b/src/parallel_execution/cuda.c Tue Oct 17 15:47:33 2017 +0900
@@ -84,8 +84,7 @@
     printf("cuda Init: Done\n");
 }
 
-
-void cudaExec(struct Context* context, struct CudaBuffer* buffer, char* filename, char* function) {
+void cudaRead(struct CudaBuffer* buffer) {
     buffer->kernelParams = (void **)calloc(buffer->inputLen + buffer->outputLen, sizeof(void *));
     int paramCount = 0;
     for (int i = 0; i < buffer->inputLen; i++) {
@@ -105,18 +104,21 @@
         checkCudaErrors(cuMemcpyHtoD(*deviceptr, buffer->outputData[i], GET_SIZE(buffer->outputData[i])));
         buffer->kernelParams[paramCount++] = deviceptr;
     }
+}
 
-    // カーネルが定義されてなければそれをロードする
+void cudaLoadFunction(struct Context* context, char* filename, char* function) {
     checkCudaErrors(cuModuleLoad(&context->module, filename));
     checkCudaErrors(cuModuleGetFunction(&context->function, context->module, function));
+}
 
+void cudaExec2(struct Context* context, struct CudaBuffer* buffer) {
     // Asynchronous launch kernel
     context->num_exec = 1;
     if (context->iterate) {
         struct MultiDimIterator* iterator = &context->iterator->iterator->MultiDimIterator;
         checkCudaErrors(cuLaunchKernel(context->function,
-                    iterator->x, iterator->y, iterator->z,
-                    1, 1, 1,
+                    iterator->x/1024, iterator->y, iterator->z,
+                    1024, 1, 1,
                     0, NULL, buffer->kernelParams, NULL));
     } else {
@@ -125,10 +127,12 @@
                     1, 1, 1,
                     0, NULL, buffer->kernelParams, NULL));
     }
+}
 
+void cudaWrite(struct CudaBuffer* buffer) {
     //結果を取ってくるコマンドを入力する
     //コマンドの終了待ちを行う
-    paramCount = 0;
+    int paramCount = 0;
     for (int i = 0; i < buffer->inputLen; i++) {
         CUdeviceptr* deviceptr = buffer->kernelParams[paramCount++];
         checkCudaErrors(cuMemcpyDtoH(buffer->inputData[i], *deviceptr, GET_SIZE(buffer->inputData[i])));
@@ -147,6 +151,14 @@
     checkCudaErrors(cuCtxSynchronize());
 }
 
+void cudaExec(struct Context* context, struct CudaBuffer* buffer, char* filename, char* function) {
+    // カーネルが定義されてなければそれをロードする
+    cudaLoadFunction(context, filename, function);
+    cudaRead(buffer);
+    cudaExec2(context, buffer);
+    cudaWrite(buffer);
+}
+
 void cudaShutdown( struct CUDAWorker *worker) {
     // for (int i=0;i<worker->num_stream;i++)
     //     checkCudaErrors(cuStreamDestroy(worker->stream[i]));