diff src/parallel_execution/cuda.c @ 436:08a93fc2f0d3

Fix CudaExecutor (still not working)
author Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
date Sat, 04 Nov 2017 06:52:32 +0900
parents d920f3a3f037
children dcc42f3e7e97
line wrap: on
line diff
--- a/src/parallel_execution/cuda.c	Sat Nov 04 04:14:36 2017 +0900
+++ b/src/parallel_execution/cuda.c	Sat Nov 04 06:52:32 2017 +0900
@@ -84,82 +84,12 @@
     printf("cuda Init: Done\n");
 }
 
-void cudaRead(struct CudaBuffer* buffer) { // Stage every input and output of `buffer` on the device and record the device-pointer holders in buffer->kernelParams.
-    buffer->kernelParams = (void **)calloc(buffer->inputLen + buffer->outputLen, sizeof(void *)); // one slot per input followed by one slot per output
-    int paramCount = 0; // next free slot in kernelParams
-    for (int i = 0; i < buffer->inputLen; i++) {
-        CUdeviceptr* deviceptr = (CUdeviceptr *)calloc(1, sizeof(CUdeviceptr)); // host-side holder for the device pointer; stored in kernelParams below
-        // allocate device memory of GET_SIZE bytes for this input
-        checkCudaErrors(cuMemAlloc(deviceptr, GET_SIZE(buffer->inputData[i])));
-        // synchronous copy of the input data (host to device)
-        checkCudaErrors(cuMemcpyHtoD(*deviceptr, buffer->inputData[i], GET_SIZE(buffer->inputData[i])));
-        buffer->kernelParams[paramCount++] = deviceptr;
-    }
-
-    for (int i = 0; i < buffer->outputLen; i++) {
-        CUdeviceptr* deviceptr = (CUdeviceptr *)calloc(1, sizeof(CUdeviceptr)); // host-side holder for the device pointer; stored in kernelParams below
-        // allocate device memory of GET_SIZE bytes for this output
-        checkCudaErrors(cuMemAlloc(deviceptr, GET_SIZE(buffer->outputData[i])));
-        // outputs are also copied host to device (synchronously) before the launch
-        checkCudaErrors(cuMemcpyHtoD(*deviceptr, buffer->outputData[i], GET_SIZE(buffer->outputData[i])));
-        buffer->kernelParams[paramCount++] = deviceptr;
-    }
-}
-
 void cudaLoadFunction(struct Context* context, char* filename, char* function) { // Load a module file and look up one function in it, storing both handles on `context`.
     checkCudaErrors(cuModuleLoad(&context->module, filename)); // module loaded from `filename` goes to context->module
     checkCudaErrors(cuModuleGetFunction(&context->function, context->module, function)); // handle for the function named `function` goes to context->function
 }
 
-void cudaExec2(struct Context* context, struct CudaBuffer* buffer) { // Launch context->function with buffer->kernelParams as its argument list.
-    // Asynchronous kernel launch
-    context->num_exec = 1;
-    if (context->iterate) {
-        struct MultiDimIterator* iterator = &context->iterator->iterator->MultiDimIterator;
-        checkCudaErrors(cuLaunchKernel(context->function,
-                    iterator->x/1024, iterator->y, iterator->z, // grid dimensions; NOTE(review): if x is an integer type, x/1024 truncates and is 0 when x < 1024 - confirm with callers
-                    1024, 1, 1, // block dimensions: 1024 threads along x
-                    0, NULL, buffer->kernelParams, NULL)); // 0 bytes of dynamic shared memory, NULL stream, no extra options
-
-    } else {
-        checkCudaErrors(cuLaunchKernel(context->function,
-                    1, 1, 1, // grid dimensions: a single block
-                    1, 1, 1, // block dimensions: a single thread
-                    0, NULL, buffer->kernelParams, NULL)); // 0 bytes of dynamic shared memory, NULL stream, no extra options
-    }
-}
-
-void cudaWrite(struct CudaBuffer* buffer) { // Copy every input and output back from the device, then release the device memory and the kernelParams bookkeeping.
-    // Issue the commands that fetch the results
-    // Wait for the commands to finish
-    int paramCount = 0; // walks kernelParams in the same order the slots were filled: inputs first, then outputs
-    for (int i = 0; i < buffer->inputLen; i++) {
-        CUdeviceptr* deviceptr =  buffer->kernelParams[paramCount++];
-        checkCudaErrors(cuMemcpyDtoH(buffer->inputData[i], *deviceptr, GET_SIZE(buffer->inputData[i])));
-        cuMemFree(*deviceptr); // NOTE(review): result is not passed through checkCudaErrors, unlike the other driver calls here
-        free(deviceptr); // release the host-side holder
-    }
-
-    for (int i = 0; i < buffer->outputLen; i++) {
-        CUdeviceptr* deviceptr =  buffer->kernelParams[paramCount++];
-        checkCudaErrors(cuMemcpyDtoH(buffer->outputData[i], *deviceptr, GET_SIZE(buffer->outputData[i])));
-        cuMemFree(*deviceptr); // NOTE(review): result is not passed through checkCudaErrors, unlike the other driver calls here
-        free(deviceptr); // release the host-side holder
-    }
-    free(buffer->kernelParams);
-    // wait for outstanding work in the current context (runs after the copies above)
-    checkCudaErrors(cuCtxSynchronize());
-}
-
-void cudaExec(struct Context* context, struct CudaBuffer* buffer, char* filename, char* function) { // Run one kernel end to end by calling cudaLoadFunction, cudaRead, cudaExec2 and cudaWrite in that order.
-    // Load the kernel if it has not been defined yet (NOTE(review): the call below is unconditional)
-    cudaLoadFunction(context, filename, function);
-    cudaRead(buffer);
-    cudaExec2(context, buffer);
-    cudaWrite(buffer);
-}
-
-void cudaShutdown( struct CUDAWorker *worker) {
+void cudaShutdown(struct CUDAWorker *worker) {
     //    for (int i=0;i<worker->num_stream;i++)
     //        checkCudaErrors(cuStreamDestroy(worker->stream[i]));
     checkCudaErrors(cuCtxDestroy(worker->cuCtx));