changeset 437:2c1b1d56bf1e

Make CUDAbitonicSort work with CUDAExecutor
author Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
date Sat, 04 Nov 2017 08:30:25 +0900
parents 08a93fc2f0d3
children 7679093bdd72
files src/parallel_execution/CUDAExecutor.cbc
diffstat 1 files changed, 7 insertions(+), 9 deletions(-) [+]
line wrap: on
line diff
--- a/src/parallel_execution/CUDAExecutor.cbc	Sat Nov 04 06:52:32 2017 +0900
+++ b/src/parallel_execution/CUDAExecutor.cbc	Sat Nov 04 08:30:25 2017 +0900
@@ -4,7 +4,6 @@
 // includes, project
 #include <driver_types.h>
 #include <cuda_runtime.h>
-#include <cuda.h>
 #include "../helper_cuda.h"
 #include "pthread.h"
 
@@ -22,15 +21,14 @@
     struct CUDABuffer* buffer = executor->buffer;
     int paramLen = buffer->inputLen + buffer->outputLen;
     executor->kernelParams = (CUdeviceptr**)ALLOCATE_PTR_ARRAY(context, CUdeviceptr, paramLen);
-    CUdeviceptr* deviceptrs = (CUdeviceptr*)ALLOCATE_ARRAY(context, CUdeviceptr, paramLen);
     for (int i = 0; i < paramLen; i++) {
-        CUdeviceptr deviceptr = deviceptrs[i];
+        CUdeviceptr* deviceptr = new CUdeviceptr();
         // memory allocate
         union Data* data = i < buffer->inputLen? buffer->inputData[i] : buffer->outputData[i-buffer->inputLen];
-        checkCudaErrors(cuMemAlloc(&deviceptr, GET_SIZE(data)));
-        checkCudaErrors(cuMemcpyHtoD(deviceptr, data, GET_SIZE(data)));
+        checkCudaErrors(cuMemAlloc(deviceptr, GET_SIZE(data)));
+        checkCudaErrors(cuMemcpyHtoD(*deviceptr, data, GET_SIZE(data)));
         // Synchronous data transfer(host to device)
-        executor->kernelParams[i] = &deviceptr;
+        executor->kernelParams[i] = deviceptr;
     }
     // TODO: Implements pipeline
     // goto next(...);
@@ -63,10 +61,10 @@
     struct CUDABuffer* buffer = executor->buffer;
     int paramLen = buffer->inputLen + buffer->outputLen;
     for (int i = 0; i < paramLen; i++) {
-        CUdeviceptr* deviceptr =  executor->kernelParams[i];
+        CUdeviceptr deviceptr =  *(executor->kernelParams[i]);
         union Data* data = i < buffer->inputLen? buffer->inputData[i] : buffer->outputData[i-buffer->inputLen];
-        checkCudaErrors(cuMemcpyDtoH(data, *deviceptr, GET_SIZE(data)));
-        cuMemFree(*deviceptr);
+        checkCudaErrors(cuMemcpyDtoH(data, deviceptr, GET_SIZE(data)));
+        cuMemFree(deviceptr);
     }
     // wait for stream
     checkCudaErrors(cuCtxSynchronize());