diff src/parallel_execution/cuda.c @ 414:49159fbdd1fb

Work CUDAbitonicSort
author Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
date Fri, 15 Sep 2017 22:49:45 +0900
parents 409e6b5fb775
children 35b37fe8d3a7
line wrap: on
line diff
--- a/src/parallel_execution/cuda.c	Thu Sep 14 22:28:52 2017 +0900
+++ b/src/parallel_execution/cuda.c	Fri Sep 15 22:49:45 2017 +0900
@@ -85,29 +85,35 @@
 }
 
 
-void CUDAExec(struct Context* context, struct Array* array) {
-    printf("cuda exec start\n");
+void CUDAExec(struct Context* context, struct SortArray* inputSortArray, struct SortArray* outputSortArray) {
+    //printf("cuda exec start\n");
     // Worker *worker = context->worker;
     // CUDAWorker* cudaWorker = (CUDAWorker*)worker->worker;
     // memory allocate
     CUdeviceptr devA;
     CUdeviceptr devB;
+    CUdeviceptr devC;
+    CUdeviceptr devD;
 
-    checkCudaErrors(cuMemAlloc(&devA, sizeof(int)*array->size));
+    checkCudaErrors(cuMemAlloc(&devA, sizeof(struct Integer)*GET_SIZE(inputSortArray->array)));
     checkCudaErrors(cuMemAlloc(&devB, sizeof(int)));
+    checkCudaErrors(cuMemAlloc(&devC, sizeof(int)));
+    checkCudaErrors(cuMemAlloc(&devD, sizeof(int)));
 
     //twiceカーネルが定義されてなければそれをロードする
-    checkCudaErrors(cuModuleLoad(&context->module, "c/examples/twice/CUDAtwice.ptx"));
-    checkCudaErrors(cuModuleGetFunction(&context->function, context->module, "twice"));
+    checkCudaErrors(cuModuleLoad(&context->module, "c/examples/bitonicSort/CUDAbitonicSwap.ptx"));
+    checkCudaErrors(cuModuleGetFunction(&context->function, context->module, "bitonicSwap"));
 
     //入力のDataGearをGPUにbuffer経由で送る
     // Synchronous data transfer(host to device)
-    checkCudaErrors(cuMemcpyHtoD(devA, array->array, sizeof(int)*array->size));
-    checkCudaErrors(cuMemcpyHtoD(devB, &array->prefix, sizeof(int)));
+    checkCudaErrors(cuMemcpyHtoD(devA, inputSortArray->array, sizeof(struct Integer)*GET_SIZE(inputSortArray->array)));
+    checkCudaErrors(cuMemcpyHtoD(devB, &inputSortArray->block, sizeof(int)));
+    checkCudaErrors(cuMemcpyHtoD(devC, &inputSortArray->first, sizeof(int)));
+    checkCudaErrors(cuMemcpyHtoD(devD, &inputSortArray->prefix, sizeof(int)));
 
     // Asynchronous launch kernel
     context->num_exec = 1;
-    void* args[] = {&devA, &devB};
+    void* args[] = {&devA, &devB, &devC, &devD};
     if (context->iterate) {
         struct MultiDimIterator* iterator = &context->iterator->iterator->MultiDimIterator;
         checkCudaErrors(cuLaunchKernel(context->function,
@@ -123,12 +129,14 @@
     }
     //結果を取ってくるコマンドを入力する
     //コマンドの終了待ちを行う   
-    checkCudaErrors(cuMemcpyDtoH(array->array, devA, sizeof(int)*array->size));
-
+    checkCudaErrors(cuMemcpyDtoH(inputSortArray->array, devA, sizeof(struct Integer)*GET_SIZE(inputSortArray->array)));
+    outputSortArray->array = inputSortArray->array;
     // wait for stream
     checkCudaErrors(cuCtxSynchronize());
     cuMemFree(devA);
     cuMemFree(devB);
+    cuMemFree(devC);
+    cuMemFree(devD);
 }
 
 void cudaShutdown( struct CUDAWorker *worker) {