changeset 1922:bbd209709ca1 draft

fix
author Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
date Wed, 29 Jan 2014 18:55:59 +0900
parents 91ada4e540f2
children e801016bd47c
files TaskManager/Cuda/CudaScheduler.cc example/Cuda/Makefile.def example/Cuda/main.cc example/Cuda/multiply.cu
diffstat 4 files changed, 25 insertions(+), 7 deletions(-) [+]
line wrap: on
line diff
--- a/TaskManager/Cuda/CudaScheduler.cc	Wed Jan 29 17:08:07 2014 +0900
+++ b/TaskManager/Cuda/CudaScheduler.cc	Wed Jan 29 18:55:59 2014 +0900
@@ -214,13 +214,13 @@
                     
                     param++;
                 }
-                memin[cur].size  = param; // +1 means param
+                memin[cur].size = param; // +1 means param
                 
                 for(int i = 0; i<nextTask->outData_count;i++) { // set output data
                     ListElement *output_buf = nextTask->outData(i);
                     if (output_buf->size==0) break;
                     if (!flag[cur].flip) { // flip use memin for output 
-                        createBuffer(&memout[cur], i, context, CL_MEM_WRITE_ONLY, output_buf->size, &ret);
+                        createBuffer(&memout[cur], i, context, output_buf->size, &ret);
                         if (ret!=0) { cudaTaskError(cur,tasklist,ret); continue; }
                         ret = cuParamSetv(kernel[cur], 0, memout[cur].buf[i], sizeof(memout));
                         if (ret!=0) { cudaTaskError(cur,tasklist,ret); continue;}
@@ -230,17 +230,34 @@
                 }
                 memout[cur].size = param - memin[cur].size;  // no buffer on flip, but flip use memout event
 
+                void* kernelParams;
+
+                if (!flag[cur.flip]) {
+                    kernelParams = malloc(sizeof(void*)*param);
+                    for (int i = 0; i<memin[cur].size; i++) {
+                        kernelParams[i] = memin[cur].buf[i];
+                    }
+                    for (int i = 0; i<memout[cur].size; i++) {
+                        kernelParams[i+memin[cur].size] = memout[cur][i];
+                    }
+                } else {
+                    kernelParams = malloc(sizeof(void*)*memin[cur].size);
+                    for (int i = ; i<memin[cur].size; i++) {
+                        kernelParams[i] = memin[cur].buf[i];
+                    }
+                }
+                    
                 
                 if (tasklist->dim > 0) {
                     ret = cuLaunchKernel(kernel[cur],
-                                         tasklist->x, tasklist->y, tasklist->z,
+                                         tasklist->x*tasklist->y*tasklist->z, 0, 0,
                                          1, 1, 1,
-                                         stream, NULL, NULL);
+                                         stream, kernelParams, NULL);
                 } else {
                     ret = cuLaunchKernel(kernel[cur],
                                          1, 1, 1,
                                          1, 1, 1,
-                                         stream, NULL, NULL);
+                                         stream, kernelParams, NULL);
                 }
                 if (ret!=0) { cudaTaskError(cur, tasklist, ret); continue; }
 
@@ -258,6 +275,7 @@
                 // to stop pipeline set 1-cur
                 wait_for_event(kernel_event, memout, tasklist, cur);
                 cur = 1 - cur;
+                free(kernelParams);
             }
             reply = (memaddr)tasklist->waiter;
             params_addr = (memaddr)tasklist->next;
--- a/example/Cuda/Makefile.def	Wed Jan 29 17:08:07 2014 +0900
+++ b/example/Cuda/Makefile.def	Wed Jan 29 18:55:59 2014 +0900
@@ -5,4 +5,4 @@
 CC = clang++
 NVCC = nvcc
 CFLAGS = -Wall $(OPT)
-NVCCFLAGS = -ptx
\ No newline at end of file
+NVCCFLAGS = -ptx -arch=sm_20
\ No newline at end of file
--- a/example/Cuda/main.cc	Wed Jan 29 17:08:07 2014 +0900
+++ b/example/Cuda/main.cc	Wed Jan 29 18:55:59 2014 +0900
@@ -26,7 +26,6 @@
     CUcontext context;
     CUmodule module;
     CUfunction function;
-    CUresult result;
 
     cuInit(0);
     cuDeviceGet(&device, 0);
--- a/example/Cuda/multiply.cu	Wed Jan 29 17:08:07 2014 +0900
+++ b/example/Cuda/multiply.cu	Wed Jan 29 18:55:59 2014 +0900
@@ -1,6 +1,7 @@
 extern "C" {
     __global__ void multiply(float* A, float* B, float* C) {
         int index = blockIdx.x * blockDim.x + threadIdx.x;
+        printf("%d\n",index);
         C[index] = A[index] * B[index];
     }
 }