Mercurial > hg > Game > Cerium
changeset 1922:bbd209709ca1 draft
fix
author | Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp> |
---|---|
date | Wed, 29 Jan 2014 18:55:59 +0900 |
parents | 91ada4e540f2 |
children | e801016bd47c |
files | TaskManager/Cuda/CudaScheduler.cc example/Cuda/Makefile.def example/Cuda/main.cc example/Cuda/multiply.cu |
diffstat | 4 files changed, 25 insertions(+), 7 deletions(-) [+] |
line wrap: on
line diff
--- a/TaskManager/Cuda/CudaScheduler.cc Wed Jan 29 17:08:07 2014 +0900 +++ b/TaskManager/Cuda/CudaScheduler.cc Wed Jan 29 18:55:59 2014 +0900 @@ -214,13 +214,13 @@ param++; } - memin[cur].size = param; // +1 means param + memin[cur].size = param; // +1 means param for(int i = 0; i<nextTask->outData_count;i++) { // set output data ListElement *output_buf = nextTask->outData(i); if (output_buf->size==0) break; if (!flag[cur].flip) { // flip use memin for output - createBuffer(&memout[cur], i, context, CL_MEM_WRITE_ONLY, output_buf->size, &ret); + createBuffer(&memout[cur], i, context, output_buf->size, &ret); if (ret!=0) { cudaTaskError(cur,tasklist,ret); continue; } ret = cuParamSetv(kernel[cur], 0, memout[cur].buf[i], sizeof(memout)); if (ret!=0) { cudaTaskError(cur,tasklist,ret); continue;} @@ -230,17 +230,34 @@ } memout[cur].size = param - memin[cur].size; // no buffer on flip, but flip use memout event + void* kernelParams; + + if (!flag[cur.flip]) { + kernelParams = malloc(sizeof(void*)*param); + for (int i = 0; i<memin[cur].size; i++) { + kernelParams[i] = memin[cur].buf[i]; + } + for (int i = 0; i<memout[cur].size; i++) { + kernelParams[i+memin[cur].size] = memout[cur][i]; + } + } else { + kernelParams = malloc(sizeof(void*)*memin[cur].size); + for (int i = ; i<memin[cur].size; i++) { + kernelParams[i] = memin[cur].buf[i]; + } + } + if (tasklist->dim > 0) { ret = cuLaunchKernel(kernel[cur], - tasklist->x, tasklist->y, tasklist->z, + tasklist->x*tasklist->y*tasklist->z, 0, 0, 1, 1, 1, - stream, NULL, NULL); + stream, kernelParams, NULL); } else { ret = cuLaunchKernel(kernel[cur], 1, 1, 1, 1, 1, 1, - stream, NULL, NULL); + stream, kernelParams, NULL); } if (ret!=0) { cudaTaskError(cur, tasklist, ret); continue; } @@ -258,6 +275,7 @@ // to stop pipeline set 1-cur wait_for_event(kernel_event, memout, tasklist, cur); cur = 1 - cur; + free(kernelParams); } reply = (memaddr)tasklist->waiter; params_addr = (memaddr)tasklist->next;
--- a/example/Cuda/Makefile.def Wed Jan 29 17:08:07 2014 +0900
+++ b/example/Cuda/Makefile.def Wed Jan 29 18:55:59 2014 +0900
@@ -5,4 +5,4 @@
 CC = clang++
 NVCC = nvcc
 CFLAGS = -Wall $(OPT)
-NVCCFLAGS = -ptx
\ No newline at end of file
+NVCCFLAGS = -ptx -arch=sm_20
\ No newline at end of file
--- a/example/Cuda/main.cc Wed Jan 29 17:08:07 2014 +0900 +++ b/example/Cuda/main.cc Wed Jan 29 18:55:59 2014 +0900 @@ -26,7 +26,6 @@ CUcontext context; CUmodule module; CUfunction function; - CUresult result; cuInit(0); cuDeviceGet(&device, 0);
--- a/example/Cuda/multiply.cu Wed Jan 29 17:08:07 2014 +0900
+++ b/example/Cuda/multiply.cu Wed Jan 29 18:55:59 2014 +0900
@@ -1,6 +1,7 @@
 extern "C" {
     __global__ void multiply(float* A, float* B, float* C) {
         int index = blockIdx.x * blockDim.x + threadIdx.x;
+        printf("%d\n",index);
         C[index] = A[index] * B[index];
     }
 }