Mercurial > hg > Game > Cerium
changeset 1846:a0cb52163e57 draft
simplify GPU flip. Do not use output argments in flip. flip requires different kernel.
author | Shinji KONO <kono@ie.u-ryukyu.ac.jp> |
---|---|
date | Sat, 21 Dec 2013 08:38:13 +0900 |
parents | ad05aeed3a98 |
children | 7db7242990f7 |
files | TaskManager/Gpu/GpuScheduler.cc example/fft/main.cc |
diffstat | 2 files changed, 46 insertions(+), 65 deletions(-) [+] |
line wrap: on
line diff
--- a/TaskManager/Gpu/GpuScheduler.cc Fri Dec 20 22:25:38 2013 +0900 +++ b/TaskManager/Gpu/GpuScheduler.cc Sat Dec 21 08:38:13 2013 +0900 @@ -138,13 +138,7 @@ } - if (flag[1-cur].flip) { - if (memout[1-cur].size > 1) { - int ret=clWaitForEvents(memout[1-cur].size-1, &memout[1-cur].event[1]); - if (ret<0) error(convert_error_status(ret)); - release_buf_event(cur,memout); - } - } else if (memout[1-cur].size > 0) { + if (memout[1-cur].size > 0) { int ret=clWaitForEvents(memout[1-cur].size, memout[1-cur].event); if (ret<0) error(convert_error_status(ret)); release_buf_event(cur,memout); @@ -182,6 +176,9 @@ * Get input and output data from tasklist. * Enqueue OpenCL command and clflush. * Enqueue and clflush are pipelined structure. + * + * flip means that kernel modify input buffer only, copy GPU input buffer to outData. + * */ void GpuScheduler::run() @@ -242,85 +239,69 @@ int param = 0; // set arg count - GpuBufferPtr gpumem = flag[cur].flip ? memout : memin; - cl_mem memparam = createBuffer(&gpumem[cur], 0, context, CL_MEM_READ_ONLY, + cl_mem memparam = createBuffer(&memin[cur], 0, context, CL_MEM_READ_ONLY, sizeof(memaddr)*nextTask->param_count, &ret); if (ret<0) { gpuTaskError(cur,tasklist,ret); continue; } // parameter is passed as first kernel arg ret = clEnqueueWriteBuffer(command_queue, memparam, CL_TRUE, 0,sizeof(memaddr)*nextTask->param_count, - nextTask->param(0), 0, NULL, &gpumem[cur].event[0]); + nextTask->param(0), 0, NULL, &memin[cur].event[0]); if (ret<0) { gpuTaskError(cur,tasklist,ret); continue; } - ret = clSetKernelArg(kernel[cur], 0, sizeof(memaddr),(void *)&gpumem[cur].buf[0]); + ret = clSetKernelArg(kernel[cur], 0, sizeof(memaddr),(void *)&memin[cur].buf[0]); if (ret<0) { gpuTaskError(cur,tasklist,ret); continue; } param++; - cl_mem_flags mem_flag = CL_MEM_READ_ONLY; + cl_mem_flags mem_flag = flag[cur].flip ? CL_MEM_READ_WRITE : CL_MEM_READ_ONLY; - if (!flag[cur].flip) { // set input data when not flip - for(int i=0;i<nextTask->inData_count;i++) { - ListElement *input_buf = nextTask->inData(i); - if (input_buf->size==0) break; - createBuffer(&memin[cur], param, context, mem_flag, input_buf->size, &ret); - if (ret<0) { gpuTaskError(cur,tasklist,ret); continue; } - ret = clEnqueueWriteBuffer(command_queue, memin[cur].buf[param], CL_TRUE, 0, - input_buf->size, input_buf->addr, 0, - NULL, &memin[cur].event[param]); - if (ret<0) { gpuTaskError(cur,tasklist,ret); continue; } - ret = clSetKernelArg(kernel[cur], param, sizeof(memaddr), (void *)&memin[cur].buf[param]); - if (ret<0) { gpuTaskError(cur,tasklist,ret); continue; } - - param++; - } - } - cl_mem_flags out_mem_flag; - if (flag[cur].flip) { - out_mem_flag = CL_MEM_READ_WRITE; - } else { - out_mem_flag = CL_MEM_WRITE_ONLY; - memin[cur].size = param; // +1 means param - } - - for(int i = 0; i<nextTask->outData_count;i++) { // set output data - ListElement *output_buf = flag[cur].flip? nextTask->inData(i) : nextTask->outData(i); - if (output_buf->size==0) break; - int i0 = flag[cur].flip ? i+1 : i; - createBuffer(&memout[cur], i0, context, out_mem_flag, output_buf->size, &ret); + for(int i=0;i<nextTask->inData_count;i++) { + ListElement *input_buf = nextTask->inData(i); + if (input_buf->size==0) break; + createBuffer(&memin[cur], param, context, mem_flag, input_buf->size, &ret); if (ret<0) { gpuTaskError(cur,tasklist,ret); continue; } - - if (flag[cur].flip) { // use output buffer as input buffer - ListElement *input_buf = nextTask->inData(i); - - ret = clEnqueueWriteBuffer(command_queue, memout[cur].buf[i+1], CL_TRUE, 0, - input_buf->size, input_buf->addr, - 0, NULL, &memout[cur].event[i0]); - if (ret<0) { gpuTaskError(cur,tasklist,ret); continue; } - ret = clSetKernelArg(kernel[cur], param+1, sizeof(memaddr), (void *)&memout[cur].buf[i0]); - if (ret<0) { gpuTaskError(cur,tasklist,ret); continue;} - } - ret = clSetKernelArg(kernel[cur], param, sizeof(memaddr), (void *)&memout[cur].buf[i0]); - if (ret<0) { gpuTaskError(cur,tasklist,ret); continue;} + ret = clEnqueueWriteBuffer(command_queue, memin[cur].buf[param], CL_TRUE, 0, + input_buf->size, input_buf->addr, 0, + NULL, &memin[cur].event[param]); + if (ret<0) { gpuTaskError(cur,tasklist,ret); continue; } + ret = clSetKernelArg(kernel[cur], param, sizeof(memaddr), (void *)&memin[cur].buf[param]); + if (ret<0) { gpuTaskError(cur,tasklist,ret); continue; } + param++; } - - memout[cur].size = param - memin[cur].size; + memin[cur].size = param; // +1 means param + + for(int i = 0; i<nextTask->outData_count;i++) { // set output data + ListElement *output_buf = nextTask->outData(i); + if (output_buf->size==0) break; + if (!flag[cur].flip) { // flip use memin for output + createBuffer(&memout[cur], i, context, CL_MEM_WRITE_ONLY, output_buf->size, &ret); + if (ret<0) { gpuTaskError(cur,tasklist,ret); continue; } + ret = clSetKernelArg(kernel[cur], param, sizeof(memaddr), (void *)&memout[cur].buf[i]); + if (ret<0) { gpuTaskError(cur,tasklist,ret); continue;} + // enqueue later + } + param++; + } + memout[cur].size = param - memin[cur].size; // no buffer on flip, but flip use memout event + if (tasklist->dim > 0) { ret = clEnqueueNDRangeKernel(command_queue, kernel[cur], tasklist->dim, - NULL, &tasklist->x, 0, gpumem[cur].size, gpumem[cur].event, &kernel_event[cur]); + NULL, &tasklist->x, 0, memin[cur].size, memin[cur].event, &kernel_event[cur]); } else { - ret = clEnqueueTask(command_queue, kernel[cur], gpumem[cur].size, - gpumem[cur].event, &kernel_event[cur]); + ret = clEnqueueTask(command_queue, kernel[cur], memin[cur].size, + memin[cur].event, &kernel_event[cur]); } if (ret<0) { gpuTaskError(cur, tasklist, ret); continue; } for(int i=0;i<nextTask->outData_count;i++) { // read output data - ListElement *output_buf = flag[cur].flip? nextTask->inData(i) :nextTask->outData(i); + ListElement *output_buf = nextTask->outData(i); if (output_buf->size==0) break; - int i0 = flag[cur].flip? i+1:i; - ret = clEnqueueReadBuffer(command_queue, memout[cur].buf[i0], CL_FALSE, 0, - output_buf->size, output_buf->addr, 1, &kernel_event[cur], &memout[cur].event[i0]); + GpuBufferPtr mem = flag[cur].flip ? memin : memout ; + int i0 = flag[cur].flip ? i+1 : i ; + // flip use memin buffer and memout event + ret = clEnqueueReadBuffer(command_queue, mem[cur].buf[i0], CL_FALSE, 0, + output_buf->size, output_buf->addr, 1, &kernel_event[cur], &memout[cur].event[i]); if (ret<0) { gpuTaskError(cur,tasklist,ret); continue; } } // wait kernel[1-cur] and write[1-cur]
--- a/example/fft/main.cc Fri Dec 20 22:25:38 2013 +0900 +++ b/example/fft/main.cc Sat Dec 21 08:38:13 2013 +0900 @@ -153,7 +153,7 @@ norm->set_inData(0,dst,length_dst*sizeof(cl_float2)); norm->set_outData(0, dst, length_dst*sizeof(cl_float2)); norm->set_param(0,n); - norm->flip(); + // norm->flip(); norm->set_cpu(spe_cpu); norm->wait_for(waitTask); norm->iterate(gws[0],gws[1]); @@ -253,7 +253,7 @@ setWorkSize(gws,lws,n,n); hpfl->set_inData(0,rm,length_r*sizeof(cl_float2)); hpfl->set_outData(0, rm, length_r*sizeof(cl_float2)); - hpfl->flip(); + // hpfl->flip(); hpfl->set_param(0,n); hpfl->set_param(1,(long)radius); hpfl->set_cpu(spe_cpu);