Mercurial > hg > Game > Cerium
changeset 1841:3a5825ad4f4e draft
Flip on gpu
Extra argments for flip
fft example should be rewritten for flip
author | yuhi |
---|---|
date | Fri, 20 Dec 2013 20:38:21 +0900 |
parents | 759587e37bc7 |
children | 17d06be35858 |
files | TaskManager/Gpu/GpuScheduler.cc TaskManager/Gpu/GpuScheduler.h |
diffstat | 2 files changed, 42 insertions(+), 33 deletions(-) [+] |
line wrap: on
line diff
--- a/TaskManager/Gpu/GpuScheduler.cc Fri Dec 20 18:02:45 2013 +0900 +++ b/TaskManager/Gpu/GpuScheduler.cc Fri Dec 20 20:38:21 2013 +0900 @@ -137,12 +137,16 @@ kernel_event[1-cur] = 0; } - if (memout[1-cur].size > 0) { - + + if (flag[1-cur].flip) { + if (memout[1-cur].size > 1) { + int ret=clWaitForEvents(memout[1-cur].size-1, &memout[1-cur].event[1]); + if (ret<0) error(convert_error_status(ret)); + release_buf_event(cur,memout); + } + } else if (memout[1-cur].size > 0) { int ret=clWaitForEvents(memout[1-cur].size, memout[1-cur].event); - if (ret<0) { - error(convert_error_status(ret)); - } + if (ret<0) error(convert_error_status(ret)); release_buf_event(cur,memout); } @@ -187,8 +191,8 @@ reply = 0; initGpuBuffer(&memin[0]);initGpuBuffer(&memin[1]); initGpuBuffer(&memout[0]);initGpuBuffer(&memout[1]); - memset(&flag, 0, sizeof(HTask::htask_flag)); - + memset(&flag, 0, sizeof(HTask::htask_flag)*2); + for (;;) { memaddr params_addr = connector->task_list_mail_read(); // read task list mail from DmaManager @@ -215,7 +219,7 @@ * flip : When caluculate on input data, to treat this as a output data */ if (tasklist->self) { - flag = tasklist->self->flag; + flag[cur] = tasklist->self->flag; } for (TaskPtr nextTask = tasklist->tasks; nextTask < tasklist->last(); nextTask = nextTask->next()) { if(nextTask->command==ShowTime) { @@ -236,23 +240,24 @@ int param = 0; // set arg count - cl_mem memparam = createBuffer(&memin[cur], 0, context, CL_MEM_READ_ONLY, + GpuBufferPtr gpumem = flag[cur].flip ? memout : memin; + cl_mem memparam = createBuffer(&gpumem[cur], 0, context, CL_MEM_READ_ONLY, sizeof(memaddr)*nextTask->param_count, &ret); if (ret<0) { gpuTaskError(cur,tasklist,ret); continue; } // parameter is passed as first kernel arg ret = clEnqueueWriteBuffer(command_queue, memparam, CL_TRUE, 0,sizeof(memaddr)*nextTask->param_count, - nextTask->param(param), 0, NULL, &memin[cur].event[0]); + nextTask->param(0), 0, NULL, &gpumem[cur].event[0]); if (ret<0) { gpuTaskError(cur,tasklist,ret); continue; } - ret = clSetKernelArg(kernel[cur], param, sizeof(memaddr),(void *)&memin[cur].buf[0]); + ret = clSetKernelArg(kernel[cur], 0, sizeof(memaddr),(void *)&gpumem[cur].buf[0]); if (ret<0) { gpuTaskError(cur,tasklist,ret); continue; } param++; cl_mem_flags mem_flag = CL_MEM_READ_ONLY; - if (!flag.flip) { // set input data when not flip + if (!flag[cur].flip) { // set input data when not flip for(int i=0;i<nextTask->inData_count;i++) { ListElement *input_buf = nextTask->inData(i); if (input_buf->size==0) break; @@ -269,47 +274,51 @@ } } cl_mem_flags out_mem_flag; - if (flag.flip) { + if (flag[cur].flip) { out_mem_flag = CL_MEM_READ_WRITE; } else { out_mem_flag = CL_MEM_WRITE_ONLY; + memin[cur].size = param; // +1 means param } - memin[cur].size = param; // +1 means param for(int i = 0; i<nextTask->outData_count;i++) { // set output data - ListElement *output_buf = flag.flip? nextTask->inData(i) : nextTask->outData(i); + ListElement *output_buf = flag[cur].flip? nextTask->inData(i) : nextTask->outData(i); if (output_buf->size==0) break; - createBuffer(&memout[cur], i, context, out_mem_flag, output_buf->size, &ret); + int i0 = flag[cur].flip ? i+1 : i; + createBuffer(&memout[cur], i0, context, out_mem_flag, output_buf->size, &ret); if (ret<0) { gpuTaskError(cur,tasklist,ret); continue; } - if (flag.flip) { // use output buffer as input buffer + if (flag[cur].flip) { // use output buffer as input buffer ListElement *input_buf = nextTask->inData(i); - ret = clEnqueueWriteBuffer(command_queue, memout[cur].buf[param], CL_TRUE, 0, + ret = clEnqueueWriteBuffer(command_queue, memout[cur].buf[i+1], CL_TRUE, 0, input_buf->size, input_buf->addr, - 0, NULL, &memout[cur].event[param]); + 0, NULL, &memout[cur].event[i+1]); if (ret<0) { gpuTaskError(cur,tasklist,ret); continue; } + ret = clSetKernelArg(kernel[cur], param+1, sizeof(memaddr), (void *)&memout[cur].buf[param]); + if (ret<0) { gpuTaskError(cur,tasklist,ret); continue;} } - ret = clSetKernelArg(kernel[cur], param, sizeof(memaddr), (void *)&memout[cur].buf[i]); - if (ret<0) { gpuTaskError(cur,tasklist,ret); continue; } + ret = clSetKernelArg(kernel[cur], param, sizeof(memaddr), (void *)&memout[cur].buf[param]); + if (ret<0) { gpuTaskError(cur,tasklist,ret); continue;} param++; } memout[cur].size = param - memin[cur].size; if (tasklist->dim > 0) { ret = clEnqueueNDRangeKernel(command_queue, kernel[cur], tasklist->dim, - NULL, &tasklist->x, 0, memin[cur].size, memin[cur].event, &kernel_event[cur]); + NULL, &tasklist->x, 0, gpumem[cur].size, gpumem[cur].event, &kernel_event[cur]); } else { - ret = clEnqueueTask(command_queue, kernel[cur], memin[cur].size, - memin[cur].event, &kernel_event[cur]); + ret = clEnqueueTask(command_queue, kernel[cur], gpumem[cur].size, + gpumem[cur].event, &kernel_event[cur]); } if (ret<0) { gpuTaskError(cur, tasklist, ret); continue; } for(int i=0;i<nextTask->outData_count;i++) { // read output data - ListElement *output_buf = flag.flip? nextTask->inData(i) :nextTask->outData(i); + ListElement *output_buf = flag[cur].flip? nextTask->inData(i) :nextTask->outData(i); if (output_buf->size==0) break; - ret = clEnqueueReadBuffer(command_queue, memout[cur].buf[i], CL_FALSE, 0, - output_buf->size, output_buf->addr, 1, &kernel_event[cur], &memout[cur].event[i]); + int i0 = flag[cur].flip? i+1:i; + ret = clEnqueueReadBuffer(command_queue, memout[cur].buf[i0], CL_FALSE, 0, + output_buf->size, output_buf->addr, 1, &kernel_event[cur], &memout[cur].event[i0]); if (ret<0) { gpuTaskError(cur,tasklist,ret); continue; } } // wait kernel[1-cur] and write[1-cur] @@ -360,7 +369,7 @@ fd = open(filename, O_RDONLY); if (fd<0) { - fprintf(stderr, "Failed to load kernel %s.\n",filename); + fprintf(stderr, "Failed to open kernel %s.\n",filename); exit(1); } @@ -369,18 +378,18 @@ off_t size = stats.st_size; if (size<=0) { - fprintf(stderr, "Failed to load kernel.\n"); + fprintf(stderr, "Failed to read kernel.\n"); exit(1); } - source_str = (char*)alloca(size); + source_str = (char*)alloca(size+1); source_size = read(fd, source_str, size); close(fd); + source_str[size] = 0; cl_program *program = new cl_program; *program = clCreateProgramWithSource(context, 1, - (const char **)&source_str, - (const size_t *)&source_size, &ret); + (const char **)&source_str, 0, &ret); ret = clBuildProgram(*program, 1, &device_id, NULL, NULL, NULL); if(ret<0) {
--- a/TaskManager/Gpu/GpuScheduler.h Fri Dec 20 18:02:45 2013 +0900 +++ b/TaskManager/Gpu/GpuScheduler.h Fri Dec 20 20:38:21 2013 +0900 @@ -54,7 +54,7 @@ cl_event kernel_event[2]; GpuBuffer memin[2]; GpuBuffer memout[2]; - HTask::htask_flag flag; + HTask::htask_flag flag[2]; private: FifoDmaManager *fifoDmaManager; int load_kernel(int cmd);