Mercurial > hg > Game > Cerium
changeset 1548:614a3f62c881 draft
add set work item size function
author | Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Fri, 15 Feb 2013 07:37:04 +0900 |
parents | 2983e9e93d24 |
children | 68200bc3ab6b |
files | TaskManager/Gpu/GpuScheduler.cc TaskManager/kernel/ppe/HTask.h TaskManager/kernel/schedule/Scheduler.h |
diffstat | 3 files changed, 71 insertions(+), 37 deletions(-) [+] |
line wrap: on
line diff
--- a/TaskManager/Gpu/GpuScheduler.cc Wed Feb 13 18:36:57 2013 +0900 +++ b/TaskManager/Gpu/GpuScheduler.cc Fri Feb 15 07:37:04 2013 +0900 @@ -70,6 +70,9 @@ { int cur = 0; memaddr reply[2]; + cl_kernel *kernel = new cl_kernel[2]; + cl_mem *memin[2]; + cl_mem *memout[2]; HTask::htask_flag flag; memset(reply, 0, sizeof(memaddr)*2); memset(&flag, 0, sizeof(HTask::htask_flag)); @@ -101,10 +104,34 @@ nextTask < tasklist->last(); nextTask = nextTask->next()) { load_kernel(nextTask->command); + cl_program& program = *task_list[nextTask->command].gputask->program; + const char *function = task_list[nextTask->command].name; - cl_kernel& kernel = *task_list[nextTask->command].gputask->kernel; - + kernel[cur] = clCreateKernel(program, function, &ret); + if (ret<0) { + const char *msg=convert_error_status(ret); + error(msg); + } int param = 0; + + size_t gws[3],lws[3]; + memset(gws, 0, sizeof(size_t)*3); + memset(lws, 0, sizeof(size_t)*3); + cl_uint dimension; + if (flag.nd_range) { + ListElement *input_buf = nextTask->inData(0); + size_t *ws_buf = (size_t*)input_buf->addr; + dimension = (cl_uint)ws_buf[0]; + /* dimension check + * if () { + * error("Invalid work item dimension\n"); + * } + */ + for (int i=0; i<dimension; i++) { + gws[i] = ws_buf[i+1]; + lws[i] = ws_buf[i+1+dimension]; + } + } // set arg count cl_mem memparam = clCreateBuffer(context, CL_MEM_READ_ONLY, @@ -116,7 +143,7 @@ error(msg); } - ret = clSetKernelArg(kernel, param, sizeof(memaddr),(void *)&memparam); + ret = clSetKernelArg(kernel[cur], param, sizeof(memaddr),(void *)&memparam); if (ret<0) { const char *msg=convert_error_status(ret); error(msg); @@ -125,18 +152,20 @@ param++; cl_mem_flags mem_flag = CL_MEM_READ_ONLY; - cl_mem *memin = new cl_mem[nextTask->inData_count]; + memin[cur] = new cl_mem[nextTask->inData_count]; if (!flag.flip) { // set input data when not flip - for(int i=0;i<nextTask->inData_count;i++) { - memin[i] = clCreateBuffer(context, mem_flag, nextTask->inData(i)->size, NULL, NULL); + int i=flag.nd_range? 1:0; + + for(;i<nextTask->inData_count;i++) { ListElement *input_buf = nextTask->inData(i); - ret = clEnqueueWriteBuffer(command_queue[cur], memin[i], CL_TRUE, 0, + memin[cur][i] = clCreateBuffer(context, mem_flag, input_buf->size, NULL, NULL); + ret = clEnqueueWriteBuffer(command_queue[cur], memin[cur][i], CL_TRUE, 0, input_buf->size, input_buf->addr, 0, NULL, NULL); if (ret<0) { const char *msg=convert_error_status(ret); error(msg); } - ret = clSetKernelArg(kernel, param, sizeof(memaddr), (void *)&memin[i]); + ret = clSetKernelArg(kernel[cur], param, sizeof(memaddr), (void *)&memin[cur][i]); if (ret<0) { const char *msg=convert_error_status(ret); error(msg); @@ -145,20 +174,19 @@ param++; } } - cl_mem *memout; cl_mem_flags out_mem_flag; if (flag.flip) { - memout = new cl_mem[nextTask->inData_count]; + memout[cur] = new cl_mem[nextTask->inData_count]; out_mem_flag = CL_MEM_READ_WRITE; } else { - memout = new cl_mem[nextTask->outData_count]; + memout[cur] = new cl_mem[nextTask->outData_count]; out_mem_flag = CL_MEM_WRITE_ONLY; } - - for(int i=0;i<nextTask->outData_count;i++) { // set output data + int i = (flag.nd_range)&&(flag.flip)? 1:0; + for(;i<nextTask->outData_count;i++) { // set output data ListElement *output_buf = flag.flip? nextTask->inData(i) : nextTask->outData(i); - memout[i] = clCreateBuffer(context, out_mem_flag, output_buf->size, NULL, &ret); + memout[cur][i] = clCreateBuffer(context, out_mem_flag, output_buf->size, NULL, &ret); if (ret<0) { const char *msg=convert_error_status(ret); error(msg); @@ -167,14 +195,14 @@ if (flag.flip) { // use output buffer as input buffer ListElement *input_buf = nextTask->inData(i); - ret = clEnqueueWriteBuffer(command_queue[cur], memout[i], CL_TRUE, 0, + ret = clEnqueueWriteBuffer(command_queue[cur], memout[cur][i], CL_TRUE, 0, input_buf->size, input_buf->addr, 0, NULL, NULL); if (ret<0) { const char *msg=convert_error_status(ret); error(msg); } } - ret = clSetKernelArg(kernel, param, sizeof(memaddr), (void *)&memout[i]); + ret = clSetKernelArg(kernel[cur], param, sizeof(memaddr), (void *)&memout[cur][i]); if (ret<0) { const char *msg=convert_error_status(ret); error(msg); @@ -182,9 +210,12 @@ param++; } - cl_event ev = NULL; - ret = clEnqueueTask(command_queue[cur], kernel, 0, NULL, &ev); + if (flag.nd_range){ + ret = clEnqueueNDRangeKernel(command_queue[cur],kernel[cur],dimension,NULL,gws,lws,0,NULL,NULL); + } else { + ret = clEnqueueTask(command_queue[cur], kernel[cur], 0, NULL, &ev); + } if (ret<0) { const char *msg=convert_error_status(ret); @@ -192,11 +223,11 @@ } // ndrange flagが0ならdim,global_work_size[0],local_work_size[0] = 1で固定に // clEnqueueNDRange - // (command_queue[cur], kernel, dim, NULL,global_work_size[0],local_work_size[0],NULL&ev); + // (command_queue[cur], kernel[cur], dim, NULL,global_work_size[0],local_work_size[0],NULL&ev); for(int i=0;i<nextTask->outData_count;i++) { // read output data ListElement *output_buf = flag.flip? nextTask->inData(i) :nextTask->outData(i); - ret = clEnqueueReadBuffer(command_queue[cur], memout[i], CL_TRUE, 0, + ret = clEnqueueReadBuffer(command_queue[cur], memout[cur][i], CL_TRUE, 0, output_buf->size, output_buf->addr, 1, &ev, NULL); if (ret<0) { const char *msg=convert_error_status(ret); @@ -209,7 +240,11 @@ clFlush(command_queue[1-cur]); // waiting for queued task // pipeline : 1-cur // no pipeline : cur - + clReleaseKernel(kernel[1-cur]); + /* should be released + * clReleaseMemObject(memin[1-cur]); + * clReleaseMemObject(memout[1-cur]); + */ if(reply[1-cur]) { connector->mail_write(reply[1-cur]); } @@ -242,8 +277,7 @@ { if (task_list[cmd].run == null_run) return; - const char *filename = (const char *)task_list[cmd].gputask->kernel; - const char *functionname = task_list[cmd].name; + const char *filename = (const char *)task_list[cmd].gputask->program; int fd; char *source_str; @@ -269,29 +303,25 @@ source_size = read(fd, source_str, size); close(fd); - cl_program program = - clCreateProgramWithSource(context, 1, + cl_program *program = new cl_program; + *program = clCreateProgramWithSource(context, 1, (const char **)&source_str, (const size_t *)&source_size, &ret); - ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL); + ret = clBuildProgram(*program, 1, &device_id, NULL, NULL, NULL); if(ret<0) { size_t size; - clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, 0, NULL, &size); + clGetProgramBuildInfo(*program, device_id, CL_PROGRAM_BUILD_LOG, 0, NULL, &size); char *log = new char[size]; - clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, size, log, NULL); + clGetProgramBuildInfo(*program, device_id, CL_PROGRAM_BUILD_LOG, size, log, NULL); error(log); } - cl_kernel *kernel = new cl_kernel; - *kernel = clCreateKernel(program, functionname, &ret); - task_list[cmd].gputask->kernel = kernel; + task_list[cmd].gputask->program = program; task_list[cmd].run = null_run; // kernel is ready } - - // regist kernel file name void gpu_register_task(int cmd, const char* filename, const char* functionname) @@ -300,7 +330,7 @@ task_list[cmd].load = null_loader; task_list[cmd].wait = null_loader; task_list[cmd].name = functionname; - task_list[cmd].gputask->kernel = (cl_kernel *) filename; + task_list[cmd].gputask->program = (cl_program *) filename; } /* end */
--- a/TaskManager/kernel/ppe/HTask.h Wed Feb 13 18:36:57 2013 +0900 +++ b/TaskManager/kernel/ppe/HTask.h Fri Feb 15 07:37:04 2013 +0900 @@ -128,6 +128,12 @@ void no_flip() { flag.flip = 0; } + void NDrange() { + flag.nd_range = 1; + } + void no_NDrange() { + flag.nd_range = 0; + } htask_flag get_flag(){ return flag;
--- a/TaskManager/kernel/schedule/Scheduler.h Wed Feb 13 18:36:57 2013 +0900 +++ b/TaskManager/kernel/schedule/Scheduler.h Fri Feb 15 07:37:04 2013 +0900 @@ -36,9 +36,7 @@ typedef struct gpu_task_object { #ifdef __CERIUM_GPU__ - cl_kernel *kernel; - int dim; - size_t *l_work_size; + cl_program *program; #endif } GpuTaskObject;