changeset 1548:614a3f62c881 draft

add set work item size function
author Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
date Fri, 15 Feb 2013 07:37:04 +0900
parents 2983e9e93d24
children 68200bc3ab6b
files TaskManager/Gpu/GpuScheduler.cc TaskManager/kernel/ppe/HTask.h TaskManager/kernel/schedule/Scheduler.h
diffstat 3 files changed, 71 insertions(+), 37 deletions(-) [+]
line wrap: on
line diff
--- a/TaskManager/Gpu/GpuScheduler.cc	Wed Feb 13 18:36:57 2013 +0900
+++ b/TaskManager/Gpu/GpuScheduler.cc	Fri Feb 15 07:37:04 2013 +0900
@@ -70,6 +70,9 @@
 {
     int cur = 0;
     memaddr reply[2];
+    cl_kernel *kernel = new cl_kernel[2];
+    cl_mem *memin[2];
+    cl_mem *memout[2];
     HTask::htask_flag flag;
     memset(reply, 0, sizeof(memaddr)*2);
     memset(&flag, 0, sizeof(HTask::htask_flag));
@@ -101,10 +104,34 @@
                  nextTask < tasklist->last(); nextTask = nextTask->next()) {
 
                 load_kernel(nextTask->command);
+                cl_program& program = *task_list[nextTask->command].gputask->program;
+                const char *function = task_list[nextTask->command].name;
 
-                cl_kernel& kernel = *task_list[nextTask->command].gputask->kernel;
-
+                kernel[cur] = clCreateKernel(program, function, &ret);
+                if (ret<0) {
+                    const char *msg=convert_error_status(ret);
+                    error(msg);
+                }
                 int param = 0;
+                
+                size_t gws[3],lws[3];
+                memset(gws, 0, sizeof(size_t)*3);
+                memset(lws, 0, sizeof(size_t)*3);
+                cl_uint dimension;
+                if (flag.nd_range) {
+                    ListElement *input_buf = nextTask->inData(0);
+                    size_t *ws_buf = (size_t*)input_buf->addr;
+                    dimension = (cl_uint)ws_buf[0];
+                    /* dimension check
+                     * if () {
+                     * error("Invalid work item dimension\n");
+                     * }
+                     */
+                    for (int i=0; i<dimension; i++) {
+                        gws[i] = ws_buf[i+1];
+                        lws[i] = ws_buf[i+1+dimension];
+                    }
+                }
 
                 // set arg count
                 cl_mem memparam = clCreateBuffer(context, CL_MEM_READ_ONLY,
@@ -116,7 +143,7 @@
                     error(msg);
                 }
 
-                ret = clSetKernelArg(kernel, param, sizeof(memaddr),(void *)&memparam);
+                ret = clSetKernelArg(kernel[cur], param, sizeof(memaddr),(void *)&memparam);
                 if (ret<0) {
                     const char *msg=convert_error_status(ret);
                     error(msg);
@@ -125,18 +152,20 @@
                 param++;
 
                 cl_mem_flags mem_flag = CL_MEM_READ_ONLY;
-                cl_mem *memin = new cl_mem[nextTask->inData_count];
+                memin[cur] = new cl_mem[nextTask->inData_count];
                 if (!flag.flip) { // set input data when not flip
-                    for(int i=0;i<nextTask->inData_count;i++) {
-                        memin[i] = clCreateBuffer(context, mem_flag, nextTask->inData(i)->size, NULL, NULL);
+                    int i=flag.nd_range? 1:0;
+
+                    for(;i<nextTask->inData_count;i++) {
                         ListElement *input_buf = nextTask->inData(i);
-                        ret = clEnqueueWriteBuffer(command_queue[cur], memin[i], CL_TRUE, 0,
+                        memin[cur][i] = clCreateBuffer(context, mem_flag, input_buf->size, NULL, NULL);
+                        ret = clEnqueueWriteBuffer(command_queue[cur], memin[cur][i], CL_TRUE, 0,
                                                    input_buf->size, input_buf->addr, 0, NULL, NULL);
                         if (ret<0) {
                             const char *msg=convert_error_status(ret);
                             error(msg);
                         }
-                        ret = clSetKernelArg(kernel,  param, sizeof(memaddr), (void *)&memin[i]);
+                        ret = clSetKernelArg(kernel[cur],  param, sizeof(memaddr), (void *)&memin[cur][i]);
                         if (ret<0) {
                             const char *msg=convert_error_status(ret);
                             error(msg);
@@ -145,20 +174,19 @@
                         param++;
                     }
                 }
-                cl_mem *memout;
                 cl_mem_flags out_mem_flag;
                 if (flag.flip) {
-                    memout = new cl_mem[nextTask->inData_count];
+                    memout[cur] = new cl_mem[nextTask->inData_count];
                     out_mem_flag = CL_MEM_READ_WRITE;
                 } else {
-                    memout = new cl_mem[nextTask->outData_count];
+                    memout[cur] = new cl_mem[nextTask->outData_count];
                     out_mem_flag = CL_MEM_WRITE_ONLY;
                 }
 
-
-                for(int i=0;i<nextTask->outData_count;i++) { // set output data
+                int i = (flag.nd_range)&&(flag.flip)? 1:0;
+                for(;i<nextTask->outData_count;i++) { // set output data
                     ListElement *output_buf = flag.flip? nextTask->inData(i) : nextTask->outData(i);
-                    memout[i] = clCreateBuffer(context, out_mem_flag, output_buf->size, NULL, &ret);
+                    memout[cur][i] = clCreateBuffer(context, out_mem_flag, output_buf->size, NULL, &ret);
                     if (ret<0) {
                         const char *msg=convert_error_status(ret);
                         error(msg);
@@ -167,14 +195,14 @@
                     if (flag.flip) { // use output buffer as input buffer
                         ListElement *input_buf = nextTask->inData(i);
 
-                        ret = clEnqueueWriteBuffer(command_queue[cur], memout[i], CL_TRUE, 0,
+                        ret = clEnqueueWriteBuffer(command_queue[cur], memout[cur][i], CL_TRUE, 0,
                                                    input_buf->size, input_buf->addr, 0, NULL, NULL);
                         if (ret<0) {
                             const char *msg=convert_error_status(ret);
                             error(msg);
                         }
                     }
-                    ret = clSetKernelArg(kernel,  param, sizeof(memaddr), (void *)&memout[i]);
+                    ret = clSetKernelArg(kernel[cur],  param, sizeof(memaddr), (void *)&memout[cur][i]);
                     if (ret<0) {
                         const char *msg=convert_error_status(ret);
                         error(msg);
@@ -182,9 +210,12 @@
                     param++;
                 }
 
-
                 cl_event ev = NULL;
-                ret = clEnqueueTask(command_queue[cur], kernel, 0, NULL, &ev);
+                if (flag.nd_range){
+                    ret = clEnqueueNDRangeKernel(command_queue[cur],kernel[cur],dimension,NULL,gws,lws,0,NULL,NULL);
+                } else {
+                    ret = clEnqueueTask(command_queue[cur], kernel[cur], 0, NULL, &ev);    
+                }
 
                 if (ret<0) {
                     const char *msg=convert_error_status(ret);
@@ -192,11 +223,11 @@
                 }
                 // ndrange flagが0ならdim,global_work_size[0],local_work_size[0] = 1で固定に
                 // clEnqueueNDRange
-                // (command_queue[cur], kernel, dim, NULL,global_work_size[0],local_work_size[0],NULL&ev);
+                // (command_queue[cur], kernel[cur], dim, NULL,global_work_size[0],local_work_size[0],NULL&ev);
 
                 for(int i=0;i<nextTask->outData_count;i++) { // read output data
                     ListElement *output_buf = flag.flip? nextTask->inData(i) :nextTask->outData(i);
-                    ret = clEnqueueReadBuffer(command_queue[cur], memout[i], CL_TRUE, 0,
+                    ret = clEnqueueReadBuffer(command_queue[cur], memout[cur][i], CL_TRUE, 0,
                                               output_buf->size, output_buf->addr, 1, &ev, NULL);
                     if (ret<0) {
                         const char *msg=convert_error_status(ret);
@@ -209,7 +240,11 @@
             clFlush(command_queue[1-cur]); // waiting for queued task
             // pipeline    : 1-cur
             // no pipeline : cur
-
+            clReleaseKernel(kernel[1-cur]);
+            /* should be released
+             *  clReleaseMemObject(memin[1-cur]);
+             *  clReleaseMemObject(memout[1-cur]);
+             */
             if(reply[1-cur]) {
                 connector->mail_write(reply[1-cur]);
             }
@@ -242,8 +277,7 @@
 {
     if (task_list[cmd].run == null_run) return;
 
-    const char *filename = (const char *)task_list[cmd].gputask->kernel;
-    const char *functionname = task_list[cmd].name;
+    const char *filename = (const char *)task_list[cmd].gputask->program;
 
     int fd;
     char *source_str;
@@ -269,29 +303,25 @@
     source_size = read(fd, source_str, size);
     close(fd);
 
-    cl_program program =
-        clCreateProgramWithSource(context, 1,
+    cl_program *program = new cl_program;
+    *program = clCreateProgramWithSource(context, 1,
                                   (const char **)&source_str,
                                   (const size_t *)&source_size, &ret);
-    ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);
+    ret = clBuildProgram(*program, 1, &device_id, NULL, NULL, NULL);
 
     if(ret<0) {
         size_t size;
-        clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, 0, NULL, &size);
+        clGetProgramBuildInfo(*program, device_id, CL_PROGRAM_BUILD_LOG, 0, NULL, &size);
 
         char *log = new char[size];
-        clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, size, log, NULL);
+        clGetProgramBuildInfo(*program, device_id, CL_PROGRAM_BUILD_LOG, size, log, NULL);
         error(log);
     }
-    cl_kernel *kernel = new cl_kernel;
-    *kernel = clCreateKernel(program, functionname, &ret);
-    task_list[cmd].gputask->kernel = kernel;
+    task_list[cmd].gputask->program = program;
     task_list[cmd].run = null_run; // kernel is ready
 
 }
 
-
-
 // regist kernel file name
 void
 gpu_register_task(int cmd, const char* filename, const char* functionname)
@@ -300,7 +330,7 @@
     task_list[cmd].load = null_loader;
     task_list[cmd].wait = null_loader;
     task_list[cmd].name = functionname;
-    task_list[cmd].gputask->kernel = (cl_kernel *) filename;
+    task_list[cmd].gputask->program = (cl_program *) filename;
 }
 
 /* end */
--- a/TaskManager/kernel/ppe/HTask.h	Wed Feb 13 18:36:57 2013 +0900
+++ b/TaskManager/kernel/ppe/HTask.h	Fri Feb 15 07:37:04 2013 +0900
@@ -128,6 +128,12 @@
     void no_flip() {
         flag.flip = 0;
     }
+    void NDrange() {
+        flag.nd_range = 1;
+    }
+    void no_NDrange() {
+        flag.nd_range = 0;
+    }
 
     htask_flag get_flag(){
         return flag;
--- a/TaskManager/kernel/schedule/Scheduler.h	Wed Feb 13 18:36:57 2013 +0900
+++ b/TaskManager/kernel/schedule/Scheduler.h	Fri Feb 15 07:37:04 2013 +0900
@@ -36,9 +36,7 @@
 
 typedef struct gpu_task_object {
 #ifdef __CERIUM_GPU__
-    cl_kernel *kernel;
-    int dim;
-    size_t *l_work_size;
+    cl_program *program;
 #endif
 } GpuTaskObject;