changeset 1841:3a5825ad4f4e draft

Flip on gpu Extra argments for flip fft example should be rewritten for flip
author yuhi
date Fri, 20 Dec 2013 20:38:21 +0900
parents 759587e37bc7
children 17d06be35858
files TaskManager/Gpu/GpuScheduler.cc TaskManager/Gpu/GpuScheduler.h
diffstat 2 files changed, 42 insertions(+), 33 deletions(-) [+]
line wrap: on
line diff
--- a/TaskManager/Gpu/GpuScheduler.cc	Fri Dec 20 18:02:45 2013 +0900
+++ b/TaskManager/Gpu/GpuScheduler.cc	Fri Dec 20 20:38:21 2013 +0900
@@ -137,12 +137,16 @@
         kernel_event[1-cur] = 0;
 
     }
-    if (memout[1-cur].size > 0) {
-
+    
+    if (flag[1-cur].flip) {
+        if (memout[1-cur].size > 1) {
+            int ret=clWaitForEvents(memout[1-cur].size-1, &memout[1-cur].event[1]);
+            if (ret<0) error(convert_error_status(ret));
+            release_buf_event(cur,memout);
+        }
+    } else if (memout[1-cur].size > 0) {
         int ret=clWaitForEvents(memout[1-cur].size, memout[1-cur].event);
-        if (ret<0) {
-            error(convert_error_status(ret));
-        }
+        if (ret<0) error(convert_error_status(ret));
         release_buf_event(cur,memout);
     }
 
@@ -187,8 +191,8 @@
     reply = 0;
     initGpuBuffer(&memin[0]);initGpuBuffer(&memin[1]);
     initGpuBuffer(&memout[0]);initGpuBuffer(&memout[1]);
-    memset(&flag, 0, sizeof(HTask::htask_flag));
-
+    memset(&flag, 0, sizeof(HTask::htask_flag)*2);
+    
     for (;;) {
         memaddr params_addr = connector->task_list_mail_read();
         // read task list mail from DmaManager
@@ -215,7 +219,7 @@
              * flip : When caluculate on input data, to treat this as a output data
              */
             if (tasklist->self) {
-                flag = tasklist->self->flag;
+                flag[cur] = tasklist->self->flag;
             }
             for (TaskPtr nextTask = tasklist->tasks; nextTask < tasklist->last(); nextTask = nextTask->next()) {
                 if(nextTask->command==ShowTime) {
@@ -236,23 +240,24 @@
                 int param = 0;
 
                 // set arg count
-                cl_mem memparam = createBuffer(&memin[cur], 0, context, CL_MEM_READ_ONLY,
+                GpuBufferPtr gpumem = flag[cur].flip ? memout : memin;
+                cl_mem memparam = createBuffer(&gpumem[cur], 0, context, CL_MEM_READ_ONLY,
                                                sizeof(memaddr)*nextTask->param_count, &ret);
                 if (ret<0) { gpuTaskError(cur,tasklist,ret); continue; }
 
                 // parameter is passed as first kernel arg 
                 ret = clEnqueueWriteBuffer(command_queue, memparam, CL_TRUE, 0,sizeof(memaddr)*nextTask->param_count,
-                                           nextTask->param(param), 0, NULL, &memin[cur].event[0]);
+                                           nextTask->param(0), 0, NULL, &gpumem[cur].event[0]);
                 if (ret<0) { gpuTaskError(cur,tasklist,ret); continue; }
 
-                ret = clSetKernelArg(kernel[cur], param, sizeof(memaddr),(void *)&memin[cur].buf[0]);
+                ret = clSetKernelArg(kernel[cur], 0, sizeof(memaddr),(void *)&gpumem[cur].buf[0]);
                 if (ret<0) { gpuTaskError(cur,tasklist,ret); continue; }
 
                 param++;
 
                 cl_mem_flags mem_flag = CL_MEM_READ_ONLY;
                 
-                if (!flag.flip) { // set input data when not flip
+                if (!flag[cur].flip) { // set input data when not flip
                     for(int i=0;i<nextTask->inData_count;i++) {
                         ListElement *input_buf = nextTask->inData(i);
                         if (input_buf->size==0) break;
@@ -269,47 +274,51 @@
                     }
                 }
                 cl_mem_flags out_mem_flag;
-                if (flag.flip) {
+                if (flag[cur].flip) {
                     out_mem_flag = CL_MEM_READ_WRITE;
                 } else {
                     out_mem_flag = CL_MEM_WRITE_ONLY;
+                    memin[cur].size  = param; // +1 means param
                 }
-                memin[cur].size  = param; // +1 means param
                 
                 for(int i = 0; i<nextTask->outData_count;i++) { // set output data
-                    ListElement *output_buf = flag.flip? nextTask->inData(i) : nextTask->outData(i);
+                    ListElement *output_buf = flag[cur].flip? nextTask->inData(i) : nextTask->outData(i);
                     if (output_buf->size==0) break;
-                    createBuffer(&memout[cur], i, context, out_mem_flag, output_buf->size, &ret);
+                    int i0 = flag[cur].flip ? i+1 : i;
+                    createBuffer(&memout[cur], i0, context, out_mem_flag, output_buf->size, &ret);
                     if (ret<0) { gpuTaskError(cur,tasklist,ret); continue; }
 
-                    if (flag.flip) { // use output buffer as input buffer
+                    if (flag[cur].flip) { // use output buffer as input buffer
                         ListElement *input_buf = nextTask->inData(i);
                         
-                        ret = clEnqueueWriteBuffer(command_queue, memout[cur].buf[param], CL_TRUE, 0,
+                        ret = clEnqueueWriteBuffer(command_queue, memout[cur].buf[i+1], CL_TRUE, 0,
                                                    input_buf->size, input_buf->addr, 
-                                                   0, NULL, &memout[cur].event[param]);
+                                                   0, NULL, &memout[cur].event[i+1]);
                         if (ret<0) { gpuTaskError(cur,tasklist,ret); continue; }
+                        ret = clSetKernelArg(kernel[cur],  param+1, sizeof(memaddr), (void *)&memout[cur].buf[param]);
+                        if (ret<0) { gpuTaskError(cur,tasklist,ret); continue;}
                     }
-                    ret = clSetKernelArg(kernel[cur],  param, sizeof(memaddr), (void *)&memout[cur].buf[i]);
-                    if (ret<0) { gpuTaskError(cur,tasklist,ret); continue; }
+                    ret = clSetKernelArg(kernel[cur],  param, sizeof(memaddr), (void *)&memout[cur].buf[param]);
+                    if (ret<0) { gpuTaskError(cur,tasklist,ret); continue;}
                     param++;
                 }
                 
                 memout[cur].size = param - memin[cur].size;
                 if (tasklist->dim > 0) {
                     ret = clEnqueueNDRangeKernel(command_queue, kernel[cur], tasklist->dim,
-                                 NULL, &tasklist->x, 0, memin[cur].size, memin[cur].event, &kernel_event[cur]);
+                                 NULL, &tasklist->x, 0, gpumem[cur].size, gpumem[cur].event, &kernel_event[cur]);
                 } else {
-                    ret = clEnqueueTask(command_queue, kernel[cur], memin[cur].size,
-                                        memin[cur].event, &kernel_event[cur]);
+                    ret = clEnqueueTask(command_queue, kernel[cur], gpumem[cur].size,
+                                        gpumem[cur].event, &kernel_event[cur]);
                 }
                 if (ret<0) { gpuTaskError(cur, tasklist, ret); continue; }
 
                 for(int i=0;i<nextTask->outData_count;i++) { // read output data
-                    ListElement *output_buf = flag.flip? nextTask->inData(i) :nextTask->outData(i);
+                    ListElement *output_buf = flag[cur].flip? nextTask->inData(i) :nextTask->outData(i);
                     if (output_buf->size==0) break;
-                    ret = clEnqueueReadBuffer(command_queue, memout[cur].buf[i], CL_FALSE, 0,
-                                              output_buf->size, output_buf->addr, 1, &kernel_event[cur], &memout[cur].event[i]);
+                    int i0 = flag[cur].flip? i+1:i;
+                    ret = clEnqueueReadBuffer(command_queue, memout[cur].buf[i0], CL_FALSE, 0,
+                                              output_buf->size, output_buf->addr, 1, &kernel_event[cur], &memout[cur].event[i0]);
                     if (ret<0) { gpuTaskError(cur,tasklist,ret); continue; }
                 }
                 // wait kernel[1-cur] and write[1-cur]
@@ -360,7 +369,7 @@
     fd = open(filename, O_RDONLY);
 
     if (fd<0) {
-        fprintf(stderr, "Failed to load kernel %s.\n",filename);
+        fprintf(stderr, "Failed to open kernel %s.\n",filename);
         exit(1);
     }
 
@@ -369,18 +378,18 @@
     off_t size = stats.st_size;
 
     if (size<=0) {
-        fprintf(stderr, "Failed to load kernel.\n");
+        fprintf(stderr, "Failed to read kernel.\n");
         exit(1);
     }
 
-    source_str = (char*)alloca(size);
+    source_str = (char*)alloca(size+1);
     source_size = read(fd, source_str, size);
     close(fd);
+    source_str[size] = 0;
 
     cl_program *program = new cl_program;
     *program = clCreateProgramWithSource(context, 1,
-                                         (const char **)&source_str,
-                                         (const size_t *)&source_size, &ret);
+                                         (const char **)&source_str, 0, &ret);
     ret = clBuildProgram(*program, 1, &device_id, NULL, NULL, NULL);
 
     if(ret<0) {
--- a/TaskManager/Gpu/GpuScheduler.h	Fri Dec 20 18:02:45 2013 +0900
+++ b/TaskManager/Gpu/GpuScheduler.h	Fri Dec 20 20:38:21 2013 +0900
@@ -54,7 +54,7 @@
     cl_event kernel_event[2];
     GpuBuffer memin[2];
     GpuBuffer memout[2];
-    HTask::htask_flag flag;
+    HTask::htask_flag flag[2];
 private:
     FifoDmaManager *fifoDmaManager;
     int load_kernel(int cmd);