changeset 1773:83ef550db0a8 draft

break GPU to fix
author Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date Sat, 23 Nov 2013 10:22:21 +0900
parents 6d173ec5ea9a
children 39734c8cbcfe
files TaskManager/ChangeLog TaskManager/Gpu/GpuScheduler.cc
diffstat 2 files changed, 68 insertions(+), 25 deletions(-) [+]
line wrap: on
line diff
--- a/TaskManager/ChangeLog	Sat Nov 23 08:58:15 2013 +0900
+++ b/TaskManager/ChangeLog	Sat Nov 23 10:22:21 2013 +0900
@@ -1,3 +1,27 @@
+2013-11-23 Shinji kONO <kono@ie.u-ryukyu.ac.jp>
+
+	Open CL の event の扱い方が良くない
+
+	pipeline buffer は、構造体で待つ。
+	    reply
+	    kernel
+	    memin     x n
+	    memout    x n
+	    read_event       x n
+	    write_event      x n
+	    kernel_event  
+	これらを、すべて二重に持つ。必要なら n の分 extension する。
+
+	event は、上書きす前にすべて、release する必要がある。
+
+	clEnqueueWriteBuffer, clEnqueueNDRangeKernel, clEnqueueReadBuffer は、eventlist で待ち合わせる。
+
+	clEnqueueWriteBuffer は、前の clEnqueueWriteBuffer を待つ
+	clEnqueueNDRangeKernel  は、 clEnqueueWriteBuffer を待つ
+	clEnqueueReadBuffer は、clEnqueueNDRangeKernel  を待つ
+
+	clEnqueueReadBuffer, clEnqueueWriteBuffer は、あるとは限らない
+
 2013-11-22 Shinji kONO <kono@ie.u-ryukyu.ac.jp>
 
 	Multi Dimention の実装がよろしくない。複雑過ぎる。
--- a/TaskManager/Gpu/GpuScheduler.cc	Sat Nov 23 08:58:15 2013 +0900
+++ b/TaskManager/Gpu/GpuScheduler.cc	Sat Nov 23 10:22:21 2013 +0900
@@ -62,10 +62,16 @@
     clReleaseContext(context);
 }
 
-
+#define NOP_REPLY 1
 
 void
 GpuScheduler::wait_for_event(cl_event* event,memaddr* reply,int cur) {
+    if (event[1-cur] == NOP_REPLY) {
+        if(reply[1-cur]) {
+            connector->mail_write(reply[1-cur]);
+            reply[1-cur]=0;
+        }
+    }
     if (event[1-cur] != NULL) {
         int ret=clWaitForEvents(1,&event[1-cur]);
         if (ret<0) {
@@ -108,14 +114,14 @@
 
         if ((memaddr)params_addr == (memaddr)MY_SPE_COMMAND_EXIT) {
             clFinish(command_queue);
-            if (kernel[0])
-                clReleaseKernel(kernel[0]);
-            if (kernel[1])
-                clReleaseKernel(kernel[1]);
-            if (event[0])
-                clReleaseEvent(event[0]);
-            if (event[1])
-                clReleaseEvent(event[1]);
+            if (kernel[0]) clReleaseKernel(kernel[0]);
+            if (kernel[1]) clReleaseKernel(kernel[1]);
+            if (event[0] && event[0]!=NOP_REPY) clReleaseEvent(event[0]);
+            if (event[1] && event[1]!=NOP_REPY) clReleaseEvent(event[1]);
+            if ( memout[cur-1] )  clReleaseMemObject(memout[cur-1]);
+            if ( memout[cur] )  clReleaseMemObject(memout[cur]);
+            if ( memin[cur-1] )  clReleaseMemObject(memin[cur-1]);
+            if ( memin[cur] )  clReleaseMemObject(memin[cur]);
             return ;
         }
 
@@ -136,13 +142,15 @@
             for (TaskPtr nextTask = tasklist[cur]->tasks;nextTask < tasklist[cur]->last(); nextTask = nextTask->next()) {
                 if(nextTask->command==ShowTime) {
                     connector->show_profile();
-                    continue;
+                        goto nop_repy;
                 }
                 if(nextTask->command==StartProfile) {
                     connector->start_profile();
-                    continue;
+                        goto nop_repy;
                 }
-                if (load_kernel(nextTask->command) == 0) continue ;
+                if (load_kernel(nextTask->command) == 0) {
+                    goto nop_skip;
+                }
                 cl_program& program = *gpu_task_list[nextTask->command].gputask->program;
                 const char *function = gpu_task_list[nextTask->command].name;
 
@@ -152,6 +160,7 @@
                 if (ret<0) {
                     const char *msg=convert_error_status(ret);
                     error(msg);
+                        goto nop_repy;
                 }
 
                 int param = 0;
@@ -170,17 +179,20 @@
                 if (ret<0) {
                     const char *msg=convert_error_status(ret);
                     error(msg);
+                        goto nop_repy;
                 }
 
                 ret = clSetKernelArg(kernel[cur], param, sizeof(memaddr),(void *)&memparam);
                 if (ret<0) {
                     const char *msg=convert_error_status(ret);
                     error(msg);
+                        goto nop_repy;
                 }
 
                 param++;
 
                 cl_mem_flags mem_flag = CL_MEM_READ_ONLY;
+                if ( memout[cur] )  clReleaseMemObject(memout[cur]);
                 memin[cur] = new cl_mem[nextTask->inData_count];
 
                 if (!flag.flip) { // set input data when not flip
@@ -209,9 +221,11 @@
                 }
                 cl_mem_flags out_mem_flag;
                 if (flag.flip) {
+                    if ( memout[cur] )  clReleaseMemObject(memout[cur]);
                     memout[cur] = new cl_mem[nextTask->inData_count];
                     out_mem_flag = CL_MEM_READ_WRITE;
                 } else {
+                    if ( memout[cur] )  clReleaseMemObject(memout[cur]);
                     memout[cur] = new cl_mem[nextTask->outData_count];
                     out_mem_flag = CL_MEM_WRITE_ONLY;
                 }
@@ -247,23 +261,25 @@
                 tasklist[cur]->task_start_time = gettime();
                 if (tasklist[cur]->dim > 0) {
                     ret = clEnqueueNDRangeKernel(command_queue, kernel[cur], tasklist[cur]->dim,
-                                                 NULL, &tasklist[cur]->x, 0, 0, NULL, NULL);
+                                                 NULL, &tasklist[cur]->x, 0, 0, NULL, &event[cur]);
                 } else {
-                    ret = clEnqueueTask(command_queue, kernel[cur], 0, NULL, NULL);
+                    ret = clEnqueueTask(command_queue, kernel[cur], 0, NULL, &event[cur]);
                 }
                 if (ret<0) {
                     const char *msg=convert_error_status(ret);
                     error(msg);
+                        goto nop_repy;
                 }
 
                 for(int i=0;i<nextTask->outData_count;i++) { // read output data
                     ListElement *output_buf = flag.flip? nextTask->inData(i) :nextTask->outData(i);
                     if (output_buf->size==0) break;
                     ret = clEnqueueReadBuffer(command_queue, memout[cur][i], CL_FALSE, 0,
-                                              output_buf->size, output_buf->addr, 0, NULL, &event[cur]);
+                                              output_buf->size, output_buf->addr, 0, NULL, NULL);
                     if (ret<0) {
                         const char *msg=convert_error_status(ret);
                         error(msg);
+                        goto nop_repy;
                     }
                 }
                 tasklist[cur]->task_end_time = gettime();
@@ -271,6 +287,11 @@
                 if (ret<0) {
                     const char *msg=convert_error_status(ret);
                     error(msg);
+nop_reply:
+                    event[cur] = NOP_REPLY;
+                    kernel[cur] = 0;
+                    memout[cur] = 0;
+                    memin[cur] = 0;
                 }
 
                 reply[cur] = (memaddr)tasklist[cur]->waiter;
@@ -285,16 +306,8 @@
                     tasklist[1-cur]->task_end_time   = end;
                 }
                 event[1-cur] = NULL;
-                //clFlush(command_queue); // waiting for queued task
-
-                // clFlush(command_queue);
                 // pipeline    : 1-cur
                 // no pipeline : cur
-                /* should be released
-                 *  clReleaseMemObject(memin[1-cur]);
-                 *  clReleaseMemObject(memout[1-cur]);
-                 */
-                
                 params_addr = (memaddr)tasklist[cur]->next;
             }
             printf("GPU %d %s\t%lld\n",tasklist[cur]->self->cpu_type,(char*)(gpu_task_list[tasklist[cur]->tasks[0].command].name),tasklist[cur]->task_end_time-tasklist[cur]->task_start_time);
@@ -310,13 +323,19 @@
             tasklist[cur]->task_end_time   = end;
             event[cur] = NULL;
         }
-        //clFlush(command_queue); // waiting for queued task
-        //clFinish(command_queue); // waiting for queued task
         unsigned long long wait = 0;
         (*connector->end_dmawait_profile)(&wait, &(connector->start_time), &(connector->stop_time));
         connector->mail_write((memaddr)MY_SPE_STATUS_READY);
     }
     // TaskArrayの処理
+    if ( memout[cur-1] )  clReleaseMemObject(memout[cur-1]);
+    if ( memout[cur] )  clReleaseMemObject(memout[cur]);
+    if ( memin[cur-1] )  clReleaseMemObject(memin[cur-1]);
+    if ( memin[cur] )  clReleaseMemObject(memin[cur]);
+    if ( kernel[cur]) clReleaseKernel(kernel[cur]);
+    if ( kernel[cur-1]) clReleaseKernel(kernel[cur-1]);
+    if (event[0] && event[0]!=NOP_REPY) clReleaseEvent(event[0]);
+    if (event[1] && event[0]!=NOP_REPY) clReleaseEvent(event[1]);
 
 }