Mercurial > hg > Game > Cerium
changeset 1773:83ef550db0a8 draft
break GPU to fix
author | Shinji KONO <kono@ie.u-ryukyu.ac.jp> |
---|---|
date | Sat, 23 Nov 2013 10:22:21 +0900 |
parents | 6d173ec5ea9a |
children | 39734c8cbcfe |
files | TaskManager/ChangeLog TaskManager/Gpu/GpuScheduler.cc |
diffstat | 2 files changed, 68 insertions(+), 25 deletions(-) [+] |
line wrap: on
line diff
--- a/TaskManager/ChangeLog Sat Nov 23 08:58:15 2013 +0900 +++ b/TaskManager/ChangeLog Sat Nov 23 10:22:21 2013 +0900 @@ -1,3 +1,27 @@ +2013-11-23 Shinji kONO <kono@ie.u-ryukyu.ac.jp> + + Open CL の event の扱い方が良くない + + pipeline buffer は、構造体で待つ。 + reply + kernel + memin x n + memout x n + read_event x n + write_event x n + kernel_event + これらを、すべて二重に持つ。必要なら n の分 extension する。 + + event は、上書きす前にすべて、release する必要がある。 + + clEnqueueWriteBuffer, clEnqueueNDRangeKernel, clEnqueueReadBuffer は、eventlist で待ち合わせる。 + + clEnqueueWriteBuffer は、前の clEnqueueWriteBuffer を待つ + clEnqueueNDRangeKernel は、 clEnqueueWriteBuffer を待つ + clEnqueueReadBuffer は、clEnqueueNDRangeKernel を待つ + + clEnqueueReadBuffer, clEnqueueWriteBuffer は、あるとは限らない + 2013-11-22 Shinji kONO <kono@ie.u-ryukyu.ac.jp> Multi Dimention の実装がよろしくない。複雑過ぎる。
--- a/TaskManager/Gpu/GpuScheduler.cc Sat Nov 23 08:58:15 2013 +0900 +++ b/TaskManager/Gpu/GpuScheduler.cc Sat Nov 23 10:22:21 2013 +0900 @@ -62,10 +62,16 @@ clReleaseContext(context); } - +#define NOP_REPLY 1 void GpuScheduler::wait_for_event(cl_event* event,memaddr* reply,int cur) { + if (event[1-cur] == NOP_REPLY) { + if(reply[1-cur]) { + connector->mail_write(reply[1-cur]); + reply[1-cur]=0; + } + } if (event[1-cur] != NULL) { int ret=clWaitForEvents(1,&event[1-cur]); if (ret<0) { @@ -108,14 +114,14 @@ if ((memaddr)params_addr == (memaddr)MY_SPE_COMMAND_EXIT) { clFinish(command_queue); - if (kernel[0]) - clReleaseKernel(kernel[0]); - if (kernel[1]) - clReleaseKernel(kernel[1]); - if (event[0]) - clReleaseEvent(event[0]); - if (event[1]) - clReleaseEvent(event[1]); + if (kernel[0]) clReleaseKernel(kernel[0]); + if (kernel[1]) clReleaseKernel(kernel[1]); + if (event[0] && event[0]!=NOP_REPY) clReleaseEvent(event[0]); + if (event[1] && event[1]!=NOP_REPY) clReleaseEvent(event[1]); + if ( memout[cur-1] ) clReleaseMemObject(memout[cur-1]); + if ( memout[cur] ) clReleaseMemObject(memout[cur]); + if ( memin[cur-1] ) clReleaseMemObject(memin[cur-1]); + if ( memin[cur] ) clReleaseMemObject(memin[cur]); return ; } @@ -136,13 +142,15 @@ for (TaskPtr nextTask = tasklist[cur]->tasks;nextTask < tasklist[cur]->last(); nextTask = nextTask->next()) { if(nextTask->command==ShowTime) { connector->show_profile(); - continue; + goto nop_repy; } if(nextTask->command==StartProfile) { connector->start_profile(); - continue; + goto nop_repy; } - if (load_kernel(nextTask->command) == 0) continue ; + if (load_kernel(nextTask->command) == 0) { + goto nop_skip; + } cl_program& program = *gpu_task_list[nextTask->command].gputask->program; const char *function = gpu_task_list[nextTask->command].name; @@ -152,6 +160,7 @@ if (ret<0) { const char *msg=convert_error_status(ret); error(msg); + goto nop_repy; } int param = 0; @@ -170,17 +179,20 @@ if (ret<0) { const char *msg=convert_error_status(ret); error(msg); + goto nop_repy; } ret = clSetKernelArg(kernel[cur], param, sizeof(memaddr),(void *)&memparam); if (ret<0) { const char *msg=convert_error_status(ret); error(msg); + goto nop_repy; } param++; cl_mem_flags mem_flag = CL_MEM_READ_ONLY; + if ( memout[cur] ) clReleaseMemObject(memout[cur]); memin[cur] = new cl_mem[nextTask->inData_count]; if (!flag.flip) { // set input data when not flip @@ -209,9 +221,11 @@ } cl_mem_flags out_mem_flag; if (flag.flip) { + if ( memout[cur] ) clReleaseMemObject(memout[cur]); memout[cur] = new cl_mem[nextTask->inData_count]; out_mem_flag = CL_MEM_READ_WRITE; } else { + if ( memout[cur] ) clReleaseMemObject(memout[cur]); memout[cur] = new cl_mem[nextTask->outData_count]; out_mem_flag = CL_MEM_WRITE_ONLY; } @@ -247,23 +261,25 @@ tasklist[cur]->task_start_time = gettime(); if (tasklist[cur]->dim > 0) { ret = clEnqueueNDRangeKernel(command_queue, kernel[cur], tasklist[cur]->dim, - NULL, &tasklist[cur]->x, 0, 0, NULL, NULL); + NULL, &tasklist[cur]->x, 0, 0, NULL, &event[cur]); } else { - ret = clEnqueueTask(command_queue, kernel[cur], 0, NULL, NULL); + ret = clEnqueueTask(command_queue, kernel[cur], 0, NULL, &event[cur]); } if (ret<0) { const char *msg=convert_error_status(ret); error(msg); + goto nop_repy; } for(int i=0;i<nextTask->outData_count;i++) { // read output data ListElement *output_buf = flag.flip? nextTask->inData(i) :nextTask->outData(i); if (output_buf->size==0) break; ret = clEnqueueReadBuffer(command_queue, memout[cur][i], CL_FALSE, 0, - output_buf->size, output_buf->addr, 0, NULL, &event[cur]); + output_buf->size, output_buf->addr, 0, NULL, NULL); if (ret<0) { const char *msg=convert_error_status(ret); error(msg); + goto nop_repy; } } tasklist[cur]->task_end_time = gettime(); @@ -271,6 +287,11 @@ if (ret<0) { const char *msg=convert_error_status(ret); error(msg); +nop_reply: + event[cur] = NOP_REPLY; + kernel[cur] = 0; + memout[cur] = 0; + memin[cur] = 0; } reply[cur] = (memaddr)tasklist[cur]->waiter; @@ -285,16 +306,8 @@ tasklist[1-cur]->task_end_time = end; } event[1-cur] = NULL; - //clFlush(command_queue); // waiting for queued task - - // clFlush(command_queue); // pipeline : 1-cur // no pipeline : cur - /* should be released - * clReleaseMemObject(memin[1-cur]); - * clReleaseMemObject(memout[1-cur]); - */ - params_addr = (memaddr)tasklist[cur]->next; } printf("GPU %d %s\t%lld\n",tasklist[cur]->self->cpu_type,(char*)(gpu_task_list[tasklist[cur]->tasks[0].command].name),tasklist[cur]->task_end_time-tasklist[cur]->task_start_time); @@ -310,13 +323,19 @@ tasklist[cur]->task_end_time = end; event[cur] = NULL; } - //clFlush(command_queue); // waiting for queued task - //clFinish(command_queue); // waiting for queued task unsigned long long wait = 0; (*connector->end_dmawait_profile)(&wait, &(connector->start_time), &(connector->stop_time)); connector->mail_write((memaddr)MY_SPE_STATUS_READY); } // TaskArrayの処理 + if ( memout[cur-1] ) clReleaseMemObject(memout[cur-1]); + if ( memout[cur] ) clReleaseMemObject(memout[cur]); + if ( memin[cur-1] ) clReleaseMemObject(memin[cur-1]); + if ( memin[cur] ) clReleaseMemObject(memin[cur]); + if ( kernel[cur]) clReleaseKernel(kernel[cur]); + if ( kernel[cur-1]) clReleaseKernel(kernel[cur-1]); + if (event[0] && event[0]!=NOP_REPY) clReleaseEvent(event[0]); + if (event[1] && event[0]!=NOP_REPY) clReleaseEvent(event[1]); }