Mercurial > hg > Game > Cerium
changeset 1804:1febe61a935a draft
create GpuBuffer
author | Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Mon, 09 Dec 2013 20:41:11 +0900 |
parents | fa15a19d27ef |
children | 8c79f9697179 |
files | TaskManager/Gpu/GpuScheduler.cc TaskManager/Gpu/GpuScheduler.h |
diffstat | 2 files changed, 90 insertions(+), 54 deletions(-) [+] |
line wrap: on
line diff
--- a/TaskManager/Gpu/GpuScheduler.cc Mon Dec 09 18:43:56 2013 +0900 +++ b/TaskManager/Gpu/GpuScheduler.cc Mon Dec 09 20:41:11 2013 +0900 @@ -62,26 +62,70 @@ clReleaseContext(context); } +void +GpuScheduler::initGpuBuffer(GpuBufferPtr m) { + m->size = 0; + m->alllocate_size = 64; + m->buf = (cl_mem*)malloc(m->alllocate_size*sizeof(cl_mem*)); + m->event = (cl_event*)malloc(m->alllocate_size*sizeof(cl_event*)); +} + + + +cl_mem +GpuScheduler::createBuffer(GpuBufferPtr m, int i, cl_context context, cl_mem_flags flags, size_t size, cl_int *error) { + if (i > m->alllocate_size) { + // reallocate buffer size + m->allocate_size *= 2; + m->buf = (cl_mem*)realloc(m->buf, m->alllocate_size*sizeof(cl_mem*)); + m->event = (cl_event*)realloc(m->event, m->alllocate_size*sizeof(cl_event*)); + } + + if (m->buf[i]) { + clReleaseMemObject(m->buf); + } + + flags |= CL_MEM_USE_HOST_PTR; + void *buf = m->buf[i]; + clCreateBuffer(context, flags, size, buf, error); +} + #define NOP_REPLY NULL +/** + * wait for previous pipeline termination + * kernel_event, memout_event + */ void -GpuScheduler::wait_for_event(cl_event* event,memaddr* reply,TaskListPtr taskList, int cur) { - if (event[1-cur] == NOP_REPLY) { +GpuScheduler::wait_for_event(cl_event* kernel_event, GpuBufferPtr memout, memaddr* reply,TaskListPtr taskList, int cur) { + if (kernel_event[1-cur] == NOP_REPLY) { if(reply[1-cur]) { connector->mail_write(reply[1-cur]); reply[1-cur]=0; } - } - if (event[1-cur] != NULL) { - int ret=clWaitForEvents(1,&event[1-cur]); + } else if (kernel_event[1-cur] != NULL) { + int ret=clWaitForEvents(1,&kernel_event[1-cur]); if (ret<0) { error(convert_error_status(ret)); } - if(reply[1-cur]) { - connector->mail_write(reply[1-cur]); - reply[1-cur]=0; + clReleaseEvent(kernel_event[1-cur]); + kernel_evetn[1-cur] = 0; + } + if (memout[1-cur].size > 0) { + int ret=clWaitForEvents(memout[1-cur].size, &memout[1-cur].event); + if (ret<0) { + error(convert_error_status(ret)); + } + for (int i=0; i < memout[1-cur].size; i++) { + clReleaseEvent(memout[1-cur].event[i]); + memout[1-cur].event[i] = 0; } } + if(reply[1-cur]) { + connector->mail_write(reply[1-cur]); + reply[1-cur]=0; + } + if (tasklist[1-cur]!=NULL) { cl_ulong start = 0; cl_ulong end = 0; @@ -115,12 +159,12 @@ clFinish(command_queue); if (kernel[0]) clReleaseKernel(kernel[0]); if (kernel[1]) clReleaseKernel(kernel[1]); - if (event[0] && event[0]!=NOP_REPLY) clReleaseEvent(event[0]); - if (event[1] && event[1]!=NOP_REPLY) clReleaseEvent(event[1]); - if ( memout[cur-1] ) clReleaseMemObject(*memout[cur-1]); - if ( memout[cur] ) clReleaseMemObject(*memout[cur]); - if ( *memin[cur-1] ) clReleaseMemObject(*memin[cur-1]); - if ( *memin[cur] ) clReleaseMemObject(*memin[cur]); + if (kernel_event[0] && kernel_event[0]!=NOP_REPLY) clReleaseEvent(event[0]); + if (kernel_event[1] && kernel_event[1]!=NOP_REPLY) clReleaseEvent(event[1]); + destroyGpuBuffer(memout[cur-1]); + destroyGpuBuffer(memout[cur]); + destroyGpuBuffer(memin[cur]); + destroyGpuBuffer(memin[cur-1]); return ; } @@ -165,15 +209,16 @@ int param = 0; // set arg count - cl_mem memparam = clCreateBuffer(context, CL_MEM_READ_ONLY, - sizeof(memaddr)*nextTask->param_count, NULL, &ret); + cl_mem memparam = createBuffer(memin[cur], 0, context, CL_MEM_READ_ONLY, + sizeof(memaddr)*nextTask->param_count, &ret); if (ret<0) { const char *msg=convert_error_status(ret); error(msg); } ret = clEnqueueWriteBuffer(command_queue, memparam, CL_TRUE, 0,sizeof(memaddr)*nextTask->param_count, - nextTask->param(param), 0, NULL, ¶m_event[1-cur]); // set event flag + nextTask->param(param), 0, NULL, &memin[cur].event[0]); + // parameter is passed as first kernel arg param=0; if (ret<0) { const char *msg=convert_error_status(ret); @@ -181,7 +226,7 @@ goto nop_reply; } - ret = clSetKernelArg(kernel[cur], param, sizeof(memaddr),(void *)&memparam); + ret = clSetKernelArg(kernel[cur], param, sizeof(memaddr),(void *)&memin[cur].buf[0]); if (ret<0) { const char *msg=convert_error_status(ret); error(msg); @@ -191,26 +236,24 @@ param++; cl_mem_flags mem_flag = CL_MEM_READ_ONLY; - if ( memout[cur] ) clReleaseMemObject(*memout[cur]); // using preallocated memorry - memin[cur] = new cl_mem[nextTask->inData_count]; if (!flag.flip) { // set input data when not flip for(int i=0;i<nextTask->inData_count;i++) { ListElement *input_buf = nextTask->inData(i); if (input_buf->size==0) break; - memin[cur][i] = clCreateBuffer(context, mem_flag, input_buf->size, NULL, &ret); + createBuffer(memin[cur], i+1, context, mem_flag, input_buf->size, &ret); if (ret<0) { const char *msg=convert_error_status(ret); error(msg); } - ret = clEnqueueWriteBuffer(command_queue, memin[cur][i], CL_TRUE, 0, - input_buf->size, input_buf->addr, 1, - param_event[1-cur], memin_event[1-cur][i]); + ret = clEnqueueWriteBuffer(command_queue, memin[cur][i+1], CL_TRUE, 0, + input_buf->size, input_buf->addr, 0, + NULL, &memin[cur].event[i+1]); if (ret<0) { const char *msg=convert_error_status(ret); error(msg); } - ret = clSetKernelArg(kernel[cur], param, sizeof(memaddr), (void *)&memin[cur][i]); + ret = clSetKernelArg(kernel[cur], param, sizeof(memaddr), (void *)&memin[cur].buf[i+1]); if (ret<0) { const char *msg=convert_error_status(ret); error(msg); @@ -221,12 +264,8 @@ } cl_mem_flags out_mem_flag; if (flag.flip) { - if ( memout[cur] ) clReleaseMemObject(*memout[cur]); - memout[cur] = new cl_mem[nextTask->inData_count]; out_mem_flag = CL_MEM_READ_WRITE; } else { - if ( memout[cur] ) clReleaseMemObject(*memout[cur]); - memout[cur] = new cl_mem[nextTask->outData_count]; out_mem_flag = CL_MEM_WRITE_ONLY; } @@ -234,7 +273,7 @@ for(int i = 0; i<nextTask->outData_count;i++) { // set output data ListElement *output_buf = flag.flip? nextTask->inData(i) : nextTask->outData(i); if (output_buf->size==0) break; - memout[cur][i] = clCreateBuffer(context, out_mem_flag, output_buf->size, NULL, &ret); + createBuffer(memout[cur], i, context, out_mem_flag, output_buf->size, &ret); if (ret<0) { const char *msg=convert_error_status(ret); error(msg); @@ -243,15 +282,15 @@ if (flag.flip) { // use output buffer as input buffer ListElement *input_buf = nextTask->inData(i); - ret = clEnqueueWriteBuffer(command_queue, memout[cur][i], CL_TRUE, 0, + ret = clEnqueueWriteBuffer(command_queue, memout[cur].buf[i+1], CL_TRUE, 0, input_buf->size, input_buf->addr, - memparam_size[1-cur],param_event[1-cur], memin_event[1-cur][i]); + 0, NULL, memout[cur].event[i+1]); if (ret<0) { const char *msg=convert_error_status(ret); error(msg); } } - ret = clSetKernelArg(kernel[cur], param, sizeof(memaddr), (void *)&memout[cur][i]); + ret = clSetKernelArg(kernel[cur], param, sizeof(memaddr), (void *)&memout[cur].buf[i]); if (ret<0) { const char *msg=convert_error_status(ret); error(msg); @@ -259,13 +298,15 @@ param++; } + memin[cur].size = taskList[cur]->inDataSize+1; // +1 means param + memout[cur].size = taskList[cur]->outDataSize; tasklist[cur]->task_start_time = gettime(); if (tasklist[cur]->dim > 0) { ret = clEnqueueNDRangeKernel(command_queue, kernel[cur], tasklist[cur]->dim, - NULL, &tasklist[cur]->x, 0, memin_size[1-cur], memin_event[1-cur], &kernel_event[cur]); + NULL, &tasklist[cur]->x, 0, memin[cur].size, memin[cur].event, &kernel_event[cur]); } else { - ret = clEnqueueTask(command_queue, kernel[cur], memin_size[1-cur], - memin_event[1-cur], &kernel_event[cur]); + ret = clEnqueueTask(command_queue, kernel[cur], memin[cur].size, + memin[cur].event, &kernel_event[cur]); } if (ret<0) { const char *msg=convert_error_status(ret); @@ -276,8 +317,8 @@ for(int i=0;i<nextTask->outData_count;i++) { // read output data ListElement *output_buf = flag.flip? nextTask->inData(i) :nextTask->outData(i); if (output_buf->size==0) break; - ret = clEnqueueReadBuffer(command_queue, memout[cur][i], CL_FALSE, 0, - output_buf->size, output_buf->addr, 1, kernel_event[1-cur], memout_event[1-cur][i]); + ret = clEnqueueReadBuffer(command_queue, memout[cur].buf[i], CL_FALSE, 0, + output_buf->size, output_buf->addr, 1, kernel_event[cur], memout[cur].event[i]); if (ret<0) { const char *msg=convert_error_status(ret); error(msg); @@ -299,8 +340,7 @@ reply[cur] = (memaddr)tasklist[cur]->waiter; // wait kernel[1-cur] and write[1-cur] - wait_for_event(event,reply,tasklist,cur); - event[1-cur] = NULL; + wait_for_event(kernel_event, memout, reply, tasklist, cur); // pipeline : 1-cur // no pipeline : cur params_addr = (memaddr)tasklist[cur]->next; @@ -308,7 +348,7 @@ printf("GPU %d %s\t%lld\n",tasklist[cur]->self->cpu_type,(char*)(gpu_task_list[tasklist[cur]->tasks[0].command].name),tasklist[cur]->task_end_time-tasklist[cur]->task_start_time); cur = 1 - cur; } - wait_for_event(memout_event,reply,tasklist,cur); + wait_for_event(kernel_event, memout, reply, tasklist, cur); unsigned long long wait = 0; (*connector->end_dmawait_profile)(&wait, &(connector->start_time), &(connector->stop_time));
--- a/TaskManager/Gpu/GpuScheduler.h Mon Dec 09 18:43:56 2013 +0900 +++ b/TaskManager/Gpu/GpuScheduler.h Mon Dec 09 20:41:11 2013 +0900 @@ -17,6 +17,12 @@ class GpuScheduler : public Scheduler { public: + typedef struct gpubuffer { + cl_int allocate_size; + cl_int size; + cl_mem *buf; // clCreateBuffer + cl_event *event; + } GpuBuffer, *GpuBufferPtr; GpuScheduler(); virtual ~GpuScheduler(); void init_impl(int useRefDma); @@ -46,23 +52,13 @@ memaddr reply[2]; cl_kernel kernel[2]; cl_event kernel_event[2]; - cl_mem memparam[2]; // clCreateBuffer - cl_int memparam_size[2]; - cl_event param_event[2]; - cl_event *param_wait[2]; // wait list for param input - cl_mem *memin[2]; // clCreateBuffer - cl_int *memin_size[2]; - cl_event *memin_event[2]; - cl_event *memin_wait[2]; // wait list for param input - cl_mem *memout[2]; // clCreateBuffer - cl_int *memout_size[2]; - cl_event *memout_event[2]; - cl_event *memout_wait[2]; // wait list for param input + GpuBuffer memin[2]; + GpuBuffer memout[2]; HTask::htask_flag flag; private: FifoDmaManager *fifoDmaManager; int load_kernel(int cmd); - + cl_mem createBuffer(GpuBufferPtr m, int i, cl_context context, cl_mem_flags flags, size_t size, cl_int *error); }; #define GpuSchedRegister(str, filename, functionname) \