Mercurial > hg > Game > Cerium
view TaskManager/Gpu/GpuScheduler.cc @ 1716:c12df61ded45 draft
fix gpu profile. not work yet
author | Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Tue, 15 Oct 2013 17:21:10 +0900 |
parents | 9392f4d97cff |
children | d911bef11c8a |
line wrap: on
line source
#include "TaskManager.h" #include "GpuScheduler.h" #include "ReferencedDmaManager.h" #include "PreRefDmaManager.h" #include "SchedTask.h" #include "stdio.h" #include "GpuError.h" #include "ListData.h" #include "SysFunc.h" #include "gettime.h" #include <fcntl.h> #include <sys/stat.h> #include <string.h> TaskObject gpu_task_list[MAX_TASK_OBJECT]; GpuScheduler::GpuScheduler() { init_impl(0); init_gpu(); } void GpuScheduler::init_impl(int useRefDma) { if (useRefDma & 0x10) { fifoDmaManager = new PreRefDmaManager(); // Prefetch command and no copy } else if (useRefDma & 0x01) { fifoDmaManager = new FifoDmaManager(); // memcpy } else { fifoDmaManager = new ReferencedDmaManager(); // no copy } connector = fifoDmaManager; } /* * Prepare OpenCL: * get OpenCL information * create command queue */ void GpuScheduler::init_gpu() { clGetPlatformIDs(1, &platform_id, &ret_num_platforms); clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_GPU, 1, &device_id, &ret_num_devices); // unavailable GPU if(ret_num_devices == 0) { exit(EXIT_FAILURE); } context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret); command_queue = clCreateCommandQueue(context, device_id, 0,&ret); if (ret<0) { const char *msg=convert_error_status(ret); error(msg); } } GpuScheduler::~GpuScheduler() { clReleaseCommandQueue(command_queue); clReleaseContext(context); } void GpuScheduler::wait_for_event(cl_event* event,memaddr* reply,int cur) { if (event[1-cur] != NULL) { int ret=clWaitForEvents(1,&event[1-cur]); clReleaseEvent(event[1-cur]); if (ret<0) { const char *msg=convert_error_status(ret); error(msg); } if(reply[1-cur]) { connector->mail_write(reply[1-cur]); reply[1-cur]=0; } event[1-cur]=NULL; } } /* * run GPU task * Get input and output data from tasklist. * Enqueue OpenCL command and clflush. * Enqueue and clflush are pipelined structure. */ void GpuScheduler::run() { int cur = 0; memaddr reply[2]={0,0}; cl_kernel kernel[2]={0,0}; cl_event event[2]; event[0]=NULL;event[1]=NULL; cl_mem *memin[2]; cl_mem *memout[2]; TaskListPtr tasklist[2]; HTask::htask_flag flag; memset(&flag, 0, sizeof(HTask::htask_flag)); for (;;) { memaddr params_addr = connector->task_list_mail_read(); // read task list mail from DmaManager if ((memaddr)params_addr == (memaddr)MY_SPE_COMMAND_EXIT) { clFinish(command_queue); if (kernel[0]) clReleaseKernel(kernel[0]); if (kernel[1]) clReleaseKernel(kernel[1]); if (event[0]) clReleaseEvent(event[0]); if (event[1]) clReleaseEvent(event[1]); return ; } (*connector->start_dmawait_profile)(); while (params_addr) { // since we are on the same memory space, we don't has to use dma_load here tasklist[cur] = (TaskListPtr)connector->dma_load(this, params_addr, sizeof(TaskList), DMA_READ_TASKLIST); tasklist[cur]->start_time = gettime(); /* * get flip flag * flip : When caluculate on input data, to treat this as a output data */ if (tasklist[cur]->self) { flag = tasklist[cur]->self->flag; } for (TaskPtr nextTask = tasklist[cur]->tasks;nextTask < tasklist[cur]->last(); nextTask = nextTask->next()) { if(nextTask->command==ShowTime) { connector->show_profile(); continue; } if(nextTask->command==StartProfile) { connector->start_profile(); continue; } load_kernel(nextTask->command); cl_program& program = *gpu_task_list[nextTask->command].gputask->program; const char *function = gpu_task_list[nextTask->command].name; if (kernel[cur]) clReleaseKernel(kernel[cur]); kernel[cur] = clCreateKernel(program, function, &ret); if (ret<0) { const char *msg=convert_error_status(ret); error(msg); } int param = 0; // set arg count cl_mem memparam = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(memaddr)*nextTask->param_count, NULL, &ret); if (ret<0) { const char *msg=convert_error_status(ret); error(msg); } ret = clEnqueueWriteBuffer(command_queue, memparam, CL_TRUE, 0,sizeof(memaddr)*nextTask->param_count, nextTask->param(param), 0, NULL, NULL); param=0; if (ret<0) { const char *msg=convert_error_status(ret); error(msg); } ret = clSetKernelArg(kernel[cur], param, sizeof(memaddr),(void *)&memparam); if (ret<0) { const char *msg=convert_error_status(ret); error(msg); } param++; cl_mem_flags mem_flag = CL_MEM_READ_ONLY; memin[cur] = new cl_mem[nextTask->inData_count]; if (!flag.flip) { // set input data when not flip for(int i=0;i<nextTask->inData_count;i++) { ListElement *input_buf = nextTask->inData(i); if (input_buf->size==0) break; memin[cur][i] = clCreateBuffer(context, mem_flag, input_buf->size, NULL, &ret); if (ret<0) { const char *msg=convert_error_status(ret); error(msg); } ret = clEnqueueWriteBuffer(command_queue, memin[cur][i], CL_TRUE, 0, input_buf->size, input_buf->addr, 0, NULL, NULL); if (ret<0) { const char *msg=convert_error_status(ret); error(msg); } ret = clSetKernelArg(kernel[cur], param, sizeof(memaddr), (void *)&memin[cur][i]); if (ret<0) { const char *msg=convert_error_status(ret); error(msg); } param++; } } cl_mem_flags out_mem_flag; if (flag.flip) { memout[cur] = new cl_mem[nextTask->inData_count]; out_mem_flag = CL_MEM_READ_WRITE; } else { memout[cur] = new cl_mem[nextTask->outData_count]; out_mem_flag = CL_MEM_WRITE_ONLY; } for(int i = 0; i<nextTask->outData_count;i++) { // set output data ListElement *output_buf = flag.flip? nextTask->inData(i) : nextTask->outData(i); if (output_buf->size==0) break; memout[cur][i] = clCreateBuffer(context, out_mem_flag, output_buf->size, NULL, &ret); if (ret<0) { const char *msg=convert_error_status(ret); error(msg); } if (flag.flip) { // use output buffer as input buffer ListElement *input_buf = nextTask->inData(i); ret = clEnqueueWriteBuffer(command_queue, memout[cur][i], CL_TRUE, 0, input_buf->size, input_buf->addr, 0, NULL, NULL); if (ret<0) { const char *msg=convert_error_status(ret); error(msg); } } ret = clSetKernelArg(kernel[cur], param, sizeof(memaddr), (void *)&memout[cur][i]); if (ret<0) { const char *msg=convert_error_status(ret); error(msg); } param++; } if (tasklist[cur]->dim > 0) { ret = clEnqueueNDRangeKernel(command_queue, kernel[cur], tasklist[cur]->dim, NULL, &tasklist[cur]->x, 0, 0, NULL, NULL); } else { ret = clEnqueueTask(command_queue, kernel[cur], 0, NULL, NULL); } if (ret<0) { const char *msg=convert_error_status(ret); error(msg); } for(int i=0;i<nextTask->outData_count;i++) { // read output data ListElement *output_buf = flag.flip? nextTask->inData(i) :nextTask->outData(i); if (output_buf->size==0) break; ret = clEnqueueReadBuffer(command_queue, memout[cur][i], CL_FALSE, 0, output_buf->size, output_buf->addr, 0, NULL, &event[cur]); if (ret<0) { const char *msg=convert_error_status(ret); error(msg); } } if (ret<0) { const char *msg=convert_error_status(ret); error(msg); } reply[cur] = (memaddr)tasklist[cur]->waiter; wait_for_event(event,reply,cur); tasklist[1-cur]->stop_time = gettime(); //clFlush(command_queue); // waiting for queued task // clFlush(command_queue); // pipeline : 1-cur // no pipeline : cur /* should be released * clReleaseMemObject(memin[1-cur]); * clReleaseMemObject(memout[1-cur]); */ params_addr = (memaddr)tasklist[cur]->next; cur = 1 - cur; } } wait_for_event(event,reply,cur); tasklist[1-cur]->stop_time = gettime(); //clFlush(command_queue); // waiting for queued task //clFinish(command_queue); // waiting for queued task (*connector->end_dmawait_profile)(&connector->global_busy_time); connector->mail_write((memaddr)MY_SPE_STATUS_READY); } // TaskArrayの処理 } int not_ready(SchedTask* smanager, void* r, void *w) { smanager->printf("GPU task not ready %d\n", smanager->atask->command); return 0; } /* * kernel file open and build program */ void GpuScheduler::load_kernel(int cmd) { if (gpu_task_list[cmd].run == null_run) return; const char *filename = (const char *)gpu_task_list[cmd].gputask->program; int fd; char *source_str; size_t source_size; fd = open(filename, O_RDONLY); if (fd<0) { fprintf(stderr, "Failed to load kernel %s.\n",filename); exit(1); } struct stat stats; fstat(fd,&stats); off_t size = stats.st_size; if (size<=0) { fprintf(stderr, "Failed to load kernel.\n"); exit(1); } source_str = (char*)alloca(size); source_size = read(fd, source_str, size); close(fd); cl_program *program = new cl_program; *program = clCreateProgramWithSource(context, 1, (const char **)&source_str, (const size_t *)&source_size, &ret); ret = clBuildProgram(*program, 1, &device_id, NULL, NULL, NULL); if(ret<0) { size_t size; clGetProgramBuildInfo(*program, device_id, CL_PROGRAM_BUILD_LOG, 0, NULL, &size); char *log = new char[size]; clGetProgramBuildInfo(*program, device_id, CL_PROGRAM_BUILD_LOG, size, log, NULL); error(log); } gpu_task_list[cmd].gputask->program = program; gpu_task_list[cmd].run = null_run; // kernel is ready } // regist kernel file name void gpu_register_task(int cmd, const char* filename, const char* functionname) { gpu_task_list[cmd].run = not_ready; // not yet ready gpu_task_list[cmd].load = null_loader; gpu_task_list[cmd].wait = null_loader; gpu_task_list[cmd].name = functionname; gpu_task_list[cmd].gputask->program = (cl_program *) filename; } /* end */