Cerium: TaskManager/Gpu/GpuScheduler.cc @ 1529:504a76847b85 (draft)
remove SDL

author:   Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
date:     Sun, 25 Nov 2012 21:10:50 +0900
parents:  d232231e1425
children: c7f0e6eb1d50
#include "GpuScheduler.h" #include "ReferencedDmaManager.h" #include "PreRefDmaManager.h" #include "SchedTask.h" #include "stdio.h" // #include "ListData.h" #include <fcntl.h> #include <sys/stat.h> #include <string.h> GpuScheduler::GpuScheduler() { init_impl(0); init_gpu(); } void GpuScheduler::init_impl(int useRefDma) { if (useRefDma & 0x10) { fifoDmaManager = new PreRefDmaManager(); // Prefetch command and no copy } else if (useRefDma & 0x01) { fifoDmaManager = new FifoDmaManager(); // memcpy } else { fifoDmaManager = new ReferencedDmaManager(); // no copy } connector = fifoDmaManager; } void GpuScheduler::init_gpu() { clGetPlatformIDs(1, &platform_id, &ret_num_platforms); clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_GPU, 1, &device_id, &ret_num_devices); // unavailable GPU if(ret_num_devices == 0) { exit(EXIT_FAILURE); } context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret); command_queue = new cl_command_queue[2]; command_queue[0] = clCreateCommandQueue(context, device_id, 0, &ret); command_queue[1] = clCreateCommandQueue(context, device_id, 0, &ret); } GpuScheduler::~GpuScheduler() { clReleaseCommandQueue(command_queue[0]); clReleaseCommandQueue(command_queue[1]); clReleaseContext(context); } void GpuScheduler::run() { int cur = 0; memaddr reply[2]; HTask::htask_flag flag; memset(reply, NULL, sizeof(memaddr)*2); memset(&flag, 0, sizeof(HTask::htask_flag)); for (;;) { memaddr params_addr = connector->task_list_mail_read(); // Get OpenCL infomation if ((memaddr)params_addr == (memaddr)MY_SPE_COMMAND_EXIT) { clFinish(command_queue[0]); clFinish(command_queue[1]); return ; } while (params_addr) { // since we are on the same memory space, we don't hae to use dma_load here TaskListPtr tasklist = (TaskListPtr)connector->dma_load(this, params_addr, sizeof(TaskList), DMA_READ_TASKLIST); if (tasklist->self) { flag = tasklist->self->flag; } for (TaskPtr nextTask = tasklist->tasks; nextTask < tasklist->last(); nextTask = nextTask->next()) { load_kernel(nextTask->command); cl_kernel& kernel = *task_list[nextTask->command].gputask->kernel; int param = 0; cl_mem memparam = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(memaddr)*nextTask->param_count, NULL, NULL); ret = clEnqueueWriteBuffer(command_queue[cur], memparam, CL_TRUE, 0, sizeof(memaddr)*nextTask->param_count,nextTask->param(0), 0, NULL, NULL); ret = clSetKernelArg(kernel, param, sizeof(memaddr),(void *)&memparam); param++; cl_mem_flags mem_flag = CL_MEM_READ_ONLY; cl_mem *memin = new cl_mem[nextTask->inData_count]; if (!flag.flip) { for(int i=0;i<nextTask->inData_count;i++) { memin[i] = clCreateBuffer(context, mem_flag, nextTask->inData(i)->size, NULL, NULL); ListElement *input_buf = nextTask->inData(i); ret = clEnqueueWriteBuffer(command_queue[cur], memin[i], CL_TRUE, 0, input_buf->size, input_buf->addr, 0, NULL, NULL); ret = clSetKernelArg(kernel, param, sizeof(memaddr), (void *)&memin[i]); param++; } } cl_mem *memout = new cl_mem[nextTask->outData_count]; cl_mem_flags out_mem_flag = flag.flip? 
CL_MEM_READ_WRITE : CL_MEM_WRITE_ONLY; for(int i=0;i<nextTask->outData_count;i++) { ListElement *output_buf = nextTask->outData(i); memout[i] = clCreateBuffer(context, out_mem_flag, output_buf->size, NULL, &ret); if (flag.flip) { // use output buffer as input buffer ListElement *input_buf = nextTask->inData(i); ret = clEnqueueWriteBuffer(command_queue[cur], memout[i], CL_TRUE, 0, input_buf->size, input_buf->addr, 0, NULL, NULL); } ret = clSetKernelArg(kernel, param, sizeof(memaddr), (void *)&memout[i]); param++; } cl_event ev = NULL; ret = clEnqueueTask(command_queue[cur], kernel, 0, NULL, &ev); // ndrange flagが0ならdim,global_work_size[0],local_work_size[0] = 1で固定に // clEnqueueNDRange // (command_queue[cur], kernel, dim, NULL,global_work_size[0],local_work_size[0],NULL&ev); for(int i=0;i<nextTask->outData_count;i++) { ListElement *output_buf = nextTask->outData(i); ret = clEnqueueReadBuffer(command_queue[cur], memout[i], CL_TRUE, 0, output_buf->size, output_buf->addr, 1, &ev, NULL); } } reply[cur] = (memaddr)tasklist->waiter; clFlush(command_queue[1-cur]); // waiting for queued task //clFinish(command_queue[cur]); // waiting for queued task // pipeline : 1-cur // no pipeline : cur if(reply[1-cur]) { connector->mail_write(reply[1-cur]); } params_addr = (memaddr)tasklist->next; cur = 1 - cur; } clFlush(command_queue[1-cur]); // waiting for queued task connector->mail_write(reply[1-cur]); connector->mail_write((memaddr)MY_SPE_STATUS_READY); } // TaskArrayの処理 } int not_ready(SchedTask* smanager, void* r, void *w) { smanager->printf("GPU task not ready %d\n", smanager->atask->command); return 0; } void GpuScheduler::load_kernel(int cmd) { if (task_list[cmd].run == null_run) return; const char *filename = (const char *)task_list[cmd].gputask->kernel; const char *functionname = task_list[cmd].name; int fd; char *source_str; size_t source_size; fd = open(filename, O_RDONLY); if (fd<0) { fprintf(stderr, "Failed to load kernel %s.\n",filename); exit(1); } struct stat stats; fstat(fd,&stats); off_t size = stats.st_size; if (size<=0) { fprintf(stderr, "Failed to load kernel.\n"); exit(1); } source_str = (char*)alloca(size); source_size = read(fd, source_str, size); close(fd); cl_program program = clCreateProgramWithSource(context, 1, (const char **)&source_str, (const size_t *)&source_size, &ret); clBuildProgram(program, 1, &device_id, NULL, NULL, NULL); cl_kernel *kernel = new cl_kernel; *kernel = clCreateKernel(program, functionname, &ret); task_list[cmd].gputask->kernel = kernel; task_list[cmd].run = null_run; // kernel is ready } void gpu_register_task(int cmd, const char* filename, const char* functionname) { task_list[cmd].run = not_ready; // not yet ready task_list[cmd].load = null_loader; task_list[cmd].wait = null_loader; task_list[cmd].name = functionname; task_list[cmd].gputask->kernel = (cl_kernel *) filename; } void gpu_register_ndrange(int cmd, int dim, size_t* l_work_size) { task_list[cmd].gputask->dim = dim; task_list[cmd].gputask->l_work_size = l_work_size; } /* end */
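
For reference, a minimal usage sketch of the two registration functions defined at the end of the file. It assumes gpu_register_task() and gpu_register_ndrange() are declared in GpuScheduler.h; the command id TWICE_TASK, the source file twice.cl and the kernel name twice are illustrative placeholders, not names defined in this changeset.

#include "GpuScheduler.h"   // assumed to declare gpu_register_task / gpu_register_ndrange

enum { TWICE_TASK = 1 };    // illustrative command id, not defined in this repository

static size_t twice_local_work_size[] = { 1, 1, 1 };

void register_gpu_tasks()
{
    // Store the source file name and kernel function name; the .cl file is
    // compiled lazily by GpuScheduler::load_kernel() on first dispatch.
    gpu_register_task(TWICE_TASK, "twice.cl", "twice");

    // Optional: 1-dimensional NDRange with a local work size of 1.
    gpu_register_ndrange(TWICE_TASK, 1, twice_local_work_size);
}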