Mercurial > hg > Game > Cerium
changeset 1631:39a381fec1e7 draft
cpu, gpu coexistence
author | shohei kokubo |
---|---|
date | Thu, 06 Jun 2013 17:50:55 +0900 |
parents | d2581892b8ad |
children | b0e84541e2d6 |
files | TaskManager/Cell/CellTaskManagerImpl.cc TaskManager/Fifo/FifoTaskManagerImpl.cc TaskManager/Gpu/GpuScheduler.cc TaskManager/Gpu/GpuScheduler.h TaskManager/kernel/schedule/Scheduler.cc TaskManager/kernel/schedule/Scheduler.h TaskManager/test/SetCpuTest/gpu/task_init.cc TaskManager/test/SetCpuTest/gpu/task_init.o TaskManager/test/SetCpuTest/main.cc |
diffstat | 9 files changed, 96 insertions(+), 60 deletions(-) [+] |
line wrap: on
line diff
--- a/TaskManager/Cell/CellTaskManagerImpl.cc Tue Jun 04 17:17:16 2013 +0900 +++ b/TaskManager/Cell/CellTaskManagerImpl.cc Thu Jun 06 17:50:55 2013 +0900 @@ -26,13 +26,14 @@ void CellTaskManagerImpl::init(int spuIdle_,int useRefDma,int export_task_log) { spe_running = 0; spuIdle = spuIdle_; + int m = machineNum == 0?1:machineNum; // at least 1 tasklistinfo in -cpu 0 // 実行される Task 用の パイプライン用のダブルバッファ - speTaskList = new QueueInfo<TaskList>*[machineNum]; // spe上の走っている Task の配列 - taskListInfo = new QueueInfo<TaskList>*[machineNum]; // 次に走る Task の配列 + speTaskList = new QueueInfo<TaskList>*[m]; // spe上の走っている Task の配列 + taskListInfo = new QueueInfo<TaskList>*[m]; // 次に走る Task の配列 - for (int i = 0; i < machineNum; i++) { + for (int i = 0; i < m; i++) { taskListInfo[i] = new QueueInfo<TaskList> (taskListPool); speTaskList[i] = new QueueInfo<TaskList> (taskListPool); } @@ -82,6 +83,8 @@ * * ここの activeTaskQueue は FifoTaskManagerImpl のと意味が違い、 * spe に渡される Task だけ入っている + * + * machineNum = 0(cpu = 0,gpu = 0) のときはこのルーチンには来ない */ void CellTaskManagerImpl::set_runTaskList(QueueInfo<HTask> *activeTaskQueue) { int speid; @@ -92,8 +95,12 @@ } else { if (htask->cpu_type == SPE_ANY) { speid = cur_anySpeid++ % machineNum; - if (speid < gpuNum) speid = gpuNum; + // cpu があれば cpu に割り振る + if (machineNum != gpuNum && speid < gpuNum) speid = gpuNum; #ifdef __CERIUM_GPU__ + } else if (gpuNum == 0 && htask->cpu_type < (int)SPE_0) { + // gpu = 0 で gpu を指定されたときには cpu で実行する + speid = cur_anySpeid++ % machineNum; } else if (htask->cpu_type < GPU_0+gpuNum) { speid = htask->cpu_type - (int)(GPU_0); #endif @@ -105,6 +112,11 @@ // SPE0 = 1, SPE1 = 2, ... SPE5 = 6 ってなってるので // 配列的 (SPE0 = arr[0], SPE1 = arr[1]) にするため speid = htask->cpu_type - CPU_SPE - 1 + gpuNum; + if (speid >= gpuNum && machineNum == gpuNum) { + speid = cur_anySpeid++ % machineNum; + } else if (speid < gpuNum && gpuNum == 0) { + speid = cur_anySpeid++ % machineNum; + } } set_taskList(htask, taskListInfo[speid]); @@ -382,6 +394,7 @@ Threads *cpus = new SpeThreads(num); #elif __CERIUM_GPU__ + init_task_list(gpu_task_list); Threads *cpus = new CpuThreads(num, useRefDma,num_gpu); num += num_gpu; // for GPU #else
--- a/TaskManager/Fifo/FifoTaskManagerImpl.cc Tue Jun 04 17:17:16 2013 +0900 +++ b/TaskManager/Fifo/FifoTaskManagerImpl.cc Thu Jun 06 17:50:55 2013 +0900 @@ -36,6 +36,7 @@ // TaskManager から呼ばれるので、かなりの部分は初期化されている。 mainScheduler = new MainScheduler(); + init_task_list(task_list); mainScheduler->init(this,useRefDma); mainScheduler->id = 0; set_scheduler(mainScheduler);
--- a/TaskManager/Gpu/GpuScheduler.cc Tue Jun 04 17:17:16 2013 +0900 +++ b/TaskManager/Gpu/GpuScheduler.cc Thu Jun 06 17:50:55 2013 +0900 @@ -10,6 +10,8 @@ #include <sys/stat.h> #include <string.h> +TaskObject gpu_task_list[MAX_TASK_OBJECT]; + GpuScheduler::GpuScheduler() { init_impl(0); @@ -59,26 +61,43 @@ } + +void +GpuScheduler::WaitForEvent(cl_event* event,memaddr* reply,int cur) { + if (event[1-cur] != NULL) { + int ret=clWaitForEvents(1,&event[1-cur]); + clReleaseEvent(event[1-cur]); + if (ret<0) { + const char *msg=convert_error_status(ret); + error(msg); + } + if(reply[1-cur]) { + connector->mail_write(reply[1-cur]); + reply[1-cur]=0; + } + event[1-cur]=NULL; + } +} + + /* * run GPU task * Get input and output data from tasklist. * Enqueue OpenCL command and clflush. * Enqueue and clflush are pipelined structure. */ - void GpuScheduler::run() { int cur = 0; - memaddr reply[2]; - cl_kernel kernel[2]; + memaddr reply[2]={0,0}; + cl_kernel kernel[2]={0,0}; cl_event event[2]; event[0]=NULL;event[1]=NULL; cl_mem *memin[2]; cl_mem *memout[2]; HTask::htask_flag flag; - memset(reply, 0, sizeof(memaddr)*2); memset(&flag, 0, sizeof(HTask::htask_flag)); for (;;) { @@ -87,10 +106,14 @@ if ((memaddr)params_addr == (memaddr)MY_SPE_COMMAND_EXIT) { clFinish(command_queue); - clReleaseKernel(kernel[0]); - // clReleaseKernel(kernel[1]); - clReleaseEvent(event[0]); - clReleaseEvent(event[1]); + if (kernel[0]) + clReleaseKernel(kernel[0]); + if (kernel[1]) + clReleaseKernel(kernel[1]); + if (event[0]) + clReleaseEvent(event[0]); + if (event[1]) + clReleaseEvent(event[1]); return ; } @@ -110,9 +133,11 @@ for (TaskPtr nextTask = tasklist->tasks;nextTask < tasklist->last(); nextTask = nextTask->next()) { load_kernel(nextTask->command); - cl_program& program = *task_list[nextTask->command].gputask->program; - const char *function = task_list[nextTask->command].name; + cl_program& program = *gpu_task_list[nextTask->command].gputask->program; + const char *function = gpu_task_list[nextTask->command].name; + if (kernel[cur]) + clReleaseKernel(kernel[cur]); kernel[cur] = clCreateKernel(program, function, &ret); if (ret<0) { const char *msg=convert_error_status(ret); @@ -237,18 +262,9 @@ } reply[cur] = (memaddr)tasklist->waiter; + + WaitForEvent(event,reply,cur); //clFlush(command_queue); // waiting for queued task - if (event[1-cur] != NULL) { - ret=clWaitForEvents(1,&event[1-cur]); - if (ret<0) { - const char *msg=convert_error_status(ret); - error(msg); - } - if(reply[1-cur]) { - connector->mail_write(reply[1-cur]); - } - event[1-cur]=NULL; - } // clFlush(command_queue); // pipeline : 1-cur @@ -262,13 +278,7 @@ cur = 1 - cur; } } - if (event[1-cur] != NULL) { - ret=clWaitForEvents(1,&event[1-cur]); - event[1-cur]=NULL; - if(reply[1-cur]) { - connector->mail_write(reply[1-cur]); - } - } + WaitForEvent(event,reply,cur); //clFlush(command_queue); // waiting for queued task //clFinish(command_queue); // waiting for queued task connector->mail_write((memaddr)MY_SPE_STATUS_READY); @@ -289,9 +299,9 @@ void GpuScheduler::load_kernel(int cmd) { - if (task_list[cmd].run == null_run) return; + if (gpu_task_list[cmd].run == null_run) return; - const char *filename = (const char *)task_list[cmd].gputask->program; + const char *filename = (const char *)gpu_task_list[cmd].gputask->program; int fd; char *source_str; @@ -331,8 +341,8 @@ clGetProgramBuildInfo(*program, device_id, CL_PROGRAM_BUILD_LOG, size, log, NULL); error(log); } - task_list[cmd].gputask->program = program; - task_list[cmd].run = null_run; // kernel is ready + gpu_task_list[cmd].gputask->program = program; + gpu_task_list[cmd].run = null_run; // kernel is ready } @@ -340,11 +350,11 @@ void gpu_register_task(int cmd, const char* filename, const char* functionname) { - task_list[cmd].run = not_ready; // not yet ready - task_list[cmd].load = null_loader; - task_list[cmd].wait = null_loader; - task_list[cmd].name = functionname; - task_list[cmd].gputask->program = (cl_program *) filename; + gpu_task_list[cmd].run = not_ready; // not yet ready + gpu_task_list[cmd].load = null_loader; + gpu_task_list[cmd].wait = null_loader; + gpu_task_list[cmd].name = functionname; + gpu_task_list[cmd].gputask->program = (cl_program *) filename; } /* end */
--- a/TaskManager/Gpu/GpuScheduler.h Tue Jun 04 17:17:16 2013 +0900 +++ b/TaskManager/Gpu/GpuScheduler.h Thu Jun 06 17:50:55 2013 +0900 @@ -13,12 +13,15 @@ #include <CL/cl.h> #endif +extern TaskObject gpu_task_list[MAX_TASK_OBJECT]; + class GpuScheduler : public Scheduler { public: GpuScheduler(); virtual ~GpuScheduler(); void init_impl(int useRefDma); void init_gpu(); + void WaitForEvent(cl_event* event,memaddr* reply,int cur); void run(); void mail_write_from_host(memaddr data) {
--- a/TaskManager/kernel/schedule/Scheduler.cc Tue Jun 04 17:17:16 2013 +0900 +++ b/TaskManager/kernel/schedule/Scheduler.cc Thu Jun 06 17:50:55 2013 +0900 @@ -38,6 +38,21 @@ /*! @brief speTaskの入出力のパイプラインバッファを確保する */ +void +init_task_list(TaskObject* task_list) { + for (int i = 0; i< MAX_TASK_OBJECT; i++) { + task_list[i].run = null_run; + task_list[i].load = null_loader; + task_list[i].wait = null_loader; + task_list[i].gputask = new GpuTaskObject(); + +#ifndef NOT_CHECK + entry_cmd[i] = NULL; +#endif + + } +} + //useRefDmaは、0という初期値が設定されている。 void Scheduler::init(TaskManagerImpl *m, int useRefDma, int export_task_log) @@ -48,24 +63,6 @@ task_count = 0; #endif - /* - * ;TODO - * Multi-Core Verの場合、各スレッドにMain Schedulerが作られるが、 - * その際、globalなlistの初期化を繰り返して無駄な処理を行なっている - */ - - for (int i = 0; i< MAX_TASK_OBJECT; i++) { - task_list[i].run = null_run; - task_list[i].load = null_loader; - task_list[i].wait = null_loader; - task_list[i].gputask = new GpuTaskObject(); - -#ifndef NOT_CHECK - entry_cmd[i] = NULL; -#endif - - } - set_manager(m); init_impl(useRefDma);
--- a/TaskManager/kernel/schedule/Scheduler.h Tue Jun 04 17:17:16 2013 +0900 +++ b/TaskManager/kernel/schedule/Scheduler.h Thu Jun 06 17:50:55 2013 +0900 @@ -104,6 +104,7 @@ virtual void run(){}; void run(SchedTaskBase* task1); + virtual void init_impl(int useRefDma) {}; void finish(); @@ -172,6 +173,7 @@ int null_run(SchedTask* smanager, void* r, void *w); void null_loader(Scheduler *m, int task_id); +void init_task_list(TaskObject* task_list); extern int entry_cmd[MAX_TASK_OBJECT];
--- a/TaskManager/test/SetCpuTest/gpu/task_init.cc Tue Jun 04 17:17:16 2013 +0900 +++ b/TaskManager/test/SetCpuTest/gpu/task_init.cc Thu Jun 06 17:50:55 2013 +0900 @@ -1,7 +1,15 @@ #include "GpuFunc.h" #include "GpuScheduler.h" +SchedExternTask(Twice); + void -task_init(void) { +gpu_task_init(void) +{ GpuSchedRegister(Twice, "gpu/twice.cl", "twice"); } + +void task_init(void) +{ + SchedRegister(Twice); +}
--- a/TaskManager/test/SetCpuTest/main.cc Tue Jun 04 17:17:16 2013 +0900 +++ b/TaskManager/test/SetCpuTest/main.cc Thu Jun 06 17:50:55 2013 +0900 @@ -12,6 +12,7 @@ static int task = 1; static CPU_TYPE cputype = SPE_ANY; +extern void gpu_task_init(void); extern void task_init(void); char usr_help_str[] = "GpuRun [length]\n"; @@ -108,6 +109,7 @@ return -1; } + gpu_task_init(); task_init(); for (int i = 0; i < task; ++i) {