changeset 1631:39a381fec1e7 draft

cpu, gpu coexistence
author shohei kokubo
date Thu, 06 Jun 2013 17:50:55 +0900
parents d2581892b8ad
children b0e84541e2d6
files TaskManager/Cell/CellTaskManagerImpl.cc TaskManager/Fifo/FifoTaskManagerImpl.cc TaskManager/Gpu/GpuScheduler.cc TaskManager/Gpu/GpuScheduler.h TaskManager/kernel/schedule/Scheduler.cc TaskManager/kernel/schedule/Scheduler.h TaskManager/test/SetCpuTest/gpu/task_init.cc TaskManager/test/SetCpuTest/gpu/task_init.o TaskManager/test/SetCpuTest/main.cc
diffstat 9 files changed, 96 insertions(+), 60 deletions(-) [+]
line wrap: on
line diff
--- a/TaskManager/Cell/CellTaskManagerImpl.cc	Tue Jun 04 17:17:16 2013 +0900
+++ b/TaskManager/Cell/CellTaskManagerImpl.cc	Thu Jun 06 17:50:55 2013 +0900
@@ -26,13 +26,14 @@
 void CellTaskManagerImpl::init(int spuIdle_,int useRefDma,int export_task_log) {
     spe_running = 0;
     spuIdle = spuIdle_;
+    int m = machineNum == 0?1:machineNum; // at least 1 tasklistinfo in -cpu 0
 
     // 実行される Task 用の パイプライン用のダブルバッファ
-    speTaskList = new QueueInfo<TaskList>*[machineNum]; // spe上の走っている Task の配列
-    taskListInfo = new QueueInfo<TaskList>*[machineNum]; // 次に走る Task の配列
+    speTaskList = new QueueInfo<TaskList>*[m]; // spe上の走っている Task の配列
+    taskListInfo = new QueueInfo<TaskList>*[m]; // 次に走る Task の配列
 
 
-    for (int i = 0; i < machineNum; i++) {
+    for (int i = 0; i < m; i++) {
         taskListInfo[i] = new QueueInfo<TaskList> (taskListPool);
         speTaskList[i] = new QueueInfo<TaskList> (taskListPool);
     }
@@ -82,6 +83,8 @@
  *
  * ここの activeTaskQueue は FifoTaskManagerImpl のと意味が違い、
  * spe に渡される Task だけ入っている
+ *
+ * This routine is never reached when machineNum = 0 (cpu = 0, gpu = 0).
  */
 void CellTaskManagerImpl::set_runTaskList(QueueInfo<HTask> *activeTaskQueue) {
     int speid;
@@ -92,8 +95,12 @@
         }  else {
             if (htask->cpu_type == SPE_ANY) {
                 speid = cur_anySpeid++ % machineNum;
-		if (speid < gpuNum) speid = gpuNum;
+		// If any CPU exists, assign the task to a CPU.
+		if (machineNum != gpuNum && speid < gpuNum) speid = gpuNum;
 #ifdef __CERIUM_GPU__
+            } else if (gpuNum == 0 && htask->cpu_type < (int)SPE_0) {
+                // When a GPU is requested but gpu = 0, run the task on a CPU instead.
+                speid = cur_anySpeid++ % machineNum;
             } else if (htask->cpu_type < GPU_0+gpuNum) {
                 speid = htask->cpu_type - (int)(GPU_0);
 #endif
@@ -105,6 +112,11 @@
                 // SPE0 = 1, SPE1 = 2, ... SPE5 = 6 ってなってるので
                 // 配列的 (SPE0 = arr[0], SPE1 = arr[1]) にするため
                 speid = htask->cpu_type - CPU_SPE - 1 + gpuNum;
+                if (speid >= gpuNum && machineNum == gpuNum) {
+                    speid = cur_anySpeid++ % machineNum;
+		} else if (speid < gpuNum && gpuNum == 0) {
+                    speid = cur_anySpeid++ % machineNum;
+                }
             }
             
             set_taskList(htask, taskListInfo[speid]);
@@ -382,6 +394,7 @@
     Threads *cpus = new SpeThreads(num);
 
 #elif __CERIUM_GPU__    
+    init_task_list(gpu_task_list);
     Threads *cpus = new CpuThreads(num, useRefDma,num_gpu);
     num += num_gpu; // for GPU
 #else    
--- a/TaskManager/Fifo/FifoTaskManagerImpl.cc	Tue Jun 04 17:17:16 2013 +0900
+++ b/TaskManager/Fifo/FifoTaskManagerImpl.cc	Thu Jun 06 17:50:55 2013 +0900
@@ -36,6 +36,7 @@
     // TaskManager から呼ばれるので、かなりの部分は初期化されている。
 
     mainScheduler = new MainScheduler();
+    init_task_list(task_list);
     mainScheduler->init(this,useRefDma);
     mainScheduler->id = 0;
     set_scheduler(mainScheduler);
--- a/TaskManager/Gpu/GpuScheduler.cc	Tue Jun 04 17:17:16 2013 +0900
+++ b/TaskManager/Gpu/GpuScheduler.cc	Thu Jun 06 17:50:55 2013 +0900
@@ -10,6 +10,8 @@
 #include <sys/stat.h>
 #include <string.h>
 
+TaskObject gpu_task_list[MAX_TASK_OBJECT];
+
 GpuScheduler::GpuScheduler()
 {
     init_impl(0);
@@ -59,26 +61,43 @@
 }
 
 
+
+void
+GpuScheduler::WaitForEvent(cl_event* event,memaddr* reply,int cur) {
+  if (event[1-cur] != NULL) {
+    int ret=clWaitForEvents(1,&event[1-cur]);
+    clReleaseEvent(event[1-cur]);
+    if (ret<0) {
+      const char *msg=convert_error_status(ret);
+      error(msg);
+    }
+    if(reply[1-cur]) {
+      connector->mail_write(reply[1-cur]);
+      reply[1-cur]=0;
+    }
+    event[1-cur]=NULL;
+  }
+}
+
+
 /*
  * run GPU task
  * Get input and output data from tasklist.
  * Enqueue OpenCL command and clflush.
  * Enqueue and clflush are pipelined structure.
  */
-
 void
 GpuScheduler::run()
 {
     int cur = 0;
-    memaddr reply[2];
-    cl_kernel kernel[2];
+    memaddr reply[2]={0,0};
+    cl_kernel kernel[2]={0,0};
     cl_event event[2];
     event[0]=NULL;event[1]=NULL;
 
     cl_mem *memin[2];
     cl_mem *memout[2];
     HTask::htask_flag flag;
-    memset(reply, 0, sizeof(memaddr)*2);
     memset(&flag, 0, sizeof(HTask::htask_flag));
 
     for (;;) {
@@ -87,10 +106,14 @@
 
         if ((memaddr)params_addr == (memaddr)MY_SPE_COMMAND_EXIT) {
             clFinish(command_queue);
-            clReleaseKernel(kernel[0]);
-            // clReleaseKernel(kernel[1]);
-            clReleaseEvent(event[0]);
-            clReleaseEvent(event[1]);
+	    if (kernel[0])
+	        clReleaseKernel(kernel[0]);
+	    if (kernel[1])
+	        clReleaseKernel(kernel[1]);
+	    if (event[0])
+	      clReleaseEvent(event[0]);
+	    if (event[1])
+	      clReleaseEvent(event[1]);
             return ;
         }
 
@@ -110,9 +133,11 @@
             for (TaskPtr nextTask = tasklist->tasks;nextTask < tasklist->last(); nextTask = nextTask->next()) {
                 
                 load_kernel(nextTask->command);
-                cl_program& program = *task_list[nextTask->command].gputask->program;
-                const char *function = task_list[nextTask->command].name;
+                cl_program& program = *gpu_task_list[nextTask->command].gputask->program;
+                const char *function = gpu_task_list[nextTask->command].name;
 
+		if (kernel[cur])
+		    clReleaseKernel(kernel[cur]);
                 kernel[cur] = clCreateKernel(program, function, &ret);
                 if (ret<0) {
                     const char *msg=convert_error_status(ret);
@@ -237,18 +262,9 @@
                 }
 
                 reply[cur] = (memaddr)tasklist->waiter;
+
+		WaitForEvent(event,reply,cur);
                 //clFlush(command_queue); // waiting for queued task
-                if (event[1-cur] != NULL) {
-                    ret=clWaitForEvents(1,&event[1-cur]);
-                    if (ret<0) {
-                        const char *msg=convert_error_status(ret);
-                        error(msg);
-                    }
-                    if(reply[1-cur]) {
-                        connector->mail_write(reply[1-cur]);
-                    }
-                    event[1-cur]=NULL;
-                }
 
                 // clFlush(command_queue);
                 // pipeline    : 1-cur
@@ -262,13 +278,7 @@
                 cur = 1 - cur;
             }
         }
-        if (event[1-cur] != NULL) {
-            ret=clWaitForEvents(1,&event[1-cur]);
-            event[1-cur]=NULL;
-            if(reply[1-cur]) {
-                connector->mail_write(reply[1-cur]);
-            }
-        }
+	WaitForEvent(event,reply,cur);
         //clFlush(command_queue); // waiting for queued task
         //clFinish(command_queue); // waiting for queued task
         connector->mail_write((memaddr)MY_SPE_STATUS_READY);
@@ -289,9 +299,9 @@
 void
 GpuScheduler::load_kernel(int cmd)
 {
-    if (task_list[cmd].run == null_run) return;
+    if (gpu_task_list[cmd].run == null_run) return;
 
-    const char *filename = (const char *)task_list[cmd].gputask->program;
+    const char *filename = (const char *)gpu_task_list[cmd].gputask->program;
 
     int fd;
     char *source_str;
@@ -331,8 +341,8 @@
         clGetProgramBuildInfo(*program, device_id, CL_PROGRAM_BUILD_LOG, size, log, NULL);
         error(log);
     }
-    task_list[cmd].gputask->program = program;
-    task_list[cmd].run = null_run; // kernel is ready
+    gpu_task_list[cmd].gputask->program = program;
+    gpu_task_list[cmd].run = null_run; // kernel is ready
 
 }
 
@@ -340,11 +350,11 @@
 void
 gpu_register_task(int cmd, const char* filename, const char* functionname)
 {
-    task_list[cmd].run = not_ready;  // not yet ready
-    task_list[cmd].load = null_loader;
-    task_list[cmd].wait = null_loader;
-    task_list[cmd].name = functionname;
-    task_list[cmd].gputask->program = (cl_program *) filename;
+    gpu_task_list[cmd].run = not_ready;  // not yet ready
+    gpu_task_list[cmd].load = null_loader;
+    gpu_task_list[cmd].wait = null_loader;
+    gpu_task_list[cmd].name = functionname;
+    gpu_task_list[cmd].gputask->program = (cl_program *) filename;
 }
 
 /* end */
--- a/TaskManager/Gpu/GpuScheduler.h	Tue Jun 04 17:17:16 2013 +0900
+++ b/TaskManager/Gpu/GpuScheduler.h	Thu Jun 06 17:50:55 2013 +0900
@@ -13,12 +13,15 @@
 #include <CL/cl.h>
 #endif
 
+extern TaskObject gpu_task_list[MAX_TASK_OBJECT];
+
 class GpuScheduler : public Scheduler {
  public:
     GpuScheduler();
     virtual ~GpuScheduler();
     void init_impl(int useRefDma);
     void init_gpu();
+    void WaitForEvent(cl_event* event,memaddr* reply,int cur);
     void run();
     
     void mail_write_from_host(memaddr data) {
--- a/TaskManager/kernel/schedule/Scheduler.cc	Tue Jun 04 17:17:16 2013 +0900
+++ b/TaskManager/kernel/schedule/Scheduler.cc	Thu Jun 06 17:50:55 2013 +0900
@@ -38,6 +38,21 @@
 /*! @brief speTaskの入出力のパイプラインバッファを確保する
  */
 
+void
+init_task_list(TaskObject* task_list) {
+  for (int i = 0; i< MAX_TASK_OBJECT; i++) {
+    task_list[i].run = null_run;
+    task_list[i].load = null_loader;
+    task_list[i].wait = null_loader;
+    task_list[i].gputask = new GpuTaskObject();
+    
+#ifndef NOT_CHECK
+        entry_cmd[i] = NULL;
+#endif
+
+    }
+}
+
 //useRefDmaは、0という初期値が設定されている。
 void
 Scheduler::init(TaskManagerImpl *m, int useRefDma, int export_task_log)
@@ -48,24 +63,6 @@
     task_count = 0;
 #endif
 
-    /*
-     * ;TODO
-     * Multi-Core Verの場合、各スレッドにMain Schedulerが作られるが、
-     * その際、globalなlistの初期化を繰り返して無駄な処理を行なっている
-     */
-
-    for (int i = 0; i< MAX_TASK_OBJECT; i++) {
-        task_list[i].run = null_run;
-        task_list[i].load = null_loader;
-        task_list[i].wait = null_loader;
-        task_list[i].gputask = new GpuTaskObject();
-
-#ifndef NOT_CHECK
-        entry_cmd[i] = NULL;
-#endif
-
-    }
-
     set_manager(m);
     init_impl(useRefDma);
 
--- a/TaskManager/kernel/schedule/Scheduler.h	Tue Jun 04 17:17:16 2013 +0900
+++ b/TaskManager/kernel/schedule/Scheduler.h	Thu Jun 06 17:50:55 2013 +0900
@@ -104,6 +104,7 @@
     virtual void run(){};
     void run(SchedTaskBase* task1);
 
+
     virtual void init_impl(int useRefDma) {};
     void finish();
 
@@ -172,6 +173,7 @@
 
 int null_run(SchedTask* smanager, void* r, void *w);
 void null_loader(Scheduler *m, int task_id);
+void init_task_list(TaskObject* task_list);
 
 extern int entry_cmd[MAX_TASK_OBJECT];
 
--- a/TaskManager/test/SetCpuTest/gpu/task_init.cc	Tue Jun 04 17:17:16 2013 +0900
+++ b/TaskManager/test/SetCpuTest/gpu/task_init.cc	Thu Jun 06 17:50:55 2013 +0900
@@ -1,7 +1,15 @@
 #include "GpuFunc.h"
 #include "GpuScheduler.h"
 
+SchedExternTask(Twice);
+
 void
-task_init(void) {
+gpu_task_init(void)
+{
     GpuSchedRegister(Twice, "gpu/twice.cl", "twice");
 }
+
+void task_init(void)
+{
+  SchedRegister(Twice);
+}
Binary file TaskManager/test/SetCpuTest/gpu/task_init.o has changed
--- a/TaskManager/test/SetCpuTest/main.cc	Tue Jun 04 17:17:16 2013 +0900
+++ b/TaskManager/test/SetCpuTest/main.cc	Thu Jun 06 17:50:55 2013 +0900
@@ -12,6 +12,7 @@
 static int task = 1;
 static CPU_TYPE cputype = SPE_ANY;
 
+extern void gpu_task_init(void);
 extern void task_init(void);
 
 char usr_help_str[] = "GpuRun [length]\n";
@@ -108,6 +109,7 @@
         return -1;
     }
 
+    gpu_task_init();
     task_init();
 
     for (int i = 0; i < task; ++i) {