changeset 1962:fdffcf8feeab draft

fix CudaScheduler
author kkb
date Tue, 11 Feb 2014 19:58:38 +0900
parents 7d1afa7aeccd
children 6988e5478a8c
files TaskManager/Cell/CellTaskManagerImpl.cc TaskManager/Cuda/CudaScheduler.cc TaskManager/Cuda/CudaThreads.cc TaskManager/kernel/ppe/CpuThreads.cc example/fileread/main.cc example/word_count/Func.h example/word_count/main.cc
diffstat 7 files changed, 23 insertions(+), 20 deletions(-) [+]
line wrap: on
line diff
--- a/TaskManager/Cell/CellTaskManagerImpl.cc	Tue Feb 11 16:29:46 2014 +0900
+++ b/TaskManager/Cell/CellTaskManagerImpl.cc	Tue Feb 11 19:58:38 2014 +0900
@@ -197,14 +197,16 @@
         for (;;) {
             poll();
             if (ppeManager->activeTaskQueue->empty() && spe_running > 0) {
-	        if (receive_wait) receive_wait->sem_p();
-	    } else
+                // sleep until some SPE calls me.
+                if (receive_wait) receive_wait->sem_p(); 
+            } else {
                 break;
+            }
         }
         if (spe_running < spu_limit) {
             debug_check_spe_idle(ppeManager->activeTaskQueue, spe_running);
         }
-
+        
     } while (!ppeManager->activeTaskQueue->empty() || !activeTaskQueue->empty() || spe_running > 0);
     if (!waitTaskQueue->empty()) {
         show_dead_lock_info();
--- a/TaskManager/Cuda/CudaScheduler.cc	Tue Feb 11 16:29:46 2014 +0900
+++ b/TaskManager/Cuda/CudaScheduler.cc	Tue Feb 11 19:58:38 2014 +0900
@@ -24,6 +24,7 @@
     cuInit(0);
     cuDeviceGetCount(&ret_num_devices);
     if (ret_num_devices == 0) {
+        error("no cuda device.");
         exit(EXIT_FAILURE);
     }
     cuDeviceGet(&device, 0);
@@ -153,7 +154,7 @@
 void
 CudaScheduler::run() {
     init_gpu();
-    int cur = 0;
+    int cur = 0; // current pipeline index.
     TaskListPtr tasklist = NULL;
     reply = 0;
     
@@ -179,7 +180,6 @@
             // since we are on the same memory space, we don't has to use dma_load here
             tasklist = (TaskListPtr)connector->dma_load(this, param_addr,
                                                         sizeof(TaskList), DMA_READ_TASKLIST);
-            //            tasklist[cur]->task_start_time = gettime();
             tasklist->task_start_time = 0;
             /*
              * get flip flag
@@ -188,7 +188,7 @@
             if (tasklist->self) {
                 flag[cur] = tasklist->self->flag;
             } else {
-                memset(&flag[cur], 0, sizeof(HTask::htask_flag));
+                memset(&flag[cur], 0, sizeof(HTask::htask_flag)); // unnecessary ?
             }
             for (TaskPtr nextTask = tasklist->tasks; nextTask < tasklist->last(); nextTask = nextTask->next()) {
                 if(nextTask->command==ShowTime) {
@@ -199,7 +199,7 @@
                 }
                 if (load_kernel(nextTask->command) == 0) { CudaTaskError(cudabuffer, cur, tasklist, ret); continue; }
                 CUmodule& module = *cuda_task_list[nextTask->command].cudatask->module;
-                const char *funcname = cuda_task_list[nextTask->command].name;
+                const char *funcname = cuda_task_list[nextTask->command].name; // move to load_kernel.
                 
                 ret = cuModuleGetFunction(&kernel[cur], module, funcname);
                 if (ret!=0) { CudaTaskError(cudabuffer, cur, tasklist, ret); continue; }
@@ -240,7 +240,7 @@
                 }
                 cudabuffer[cur].out_size = param - cudabuffer[cur].in_size; // no buffer on flip, but flip use memout event
                 
-                void** kernelParams;
+                void** kernelParams; // move to cudabuffer.
                 
                 if (!flag[cur].flip) {
                     kernelParams = (void**)malloc(sizeof(void*)*param);
@@ -295,7 +295,7 @@
                 // pipeline    : cur
                 // to stop pipeline set cur+1
                 if (cur == 0) {
-                    wait_for_event(kernel_event, cudabuffer, tasklist, STAGE); // to stop pipeline comment out this line
+                    wait_for_event(kernel_event, cudabuffer, tasklist, STAGE);
                 } else {
                     wait_for_event(kernel_event, cudabuffer, tasklist, cur);
                 }
--- a/TaskManager/Cuda/CudaThreads.cc	Tue Feb 11 16:29:46 2014 +0900
+++ b/TaskManager/Cuda/CudaThreads.cc	Tue Feb 11 19:58:38 2014 +0900
@@ -17,10 +17,15 @@
     send_mail(0,1,&mail);
     pthread_join(threads[0], NULL);
 
-    delete threads;
+    delete args->scheduler;
+    delete[] threads;
     delete args;
 }
 
+/**
+  Called from CpuThreads::init()
+  This semaphore signals that GpuThreads has been initialized.
+ */
 void
 CudaThreads::set_wait(SemPtr wait)
 {
--- a/TaskManager/kernel/ppe/CpuThreads.cc	Tue Feb 11 16:29:46 2014 +0900
+++ b/TaskManager/kernel/ppe/CpuThreads.cc	Tue Feb 11 19:58:38 2014 +0900
@@ -96,16 +96,12 @@
 void
 CpuThreads::init()
 {
-#ifdef __CERIUM_GPU__
+#if defined __CERIUM_GPU__ || defined __CERIUM_CUDA__
     gpu->set_wait(wait);
     gpu->init();
-    wait->sem_p();
+    wait->sem_p(); // wait for gpu threads init
 #endif
-#ifdef __CERIUM_CUDA__
-    gpu->set_wait(wait);
-    gpu->init();
-    wait->sem_p();
-#endif
+
     for (int i = 0; i < cpu_num+io_num; i++) {
         args[i].cpuid = i + id_offset;
         args[i].scheduler = new MainScheduler();
--- a/example/fileread/main.cc	Tue Feb 11 16:29:46 2014 +0900
+++ b/example/fileread/main.cc	Tue Feb 11 19:58:38 2014 +0900
@@ -17,7 +17,7 @@
 static int spe_num = 1;
 int divide_read_flag = 0;
 static CPU_TYPE spe_cpu = IO_0;
-static int DIVISION_SIZE = 4096*4;
+static int DIVISION_SIZE = 1024*256*256;
 
 extern TaskManager *manager;
 const char *usr_help_str = "Usage: ./fileread [-cpu cpu_num] [-file filename]\n\
--- a/example/word_count/Func.h	Tue Feb 11 16:29:46 2014 +0900
+++ b/example/word_count/Func.h	Tue Feb 11 19:58:38 2014 +0900
@@ -6,7 +6,7 @@
     TASK_PRINT,
 };
 
-#define DATA_NUM 12
+#define DATA_NUM 16
 #define ADD_NUM 26
 
 #define DATA_ID 0
--- a/example/word_count/main.cc	Tue Feb 11 16:29:46 2014 +0900
+++ b/example/word_count/main.cc	Tue Feb 11 19:58:38 2014 +0900
@@ -26,7 +26,7 @@
 int use_compat = 0;
 int use_iterate = 0;
 int use_iterate_all = 0;
-int array_task_num = 8;
+int array_task_num = 16;
 int spe_num = 1;
 CPU_TYPE spe_cpu = SPE_ANY;
 const char *usr_help_str = "Usage: ./word_count [-a -c -s] [-cpu spe_num] [-g] [-file filename]\n";