changeset 1965:59105550c175 draft

Fix CudaScheduler: remove the per-stage CUevent and wait on each stage's stream instead.
author Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
date Thu, 13 Feb 2014 00:29:49 +0900
parents 33d07fd99291
children d45b7223515b
files TaskManager/Cuda/CudaScheduler.cc TaskManager/Cuda/CudaScheduler.h
diffstat 2 files changed, 27 insertions(+), 45 deletions(-) [+]
line wrap: on
line diff
--- a/TaskManager/Cuda/CudaScheduler.cc	Wed Feb 12 18:15:10 2014 +0900
+++ b/TaskManager/Cuda/CudaScheduler.cc	Thu Feb 13 00:29:49 2014 +0900
@@ -28,7 +28,13 @@
         exit(EXIT_FAILURE);
     }
     cuDeviceGet(&device, 0);
-    ret = cuCtxCreate(&context, 0, device);
+    /*
+      context scheduling flags (one of):
+      CU_CTX_SCHED_AUTO
+      CU_CTX_SCHED_SPIN
+      CU_CTX_SCHED_YIELD
+    */ 
+    ret = cuCtxCreate(&context, CU_CTX_SCHED_SPIN, device);
     if (ret!=0) {
         error(convert_error_status(ret));
     }
@@ -100,30 +106,24 @@
 }
 
 void
-CudaScheduler::wait_for_event(CUevent* kernel_event, CudaBufferPtr cudabuffer, TaskListPtr taskList, int cur) {
-    if (cuEventQuery(kernel_event[cur]) == CUDA_SUCCESS) {
+CudaScheduler::wait_for_event(CudaBufferPtr cudabuffer, TaskListPtr taskList, int cur) {
+    if (cuStreamQuery(cudabuffer[cur].stream) == CUDA_SUCCESS) {
         
-    } else if (cuEventQuery(kernel_event[cur]) == CUDA_ERROR_NOT_READY){
-        ret = cuEventSynchronize(kernel_event[cur]);
-        
+    } else if (cuStreamQuery(cudabuffer[cur].stream) == CUDA_ERROR_NOT_READY){
+        ret = cuStreamSynchronize(cudabuffer[cur].stream);
         if (ret!=0) {
             error(convert_error_status(ret));
         }
-        if (taskList!=NULL) {
-            unsigned long start = 0;
-            unsigned long end = 0;
-            // timestamp 取る方法がない?
-        }
-        ret = cuEventDestroy(kernel_event[cur]);
-        if (ret!=0) {
-            error(convert_error_status(ret));
-        }
-        ret = cuEventCreate(&kernel_event[cur], CU_EVENT_DISABLE_TIMING);
-        if (ret!=0) {
-            error(convert_error_status(ret));
-        }        
+    }
+    
+    if (taskList!=NULL) {
+        // unsigned long start = 0;
+        // unsigned long end = 0;
+        // is there no way to get a timestamp?
+    }
+    
+    if (cudabuffer[cur].in_size > 0 || cudabuffer[cur].out_size > 0)
         release_buf_event(cur, cudabuffer);
-    }
 
     if(reply) {
         connector->mail_write(reply);
@@ -135,14 +135,9 @@
 void
 CudaScheduler::CudaTaskError(CudaBufferPtr cudabuffer, int cur, TaskListPtr taskList, int ret) {
     error(convert_error_status(ret));
-    if (cuEventQuery(kernel_event[cur]) == CUDA_ERROR_NOT_READY) {
-        cuEventDestroy(kernel_event[cur]);
-        cuEventCreate(&kernel_event[cur], CU_EVENT_DISABLE_TIMING);
-    }
     kernel[cur] = 0;
-    release_buf_event(cur, cudabuffer);
 
-    wait_for_event(kernel_event, cudabuffer, taskList, cur);
+    wait_for_event(cudabuffer, taskList, cur);
 }
 
 void
@@ -154,7 +149,6 @@
     
     for (int i = 0; i<STAGE; i++) {
         initCudaBuffer(&cudabuffer[i]);
-        cuEventCreate(&kernel_event[i], CU_EVENT_DISABLE_TIMING);
     }
 
     memset(&flag, 0, sizeof(HTask::htask_flag)*STAGE);
@@ -260,8 +254,6 @@
                 }
                 if (ret!=0) { CudaTaskError(cudabuffer , cur, tasklist, ret); continue; }
 
-                if (ret!=0) { CudaTaskError(cudabuffer , cur, tasklist, ret); continue; }
-                
                 for(int i=0;i<nextTask->outData_count;i++) { // read output data
                     ListElement *output_buf = nextTask->outData(i);
                     if (output_buf->size==0) break;
@@ -269,26 +261,17 @@
                     int i0 = flag[cur].flip ? i+1 : i ;
                     // flip use memin buffer and memout event
                     ret = cuMemcpyDtoHAsync(output_buf->addr, mem[i0], output_buf->size, cudabuffer[cur].stream);
-
-                    ret = cuEventRecord(kernel_event[cur], cudabuffer[cur].stream);
                     if (ret!=0) { CudaTaskError(cudabuffer, cur, tasklist, ret); continue; }
                 }
-                cur++;
-                if (STAGE <= cur)
-                    cur = 0;
-                // wait kernel[cur] and write[cur]
-                // pipeline    : cur
-                // to stop pipeline set cur-1
-                wait_for_event(kernel_event, cudabuffer, tasklist, cur);
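+                cur++;            // advance to the next stage; wait_for_event below waits on that stage's stream
+                if (STAGE <= cur) // (to stop pipelining, move this increment to after wait_for_event)
+                    cur = 0;      // wrap around to stage 0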
+                wait_for_event(cudabuffer, tasklist, cur);
             }
             reply = (memaddr)tasklist->waiter;
             param_addr = (memaddr)tasklist->next;
         }
-        wait_for_event(kernel_event, cudabuffer, tasklist, cur-1);
-        for (int i = 0; i<STAGE; i++) {
-            ret = cuStreamSynchronize(cudabuffer[i].stream);
-            if (ret!=0) { CudaTaskError(cudabuffer , cur, tasklist, ret); continue; }
-        }
+        wait_for_event(cudabuffer, tasklist, cur);
         
         unsigned long long wait = 0;
         (*connector->end_dmawait_profile)(&wait, &(connector->start_time), &(connector->stop_time));
--- a/TaskManager/Cuda/CudaScheduler.h	Wed Feb 12 18:15:10 2014 +0900
+++ b/TaskManager/Cuda/CudaScheduler.h	Thu Feb 13 00:29:49 2014 +0900
@@ -26,7 +26,7 @@
     CudaScheduler();
     virtual ~CudaScheduler();
     void init_gpu();
-    void wait_for_event(CUevent* event, CudaBufferPtr m, TaskListPtr taskList, int cur);
+    void wait_for_event(CudaBufferPtr m, TaskListPtr taskList, int cur);
     void run();
     
     // platform platform;
@@ -45,7 +45,6 @@
     // 変数名は function にすべきか kernel にすべきか
     // とりあえず、kernel で
     CUfunction kernel[STAGE];
-    CUevent kernel_event[STAGE];
     CudaBuffer cudabuffer[STAGE];
     
     HTask::htask_flag flag[STAGE];