changeset 1908:bd5152f8fe3a draft

cuda
author Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
date Fri, 17 Jan 2014 10:21:08 +0900
parents df397764c51e
children c228dbede5df
files TaskManager/Cuda/CudaError.h TaskManager/Cuda/CudaScheduler.cc TaskManager/Cuda/CudaScheduler.h TaskManager/Gpu/GpuScheduler.cc TaskManager/include/error.h example/word_count/ppe/Exec.cc
diffstat 6 files changed, 238 insertions(+), 3 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/TaskManager/Cuda/CudaError.h	Fri Jan 17 10:21:08 2014 +0900
@@ -0,0 +1,45 @@
+// Convert a CUDA error status code into the name of its enumerator.
+// (It is a little unfortunate that this table has to be written by hand.)
+//
+//   status  numeric status code to look up
+//
+// Returns the matching enumerator name, or a fixed fallback string when the
+// code is not listed in the table, so the result is never an invalid pointer.
+//
+// NOTE(review): the table lists runtime API (cudaError) enumerators, while the
+// callers in CudaScheduler.cc pass return codes of driver API (cu*) calls --
+// confirm that the two numbering schemes agree for the codes of interest.
+inline const char* convert_error_status(unsigned int status){
+    static const struct {
+        const unsigned int status;
+        const char* status_string;
+    } Error_Status[] = {
+        {cudaSuccess,                       "cudaSuccess"},
+        {cudaErrorMissingConfiguration,     "cudaErrorMissingConfiguration"},
+        {cudaErrorMemoryAllocation,         "cudaErrorMemoryAllocation"},
+        {cudaErrorInitializationError,      "cudaErrorInitializationError"},
+        {cudaErrorLaunchFailure,            "cudaErrorLaunchFailure"},
+        {cudaErrorLaunchTimeout,            "cudaErrorLaunchTimeout"},
+        {cudaErrorLaunchOutOfResources,     "cudaErrorLaunchOutOfResources"},
+        {cudaErrorInvalidDeviceFunction,    "cudaErrorInvalidDeviceFunction"},
+        {cudaErrorInvalidConfiguration,     "cudaErrorInvalidConfiguration"},
+        {cudaErrorInvalidDevice,            "cudaErrorInvalidDevice"},
+        {cudaErrorInvalidValue,             "cudaErrorInvalidValue"},
+        {cudaErrorInvalidPitchValue,        "cudaErrorInvalidPitchValue"},
+        {cudaErrorInvalidSymbol,            "cudaErrorInvalidSymbol"},
+        {cudaErrorUnmapBufferObjectFailed,  "cudaErrorUnmapBufferObjectFailed"},
+        {cudaErrorInvalidHostPointer,       "cudaErrorInvalidHostPointer"},
+        {cudaErrorInvalidDevicePointer,     "cudaErrorInvalidDevicePointer"},
+        {cudaErrorInvalidTexture,           "cudaErrorInvalidTexture"},
+        {cudaErrorInvalidTextureBinding,    "cudaErrorInvalidTextureBinding"},
+        {cudaErrorInvalidChannelDescriptor, "cudaErrorInvalidChannelDescriptor"},
+        {cudaErrorInvalidMemcpyDirection,   "cudaErrorInvalidMemcpyDirection"},
+        {cudaErrorInvalidFilterSetting,     "cudaErrorInvalidFilterSetting"},
+        {cudaErrorInvalidNormSetting,       "cudaErrorInvalidNormSetting"},
+        {cudaErrorUnknown,                  "cudaErrorUnknown"},
+        {cudaErrorInvalidResourceHandle,    "cudaErrorInvalidResourceHandle"},
+        {cudaErrorInsufficientDriver,       "cudaErrorInsufficientDriver"},
+        {cudaErrorSetOnActiveProcess,       "cudaErrorSetOnActiveProcess"},
+        {cudaErrorStartupFailure,           "cudaErrorStartupFailure"},
+        {0, NULL}   // sentinel: the NULL string terminates the scan below
+    };
+
+    // Compare (==), do not assign: the first matching entry wins.
+    for (int i = 0; Error_Status[i].status_string != NULL; i++) {
+        if (Error_Status[i].status == status) {
+            return Error_Status[i].status_string;
+        }
+    }
+    return "unknown CUDA error status";
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/TaskManager/Cuda/CudaScheduler.cc	Fri Jan 17 10:21:08 2014 +0900
@@ -0,0 +1,130 @@
+#include "TaskManager.h"
+#include "CudaScheduler.h"
+#include "ReferenceDmaManager.h"
+#include "PreRefDmaManager.h"
+#include "SchedTask.h"
+#include "CudaError.h"
+#include "ListData.h"
+#include "SysFunc.h"
+#include "gettime.h"
+#include "error.h"
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+
+// Task table of the CUDA scheduler (declared extern in CudaScheduler.h).
+// wait_for_event() indexes it with a task's command number to obtain the
+// task name for its debug output.
+TaskObject cuda_task_list[MAX_TASK_OBJECT];
+
+// Construct the scheduler; all set-up work is delegated to init_gpu().
+CudaScheduler::CudaScheduler()
+{
+    this->init_gpu();
+}
+
+// Initialise the CUDA driver API and create a context on device 0.
+//
+// Terminates the process with EXIT_FAILURE when no CUDA device is reported
+// (this also covers a failed cuInit / cuDeviceGetCount, because the local
+// counter then stays 0).  A failure of cuCtxCreate is reported through
+// error() with the textual form of the status code stored in `ret`.
+void
+CudaScheduler::init_gpu() {
+    cuInit(0);
+
+    // cuDeviceGetCount() writes through an int*, so use a local int and copy
+    // the value into the unsigned member afterwards.
+    int device_count = 0;
+    cuDeviceGetCount(&device_count);
+    ret_num_devices = device_count;
+    if (ret_num_devices == 0) {
+        exit(EXIT_FAILURE);
+    }
+
+    // The device handle belongs in `device`; `context` is filled in below.
+    cuDeviceGet(&device, 0);
+    ret = cuCtxCreate(&context, 0, device);
+    // Driver API error codes are positive, so compare against CUDA_SUCCESS
+    // instead of testing for a negative value.
+    if (ret != CUDA_SUCCESS) {
+        const char* msg = convert_error_status(ret);
+        error(msg);
+    }
+}
+
+// Tear down the scheduler by destroying the CUDA context kept in `context`.
+CudaScheduler::~CudaScheduler() {
+    cuCtxDestroy(this->context);
+}
+
+// Put `m` into its initial, empty state with room for 64 entries.
+//
+// Both arrays are sized by their element type (not by a pointer to it) and
+// are zero-filled, because release_buf_event() treats a zero entry as
+// "nothing to release".  If either allocation fails, both arrays are released
+// and the recorded capacity is set to 0 so it never overstates what exists.
+void
+CudaScheduler::initCudaBuffer(CudaBufferPtr m) {
+    m->size = 0;
+    m->allcate_size = 64;
+    m->buf = (CUdeviceptr*)calloc(m->allcate_size, sizeof(CUdeviceptr));
+    m->event = (CUevent*)calloc(m->allcate_size, sizeof(CUevent));
+    if (m->buf == NULL || m->event == NULL) {
+        free(m->buf);
+        free(m->event);
+        m->buf = 0;
+        m->event = 0;
+        m->allcate_size = 0;
+    }
+}
+
+// Free the two host-side arrays of `m` and reset every field to zero.
+// Only the arrays themselves are freed here; the entries they held are not
+// released by this function.
+void
+CudaScheduler::destroyCudaBuffer(CudaBufferPtr m) {
+    free(m->event);
+    m->event = 0;
+    free(m->buf);
+    m->buf = 0;
+    m->allcate_size = 0;
+    m->size = 0;
+}
+
+// Allocate `size` bytes of device memory and record the allocation in slot
+// `i` of `m`, growing the slot arrays first when `i` is outside their
+// current capacity.
+//
+//   m        buffer table that receives the allocation
+//   i        index of the slot to fill
+//   context  not used by this function
+//   size     number of bytes requested from cuMemAlloc
+//   error    if non-NULL, receives the status code of the operation
+//
+// Returns the device pointer stored in slot `i`, or 0 when growing the table
+// or the device allocation failed.
+CUdeviceptr
+CudaScheduler::createBuffer(CudaBufferPtr m,int i, CUcontext context, size_t size, int* error) {
+    if (i >= m->allcate_size) {
+        // Slot i must exist, so the capacity has to be strictly greater than i.
+        // Restart from 64 when the table is empty so that doubling terminates.
+        int new_size = (m->allcate_size > 0) ? m->allcate_size : 64;
+        while (i >= new_size) {
+            new_size *= 2;
+        }
+
+        CUdeviceptr* new_buf = (CUdeviceptr*)realloc(m->buf, new_size*sizeof(CUdeviceptr));
+        if (new_buf == NULL) {
+            if (error != NULL) *error = CUDA_ERROR_OUT_OF_MEMORY;
+            return 0;
+        }
+        m->buf = new_buf;
+
+        CUevent* new_event = (CUevent*)realloc(m->event, new_size*sizeof(CUevent));
+        if (new_event == NULL) {
+            if (error != NULL) *error = CUDA_ERROR_OUT_OF_MEMORY;
+            return 0;
+        }
+        m->event = new_event;
+
+        // Zero the newly added tail: release_buf_event() treats a zero entry
+        // as "nothing to release".
+        memset(m->buf + m->allcate_size, 0, (new_size - m->allcate_size)*sizeof(CUdeviceptr));
+        memset(m->event + m->allcate_size, 0, (new_size - m->allcate_size)*sizeof(CUevent));
+        m->allcate_size = new_size;
+    }
+
+    CUresult status = cuMemAlloc(&m->buf[i], size);
+    if (status != CUDA_SUCCESS) {
+        // Never leave a stale or indeterminate value in the slot on failure.
+        m->buf[i] = 0;
+    }
+    if (error != NULL) *error = status;
+    return m->buf[i];
+}
+
+// Value of a kernel_event slot that has no event to wait for:
+// wait_for_event() skips the synchronisation when the slot equals it.
+#define NOP_REPLY NULL
+
+// Release everything recorded in the buffer set that is NOT current
+// (index 1-cur of `mem`): destroy each non-zero event, free each non-zero
+// device allocation, zero both slots, and finally reset the entry count.
+static void
+release_buf_event(int cur, CudaScheduler::CudaBufferPtr mem) {
+    CudaScheduler::CudaBufferPtr other = &mem[1-cur];
+    const int count = other->size;
+
+    for (int idx = 0; idx < count; ++idx) {
+        if (other->event[idx] != 0) {
+            cuEventDestroy(other->event[idx]);
+        }
+        other->event[idx] = 0;
+
+        if (other->buf[idx]) {
+            cuMemFree(other->buf[idx]);
+        }
+        other->buf[idx] = 0;
+    }
+    other->size = 0;
+}
+
+// Wait for the work of the previous buffer set (index 1-cur) to finish,
+// release its resources, and send the pending reply, if any.
+//
+//   kernel_event  pair of kernel completion events; slot 1-cur is waited on
+//                 and destroyed unless it equals NOP_REPLY
+//   memout        pair of output buffer tables; the events of slot 1-cur are
+//                 waited on before the slot is released
+//   memin         pair of input buffer tables; slot 1-cur is released
+//   taskList      task list used for the debug output; may be NULL
+//   cur           index (0 or 1) of the buffer set currently in use
+//
+// Failures of cuEventSynchronize are reported through error().
+void
+CudaScheduler::wait_for_event(CUevent* kernel_event, CudaBufferPtr memout, CudaBufferPtr memin, TaskListPtr taskList, int cur) {
+    const int prev = 1-cur;
+
+    if (kernel_event[prev] != NOP_REPLY) {
+        ret = cuEventSynchronize(kernel_event[prev]);
+        // Driver API error codes are positive; compare against CUDA_SUCCESS.
+        if (ret != CUDA_SUCCESS) {
+            error(convert_error_status(ret));
+        }
+        // TODO: record start/end timestamps in taskList
+        // (is there no way to obtain timestamps?)
+        cuEventDestroy(kernel_event[prev]);
+        kernel_event[prev] = 0;
+    }
+
+    if (memout[prev].size > 0) {
+        // `event` is an array of events: synchronise on each recorded entry
+        // rather than passing the array pointer itself.
+        for (int i = 0; i < memout[prev].size; i++) {
+            if (memout[prev].event[i] == 0) continue;
+            ret = cuEventSynchronize(memout[prev].event[i]);
+            if (ret != CUDA_SUCCESS) error(convert_error_status(ret));
+        }
+        release_buf_event(cur, memout);
+    }
+
+    if (memin[prev].size > 0) {
+        release_buf_event(cur, memin);
+    }
+
+    if(reply) {
+        connector->mail_write(reply);
+        // taskList is treated as optional, so do not dereference a NULL one.
+        // cpu_type is read through taskList->self, as GpuScheduler does.
+        if (taskList != NULL) {
+            __debug(this, "CUDA %d %s\t%lld\n", taskList->self->cpu_type, (char*)(cuda_task_list[taskList->tasks[0].command].name), taskList->task_end_time-taskList->task_start_time);
+        }
+        reply = 0;
+    }
+}
+
+void
+CudaScheduler::CudaTaskError(int cur, TaskListPtr taskList, int ret) {
+    error(convert_error_status(ret));
+    if (kernel_event[cur] != 0)
+        cuEventDestroy(kernel_event[cur]);
+    kernel_event[cur] = NOP_REPLY;
+    if (kernel[cur] != 0)
+        
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/TaskManager/Cuda/CudaScheduler.h	Fri Jan 17 10:21:08 2014 +0900
@@ -0,0 +1,60 @@
+#ifndef INCLUDE_CUDA_SCHEDULER
+#define INCLUDE_CUDA_SCHEDULER
+
+#include "MainScheduler.h"
+#include "FifoDmaManager.h"
+#include "CudaThreads.h"
+#include "HTask.h"
+#include "TaskManager.h"
+
+extern TaskObject cuda_task_list[MAX_TASK_OBJECT];
+
+// Scheduler that drives tasks through the CUDA driver API (cu* calls).
+// Members declared as two-element arrays are indexed with `cur` / `1-cur`
+// by the code in CudaScheduler.cc.
+class CudaScheduler : public MainScheduler {
+ public:
+    // Growable host-side table of device allocations and their events.
+    typedef struct cudabuffer {
+        int allcate_size;   // number of slots allocated in buf / event
+        int size;           // number of slots currently in use
+        CUdeviceptr* buf;   // device allocations, one per slot
+        CUevent* event;     // events, one per slot
+    } CudaBuffer;
+    // Pointer-to-CudaBuffer type used by the method signatures below.
+    typedef CudaBuffer* CudaBufferPtr;
+
+    CudaScheduler();
+    virtual ~CudaScheduler();
+    void init_gpu();
+    // Parameter list matches the definition in CudaScheduler.cc.
+    void wait_for_event(CUevent* event, CudaBufferPtr memout, CudaBufferPtr memin, TaskListPtr taskList, int cur);
+    void run();
+
+    // platform platform;
+    // A platform is needed in OpenCL because it supports GPUs from several vendors.
+    // With CUDA only NVIDIA is supported, so is it unnecessary?
+    // Might it be needed when using the CPU from CUDA?
+    // Can CUDA use the CPU at all in the first place?
+    CUdevice device;
+    unsigned int ret_num_platforms; // probably unnecessary
+    unsigned int ret_num_devices;
+    CUcontext context;
+    // command_queue command_queue;
+    // CUDA has nothing that corresponds to command_queue.
+    // Reportedly: "Closest approximation would be the CUDA Stream mechanism."
+    int ret;
+    memaddr reply;
+    // Corresponds to cl_kernel.
+    // Should the variable be named function or kernel?
+    // Going with kernel for now.
+    CUfunction kernel[2];
+    CUevent kernel_event[2];
+    CudaBuffer memin[2];
+    CudaBuffer memout[2];
+    HTask::htask_flag flag[2];
+
+ private:
+    int load_kernel(int cmd);
+    CUdeviceptr createBuffer(CudaBufferPtr m, int i, CUcontext context, /* mem_flag mem_flag, */size_t size, int* error);
+    void initCudaBuffer(CudaBufferPtr m);
+    void destroyCudaBuffer(CudaBufferPtr m);
+    void CudaTaskError(int cur, TaskListPtr taskList, int ret);
+};
+
+// Convenience wrapper that forwards its three arguments to
+// cuda_register_task().  Note that the expansion already ends with ';'.
+#define CudaSchedRegister(str, filename, functionname)  \
+    cuda_register_task(str, filename, functionname);
+#endif
--- a/TaskManager/Gpu/GpuScheduler.cc	Thu Jan 16 18:59:01 2014 +0900
+++ b/TaskManager/Gpu/GpuScheduler.cc	Fri Jan 17 10:21:08 2014 +0900
@@ -8,6 +8,7 @@
 #include "ListData.h"
 #include "SysFunc.h"
 #include "gettime.h"
+#include "error.h"
 #include <fcntl.h>
 #include <sys/stat.h>
 #include <string.h>
@@ -134,7 +135,7 @@
     }
     if(reply) {
         connector->mail_write(reply);
-        __debug(this, "GPU %d %s\t%lld\n",taskList->self->cpu_type,(char*)(gpu_task_list[taskList->tasks[0].command].name),taskList->task_end_time-taskList->task_start_time));
+        __debug(this, "GPU %d %s\t%lld\n",taskList->self->cpu_type,(char*)(gpu_task_list[taskList->tasks[0].command].name),taskList->task_end_time-taskList->task_start_time);
         reply = 0;
     }
 }
--- a/TaskManager/include/error.h	Thu Jan 16 18:59:01 2014 +0900
+++ b/TaskManager/include/error.h	Fri Jan 17 10:21:08 2014 +0900
@@ -4,7 +4,7 @@
 #ifdef DEBUG
 #  include <stdio.h>
 #  define __debug(s, ...) do { \
-	s->printf(__VA_ARGS__);		\
+	s->printf(__VA_ARGS__);                 \
     } while (0)
 #else /* DEBUG */
 #  define __debug(...)
--- a/example/word_count/ppe/Exec.cc	Thu Jan 16 18:59:01 2014 +0900
+++ b/example/word_count/ppe/Exec.cc	Fri Jan 17 10:21:08 2014 +0900
@@ -17,7 +17,6 @@
     int word_num = 0;
     int line_num = 0;
     int i = 0;
-    s->printf("%ld\n",o_data);
     head_tail_flag[0] = (i_data[0] != 0x20) && (i_data[0] != 0x0A);
     word_num -= 1-head_tail_flag[0];