Mercurial > hg > Game > Cerium
changeset 1908:bd5152f8fe3a draft
cuda
author | Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp> |
---|---|
date | Fri, 17 Jan 2014 10:21:08 +0900 |
parents | df397764c51e |
children | c228dbede5df |
files | TaskManager/Cuda/CudaError.h TaskManager/Cuda/CudaScheduler.cc TaskManager/Cuda/CudaScheduler.h TaskManager/Gpu/GpuScheduler.cc TaskManager/include/error.h example/word_count/ppe/Exec.cc |
diffstat | 6 files changed, 238 insertions(+), 3 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/TaskManager/Cuda/CudaError.h Fri Jan 17 10:21:08 2014 +0900 @@ -0,0 +1,45 @@ +// enum to string +// これ自分で書くのあれだよね +const char* convert_error_status(unsigned int status){ + static const struct { + const unsigned int status; + const char* status_string; + } Error_Status[] = { + {cudaSuccess, "cudaSuccess"}, + {cudaErrorMissingConfiguration, "cudaErrorMissingConfiguration"}, + {cudaErrorMemoryAllocation, "cudaErrorMemoryAllocation"}, + {cudaErrorInitializationError, "cudaErrorInitializationError"}, + {cudaErrorLaunchFailure, "cudaErrorLaunchFailure"}, + {cudaErrorLaunchTimeout, "cudaErrorLaunchTimeout"}, + {cudaErrorLaunchOutOfResources, "cudaErrorLaunchOutOfResources"}, + {cudaErrorInvalidDeviceFunction, "cudaErrorInvalidDeviceFunction"}, + {cudaErrorInvalidConfiguration, "cudaErrorInvalidConfiguration"}, + {cudaErrorInvalidDevice, "cudaErrorInvalidDevice"}, + {cudaErrorInvalidValue, "cudaErrorInvalidValue"}, + {cudaErrorInvalidPitchValue, "cudaErrorInvalidPitchValue"}, + {cudaErrorInvalidSymbol, "cudaErrorInvalidSymbol"}, + {cudaErrorUnmapBufferObjectFailed, "cudaErrorUnmapBufferObjectFailed"}, + {cudaErrorInvalidHostPointer, "cudaErrorInvalidHostPointer"}, + {cudaErrorInvalidDevicePointer, "cudaErrorInvalidDevicePointer"}, + {cudaErrorInvalidTexture, "cudaErrorInvalidTexture"}, + {cudaErrorInvalidTextureBinding, "cudaErrorInvalidTextureBinding"}, + {cudaErrorInvalidChannelDescriptor, "cudaErrorInvalidChannelDescriptor"}, + {cudaErrorInvalidMemcpyDirection, "cudaErrorInvalidMemcpyDirection"}, + {cudaErrorInvalidFilterSetting, "cudaErrorInvalidFilterSetting"}, + {cudaErrorInvalidNormSetting, "cudaErrorInvalidNormSetting"}, + {cudaErrorUnknown, "cudaErrorUnknown"}, + {cudaErrorInvalidResourceHandle, "cudaErrorInvalidResourceHandle"}, + {cudaErrorInsufficientDriver, "cudaErrorInsufficientDriver"}, + {cudaErrorSetOnActiveProcess, "cudaErrorSetOnActiveProcess"}, + {cudaErrorStartupFailure, "cudaErrorStartupFailure"}, + {0, NULL} + }; + const char* message; + + for(int i=0; Error_Status[i].status_string != NULL; i++) { + if (Error_Status[i].status = status) { + message = Error_Status[i].status_string; + } + } + return message; +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/TaskManager/Cuda/CudaScheduler.cc Fri Jan 17 10:21:08 2014 +0900 @@ -0,0 +1,130 @@ +#include "TaskManager.h" +#include "CudaScheduler.h" +#include "ReferenceDmaManager.h" +#include "PreRefDmaManager.h" +#include "SchedTask.h" +#include "CudaError.h" +#include "ListData.h" +#include "SysFunc.h" +#include "gettime.h" +#include "error.h" +#include <stdio.h> +#include <fcntl.h> +#include <sys/stat.h> +#include <string.h> + +TaskObject cuda_task_list[MAX_TASK_OBJECT]; + +CudaScheduler::CudaScheduler() { + init_gpu(); +} + +void +CudaScheduler::init_gpu() { + cuInit(0); + cuDeviceGetCount(&ret_num_devices); + if (ret_num_devices == 0) { + exit(EXIT_FILURE); + } + cuDeviceGet(&context, 0); + ret = cuCtxCreate(&context, 0, device); + if (ret<0) { + const char* msg = convert_error_status(ret); + error(msg); + } +} + +CudaScheduler::~CudaScheduler() +{ + cuCtxDestroy(context); +} + +void +CudaScheduler::initCudaBuffer(CudaBufferPtr m) { + m->size = 0; + m->allcate_size = 64; + m->buf = (CUdeviceptr*)malloc(m->allcate_size*sizeof(CUdeviceptr*)); + m->event = (CUevent*)malloc(m->allcate_size*sizeof(CUevent*)); +} + +void +CudaScheduler::destroyCudaBuffer(CudaBufferPtr m) { + free(m->buf); + free(m->event); + m->size = 0; + m->allcate_size = 0; + m->buf = 0; + m->event = 0; +} + +CUdeviceptr +CudaScheduler::createBuffer(CudaBufferPtr m,int i, CUcontext context, size_t size, int* error) { + if (i > m->allcate_size) { + m->allcate_size *= 2; + m->buf = (CUdeviceptr*)realloc(m->buf, m->allcate_size*sizeof(CUdeviceptr*)); + m->event = (CUevent*)remalloc(m->allcate_size*sizeof(CUevent*)); + } + + cuMemAlloc(&m->buf[i], size); + return m->buf[i]; +} + +#define NOP_REPLY NULL + +static void +release_buf_event(int cur, CudaScheduler::CudaBufferPtr mem) { + for (int i=0; i<mem[1-cur].size; i++) { + if (mem[1-cur].event[i] != 0) + cuEventDestroy(mem[1-cur].event[i]); + mem[1-cur].event[i] = 0; + if (mem[1-cur].buf[i]) + cuMemFree(mem[1-cur].buf[i]); + mem[1-cur].buf[i] = 0; + } + mem[1-cur].size = 0; +} + +void +CudaBufferPtr::wait_for_event(CUevent* kernel_event, CudaBufferPtr memout, CudaBufferPtr memin, TaskListPtr taskList, int cur) { + if (kernel_event[1-cur] == NOP_REPLY) { + + } else { + ret = cuEventSynchronize(kernel_event[1-cur]); + + if (ret<0) { + error(convert_error_status(ret)); + } + if (taskList!=NULL) { + unsigned long start = 0; + unsigned long end = 0; + // timestamp 取る方法がない? + } + cuEventDestroy(kernel_event[1-cur]); + kernel_event[1-cur] = 0; + } + + if (memout[1-cur].size > 0) { + ret = cuEventSynchronize(memout[1-cur].event); + if (ret<0) error(convert_error_status(ret)); + release_buf_event(cur, memout); + } + + if (memin[1-cur].size > 0) { + release_buf_event(cur, memin); + } + + if(reply) { + connector->mail_write(reply); + __debug(this, "CUDA %d %s\t%lld\n", taskList->cpu_type, (char*)(cuda_task_list[taskList->tasks[0].command].name), taskList->task_end_time-taskList->task_start_time); + reply = 0; + } +} + +void +CudaScheduler::CudaTaskError(int cur, TaskListPtr taskList, int ret) { + error(convert_error_status(ret)); + if (kernel_event[cur] != 0) + cuEventDestroy(kernel_event[cur]); + kernel_event[cur] = NOP_REPLY; + if (kernel[cur] != 0) +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/TaskManager/Cuda/CudaScheduler.h Fri Jan 17 10:21:08 2014 +0900 @@ -0,0 +1,60 @@ +#ifndef INCLUDE_CUDA_SCHEDULER +#define INCLUDE_CUDA_SCHEDULER + +#include "MainScheduler.h" +#include "FifoDmaManager.h" +#include "CudaThreads.h" +#include "HTask.h" +#include "TaskManager.h" + +extern TaskObject cuda_task_list[MAX_TASK_OBJECT]; + +class CudaScheduler : public MainScheduler { + public: + typedef struct cudabuffer { + int allcate_size; + int size; + CUdeviceptr* buf; + CUevent* event; + } CudaBuffer; + cudabuffer* CudaBufferPtr; + CudaScheduler(); + virtual ~CudaScheduler(); + void init_gpu(); + void wait_for_event(CUevent* event, CudaBufferPtr m, TaskListPtr taskList, int cur); + void run(); + + // platform platform; + // platform は OpenCL が複数のメーカーの GPU に対応してるから必要 + // Cuda の場合、NVIDIA だけなので必要ない? + // Cuda で CPU 使うとき要るんじゃね? + // そもそも CPU 使えたっけ? + CUdevice device; + unsigned int ret_num_platforms; // たぶん要らない + unsigned int ret_num_devices; + CUcontext context; + // command_queue command_queue; + // Cuda には command_queue に相当するものはない + // Closest approximation would be the CUDA Stream mechanism. らしい... + int ret; + memaddr reply; + // cl_kernel に相当 + // 変数名は function にすべきか kernel にすべきか + // とりあえず、kernel で + CUfunction kernel[2]; + CUevent kernel_event[2]; + CudaBuffer memin[2]; + CudaBuffer memout[2]; + HTask::htask_flag[2]; + + privete: + int load_kernel(int cmd); + CUdeviceptr createBuffer(CudaBufferPtr m, int i, CUcontext context, /* mem_flag mem_flag, */size_t size, int* error); + void initCudaBuffer(CudaBufferPtr m); + void destroyCudaBuffer(CudaBufferPtr m); + void CudaTaskError(int cur, TaskListPtr taskList, int ret); +}; + +#define CudaSchedRegister(str, filename, functionname) \ + cuda_register_task(str, filename, functionname); +#endif
--- a/TaskManager/Gpu/GpuScheduler.cc Thu Jan 16 18:59:01 2014 +0900 +++ b/TaskManager/Gpu/GpuScheduler.cc Fri Jan 17 10:21:08 2014 +0900 @@ -8,6 +8,7 @@ #include "ListData.h" #include "SysFunc.h" #include "gettime.h" +#include "error.h" #include <fcntl.h> #include <sys/stat.h> #include <string.h> @@ -134,7 +135,7 @@ } if(reply) { connector->mail_write(reply); - __debug(this, "GPU %d %s\t%lld\n",taskList->self->cpu_type,(char*)(gpu_task_list[taskList->tasks[0].command].name),taskList->task_end_time-taskList->task_start_time)); + __debug(this, "GPU %d %s\t%lld\n",taskList->self->cpu_type,(char*)(gpu_task_list[taskList->tasks[0].command].name),taskList->task_end_time-taskList->task_start_time); reply = 0; } }
--- a/TaskManager/include/error.h Thu Jan 16 18:59:01 2014 +0900 +++ b/TaskManager/include/error.h Fri Jan 17 10:21:08 2014 +0900 @@ -4,7 +4,7 @@ #ifdef DEBUG # include <stdio.h> # define __debug(s, ...) do { \ - s->printf(__VA_ARGS__); \ + s->printf(__VA_ARGS__); \ } while (0) #else /* DEBUG */ # define __debug(...)
--- a/example/word_count/ppe/Exec.cc Thu Jan 16 18:59:01 2014 +0900 +++ b/example/word_count/ppe/Exec.cc Fri Jan 17 10:21:08 2014 +0900 @@ -17,7 +17,6 @@ int word_num = 0; int line_num = 0; int i = 0; - s->printf("%ld\n",o_data); head_tail_flag[0] = (i_data[0] != 0x20) && (i_data[0] != 0x0A); word_num -= 1-head_tail_flag[0];