view TaskManager/Cuda/CudaScheduler.cc @ 1908:bd5152f8fe3a draft

cuda
author Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
date Fri, 17 Jan 2014 10:21:08 +0900
parents
children effb5653fd5c
line wrap: on
line source

#include "TaskManager.h"
#include "CudaScheduler.h"
#include "ReferenceDmaManager.h"
#include "PreRefDmaManager.h"
#include "SchedTask.h"
#include "CudaError.h"
#include "ListData.h"
#include "SysFunc.h"
#include "gettime.h"
#include "error.h"
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>

// Global table of task objects for the CUDA scheduler, holding MAX_TASK_OBJECT
// entries. Code in this file indexes it by a task's `command` value and reads
// each entry's `name` field for debug output.
TaskObject cuda_task_list[MAX_TASK_OBJECT];

// Constructing a scheduler immediately brings up the GPU: all of the work is
// delegated to init_gpu().
CudaScheduler::CudaScheduler()
{
    this->init_gpu();
}

void
CudaScheduler::init_gpu() {
    cuInit(0);
    cuDeviceGetCount(&ret_num_devices);
    if (ret_num_devices == 0) {
        exit(EXIT_FILURE);
    }
    cuDeviceGet(&context, 0);
    ret = cuCtxCreate(&context, 0, device);
    if (ret<0) {
        const char* msg = convert_error_status(ret);
        error(msg);
    }
}

// Tear-down counterpart of init_gpu(): destroys the context stored in the
// `context` member.
CudaScheduler::~CudaScheduler() {
    cuCtxDestroy(this->context);
}

/**
 * Prepare an empty CudaBuffer with room for 64 device pointers and 64 events.
 *
 * Both arrays are zero-filled so that code which tests a slot against 0
 * before freeing or destroying it (see release_buf_event) never sees
 * uninitialized memory.
 *
 * @param m  buffer descriptor to initialize; its previous contents are
 *           overwritten without being freed.
 *
 * On allocation failure the descriptor is left empty (null arrays,
 * allcate_size == 0) and the failure is reported through error().
 */
void
CudaScheduler::initCudaBuffer(CudaBufferPtr m) {
    m->size = 0;
    m->allcate_size = 64;
    // Size the arrays by element type, not by pointer-to-element type.
    m->buf = (CUdeviceptr*)calloc(m->allcate_size, sizeof(CUdeviceptr));
    m->event = (CUevent*)calloc(m->allcate_size, sizeof(CUevent));
    if (m->buf == NULL || m->event == NULL) {
        free(m->buf);
        free(m->event);
        m->buf = 0;
        m->event = 0;
        m->allcate_size = 0;
        error("CudaScheduler::initCudaBuffer: out of memory");
    }
}

/**
 * Release the host-side arrays of a CudaBuffer and reset the descriptor to
 * an empty state (null arrays, zero size, zero capacity).
 *
 * Only the arrays themselves are freed here; the entries they hold are not
 * individually freed or destroyed by this function.
 */
void
CudaScheduler::destroyCudaBuffer(CudaBufferPtr m)
{
    // Free each array and clear its pointer right away.
    free(m->buf);
    m->buf = 0;

    free(m->event);
    m->event = 0;

    // Nothing is allocated or in use any more.
    m->allcate_size = 0;
    m->size = 0;
}

/**
 * Allocate `size` bytes of device memory and store the pointer in slot `i`
 * of the buffer descriptor, growing the descriptor's arrays if needed.
 *
 * @param m        buffer descriptor whose buf/event arrays hold the slot.
 * @param i        slot index; must be >= 0.
 * @param context  unused by this function (kept for interface compatibility).
 * @param size     number of bytes to allocate on the device.
 * @param error    optional out-parameter; when non-NULL it receives the
 *                 CUresult of the operation (CUDA_SUCCESS on success).
 *                 NOTE(review): the original never wrote to this parameter;
 *                 confirm callers expect a CUresult value here.
 * @return the device pointer stored in m->buf[i], or 0 on failure.
 *
 * m->size is not modified here.
 */
CUdeviceptr
CudaScheduler::createBuffer(CudaBufferPtr m,int i, CUcontext context, size_t size, int* error) {
    if (error) *error = (int)CUDA_SUCCESS;

    if (i < 0) {
        if (error) *error = (int)CUDA_ERROR_INVALID_VALUE;
        return 0;
    }

    // Slot i is valid only when i < allcate_size, so grow on i >= allcate_size.
    if (i >= m->allcate_size) {
        int old_cap = m->allcate_size;
        // Guard against a zero capacity, which doubling would never escape.
        int new_cap = old_cap > 0 ? old_cap : 64;
        while (i >= new_cap) {
            new_cap *= 2;
        }

        // Keep the old pointers until realloc succeeds so nothing is leaked.
        CUdeviceptr* new_buf = (CUdeviceptr*)realloc(m->buf, new_cap*sizeof(CUdeviceptr));
        if (new_buf == NULL) {
            if (error) *error = (int)CUDA_ERROR_OUT_OF_MEMORY;
            return 0;
        }
        m->buf = new_buf;

        CUevent* new_event = (CUevent*)realloc(m->event, new_cap*sizeof(CUevent));
        if (new_event == NULL) {
            if (error) *error = (int)CUDA_ERROR_OUT_OF_MEMORY;
            return 0;
        }
        m->event = new_event;

        // Zero the newly added slots: other code tests slots against 0.
        memset(m->buf + old_cap, 0, (new_cap - old_cap)*sizeof(CUdeviceptr));
        memset(m->event + old_cap, 0, (new_cap - old_cap)*sizeof(CUevent));
        m->allcate_size = new_cap;
    }

    // Clear the slot first so a failed allocation leaves (and returns) 0.
    m->buf[i] = 0;
    CUresult result = cuMemAlloc(&m->buf[i], size);
    if (result != CUDA_SUCCESS) {
        m->buf[i] = 0;
        if (error) *error = (int)result;
    }
    return m->buf[i];
}

// Sentinel stored in a kernel_event slot that has no event to wait on:
// wait_for_event skips synchronization for a slot equal to NOP_REPLY, and
// CudaTaskError resets the slot to NOP_REPLY.
#define NOP_REPLY NULL

static void
release_buf_event(int cur, CudaScheduler::CudaBufferPtr mem) {
    for (int i=0; i<mem[1-cur].size; i++) {
        if (mem[1-cur].event[i] != 0)
            cuEventDestroy(mem[1-cur].event[i]);
        mem[1-cur].event[i] = 0;
        if (mem[1-cur].buf[i])
            cuMemFree(mem[1-cur].buf[i]);
        mem[1-cur].buf[i] = 0;
    }
    mem[1-cur].size = 0;
}

void
CudaBufferPtr::wait_for_event(CUevent* kernel_event, CudaBufferPtr memout, CudaBufferPtr memin, TaskListPtr taskList, int cur) {
    if (kernel_event[1-cur] == NOP_REPLY) {

    } else {
        ret = cuEventSynchronize(kernel_event[1-cur]);

        if (ret<0) {
            error(convert_error_status(ret));
        }
        if (taskList!=NULL) {
            unsigned long start = 0;
            unsigned long end = 0;
            // timestamp 取る方法がない?
        }
        cuEventDestroy(kernel_event[1-cur]);
        kernel_event[1-cur] = 0;
    }

    if (memout[1-cur].size > 0) {
        ret = cuEventSynchronize(memout[1-cur].event);
        if (ret<0) error(convert_error_status(ret));
        release_buf_event(cur, memout);
    }        

    if (memin[1-cur].size > 0) {
        release_buf_event(cur, memin);
    }

    if(reply) {
        connector->mail_write(reply);
        __debug(this, "CUDA %d %s\t%lld\n", taskList->cpu_type, (char*)(cuda_task_list[taskList->tasks[0].command].name), taskList->task_end_time-taskList->task_start_time);
        reply = 0;
    }
}

void
CudaScheduler::CudaTaskError(int cur, TaskListPtr taskList, int ret) {
    error(convert_error_status(ret));
    if (kernel_event[cur] != 0)
        cuEventDestroy(kernel_event[cur]);
    kernel_event[cur] = NOP_REPLY;
    if (kernel[cur] != 0)