view src/parallel_execution/CUDAWorker.cbc @ 316:54d203daf06b

CUDAtwice.cbc is called.
author Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date Wed, 15 Feb 2017 16:25:23 +0900
parents 1839586f5b41
children 51aa65676e37
line wrap: on
line source

#include <stdio.h>
#include <sys/time.h>
#include <string.h>
#include <stdlib.h>
#include <libkern/OSAtomic.h>

// includes, project
#include <driver_types.h>
#include <cuda_runtime.h>
#include <cuda.h>
#include "helper_cuda.h"

#include "../context.h"

// Forward declarations for the file-local helpers defined further down.
// start_CUDAworker: gives the worker its own Context and jumps to the task-receive code gear.
static void start_CUDAworker(Worker* worker);
// cudaInit: CUDA driver setup; the cuInit/cuDeviceGet/cuCtxCreate calls only run when phase is 0.
static void cudaInit(struct CUDAWorker *cudaWorker,int phase) ;

// Set to 1 by createCUDAWorker() after cudaInit() has returned; starts out as 0.
volatile int cuda_initialized = 0;

/*
 * Create a Worker whose implementation is a CUDAWorker.
 *
 *   context - Context the Worker record is allocated from (via ALLOC)
 *   id      - identifier stored in the CUDAWorker
 *   queue   - task queue this worker takes its tasks from
 *   im      - optional task manager implementation; when non-NULL the new
 *             worker is registered in im->workers[0]
 *
 * The CUDA driver is initialized (phase 0) before the worker is wired up.
 * The worker is then started directly on the calling thread through
 * start_CUDAworker(); the pthread based start is currently disabled.
 * NOTE(review): start_CUDAworker() ends with a goto into a code gear, so the
 * final `return worker` is presumably only reached if that chain comes back.
 */
Worker* createCUDAWorker(struct Context* context, int id, Queue* queue, TaskManagerImpl *im) {
    struct Worker* worker = ALLOC(context, Worker);
    struct CUDAWorker* cudaWorker = new CUDAWorker();

    // Bring up the CUDA side first, then fill in the CUDA specific part.
    cudaInit(cudaWorker,0);
    cudaWorker->id = id;

    // Generic worker fields: implementation, input queue and shutdown gear.
    worker->worker   = (union Data*)cudaWorker;
    worker->tasks    = queue;
    worker->shutdown = C_shutdownCUDAWorker;
    printf("createCUDAWorker %p\n",queue);

    // pthread_create(&worker->worker->CUDAWorker.thread, NULL, (void*)&start_CUDAworker, worker);
    if (im != NULL) {
        printf("im->worker %p\n",im->workers);
        im->workers[0] = worker;
    }

    cuda_initialized = 1;
    start_CUDAworker(worker);
    return worker;
}

/*
 * Initialize the CUDA driver state held in a CUDAWorker.
 *
 *   cudaWorker - worker whose num_stream, device and cuCtx fields are filled in
 *   phase      - 0: run cuInit, pick device 0 and create a driver context;
 *                any other value skips those three calls
 *
 * Every driver call is wrapped in checkCudaErrors(). After the setup a small
 * 16 byte device allocation is made; its handle is not stored anywhere, so it
 * is released again right away instead of being leaked.
 * Progress is traced on stdout with printf.
 */
static void cudaInit(struct CUDAWorker *cudaWorker,int phase) {
    cudaWorker->num_stream = 1; // number of stream
//    cudaWorker->stream = NEWN(cudaWorker->num_stream, CUstream );
    printf("cudaInit 1\n");
    if (phase == 0) {
        checkCudaErrors(cuInit(0));
        checkCudaErrors(cuDeviceGet(&cudaWorker->device, 0));
    }
    printf("cudaInit 2\n");
    if (phase == 0) {
        checkCudaErrors(cuCtxCreate(&cudaWorker->cuCtx, CU_CTX_SCHED_SPIN, cudaWorker->device));
    }
    printf("cudaInit 3\n");
//    if (cudaWorker->num_stream) {
//        for (int i=0;i<cudaWorker->num_stream;i++)
//            checkCudaErrors(cuStreamCreate(&cudaWorker->stream[i],0));
//    }
    // Trial allocation on the device. devA is local and never handed to
    // anyone, so free it here; otherwise the 16 bytes stay allocated until
    // the context is destroyed.
    CUdeviceptr devA;
    checkCudaErrors(cuMemAlloc(&devA, 16));
    checkCudaErrors(cuMemFree(devA));

    printf("cudaInit done\n");
}

/*
 * Give the CUDA worker its own Context and enter the task receive loop.
 *
 * A fresh Context is allocated with NEW, attached to the CUDAWorker and
 * initialized with initContext(). The Worker itself is stored in that
 * context's Worker data gear so the code gear stubs can find it, and control
 * is then transferred to C_taskReceiveCUDAWorker through meta.
 */
static void start_CUDAworker(Worker* worker) {
    CUDAWorker* cudaWorker = (CUDAWorker*)worker->worker;
    struct Context* workerContext = NEW(struct Context);

    cudaWorker->context = workerContext;
    initContext(workerContext);
    Gearef(workerContext, Worker)->worker = (union Data*)worker;

    goto meta(workerContext, C_taskReceiveCUDAWorker);
}

// Code gear: request the next task from the worker's task queue.
// The Queue argument gear is pointed at worker->tasks and told to continue
// with C_getTaskCUDA, then control goes to that queue's take code gear.
__code taskReceiveCUDAWorker(struct Worker* worker,struct Queue* queue) {
    struct Queue* tasks = worker->tasks;

    queue->next = C_getTaskCUDA;          // where take continues afterwards
    queue->queue = (union Data*)tasks;    // which queue to take from
    goto meta(context, tasks->take);
}

__code taskReceiveCUDAWorker_stub(struct Context* context) {
    goto taskReceiveCUDAWorker(context, &Gearef(context, Worker)->worker->Worker, Gearef(context, Queue));
}

__code getTaskCUDA(struct Worker* worker, struct Context* task) {
    if (!task)
        return; // end thread
//    if (cuda_initialized==0 || 1) {
//        CUDAWorker* cudaWorker = (CUDAWorker*)worker->worker;
//        cudaInit(cudaWorker,1);
//    }
    worker->taskReceive = C_taskReceiveCUDAWorker;
    task->worker = worker;
    enum Code taskCg = task->next;
    task->next = C_odgCommitCUDA; // set CG after task exec
    goto meta(task, taskCg);
}

// Stub: the Worker comes from the context's Worker data gear, the task is the
// Context found in the data field of the Queue data gear.
__code getTaskCUDA_stub(struct Context* context) {
    goto getTaskCUDA(context,
                     &Gearef(context,Worker)->worker->Worker,
                     &Gearef(context, Queue)->data->Context);
}

// Code gear: commit step that runs after a task has finished.
// While task->odg + loopCounter->i is below task->maxOdg, the wait list of
// the data gear at that index is selected as the queue and its take code gear
// is entered, continuing with C_odgCommitCUDA1. Otherwise the counter is reset
// and the worker goes back to receiving tasks.
// NOTE(review): loopCounter->i is only read and reset here; nothing visible in
// this file advances it - confirm where the index is incremented.
__code odgCommitCUDA(struct LoopCounter* loopCounter, struct Queue* queue, struct Context* task) {
    int index = loopCounter->i;

    if (task->odg + index >= task->maxOdg) {
        // nothing left to commit: reset and wait for the next task
        loopCounter->i = 0;
        goto meta(context, C_taskReceiveCUDAWorker);
    }

    queue->next = C_odgCommitCUDA1;
    queue->queue = (union Data*)GET_WAIT_LIST(task->data[task->odg+index]);
    goto meta(context, queue->queue->Queue.take);
}

__code odgCommitCUDA_stub(struct Context* context) {
    struct Context* workerContext = context->worker->worker->CUDAWorker.context;
    goto odgCommitCUDA(workerContext,
                   Gearef(workerContext, LoopCounter),
                   Gearef(workerContext, Queue),
                   context);
}

// Code gear: account for one satisfied input of a waiting task.
//   taskManager - TaskManager argument gear, filled in before spawning
//   task        - the waiting task taken from a data gear's wait list
// task->idgCount is decremented atomically. The decision to spawn is made
// from the value returned by the atomic operation (the count *before* the
// decrement) instead of re-reading task->idgCount afterwards: a second,
// non-atomic read can observe decrements made by other workers, so two workers
// could both see 0 and spawn the same task twice.
// When the previous count was 0 the code gear is entered again, as before.
__code odgCommitCUDA1(struct TaskManager* taskManager, struct Context* task) {
    int before = __sync_fetch_and_sub(&task->idgCount, 1);
    if(before) {
        if(before == 1) {
            // this decrement was the one that reached zero: task is ready
            taskManager->taskManager = (union Data*)task->taskManager;
            taskManager->context = task;
            taskManager->next = C_odgCommitCUDA;
            goto meta(context, task->taskManager->spawn);
        }
        // NOTE(review): when inputs are still outstanding no continuation is
        // taken here (unchanged from the original) - confirm the intended
        // next code gear.
    } else {
        goto meta(context, C_odgCommitCUDA1);
    }
}

__code odgCommitCUDA1_stub(struct Context* context) {
    struct Context* task = &Gearef(context, Queue)->data->Context;
    goto odgCommitCUDA1(context,
                    Gearef(context, TaskManager),
                    task);
                 
}


// Code gear registered as the worker's shutdown entry (C_shutdownCUDAWorker):
// destroys the CUDA driver context stored in worker->cuCtx.
// The per-stream cleanup is commented out, matching the commented-out stream
// creation in cudaInit.
// NOTE(review): no goto follows the destroy call, so this code gear has no
// continuation - presumably intentional for shutdown; confirm.
__code shutdownCUDAWorker(struct Context* context, CUDAWorker* worker) {
//    for (int i=0;i<worker->num_stream;i++)
//        checkCudaErrors(cuStreamDestroy(worker->stream[i]));
    checkCudaErrors(cuCtxDestroy(worker->cuCtx));
}

// Stub: resolves the CUDAWorker implementation behind the Worker interface
// with GearImpl and hands it to shutdownCUDAWorker.
__code shutdownCUDAWorker_stub(struct Context* context) {
    goto shutdownCUDAWorker(context, (CUDAWorker *)GearImpl(context, Worker, worker));
}