view src/parallel_execution/CUDAWorker.cbc @ 319:a15511b1a6e0

separate cuda.c, and USE_CUDA_MAIN_THREAD flag
author Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date Wed, 15 Feb 2017 20:43:55 +0900
parents 51aa65676e37
children f730761bb044
line wrap: on
line source

#include <stdio.h>
#include <sys/time.h>
#include <string.h>
#include <stdlib.h>
#include <libkern/OSAtomic.h>

#include "../context.h"

// CUDA-side initialisation routine, defined outside this file.
// createCUDAWorker calls it with phase 0 right after allocating the CUDAWorker.
extern void cudaInit(struct CUDAWorker *cudaWorker,int phase) ;

// Entry routine of the CUDA worker; declared here because createCUDAWorker
// refers to it before its definition.
static void start_CUDAworker(Worker* worker);

// Set to 1 by createCUDAWorker when built with USE_CUDA_MAIN_THREAD, just
// before the worker loop is entered.
// NOTE(review): the readers of this flag are not visible in this file.
volatile int cuda_initialized = 0;

/*
 * Build a Worker whose implementation is a CUDAWorker, run CUDA
 * initialisation (phase 0) and start the worker's task loop.
 *
 *   context  context handed to ALLOC for the Worker allocation
 *   id       identifier stored in the CUDAWorker
 *   queue    queue the worker takes its tasks from
 *   im       task manager implementation; only touched when built with
 *            USE_CUDA_MAIN_THREAD, and may be NULL
 *
 * Returns the newly created Worker.
 *
 * Without USE_CUDA_MAIN_THREAD the loop is started on a new pthread.  A
 * failure to create that thread is reported on stderr; the Worker is still
 * returned so the interface seen by callers is unchanged.
 * With USE_CUDA_MAIN_THREAD the loop is entered directly from this function,
 * which is why the worker is registered in im->workers[0] and
 * cuda_initialized is raised before start_CUDAworker is called.
 */
Worker* createCUDAWorker(struct Context* context, int id, Queue* queue, TaskManagerImpl *im) {
    struct Worker* worker = ALLOC(context, Worker);
    struct CUDAWorker* cudaWorker = new CUDAWorker();

    cudaInit(cudaWorker,0);

    worker->worker = (union Data*)cudaWorker;
    worker->tasks = queue;
    cudaWorker->id = id;
    worker->shutdown = C_shutdownCUDAWorker;
#ifndef USE_CUDA_MAIN_THREAD
    // pthread_create expects a void *(*)(void *) start routine; cast to that
    // function-pointer type instead of going through an object pointer.
    int rc = pthread_create(&worker->worker->CUDAWorker.thread, NULL, (void *(*)(void *))start_CUDAworker, worker);
    if (rc != 0) {
        // pthread_create returns the error number directly (it does not set errno).
        fprintf(stderr, "createCUDAWorker: pthread_create failed: %s\n", strerror(rc));
    }
#else
    if (im) {
        im->workers[0] = worker;
    }
    cuda_initialized = 1;
    start_CUDAworker(worker);
#endif
    return worker;
}


/*
 * Entry routine of the CUDA worker.
 *
 * Gives the CUDAWorker behind `worker` a freshly initialised Context of its
 * own, records `worker` in that context's Worker gear, and then transfers
 * control to the task-receive code gear running on the new context.
 */
static void start_CUDAworker(Worker* worker) {
    CUDAWorker* self = (CUDAWorker*)worker->worker;
    struct Context* ownContext = NEW(struct Context);

    self->context = ownContext;
    initContext(ownContext);

    // Let the code gears running on this context find their Worker again.
    Gearef(ownContext, Worker)->worker = (union Data*)worker;

    goto meta(ownContext, C_taskReceiveCUDAWorker);
}

// Ask the worker's task queue for the next task.
// The Queue gear is pointed at worker->tasks and told to continue at
// C_getTaskCUDA once the take operation has produced its result.
__code taskReceiveCUDAWorker(struct Worker* worker,struct Queue* queue) {
    struct Queue* inbox = worker->tasks;

    queue->next = C_getTaskCUDA;
    queue->queue = (union Data*)inbox;
    goto meta(context, inbox->take);
}

__code taskReceiveCUDAWorker_stub(struct Context* context) {
    goto taskReceiveCUDAWorker(context, &Gearef(context, Worker)->worker->Worker, Gearef(context, Queue));
}

__code getTaskCUDA(struct Worker* worker, struct Context* task) {
    if (!task)
        return; // end thread
    worker->taskReceive = C_taskReceiveCUDAWorker;
    task->worker = worker;
    enum Code taskCg = task->next;
    task->next = C_odgCommitCUDA; // set CG after task exec
    goto meta(task, taskCg);
}

// Stub: the Worker comes from the context's Worker gear, the task is the
// Context found in the data slot of the context's Queue gear.
__code getTaskCUDA_stub(struct Context* context) {
    goto getTaskCUDA(context, &Gearef(context,Worker)->worker->Worker, &Gearef(context, Queue)->data->Context);
}

// Commit step that runs after a task body has finished.
// While task->odg + loopCounter->i is still below task->maxOdg, the wait list
// of that data element is loaded into the Queue gear and one entry is taken
// from it, continuing at C_odgCommitCUDA1. Otherwise the counter is reset and
// the worker goes back to receiving tasks.
// NOTE(review): nothing visible in this file advances loopCounter->i; confirm
// where the index is meant to be incremented.
__code odgCommitCUDA(struct LoopCounter* loopCounter, struct Queue* queue, struct Context* task) {
    int offset = loopCounter->i ;

    if (task->odg + offset >= task->maxOdg) {
        // Nothing left to commit: rewind the counter for the next task.
        loopCounter->i = 0;
        goto meta(context, C_taskReceiveCUDAWorker);
    }

    queue->queue = (union Data*)GET_WAIT_LIST(task->data[task->odg+offset]);
    queue->next = C_odgCommitCUDA1;
    goto meta(context, queue->queue->Queue.take);
}

__code odgCommitCUDA_stub(struct Context* context) {
    struct Context* workerContext = context->worker->worker->CUDAWorker.context;
    goto odgCommitCUDA(workerContext,
                   Gearef(workerContext, LoopCounter),
                   Gearef(workerContext, Queue),
                   context);
}

// Handle one task taken from a wait list: atomically drop its input counter
// (idgCount) and spawn it through its task manager when this decrement was
// the one that brought the counter to zero.
//
//   taskManager  TaskManager gear used to pass arguments to spawn
//   task         waiting task taken from the wait list
//
// The decision is made from the value returned by the atomic operation.
// Re-reading task->idgCount afterwards is not atomic with the decrement, so
// two workers could both observe zero (double spawn) or neither could.
__code odgCommitCUDA1(struct TaskManager* taskManager, struct Context* task) {
    int before = __sync_fetch_and_sub(&task->idgCount, 1);

    if (before == 0) {
        // Kept from the original code: a counter that was already zero leads
        // back to this code gear.
        // NOTE(review): this leaves idgCount negative; confirm the intent.
        goto meta(context, C_odgCommitCUDA1);
    }

    if (before == 1) {
        // This worker consumed the last outstanding input: spawn the task and
        // resume the commit loop afterwards.
        taskManager->taskManager = (union Data*)task->taskManager;
        taskManager->context = task;
        taskManager->next = C_odgCommitCUDA;
        goto meta(context, task->taskManager->spawn);
    }

    // The task still waits for other inputs. A code gear must always continue
    // somewhere, so resume the commit loop exactly where the spawn path
    // resumes it instead of falling off the end.
    goto meta(context, C_odgCommitCUDA);
}

__code odgCommitCUDA1_stub(struct Context* context) {
    struct Context* task = &Gearef(context, Queue)->data->Context;
    goto odgCommitCUDA1(context,
                    Gearef(context, TaskManager),
                    task);
                 
}

// CUDA-side teardown routine, defined outside this file; called from
// shutdownCUDAWorker with the worker being shut down.
extern void cudaShutdown( CUDAWorker *cudaWorker) ;

// Shutdown code gear registered as worker->shutdown by createCUDAWorker.
// It hands the CUDAWorker to cudaShutdown and has no further continuation.
__code shutdownCUDAWorker(struct Context* context, CUDAWorker* worker) {
    cudaShutdown(worker);
}

// Stub: resolves the CUDAWorker implementation behind the context's Worker
// gear via GearImpl and passes it to shutdownCUDAWorker.
__code shutdownCUDAWorker_stub(struct Context* context) {
    goto shutdownCUDAWorker(context, (CUDAWorker *)GearImpl(context, Worker, worker));
}