view src/parallel_execution/cuda.c @ 410:85b0ddbf458e

Fix CudaWorker
author Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
date Thu, 14 Sep 2017 02:35:20 +0900
parents c5cd9888bf2a
children 0eba9a04633f
line wrap: on
line source

#include <stdio.h>
#include <sys/time.h>
#include <string.h>
#include <stdlib.h>
#include <libkern/OSAtomic.h>

// includes, project
#include <driver_types.h>
#include <cuda_runtime.h>
#include <cuda.h>
#include "helper_cuda.h"
#include "pthread.h"

#include "context.h"

/*
struct Context {
    int next;
    struct Worker* worker;
    struct TaskManager* taskManager;
    int codeNum;
    void  (**code) (struct Context*);
    void* heapStart;
    void* heap;
    long heapLimit;
    int dataNum;
    int idgCount; //number of waiting dataGear
    int idg;
    int maxIdg;
    int odg;
    int maxOdg;
    int workerId;
    struct Context* task;
    struct Queue* tasks;
    int num_exec;
    CUmodule module;
    CUfunction function;
    union Data **data;

    // multi dimension parameter
    int iterate;
    struct Iterator* iterator;
};

struct CUDAWorker {
    CUdevice device;
    CUcontext cuCtx;
    pthread_t thread;
    struct Context* context;
    int id;
    struct Queue* tasks;
    int runFlag;
    int next;
    int num_stream;
    CUstream *stream;
} CUDAWorker;

struct LoopCounter {
    int i;
} LoopCounter;

struct Array {
    int size;
    int index;
    int prefix;
    int* array;
} Array;
*/

/*
 * Set up a CUDAWorker for use with the CUDA driver API.
 *
 * cudaWorker: worker whose num_stream, device and cuCtx fields are filled in.
 * phase:      when 0, the driver is initialized, device 0 is looked up and a
 *             context is created on it; for any other value only num_stream
 *             is set and the completion message is printed.
 *
 * Each driver call is wrapped in checkCudaErrors (from helper_cuda.h); its
 * failure behavior is defined there, not in this file.
 */
void cudaInit(struct CUDAWorker *cudaWorker,int phase) {
    // The worker is configured for a single stream. Allocation and creation
    // of the stream objects themselves is currently disabled:
    //    cudaWorker->stream = NEWN(cudaWorker->num_stream, CUstream );
    cudaWorker->num_stream = 1;

    if (phase == 0) {
        // Driver init -> device handle for ordinal 0 -> context on that device.
        checkCudaErrors(cuInit(0));
        checkCudaErrors(cuDeviceGet(&cudaWorker->device, 0));
        checkCudaErrors(cuCtxCreate(&cudaWorker->cuCtx, CU_CTX_SCHED_SPIN, cudaWorker->device));
    }

    // Disabled stream creation, kept for reference:
    //    if (cudaWorker->num_stream) {
    //        for (int i=0;i<cudaWorker->num_stream;i++)
    //            checkCudaErrors(cuStreamCreate(&cudaWorker->stream[i],0));
    //    }
    printf("cuda Init: Done\n");
}


/*
 * Run the "twice" kernel on the contents of array->array.
 *
 * context: receives the loaded module/function handles (context->module,
 *          context->function) and has num_exec set to 1. If context->iterate
 *          is non-zero, the grid dimensions are taken from the context's
 *          MultiDimIterator (x, y, z); otherwise a 1x1x1 grid is used.
 *          The block size is always 1x1x1.
 * array:   host buffer of array->size ints; it is copied to the device,
 *          passed to the kernel as its only argument, and copied back in place.
 *
 * All driver calls are wrapped in checkCudaErrors (from helper_cuda.h).
 *
 * NOTE(review): the module is loaded on every call and never unloaded here,
 * although the original comment says it should only be loaded when missing.
 * Caching would require knowing how context->module is initialized - confirm
 * against the Context setup code before changing.
 */
void CUDAExec(struct Context* context, struct Array* array) {
    printf("cuda exec start\n");
    // Worker *worker = context->worker;
    // CUDAWorker* cudaWorker = (CUDAWorker*)worker->worker;

    // Payload size in bytes. The driver-API alloc/copy calls all take a byte
    // count, so the same value must be used for allocation and both copies.
    size_t bytes = sizeof(int) * array->size;

    // memory allocate
    CUdeviceptr devA;
    checkCudaErrors(cuMemAlloc(&devA, bytes));

    // Load the twice kernel (intended: only if it is not already defined)
    checkCudaErrors(cuModuleLoad(&context->module, "c/examples/twice/CUDAtwice.ptx"));
    checkCudaErrors(cuModuleGetFunction(&context->function, context->module, "twice"));

    // Send the input DataGear to the GPU through the device buffer
    // Synchronous data transfer(host to device)
    checkCudaErrors(cuMemcpyHtoD(devA, array->array, bytes));

    // Asynchronous launch kernel
    context->num_exec = 1;
    void* args[] = {&devA};
    if (context->iterate) {
        struct MultiDimIterator* iterator = &context->iterator->iterator->MultiDimIterator;
        checkCudaErrors(cuLaunchKernel(context->function,
                    iterator->x, iterator->y, iterator->z,
                    1, 1, 1,
                    0, NULL, args, NULL));

    } else {
        checkCudaErrors(cuLaunchKernel(context->function,
                    1, 1, 1,
                    1, 1, 1,
                    0, NULL, args, NULL));
    }

    // Fetch the result back into the host array
    checkCudaErrors(cuMemcpyDtoH(array->array, devA, bytes));

    // Wait for all outstanding work in the context to finish
    checkCudaErrors(cuCtxSynchronize());

    // Release the device buffer; without this every call leaks `bytes` of device memory
    checkCudaErrors(cuMemFree(devA));
}

/*
 * Tear down the CUDA state held by a CUDAWorker.
 *
 * worker: worker whose cuCtx is destroyed via cuCtxDestroy. The call is
 *         wrapped in checkCudaErrors (from helper_cuda.h).
 *
 * Stream destruction is currently disabled (see the commented-out loop).
 */
void cudaShutdown( struct CUDAWorker *worker) {
    // Disabled: per-stream cleanup, kept for reference.
    //    for (int i=0;i<worker->num_stream;i++)
    //        checkCudaErrors(cuStreamDestroy(worker->stream[i]));
    checkCudaErrors(cuCtxDestroy(worker->cuCtx));
}