view src/parallel_execution/CUDAtwice.cbc @ 312:7dd5a7d52a67

USE_CUDAWorker flag only for CUDAtwice
author Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date Wed, 15 Feb 2017 11:04:30 +0900
parents 782f4c560de4
children 1839586f5b41
line wrap: on
line source

#include <stdio.h>
#include "../context.h"

#include <cuda.h>

#include <cuda_runtime.h>
#include "helper_cuda.h"

/*
 * Run the "twice" kernel from CUDAtwice.ptx on one Array through the CUDA driver API.
 *
 * context     : its module/function fields receive the loaded module and kernel
 *               handle, and its num_exec field is set to 1.
 * array       : array->array is copied to the device, handed to the kernel and
 *               copied back into the same host buffer. array->index and
 *               array->prefix are passed to the kernel by value.
 * loopCounter : copied to the device and passed as the first kernel argument;
 *               it is not copied back to the host.
 *
 * Every driver API call is wrapped in checkCudaErrors.
 *
 * NOTE(review): array->size is used as a byte count for both the allocation and
 * the copies -- confirm it is not an element count.
 */
static void CUDAExec(struct Context* context, Array* array, LoopCounter *loopCounter) {
    // Worker *worker = context->worker;
    // CUDAWorker* cudaWorker = (CUDAWorker*)worker->worker;

    // Allocate device memory for the array payload and for the loop counter.
    CUdeviceptr devA;
    CUdeviceptr devLoopCounter;

    checkCudaErrors(cuMemAlloc(&devA, array->size));
    checkCudaErrors(cuMemAlloc(&devLoopCounter, sizeof(LoopCounter)));

    // Load the module that holds the twice kernel and look the kernel up.
    // NOTE(review): the original comment says "load it if it is not defined yet",
    // but the load is unconditional and the module is never unloaded here --
    // consider caching it in the context.
    checkCudaErrors(cuModuleLoad(&context->module, "CUDAtwice.ptx"));
    checkCudaErrors(cuModuleGetFunction(&context->function, context->module, "twice"));

    // Send the input data gears to the GPU.
    // Synchronous data transfer (host to device).
    checkCudaErrors(cuMemcpyHtoD(devLoopCounter, loopCounter, sizeof(LoopCounter)));
    checkCudaErrors(cuMemcpyHtoD(devA, array->array, array->size));

    // Launch the kernel: 1x1x1 grid, 1x1x1 block, no dynamic shared memory, NULL stream.
    context->num_exec = 1;
    void* args[] = {&devLoopCounter, &array->index, &array->prefix, &devA};
    checkCudaErrors(cuLaunchKernel(context->function,
                                   1, 1, 1,
                                   1, 1, 1,
                                   0, NULL, args, NULL));

    // Fetch the result with a synchronous device-to-host copy.
    checkCudaErrors(cuMemcpyDtoH(array->array, devA, array->size));

    // Release the per-call device buffers; without this every invocation
    // leaks array->size + sizeof(LoopCounter) bytes of device memory.
    checkCudaErrors(cuMemFree(devA));
    checkCudaErrors(cuMemFree(devLoopCounter));
}

/*
 * Code gear that doubles one element of array per invocation.
 *
 * While loopCounter->i is below prefix, the element at
 * (loopCounter->i + index*prefix) is doubled, the counter is advanced, and
 * control continues through meta with C_twice. Once the counter reaches
 * prefix it is reset to 0 and control continues to workerContext->next.
 *
 * NOTE(review): the loop continuation targets C_twice, not a CUDAtwice label --
 * confirm this is intended.
 */
__code CUDAtwice(struct Context* context, struct LoopCounter* loopCounter, int index, int prefix, int* array, struct Context* workerContext) {
    int step = loopCounter->i;

    // All elements of this slice are done: reset the counter and leave.
    if (!(step < prefix)) {
        loopCounter->i = 0;
        goto meta(workerContext, workerContext->next);
    }

    // Double the current element, then come back for the next one.
    int* element = &array[step + index*prefix];
    *element = *element * 2;
    loopCounter->i++;

    goto meta(context, C_twice);
}

/*
 * Stub for the CUDAtwice code gear.
 *
 * Takes the LoopCounter at context->data[context->dataNum] and the Array at
 * context->data[context->dataNum + 1], hands both to CUDAExec, and then
 * continues to context->next.
 */
__code CUDAtwice_stub(struct Context* context) {
    CUDAExec(context,
             &context->data[context->dataNum + 1]->Array,
             &context->data[context->dataNum]->LoopCounter);

    // Follow the continuation back to the GPU worker.
    goto meta(context, context->next);
}