#include <stdio.h> #include "../context.h" #include <cuda.h> #include <cuda_runtime.h> #include "helper_cuda.h" static void CUDAExec(struct Context* context, Array* array, LoopCounter *loopCounter) { // Worker *worker = context->worker; // CUDAWorker* cudaWorker = (CUDAWorker*)worker->worker; // memory allocate CUdeviceptr devA; CUdeviceptr devLoopCounter; printf("CUdA Exe 1\n"); checkCudaErrors(cuMemAlloc(&devA, array->size)); checkCudaErrors(cuMemAlloc(&devLoopCounter, sizeof(LoopCounter))); //twiceカーネルが定義されてなければそれをロードする checkCudaErrors(cuModuleLoad(&context->module, "CUDAtwice.ptx")); checkCudaErrors(cuModuleGetFunction(&context->function, context->module, "twice")); printf("CUdA Exe 2\n"); //入力のDataGearをGPUにbuffer経由で送る // Synchronous data transfer(host to device) checkCudaErrors(cuMemcpyHtoD(devLoopCounter, loopCounter, sizeof(LoopCounter))); checkCudaErrors(cuMemcpyHtoD(devA, array->array, array->size)); // Asynchronous launch kernel context->num_exec = 1; void* args[] = {&devLoopCounter,&array->index,&array->prefix,&devA}; checkCudaErrors(cuLaunchKernel(context->function, 1, 1, 1, 1, 1, 1, 0, NULL , args, NULL)); //結果を取ってくるコマンドを入力する //コマンドの終了待ちを行う checkCudaErrors(cuMemcpyDtoH(array->array, devA, array->size)); // wait for stream } __code CUDAtwice(struct Context* context, struct LoopCounter* loopCounter, int index, int prefix, int* array, struct Context* workerContext) { int i = loopCounter->i; if (i < prefix) { array[i+index*prefix] = array[i+index*prefix]*2; loopCounter->i++; goto meta(context, C_twice); } loopCounter->i = 0; goto meta(workerContext, workerContext->next); } __code CUDAtwice_stub(struct Context* context) { printf("CUdAtwice stub\n"); struct LoopCounter* loopCounter = &context->data[context->dataNum]->LoopCounter; struct Array* array = &context->data[context->dataNum+1]->Array; CUDAExec(context,array,loopCounter); //continuationにそってGPUworkerに戻る goto meta(context, context->next); }