Mercurial > hg > Members > Moririn
view src/parallel_execution/cuda.c @ 410:85b0ddbf458e
Fix CudaWorker
author | Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Thu, 14 Sep 2017 02:35:20 +0900 |
parents | c5cd9888bf2a |
children | 0eba9a04633f |
line wrap: on
line source
#include <stdio.h> #include <sys/time.h> #include <string.h> #include <stdlib.h> #include <libkern/OSAtomic.h> // includes, project #include <driver_types.h> #include <cuda_runtime.h> #include <cuda.h> #include "helper_cuda.h" #include "pthread.h" #include "context.h" /* struct Context { int next; struct Worker* worker; struct TaskManager* taskManager; int codeNum; void (**code) (struct Context*); void* heapStart; void* heap; long heapLimit; int dataNum; int idgCount; //number of waiting dataGear int idg; int maxIdg; int odg; int maxOdg; int workerId; struct Context* task; struct Queue* tasks; int num_exec; CUmodule module; CUfunction function; union Data **data; // multi dimension parameter int iterate; struct Iterator* iterator; }; struct CUDAWorker { CUdevice device; CUcontext cuCtx; pthread_t thread; struct Context* context; int id; struct Queue* tasks; int runFlag; int next; int num_stream; CUstream *stream; } CUDAWorker; struct LoopCounter { int i; } LoopCounter; struct Array { int size; int index; int prefix; int* array; } Array; */ void cudaInit(struct CUDAWorker *cudaWorker,int phase) { // initialize and load kernel cudaWorker->num_stream = 1; // number of stream // cudaWorker->stream = NEWN(cudaWorker->num_stream, CUstream ); if (phase==0) checkCudaErrors(cuInit(0)); if (phase==0) checkCudaErrors(cuDeviceGet(&cudaWorker->device, 0)); if (phase==0) checkCudaErrors(cuCtxCreate(&cudaWorker->cuCtx, CU_CTX_SCHED_SPIN, cudaWorker->device)); // if (cudaWorker->num_stream) { // for (int i=0;i<cudaWorker->num_stream;i++) // checkCudaErrors(cuStreamCreate(&cudaWorker->stream[i],0)); // } printf("cuda Init: Done\n"); } void CUDAExec(struct Context* context, struct Array* array) { printf("cuda exec start\n"); // Worker *worker = context->worker; // CUDAWorker* cudaWorker = (CUDAWorker*)worker->worker; // memory allocate CUdeviceptr devA; checkCudaErrors(cuMemAlloc(&devA, sizeof(int)*array->size)); //twiceカーネルが定義されてなければそれをロードする checkCudaErrors(cuModuleLoad(&context->module, "c/examples/twice/CUDAtwice.ptx")); checkCudaErrors(cuModuleGetFunction(&context->function, context->module, "twice")); //入力のDataGearをGPUにbuffer経由で送る // Synchronous data transfer(host to device) checkCudaErrors(cuMemcpyHtoD(devA, array->array, array->size)); // Asynchronous launch kernel context->num_exec = 1; void* args[] = {&devA}; if (context->iterate) { struct MultiDimIterator* iterator = &context->iterator->iterator->MultiDimIterator; checkCudaErrors(cuLaunchKernel(context->function, iterator->x, iterator->y, iterator->z, 1, 1, 1, 0, NULL, args, NULL)); } else { checkCudaErrors(cuLaunchKernel(context->function, 1, 1, 1, 1, 1, 1, 0, NULL, args, NULL)); } //結果を取ってくるコマンドを入力する //コマンドの終了待ちを行う checkCudaErrors(cuMemcpyDtoH(array->array, devA, array->size)); // wait for stream checkCudaErrors(cuCtxSynchronize()); } void cudaShutdown( struct CUDAWorker *worker) { // for (int i=0;i<worker->num_stream;i++) // checkCudaErrors(cuStreamDestroy(worker->stream[i])); checkCudaErrors(cuCtxDestroy(worker->cuCtx)); }