Mercurial > hg > Gears > GearsAgda
view src/parallel_execution/CUDAWorker.cbc @ 316:54d203daf06b
CUDAtwice.cbc is called.
author | Shinji KONO <kono@ie.u-ryukyu.ac.jp> |
---|---|
date | Wed, 15 Feb 2017 16:25:23 +0900 |
parents | 1839586f5b41 |
children | 51aa65676e37 |
line wrap: on
line source
#include <stdio.h> #include <sys/time.h> #include <string.h> #include <stdlib.h> #include <libkern/OSAtomic.h> // includes, project #include <driver_types.h> #include <cuda_runtime.h> #include <cuda.h> #include "helper_cuda.h" #include "../context.h" static void start_CUDAworker(Worker* worker); static void cudaInit(struct CUDAWorker *cudaWorker,int phase) ; volatile int cuda_initialized = 0; Worker* createCUDAWorker(struct Context* context, int id, Queue* queue, TaskManagerImpl *im) { struct Worker* worker = ALLOC(context, Worker); struct CUDAWorker* cudaWorker = new CUDAWorker(); cudaInit(cudaWorker,0); worker->worker = (union Data*)cudaWorker; worker->tasks = queue; printf("createCUDAWorker %p\n",queue); cudaWorker->id = id; worker->shutdown = C_shutdownCUDAWorker; // pthread_create(&worker->worker->CUDAWorker.thread, NULL, (void*)&start_CUDAworker, worker); if (im) { printf("im->worker %p\n",im->workers); im->workers[0] = worker; } cuda_initialized = 1; start_CUDAworker(worker); return worker; } static void cudaInit(struct CUDAWorker *cudaWorker,int phase) { // initialize and load kernel cudaWorker->num_stream = 1; // number of stream // cudaWorker->stream = NEWN(cudaWorker->num_stream, CUstream ); printf("cudaInit 1\n"); if (phase==0) checkCudaErrors(cuInit(0)); if (phase==0) checkCudaErrors(cuDeviceGet(&cudaWorker->device, 0)); printf("cudaInit 2\n"); if (phase==0) checkCudaErrors(cuCtxCreate(&cudaWorker->cuCtx, CU_CTX_SCHED_SPIN, cudaWorker->device)); printf("cudaInit 3\n"); // if (cudaWorker->num_stream) { // for (int i=0;i<cudaWorker->num_stream;i++) // checkCudaErrors(cuStreamCreate(&cudaWorker->stream[i],0)); // } CUdeviceptr devA; checkCudaErrors(cuMemAlloc(&devA, 16)); printf("cudaInit done\n"); } static void start_CUDAworker(Worker* worker) { CUDAWorker* cudaWorker = (CUDAWorker*)worker->worker; cudaWorker->context = NEW(struct Context); initContext(cudaWorker->context); Gearef(cudaWorker->context, Worker)->worker = (union Data*)worker; goto meta(cudaWorker->context, C_taskReceiveCUDAWorker); } __code taskReceiveCUDAWorker(struct Worker* worker,struct Queue* queue) { queue->queue = (union Data*)worker->tasks; queue->next = C_getTaskCUDA; goto meta(context, worker->tasks->take); } __code taskReceiveCUDAWorker_stub(struct Context* context) { goto taskReceiveCUDAWorker(context, &Gearef(context, Worker)->worker->Worker, Gearef(context, Queue)); } __code getTaskCUDA(struct Worker* worker, struct Context* task) { if (!task) return; // end thread // if (cuda_initialized==0 || 1) { // CUDAWorker* cudaWorker = (CUDAWorker*)worker->worker; // cudaInit(cudaWorker,1); // } worker->taskReceive = C_taskReceiveCUDAWorker; task->worker = worker; enum Code taskCg = task->next; task->next = C_odgCommitCUDA; // set CG after task exec goto meta(task, taskCg); } __code getTaskCUDA_stub(struct Context* context) { Worker* worker = &Gearef(context,Worker)->worker->Worker; struct Context* task = &Gearef(context, Queue)->data->Context; goto getTaskCUDA(context, worker, task); } __code odgCommitCUDA(struct LoopCounter* loopCounter, struct Queue* queue, struct Context* task) { int i = loopCounter->i ; if(task->odg + i < task->maxOdg) { queue->queue = (union Data*)GET_WAIT_LIST(task->data[task->odg+i]); queue->next = C_odgCommitCUDA1; goto meta(context, queue->queue->Queue.take); } loopCounter->i = 0; goto meta(context, C_taskReceiveCUDAWorker); } __code odgCommitCUDA_stub(struct Context* context) { struct Context* workerContext = context->worker->worker->CUDAWorker.context; goto odgCommitCUDA(workerContext, Gearef(workerContext, LoopCounter), Gearef(workerContext, Queue), context); } __code odgCommitCUDA1(struct TaskManager* taskManager, struct Context* task) { if(__sync_fetch_and_sub(&task->idgCount, 1)) { if(task->idgCount == 0) { taskManager->taskManager = (union Data*)task->taskManager; taskManager->context = task; taskManager->next = C_odgCommitCUDA; goto meta(context, task->taskManager->spawn); } } else { goto meta(context, C_odgCommitCUDA1); } } __code odgCommitCUDA1_stub(struct Context* context) { struct Context* task = &Gearef(context, Queue)->data->Context; goto odgCommitCUDA1(context, Gearef(context, TaskManager), task); } __code shutdownCUDAWorker(struct Context* context, CUDAWorker* worker) { // for (int i=0;i<worker->num_stream;i++) // checkCudaErrors(cuStreamDestroy(worker->stream[i])); checkCudaErrors(cuCtxDestroy(worker->cuCtx)); } __code shutdownCUDAWorker_stub(struct Context* context) { CUDAWorker* worker = (CUDAWorker *)GearImpl(context, Worker, worker); goto shutdownCUDAWorker(context,worker); }