GearsTemplate: src/parallel_execution/CUDAWorker.cbc @ 303:1dbaef86593b
| description | CUDAtwice.cbc |
| --- | --- |
| author | ikkun |
| date | Mon, 13 Feb 2017 18:23:29 +0900 |
| parents | 8e7926f3e271 |
| children | 9755206813cb |
#include <stdio.h>
#include <sys/time.h>
#include <string.h>
#include <stdlib.h>

#include <cuda.h>
#include <cuda_runtime.h>
#include "helper_cuda.h"

#include <libkern/OSAtomic.h>

#include "../context.h"

static void start_CUDAworker(Worker* worker);

union Data* createCUDAWorker(struct Context* context, int id, Queue* queue) {
    // allocate the Worker interface and its CUDA implementation
    struct Worker* worker = ALLOC(context, Worker);
    struct CUDAWorker* CUDAWorker = ALLOC(context, CUDAWorker);
    worker->worker = (union Data*)CUDAWorker;
    worker->tasks = queue;
    CUDAWorker->id = id;
    worker->taskReceive = C_taskReceiveCUDAWorker;
    worker->shutdown = C_shutdownCUDAWorker;
    pthread_create(&worker->worker->CUDAWorker.thread, NULL, (void*)&start_CUDAworker, worker);
    return (union Data*)(worker);
}

static void start_CUDAworker(Worker* worker) {
    CUDAWorker* CUDAWorker = (CUDAWorker*)worker->worker;
    CUDAWorker->context = NEW(struct Context);
    initContext(CUDAWorker->context);
    Gearef(CUDAWorker->context, Worker)->worker = (union Data*)worker;

    int num_stream = 1; // number of streams
    int num_exec = 16;  // number of executed kernels

    // initialize the CUDA driver API and load the kernel
    CUdevice device;
    CUcontext context;
    CUmodule module;
    CUfunction function;
    CUstream stream[num_stream];

    checkCudaErrors(cuInit(0));
    checkCudaErrors(cuDeviceGet(&device, 0));
    checkCudaErrors(cuCtxCreate(&context, CU_CTX_SCHED_SPIN, device));
    checkCudaErrors(cuModuleLoad(&module, "multiply.ptx"));
    checkCudaErrors(cuModuleGetFunction(&function, module, "multiply"));
    if (num_stream) {
        for (int i = 0; i < num_stream; i++)
            checkCudaErrors(cuStreamCreate(&stream[i], 0));
    }

    goto meta(CUDAWorker->context, C_taskReceiveCUDAWorker);
}

__code taskReceiveCUDAWorker(struct Context* context, Worker* worker, Queue* queue) {
    queue->queue = (union Data*)worker->tasks;
    queue->next = C_getCUDATask;   // continue at getCUDATask once a task has been taken
    goto meta(context, worker->tasks->take);
}

__code taskReceiveCUDAWorker_stub(struct Context* context) {
    CUDAWorker* CUDAWorker = (CUDAWorker*)GearImpl(context, Worker, worker);
    pthread_cond_wait(&CUDAWorker->cond, &CUDAWorker->mutex);
    goto taskReceiveCUDAWorker(context, &Gearef(context, Worker)->worker->Worker, Gearef(context, Queue));
}

__code getCUDATask(struct Context* context, Worker* worker, struct Context* task) {
    if (!task)
        return; // end thread
    task->worker = worker;
    context->next = C_taskReceiveCUDAWorker; // set CG after task exec
    goto meta(task, task->next);
}

__code getCUDATask_stub(struct Context* context) {
    Worker* worker = &Gearef(context, Worker)->worker->Worker;
    struct Context* task = &Gearef(context, Queue)->data->Context;
    goto getCUDATask(context, worker, task);
}

#ifdef USE_CUDA
__code twiceCUDA(struct Context* context) {
    // not yet implemented: the driver API arguments below are placeholders
    cuMemcpyHtoDAsync(context, context, context, context->stream);
    cuLaunchKernel();
    cuMemcpyDtoHAsync();
}
#endif

__code shutdownCUDAWorker(struct Context* context, CUDAWorker* worker) {
    // releases the streams, module and CUDA context set up in start_CUDAworker
    for (int i = 0; i < num_stream; i++)
        checkCudaErrors(cuStreamDestroy(stream[i]));
    checkCudaErrors(cuModuleUnload(module));
    checkCudaErrors(cuCtxDestroy(context));
}

__code shutdownCUDAWorker_stub(struct Context* context) {
    CUDAWorker* worker = (CUDAWorker*)GearImpl(context, Worker, worker);
    goto shutdownCUDAWorker(context, worker);
}
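The twiceCUDA code gear above only outlines the intended sequence of host-to-device copy, kernel launch, and device-to-host copy; its arguments are placeholders. As a point of reference, the following is a minimal sketch of what those three CUDA driver API calls look like once real arguments are supplied. It is not taken from this repository: the helper name launchTwice, the buffer array and its length, and the grid/block sizes are assumptions for illustration, and the signature of the "multiply" kernel loaded from multiply.ptx is unknown here.

// Sketch only: fills in the three driver API calls that twiceCUDA leaves as
// placeholders. launchTwice, array, length, and the launch geometry are
// illustrative assumptions, not part of CUDAWorker.cbc.
#include <cuda.h>
#include "helper_cuda.h"

static void launchTwice(CUfunction function, CUstream stream, int* array, size_t length) {
    CUdeviceptr devA;
    size_t bytes = length * sizeof(int);
    checkCudaErrors(cuMemAlloc(&devA, bytes));

    // host -> device copy on the worker's stream
    checkCudaErrors(cuMemcpyHtoDAsync(devA, array, bytes, stream));

    // launch the kernel; the parameter list assumes multiply(int* a, int n)
    int n = (int)length;
    void* args[] = { &devA, &n };
    checkCudaErrors(cuLaunchKernel(function,
                                   (unsigned int)((length + 63) / 64), 1, 1,  // grid
                                   64, 1, 1,                                  // block
                                   0, stream, args, NULL));

    // device -> host copy of the result, then wait for the stream to drain
    checkCudaErrors(cuMemcpyDtoHAsync(array, devA, bytes, stream));
    checkCudaErrors(cuStreamSynchronize(stream));
    checkCudaErrors(cuMemFree(devA));
}

In the worker above, function and stream[0] produced by start_CUDAworker would be the natural arguments to pass to such a helper.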