Mercurial > hg > GearsTemplate
diff src/parallel_execution/CUDAWorker.cbc @ 303:1dbaef86593b
CUDAtwice.cbc
author | ikkun |
---|---|
date | Mon, 13 Feb 2017 18:23:29 +0900 |
parents | 8e7926f3e271 |
children | 9755206813cb |
line wrap: on
line diff
--- a/src/parallel_execution/CUDAWorker.cbc Mon Feb 13 17:58:04 2017 +0900
+++ b/src/parallel_execution/CUDAWorker.cbc Mon Feb 13 18:23:29 2017 +0900
@@ -1,8 +1,18 @@
+#include <stdio.h>
+#include <sys/time.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include <cuda.h>
+
+#include <cuda_runtime.h>
+#include "helper_cuda.h"
+
 #include <libkern/OSAtomic.h>
 
 #include "../context.h"
 
-static void start_worker(Worker* worker);
+static void start_CUDAworker(Worker* worker);
 
 union Data* createCUDAWorker(struct Context* context, int id, Queue* queue) {
     struct Worker* worker = ALLOC(context, Worker);
@@ -16,11 +26,31 @@
     return (union Data*)(worker);
 }
 
-static void start_worker(Worker* worker) {
+static void start_CUDAworker(Worker* worker) {
     CUDAWorker* CUDAWorker = (CUDAWorker*)worker->worker;
     CUDAWorker->context = NEW(struct Context);
     initContext(CUDAWorker->context);
     Gearef(CUDAWorker->context, Worker)->worker = (union Data*)worker;
+    int num_stream = 1; // number of stream
+    int num_exec = 16; // number of executed kernel
+
+    // initialize and load kernel
+    CUdevice device;
+    CUcontext context;
+    CUmodule module;
+    CUfunction function;
+    CUstream stream[num_stream];
+
+    checkCudaErrors(cuInit(0));
+    checkCudaErrors(cuDeviceGet(&device, 0));
+    checkCudaErrors(cuCtxCreate(&context, CU_CTX_SCHED_SPIN, device));
+    checkCudaErrors(cuModuleLoad(&module, "multiply.ptx"));
+    checkCudaErrors(cuModuleGetFunction(&function, module, "multiply"));
+    if (num_stream) {
+        for (int i=0;i<num_stream;i++)
+            checkCudaErrors(cuStreamCreate(&stream[i],0));
+    }
+
     goto meta(CUDAWorker->context, C_taskReceiveCUDAWorker);
 }
 
@@ -58,10 +88,14 @@
 }
 #endif
 
-__code shutdownWorker(struct Context* context, CPUWorker* worker) {
+__code shutdownCUDAWorker(struct Context* context, CPUWorker* worker) {
+    for (int i=0;i<num_stream;i++)
+        checkCudaErrors(cuStreamDestroy(stream[i]));
+    checkCudaErrors(cuModuleUnload(module));
+    checkCudaErrors(cuCtxDestroy(context));
 }
 
-__code shutdownWorker_stub(struct Context* context) {
+__code shutdownCUDAWorker_stub(struct Context* context) {
     CPUWorker* worker = (CPUWorker *)GearImpl(context, Worker, worker);
-    goto shutdownWorker(context,worker);
+    goto shutdownCUDAWorker(context,worker);
 }