diff src/parallel_execution/CUDAWorker.cbc @ 303:1dbaef86593b

CUDAtwice.cbc
author ikkun
date Mon, 13 Feb 2017 18:23:29 +0900
parents 8e7926f3e271
children 9755206813cb
line wrap: on
line diff
--- a/src/parallel_execution/CUDAWorker.cbc	Mon Feb 13 17:58:04 2017 +0900
+++ b/src/parallel_execution/CUDAWorker.cbc	Mon Feb 13 18:23:29 2017 +0900
@@ -1,8 +1,18 @@
+#include <stdio.h>
+#include <sys/time.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include <cuda.h>
+
+#include <cuda_runtime.h>
+#include "helper_cuda.h"
+
 #include <libkern/OSAtomic.h>
 
 #include "../context.h"
 
-static void start_worker(Worker* worker);
+static void start_CUDAworker(Worker* worker);
 
 union Data* createCUDAWorker(struct Context* context, int id, Queue* queue) {
     struct Worker* worker = ALLOC(context, Worker);
@@ -16,11 +26,39 @@
     return (union Data*)(worker);
 }
 
-static void start_worker(Worker* worker) {
-    CUDAWorker* CUDAWorker = (CUDAWorker*)worker->worker;
-    CUDAWorker->context = NEW(struct Context);
-    initContext(CUDAWorker->context);
-    Gearef(CUDAWorker->context, Worker)->worker = (union Data*)worker;
-    goto meta(CUDAWorker->context, C_taskReceiveCUDAWorker);
+// Number of CUDA streams created for the worker.
+#define CUDA_WORKER_NUM_STREAM 1
+
+// CUDA driver handles created by start_CUDAworker and released by
+// shutdownCUDAWorker. They live at file scope so both functions can reach
+// them, which means only one CUDA worker per process is supported.
+static CUcontext cuWorkerContext;
+static CUmodule cuWorkerModule;
+static CUfunction cuWorkerFunction;
+static CUstream cuWorkerStream[CUDA_WORKER_NUM_STREAM];
+
+// Start routine of the CUDA worker: builds the worker's Context,
+// initializes the CUDA driver API on device 0, loads the "multiply" kernel
+// from multiply.ptx, creates the streams and then jumps to the task
+// receiving code gear.
+static void start_CUDAworker(Worker* worker) {
+    // Lower-case local: naming it CUDAWorker would shadow the type inside
+    // its own initializer and break the cast.
+    CUDAWorker* cudaWorker = (CUDAWorker*)worker->worker;
+    cudaWorker->context = NEW(struct Context);
+    initContext(cudaWorker->context);
+    Gearef(cudaWorker->context, Worker)->worker = (union Data*)worker;
+
+    // Initialize the driver API and load the kernel.
+    CUdevice device;
+    checkCudaErrors(cuInit(0));
+    checkCudaErrors(cuDeviceGet(&device, 0));
+    checkCudaErrors(cuCtxCreate(&cuWorkerContext, CU_CTX_SCHED_SPIN, device));
+    checkCudaErrors(cuModuleLoad(&cuWorkerModule, "multiply.ptx"));
+    checkCudaErrors(cuModuleGetFunction(&cuWorkerFunction, cuWorkerModule, "multiply"));
+    for (int i = 0; i < CUDA_WORKER_NUM_STREAM; i++)
+        checkCudaErrors(cuStreamCreate(&cuWorkerStream[i], 0));
+
+    goto meta(cudaWorker->context, C_taskReceiveCUDAWorker);
 }
 
@@ -58,10 +96,20 @@
 }
 #endif
 
-__code shutdownWorker(struct Context* context, CPUWorker* worker) {
+// Code gear that tears down the CUDA state created by start_CUDAworker:
+// destroys the streams, unloads the module and destroys the CUDA context.
+// The context and worker arguments are not used here.
+__code shutdownCUDAWorker(struct Context* context, CPUWorker* worker) {
+    for (int i = 0; i < CUDA_WORKER_NUM_STREAM; i++)
+        checkCudaErrors(cuStreamDestroy(cuWorkerStream[i]));
+    checkCudaErrors(cuModuleUnload(cuWorkerModule));
+    // cuWorkerContext is the CUcontext; the context parameter is the Gears
+    // Context and must not be handed to the driver API.
+    checkCudaErrors(cuCtxDestroy(cuWorkerContext));
 }
 
-__code shutdownWorker_stub(struct Context* context) {
+// Stub that fetches the worker from the Context and jumps to shutdownCUDAWorker.
+__code shutdownCUDAWorker_stub(struct Context* context) {
     CPUWorker* worker = (CPUWorker *)GearImpl(context, Worker, worker);
-    goto shutdownWorker(context,worker);
+    goto shutdownCUDAWorker(context,worker);
 }