comparison src/parallel_execution/cuda.c @ 319:a15511b1a6e0

separate cuda.c, and USE_CUDA_MAIN_THREAD flag
author Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date Wed, 15 Feb 2017 20:43:55 +0900
parents
children 408b4aab7610
318:054c47e6ca20 319:a15511b1a6e0
#include <stdio.h>
#include <sys/time.h>
#include <string.h>
#include <stdlib.h>
#include <libkern/OSAtomic.h>

// includes, project
#include <driver_types.h>
#include <cuda_runtime.h>
#include <cuda.h>
#include "helper_cuda.h"
#include "pthread.h"

// #include "context.h"

struct Context {
    int next;
    struct Worker* worker;
    struct TaskManager* taskManager;
    int codeNum;
    void (**code) (struct Context*);
    void* heapStart;
    void* heap;
    long heapLimit;
    int dataNum;
    int idgCount; // number of waiting dataGears
    int odg;
    int maxOdg;
    int workerId;
    int num_exec;
    CUmodule module;
    CUfunction function;
    union Data **data;
};

struct CUDAWorker {
    CUdevice device;
    CUcontext cuCtx;
    pthread_t thread;
    struct Context* context;
    int id;
    struct Queue* tasks;
    int runFlag;
    int next;
    int num_stream;
    CUstream *stream;
} CUDAWorker;

struct LoopCounter {
    int i;
} LoopCounter;

struct Array {
    int size;
    int index;
    int prefix;
    int* array;
} Array;

void cudaInit(struct CUDAWorker *cudaWorker, int phase) {
    // initialize the CUDA driver API, select device 0, and create its context
    cudaWorker->num_stream = 1; // number of streams
    // cudaWorker->stream = NEWN(cudaWorker->num_stream, CUstream);
    if (phase == 0) {
        checkCudaErrors(cuInit(0));
        checkCudaErrors(cuDeviceGet(&cudaWorker->device, 0));
        checkCudaErrors(cuCtxCreate(&cudaWorker->cuCtx, CU_CTX_SCHED_SPIN, cudaWorker->device));
    }
    // if (cudaWorker->num_stream) {
    //     for (int i = 0; i < cudaWorker->num_stream; i++)
    //         checkCudaErrors(cuStreamCreate(&cudaWorker->stream[i], 0));
    // }
}

void CUDAExec(struct Context* context, struct Array* array, struct LoopCounter *loopCounter) {
    // Worker *worker = context->worker;
    // CUDAWorker* cudaWorker = (CUDAWorker*)worker->worker;

    // allocate device memory
    CUdeviceptr devA;
    CUdeviceptr devLoopCounter;
    checkCudaErrors(cuMemAlloc(&devA, array->size));
    checkCudaErrors(cuMemAlloc(&devLoopCounter, sizeof(LoopCounter)));

    // load the twice kernel if it has not been loaded yet
    checkCudaErrors(cuModuleLoad(&context->module, "c/CUDAtwice.ptx"));
    checkCudaErrors(cuModuleGetFunction(&context->function, context->module, "twice"));

    // send the input DataGears to the GPU through a buffer
    // synchronous data transfer (host to device)
    checkCudaErrors(cuMemcpyHtoD(devLoopCounter, loopCounter, sizeof(LoopCounter)));
    checkCudaErrors(cuMemcpyHtoD(devA, array->array, array->size));

    // asynchronous kernel launch
    context->num_exec = 1;
    void* args[] = {&devLoopCounter, &array->index, &array->prefix, &devA};
    checkCudaErrors(cuLaunchKernel(context->function,
                                   1, 1, 1,
                                   1, 1, 1,
                                   0, NULL, args, NULL));

    // enqueue the command that fetches the result,
    // then wait for that command to finish
    checkCudaErrors(cuMemcpyDtoH(array->array, devA, array->size));

    // wait for the stream to drain
    checkCudaErrors(cuCtxSynchronize());
}
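
/*
 * Not part of the original file: a hypothetical asynchronous variant of the
 * copy/launch/copy sequence in CUDAExec, queued on one of the CUstreams that
 * CUDAWorker reserves but does not use yet.  The function name and the
 * assumption that array->size is a byte count are illustrative only; real
 * overlap of transfers and compute would also require page-locked host memory.
 */
void CUDAExecAsyncSketch(struct Context* context, struct Array* array,
                         struct LoopCounter *loopCounter, CUstream stream) {
    CUdeviceptr devA;
    CUdeviceptr devLoopCounter;
    checkCudaErrors(cuMemAlloc(&devA, array->size));
    checkCudaErrors(cuMemAlloc(&devLoopCounter, sizeof(struct LoopCounter)));
    checkCudaErrors(cuModuleLoad(&context->module, "c/CUDAtwice.ptx"));
    checkCudaErrors(cuModuleGetFunction(&context->function, context->module, "twice"));

    // queue both transfers and the launch on the stream, then wait once at the end
    checkCudaErrors(cuMemcpyHtoDAsync(devLoopCounter, loopCounter, sizeof(struct LoopCounter), stream));
    checkCudaErrors(cuMemcpyHtoDAsync(devA, array->array, array->size, stream));
    void* args[] = {&devLoopCounter, &array->index, &array->prefix, &devA};
    checkCudaErrors(cuLaunchKernel(context->function,
                                   1, 1, 1,
                                   1, 1, 1,
                                   0, stream, args, NULL));
    checkCudaErrors(cuMemcpyDtoHAsync(array->array, devA, array->size, stream));
    checkCudaErrors(cuStreamSynchronize(stream));

    // release the device buffers once the result is back on the host
    checkCudaErrors(cuMemFree(devA));
    checkCudaErrors(cuMemFree(devLoopCounter));
}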

void cudaShutdown(struct CUDAWorker *worker) {
    // for (int i = 0; i < worker->num_stream; i++)
    //     checkCudaErrors(cuStreamDestroy(worker->stream[i]));
    checkCudaErrors(cuCtxDestroy(worker->cuCtx));
}
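
/*
 * Not part of the original file: a minimal usage sketch showing how cudaInit,
 * CUDAExec, and cudaShutdown might be driven from a single thread.  The
 * element count, the index/prefix values, and the assumption that Array.size
 * is a byte count are illustrative guesses, not taken from the original code.
 */
void exampleRun() {
    static int data[1024];
    struct CUDAWorker worker;
    struct LoopCounter counter = { .i = 0 };
    struct Array array = { .size = sizeof(data), .index = 0, .prefix = 1, .array = data };
    struct Context context;

    cudaInit(&worker, 0);                  // cuInit + device selection + context creation
    CUDAExec(&context, &array, &counter);  // load CUDAtwice.ptx, copy, launch "twice", copy back
    cudaShutdown(&worker);                 // destroy the CUDA context
}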