Mercurial > hg > GearsTemplate
annotate src/parallel_execution/cuda.c @ 557:1eb2a22ec1e3
tweak
author | anatofuz <anatofuz@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Mon, 18 Nov 2019 21:22:34 +0900 |
parents | d6983ce1015d |
children |
rev | line source |
---|---|
319
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
1 #include <stdio.h> |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
2 #include <sys/time.h> |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
3 #include <string.h> |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
4 #include <stdlib.h> |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
5 |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
6 // includes, project |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
7 #include <driver_types.h> |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
8 #include <cuda_runtime.h> |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
9 #include <cuda.h> |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
10 #include "helper_cuda.h" |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
11 #include "pthread.h" |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
12 |
410
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
13 #include "context.h" |
319
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
14 |
410
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
15 /* |
319
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
16 struct Context { |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
17 int next; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
18 struct Worker* worker; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
19 struct TaskManager* taskManager; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
20 int codeNum; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
21 void (**code) (struct Context*); |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
22 void* heapStart; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
23 void* heap; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
24 long heapLimit; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
25 int dataNum; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
26 int idgCount; //number of waiting dataGear |
410
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
27 int idg; |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
28 int maxIdg; |
319
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
29 int odg; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
30 int maxOdg; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
31 int workerId; |
410
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
32 struct Context* task; |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
33 struct Queue* tasks; |
319
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
34 int num_exec; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
35 CUmodule module; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
36 CUfunction function; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
37 union Data **data; |
410
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
38 |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
39 // multi dimension parameter |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
40 int iterate; |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
41 struct Iterator* iterator; |
319
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
42 }; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
43 |
410
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
44 struct CUDAWorker { |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
45 CUdevice device; |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
46 CUcontext cuCtx; |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
47 pthread_t thread; |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
48 struct Context* context; |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
49 int id; |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
50 struct Queue* tasks; |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
51 int runFlag; |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
52 int next; |
451
dcc42f3e7e97
Auto choice blockDim
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
436
diff
changeset
|
53 int numStream; |
410
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
54 CUstream *stream; |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
55 } CUDAWorker; |
319
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
56 |
410
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
57 struct LoopCounter { |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
58 int i; |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
59 } LoopCounter; |
319
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
60 |
410
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
61 struct Array { |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
62 int size; |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
63 int index; |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
64 int prefix; |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
65 int* array; |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
66 } Array; |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
67 */ |
319
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
68 |
451
dcc42f3e7e97
Auto choice blockDim
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
436
diff
changeset
|
69 void cudaInit(struct CUDAWorker *cudaWorker,int phase, int deviceNum) { |
319
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
70 // initialize and load kernel |
451
dcc42f3e7e97
Auto choice blockDim
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
436
diff
changeset
|
71 cudaWorker->numStream = 1; // number of stream |
dcc42f3e7e97
Auto choice blockDim
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
436
diff
changeset
|
72 // cudaWorker->stream = NEWN(cudaWorker->numStream, CUstream ); |
410
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
73 if (phase==0) |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
74 checkCudaErrors(cuInit(0)); |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
75 if (phase==0) |
451
dcc42f3e7e97
Auto choice blockDim
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
436
diff
changeset
|
76 checkCudaErrors(cuDeviceGet(&cudaWorker->device, deviceNum)); |
410
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
77 if (phase==0) |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
78 checkCudaErrors(cuCtxCreate(&cudaWorker->cuCtx, CU_CTX_SCHED_SPIN, cudaWorker->device)); |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
79 // if (cudaWorker->num_stream) { |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
80 // for (int i=0;i<cudaWorker->num_stream;i++) |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
81 // checkCudaErrors(cuStreamCreate(&cudaWorker->stream[i],0)); |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
82 // } |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
83 printf("cuda Init: Done\n"); |
319
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
84 } |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
85 |
433
d920f3a3f037
Refactoring cuda.c
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
431
diff
changeset
|
86 void cudaLoadFunction(struct Context* context, char* filename, char* function) { |
431
b3359544adbb
Edit cudaExec but not work
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
430
diff
changeset
|
87 checkCudaErrors(cuModuleLoad(&context->module, filename)); |
b3359544adbb
Edit cudaExec but not work
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
430
diff
changeset
|
88 checkCudaErrors(cuModuleGetFunction(&context->function, context->module, function)); |
433
d920f3a3f037
Refactoring cuda.c
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
431
diff
changeset
|
89 } |
319
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
90 |
436
08a93fc2f0d3
Fix CudaExecutor but not work
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
433
diff
changeset
|
91 void cudaShutdown(struct CUDAWorker *worker) { |
410
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
92 // for (int i=0;i<worker->num_stream;i++) |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
93 // checkCudaErrors(cuStreamDestroy(worker->stream[i])); |
319
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
94 checkCudaErrors(cuCtxDestroy(worker->cuCtx)); |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
95 } |