Mercurial > hg > GearsTemplate
annotate src/parallel_execution/cuda.c @ 436:08a93fc2f0d3
Fix CudaExecutor but not work
author | Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Sat, 04 Nov 2017 06:52:32 +0900 |
parents | d920f3a3f037 |
children | dcc42f3e7e97 |
rev | line source |
---|---|
319
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
1 #include <stdio.h> |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
2 #include <sys/time.h> |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
3 #include <string.h> |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
4 #include <stdlib.h> |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
5 #include <libkern/OSAtomic.h> |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
6 |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
7 // includes, project |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
8 #include <driver_types.h> |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
9 #include <cuda_runtime.h> |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
10 #include <cuda.h> |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
11 #include "helper_cuda.h" |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
12 #include "pthread.h" |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
13 |
410
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
14 #include "context.h" |
319
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
15 |
410
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
16 /* |
319
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
17 struct Context { |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
18 int next; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
19 struct Worker* worker; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
20 struct TaskManager* taskManager; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
21 int codeNum; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
22 void (**code) (struct Context*); |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
23 void* heapStart; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
24 void* heap; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
25 long heapLimit; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
26 int dataNum; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
27 int idgCount; //number of waiting dataGear |
410
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
28 int idg; |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
29 int maxIdg; |
319
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
30 int odg; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
31 int maxOdg; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
32 int workerId; |
410
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
33 struct Context* task; |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
34 struct Queue* tasks; |
319
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
35 int num_exec; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
36 CUmodule module; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
37 CUfunction function; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
38 union Data **data; |
410
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
39 |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
40 // multi dimension parameter |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
41 int iterate; |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
42 struct Iterator* iterator; |
319
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
43 }; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
44 |
410
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
45 struct CUDAWorker { |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
46 CUdevice device; |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
47 CUcontext cuCtx; |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
48 pthread_t thread; |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
49 struct Context* context; |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
50 int id; |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
51 struct Queue* tasks; |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
52 int runFlag; |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
53 int next; |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
54 int num_stream; |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
55 CUstream *stream; |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
56 } CUDAWorker; |
319
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
57 |
410
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
58 struct LoopCounter { |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
59 int i; |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
60 } LoopCounter; |
319
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
61 |
410
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
62 struct Array { |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
63 int size; |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
64 int index; |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
65 int prefix; |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
66 int* array; |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
67 } Array; |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
68 */ |
319
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
69 |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
70 void cudaInit(struct CUDAWorker *cudaWorker,int phase) { |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
71 // initialize and load kernel |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
72 cudaWorker->num_stream = 1; // number of stream |
410
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
73 // cudaWorker->stream = NEWN(cudaWorker->num_stream, CUstream ); |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
74 if (phase==0) |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
75 checkCudaErrors(cuInit(0)); |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
76 if (phase==0) |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
77 checkCudaErrors(cuDeviceGet(&cudaWorker->device, 0)); |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
78 if (phase==0) |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
79 checkCudaErrors(cuCtxCreate(&cudaWorker->cuCtx, CU_CTX_SCHED_SPIN, cudaWorker->device)); |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
80 // if (cudaWorker->num_stream) { |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
81 // for (int i=0;i<cudaWorker->num_stream;i++) |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
82 // checkCudaErrors(cuStreamCreate(&cudaWorker->stream[i],0)); |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
83 // } |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
84 printf("cuda Init: Done\n"); |
319
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
85 } |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
86 |
433
d920f3a3f037
Refactoring cuda.c
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
431
diff
changeset
|
87 void cudaLoadFunction(struct Context* context, char* filename, char* function) { |
431
b3359544adbb
Edit cudaExec but not work
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
430
diff
changeset
|
88 checkCudaErrors(cuModuleLoad(&context->module, filename)); |
b3359544adbb
Edit cudaExec but not work
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
430
diff
changeset
|
89 checkCudaErrors(cuModuleGetFunction(&context->function, context->module, function)); |
433
d920f3a3f037
Refactoring cuda.c
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
431
diff
changeset
|
90 } |
319
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
91 |
436
08a93fc2f0d3
Fix CudaExecutor but not work
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
433
diff
changeset
|
92 void cudaShutdown(struct CUDAWorker *worker) { |
410
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
93 // for (int i=0;i<worker->num_stream;i++) |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
94 // checkCudaErrors(cuStreamDestroy(worker->stream[i])); |
319
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
95 checkCudaErrors(cuCtxDestroy(worker->cuCtx)); |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
96 } |