Mercurial > hg > GearsTemplate
annotate src/parallel_execution/cuda.c @ 404:c5cd9888bf2a
Fix bitonicSort
author | Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Sun, 03 Sep 2017 00:21:16 +0900 |
parents | 408b4aab7610 |
children | 85b0ddbf458e |
rev | line source |
---|---|
319
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
1 #include <stdio.h> |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
2 #include <sys/time.h> |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
3 #include <string.h> |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
4 #include <stdlib.h> |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
5 #include <libkern/OSAtomic.h> |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
6 |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
7 // includes, project |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
8 #include <driver_types.h> |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
9 #include <cuda_runtime.h> |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
10 #include <cuda.h> |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
11 #include "helper_cuda.h" |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
12 #include "pthread.h" |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
13 |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
14 // #include "context.h" |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
15 |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
16 struct Context { |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
17 int next; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
18 struct Worker* worker; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
19 struct TaskManager* taskManager; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
20 int codeNum; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
21 void (**code) (struct Context*); |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
22 void* heapStart; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
23 void* heap; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
24 long heapLimit; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
25 int dataNum; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
26 int idgCount; //number of waiting dataGear |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
27 int odg; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
28 int maxOdg; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
29 int workerId; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
30 int num_exec; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
31 CUmodule module; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
32 CUfunction function; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
33 union Data **data; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
34 }; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
35 |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
36 struct CUDAWorker { |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
37 CUdevice device; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
38 CUcontext cuCtx; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
39 pthread_t thread; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
40 struct Context* context; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
41 int id; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
42 struct Queue* tasks; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
43 int runFlag; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
44 int next; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
45 int num_stream; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
46 CUstream *stream; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
47 } CUDAWorker; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
48 |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
/* Single integer counter; CUDAExec copies one of these to the device. */
struct LoopCounter {
    int i;
};

/*
 * File-scope object named LoopCounter (an object, not a typedef).
 * `sizeof(LoopCounter)` therefore measures this object, which has the size
 * of `struct LoopCounter`.
 */
struct LoopCounter LoopCounter;
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
52 |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
/*
 * Descriptor of an int buffer handed to the GPU by CUDAExec.
 * `size` is used there as the byte count for device allocation and copies;
 * `index` and `prefix` are passed to the kernel by address as arguments.
 */
struct Array {
    int size;
    int index;
    int prefix;
    int *array;
};

/* File-scope object named Array (an object, not a typedef). */
struct Array Array;
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
59 |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
60 |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
61 |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
62 void cudaInit(struct CUDAWorker *cudaWorker,int phase) { |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
63 // initialize and load kernel |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
64 cudaWorker->num_stream = 1; // number of stream |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
65 // cudaWorker->stream = NEWN(cudaWorker->num_stream, CUstream ); |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
66 if (phase==0) |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
67 checkCudaErrors(cuInit(0)); |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
68 if (phase==0) |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
69 checkCudaErrors(cuDeviceGet(&cudaWorker->device, 0)); |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
70 if (phase==0) |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
71 checkCudaErrors(cuCtxCreate(&cudaWorker->cuCtx, CU_CTX_SCHED_SPIN, cudaWorker->device)); |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
72 // if (cudaWorker->num_stream) { |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
73 // for (int i=0;i<cudaWorker->num_stream;i++) |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
74 // checkCudaErrors(cuStreamCreate(&cudaWorker->stream[i],0)); |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
75 // } |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
76 } |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
77 |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
78 |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
79 void CUDAExec(struct Context* context, struct Array* array, struct LoopCounter *loopCounter) { |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
80 // Worker *worker = context->worker; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
81 // CUDAWorker* cudaWorker = (CUDAWorker*)worker->worker; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
82 // memory allocate |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
83 CUdeviceptr devA; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
84 CUdeviceptr devLoopCounter; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
85 |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
86 checkCudaErrors(cuMemAlloc(&devA, array->size)); |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
87 checkCudaErrors(cuMemAlloc(&devLoopCounter, sizeof(LoopCounter))); |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
88 |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
89 //twiceカーネルが定義されてなければそれをロードする |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
90 checkCudaErrors(cuModuleLoad(&context->module, "c/CUDAtwice.ptx")); |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
91 checkCudaErrors(cuModuleGetFunction(&context->function, context->module, "twice")); |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
92 |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
93 //入力のDataGearをGPUにbuffer経由で送る |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
94 // Synchronous data transfer(host to device) |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
95 checkCudaErrors(cuMemcpyHtoD(devLoopCounter, loopCounter, sizeof(LoopCounter))); |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
96 checkCudaErrors(cuMemcpyHtoD(devA, array->array, array->size)); |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
97 |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
98 // Asynchronous launch kernel |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
99 context->num_exec = 1; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
100 void* args[] = {&devLoopCounter,&array->index,&array->prefix,&devA}; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
101 checkCudaErrors(cuLaunchKernel(context->function, |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
102 1, 1, 1, |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
103 1, 1, 1, |
404
c5cd9888bf2a
Fix bitonicSort
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
401
diff
changeset
|
104 0, NULL, args, NULL)); |
319
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
105 |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
106 //結果を取ってくるコマンドを入力する |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
107 //コマンドの終了待ちを行う |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
108 checkCudaErrors(cuMemcpyDtoH(array->array, devA, array->size)); |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
109 |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
110 // wait for stream |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
111 checkCudaErrors(cuCtxSynchronize()); |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
112 } |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
113 |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
114 void cudaShutdown( struct CUDAWorker *worker) { |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
115 // for (int i=0;i<worker->num_stream;i++) |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
116 // checkCudaErrors(cuStreamDestroy(worker->stream[i])); |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
117 checkCudaErrors(cuCtxDestroy(worker->cuCtx)); |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
118 } |