annotate src/parallel_execution/cuda.c @ 404:c5cd9888bf2a

Fix bitonicSort
author Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
date Sun, 03 Sep 2017 00:21:16 +0900
parents 408b4aab7610
children 85b0ddbf458e
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
319
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1 #include <stdio.h>
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2 #include <sys/time.h>
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3 #include <string.h>
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
4 #include <stdlib.h>
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
5 #include <libkern/OSAtomic.h>
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
6
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
7 // includes, project
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
8 #include <driver_types.h>
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
9 #include <cuda_runtime.h>
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
10 #include <cuda.h>
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
11 #include "helper_cuda.h"
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
12 #include "pthread.h"
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
13
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
14 // #include "context.h"
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
15
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
16 struct Context {
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
17 int next;
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
18 struct Worker* worker;
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
19 struct TaskManager* taskManager;
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
20 int codeNum;
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
21 void (**code) (struct Context*);
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
22 void* heapStart;
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
23 void* heap;
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
24 long heapLimit;
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
25 int dataNum;
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
26 int idgCount; //number of waiting dataGear
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
27 int odg;
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
28 int maxOdg;
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
29 int workerId;
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
30 int num_exec;
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
31 CUmodule module;
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
32 CUfunction function;
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
33 union Data **data;
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
34 };
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
35
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
36 struct CUDAWorker {
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
37 CUdevice device;
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
38 CUcontext cuCtx;
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
39 pthread_t thread;
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
40 struct Context* context;
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
41 int id;
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
42 struct Queue* tasks;
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
43 int runFlag;
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
44 int next;
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
45 int num_stream;
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
46 CUstream *stream;
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
47 } CUDAWorker;
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
48
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
49 struct LoopCounter {
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
50 int i;
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
51 } LoopCounter;
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
52
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
53 struct Array {
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
54 int size;
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
55 int index;
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
56 int prefix;
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
57 int* array;
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
58 } Array;
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
59
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
60
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
61
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
62 void cudaInit(struct CUDAWorker *cudaWorker,int phase) {
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
63 // initialize and load kernel
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
64 cudaWorker->num_stream = 1; // number of stream
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
65 // cudaWorker->stream = NEWN(cudaWorker->num_stream, CUstream );
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
66 if (phase==0)
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
67 checkCudaErrors(cuInit(0));
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
68 if (phase==0)
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
69 checkCudaErrors(cuDeviceGet(&cudaWorker->device, 0));
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
70 if (phase==0)
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
71 checkCudaErrors(cuCtxCreate(&cudaWorker->cuCtx, CU_CTX_SCHED_SPIN, cudaWorker->device));
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
72 // if (cudaWorker->num_stream) {
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
73 // for (int i=0;i<cudaWorker->num_stream;i++)
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
74 // checkCudaErrors(cuStreamCreate(&cudaWorker->stream[i],0));
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
75 // }
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
76 }
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
77
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
78
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
79 void CUDAExec(struct Context* context, struct Array* array, struct LoopCounter *loopCounter) {
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
80 // Worker *worker = context->worker;
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
81 // CUDAWorker* cudaWorker = (CUDAWorker*)worker->worker;
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
82 // memory allocate
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
83 CUdeviceptr devA;
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
84 CUdeviceptr devLoopCounter;
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
85
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
86 checkCudaErrors(cuMemAlloc(&devA, array->size));
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
87 checkCudaErrors(cuMemAlloc(&devLoopCounter, sizeof(LoopCounter)));
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
88
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
89 //twiceカーネルが定義されてなければそれをロードする
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
90 checkCudaErrors(cuModuleLoad(&context->module, "c/CUDAtwice.ptx"));
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
91 checkCudaErrors(cuModuleGetFunction(&context->function, context->module, "twice"));
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
92
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
93 //入力のDataGearをGPUにbuffer経由で送る
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
94 // Synchronous data transfer(host to device)
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
95 checkCudaErrors(cuMemcpyHtoD(devLoopCounter, loopCounter, sizeof(LoopCounter)));
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
96 checkCudaErrors(cuMemcpyHtoD(devA, array->array, array->size));
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
97
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
98 // Asynchronous launch kernel
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
99 context->num_exec = 1;
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
100 void* args[] = {&devLoopCounter,&array->index,&array->prefix,&devA};
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
101 checkCudaErrors(cuLaunchKernel(context->function,
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
102 1, 1, 1,
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
103 1, 1, 1,
404
c5cd9888bf2a Fix bitonicSort
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 401
diff changeset
104 0, NULL, args, NULL));
319
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
105
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
106 //結果を取ってくるコマンドを入力する
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
107 //コマンドの終了待ちを行う
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
108 checkCudaErrors(cuMemcpyDtoH(array->array, devA, array->size));
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
109
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
110 // wait for stream
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
111 checkCudaErrors(cuCtxSynchronize());
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
112 }
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
113
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
114 void cudaShutdown( struct CUDAWorker *worker) {
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
115 // for (int i=0;i<worker->num_stream;i++)
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
116 // checkCudaErrors(cuStreamDestroy(worker->stream[i]));
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
117 checkCudaErrors(cuCtxDestroy(worker->cuCtx));
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
118 }