Mercurial > hg > GearsTemplate
annotate src/parallel_execution/cuda.c @ 430:35b37fe8d3a7
Add size member in struct Meta
author | Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Mon, 09 Oct 2017 17:46:42 +0900 |
parents | 49159fbdd1fb |
children | b3359544adbb |
rev | line source |
---|---|
319
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
1 #include <stdio.h> |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
2 #include <sys/time.h> |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
3 #include <string.h> |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
4 #include <stdlib.h> |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
5 #include <libkern/OSAtomic.h> |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
6 |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
7 // includes, project |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
8 #include <driver_types.h> |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
9 #include <cuda_runtime.h> |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
10 #include <cuda.h> |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
11 #include "helper_cuda.h" |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
12 #include "pthread.h" |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
13 |
410
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
14 #include "context.h" |
319
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
15 |
410
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
16 /* |
319
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
17 struct Context { |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
18 int next; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
19 struct Worker* worker; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
20 struct TaskManager* taskManager; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
21 int codeNum; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
22 void (**code) (struct Context*); |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
23 void* heapStart; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
24 void* heap; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
25 long heapLimit; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
26 int dataNum; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
27 int idgCount; //number of waiting dataGear |
410
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
28 int idg; |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
29 int maxIdg; |
319
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
30 int odg; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
31 int maxOdg; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
32 int workerId; |
410
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
33 struct Context* task; |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
34 struct Queue* tasks; |
319
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
35 int num_exec; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
36 CUmodule module; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
37 CUfunction function; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
38 union Data **data; |
410
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
39 |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
40 // multi dimension parameter |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
41 int iterate; |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
42 struct Iterator* iterator; |
319
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
43 }; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
44 |
410
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
45 struct CUDAWorker { |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
46 CUdevice device; |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
47 CUcontext cuCtx; |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
48 pthread_t thread; |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
49 struct Context* context; |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
50 int id; |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
51 struct Queue* tasks; |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
52 int runFlag; |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
53 int next; |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
54 int num_stream; |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
55 CUstream *stream; |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
56 } CUDAWorker; |
319
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
57 |
410
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
58 struct LoopCounter { |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
59 int i; |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
60 } LoopCounter; |
319
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
61 |
410
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
62 struct Array { |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
63 int size; |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
64 int index; |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
65 int prefix; |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
66 int* array; |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
67 } Array; |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
68 */ |
319
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
69 |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
70 void cudaInit(struct CUDAWorker *cudaWorker,int phase) { |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
71 // initialize and load kernel |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
72 cudaWorker->num_stream = 1; // number of stream |
410
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
73 // cudaWorker->stream = NEWN(cudaWorker->num_stream, CUstream ); |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
74 if (phase==0) |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
75 checkCudaErrors(cuInit(0)); |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
76 if (phase==0) |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
77 checkCudaErrors(cuDeviceGet(&cudaWorker->device, 0)); |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
78 if (phase==0) |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
79 checkCudaErrors(cuCtxCreate(&cudaWorker->cuCtx, CU_CTX_SCHED_SPIN, cudaWorker->device)); |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
80 // if (cudaWorker->num_stream) { |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
81 // for (int i=0;i<cudaWorker->num_stream;i++) |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
82 // checkCudaErrors(cuStreamCreate(&cudaWorker->stream[i],0)); |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
83 // } |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
84 printf("cuda Init: Done\n"); |
319
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
85 } |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
86 |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
87 |
414
49159fbdd1fb
Work CUDAbitonicSort
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
412
diff
changeset
|
88 void CUDAExec(struct Context* context, struct SortArray* inputSortArray, struct SortArray* outputSortArray) { |
49159fbdd1fb
Work CUDAbitonicSort
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
412
diff
changeset
|
89 //printf("cuda exec start\n"); |
319
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
90 // Worker *worker = context->worker; |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
91 // CUDAWorker* cudaWorker = (CUDAWorker*)worker->worker; |
410
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
92 // memory allocate |
319
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
93 CUdeviceptr devA; |
411
0eba9a04633f
Work CUDAtwice
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
410
diff
changeset
|
94 CUdeviceptr devB; |
414
49159fbdd1fb
Work CUDAbitonicSort
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
412
diff
changeset
|
95 CUdeviceptr devC; |
49159fbdd1fb
Work CUDAbitonicSort
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
412
diff
changeset
|
96 CUdeviceptr devD; |
319
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
97 |
430
35b37fe8d3a7
Add size member in struct Meta
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
414
diff
changeset
|
98 checkCudaErrors(cuMemAlloc(&devA, sizeof(struct Integer)*GET_LEN(inputSortArray->array))); |
411
0eba9a04633f
Work CUDAtwice
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
410
diff
changeset
|
99 checkCudaErrors(cuMemAlloc(&devB, sizeof(int))); |
414
49159fbdd1fb
Work CUDAbitonicSort
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
412
diff
changeset
|
100 checkCudaErrors(cuMemAlloc(&devC, sizeof(int))); |
49159fbdd1fb
Work CUDAbitonicSort
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
412
diff
changeset
|
101 checkCudaErrors(cuMemAlloc(&devD, sizeof(int))); |
319
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
102 |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
103 //twiceカーネルが定義されてなければそれをロードする |
414
49159fbdd1fb
Work CUDAbitonicSort
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
412
diff
changeset
|
104 checkCudaErrors(cuModuleLoad(&context->module, "c/examples/bitonicSort/CUDAbitonicSwap.ptx")); |
49159fbdd1fb
Work CUDAbitonicSort
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
412
diff
changeset
|
105 checkCudaErrors(cuModuleGetFunction(&context->function, context->module, "bitonicSwap")); |
319
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
106 |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
107 //入力のDataGearをGPUにbuffer経由で送る |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
108 // Synchronous data transfer(host to device) |
430
35b37fe8d3a7
Add size member in struct Meta
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
414
diff
changeset
|
109 checkCudaErrors(cuMemcpyHtoD(devA, inputSortArray->array, sizeof(struct Integer)*GET_LEN(inputSortArray->array))); |
414
49159fbdd1fb
Work CUDAbitonicSort
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
412
diff
changeset
|
110 checkCudaErrors(cuMemcpyHtoD(devB, &inputSortArray->block, sizeof(int))); |
49159fbdd1fb
Work CUDAbitonicSort
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
412
diff
changeset
|
111 checkCudaErrors(cuMemcpyHtoD(devC, &inputSortArray->first, sizeof(int))); |
49159fbdd1fb
Work CUDAbitonicSort
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
412
diff
changeset
|
112 checkCudaErrors(cuMemcpyHtoD(devD, &inputSortArray->prefix, sizeof(int))); |
319
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
113 |
410
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
114 // Asynchronous launch kernel |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
115 context->num_exec = 1; |
414
49159fbdd1fb
Work CUDAbitonicSort
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
412
diff
changeset
|
116 void* args[] = {&devA, &devB, &devC, &devD}; |
410
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
117 if (context->iterate) { |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
118 struct MultiDimIterator* iterator = &context->iterator->iterator->MultiDimIterator; |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
119 checkCudaErrors(cuLaunchKernel(context->function, |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
120 iterator->x, iterator->y, iterator->z, |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
121 1, 1, 1, |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
122 0, NULL, args, NULL)); |
319
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
123 |
410
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
124 } else { |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
125 checkCudaErrors(cuLaunchKernel(context->function, |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
126 1, 1, 1, |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
127 1, 1, 1, |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
128 0, NULL, args, NULL)); |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
129 } |
319
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
130 //結果を取ってくるコマンドを入力する |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
131 //コマンドの終了待ちを行う |
430
35b37fe8d3a7
Add size member in struct Meta
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
414
diff
changeset
|
132 checkCudaErrors(cuMemcpyDtoH(inputSortArray->array, devA, sizeof(struct Integer)*GET_LEN(inputSortArray->array))); |
414
49159fbdd1fb
Work CUDAbitonicSort
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
412
diff
changeset
|
133 outputSortArray->array = inputSortArray->array; |
319
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
134 // wait for stream |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
135 checkCudaErrors(cuCtxSynchronize()); |
412 | 136 cuMemFree(devA); |
137 cuMemFree(devB); | |
414
49159fbdd1fb
Work CUDAbitonicSort
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
412
diff
changeset
|
138 cuMemFree(devC); |
49159fbdd1fb
Work CUDAbitonicSort
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
412
diff
changeset
|
139 cuMemFree(devD); |
319
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
140 } |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
141 |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
142 void cudaShutdown( struct CUDAWorker *worker) { |
410
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
143 // for (int i=0;i<worker->num_stream;i++) |
85b0ddbf458e
Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
404
diff
changeset
|
144 // checkCudaErrors(cuStreamDestroy(worker->stream[i])); |
319
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
145 checkCudaErrors(cuCtxDestroy(worker->cuCtx)); |
a15511b1a6e0
separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
146 } |