annotate src/parallel_execution/cuda.c @ 430:35b37fe8d3a7

Add size member in struct Meta
author Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
date Mon, 09 Oct 2017 17:46:42 +0900
parents 49159fbdd1fb
children b3359544adbb
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
319
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1 #include <stdio.h>
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2 #include <sys/time.h>
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3 #include <string.h>
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
4 #include <stdlib.h>
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
5 #include <libkern/OSAtomic.h>
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
6
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
7 // includes, project
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
8 #include <driver_types.h>
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
9 #include <cuda_runtime.h>
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
10 #include <cuda.h>
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
11 #include "helper_cuda.h"
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
12 #include "pthread.h"
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
13
410
85b0ddbf458e Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 404
diff changeset
14 #include "context.h"
319
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
15
410
85b0ddbf458e Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 404
diff changeset
16 /*
319
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
17 struct Context {
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
18 int next;
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
19 struct Worker* worker;
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
20 struct TaskManager* taskManager;
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
21 int codeNum;
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
22 void (**code) (struct Context*);
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
23 void* heapStart;
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
24 void* heap;
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
25 long heapLimit;
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
26 int dataNum;
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
27 int idgCount; //number of waiting dataGear
410
85b0ddbf458e Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 404
diff changeset
28 int idg;
85b0ddbf458e Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 404
diff changeset
29 int maxIdg;
319
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
30 int odg;
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
31 int maxOdg;
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
32 int workerId;
410
85b0ddbf458e Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 404
diff changeset
33 struct Context* task;
85b0ddbf458e Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 404
diff changeset
34 struct Queue* tasks;
319
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
35 int num_exec;
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
36 CUmodule module;
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
37 CUfunction function;
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
38 union Data **data;
410
85b0ddbf458e Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 404
diff changeset
39
85b0ddbf458e Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 404
diff changeset
40 // multi dimension parameter
85b0ddbf458e Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 404
diff changeset
41 int iterate;
85b0ddbf458e Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 404
diff changeset
42 struct Iterator* iterator;
319
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
43 };
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
44
410
85b0ddbf458e Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 404
diff changeset
45 struct CUDAWorker {
85b0ddbf458e Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 404
diff changeset
46 CUdevice device;
85b0ddbf458e Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 404
diff changeset
47 CUcontext cuCtx;
85b0ddbf458e Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 404
diff changeset
48 pthread_t thread;
85b0ddbf458e Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 404
diff changeset
49 struct Context* context;
85b0ddbf458e Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 404
diff changeset
50 int id;
85b0ddbf458e Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 404
diff changeset
51 struct Queue* tasks;
85b0ddbf458e Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 404
diff changeset
52 int runFlag;
85b0ddbf458e Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 404
diff changeset
53 int next;
85b0ddbf458e Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 404
diff changeset
54 int num_stream;
85b0ddbf458e Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 404
diff changeset
55 CUstream *stream;
85b0ddbf458e Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 404
diff changeset
56 } CUDAWorker;
319
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
57
410
85b0ddbf458e Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 404
diff changeset
58 struct LoopCounter {
85b0ddbf458e Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 404
diff changeset
59 int i;
85b0ddbf458e Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 404
diff changeset
60 } LoopCounter;
319
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
61
410
85b0ddbf458e Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 404
diff changeset
62 struct Array {
85b0ddbf458e Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 404
diff changeset
63 int size;
85b0ddbf458e Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 404
diff changeset
64 int index;
85b0ddbf458e Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 404
diff changeset
65 int prefix;
85b0ddbf458e Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 404
diff changeset
66 int* array;
85b0ddbf458e Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 404
diff changeset
67 } Array;
85b0ddbf458e Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 404
diff changeset
68 */
319
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
69
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
70 void cudaInit(struct CUDAWorker *cudaWorker,int phase) {
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
71 // initialize and load kernel
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
72 cudaWorker->num_stream = 1; // number of stream
410
85b0ddbf458e Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 404
diff changeset
73 // cudaWorker->stream = NEWN(cudaWorker->num_stream, CUstream );
85b0ddbf458e Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 404
diff changeset
74 if (phase==0)
85b0ddbf458e Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 404
diff changeset
75 checkCudaErrors(cuInit(0));
85b0ddbf458e Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 404
diff changeset
76 if (phase==0)
85b0ddbf458e Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 404
diff changeset
77 checkCudaErrors(cuDeviceGet(&cudaWorker->device, 0));
85b0ddbf458e Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 404
diff changeset
78 if (phase==0)
85b0ddbf458e Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 404
diff changeset
79 checkCudaErrors(cuCtxCreate(&cudaWorker->cuCtx, CU_CTX_SCHED_SPIN, cudaWorker->device));
85b0ddbf458e Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 404
diff changeset
80 // if (cudaWorker->num_stream) {
85b0ddbf458e Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 404
diff changeset
81 // for (int i=0;i<cudaWorker->num_stream;i++)
85b0ddbf458e Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 404
diff changeset
82 // checkCudaErrors(cuStreamCreate(&cudaWorker->stream[i],0));
85b0ddbf458e Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 404
diff changeset
83 // }
85b0ddbf458e Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 404
diff changeset
84 printf("cuda Init: Done\n");
319
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
85 }
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
86
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
87
414
49159fbdd1fb Work CUDAbitonicSort
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 412
diff changeset
88 void CUDAExec(struct Context* context, struct SortArray* inputSortArray, struct SortArray* outputSortArray) {
49159fbdd1fb Work CUDAbitonicSort
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 412
diff changeset
89 //printf("cuda exec start\n");
319
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
90 // Worker *worker = context->worker;
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
91 // CUDAWorker* cudaWorker = (CUDAWorker*)worker->worker;
410
85b0ddbf458e Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 404
diff changeset
92 // memory allocate
319
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
93 CUdeviceptr devA;
411
0eba9a04633f Work CUDAtwice
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 410
diff changeset
94 CUdeviceptr devB;
414
49159fbdd1fb Work CUDAbitonicSort
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 412
diff changeset
95 CUdeviceptr devC;
49159fbdd1fb Work CUDAbitonicSort
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 412
diff changeset
96 CUdeviceptr devD;
319
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
97
430
35b37fe8d3a7 Add size member in struct Meta
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 414
diff changeset
98 checkCudaErrors(cuMemAlloc(&devA, sizeof(struct Integer)*GET_LEN(inputSortArray->array)));
411
0eba9a04633f Work CUDAtwice
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 410
diff changeset
99 checkCudaErrors(cuMemAlloc(&devB, sizeof(int)));
414
49159fbdd1fb Work CUDAbitonicSort
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 412
diff changeset
100 checkCudaErrors(cuMemAlloc(&devC, sizeof(int)));
49159fbdd1fb Work CUDAbitonicSort
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 412
diff changeset
101 checkCudaErrors(cuMemAlloc(&devD, sizeof(int)));
319
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
102
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
103 //twiceカーネルが定義されてなければそれをロードする
414
49159fbdd1fb Work CUDAbitonicSort
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 412
diff changeset
104 checkCudaErrors(cuModuleLoad(&context->module, "c/examples/bitonicSort/CUDAbitonicSwap.ptx"));
49159fbdd1fb Work CUDAbitonicSort
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 412
diff changeset
105 checkCudaErrors(cuModuleGetFunction(&context->function, context->module, "bitonicSwap"));
319
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
106
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
107 //入力のDataGearをGPUにbuffer経由で送る
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
108 // Synchronous data transfer(host to device)
430
35b37fe8d3a7 Add size member in struct Meta
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 414
diff changeset
109 checkCudaErrors(cuMemcpyHtoD(devA, inputSortArray->array, sizeof(struct Integer)*GET_LEN(inputSortArray->array)));
414
49159fbdd1fb Work CUDAbitonicSort
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 412
diff changeset
110 checkCudaErrors(cuMemcpyHtoD(devB, &inputSortArray->block, sizeof(int)));
49159fbdd1fb Work CUDAbitonicSort
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 412
diff changeset
111 checkCudaErrors(cuMemcpyHtoD(devC, &inputSortArray->first, sizeof(int)));
49159fbdd1fb Work CUDAbitonicSort
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 412
diff changeset
112 checkCudaErrors(cuMemcpyHtoD(devD, &inputSortArray->prefix, sizeof(int)));
319
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
113
410
85b0ddbf458e Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 404
diff changeset
114 // Asynchronous launch kernel
85b0ddbf458e Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 404
diff changeset
115 context->num_exec = 1;
414
49159fbdd1fb Work CUDAbitonicSort
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 412
diff changeset
116 void* args[] = {&devA, &devB, &devC, &devD};
410
85b0ddbf458e Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 404
diff changeset
117 if (context->iterate) {
85b0ddbf458e Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 404
diff changeset
118 struct MultiDimIterator* iterator = &context->iterator->iterator->MultiDimIterator;
85b0ddbf458e Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 404
diff changeset
119 checkCudaErrors(cuLaunchKernel(context->function,
85b0ddbf458e Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 404
diff changeset
120 iterator->x, iterator->y, iterator->z,
85b0ddbf458e Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 404
diff changeset
121 1, 1, 1,
85b0ddbf458e Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 404
diff changeset
122 0, NULL, args, NULL));
319
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
123
410
85b0ddbf458e Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 404
diff changeset
124 } else {
85b0ddbf458e Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 404
diff changeset
125 checkCudaErrors(cuLaunchKernel(context->function,
85b0ddbf458e Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 404
diff changeset
126 1, 1, 1,
85b0ddbf458e Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 404
diff changeset
127 1, 1, 1,
85b0ddbf458e Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 404
diff changeset
128 0, NULL, args, NULL));
85b0ddbf458e Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 404
diff changeset
129 }
319
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
130 //結果を取ってくるコマンドを入力する
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
131 //コマンドの終了待ちを行う
430
35b37fe8d3a7 Add size member in struct Meta
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 414
diff changeset
132 checkCudaErrors(cuMemcpyDtoH(inputSortArray->array, devA, sizeof(struct Integer)*GET_LEN(inputSortArray->array)));
414
49159fbdd1fb Work CUDAbitonicSort
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 412
diff changeset
133 outputSortArray->array = inputSortArray->array;
319
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
134 // wait for stream
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
135 checkCudaErrors(cuCtxSynchronize());
412
409e6b5fb775 Add cuMemFree
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 411
diff changeset
136 cuMemFree(devA);
409e6b5fb775 Add cuMemFree
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 411
diff changeset
137 cuMemFree(devB);
414
49159fbdd1fb Work CUDAbitonicSort
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 412
diff changeset
138 cuMemFree(devC);
49159fbdd1fb Work CUDAbitonicSort
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 412
diff changeset
139 cuMemFree(devD);
319
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
140 }
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
141
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
142 void cudaShutdown( struct CUDAWorker *worker) {
410
85b0ddbf458e Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 404
diff changeset
143 // for (int i=0;i<worker->num_stream;i++)
85b0ddbf458e Fix CudaWorker
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 404
diff changeset
144 // checkCudaErrors(cuStreamDestroy(worker->stream[i]));
319
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
145 checkCudaErrors(cuCtxDestroy(worker->cuCtx));
a15511b1a6e0 separate cuda.c, and USE_CUDA_MAIN_THREAD flag
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
146 }