Mercurial > hg > Members > Moririn
annotate src/parallel_execution/CUDAExecutor.cbc @ 464:7d67c9cf09ee
Rename from Time interface to Timer interface
author | Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Thu, 21 Dec 2017 00:07:27 +0900 |
parents | 8d7e5d48cad3 |
children | ac244346c85d |
rev | line source |
---|---|
435 | 1 #include "../context.h" |
464
7d67c9cf09ee
Rename from Time interface to Timer interface
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
462
diff
changeset
|
2 #include "Executor.h" |
435 | 3 #include <stdio.h> |
4 | |
451
dcc42f3e7e97
Auto choice blockDim
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
439
diff
changeset
|
5 Executor* createCUDAExecutor(struct Context* context, CUdevice device) { |
435 | 6 struct Executor* executor = new Executor(); |
7 struct CUDAExecutor* cudaExecutor = new CUDAExecutor(); | |
451
dcc42f3e7e97
Auto choice blockDim
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
439
diff
changeset
|
8 checkCudaErrors(cuDeviceGetAttribute(&cudaExecutor->maxThreadPerBlock, CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, device)); |
435 | 9 executor->executor = (union Data*)cudaExecutor; |
10 executor->read = C_readCUDAExecutor; | |
11 executor->exec = C_execCUDAExecutor; | |
12 executor->write = C_writeCUDAExecutor; | |
13 return executor; | |
14 } | |
15 | |
436
08a93fc2f0d3
Fix CudaExecutor but not work
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
435
diff
changeset
|
16 __code readCUDAExecutor(struct CUDAExecutor* executor, struct Context* task, __code next(...)) { |
08a93fc2f0d3
Fix CudaExecutor but not work
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
435
diff
changeset
|
17 struct CUDABuffer* buffer = executor->buffer; |
435 | 18 int paramLen = buffer->inputLen + buffer->outputLen; |
436
08a93fc2f0d3
Fix CudaExecutor but not work
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
435
diff
changeset
|
19 executor->kernelParams = (CUdeviceptr**)ALLOCATE_PTR_ARRAY(context, CUdeviceptr, paramLen); |
435 | 20 for (int i = 0; i < paramLen; i++) { |
437
2c1b1d56bf1e
Work CUDAbitonicSort by CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
436
diff
changeset
|
21 CUdeviceptr* deviceptr = new CUdeviceptr(); |
435 | 22 // memory allocate |
436
08a93fc2f0d3
Fix CudaExecutor but not work
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
435
diff
changeset
|
23 union Data* data = i < buffer->inputLen? buffer->inputData[i] : buffer->outputData[i-buffer->inputLen]; |
437
2c1b1d56bf1e
Work CUDAbitonicSort by CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
436
diff
changeset
|
24 checkCudaErrors(cuMemAlloc(deviceptr, GET_SIZE(data))); |
2c1b1d56bf1e
Work CUDAbitonicSort by CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
436
diff
changeset
|
25 checkCudaErrors(cuMemcpyHtoD(*deviceptr, data, GET_SIZE(data))); |
435 | 26 // Synchronous data transfer(host to device) |
437
2c1b1d56bf1e
Work CUDAbitonicSort by CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
436
diff
changeset
|
27 executor->kernelParams[i] = deviceptr; |
435 | 28 } |
436
08a93fc2f0d3
Fix CudaExecutor but not work
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
435
diff
changeset
|
29 // TODO: Implements pipeline |
08a93fc2f0d3
Fix CudaExecutor but not work
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
435
diff
changeset
|
30 // goto next(...); |
08a93fc2f0d3
Fix CudaExecutor but not work
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
435
diff
changeset
|
31 goto meta(context, C_execCUDAExecutor); |
435 | 32 } |
33 | |
451
dcc42f3e7e97
Auto choice blockDim
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
439
diff
changeset
|
/*
 * Choose the block dimension for one axis of a kernel launch.
 *
 * count:             number of work items along the axis.
 * maxThreadPerBlock: upper bound for the returned dimension.
 *
 * Returns the largest divisor of `count` that does not exceed
 * `maxThreadPerBlock`, and never less than 1.
 *
 * Callers derive the grid dimension as `count / blockDim`. Returning a
 * divisor makes that division exact, so no trailing work items are dropped
 * when `count` is larger than the limit but not a multiple of it. Returning
 * at least 1 keeps the division defined when `count` (or the limit) is 0.
 */
int computeblockDim(int count, int maxThreadPerBlock) {
    int blockDim = count < maxThreadPerBlock ? count : maxThreadPerBlock;
    if (blockDim < 1) {
        return 1;
    }
    // Here 1 <= blockDim <= count, so the search always stops (at 1 at the latest).
    while (count % blockDim != 0) {
        blockDim--;
    }
    return blockDim;
}
dcc42f3e7e97
Auto choice blockDim
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
439
diff
changeset
|
37 |
436
08a93fc2f0d3
Fix CudaExecutor but not work
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
435
diff
changeset
|
38 __code execCUDAExecutor(struct CUDAExecutor* executor, struct Context* task, __code next(...)) { |
435 | 39 // Asynchronous launch kernel |
40 task->num_exec = 1; | |
41 if (task->iterate) { | |
42 struct MultiDimIterator* iterator = &task->iterator->iterator->MultiDimIterator; | |
451
dcc42f3e7e97
Auto choice blockDim
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
439
diff
changeset
|
43 int blockDimX = computeblockDim(iterator->x, executor->maxThreadPerBlock); |
dcc42f3e7e97
Auto choice blockDim
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
439
diff
changeset
|
44 int blockDimY = computeblockDim(iterator->y, executor->maxThreadPerBlock); |
dcc42f3e7e97
Auto choice blockDim
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
439
diff
changeset
|
45 int blockDimZ = computeblockDim(iterator->z, executor->maxThreadPerBlock); |
436
08a93fc2f0d3
Fix CudaExecutor but not work
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
435
diff
changeset
|
46 checkCudaErrors(cuLaunchKernel(task->function, |
451
dcc42f3e7e97
Auto choice blockDim
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
439
diff
changeset
|
47 iterator->x/blockDimX, iterator->y/blockDimY, iterator->z/blockDimZ, |
dcc42f3e7e97
Auto choice blockDim
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
439
diff
changeset
|
48 blockDimX, blockDimY, blockDimZ, |
436
08a93fc2f0d3
Fix CudaExecutor but not work
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
435
diff
changeset
|
49 0, NULL, (void**)executor->kernelParams, NULL)); |
435 | 50 } else { |
436
08a93fc2f0d3
Fix CudaExecutor but not work
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
435
diff
changeset
|
51 checkCudaErrors(cuLaunchKernel(task->function, |
435 | 52 1, 1, 1, |
53 1, 1, 1, | |
436
08a93fc2f0d3
Fix CudaExecutor but not work
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
435
diff
changeset
|
54 0, NULL, (void**)executor->kernelParams, NULL)); |
435 | 55 } |
436
08a93fc2f0d3
Fix CudaExecutor but not work
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
435
diff
changeset
|
56 // TODO: Implements pipeline |
08a93fc2f0d3
Fix CudaExecutor but not work
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
435
diff
changeset
|
57 // goto next(...); |
08a93fc2f0d3
Fix CudaExecutor but not work
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
435
diff
changeset
|
58 goto meta(context, C_writeCUDAExecutor); |
435 | 59 } |
60 | |
436
08a93fc2f0d3
Fix CudaExecutor but not work
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
435
diff
changeset
|
61 __code writeCUDAExecutor(struct CUDAExecutor* executor, struct Context* task, __code next(...)) { |
435 | 62 //結果を取ってくるコマンドを入力する |
63 //コマンドの終了待ちを行う | |
436
08a93fc2f0d3
Fix CudaExecutor but not work
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
435
diff
changeset
|
64 struct CUDABuffer* buffer = executor->buffer; |
435 | 65 int paramLen = buffer->inputLen + buffer->outputLen; |
66 for (int i = 0; i < paramLen; i++) { | |
437
2c1b1d56bf1e
Work CUDAbitonicSort by CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
436
diff
changeset
|
67 CUdeviceptr deviceptr = *(executor->kernelParams[i]); |
436
08a93fc2f0d3
Fix CudaExecutor but not work
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
435
diff
changeset
|
68 union Data* data = i < buffer->inputLen? buffer->inputData[i] : buffer->outputData[i-buffer->inputLen]; |
437
2c1b1d56bf1e
Work CUDAbitonicSort by CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
436
diff
changeset
|
69 checkCudaErrors(cuMemcpyDtoH(data, deviceptr, GET_SIZE(data))); |
2c1b1d56bf1e
Work CUDAbitonicSort by CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
436
diff
changeset
|
70 cuMemFree(deviceptr); |
435 | 71 } |
72 // wait for stream | |
436
08a93fc2f0d3
Fix CudaExecutor but not work
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
435
diff
changeset
|
73 checkCudaErrors(cuCtxSynchronize()); |
08a93fc2f0d3
Fix CudaExecutor but not work
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
435
diff
changeset
|
74 goto next(...); |
435 | 75 } |