Mercurial > hg > Members > Moririn
annotate src/parallel_execution/CUDAExecutor.cbc @ 462:8d7e5d48cad3
Running CPU examples
author | Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Wed, 20 Dec 2017 22:05:08 +0900 |
parents | dcc42f3e7e97 |
children | 7d67c9cf09ee |
rev | line source |
---|---|
435 | 1 #include "../context.h" |
2 #include <stdio.h> | |
3 | |
451
dcc42f3e7e97
Auto choice blockDim
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
439
diff
changeset
|
4 Executor* createCUDAExecutor(struct Context* context, CUdevice device) { |
435 | 5 struct Executor* executor = new Executor(); |
6 struct CUDAExecutor* cudaExecutor = new CUDAExecutor(); | |
451
dcc42f3e7e97
Auto choice blockDim
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
439
diff
changeset
|
7 checkCudaErrors(cuDeviceGetAttribute(&cudaExecutor->maxThreadPerBlock, CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, device)); |
435 | 8 executor->executor = (union Data*)cudaExecutor; |
9 executor->read = C_readCUDAExecutor; | |
10 executor->exec = C_execCUDAExecutor; | |
11 executor->write = C_writeCUDAExecutor; | |
12 return executor; | |
13 } | |
14 | |
436
08a93fc2f0d3
Fix CudaExecutor but not work
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
435
diff
changeset
|
15 __code readCUDAExecutor(struct CUDAExecutor* executor, struct Context* task, __code next(...)) { |
08a93fc2f0d3
Fix CudaExecutor but not work
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
435
diff
changeset
|
16 struct CUDABuffer* buffer = executor->buffer; |
435 | 17 int paramLen = buffer->inputLen + buffer->outputLen; |
436
08a93fc2f0d3
Fix CudaExecutor but not work
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
435
diff
changeset
|
18 executor->kernelParams = (CUdeviceptr**)ALLOCATE_PTR_ARRAY(context, CUdeviceptr, paramLen); |
435 | 19 for (int i = 0; i < paramLen; i++) { |
437
2c1b1d56bf1e
Work CUDAbitonicSort by CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
436
diff
changeset
|
20 CUdeviceptr* deviceptr = new CUdeviceptr(); |
435 | 21 // memory allocate |
436
08a93fc2f0d3
Fix CudaExecutor but not work
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
435
diff
changeset
|
22 union Data* data = i < buffer->inputLen? buffer->inputData[i] : buffer->outputData[i-buffer->inputLen]; |
437
2c1b1d56bf1e
Work CUDAbitonicSort by CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
436
diff
changeset
|
23 checkCudaErrors(cuMemAlloc(deviceptr, GET_SIZE(data))); |
2c1b1d56bf1e
Work CUDAbitonicSort by CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
436
diff
changeset
|
24 checkCudaErrors(cuMemcpyHtoD(*deviceptr, data, GET_SIZE(data))); |
435 | 25 // Synchronous data transfer(host to device) |
437
2c1b1d56bf1e
Work CUDAbitonicSort by CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
436
diff
changeset
|
26 executor->kernelParams[i] = deviceptr; |
435 | 27 } |
436
08a93fc2f0d3
Fix CudaExecutor but not work
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
435
diff
changeset
|
28 // TODO: Implements pipeline |
08a93fc2f0d3
Fix CudaExecutor but not work
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
435
diff
changeset
|
29 // goto next(...); |
08a93fc2f0d3
Fix CudaExecutor but not work
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
435
diff
changeset
|
30 goto meta(context, C_execCUDAExecutor); |
435 | 31 } |
32 | |
451
dcc42f3e7e97
Auto choice blockDim
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
439
diff
changeset
|
// Chooses the block extent for one axis: the smaller of the element count and
// the per-block thread limit, but never less than 1.
//
// count:             number of elements along the axis.
// maxThreadPerBlock: upper bound for the returned extent.
// Returns a value >= 1. The lower clamp matters because the caller divides by
// the result and passes it to the kernel launch as a block dimension; a zero
// or negative count/limit would otherwise produce a zero divisor.
int computeblockDim(int count, int maxThreadPerBlock) {
    int dim = count < maxThreadPerBlock ? count : maxThreadPerBlock;
    return dim < 1 ? 1 : dim;
}
dcc42f3e7e97
Auto choice blockDim
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
439
diff
changeset
|
36 |
436
08a93fc2f0d3
Fix CudaExecutor but not work
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
435
diff
changeset
|
37 __code execCUDAExecutor(struct CUDAExecutor* executor, struct Context* task, __code next(...)) { |
435 | 38 // Asynchronous launch kernel |
39 task->num_exec = 1; | |
40 if (task->iterate) { | |
41 struct MultiDimIterator* iterator = &task->iterator->iterator->MultiDimIterator; | |
451
dcc42f3e7e97
Auto choice blockDim
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
439
diff
changeset
|
42 int blockDimX = computeblockDim(iterator->x, executor->maxThreadPerBlock); |
dcc42f3e7e97
Auto choice blockDim
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
439
diff
changeset
|
43 int blockDimY = computeblockDim(iterator->y, executor->maxThreadPerBlock); |
dcc42f3e7e97
Auto choice blockDim
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
439
diff
changeset
|
44 int blockDimZ = computeblockDim(iterator->z, executor->maxThreadPerBlock); |
436
08a93fc2f0d3
Fix CudaExecutor but not work
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
435
diff
changeset
|
45 checkCudaErrors(cuLaunchKernel(task->function, |
451
dcc42f3e7e97
Auto choice blockDim
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
439
diff
changeset
|
46 iterator->x/blockDimX, iterator->y/blockDimY, iterator->z/blockDimZ, |
dcc42f3e7e97
Auto choice blockDim
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
439
diff
changeset
|
47 blockDimX, blockDimY, blockDimZ, |
436
08a93fc2f0d3
Fix CudaExecutor but not work
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
435
diff
changeset
|
48 0, NULL, (void**)executor->kernelParams, NULL)); |
435 | 49 } else { |
436
08a93fc2f0d3
Fix CudaExecutor but not work
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
435
diff
changeset
|
50 checkCudaErrors(cuLaunchKernel(task->function, |
435 | 51 1, 1, 1, |
52 1, 1, 1, | |
436
08a93fc2f0d3
Fix CudaExecutor but not work
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
435
diff
changeset
|
53 0, NULL, (void**)executor->kernelParams, NULL)); |
435 | 54 } |
436
08a93fc2f0d3
Fix CudaExecutor but not work
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
435
diff
changeset
|
55 // TODO: Implements pipeline |
08a93fc2f0d3
Fix CudaExecutor but not work
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
435
diff
changeset
|
56 // goto next(...); |
08a93fc2f0d3
Fix CudaExecutor but not work
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
435
diff
changeset
|
57 goto meta(context, C_writeCUDAExecutor); |
435 | 58 } |
59 | |
436
08a93fc2f0d3
Fix CudaExecutor but not work
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
435
diff
changeset
|
60 __code writeCUDAExecutor(struct CUDAExecutor* executor, struct Context* task, __code next(...)) { |
435 | 61 //結果を取ってくるコマンドを入力する |
62 //コマンドの終了待ちを行う | |
436
08a93fc2f0d3
Fix CudaExecutor but not work
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
435
diff
changeset
|
63 struct CUDABuffer* buffer = executor->buffer; |
435 | 64 int paramLen = buffer->inputLen + buffer->outputLen; |
65 for (int i = 0; i < paramLen; i++) { | |
437
2c1b1d56bf1e
Work CUDAbitonicSort by CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
436
diff
changeset
|
66 CUdeviceptr deviceptr = *(executor->kernelParams[i]); |
436
08a93fc2f0d3
Fix CudaExecutor but not work
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
435
diff
changeset
|
67 union Data* data = i < buffer->inputLen? buffer->inputData[i] : buffer->outputData[i-buffer->inputLen]; |
437
2c1b1d56bf1e
Work CUDAbitonicSort by CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
436
diff
changeset
|
68 checkCudaErrors(cuMemcpyDtoH(data, deviceptr, GET_SIZE(data))); |
2c1b1d56bf1e
Work CUDAbitonicSort by CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
436
diff
changeset
|
69 cuMemFree(deviceptr); |
435 | 70 } |
71 // wait for stream | |
436
08a93fc2f0d3
Fix CudaExecutor but not work
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
435
diff
changeset
|
72 checkCudaErrors(cuCtxSynchronize()); |
08a93fc2f0d3
Fix CudaExecutor but not work
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
435
diff
changeset
|
73 goto next(...); |
435 | 74 } |