annotate src/parallel_execution/CUDAExecutor.cbc @ 464:7d67c9cf09ee

Rename from Time interface to Timer interface
author Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
date Thu, 21 Dec 2017 00:07:27 +0900
parents 8d7e5d48cad3
children ac244346c85d
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
435
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1 #include "../context.h"
464
7d67c9cf09ee Rename from Time interface to Timer interface
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 462
diff changeset
2 #include "Executor.h"
435
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
3 #include <stdio.h>
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
4
451
dcc42f3e7e97 Auto choice blockDim
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 439
diff changeset
5 Executor* createCUDAExecutor(struct Context* context, CUdevice device) {
435
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
6 struct Executor* executor = new Executor();
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
7 struct CUDAExecutor* cudaExecutor = new CUDAExecutor();
451
dcc42f3e7e97 Auto choice blockDim
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 439
diff changeset
8 checkCudaErrors(cuDeviceGetAttribute(&cudaExecutor->maxThreadPerBlock, CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, device));
435
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
9 executor->executor = (union Data*)cudaExecutor;
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
10 executor->read = C_readCUDAExecutor;
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
11 executor->exec = C_execCUDAExecutor;
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
12 executor->write = C_writeCUDAExecutor;
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
13 return executor;
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
14 }
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
15
436
08a93fc2f0d3 Fix CudaExecutor but not work
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 435
diff changeset
16 __code readCUDAExecutor(struct CUDAExecutor* executor, struct Context* task, __code next(...)) {
08a93fc2f0d3 Fix CudaExecutor but not work
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 435
diff changeset
17 struct CUDABuffer* buffer = executor->buffer;
435
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
18 int paramLen = buffer->inputLen + buffer->outputLen;
436
08a93fc2f0d3 Fix CudaExecutor but not work
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 435
diff changeset
19 executor->kernelParams = (CUdeviceptr**)ALLOCATE_PTR_ARRAY(context, CUdeviceptr, paramLen);
435
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
20 for (int i = 0; i < paramLen; i++) {
437
2c1b1d56bf1e Work CUDAbitonicSort by CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 436
diff changeset
21 CUdeviceptr* deviceptr = new CUdeviceptr();
435
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
22 // memory allocate
436
08a93fc2f0d3 Fix CudaExecutor but not work
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 435
diff changeset
23 union Data* data = i < buffer->inputLen? buffer->inputData[i] : buffer->outputData[i-buffer->inputLen];
437
2c1b1d56bf1e Work CUDAbitonicSort by CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 436
diff changeset
24 checkCudaErrors(cuMemAlloc(deviceptr, GET_SIZE(data)));
2c1b1d56bf1e Work CUDAbitonicSort by CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 436
diff changeset
25 checkCudaErrors(cuMemcpyHtoD(*deviceptr, data, GET_SIZE(data)));
435
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
26 // Synchronous data transfer(host to device)
437
2c1b1d56bf1e Work CUDAbitonicSort by CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 436
diff changeset
27 executor->kernelParams[i] = deviceptr;
435
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
28 }
436
08a93fc2f0d3 Fix CudaExecutor but not work
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 435
diff changeset
29 // TODO: Implements pipeline
08a93fc2f0d3 Fix CudaExecutor but not work
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 435
diff changeset
30 // goto next(...);
08a93fc2f0d3 Fix CudaExecutor but not work
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 435
diff changeset
31 goto meta(context, C_execCUDAExecutor);
435
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
32 }
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
33
451
dcc42f3e7e97 Auto choice blockDim
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 439
diff changeset
/*
 * Choose the block extent for one axis of a kernel launch.
 *
 * count:             number of work items along the axis.
 * maxThreadPerBlock: upper bound for the returned extent.
 *
 * Returns min(count, maxThreadPerBlock), but never less than 1: the result
 * is used as a divisor and as a launch dimension, so 0 or a negative value
 * is never returned.
 */
int computeblockDim(int count, int maxThreadPerBlock) {
    int dim = count < maxThreadPerBlock ? count : maxThreadPerBlock;
    return dim < 1 ? 1 : dim;
}
dcc42f3e7e97 Auto choice blockDim
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 439
diff changeset
37
436
08a93fc2f0d3 Fix CudaExecutor but not work
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 435
diff changeset
38 __code execCUDAExecutor(struct CUDAExecutor* executor, struct Context* task, __code next(...)) {
435
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
39 // Asynchronous launch kernel
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
40 task->num_exec = 1;
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
41 if (task->iterate) {
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
42 struct MultiDimIterator* iterator = &task->iterator->iterator->MultiDimIterator;
451
dcc42f3e7e97 Auto choice blockDim
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 439
diff changeset
43 int blockDimX = computeblockDim(iterator->x, executor->maxThreadPerBlock);
dcc42f3e7e97 Auto choice blockDim
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 439
diff changeset
44 int blockDimY = computeblockDim(iterator->y, executor->maxThreadPerBlock);
dcc42f3e7e97 Auto choice blockDim
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 439
diff changeset
45 int blockDimZ = computeblockDim(iterator->z, executor->maxThreadPerBlock);
436
08a93fc2f0d3 Fix CudaExecutor but not work
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 435
diff changeset
46 checkCudaErrors(cuLaunchKernel(task->function,
451
dcc42f3e7e97 Auto choice blockDim
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 439
diff changeset
47 iterator->x/blockDimX, iterator->y/blockDimY, iterator->z/blockDimZ,
dcc42f3e7e97 Auto choice blockDim
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 439
diff changeset
48 blockDimX, blockDimY, blockDimZ,
436
08a93fc2f0d3 Fix CudaExecutor but not work
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 435
diff changeset
49 0, NULL, (void**)executor->kernelParams, NULL));
435
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
50 } else {
436
08a93fc2f0d3 Fix CudaExecutor but not work
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 435
diff changeset
51 checkCudaErrors(cuLaunchKernel(task->function,
435
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
52 1, 1, 1,
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
53 1, 1, 1,
436
08a93fc2f0d3 Fix CudaExecutor but not work
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 435
diff changeset
54 0, NULL, (void**)executor->kernelParams, NULL));
435
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
55 }
436
08a93fc2f0d3 Fix CudaExecutor but not work
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 435
diff changeset
56 // TODO: Implements pipeline
08a93fc2f0d3 Fix CudaExecutor but not work
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 435
diff changeset
57 // goto next(...);
08a93fc2f0d3 Fix CudaExecutor but not work
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 435
diff changeset
58 goto meta(context, C_writeCUDAExecutor);
435
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
59 }
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
60
436
08a93fc2f0d3 Fix CudaExecutor but not work
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 435
diff changeset
61 __code writeCUDAExecutor(struct CUDAExecutor* executor, struct Context* task, __code next(...)) {
435
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
62 //結果を取ってくるコマンドを入力する
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
63 //コマンドの終了待ちを行う
436
08a93fc2f0d3 Fix CudaExecutor but not work
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 435
diff changeset
64 struct CUDABuffer* buffer = executor->buffer;
435
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
65 int paramLen = buffer->inputLen + buffer->outputLen;
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
66 for (int i = 0; i < paramLen; i++) {
437
2c1b1d56bf1e Work CUDAbitonicSort by CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 436
diff changeset
67 CUdeviceptr deviceptr = *(executor->kernelParams[i]);
436
08a93fc2f0d3 Fix CudaExecutor but not work
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 435
diff changeset
68 union Data* data = i < buffer->inputLen? buffer->inputData[i] : buffer->outputData[i-buffer->inputLen];
437
2c1b1d56bf1e Work CUDAbitonicSort by CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 436
diff changeset
69 checkCudaErrors(cuMemcpyDtoH(data, deviceptr, GET_SIZE(data)));
2c1b1d56bf1e Work CUDAbitonicSort by CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 436
diff changeset
70 cuMemFree(deviceptr);
435
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
71 }
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
72 // wait for stream
436
08a93fc2f0d3 Fix CudaExecutor but not work
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 435
diff changeset
73 checkCudaErrors(cuCtxSynchronize());
08a93fc2f0d3 Fix CudaExecutor but not work
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents: 435
diff changeset
74 goto next(...);
435
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
75 }