435
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
1 #include "../context.h"
|
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
2 #include <stdio.h>
|
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
3
|
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
4 // includes, project
|
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
5 #include <driver_types.h>
|
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
6 #include <cuda_runtime.h>
|
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
7 #include <cuda.h>
|
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
8 #include "helper_cuda.h"
|
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
9
|
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
10 Executor* createCUDAExecutor(struct Context* context) {
|
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
11 struct Executor* executor = new Executor();
|
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
12 struct CUDAExecutor* cudaExecutor = new CUDAExecutor();
|
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
13 executor->executor = (union Data*)cudaExecutor;
|
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
14 executor->read = C_readCUDAExecutor;
|
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
15 executor->exec = C_execCUDAExecutor;
|
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
16 executor->write = C_writeCUDAExecutor;
|
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
17 return executor;
|
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
18 }
|
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
19
|
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
20 __code readCUDAExecutor(struct CUDAExecutor* executor, struct Context* task) {
|
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
21 int paramLen = buffer->inputLen + buffer->outputLen;
|
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
22 struct CUDABuffer buffer = executor->buffer;
|
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
23 buffer->kernelParams = ALLOCATE_PTR_ARRAY(context, CudevicePtr, paramLen);
|
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
24 struct CUDABuffer buffer = executor->buffer;
|
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
25 CUdeviceptr* deviceptrs = ALLOCATE_ARRAY(context, CudevicePtr, paramLen);
|
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
26 for (int i = 0; i < paramLen; i++) {
|
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
27 CUdeviceptr deviceptr = deviceptrs[i];
|
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
28 // memory allocate
|
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
29 union Data* data = i < inputLen? buffer->inputData[i] : buffer->outputData[i-inputLen];
|
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
30 checkCUDAErrors(cuMemAlloc(deviceptr, GET_SIZE(data)));
|
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
31 checkCUDAErrors(cuMemcpyHtoD(deviceptr, data, GET_SIZE(data)));
|
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
32 // Synchronous data transfer(host to device)
|
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
33 buffer->kernelParams[paramCount++] = &deviceptr;
|
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
34 }
|
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
35 }
|
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
36
|
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
37 void cudaLoadFunction(struct Context* context, char* filename, char* function) {
|
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
38 checkCUDAErrors(cuModuleLoad(&context->module, filename));
|
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
39 checkCUDAErrors(cuModuleGetFunction(&context->function, context->module, function));
|
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
40 }
|
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
41
|
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
42 __code execCUDAExecutor(struct CUDAExecutor* executor, struct Context* task) {
|
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
43 // Asynchronous launch kernel
|
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
44 task->num_exec = 1;
|
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
45 struct CUDABuffer buffer = executor->buffer;
|
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
46 if (task->iterate) {
|
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
47 struct MultiDimIterator* iterator = &task->iterator->iterator->MultiDimIterator;
|
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
48 checkCUDAErrors(cuLaunchKernel(task->function,
|
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
49 iterator->x, iterator->y, iterator->z,
|
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
50 1, 1, 1,
|
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
51 0, NULL, (void**)buffer->kernelParams, NULL));
|
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
52 } else {
|
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
53 checkCUDAErrors(cuLaunchKernel(task->function,
|
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
54 1, 1, 1,
|
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
55 1, 1, 1,
|
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
56 0, NULL, (void**)buffer->kernelParams, NULL));
|
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
57 }
|
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
58 }
|
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
59
|
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
60 __code writeCUDAExecutor(struct CUDAExecutor* executor, struct Context* task) {
|
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
61 //結果を取ってくるコマンドを入力する
|
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
62 //コマンドの終了待ちを行う
|
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
63 int paramLen = buffer->inputLen + buffer->outputLen;
|
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
64 struct CUDABuffer buffer = executor->buffer;
|
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
65 for (int i = 0; i < paramLen; i++) {
|
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
66 CUdeviceptr* deviceptr = buffer->kernelParams[i];
|
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
67 union Data* data = i < inputLen? buffer->inputData[i] : buffer->outputData[i-inputLen];
|
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
68 checkCUDAErrors(cuMemcpyDtoH(data, *deviceptr, GET_SIZE(data)));
|
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
69 cuMemFree(*deviceptr);
|
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
70 }
|
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
71 // wait for stream
|
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
72 checkCUDAErrors(cuCtxSynchronize());
|
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
73 }
|