Mercurial > hg > Members > Moririn
annotate src/test/twice.cc @ 300:8bbc0012e1a4
checkErrors on an example
author | Shinji KONO <kono@ie.u-ryukyu.ac.jp> |
---|---|
date | Sun, 12 Feb 2017 09:12:21 +0900 |
parents | b387b224790c |
children | 609bf62768b9 |
rev | line source |
---|---|
290 | 1 #include <stdio.h> |
2 #include <sys/time.h> | |
3 #include <string.h> | |
4 #include <stdlib.h> | |
5 | |
292 | 6 #include <cuda.h> |
7 | |
291 | 8 #include <cuda_runtime.h> |
292 | 9 #include "helper_cuda.h" |
290 | 10 |
11 #define LENGTH (10) | |
12 #define THREAD (10) | |
13 | |
// Returns the current wall-clock time reported by gettimeofday(), expressed
// in seconds as a double (whole seconds plus the microsecond part).
double
getTime() {
    struct timeval now;
    gettimeofday(&now, NULL);
    const double micro_part = (double)now.tv_usec*1e-6;
    return now.tv_sec + micro_part;
}
20 | |
21 void | |
22 check_data(float* A, float B, float* C) { | |
23 for (int i=0; i<LENGTH*THREAD; i++) { | |
24 if (A[i]*B!=C[i]) { | |
25 puts("multiply failure."); | |
26 return; | |
27 } | |
28 } | |
29 puts("success."); | |
30 } | |
31 | |
32 void print_result(float* C) { | |
33 for (int i=0; i<LENGTH*THREAD; i++) { | |
34 printf("%f\n",C[i]); | |
35 } | |
36 } | |
37 | |
38 int main(int args, char* argv[]) { | |
39 int num_stream = 1; // number of stream | |
40 int num_exec = 16; // number of executed kernel | |
41 | |
42 for (int i=1;argv[i];i++) { | |
43 if (strcmp(argv[i], "--stream") == 0 || strcmp(argv[i], "-s") == 0) { | |
44 num_stream = atoi(argv[++i]); | |
45 } | |
300
8bbc0012e1a4
checkErrors on an example
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
299
diff
changeset
|
46 if (strcmp(argv[i], "--numExec") == 0 || strcmp(argv[i], "-e") == 0) { |
8bbc0012e1a4
checkErrors on an example
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
299
diff
changeset
|
47 num_exec = atoi(argv[++i]); |
8bbc0012e1a4
checkErrors on an example
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
299
diff
changeset
|
48 } |
290 | 49 } |
50 | |
51 // initialize and load kernel | |
52 CUdevice device; | |
53 CUcontext context; | |
54 CUmodule module; | |
55 CUfunction function; | |
56 CUstream stream[num_stream]; | |
57 | |
292 | 58 checkCudaErrors(cuInit(0)); |
59 checkCudaErrors(cuDeviceGet(&device, 0)); | |
60 checkCudaErrors(cuCtxCreate(&context, CU_CTX_SCHED_SPIN, device)); | |
61 checkCudaErrors(cuModuleLoad(&module, "multiply.ptx")); | |
62 checkCudaErrors(cuModuleGetFunction(&function, module, "multiply")); | |
300
8bbc0012e1a4
checkErrors on an example
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
299
diff
changeset
|
63 if (num_stream) { |
8bbc0012e1a4
checkErrors on an example
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
299
diff
changeset
|
64 for (int i=0;i<num_stream;i++) |
8bbc0012e1a4
checkErrors on an example
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
299
diff
changeset
|
65 checkCudaErrors(cuStreamCreate(&stream[i],0)); |
8bbc0012e1a4
checkErrors on an example
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
299
diff
changeset
|
66 } |
290 | 67 |
68 // memory allocate | |
69 CUdeviceptr devA; | |
70 CUdeviceptr devB[num_exec]; | |
71 CUdeviceptr devOut[num_exec]; | |
72 | |
292 | 73 checkCudaErrors(cuMemAlloc(&devA, LENGTH*THREAD*sizeof(float))); |
290 | 74 for (int i=0;i<num_exec;i++) { |
292 | 75 checkCudaErrors(cuMemAlloc(&devB[i], sizeof(float))); |
76 checkCudaErrors(cuMemAlloc(&devOut[i], LENGTH*THREAD*sizeof(float))); | |
290 | 77 } |
78 | |
79 // input buffer | |
80 float* A = new float[LENGTH*THREAD]; | |
81 float* B = new float[num_exec]; | |
82 | |
83 for (int i=0; i<LENGTH*THREAD; i++) | |
84 A[i] = (float)(i+1000); | |
85 | |
86 // output buffer | |
87 float** result = new float* [num_exec]; | |
88 | |
89 for (int i=0;i<num_exec;i++) | |
90 result[i] = new float[LENGTH*THREAD]; | |
91 | |
92 // Synchronous data transfer(host to device) | |
292 | 93 checkCudaErrors(cuMemcpyHtoD(devA, A, LENGTH*THREAD*sizeof(float))); |
290 | 94 |
95 // Asynchronous data transfer(host to device) | |
96 int cur = 0; | |
97 | |
98 for (int i=0;i<num_exec;i++,cur++) { | |
99 if (num_stream <= cur) | |
100 cur = 0; | |
101 B[i] = (float)(i+1); | |
300
8bbc0012e1a4
checkErrors on an example
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
299
diff
changeset
|
102 if (num_stream) { |
8bbc0012e1a4
checkErrors on an example
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
299
diff
changeset
|
103 checkCudaErrors(cuMemcpyHtoDAsync(devB[i], &B[i], sizeof(float), stream[cur])); |
8bbc0012e1a4
checkErrors on an example
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
299
diff
changeset
|
104 } else { |
8bbc0012e1a4
checkErrors on an example
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
299
diff
changeset
|
105 checkCudaErrors(cuMemcpyHtoD(devB[i], &B[i], sizeof(float))); |
8bbc0012e1a4
checkErrors on an example
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
299
diff
changeset
|
106 } |
290 | 107 } |
108 | |
109 cur = 0; | |
110 | |
111 // Asynchronous launch kernel | |
112 for (int i=0;i<num_exec;i++,cur++) { | |
113 if (num_stream <= cur) | |
114 cur=0; | |
115 B[i] = (float)(i+1); | |
116 //cuMemcpyHtoDAsync(devB[i], &B[i], sizeof(float), stream[cur]); | |
117 void* args[] = {&devA, &devB[i], &devOut[i]}; | |
292 | 118 checkCudaErrors(cuLaunchKernel(function, |
290 | 119 LENGTH, 1, 1, |
120 THREAD, 1, 1, | |
121 0, stream[cur], args, NULL)); | |
122 //cuMemcpyDtoHAsync(result[i], devOut[i], LENGTH*THREAD*sizeof(float), stream[cur]); | |
123 } | |
124 | |
125 cur = 0; | |
126 | |
127 | |
128 // Asynchronous data transfer(device to host) | |
129 for (int i=0;i<num_exec;i++,cur++) { | |
130 if (num_stream <= cur) | |
131 cur = 0; | |
300
8bbc0012e1a4
checkErrors on an example
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
299
diff
changeset
|
132 if (num_stream) { |
8bbc0012e1a4
checkErrors on an example
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
299
diff
changeset
|
133 checkCudaErrors(cuMemcpyDtoHAsync(result[i], devOut[i], LENGTH*THREAD*sizeof(float), stream[cur])); |
8bbc0012e1a4
checkErrors on an example
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
299
diff
changeset
|
134 } else { |
8bbc0012e1a4
checkErrors on an example
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
299
diff
changeset
|
135 checkCudaErrors(cuMemcpyDtoH(result[i], devOut[i], LENGTH*THREAD*sizeof(float))); |
8bbc0012e1a4
checkErrors on an example
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
299
diff
changeset
|
136 } |
290 | 137 } |
138 | |
139 // wait for stream | |
140 for (int i=0;i<num_stream;i++) | |
292 | 141 checkCudaErrors(cuStreamSynchronize(stream[i])); |
290 | 142 |
143 //printf("%0.6f\n",getTime()-start); | |
144 | |
145 for (int i=0;i<num_exec;i++) | |
146 check_data(A,(float)(i+1),result[i]); | |
147 | |
148 // memory release | |
292 | 149 checkCudaErrors(cuMemFree(devA)); |
290 | 150 for (int i=0;i<num_exec;i++) { |
292 | 151 checkCudaErrors(cuMemFree(devB[i])); |
152 checkCudaErrors(cuMemFree(devOut[i])); | |
290 | 153 } |
154 for (int i=0;i<num_stream;i++) | |
292 | 155 checkCudaErrors(cuStreamDestroy(stream[i])); |
156 checkCudaErrors(cuModuleUnload(module)); | |
157 checkCudaErrors(cuCtxDestroy(context)); | |
290 | 158 |
159 delete[] A; | |
160 delete[] B; | |
161 for (int i=0;i<num_exec;i++) | |
162 delete[] result[i]; | |
163 delete[] result; | |
164 | |
165 return 0; | |
166 } | |
299 | 167 |