annotate src/test/twice.cc @ 300:8bbc0012e1a4

checkErrors on an example
author Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date Sun, 12 Feb 2017 09:12:21 +0900
parents b387b224790c
children 609bf62768b9
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
290
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
1 #include <stdio.h>
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
2 #include <sys/time.h>
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
3 #include <string.h>
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
4 #include <stdlib.h>
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
5
292
2bc63a22dd21 add twice
ikkun
parents: 291
diff changeset
6 #include <cuda.h>
2bc63a22dd21 add twice
ikkun
parents: 291
diff changeset
7
291
87128b876c63 add test
ikkun
parents: 290
diff changeset
8 #include <cuda_runtime.h>
292
2bc63a22dd21 add twice
ikkun
parents: 291
diff changeset
9 #include "helper_cuda.h"
290
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
10
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
11 #define LENGTH (10)
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
12 #define THREAD (10)
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
13
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
// Returns the current wall-clock time reported by gettimeofday(), expressed
// in seconds with the microsecond field folded in as the fractional part.
double
getTime() {
    struct timeval now;
    gettimeofday(&now, NULL);
    const double micros = (double)now.tv_usec;
    return now.tv_sec + micros*1e-6;
}
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
20
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
21 void
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
22 check_data(float* A, float B, float* C) {
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
23 for (int i=0; i<LENGTH*THREAD; i++) {
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
24 if (A[i]*B!=C[i]) {
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
25 puts("multiply failure.");
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
26 return;
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
27 }
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
28 }
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
29 puts("success.");
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
30 }
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
31
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
32 void print_result(float* C) {
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
33 for (int i=0; i<LENGTH*THREAD; i++) {
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
34 printf("%f\n",C[i]);
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
35 }
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
36 }
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
37
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
38 int main(int args, char* argv[]) {
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
39 int num_stream = 1; // number of stream
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
40 int num_exec = 16; // number of executed kernel
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
41
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
42 for (int i=1;argv[i];i++) {
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
43 if (strcmp(argv[i], "--stream") == 0 || strcmp(argv[i], "-s") == 0) {
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
44 num_stream = atoi(argv[++i]);
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
45 }
300
8bbc0012e1a4 checkErrors on an example
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 299
diff changeset
46 if (strcmp(argv[i], "--numExec") == 0 || strcmp(argv[i], "-e") == 0) {
8bbc0012e1a4 checkErrors on an example
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 299
diff changeset
47 num_exec = atoi(argv[++i]);
8bbc0012e1a4 checkErrors on an example
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 299
diff changeset
48 }
290
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
49 }
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
50
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
51 // initialize and load kernel
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
52 CUdevice device;
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
53 CUcontext context;
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
54 CUmodule module;
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
55 CUfunction function;
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
56 CUstream stream[num_stream];
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
57
292
2bc63a22dd21 add twice
ikkun
parents: 291
diff changeset
58 checkCudaErrors(cuInit(0));
2bc63a22dd21 add twice
ikkun
parents: 291
diff changeset
59 checkCudaErrors(cuDeviceGet(&device, 0));
2bc63a22dd21 add twice
ikkun
parents: 291
diff changeset
60 checkCudaErrors(cuCtxCreate(&context, CU_CTX_SCHED_SPIN, device));
2bc63a22dd21 add twice
ikkun
parents: 291
diff changeset
61 checkCudaErrors(cuModuleLoad(&module, "multiply.ptx"));
2bc63a22dd21 add twice
ikkun
parents: 291
diff changeset
62 checkCudaErrors(cuModuleGetFunction(&function, module, "multiply"));
300
8bbc0012e1a4 checkErrors on an example
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 299
diff changeset
63 if (num_stream) {
8bbc0012e1a4 checkErrors on an example
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 299
diff changeset
64 for (int i=0;i<num_stream;i++)
8bbc0012e1a4 checkErrors on an example
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 299
diff changeset
65 checkCudaErrors(cuStreamCreate(&stream[i],0));
8bbc0012e1a4 checkErrors on an example
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 299
diff changeset
66 }
290
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
67
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
68 // memory allocate
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
69 CUdeviceptr devA;
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
70 CUdeviceptr devB[num_exec];
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
71 CUdeviceptr devOut[num_exec];
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
72
292
2bc63a22dd21 add twice
ikkun
parents: 291
diff changeset
73 checkCudaErrors(cuMemAlloc(&devA, LENGTH*THREAD*sizeof(float)));
290
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
74 for (int i=0;i<num_exec;i++) {
292
2bc63a22dd21 add twice
ikkun
parents: 291
diff changeset
75 checkCudaErrors(cuMemAlloc(&devB[i], sizeof(float)));
2bc63a22dd21 add twice
ikkun
parents: 291
diff changeset
76 checkCudaErrors(cuMemAlloc(&devOut[i], LENGTH*THREAD*sizeof(float)));
290
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
77 }
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
78
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
79 // input buffer
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
80 float* A = new float[LENGTH*THREAD];
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
81 float* B = new float[num_exec];
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
82
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
83 for (int i=0; i<LENGTH*THREAD; i++)
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
84 A[i] = (float)(i+1000);
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
85
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
86 // output buffer
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
87 float** result = new float* [num_exec];
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
88
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
89 for (int i=0;i<num_exec;i++)
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
90 result[i] = new float[LENGTH*THREAD];
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
91
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
92 // Synchronous data transfer(host to device)
292
2bc63a22dd21 add twice
ikkun
parents: 291
diff changeset
93 checkCudaErrors(cuMemcpyHtoD(devA, A, LENGTH*THREAD*sizeof(float)));
290
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
94
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
95 // Asynchronous data transfer(host to device)
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
96 int cur = 0;
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
97
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
98 for (int i=0;i<num_exec;i++,cur++) {
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
99 if (num_stream <= cur)
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
100 cur = 0;
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
101 B[i] = (float)(i+1);
300
8bbc0012e1a4 checkErrors on an example
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 299
diff changeset
102 if (num_stream) {
8bbc0012e1a4 checkErrors on an example
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 299
diff changeset
103 checkCudaErrors(cuMemcpyHtoDAsync(devB[i], &B[i], sizeof(float), stream[cur]));
8bbc0012e1a4 checkErrors on an example
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 299
diff changeset
104 } else {
8bbc0012e1a4 checkErrors on an example
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 299
diff changeset
105 checkCudaErrors(cuMemcpyHtoD(devB[i], &B[i], sizeof(float)));
8bbc0012e1a4 checkErrors on an example
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 299
diff changeset
106 }
290
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
107 }
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
108
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
109 cur = 0;
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
110
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
111 // Asynchronous launch kernel
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
112 for (int i=0;i<num_exec;i++,cur++) {
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
113 if (num_stream <= cur)
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
114 cur=0;
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
115 B[i] = (float)(i+1);
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
116 //cuMemcpyHtoDAsync(devB[i], &B[i], sizeof(float), stream[cur]);
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
117 void* args[] = {&devA, &devB[i], &devOut[i]};
292
2bc63a22dd21 add twice
ikkun
parents: 291
diff changeset
118 checkCudaErrors(cuLaunchKernel(function,
290
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
119 LENGTH, 1, 1,
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
120 THREAD, 1, 1,
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
121 0, stream[cur], args, NULL));
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
122 //cuMemcpyDtoHAsync(result[i], devOut[i], LENGTH*THREAD*sizeof(float), stream[cur]);
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
123 }
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
124
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
125 cur = 0;
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
126
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
127
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
128 // Asynchronous data transfer(device to host)
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
129 for (int i=0;i<num_exec;i++,cur++) {
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
130 if (num_stream <= cur)
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
131 cur = 0;
300
8bbc0012e1a4 checkErrors on an example
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 299
diff changeset
132 if (num_stream) {
8bbc0012e1a4 checkErrors on an example
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 299
diff changeset
133 checkCudaErrors(cuMemcpyDtoHAsync(result[i], devOut[i], LENGTH*THREAD*sizeof(float), stream[cur]));
8bbc0012e1a4 checkErrors on an example
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 299
diff changeset
134 } else {
8bbc0012e1a4 checkErrors on an example
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 299
diff changeset
135 checkCudaErrors(cuMemcpyDtoH(result[i], devOut[i], LENGTH*THREAD*sizeof(float)));
8bbc0012e1a4 checkErrors on an example
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 299
diff changeset
136 }
290
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
137 }
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
138
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
139 // wait for stream
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
140 for (int i=0;i<num_stream;i++)
292
2bc63a22dd21 add twice
ikkun
parents: 291
diff changeset
141 checkCudaErrors(cuStreamSynchronize(stream[i]));
290
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
142
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
143 //printf("%0.6f\n",getTime()-start);
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
144
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
145 for (int i=0;i<num_exec;i++)
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
146 check_data(A,(float)(i+1),result[i]);
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
147
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
148 // memory release
292
2bc63a22dd21 add twice
ikkun
parents: 291
diff changeset
149 checkCudaErrors(cuMemFree(devA));
290
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
150 for (int i=0;i<num_exec;i++) {
292
2bc63a22dd21 add twice
ikkun
parents: 291
diff changeset
151 checkCudaErrors(cuMemFree(devB[i]));
2bc63a22dd21 add twice
ikkun
parents: 291
diff changeset
152 checkCudaErrors(cuMemFree(devOut[i]));
290
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
153 }
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
154 for (int i=0;i<num_stream;i++)
292
2bc63a22dd21 add twice
ikkun
parents: 291
diff changeset
155 checkCudaErrors(cuStreamDestroy(stream[i]));
2bc63a22dd21 add twice
ikkun
parents: 291
diff changeset
156 checkCudaErrors(cuModuleUnload(module));
2bc63a22dd21 add twice
ikkun
parents: 291
diff changeset
157 checkCudaErrors(cuCtxDestroy(context));
290
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
158
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
159 delete[] A;
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
160 delete[] B;
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
161 for (int i=0;i<num_exec;i++)
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
162 delete[] result[i];
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
163 delete[] result;
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
164
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
165 return 0;
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
166 }
299
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 298
diff changeset
167