annotate src/test/main.cc @ 291:87128b876c63

add test
author ikkun
date Thu, 09 Feb 2017 19:02:15 +0900 (2017-02-09)
parents 625a19d81ed7
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
290
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
1 #include <stdio.h>
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
2 #include <sys/time.h>
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
3 #include <string.h>
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
4 #include <stdlib.h>
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
5
291
87128b876c63 add test
ikkun
parents: 290
diff changeset
6 extern "C"
87128b876c63 add test
ikkun
parents: 290
diff changeset
7 {
87128b876c63 add test
ikkun
parents: 290
diff changeset
8 //#include <cuda.h>
87128b876c63 add test
ikkun
parents: 290
diff changeset
9 }
87128b876c63 add test
ikkun
parents: 290
diff changeset
10 #include <cuda_runtime.h>
290
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
11
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
12 #define LENGTH (10)
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
13 #define THREAD (10)
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
14
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
15 void
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
16 report_error(cudaError_t err, const char* file, int lineNo) {
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
17 fprintf(stderr, "[cudaError] %s (error code: %d) at %s line %d\n", cudaGetErrorString(err), err, file, lineNo);
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
18 }
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
19
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
/*
 * CUDA_CALL(func)
 *
 * Evaluates the CUDA driver API call `func` exactly once. If the call does
 * not return CUDA_SUCCESS, prints the error description, the numeric error
 * code and the call site (__FILE__/__LINE__) to stderr and terminates the
 * process with EXIT_FAILURE.
 *
 * The result of the call itself is what gets reported. Querying
 * cudaGetLastError() instead would read the runtime API's error state, which
 * does not describe the driver API call that failed and can report success,
 * making the program exit with status 0 on failure.
 *
 * If cuGetErrorString() does not recognise the code, a generic
 * "unknown error" text is printed together with the numeric code.
 */
#define CUDA_CALL(func) \
    do { \
        CUresult cuda_call_result_ = (func); \
        if (cuda_call_result_ != CUDA_SUCCESS) { \
            const char* cuda_call_message_ = NULL; \
            if (cuGetErrorString(cuda_call_result_, &cuda_call_message_) != CUDA_SUCCESS \
                || cuda_call_message_ == NULL) { \
                cuda_call_message_ = "unknown error"; \
            } \
            fprintf(stderr, "[cudaError] %s (error code: %d) at %s line %d\n", \
                    cuda_call_message_, (int)cuda_call_result_, __FILE__, __LINE__); \
            exit(EXIT_FAILURE); \
        } \
    } while (0)
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
28
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
/*
 * Return the current wall-clock time as reported by gettimeofday(),
 * expressed in seconds as a double (whole seconds plus the microsecond
 * part scaled by 1e-6).
 */
double getTime() {
    struct timeval now;
    gettimeofday(&now, NULL);

    const double whole_seconds = now.tv_sec;
    const double micro_seconds = (double)now.tv_usec;
    return whole_seconds + micro_seconds * 1e-6;
}
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
35
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
36 void
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
37 check_data(float* A, float B, float* C) {
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
38 for (int i=0; i<LENGTH*THREAD; i++) {
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
39 if (A[i]*B!=C[i]) {
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
40 puts("multiply failure.");
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
41 return;
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
42 }
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
43 }
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
44 puts("success.");
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
45 }
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
46
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
47 void print_result(float* C) {
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
48 for (int i=0; i<LENGTH*THREAD; i++) {
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
49 printf("%f\n",C[i]);
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
50 }
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
51 }
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
52
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
53 int main(int args, char* argv[]) {
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
54 int num_stream = 1; // number of stream
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
55 int num_exec = 16; // number of executed kernel
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
56
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
57 for (int i=1;argv[i];i++) {
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
58 if (strcmp(argv[i], "--stream") == 0 || strcmp(argv[i], "-s") == 0) {
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
59 num_stream = atoi(argv[++i]);
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
60 }
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
61 }
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
62
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
63 // initialize and load kernel
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
64 CUdevice device;
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
65 CUcontext context;
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
66 CUmodule module;
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
67 CUfunction function;
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
68 CUstream stream[num_stream];
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
69
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
70 CUDA_CALL(cuInit(0));
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
71 CUDA_CALL(cuDeviceGet(&device, 0));
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
72 CUDA_CALL(cuCtxCreate(&context, CU_CTX_SCHED_SPIN, device));
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
73 CUDA_CALL(cuModuleLoad(&module, "multiply.ptx"));
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
74 CUDA_CALL(cuModuleGetFunction(&function, module, "multiply"));
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
75 for (int i=0;i<num_stream;i++)
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
76 CUDA_CALL(cuStreamCreate(&stream[i],0));
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
77
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
78 // memory allocate
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
79 CUdeviceptr devA;
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
80 CUdeviceptr devB[num_exec];
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
81 CUdeviceptr devOut[num_exec];
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
82
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
83 CUDA_CALL(cuMemAlloc(&devA, LENGTH*THREAD*sizeof(float)));
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
84 for (int i=0;i<num_exec;i++) {
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
85 CUDA_CALL(cuMemAlloc(&devB[i], sizeof(float)));
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
86 CUDA_CALL(cuMemAlloc(&devOut[i], LENGTH*THREAD*sizeof(float)));
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
87 }
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
88
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
89 // input buffer
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
90 float* A = new float[LENGTH*THREAD];
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
91 float* B = new float[num_exec];
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
92
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
93 for (int i=0; i<LENGTH*THREAD; i++)
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
94 A[i] = (float)(i+1000);
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
95
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
96 // output buffer
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
97 float** result = new float* [num_exec];
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
98
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
99 for (int i=0;i<num_exec;i++)
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
100 result[i] = new float[LENGTH*THREAD];
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
101
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
102 // Synchronous data transfer(host to device)
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
103 CUDA_CALL(cuMemcpyHtoD(devA, A, LENGTH*THREAD*sizeof(float)));
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
104
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
105 // Asynchronous data transfer(host to device)
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
106 int cur = 0;
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
107
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
108 for (int i=0;i<num_exec;i++,cur++) {
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
109 if (num_stream <= cur)
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
110 cur = 0;
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
111 B[i] = (float)(i+1);
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
112 CUDA_CALL(cuMemcpyHtoDAsync(devB[i], &B[i], sizeof(float), stream[cur]));
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
113 }
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
114
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
115 cur = 0;
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
116
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
117 // Asynchronous launch kernel
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
118 for (int i=0;i<num_exec;i++,cur++) {
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
119 if (num_stream <= cur)
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
120 cur=0;
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
121 B[i] = (float)(i+1);
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
122 //cuMemcpyHtoDAsync(devB[i], &B[i], sizeof(float), stream[cur]);
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
123 void* args[] = {&devA, &devB[i], &devOut[i]};
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
124 CUDA_CALL(cuLaunchKernel(function,
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
125 LENGTH, 1, 1,
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
126 THREAD, 1, 1,
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
127 0, stream[cur], args, NULL));
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
128 //cuMemcpyDtoHAsync(result[i], devOut[i], LENGTH*THREAD*sizeof(float), stream[cur]);
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
129 }
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
130
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
131 cur = 0;
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
132
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
133
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
134 // Asynchronous data transfer(device to host)
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
135 for (int i=0;i<num_exec;i++,cur++) {
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
136 if (num_stream <= cur)
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
137 cur = 0;
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
138 CUDA_CALL(cuMemcpyDtoHAsync(result[i], devOut[i], LENGTH*THREAD*sizeof(float), stream[cur]));
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
139 }
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
140
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
141 // wait for stream
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
142 for (int i=0;i<num_stream;i++)
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
143 CUDA_CALL(cuStreamSynchronize(stream[i]));
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
144
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
145 //printf("%0.6f\n",getTime()-start);
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
146
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
147 for (int i=0;i<num_exec;i++)
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
148 check_data(A,(float)(i+1),result[i]);
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
149
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
150 // memory release
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
151 CUDA_CALL(cuMemFree(devA));
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
152 for (int i=0;i<num_exec;i++) {
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
153 CUDA_CALL(cuMemFree(devB[i]));
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
154 CUDA_CALL(cuMemFree(devOut[i]));
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
155 }
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
156 for (int i=0;i<num_stream;i++)
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
157 CUDA_CALL(cuStreamDestroy(stream[i]));
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
158 CUDA_CALL(cuModuleUnload(module));
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
159 CUDA_CALL(cuCtxDestroy(context));
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
160
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
161 delete[] A;
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
162 delete[] B;
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
163 for (int i=0;i<num_exec;i++)
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
164 delete[] result[i];
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
165 delete[] result;
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
166
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
167 return 0;
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
168 }
625a19d81ed7 add Cmake
ikkun
parents:
diff changeset
169 //