Mercurial > hg > Gears > GearsAgda
annotate src/parallel_execution/helper_cuda.h @ 303:1dbaef86593b
CUDAtwice.cbc
author | ikkun |
---|---|
date | Mon, 13 Feb 2017 18:23:29 +0900 |
parents | src/test/helper_cuda.h@609bf62768b9 |
children | 9755206813cb |
rev | line source |
---|---|
291 | 1 /** |
2 * Copyright 1993-2013 NVIDIA Corporation. All rights reserved. | |
3 * | |
4 * Please refer to the NVIDIA end user license agreement (EULA) associated | |
5 * with this source code for terms and conditions that govern your use of | |
6 * this software. Any use, reproduction, disclosure, or distribution of | |
7 * this software and related documentation outside the terms of the EULA | |
8 * is strictly prohibited. | |
9 * | |
10 */ | |
11 | |
12 //////////////////////////////////////////////////////////////////////////////// | |
13 // These are CUDA Helper functions for initialization and error checking | |
14 | |
15 #ifndef HELPER_CUDA_H | |
16 #define HELPER_CUDA_H | |
17 | |
18 #pragma once | |
19 | |
20 #include <stdlib.h> | |
21 #include <stdio.h> | |
22 #include <string.h> | |
23 | |
24 #include "helper_string.h" | |
25 | |
26 #ifndef EXIT_WAIVED | |
27 #define EXIT_WAIVED 2 | |
28 #endif | |
29 | |
// Note: it is required that your SDK sample include the proper header files;
// please refer to the CUDA samples for examples of the needed CUDA headers,
// which may change depending on which CUDA functions are used.
33 | |
34 // CUDA Runtime error messages | |
#ifdef __DRIVER_TYPES_H__
// CUDA Runtime API errors.
//
// Returns the name of the cudaError_t enumerator (e.g. "cudaSuccess").
// Delegates to cudaGetErrorName() (available since CUDA 6.5; this header
// already references CUDA 8.0 enums, so the runtime is new enough) instead
// of maintaining a hand-written switch, so every current and future runtime
// error code is covered without editing this file.  For an unrecognized
// code the runtime returns a generic "unrecognized error code" string
// rather than the old "<unknown>" literal.
static const char *_cudaGetErrorEnum(cudaError_t error)
{
    return cudaGetErrorName(error);
}
#endif
292 | |
#ifdef __cuda_cuda_h__
// CUDA Driver API errors.
//
// Returns the name of the CUresult enumerator (e.g. "CUDA_SUCCESS").
// Declared static: the previous revision dropped the qualifier, which
// causes duplicate-symbol link errors when this header is included from
// more than one translation unit.  Uses cuGetErrorName() (Driver API,
// CUDA 6.0+) so newly added driver error codes are handled without
// maintaining a hand-written switch.
static const char *_cudaGetErrorEnum(CUresult error)
{
    const char *name = NULL;

    // cuGetErrorName fails (and leaves name untouched) for invalid codes;
    // fall back to the historical "<unknown>" string in that case.
    if (cuGetErrorName(error, &name) != CUDA_SUCCESS || name == NULL)
    {
        return "<unknown>";
    }

    return name;
}
#endif
480 | |
#ifdef CUBLAS_API_H_
// cuBLAS API errors: map a cublasStatus_t value to its enumerator name.
// Unrecognized values yield "<unknown>".
static const char *_cudaGetErrorEnum(cublasStatus_t error)
{
    switch (error)
    {
        case CUBLAS_STATUS_SUCCESS:          return "CUBLAS_STATUS_SUCCESS";
        case CUBLAS_STATUS_NOT_INITIALIZED:  return "CUBLAS_STATUS_NOT_INITIALIZED";
        case CUBLAS_STATUS_ALLOC_FAILED:     return "CUBLAS_STATUS_ALLOC_FAILED";
        case CUBLAS_STATUS_INVALID_VALUE:    return "CUBLAS_STATUS_INVALID_VALUE";
        case CUBLAS_STATUS_ARCH_MISMATCH:    return "CUBLAS_STATUS_ARCH_MISMATCH";
        case CUBLAS_STATUS_MAPPING_ERROR:    return "CUBLAS_STATUS_MAPPING_ERROR";
        case CUBLAS_STATUS_EXECUTION_FAILED: return "CUBLAS_STATUS_EXECUTION_FAILED";
        case CUBLAS_STATUS_INTERNAL_ERROR:   return "CUBLAS_STATUS_INTERNAL_ERROR";
        case CUBLAS_STATUS_NOT_SUPPORTED:    return "CUBLAS_STATUS_NOT_SUPPORTED";
        case CUBLAS_STATUS_LICENSE_ERROR:    return "CUBLAS_STATUS_LICENSE_ERROR";
    }

    return "<unknown>";
}
#endif
521 | |
#ifdef _CUFFT_H_
// cuFFT API errors: map a cufftResult value to its enumerator name.
// Unrecognized values yield "<unknown>".
static const char *_cudaGetErrorEnum(cufftResult error)
{
    switch (error)
    {
        case CUFFT_SUCCESS:                   return "CUFFT_SUCCESS";
        case CUFFT_INVALID_PLAN:              return "CUFFT_INVALID_PLAN";
        case CUFFT_ALLOC_FAILED:              return "CUFFT_ALLOC_FAILED";
        case CUFFT_INVALID_TYPE:              return "CUFFT_INVALID_TYPE";
        case CUFFT_INVALID_VALUE:             return "CUFFT_INVALID_VALUE";
        case CUFFT_INTERNAL_ERROR:            return "CUFFT_INTERNAL_ERROR";
        case CUFFT_EXEC_FAILED:               return "CUFFT_EXEC_FAILED";
        case CUFFT_SETUP_FAILED:              return "CUFFT_SETUP_FAILED";
        case CUFFT_INVALID_SIZE:              return "CUFFT_INVALID_SIZE";
        case CUFFT_UNALIGNED_DATA:            return "CUFFT_UNALIGNED_DATA";
        case CUFFT_INCOMPLETE_PARAMETER_LIST: return "CUFFT_INCOMPLETE_PARAMETER_LIST";
        case CUFFT_INVALID_DEVICE:            return "CUFFT_INVALID_DEVICE";
        case CUFFT_PARSE_ERROR:               return "CUFFT_PARSE_ERROR";
        case CUFFT_NO_WORKSPACE:              return "CUFFT_NO_WORKSPACE";
        case CUFFT_NOT_IMPLEMENTED:           return "CUFFT_NOT_IMPLEMENTED";
        case CUFFT_LICENSE_ERROR:             return "CUFFT_LICENSE_ERROR";
        case CUFFT_NOT_SUPPORTED:             return "CUFFT_NOT_SUPPORTED";
    }

    return "<unknown>";
}
#endif
583 | |
584 | |
#ifdef CUSPARSEAPI
// cuSPARSE API errors: map a cusparseStatus_t value to its enumerator name.
// Unrecognized values yield "<unknown>".
static const char *_cudaGetErrorEnum(cusparseStatus_t error)
{
    switch (error)
    {
        case CUSPARSE_STATUS_SUCCESS:                   return "CUSPARSE_STATUS_SUCCESS";
        case CUSPARSE_STATUS_NOT_INITIALIZED:           return "CUSPARSE_STATUS_NOT_INITIALIZED";
        case CUSPARSE_STATUS_ALLOC_FAILED:              return "CUSPARSE_STATUS_ALLOC_FAILED";
        case CUSPARSE_STATUS_INVALID_VALUE:             return "CUSPARSE_STATUS_INVALID_VALUE";
        case CUSPARSE_STATUS_ARCH_MISMATCH:             return "CUSPARSE_STATUS_ARCH_MISMATCH";
        case CUSPARSE_STATUS_MAPPING_ERROR:             return "CUSPARSE_STATUS_MAPPING_ERROR";
        case CUSPARSE_STATUS_EXECUTION_FAILED:          return "CUSPARSE_STATUS_EXECUTION_FAILED";
        case CUSPARSE_STATUS_INTERNAL_ERROR:            return "CUSPARSE_STATUS_INTERNAL_ERROR";
        case CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED: return "CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED";
    }

    return "<unknown>";
}
#endif
622 | |
#ifdef CUSOLVER_COMMON_H_
// cuSOLVER API errors: map a cusolverStatus_t value to its enumerator name.
// Unrecognized values yield "<unknown>".
//
// Fix: the string returned for CUSOLVER_STATUS_NOT_SUPPORTED previously
// carried a stray trailing space ("CUSOLVER_STATUS_NOT_SUPPORTED "), which
// broke exact-match comparisons against the enumerator name in log parsers.
static const char *_cudaGetErrorEnum(cusolverStatus_t error)
{
    switch (error)
    {
        case CUSOLVER_STATUS_SUCCESS:                   return "CUSOLVER_STATUS_SUCCESS";
        case CUSOLVER_STATUS_NOT_INITIALIZED:           return "CUSOLVER_STATUS_NOT_INITIALIZED";
        case CUSOLVER_STATUS_ALLOC_FAILED:              return "CUSOLVER_STATUS_ALLOC_FAILED";
        case CUSOLVER_STATUS_INVALID_VALUE:             return "CUSOLVER_STATUS_INVALID_VALUE";
        case CUSOLVER_STATUS_ARCH_MISMATCH:             return "CUSOLVER_STATUS_ARCH_MISMATCH";
        case CUSOLVER_STATUS_MAPPING_ERROR:             return "CUSOLVER_STATUS_MAPPING_ERROR";
        case CUSOLVER_STATUS_EXECUTION_FAILED:          return "CUSOLVER_STATUS_EXECUTION_FAILED";
        case CUSOLVER_STATUS_INTERNAL_ERROR:            return "CUSOLVER_STATUS_INTERNAL_ERROR";
        case CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED: return "CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED";
        case CUSOLVER_STATUS_NOT_SUPPORTED:             return "CUSOLVER_STATUS_NOT_SUPPORTED";
        case CUSOLVER_STATUS_ZERO_PIVOT:                return "CUSOLVER_STATUS_ZERO_PIVOT";
        case CUSOLVER_STATUS_INVALID_LICENSE:           return "CUSOLVER_STATUS_INVALID_LICENSE";
    }

    return "<unknown>";
}
#endif
659 | |
#ifdef CURAND_H_
// cuRAND API errors: map a curandStatus_t value to its enumerator name.
// Unrecognized values yield "<unknown>".
static const char *_cudaGetErrorEnum(curandStatus_t error)
{
    switch (error)
    {
        case CURAND_STATUS_SUCCESS:                   return "CURAND_STATUS_SUCCESS";
        case CURAND_STATUS_VERSION_MISMATCH:          return "CURAND_STATUS_VERSION_MISMATCH";
        case CURAND_STATUS_NOT_INITIALIZED:           return "CURAND_STATUS_NOT_INITIALIZED";
        case CURAND_STATUS_ALLOCATION_FAILED:         return "CURAND_STATUS_ALLOCATION_FAILED";
        case CURAND_STATUS_TYPE_ERROR:                return "CURAND_STATUS_TYPE_ERROR";
        case CURAND_STATUS_OUT_OF_RANGE:              return "CURAND_STATUS_OUT_OF_RANGE";
        case CURAND_STATUS_LENGTH_NOT_MULTIPLE:       return "CURAND_STATUS_LENGTH_NOT_MULTIPLE";
        case CURAND_STATUS_DOUBLE_PRECISION_REQUIRED: return "CURAND_STATUS_DOUBLE_PRECISION_REQUIRED";
        case CURAND_STATUS_LAUNCH_FAILURE:            return "CURAND_STATUS_LAUNCH_FAILURE";
        case CURAND_STATUS_PREEXISTING_FAILURE:       return "CURAND_STATUS_PREEXISTING_FAILURE";
        case CURAND_STATUS_INITIALIZATION_FAILED:     return "CURAND_STATUS_INITIALIZATION_FAILED";
        case CURAND_STATUS_ARCH_MISMATCH:             return "CURAND_STATUS_ARCH_MISMATCH";
        case CURAND_STATUS_INTERNAL_ERROR:            return "CURAND_STATUS_INTERNAL_ERROR";
    }

    return "<unknown>";
}
#endif
709 | |
#ifdef NV_NPPIDEFS_H
// NPP API errors: map an NppStatus value to its enumerator name.
// Several enumerators were renamed across NPP releases, so the set of
// cases compiled in is selected by preprocessor tests on the NPP version
// macros; unrecognized values yield "<unknown>".
static const char *_cudaGetErrorEnum(NppStatus error)
{
    switch (error)
    {
        case NPP_NOT_SUPPORTED_MODE_ERROR:          return "NPP_NOT_SUPPORTED_MODE_ERROR";
        case NPP_ROUND_MODE_NOT_SUPPORTED_ERROR:    return "NPP_ROUND_MODE_NOT_SUPPORTED_ERROR";
        case NPP_RESIZE_NO_OPERATION_ERROR:         return "NPP_RESIZE_NO_OPERATION_ERROR";
        case NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY: return "NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY";

#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) <= 0x5000
        // Pre-CUDA-5.5 spellings of the argument/geometry errors.
        case NPP_BAD_ARG_ERROR:                     return "NPP_BAD_ARGUMENT_ERROR";
        case NPP_COEFF_ERROR:                       return "NPP_COEFFICIENT_ERROR";
        case NPP_RECT_ERROR:                        return "NPP_RECTANGLE_ERROR";
        case NPP_QUAD_ERROR:                        return "NPP_QUADRANGLE_ERROR";
        case NPP_MEM_ALLOC_ERR:                     return "NPP_MEMORY_ALLOCATION_ERROR";
        case NPP_HISTO_NUMBER_OF_LEVELS_ERROR:      return "NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR";
        case NPP_INVALID_INPUT:                     return "NPP_INVALID_INPUT";
        case NPP_POINTER_ERROR:                     return "NPP_POINTER_ERROR";
        case NPP_WARNING:                           return "NPP_WARNING";
        case NPP_ODD_ROI_WARNING:                   return "NPP_ODD_ROI_WARNING";
#else
        // These are for CUDA 5.5 or higher.
        case NPP_BAD_ARGUMENT_ERROR:                return "NPP_BAD_ARGUMENT_ERROR";
        case NPP_COEFFICIENT_ERROR:                 return "NPP_COEFFICIENT_ERROR";
        case NPP_RECTANGLE_ERROR:                   return "NPP_RECTANGLE_ERROR";
        case NPP_QUADRANGLE_ERROR:                  return "NPP_QUADRANGLE_ERROR";
        case NPP_MEMORY_ALLOCATION_ERR:             return "NPP_MEMORY_ALLOCATION_ERROR";
        case NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR:  return "NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR";
        case NPP_INVALID_HOST_POINTER_ERROR:        return "NPP_INVALID_HOST_POINTER_ERROR";
        case NPP_INVALID_DEVICE_POINTER_ERROR:      return "NPP_INVALID_DEVICE_POINTER_ERROR";
#endif

        case NPP_LUT_NUMBER_OF_LEVELS_ERROR:        return "NPP_LUT_NUMBER_OF_LEVELS_ERROR";
        case NPP_TEXTURE_BIND_ERROR:                return "NPP_TEXTURE_BIND_ERROR";
        case NPP_WRONG_INTERSECTION_ROI_ERROR:      return "NPP_WRONG_INTERSECTION_ROI_ERROR";
        case NPP_NOT_EVEN_STEP_ERROR:               return "NPP_NOT_EVEN_STEP_ERROR";
        case NPP_INTERPOLATION_ERROR:               return "NPP_INTERPOLATION_ERROR";
        case NPP_RESIZE_FACTOR_ERROR:               return "NPP_RESIZE_FACTOR_ERROR";
        case NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR: return "NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR";

#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) <= 0x5000
        // Pre-CUDA-5.5 spellings of the memory-operation errors.
        case NPP_MEMFREE_ERR:                       return "NPP_MEMFREE_ERR";
        case NPP_MEMSET_ERR:                        return "NPP_MEMSET_ERR";
        case NPP_MEMCPY_ERR:                        return "NPP_MEMCPY_ERROR";
        case NPP_MIRROR_FLIP_ERR:                   return "NPP_MIRROR_FLIP_ERR";
#else
        case NPP_MEMFREE_ERROR:                     return "NPP_MEMFREE_ERROR";
        case NPP_MEMSET_ERROR:                      return "NPP_MEMSET_ERROR";
        case NPP_MEMCPY_ERROR:                      return "NPP_MEMCPY_ERROR";
        case NPP_MIRROR_FLIP_ERROR:                 return "NPP_MIRROR_FLIP_ERROR";
#endif

        case NPP_ALIGNMENT_ERROR:                   return "NPP_ALIGNMENT_ERROR";
        case NPP_STEP_ERROR:                        return "NPP_STEP_ERROR";
        case NPP_SIZE_ERROR:                        return "NPP_SIZE_ERROR";
        case NPP_NULL_POINTER_ERROR:                return "NPP_NULL_POINTER_ERROR";
        case NPP_CUDA_KERNEL_EXECUTION_ERROR:       return "NPP_CUDA_KERNEL_EXECUTION_ERROR";
        case NPP_NOT_IMPLEMENTED_ERROR:             return "NPP_NOT_IMPLEMENTED_ERROR";
        case NPP_ERROR:                             return "NPP_ERROR";
        case NPP_SUCCESS:                           return "NPP_SUCCESS";
        case NPP_WRONG_INTERSECTION_QUAD_WARNING:   return "NPP_WRONG_INTERSECTION_QUAD_WARNING";
        case NPP_MISALIGNED_DST_ROI_WARNING:        return "NPP_MISALIGNED_DST_ROI_WARNING";
        case NPP_AFFINE_QUAD_INCORRECT_WARNING:     return "NPP_AFFINE_QUAD_INCORRECT_WARNING";
        case NPP_DOUBLE_SIZE_WARNING:               return "NPP_DOUBLE_SIZE_WARNING";
        case NPP_WRONG_INTERSECTION_ROI_WARNING:    return "NPP_WRONG_INTERSECTION_ROI_WARNING";

#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) >= 0x6000
        // These are 6.0 or higher.
        case NPP_LUT_PALETTE_BITSIZE_ERROR:         return "NPP_LUT_PALETTE_BITSIZE_ERROR";
        case NPP_ZC_MODE_NOT_SUPPORTED_ERROR:       return "NPP_ZC_MODE_NOT_SUPPORTED_ERROR";
        case NPP_QUALITY_INDEX_ERROR:               return "NPP_QUALITY_INDEX_ERROR";
        case NPP_CHANNEL_ORDER_ERROR:               return "NPP_CHANNEL_ORDER_ERROR";
        case NPP_ZERO_MASK_VALUE_ERROR:             return "NPP_ZERO_MASK_VALUE_ERROR";
        case NPP_NUMBER_OF_CHANNELS_ERROR:          return "NPP_NUMBER_OF_CHANNELS_ERROR";
        case NPP_COI_ERROR:                         return "NPP_COI_ERROR";
        case NPP_DIVISOR_ERROR:                     return "NPP_DIVISOR_ERROR";
        case NPP_CHANNEL_ERROR:                     return "NPP_CHANNEL_ERROR";
        case NPP_STRIDE_ERROR:                      return "NPP_STRIDE_ERROR";
        case NPP_ANCHOR_ERROR:                      return "NPP_ANCHOR_ERROR";
        case NPP_MASK_SIZE_ERROR:                   return "NPP_MASK_SIZE_ERROR";
        case NPP_MOMENT_00_ZERO_ERROR:              return "NPP_MOMENT_00_ZERO_ERROR";
        case NPP_THRESHOLD_NEGATIVE_LEVEL_ERROR:    return "NPP_THRESHOLD_NEGATIVE_LEVEL_ERROR";
        case NPP_THRESHOLD_ERROR:                   return "NPP_THRESHOLD_ERROR";
        case NPP_CONTEXT_MATCH_ERROR:               return "NPP_CONTEXT_MATCH_ERROR";
        case NPP_FFT_FLAG_ERROR:                    return "NPP_FFT_FLAG_ERROR";
        case NPP_FFT_ORDER_ERROR:                   return "NPP_FFT_ORDER_ERROR";
        case NPP_SCALE_RANGE_ERROR:                 return "NPP_SCALE_RANGE_ERROR";
        case NPP_DATA_TYPE_ERROR:                   return "NPP_DATA_TYPE_ERROR";
        case NPP_OUT_OFF_RANGE_ERROR:               return "NPP_OUT_OFF_RANGE_ERROR";
        case NPP_DIVIDE_BY_ZERO_ERROR:              return "NPP_DIVIDE_BY_ZERO_ERROR";
        case NPP_RANGE_ERROR:                       return "NPP_RANGE_ERROR";
        case NPP_NO_MEMORY_ERROR:                   return "NPP_NO_MEMORY_ERROR";
        case NPP_ERROR_RESERVED:                    return "NPP_ERROR_RESERVED";
        case NPP_NO_OPERATION_WARNING:              return "NPP_NO_OPERATION_WARNING";
        case NPP_DIVIDE_BY_ZERO_WARNING:            return "NPP_DIVIDE_BY_ZERO_WARNING";
#endif

#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) >= 0x7000
        // These are 7.0 or higher.
        case NPP_OVERFLOW_ERROR:                    return "NPP_OVERFLOW_ERROR";
        case NPP_CORRUPTED_DATA_ERROR:              return "NPP_CORRUPTED_DATA_ERROR";
#endif
    }

    return "<unknown>";
}
#endif
973 | |
// DEVICE_RESET expands to a cudaDeviceReset() call when the CUDA runtime
// driver types are available, and to nothing otherwise.  Note the macro
// text carries its own trailing semicolon, so call sites below use it
// bare (without a following ';').
#ifdef __DRIVER_TYPES_H__
#ifndef DEVICE_RESET
#define DEVICE_RESET cudaDeviceReset();
#endif
#else
#ifndef DEVICE_RESET
#define DEVICE_RESET
#endif
#endif
983 | |
984 template< typename T > | |
985 void check(T result, char const *const func, const char *const file, int const line) | |
986 { | |
987 if (result) | |
988 { | |
989 fprintf(stderr, "CUDA error at %s:%d code=%d(%s) \"%s\" \n", | |
990 file, line, static_cast<unsigned int>(result), _cudaGetErrorEnum(result), func); | |
991 DEVICE_RESET | |
992 // Make sure we call CUDA Device Reset before exiting | |
993 exit(EXIT_FAILURE); | |
994 } | |
995 } | |
996 | |
#ifdef __DRIVER_TYPES_H__
// This will output the proper CUDA error strings in the event that a CUDA host call returns an error
#define checkCudaErrors(val) check ( (val), #val, __FILE__, __LINE__ )

// This will output the proper error string when calling cudaGetLastError
#define getLastCudaError(msg) __getLastCudaError (msg, __FILE__, __LINE__)

// Fetch and clear the runtime's sticky error; if one is pending, report it
// with the caller-supplied message and call site, reset the device, and exit.
inline void __getLastCudaError(const char *errorMessage, const char *file, const int line)
{
    cudaError_t status = cudaGetLastError();

    if (status == cudaSuccess)
    {
        return;  // no pending error
    }

    fprintf(stderr, "%s(%i) : getLastCudaError() CUDA error : %s : (%d) %s.\n",
            file, line, errorMessage, (int)status, cudaGetErrorString(status));
    DEVICE_RESET
    exit(EXIT_FAILURE);
}
#endif
1017 | |
#ifndef MAX
// Maximum of two values.  Both arguments are fully parenthesized so the
// macro expands correctly when given compound expressions (the previous
// definition `(a > b ? a : b)` mis-parsed arguments containing operators
// of lower precedence than `>`, e.g. MAX(x ? 1 : 5, 3)).  Note each
// argument may still be evaluated more than once.
#define MAX(a,b) (((a) > (b)) ? (a) : (b))
#endif
1021 | |
// Float-to-int conversion, rounding to the nearest integer with ties
// (and all half-steps) moved away from zero.
inline int ftoi(float value)
{
    if (value >= 0)
    {
        return (int)(value + 0.5);
    }

    return (int)(value - 0.5);
}
1027 | |
// Beginning of GPU Architecture definitions
// Maps a compute capability (major.minor) to the number of CUDA cores per
// streaming multiprocessor.  Unknown versions fall back to the last (newest)
// table entry, after printing a warning to stdout.
inline int _ConvertSMVer2Cores(int major, int minor)
{
    // One row per known SM version, keyed as 0xMm.
    struct sSMtoCores
    {
        int SM;     // 0xMm (hexadecimal notation), M = SM Major version, m = SM minor version
        int Cores;  // CUDA cores per multiprocessor
    };

    sSMtoCores gpuArchCoresPerSM[] =
    {
        { 0x20, 32 }, // Fermi Generation (SM 2.0) GF100 class
        { 0x21, 48 }, // Fermi Generation (SM 2.1) GF10x class
        { 0x30, 192}, // Kepler Generation (SM 3.0) GK10x class
        { 0x32, 192}, // Kepler Generation (SM 3.2) GK10x class
        { 0x35, 192}, // Kepler Generation (SM 3.5) GK11x class
        { 0x37, 192}, // Kepler Generation (SM 3.7) GK21x class
        { 0x50, 128}, // Maxwell Generation (SM 5.0) GM10x class
        { 0x52, 128}, // Maxwell Generation (SM 5.2) GM20x class
        { 0x53, 128}, // Maxwell Generation (SM 5.3) GM20x class
        { 0x60, 64 }, // Pascal Generation (SM 6.0) GP100 class
        { 0x61, 128}, // Pascal Generation (SM 6.1) GP10x class
        { 0x62, 128}, // Pascal Generation (SM 6.2) GP10x class
        { -1, -1 }    // sentinel terminating the table
    };

    const int requested = (major << 4) + minor;
    int i = 0;

    for (; gpuArchCoresPerSM[i].SM != -1; ++i)
    {
        if (gpuArchCoresPerSM[i].SM == requested)
        {
            return gpuArchCoresPerSM[i].Cores;
        }
    }

    // If we don't find the values, we default use the previous one to run properly
    printf("MapSMtoCores for SM %d.%d is undefined. Default to use %d Cores/SM\n", major, minor, gpuArchCoresPerSM[i-1].Cores);
    return gpuArchCoresPerSM[i-1].Cores;
}
1071 // end of GPU Architecture definitions | |
1072 | |
1073 #ifdef __CUDA_RUNTIME_H__ | |
1074 // General GPU Device CUDA Initialization | |
1075 inline int gpuDeviceInit(int devID) | |
1076 { | |
1077 int device_count; | |
1078 checkCudaErrors(cudaGetDeviceCount(&device_count)); | |
1079 | |
1080 if (device_count == 0) | |
1081 { | |
1082 fprintf(stderr, "gpuDeviceInit() CUDA error: no devices supporting CUDA.\n"); | |
1083 exit(EXIT_FAILURE); | |
1084 } | |
1085 | |
1086 if (devID < 0) | |
1087 { | |
1088 devID = 0; | |
1089 } | |
1090 | |
1091 if (devID > device_count-1) | |
1092 { | |
1093 fprintf(stderr, "\n"); | |
1094 fprintf(stderr, ">> %d CUDA capable GPU device(s) detected. <<\n", device_count); | |
1095 fprintf(stderr, ">> gpuDeviceInit (-device=%d) is not a valid GPU device. <<\n", devID); | |
1096 fprintf(stderr, "\n"); | |
1097 return -devID; | |
1098 } | |
1099 | |
1100 cudaDeviceProp deviceProp; | |
1101 checkCudaErrors(cudaGetDeviceProperties(&deviceProp, devID)); | |
1102 | |
1103 if (deviceProp.computeMode == cudaComputeModeProhibited) | |
1104 { | |
1105 fprintf(stderr, "Error: device is running in <Compute Mode Prohibited>, no threads can use ::cudaSetDevice().\n"); | |
1106 return -1; | |
1107 } | |
1108 | |
1109 if (deviceProp.major < 1) | |
1110 { | |
1111 fprintf(stderr, "gpuDeviceInit(): GPU device does not support CUDA.\n"); | |
1112 exit(EXIT_FAILURE); | |
1113 } | |
1114 | |
1115 checkCudaErrors(cudaSetDevice(devID)); | |
1116 printf("gpuDeviceInit() CUDA Device [%d]: \"%s\n", devID, deviceProp.name); | |
1117 | |
1118 return devID; | |
1119 } | |
1120 | |
1121 // This function returns the best GPU (with maximum GFLOPS) | |
1122 inline int gpuGetMaxGflopsDeviceId() | |
1123 { | |
1124 int current_device = 0, sm_per_multiproc = 0; | |
1125 int max_perf_device = 0; | |
1126 int device_count = 0, best_SM_arch = 0; | |
1127 int devices_prohibited = 0; | |
1128 | |
1129 unsigned long long max_compute_perf = 0; | |
1130 cudaDeviceProp deviceProp; | |
1131 cudaGetDeviceCount(&device_count); | |
1132 | |
1133 checkCudaErrors(cudaGetDeviceCount(&device_count)); | |
1134 | |
1135 if (device_count == 0) | |
1136 { | |
1137 fprintf(stderr, "gpuGetMaxGflopsDeviceId() CUDA error: no devices supporting CUDA.\n"); | |
1138 exit(EXIT_FAILURE); | |
1139 } | |
1140 | |
1141 // Find the best major SM Architecture GPU device | |
1142 while (current_device < device_count) | |
1143 { | |
1144 cudaGetDeviceProperties(&deviceProp, current_device); | |
1145 | |
1146 // If this GPU is not running on Compute Mode prohibited, then we can add it to the list | |
1147 if (deviceProp.computeMode != cudaComputeModeProhibited) | |
1148 { | |
1149 if (deviceProp.major > 0 && deviceProp.major < 9999) | |
1150 { | |
1151 best_SM_arch = MAX(best_SM_arch, deviceProp.major); | |
1152 } | |
1153 } | |
1154 else | |
1155 { | |
1156 devices_prohibited++; | |
1157 } | |
1158 | |
1159 current_device++; | |
1160 } | |
1161 | |
1162 if (devices_prohibited == device_count) | |
1163 { | |
1164 fprintf(stderr, "gpuGetMaxGflopsDeviceId() CUDA error: all devices have compute mode prohibited.\n"); | |
1165 exit(EXIT_FAILURE); | |
1166 } | |
1167 | |
1168 // Find the best CUDA capable GPU device | |
1169 current_device = 0; | |
1170 | |
1171 while (current_device < device_count) | |
1172 { | |
1173 cudaGetDeviceProperties(&deviceProp, current_device); | |
1174 | |
1175 // If this GPU is not running on Compute Mode prohibited, then we can add it to the list | |
1176 if (deviceProp.computeMode != cudaComputeModeProhibited) | |
1177 { | |
1178 if (deviceProp.major == 9999 && deviceProp.minor == 9999) | |
1179 { | |
1180 sm_per_multiproc = 1; | |
1181 } | |
1182 else | |
1183 { | |
1184 sm_per_multiproc = _ConvertSMVer2Cores(deviceProp.major, deviceProp.minor); | |
1185 } | |
1186 | |
1187 unsigned long long compute_perf = (unsigned long long) deviceProp.multiProcessorCount * sm_per_multiproc * deviceProp.clockRate; | |
1188 | |
1189 if (compute_perf > max_compute_perf) | |
1190 { | |
1191 // If we find GPU with SM major > 2, search only these | |
1192 if (best_SM_arch > 2) | |
1193 { | |
1194 // If our device==dest_SM_arch, choose this, or else pass | |
1195 if (deviceProp.major == best_SM_arch) | |
1196 { | |
1197 max_compute_perf = compute_perf; | |
1198 max_perf_device = current_device; | |
1199 } | |
1200 } | |
1201 else | |
1202 { | |
1203 max_compute_perf = compute_perf; | |
1204 max_perf_device = current_device; | |
1205 } | |
1206 } | |
1207 } | |
1208 | |
1209 ++current_device; | |
1210 } | |
1211 | |
1212 return max_perf_device; | |
1213 } | |
1214 | |
1215 | |
1216 // Initialization code to find the best CUDA Device | |
1217 inline int findCudaDevice(int argc, const char **argv) | |
1218 { | |
1219 cudaDeviceProp deviceProp; | |
1220 int devID = 0; | |
1221 | |
1222 // If the command-line has a device number specified, use it | |
1223 if (checkCmdLineFlag(argc, argv, "device")) | |
1224 { | |
1225 devID = getCmdLineArgumentInt(argc, argv, "device="); | |
1226 | |
1227 if (devID < 0) | |
1228 { | |
1229 printf("Invalid command line parameter\n "); | |
1230 exit(EXIT_FAILURE); | |
1231 } | |
1232 else | |
1233 { | |
1234 devID = gpuDeviceInit(devID); | |
1235 | |
1236 if (devID < 0) | |
1237 { | |
1238 printf("exiting...\n"); | |
1239 exit(EXIT_FAILURE); | |
1240 } | |
1241 } | |
1242 } | |
1243 else | |
1244 { | |
1245 // Otherwise pick the device with highest Gflops/s | |
1246 devID = gpuGetMaxGflopsDeviceId(); | |
1247 checkCudaErrors(cudaSetDevice(devID)); | |
1248 checkCudaErrors(cudaGetDeviceProperties(&deviceProp, devID)); | |
1249 printf("GPU Device %d: \"%s\" with compute capability %d.%d\n\n", devID, deviceProp.name, deviceProp.major, deviceProp.minor); | |
1250 } | |
1251 | |
1252 return devID; | |
1253 } | |
1254 | |
1255 // General check for CUDA GPU SM Capabilities | |
1256 inline bool checkCudaCapabilities(int major_version, int minor_version) | |
1257 { | |
1258 cudaDeviceProp deviceProp; | |
1259 deviceProp.major = 0; | |
1260 deviceProp.minor = 0; | |
1261 int dev; | |
1262 | |
1263 checkCudaErrors(cudaGetDevice(&dev)); | |
1264 checkCudaErrors(cudaGetDeviceProperties(&deviceProp, dev)); | |
1265 | |
1266 if ((deviceProp.major > major_version) || | |
1267 (deviceProp.major == major_version && deviceProp.minor >= minor_version)) | |
1268 { | |
1269 printf(" Device %d: <%16s >, Compute SM %d.%d detected\n", dev, deviceProp.name, deviceProp.major, deviceProp.minor); | |
1270 return true; | |
1271 } | |
1272 else | |
1273 { | |
1274 printf(" No GPU device was found that can support CUDA compute capability %d.%d.\n", major_version, minor_version); | |
1275 return false; | |
1276 } | |
1277 } | |
1278 #endif | |
1279 | |
1280 // end of CUDA Helper Functions | |
1281 | |
1282 | |
1283 #endif |