Mercurial > hg > Gears > GearsAgda
annotate src/parallel_execution/helper_cuda.h @ 433:d920f3a3f037
Refactoring cuda.c
author | Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Tue, 17 Oct 2017 15:47:33 +0900 |
parents | 6fcbbe644b92 |
children |
rev | line source |
---|---|
291 | 1 /** |
2 * Copyright 1993-2013 NVIDIA Corporation. All rights reserved. | |
3 * | |
4 * Please refer to the NVIDIA end user license agreement (EULA) associated | |
5 * with this source code for terms and conditions that govern your use of | |
6 * this software. Any use, reproduction, disclosure, or distribution of | |
7 * this software and related documentation outside the terms of the EULA | |
8 * is strictly prohibited. | |
9 * | |
10 */ | |
11 | |
12 //////////////////////////////////////////////////////////////////////////////// | |
13 // These are CUDA Helper functions for initialization and error checking | |
14 | |
15 #ifndef HELPER_CUDA_H | |
16 #define HELPER_CUDA_H | |
17 | |
18 #pragma once | |
19 | |
20 #include <stdlib.h> | |
21 #include <stdio.h> | |
22 #include <string.h> | |
23 | |
24 #include "helper_string.h" | |
25 | |
26 #ifndef EXIT_WAIVED | |
27 #define EXIT_WAIVED 2 | |
28 #endif | |
29 | |
30 // Note: your SDK sample is required to include the proper header files itself. Please | |
31 // refer to the CUDA examples for the needed CUDA headers, which may change depending | |
32 // on which CUDA functions are used. | |
33 | |
34 // CUDA Runtime error messages | |
305 | 35 #ifndef __DRIVER_TYPES_H__ |
291 | 36 static const char *_cudaGetErrorEnum(cudaError_t error) |
37 { | |
38 switch (error) | |
39 { | |
40 case cudaSuccess: | |
41 return "cudaSuccess"; | |
42 | |
43 case cudaErrorMissingConfiguration: | |
44 return "cudaErrorMissingConfiguration"; | |
45 | |
46 case cudaErrorMemoryAllocation: | |
47 return "cudaErrorMemoryAllocation"; | |
48 | |
49 case cudaErrorInitializationError: | |
50 return "cudaErrorInitializationError"; | |
51 | |
52 case cudaErrorLaunchFailure: | |
53 return "cudaErrorLaunchFailure"; | |
54 | |
55 case cudaErrorPriorLaunchFailure: | |
56 return "cudaErrorPriorLaunchFailure"; | |
57 | |
58 case cudaErrorLaunchTimeout: | |
59 return "cudaErrorLaunchTimeout"; | |
60 | |
61 case cudaErrorLaunchOutOfResources: | |
62 return "cudaErrorLaunchOutOfResources"; | |
63 | |
64 case cudaErrorInvalidDeviceFunction: | |
65 return "cudaErrorInvalidDeviceFunction"; | |
66 | |
67 case cudaErrorInvalidConfiguration: | |
68 return "cudaErrorInvalidConfiguration"; | |
69 | |
70 case cudaErrorInvalidDevice: | |
71 return "cudaErrorInvalidDevice"; | |
72 | |
73 case cudaErrorInvalidValue: | |
74 return "cudaErrorInvalidValue"; | |
75 | |
76 case cudaErrorInvalidPitchValue: | |
77 return "cudaErrorInvalidPitchValue"; | |
78 | |
79 case cudaErrorInvalidSymbol: | |
80 return "cudaErrorInvalidSymbol"; | |
81 | |
82 case cudaErrorMapBufferObjectFailed: | |
83 return "cudaErrorMapBufferObjectFailed"; | |
84 | |
85 case cudaErrorUnmapBufferObjectFailed: | |
86 return "cudaErrorUnmapBufferObjectFailed"; | |
87 | |
88 case cudaErrorInvalidHostPointer: | |
89 return "cudaErrorInvalidHostPointer"; | |
90 | |
91 case cudaErrorInvalidDevicePointer: | |
92 return "cudaErrorInvalidDevicePointer"; | |
93 | |
94 case cudaErrorInvalidTexture: | |
95 return "cudaErrorInvalidTexture"; | |
96 | |
97 case cudaErrorInvalidTextureBinding: | |
98 return "cudaErrorInvalidTextureBinding"; | |
99 | |
100 case cudaErrorInvalidChannelDescriptor: | |
101 return "cudaErrorInvalidChannelDescriptor"; | |
102 | |
103 case cudaErrorInvalidMemcpyDirection: | |
104 return "cudaErrorInvalidMemcpyDirection"; | |
105 | |
106 case cudaErrorAddressOfConstant: | |
107 return "cudaErrorAddressOfConstant"; | |
108 | |
109 case cudaErrorTextureFetchFailed: | |
110 return "cudaErrorTextureFetchFailed"; | |
111 | |
112 case cudaErrorTextureNotBound: | |
113 return "cudaErrorTextureNotBound"; | |
114 | |
115 case cudaErrorSynchronizationError: | |
116 return "cudaErrorSynchronizationError"; | |
117 | |
118 case cudaErrorInvalidFilterSetting: | |
119 return "cudaErrorInvalidFilterSetting"; | |
120 | |
121 case cudaErrorInvalidNormSetting: | |
122 return "cudaErrorInvalidNormSetting"; | |
123 | |
124 case cudaErrorMixedDeviceExecution: | |
125 return "cudaErrorMixedDeviceExecution"; | |
126 | |
127 case cudaErrorCudartUnloading: | |
128 return "cudaErrorCudartUnloading"; | |
129 | |
130 case cudaErrorUnknown: | |
131 return "cudaErrorUnknown"; | |
132 | |
133 case cudaErrorNotYetImplemented: | |
134 return "cudaErrorNotYetImplemented"; | |
135 | |
136 case cudaErrorMemoryValueTooLarge: | |
137 return "cudaErrorMemoryValueTooLarge"; | |
138 | |
139 case cudaErrorInvalidResourceHandle: | |
140 return "cudaErrorInvalidResourceHandle"; | |
141 | |
142 case cudaErrorNotReady: | |
143 return "cudaErrorNotReady"; | |
144 | |
145 case cudaErrorInsufficientDriver: | |
146 return "cudaErrorInsufficientDriver"; | |
147 | |
148 case cudaErrorSetOnActiveProcess: | |
149 return "cudaErrorSetOnActiveProcess"; | |
150 | |
151 case cudaErrorInvalidSurface: | |
152 return "cudaErrorInvalidSurface"; | |
153 | |
154 case cudaErrorNoDevice: | |
155 return "cudaErrorNoDevice"; | |
156 | |
157 case cudaErrorECCUncorrectable: | |
158 return "cudaErrorECCUncorrectable"; | |
159 | |
160 case cudaErrorSharedObjectSymbolNotFound: | |
161 return "cudaErrorSharedObjectSymbolNotFound"; | |
162 | |
163 case cudaErrorSharedObjectInitFailed: | |
164 return "cudaErrorSharedObjectInitFailed"; | |
165 | |
166 case cudaErrorUnsupportedLimit: | |
167 return "cudaErrorUnsupportedLimit"; | |
168 | |
169 case cudaErrorDuplicateVariableName: | |
170 return "cudaErrorDuplicateVariableName"; | |
171 | |
172 case cudaErrorDuplicateTextureName: | |
173 return "cudaErrorDuplicateTextureName"; | |
174 | |
175 case cudaErrorDuplicateSurfaceName: | |
176 return "cudaErrorDuplicateSurfaceName"; | |
177 | |
178 case cudaErrorDevicesUnavailable: | |
179 return "cudaErrorDevicesUnavailable"; | |
180 | |
181 case cudaErrorInvalidKernelImage: | |
182 return "cudaErrorInvalidKernelImage"; | |
183 | |
184 case cudaErrorNoKernelImageForDevice: | |
185 return "cudaErrorNoKernelImageForDevice"; | |
186 | |
187 case cudaErrorIncompatibleDriverContext: | |
188 return "cudaErrorIncompatibleDriverContext"; | |
189 | |
190 case cudaErrorPeerAccessAlreadyEnabled: | |
191 return "cudaErrorPeerAccessAlreadyEnabled"; | |
192 | |
193 case cudaErrorPeerAccessNotEnabled: | |
194 return "cudaErrorPeerAccessNotEnabled"; | |
195 | |
196 case cudaErrorDeviceAlreadyInUse: | |
197 return "cudaErrorDeviceAlreadyInUse"; | |
198 | |
199 case cudaErrorProfilerDisabled: | |
200 return "cudaErrorProfilerDisabled"; | |
201 | |
202 case cudaErrorProfilerNotInitialized: | |
203 return "cudaErrorProfilerNotInitialized"; | |
204 | |
205 case cudaErrorProfilerAlreadyStarted: | |
206 return "cudaErrorProfilerAlreadyStarted"; | |
207 | |
208 case cudaErrorProfilerAlreadyStopped: | |
209 return "cudaErrorProfilerAlreadyStopped"; | |
210 | |
211 /* Since CUDA 4.0*/ | |
212 case cudaErrorAssert: | |
213 return "cudaErrorAssert"; | |
214 | |
215 case cudaErrorTooManyPeers: | |
216 return "cudaErrorTooManyPeers"; | |
217 | |
218 case cudaErrorHostMemoryAlreadyRegistered: | |
219 return "cudaErrorHostMemoryAlreadyRegistered"; | |
220 | |
221 case cudaErrorHostMemoryNotRegistered: | |
222 return "cudaErrorHostMemoryNotRegistered"; | |
223 | |
224 /* Since CUDA 5.0 */ | |
225 case cudaErrorOperatingSystem: | |
226 return "cudaErrorOperatingSystem"; | |
227 | |
228 case cudaErrorPeerAccessUnsupported: | |
229 return "cudaErrorPeerAccessUnsupported"; | |
230 | |
231 case cudaErrorLaunchMaxDepthExceeded: | |
232 return "cudaErrorLaunchMaxDepthExceeded"; | |
233 | |
234 case cudaErrorLaunchFileScopedTex: | |
235 return "cudaErrorLaunchFileScopedTex"; | |
236 | |
237 case cudaErrorLaunchFileScopedSurf: | |
238 return "cudaErrorLaunchFileScopedSurf"; | |
239 | |
240 case cudaErrorSyncDepthExceeded: | |
241 return "cudaErrorSyncDepthExceeded"; | |
242 | |
243 case cudaErrorLaunchPendingCountExceeded: | |
244 return "cudaErrorLaunchPendingCountExceeded"; | |
245 | |
246 case cudaErrorNotPermitted: | |
247 return "cudaErrorNotPermitted"; | |
248 | |
249 case cudaErrorNotSupported: | |
250 return "cudaErrorNotSupported"; | |
251 | |
252 /* Since CUDA 6.0 */ | |
253 case cudaErrorHardwareStackError: | |
254 return "cudaErrorHardwareStackError"; | |
255 | |
256 case cudaErrorIllegalInstruction: | |
257 return "cudaErrorIllegalInstruction"; | |
258 | |
259 case cudaErrorMisalignedAddress: | |
260 return "cudaErrorMisalignedAddress"; | |
261 | |
262 case cudaErrorInvalidAddressSpace: | |
263 return "cudaErrorInvalidAddressSpace"; | |
264 | |
265 case cudaErrorInvalidPc: | |
266 return "cudaErrorInvalidPc"; | |
267 | |
268 case cudaErrorIllegalAddress: | |
269 return "cudaErrorIllegalAddress"; | |
270 | |
271 /* Since CUDA 6.5*/ | |
272 case cudaErrorInvalidPtx: | |
273 return "cudaErrorInvalidPtx"; | |
274 | |
275 case cudaErrorInvalidGraphicsContext: | |
276 return "cudaErrorInvalidGraphicsContext"; | |
277 | |
278 case cudaErrorStartupFailure: | |
279 return "cudaErrorStartupFailure"; | |
280 | |
281 case cudaErrorApiFailureBase: | |
282 return "cudaErrorApiFailureBase"; | |
283 | |
284 /* Since CUDA 8.0*/ | |
285 case cudaErrorNvlinkUncorrectable : | |
286 return "cudaErrorNvlinkUncorrectable"; | |
287 } | |
288 | |
289 return "<unknown>"; | |
290 } | |
304
9755206813cb
helper_string.h for ANSI C
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
303
diff
changeset
|
291 #else |
291 | 292 // CUDA Driver API errors |
311 | 293 static const char *_cudaGetErrorEnum(CUresult error) |
291 | 294 { |
295 switch (error) | |
296 { | |
297 case CUDA_SUCCESS: | |
298 return "CUDA_SUCCESS"; | |
299 | |
300 case CUDA_ERROR_INVALID_VALUE: | |
301 return "CUDA_ERROR_INVALID_VALUE"; | |
302 | |
303 case CUDA_ERROR_OUT_OF_MEMORY: | |
304 return "CUDA_ERROR_OUT_OF_MEMORY"; | |
305 | |
306 case CUDA_ERROR_NOT_INITIALIZED: | |
307 return "CUDA_ERROR_NOT_INITIALIZED"; | |
308 | |
309 case CUDA_ERROR_DEINITIALIZED: | |
310 return "CUDA_ERROR_DEINITIALIZED"; | |
311 | |
312 case CUDA_ERROR_PROFILER_DISABLED: | |
313 return "CUDA_ERROR_PROFILER_DISABLED"; | |
314 | |
315 case CUDA_ERROR_PROFILER_NOT_INITIALIZED: | |
316 return "CUDA_ERROR_PROFILER_NOT_INITIALIZED"; | |
317 | |
318 case CUDA_ERROR_PROFILER_ALREADY_STARTED: | |
319 return "CUDA_ERROR_PROFILER_ALREADY_STARTED"; | |
320 | |
321 case CUDA_ERROR_PROFILER_ALREADY_STOPPED: | |
322 return "CUDA_ERROR_PROFILER_ALREADY_STOPPED"; | |
323 | |
324 case CUDA_ERROR_NO_DEVICE: | |
325 return "CUDA_ERROR_NO_DEVICE"; | |
326 | |
327 case CUDA_ERROR_INVALID_DEVICE: | |
328 return "CUDA_ERROR_INVALID_DEVICE"; | |
329 | |
330 case CUDA_ERROR_INVALID_IMAGE: | |
331 return "CUDA_ERROR_INVALID_IMAGE"; | |
332 | |
333 case CUDA_ERROR_INVALID_CONTEXT: | |
334 return "CUDA_ERROR_INVALID_CONTEXT"; | |
335 | |
336 case CUDA_ERROR_CONTEXT_ALREADY_CURRENT: | |
337 return "CUDA_ERROR_CONTEXT_ALREADY_CURRENT"; | |
338 | |
339 case CUDA_ERROR_MAP_FAILED: | |
340 return "CUDA_ERROR_MAP_FAILED"; | |
341 | |
342 case CUDA_ERROR_UNMAP_FAILED: | |
343 return "CUDA_ERROR_UNMAP_FAILED"; | |
344 | |
345 case CUDA_ERROR_ARRAY_IS_MAPPED: | |
346 return "CUDA_ERROR_ARRAY_IS_MAPPED"; | |
347 | |
348 case CUDA_ERROR_ALREADY_MAPPED: | |
349 return "CUDA_ERROR_ALREADY_MAPPED"; | |
350 | |
351 case CUDA_ERROR_NO_BINARY_FOR_GPU: | |
352 return "CUDA_ERROR_NO_BINARY_FOR_GPU"; | |
353 | |
354 case CUDA_ERROR_ALREADY_ACQUIRED: | |
355 return "CUDA_ERROR_ALREADY_ACQUIRED"; | |
356 | |
357 case CUDA_ERROR_NOT_MAPPED: | |
358 return "CUDA_ERROR_NOT_MAPPED"; | |
359 | |
360 case CUDA_ERROR_NOT_MAPPED_AS_ARRAY: | |
361 return "CUDA_ERROR_NOT_MAPPED_AS_ARRAY"; | |
362 | |
363 case CUDA_ERROR_NOT_MAPPED_AS_POINTER: | |
364 return "CUDA_ERROR_NOT_MAPPED_AS_POINTER"; | |
365 | |
366 case CUDA_ERROR_ECC_UNCORRECTABLE: | |
367 return "CUDA_ERROR_ECC_UNCORRECTABLE"; | |
368 | |
369 case CUDA_ERROR_UNSUPPORTED_LIMIT: | |
370 return "CUDA_ERROR_UNSUPPORTED_LIMIT"; | |
371 | |
372 case CUDA_ERROR_CONTEXT_ALREADY_IN_USE: | |
373 return "CUDA_ERROR_CONTEXT_ALREADY_IN_USE"; | |
374 | |
375 case CUDA_ERROR_PEER_ACCESS_UNSUPPORTED: | |
376 return "CUDA_ERROR_PEER_ACCESS_UNSUPPORTED"; | |
377 | |
378 case CUDA_ERROR_INVALID_PTX: | |
379 return "CUDA_ERROR_INVALID_PTX"; | |
380 | |
381 case CUDA_ERROR_INVALID_GRAPHICS_CONTEXT: | |
382 return "CUDA_ERROR_INVALID_GRAPHICS_CONTEXT"; | |
383 | |
384 case CUDA_ERROR_NVLINK_UNCORRECTABLE: | |
385 return "CUDA_ERROR_NVLINK_UNCORRECTABLE"; | |
386 | |
387 case CUDA_ERROR_INVALID_SOURCE: | |
388 return "CUDA_ERROR_INVALID_SOURCE"; | |
389 | |
390 case CUDA_ERROR_FILE_NOT_FOUND: | |
391 return "CUDA_ERROR_FILE_NOT_FOUND"; | |
392 | |
393 case CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND: | |
394 return "CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND"; | |
395 | |
396 case CUDA_ERROR_SHARED_OBJECT_INIT_FAILED: | |
397 return "CUDA_ERROR_SHARED_OBJECT_INIT_FAILED"; | |
398 | |
399 case CUDA_ERROR_OPERATING_SYSTEM: | |
400 return "CUDA_ERROR_OPERATING_SYSTEM"; | |
401 | |
402 case CUDA_ERROR_INVALID_HANDLE: | |
403 return "CUDA_ERROR_INVALID_HANDLE"; | |
404 | |
405 case CUDA_ERROR_NOT_FOUND: | |
406 return "CUDA_ERROR_NOT_FOUND"; | |
407 | |
408 case CUDA_ERROR_NOT_READY: | |
409 return "CUDA_ERROR_NOT_READY"; | |
410 | |
411 case CUDA_ERROR_ILLEGAL_ADDRESS: | |
412 return "CUDA_ERROR_ILLEGAL_ADDRESS"; | |
413 | |
414 case CUDA_ERROR_LAUNCH_FAILED: | |
415 return "CUDA_ERROR_LAUNCH_FAILED"; | |
416 | |
417 case CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES: | |
418 return "CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES"; | |
419 | |
420 case CUDA_ERROR_LAUNCH_TIMEOUT: | |
421 return "CUDA_ERROR_LAUNCH_TIMEOUT"; | |
422 | |
423 case CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING: | |
424 return "CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING"; | |
425 | |
426 case CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED: | |
427 return "CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED"; | |
428 | |
429 case CUDA_ERROR_PEER_ACCESS_NOT_ENABLED: | |
430 return "CUDA_ERROR_PEER_ACCESS_NOT_ENABLED"; | |
431 | |
432 case CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE: | |
433 return "CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE"; | |
434 | |
435 case CUDA_ERROR_CONTEXT_IS_DESTROYED: | |
436 return "CUDA_ERROR_CONTEXT_IS_DESTROYED"; | |
437 | |
438 case CUDA_ERROR_ASSERT: | |
439 return "CUDA_ERROR_ASSERT"; | |
440 | |
441 case CUDA_ERROR_TOO_MANY_PEERS: | |
442 return "CUDA_ERROR_TOO_MANY_PEERS"; | |
443 | |
444 case CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED: | |
445 return "CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED"; | |
446 | |
447 case CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED: | |
448 return "CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED"; | |
449 | |
450 case CUDA_ERROR_HARDWARE_STACK_ERROR: | |
451 return "CUDA_ERROR_HARDWARE_STACK_ERROR"; | |
452 | |
453 case CUDA_ERROR_ILLEGAL_INSTRUCTION: | |
454 return "CUDA_ERROR_ILLEGAL_INSTRUCTION"; | |
455 | |
456 case CUDA_ERROR_MISALIGNED_ADDRESS: | |
457 return "CUDA_ERROR_MISALIGNED_ADDRESS"; | |
458 | |
459 case CUDA_ERROR_INVALID_ADDRESS_SPACE: | |
460 return "CUDA_ERROR_INVALID_ADDRESS_SPACE"; | |
461 | |
462 case CUDA_ERROR_INVALID_PC: | |
463 return "CUDA_ERROR_INVALID_PC"; | |
464 | |
465 case CUDA_ERROR_NOT_PERMITTED: | |
466 return "CUDA_ERROR_NOT_PERMITTED"; | |
467 | |
468 case CUDA_ERROR_NOT_SUPPORTED: | |
469 return "CUDA_ERROR_NOT_SUPPORTED"; | |
470 | |
471 case CUDA_ERROR_UNKNOWN: | |
472 return "CUDA_ERROR_UNKNOWN"; | |
473 } | |
474 | |
475 return "<unknown>"; | |
476 } | |
477 #endif | |
478 | |
479 #ifdef CUBLAS_API_H_ | |
480 // cuBLAS API errors | |
481 static const char *_cudaGetErrorEnum(cublasStatus_t error) | |
482 { | |
483 switch (error) | |
484 { | |
485 case CUBLAS_STATUS_SUCCESS: | |
486 return "CUBLAS_STATUS_SUCCESS"; | |
487 | |
488 case CUBLAS_STATUS_NOT_INITIALIZED: | |
489 return "CUBLAS_STATUS_NOT_INITIALIZED"; | |
490 | |
491 case CUBLAS_STATUS_ALLOC_FAILED: | |
492 return "CUBLAS_STATUS_ALLOC_FAILED"; | |
493 | |
494 case CUBLAS_STATUS_INVALID_VALUE: | |
495 return "CUBLAS_STATUS_INVALID_VALUE"; | |
496 | |
497 case CUBLAS_STATUS_ARCH_MISMATCH: | |
498 return "CUBLAS_STATUS_ARCH_MISMATCH"; | |
499 | |
500 case CUBLAS_STATUS_MAPPING_ERROR: | |
501 return "CUBLAS_STATUS_MAPPING_ERROR"; | |
502 | |
503 case CUBLAS_STATUS_EXECUTION_FAILED: | |
504 return "CUBLAS_STATUS_EXECUTION_FAILED"; | |
505 | |
506 case CUBLAS_STATUS_INTERNAL_ERROR: | |
507 return "CUBLAS_STATUS_INTERNAL_ERROR"; | |
508 | |
509 case CUBLAS_STATUS_NOT_SUPPORTED: | |
510 return "CUBLAS_STATUS_NOT_SUPPORTED"; | |
511 | |
512 case CUBLAS_STATUS_LICENSE_ERROR: | |
513 return "CUBLAS_STATUS_LICENSE_ERROR"; | |
514 } | |
515 | |
516 return "<unknown>"; | |
517 } | |
518 #endif | |
519 | |
520 #ifdef _CUFFT_H_ | |
521 // cuFFT API errors | |
522 static const char *_cudaGetErrorEnum(cufftResult error) | |
523 { | |
524 switch (error) | |
525 { | |
526 case CUFFT_SUCCESS: | |
527 return "CUFFT_SUCCESS"; | |
528 | |
529 case CUFFT_INVALID_PLAN: | |
530 return "CUFFT_INVALID_PLAN"; | |
531 | |
532 case CUFFT_ALLOC_FAILED: | |
533 return "CUFFT_ALLOC_FAILED"; | |
534 | |
535 case CUFFT_INVALID_TYPE: | |
536 return "CUFFT_INVALID_TYPE"; | |
537 | |
538 case CUFFT_INVALID_VALUE: | |
539 return "CUFFT_INVALID_VALUE"; | |
540 | |
541 case CUFFT_INTERNAL_ERROR: | |
542 return "CUFFT_INTERNAL_ERROR"; | |
543 | |
544 case CUFFT_EXEC_FAILED: | |
545 return "CUFFT_EXEC_FAILED"; | |
546 | |
547 case CUFFT_SETUP_FAILED: | |
548 return "CUFFT_SETUP_FAILED"; | |
549 | |
550 case CUFFT_INVALID_SIZE: | |
551 return "CUFFT_INVALID_SIZE"; | |
552 | |
553 case CUFFT_UNALIGNED_DATA: | |
554 return "CUFFT_UNALIGNED_DATA"; | |
555 | |
556 case CUFFT_INCOMPLETE_PARAMETER_LIST: | |
557 return "CUFFT_INCOMPLETE_PARAMETER_LIST"; | |
558 | |
559 case CUFFT_INVALID_DEVICE: | |
560 return "CUFFT_INVALID_DEVICE"; | |
561 | |
562 case CUFFT_PARSE_ERROR: | |
563 return "CUFFT_PARSE_ERROR"; | |
564 | |
565 case CUFFT_NO_WORKSPACE: | |
566 return "CUFFT_NO_WORKSPACE"; | |
567 | |
568 case CUFFT_NOT_IMPLEMENTED: | |
569 return "CUFFT_NOT_IMPLEMENTED"; | |
570 | |
571 case CUFFT_LICENSE_ERROR: | |
572 return "CUFFT_LICENSE_ERROR"; | |
573 | |
574 case CUFFT_NOT_SUPPORTED: | |
575 return "CUFFT_NOT_SUPPORTED"; | |
576 } | |
577 | |
578 return "<unknown>"; | |
579 } | |
580 #endif | |
581 | |
582 | |
583 #ifdef CUSPARSEAPI | |
584 // cuSPARSE API errors | |
585 static const char *_cudaGetErrorEnum(cusparseStatus_t error) | |
586 { | |
587 switch (error) | |
588 { | |
589 case CUSPARSE_STATUS_SUCCESS: | |
590 return "CUSPARSE_STATUS_SUCCESS"; | |
591 | |
592 case CUSPARSE_STATUS_NOT_INITIALIZED: | |
593 return "CUSPARSE_STATUS_NOT_INITIALIZED"; | |
594 | |
595 case CUSPARSE_STATUS_ALLOC_FAILED: | |
596 return "CUSPARSE_STATUS_ALLOC_FAILED"; | |
597 | |
598 case CUSPARSE_STATUS_INVALID_VALUE: | |
599 return "CUSPARSE_STATUS_INVALID_VALUE"; | |
600 | |
601 case CUSPARSE_STATUS_ARCH_MISMATCH: | |
602 return "CUSPARSE_STATUS_ARCH_MISMATCH"; | |
603 | |
604 case CUSPARSE_STATUS_MAPPING_ERROR: | |
605 return "CUSPARSE_STATUS_MAPPING_ERROR"; | |
606 | |
607 case CUSPARSE_STATUS_EXECUTION_FAILED: | |
608 return "CUSPARSE_STATUS_EXECUTION_FAILED"; | |
609 | |
610 case CUSPARSE_STATUS_INTERNAL_ERROR: | |
611 return "CUSPARSE_STATUS_INTERNAL_ERROR"; | |
612 | |
613 case CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED: | |
614 return "CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED"; | |
615 } | |
616 | |
617 return "<unknown>"; | |
618 } | |
619 #endif | |
620 | |
621 #ifdef CUSOLVER_COMMON_H_ | |
622 //cuSOLVER API errors | |
623 static const char *_cudaGetErrorEnum(cusolverStatus_t error) | |
624 { | |
625 switch(error) | |
626 { | |
627 case CUSOLVER_STATUS_SUCCESS: | |
628 return "CUSOLVER_STATUS_SUCCESS"; | |
629 case CUSOLVER_STATUS_NOT_INITIALIZED: | |
630 return "CUSOLVER_STATUS_NOT_INITIALIZED"; | |
631 case CUSOLVER_STATUS_ALLOC_FAILED: | |
632 return "CUSOLVER_STATUS_ALLOC_FAILED"; | |
633 case CUSOLVER_STATUS_INVALID_VALUE: | |
634 return "CUSOLVER_STATUS_INVALID_VALUE"; | |
635 case CUSOLVER_STATUS_ARCH_MISMATCH: | |
636 return "CUSOLVER_STATUS_ARCH_MISMATCH"; | |
637 case CUSOLVER_STATUS_MAPPING_ERROR: | |
638 return "CUSOLVER_STATUS_MAPPING_ERROR"; | |
639 case CUSOLVER_STATUS_EXECUTION_FAILED: | |
640 return "CUSOLVER_STATUS_EXECUTION_FAILED"; | |
641 case CUSOLVER_STATUS_INTERNAL_ERROR: | |
642 return "CUSOLVER_STATUS_INTERNAL_ERROR"; | |
643 case CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED: | |
644 return "CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED"; | |
645 case CUSOLVER_STATUS_NOT_SUPPORTED : | |
646 return "CUSOLVER_STATUS_NOT_SUPPORTED "; | |
647 case CUSOLVER_STATUS_ZERO_PIVOT: | |
648 return "CUSOLVER_STATUS_ZERO_PIVOT"; | |
649 case CUSOLVER_STATUS_INVALID_LICENSE: | |
650 return "CUSOLVER_STATUS_INVALID_LICENSE"; | |
651 } | |
652 | |
653 return "<unknown>"; | |
654 | |
655 } | |
656 #endif | |
657 | |
658 #ifdef CURAND_H_ | |
659 // cuRAND API errors | |
660 static const char *_cudaGetErrorEnum(curandStatus_t error) | |
661 { | |
662 switch (error) | |
663 { | |
664 case CURAND_STATUS_SUCCESS: | |
665 return "CURAND_STATUS_SUCCESS"; | |
666 | |
667 case CURAND_STATUS_VERSION_MISMATCH: | |
668 return "CURAND_STATUS_VERSION_MISMATCH"; | |
669 | |
670 case CURAND_STATUS_NOT_INITIALIZED: | |
671 return "CURAND_STATUS_NOT_INITIALIZED"; | |
672 | |
673 case CURAND_STATUS_ALLOCATION_FAILED: | |
674 return "CURAND_STATUS_ALLOCATION_FAILED"; | |
675 | |
676 case CURAND_STATUS_TYPE_ERROR: | |
677 return "CURAND_STATUS_TYPE_ERROR"; | |
678 | |
679 case CURAND_STATUS_OUT_OF_RANGE: | |
680 return "CURAND_STATUS_OUT_OF_RANGE"; | |
681 | |
682 case CURAND_STATUS_LENGTH_NOT_MULTIPLE: | |
683 return "CURAND_STATUS_LENGTH_NOT_MULTIPLE"; | |
684 | |
685 case CURAND_STATUS_DOUBLE_PRECISION_REQUIRED: | |
686 return "CURAND_STATUS_DOUBLE_PRECISION_REQUIRED"; | |
687 | |
688 case CURAND_STATUS_LAUNCH_FAILURE: | |
689 return "CURAND_STATUS_LAUNCH_FAILURE"; | |
690 | |
691 case CURAND_STATUS_PREEXISTING_FAILURE: | |
692 return "CURAND_STATUS_PREEXISTING_FAILURE"; | |
693 | |
694 case CURAND_STATUS_INITIALIZATION_FAILED: | |
695 return "CURAND_STATUS_INITIALIZATION_FAILED"; | |
696 | |
697 case CURAND_STATUS_ARCH_MISMATCH: | |
698 return "CURAND_STATUS_ARCH_MISMATCH"; | |
699 | |
700 case CURAND_STATUS_INTERNAL_ERROR: | |
701 return "CURAND_STATUS_INTERNAL_ERROR"; | |
702 } | |
703 | |
704 return "<unknown>"; | |
705 } | |
706 #endif | |
707 | |
708 #ifdef NV_NPPIDEFS_H | |
709 // NPP API errors | |
710 static const char *_cudaGetErrorEnum(NppStatus error) | |
711 { | |
712 switch (error) | |
713 { | |
714 case NPP_NOT_SUPPORTED_MODE_ERROR: | |
715 return "NPP_NOT_SUPPORTED_MODE_ERROR"; | |
716 | |
717 case NPP_ROUND_MODE_NOT_SUPPORTED_ERROR: | |
718 return "NPP_ROUND_MODE_NOT_SUPPORTED_ERROR"; | |
719 | |
720 case NPP_RESIZE_NO_OPERATION_ERROR: | |
721 return "NPP_RESIZE_NO_OPERATION_ERROR"; | |
722 | |
723 case NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY: | |
724 return "NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY"; | |
725 | |
726 #if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) <= 0x5000 | |
727 | |
728 case NPP_BAD_ARG_ERROR: | |
729 return "NPP_BAD_ARGUMENT_ERROR"; | |
730 | |
731 case NPP_COEFF_ERROR: | |
732 return "NPP_COEFFICIENT_ERROR"; | |
733 | |
734 case NPP_RECT_ERROR: | |
735 return "NPP_RECTANGLE_ERROR"; | |
736 | |
737 case NPP_QUAD_ERROR: | |
738 return "NPP_QUADRANGLE_ERROR"; | |
739 | |
740 case NPP_MEM_ALLOC_ERR: | |
741 return "NPP_MEMORY_ALLOCATION_ERROR"; | |
742 | |
743 case NPP_HISTO_NUMBER_OF_LEVELS_ERROR: | |
744 return "NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR"; | |
745 | |
746 case NPP_INVALID_INPUT: | |
747 return "NPP_INVALID_INPUT"; | |
748 | |
749 case NPP_POINTER_ERROR: | |
750 return "NPP_POINTER_ERROR"; | |
751 | |
752 case NPP_WARNING: | |
753 return "NPP_WARNING"; | |
754 | |
755 case NPP_ODD_ROI_WARNING: | |
756 return "NPP_ODD_ROI_WARNING"; | |
757 #else | |
758 | |
759 // These are for CUDA 5.5 or higher | |
760 case NPP_BAD_ARGUMENT_ERROR: | |
761 return "NPP_BAD_ARGUMENT_ERROR"; | |
762 | |
763 case NPP_COEFFICIENT_ERROR: | |
764 return "NPP_COEFFICIENT_ERROR"; | |
765 | |
766 case NPP_RECTANGLE_ERROR: | |
767 return "NPP_RECTANGLE_ERROR"; | |
768 | |
769 case NPP_QUADRANGLE_ERROR: | |
770 return "NPP_QUADRANGLE_ERROR"; | |
771 | |
772 case NPP_MEMORY_ALLOCATION_ERR: | |
773 return "NPP_MEMORY_ALLOCATION_ERROR"; | |
774 | |
775 case NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR: | |
776 return "NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR"; | |
777 | |
778 case NPP_INVALID_HOST_POINTER_ERROR: | |
779 return "NPP_INVALID_HOST_POINTER_ERROR"; | |
780 | |
781 case NPP_INVALID_DEVICE_POINTER_ERROR: | |
782 return "NPP_INVALID_DEVICE_POINTER_ERROR"; | |
783 #endif | |
784 | |
785 case NPP_LUT_NUMBER_OF_LEVELS_ERROR: | |
786 return "NPP_LUT_NUMBER_OF_LEVELS_ERROR"; | |
787 | |
788 case NPP_TEXTURE_BIND_ERROR: | |
789 return "NPP_TEXTURE_BIND_ERROR"; | |
790 | |
791 case NPP_WRONG_INTERSECTION_ROI_ERROR: | |
792 return "NPP_WRONG_INTERSECTION_ROI_ERROR"; | |
793 | |
794 case NPP_NOT_EVEN_STEP_ERROR: | |
795 return "NPP_NOT_EVEN_STEP_ERROR"; | |
796 | |
797 case NPP_INTERPOLATION_ERROR: | |
798 return "NPP_INTERPOLATION_ERROR"; | |
799 | |
800 case NPP_RESIZE_FACTOR_ERROR: | |
801 return "NPP_RESIZE_FACTOR_ERROR"; | |
802 | |
803 case NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR: | |
804 return "NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR"; | |
805 | |
806 | |
807 #if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) <= 0x5000 | |
808 | |
809 case NPP_MEMFREE_ERR: | |
810 return "NPP_MEMFREE_ERR"; | |
811 | |
812 case NPP_MEMSET_ERR: | |
813 return "NPP_MEMSET_ERR"; | |
814 | |
815 case NPP_MEMCPY_ERR: | |
816 return "NPP_MEMCPY_ERROR"; | |
817 | |
818 case NPP_MIRROR_FLIP_ERR: | |
819 return "NPP_MIRROR_FLIP_ERR"; | |
820 #else | |
821 | |
822 case NPP_MEMFREE_ERROR: | |
823 return "NPP_MEMFREE_ERROR"; | |
824 | |
825 case NPP_MEMSET_ERROR: | |
826 return "NPP_MEMSET_ERROR"; | |
827 | |
828 case NPP_MEMCPY_ERROR: | |
829 return "NPP_MEMCPY_ERROR"; | |
830 | |
831 case NPP_MIRROR_FLIP_ERROR: | |
832 return "NPP_MIRROR_FLIP_ERROR"; | |
833 #endif | |
834 | |
835 case NPP_ALIGNMENT_ERROR: | |
836 return "NPP_ALIGNMENT_ERROR"; | |
837 | |
838 case NPP_STEP_ERROR: | |
839 return "NPP_STEP_ERROR"; | |
840 | |
841 case NPP_SIZE_ERROR: | |
842 return "NPP_SIZE_ERROR"; | |
843 | |
844 case NPP_NULL_POINTER_ERROR: | |
845 return "NPP_NULL_POINTER_ERROR"; | |
846 | |
847 case NPP_CUDA_KERNEL_EXECUTION_ERROR: | |
848 return "NPP_CUDA_KERNEL_EXECUTION_ERROR"; | |
849 | |
850 case NPP_NOT_IMPLEMENTED_ERROR: | |
851 return "NPP_NOT_IMPLEMENTED_ERROR"; | |
852 | |
853 case NPP_ERROR: | |
854 return "NPP_ERROR"; | |
855 | |
856 case NPP_SUCCESS: | |
857 return "NPP_SUCCESS"; | |
858 | |
859 case NPP_WRONG_INTERSECTION_QUAD_WARNING: | |
860 return "NPP_WRONG_INTERSECTION_QUAD_WARNING"; | |
861 | |
862 case NPP_MISALIGNED_DST_ROI_WARNING: | |
863 return "NPP_MISALIGNED_DST_ROI_WARNING"; | |
864 | |
865 case NPP_AFFINE_QUAD_INCORRECT_WARNING: | |
866 return "NPP_AFFINE_QUAD_INCORRECT_WARNING"; | |
867 | |
868 case NPP_DOUBLE_SIZE_WARNING: | |
869 return "NPP_DOUBLE_SIZE_WARNING"; | |
870 | |
871 case NPP_WRONG_INTERSECTION_ROI_WARNING: | |
872 return "NPP_WRONG_INTERSECTION_ROI_WARNING"; | |
873 | |
874 #if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) >= 0x6000 | |
875 /* These are 6.0 or higher */ | |
876 case NPP_LUT_PALETTE_BITSIZE_ERROR: | |
877 return "NPP_LUT_PALETTE_BITSIZE_ERROR"; | |
878 | |
879 case NPP_ZC_MODE_NOT_SUPPORTED_ERROR: | |
880 return "NPP_ZC_MODE_NOT_SUPPORTED_ERROR"; | |
881 | |
882 case NPP_QUALITY_INDEX_ERROR: | |
883 return "NPP_QUALITY_INDEX_ERROR"; | |
884 | |
885 case NPP_CHANNEL_ORDER_ERROR: | |
886 return "NPP_CHANNEL_ORDER_ERROR"; | |
887 | |
888 case NPP_ZERO_MASK_VALUE_ERROR: | |
889 return "NPP_ZERO_MASK_VALUE_ERROR"; | |
890 | |
891 case NPP_NUMBER_OF_CHANNELS_ERROR: | |
892 return "NPP_NUMBER_OF_CHANNELS_ERROR"; | |
893 | |
894 case NPP_COI_ERROR: | |
895 return "NPP_COI_ERROR"; | |
896 | |
897 case NPP_DIVISOR_ERROR: | |
898 return "NPP_DIVISOR_ERROR"; | |
899 | |
900 case NPP_CHANNEL_ERROR: | |
901 return "NPP_CHANNEL_ERROR"; | |
902 | |
903 case NPP_STRIDE_ERROR: | |
904 return "NPP_STRIDE_ERROR"; | |
905 | |
906 case NPP_ANCHOR_ERROR: | |
907 return "NPP_ANCHOR_ERROR"; | |
908 | |
909 case NPP_MASK_SIZE_ERROR: | |
910 return "NPP_MASK_SIZE_ERROR"; | |
911 | |
912 case NPP_MOMENT_00_ZERO_ERROR: | |
913 return "NPP_MOMENT_00_ZERO_ERROR"; | |
914 | |
915 case NPP_THRESHOLD_NEGATIVE_LEVEL_ERROR: | |
916 return "NPP_THRESHOLD_NEGATIVE_LEVEL_ERROR"; | |
917 | |
918 case NPP_THRESHOLD_ERROR: | |
919 return "NPP_THRESHOLD_ERROR"; | |
920 | |
921 case NPP_CONTEXT_MATCH_ERROR: | |
922 return "NPP_CONTEXT_MATCH_ERROR"; | |
923 | |
924 case NPP_FFT_FLAG_ERROR: | |
925 return "NPP_FFT_FLAG_ERROR"; | |
926 | |
927 case NPP_FFT_ORDER_ERROR: | |
928 return "NPP_FFT_ORDER_ERROR"; | |
929 | |
930 case NPP_SCALE_RANGE_ERROR: | |
931 return "NPP_SCALE_RANGE_ERROR"; | |
932 | |
933 case NPP_DATA_TYPE_ERROR: | |
934 return "NPP_DATA_TYPE_ERROR"; | |
935 | |
936 case NPP_OUT_OFF_RANGE_ERROR: | |
937 return "NPP_OUT_OFF_RANGE_ERROR"; | |
938 | |
939 case NPP_DIVIDE_BY_ZERO_ERROR: | |
940 return "NPP_DIVIDE_BY_ZERO_ERROR"; | |
941 | |
942 case NPP_RANGE_ERROR: | |
943 return "NPP_RANGE_ERROR"; | |
944 | |
945 case NPP_NO_MEMORY_ERROR: | |
946 return "NPP_NO_MEMORY_ERROR"; | |
947 | |
948 case NPP_ERROR_RESERVED: | |
949 return "NPP_ERROR_RESERVED"; | |
950 | |
951 case NPP_NO_OPERATION_WARNING: | |
952 return "NPP_NO_OPERATION_WARNING"; | |
953 | |
954 case NPP_DIVIDE_BY_ZERO_WARNING: | |
955 return "NPP_DIVIDE_BY_ZERO_WARNING"; | |
956 #endif | |
957 | |
958 #if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) >= 0x7000 | |
959 /* These are 7.0 or higher */ | |
960 case NPP_OVERFLOW_ERROR: | |
961 return "NPP_OVERFLOW_ERROR"; | |
962 | |
963 case NPP_CORRUPTED_DATA_ERROR: | |
964 return "NPP_CORRUPTED_DATA_ERROR"; | |
965 #endif | |
966 } | |
967 | |
968 return "<unknown>"; | |
969 } | |
970 #endif | |
971 | |
972 #ifdef __DRIVER_TYPES_H__ | |
973 #ifndef DEVICE_RESET | |
974 #define DEVICE_RESET cudaDeviceReset(); | |
975 #endif | |
976 #else | |
977 #ifndef DEVICE_RESET | |
978 #define DEVICE_RESET | |
979 #endif | |
980 #endif | |
981 | |
305 | 982 #ifdef __DRIVER_TYPES_H__ |
304
9755206813cb
helper_string.h for ANSI C
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
303
diff
changeset
|
983 static inline void check(CUresult result, char const *const func, const char *const file, int const line) |
291 | 984 { |
985 if (result) | |
986 { | |
987 fprintf(stderr, "CUDA error at %s:%d code=%d(%s) \"%s\" \n", | |
304
9755206813cb
helper_string.h for ANSI C
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
303
diff
changeset
|
988 file, line, (unsigned int)(result), _cudaGetErrorEnum(result), func); |
291 | 989 DEVICE_RESET |
990 // Make sure we call CUDA Device Reset before exiting | |
991 exit(EXIT_FAILURE); | |
992 } | |
993 } | |
304
9755206813cb
helper_string.h for ANSI C
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
303
diff
changeset
|
994 #else |
9755206813cb
helper_string.h for ANSI C
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
303
diff
changeset
|
995 static inline void check(cudaError_t result, char const *const func, const char *const file, int const line) |
9755206813cb
helper_string.h for ANSI C
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
303
diff
changeset
|
996 { |
9755206813cb
helper_string.h for ANSI C
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
303
diff
changeset
|
997 if (result) |
9755206813cb
helper_string.h for ANSI C
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
303
diff
changeset
|
998 { |
9755206813cb
helper_string.h for ANSI C
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
303
diff
changeset
|
999 fprintf(stderr, "CUDA error at %s:%d code=%d(%s) \"%s\" \n", |
9755206813cb
helper_string.h for ANSI C
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
303
diff
changeset
|
1000 file, line, (unsigned int)(result), _cudaGetErrorEnum(result), func); |
9755206813cb
helper_string.h for ANSI C
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
303
diff
changeset
|
1001 DEVICE_RESET |
9755206813cb
helper_string.h for ANSI C
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
303
diff
changeset
|
1002 // Make sure we call CUDA Device Reset before exiting |
9755206813cb
helper_string.h for ANSI C
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
303
diff
changeset
|
1003 exit(EXIT_FAILURE); |
9755206813cb
helper_string.h for ANSI C
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
303
diff
changeset
|
1004 } |
9755206813cb
helper_string.h for ANSI C
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
303
diff
changeset
|
1005 } |
9755206813cb
helper_string.h for ANSI C
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
303
diff
changeset
|
1006 #endif |
291 | 1007 |
#ifdef __DRIVER_TYPES_H__
// This will output the proper CUDA error strings in the event that a CUDA host call returns an error
#define checkCudaErrors(val) check ( (val), #val, __FILE__, __LINE__ )

// This will output the proper error string when calling cudaGetLastError
#define getLastCudaError(msg) __getLastCudaError (msg, __FILE__, __LINE__)

/*
 * Query cudaGetLastError() and terminate the program if it reports an error.
 *
 * errorMessage : caller-supplied context string included in the diagnostic.
 * file, line   : source location to report (filled in by getLastCudaError).
 *
 * On error a message containing the numeric code and cudaGetErrorString()
 * text is written to stderr, DEVICE_RESET is executed and the process exits
 * with EXIT_FAILURE. Returns silently when the status is cudaSuccess.
 *
 * Declared static inline: a plain `inline` definition in a C header does not
 * provide an external definition, which breaks linking whenever the call is
 * not actually inlined.
 */
static inline void __getLastCudaError(const char *errorMessage, const char *file, const int line)
{
    cudaError_t err = cudaGetLastError();

    if (cudaSuccess != err)
    {
        fprintf(stderr, "%s(%i) : getLastCudaError() CUDA error : %s : (%d) %s.\n",
                file, line, errorMessage, (int)err, cudaGetErrorString(err));
        DEVICE_RESET
        exit(EXIT_FAILURE);
    }
}
#endif
1028 | |
#ifndef MAX
// Larger of two values. Both arguments and the comparison are parenthesised
// so that operands containing lower-precedence operators (|, &, ?:, ...)
// are compared as whole expressions.
// NOTE: the selected argument is evaluated twice; avoid side effects.
#define MAX(a,b) (((a) > (b)) ? (a) : (b))
#endif
1032 | |
// Float To Int conversion: rounds to the nearest integer, with halves going
// away from zero (1.5 -> 2, -1.5 -> -2).
// The 0.5 literal is a double, so the offset is added in double precision
// before the (int) cast truncates toward zero.
// Declared static inline so that every translation unit including this header
// gets its own definition; a plain `inline` definition in C does not emit one.
static inline int ftoi(float value)
{
    return (value >= 0 ? (int)(value + 0.5) : (int)(value - 0.5));
}
1038 | |
1039 // Beginning of GPU Architecture definitions | |
/*
 * Map a GPU compute capability (SM version) to the number of cores per SM.
 *
 * major, minor : compute capability, e.g. (3, 5) for SM 3.5.
 *
 * Returns the core count listed for that SM version. When the version is not
 * in the table, a notice is printed to stdout and the count of the last
 * table entry is returned so that callers can keep running.
 *
 * Declared static inline so that every translation unit including this header
 * gets its own definition; a plain `inline` definition in C does not emit one.
 */
static inline int _ConvertSMVer2Cores(int major, int minor)
{
    // Defines for GPU Architecture types (using the SM version to determine the # of cores per SM
    typedef struct sSMtoCores
    {
        int SM; // 0xMm (hexadecimal notation), M = SM Major version, and m = SM minor version
        int Cores;
    } sSMtoCores;

    // Built once rather than on every call; terminated by the { -1, -1 } sentinel.
    static const sSMtoCores nGpuArchCoresPerSM[] =
    {
        { 0x20, 32 }, // Fermi Generation (SM 2.0) GF100 class
        { 0x21, 48 }, // Fermi Generation (SM 2.1) GF10x class
        { 0x30, 192}, // Kepler Generation (SM 3.0) GK10x class
        { 0x32, 192}, // Kepler Generation (SM 3.2) GK10x class
        { 0x35, 192}, // Kepler Generation (SM 3.5) GK11x class
        { 0x37, 192}, // Kepler Generation (SM 3.7) GK21x class
        { 0x50, 128}, // Maxwell Generation (SM 5.0) GM10x class
        { 0x52, 128}, // Maxwell Generation (SM 5.2) GM20x class
        { 0x53, 128}, // Maxwell Generation (SM 5.3) GM20x class
        { 0x60, 64 }, // Pascal Generation (SM 6.0) GP100 class
        { 0x61, 128}, // Pascal Generation (SM 6.1) GP10x class
        { 0x62, 128}, // Pascal Generation (SM 6.2) GP10x class
        { 0x70, 64 }, // Volta Generation (SM 7.0)
        { 0x72, 64 }, // Volta Generation (SM 7.2)
        { 0x75, 64 }, // Turing Generation (SM 7.5)
        { 0x80, 64 }, // Ampere Generation (SM 8.0)
        { 0x86, 128}, // Ampere Generation (SM 8.6)
        { 0x87, 128}, // Ampere Generation (SM 8.7)
        { 0x89, 128}, // Ada Generation (SM 8.9)
        { 0x90, 128}, // Hopper Generation (SM 9.0)
        {   -1, -1 }
    };

    // Pack the version the same way the table keys are encoded (0xMm).
    const int sm = (major << 4) + minor;
    int index;

    for (index = 0; nGpuArchCoresPerSM[index].SM != -1; index++)
    {
        if (nGpuArchCoresPerSM[index].SM == sm)
        {
            return nGpuArchCoresPerSM[index].Cores;
        }
    }

    // If we don't find the values, we default use the previous one to run properly
    // (index now addresses the sentinel, so index-1 is the last real entry).
    printf("MapSMtoCores for SM %d.%d is undefined.  Default to use %d Cores/SM\n", major, minor, nGpuArchCoresPerSM[index-1].Cores);
    return nGpuArchCoresPerSM[index-1].Cores;
}
1082 // end of GPU Architecture definitions | |
1083 | |
1084 | |
1085 // end of CUDA Helper Functions | |
1086 | |
1087 | |
1088 #endif |