Mercurial > hg > Members > Moririn
changeset 300:8bbc0012e1a4
checkErrors on an example
author | Shinji KONO <kono@ie.u-ryukyu.ac.jp> |
---|---|
date | Sun, 12 Feb 2017 09:12:21 +0900 |
parents | b387b224790c |
children | 609bf62768b9 |
files | src/test/twice.cc src/test/vectorAddDrv.cc |
diffstat | 2 files changed, 39 insertions(+), 121 deletions(-) [+] |
line wrap: on
line diff
--- a/src/test/twice.cc Sun Feb 12 08:36:22 2017 +0900 +++ b/src/test/twice.cc Sun Feb 12 09:12:21 2017 +0900 @@ -43,6 +43,9 @@ if (strcmp(argv[i], "--stream") == 0 || strcmp(argv[i], "-s") == 0) { num_stream = atoi(argv[++i]); } + if (strcmp(argv[i], "--numExec") == 0 || strcmp(argv[i], "-e") == 0) { + num_exec = atoi(argv[++i]); + } } // initialize and load kernel @@ -57,8 +60,10 @@ checkCudaErrors(cuCtxCreate(&context, CU_CTX_SCHED_SPIN, device)); checkCudaErrors(cuModuleLoad(&module, "multiply.ptx")); checkCudaErrors(cuModuleGetFunction(&function, module, "multiply")); - for (int i=0;i<num_stream;i++) - checkCudaErrors(cuStreamCreate(&stream[i],0)); + if (num_stream) { + for (int i=0;i<num_stream;i++) + checkCudaErrors(cuStreamCreate(&stream[i],0)); + } // memory allocate CUdeviceptr devA; @@ -94,7 +99,11 @@ if (num_stream <= cur) cur = 0; B[i] = (float)(i+1); - checkCudaErrors(cuMemcpyHtoDAsync(devB[i], &B[i], sizeof(float), stream[cur])); + if (num_stream) { + checkCudaErrors(cuMemcpyHtoDAsync(devB[i], &B[i], sizeof(float), stream[cur])); + } else { + checkCudaErrors(cuMemcpyHtoD(devB[i], &B[i], sizeof(float))); + } } cur = 0; @@ -120,7 +129,11 @@ for (int i=0;i<num_exec;i++,cur++) { if (num_stream <= cur) cur = 0; - checkCudaErrors(cuMemcpyDtoHAsync(result[i], devOut[i], LENGTH*THREAD*sizeof(float), stream[cur])); + if (num_stream) { + checkCudaErrors(cuMemcpyDtoHAsync(result[i], devOut[i], LENGTH*THREAD*sizeof(float), stream[cur])); + } else { + checkCudaErrors(cuMemcpyDtoH(result[i], devOut[i], LENGTH*THREAD*sizeof(float))); + } } // wait for stream
--- a/src/test/vectorAddDrv.cc Sun Feb 12 08:36:22 2017 +0900 +++ b/src/test/vectorAddDrv.cc Sun Feb 12 09:12:21 2017 +0900 @@ -72,7 +72,6 @@ int N = 50000, devID = 0; size_t size = N * sizeof(float); - CUresult error; ParseArguments(argc, argv); // Initialize @@ -116,13 +115,7 @@ // Get number of devices supporting CUDA int deviceCount = 0; - error = cuDeviceGetCount(&deviceCount); - - if (error != CUDA_SUCCESS) - { - Cleanup(false); - } - + checkCudaErrors(cuDeviceGetCount(&deviceCount)); if (deviceCount == 0) { printf("There is no device supporting CUDA.\n"); @@ -150,21 +143,9 @@ } // pick up device with zero ordinal (default, or devID) - error = cuDeviceGet(&cuDevice, devID); - - if (error != CUDA_SUCCESS) - { - Cleanup(false); - } - + checkCudaErrors(cuDeviceGet(&cuDevice, devID)); // Create context - error = cuCtxCreate(&cuContext, 0, cuDevice); - - if (error != CUDA_SUCCESS) - { - Cleanup(false); - } - + checkCudaErrors(cuCtxCreate(&cuContext, 0, cuDevice)); // first search for the module path before we load the results string module_path, ptx_source; @@ -204,90 +185,40 @@ int jitRegCount = 32; jitOptVals[2] = (void *)(size_t)jitRegCount; - error = cuModuleLoadDataEx(&cuModule, ptx_source.c_str(), jitNumOptions, jitOptions, (void **)jitOptVals); + checkCudaErrors(cuModuleLoadDataEx(&cuModule, ptx_source.c_str(), jitNumOptions, jitOptions, (void **)jitOptVals)); printf("> PTX JIT log:\n%s\n", jitLogBuffer); } else { - error = cuModuleLoad(&cuModule, module_path.c_str()); - } - - if (error != CUDA_SUCCESS) - { - Cleanup(false); + checkCudaErrors(cuModuleLoad(&cuModule, module_path.c_str())); } // Get function handle from module - error = cuModuleGetFunction(&vecAdd_kernel, cuModule, "VecAdd_kernel"); - - if (error != CUDA_SUCCESS) - { - Cleanup(false); - } + checkCudaErrors(cuModuleGetFunction(&vecAdd_kernel, cuModule, "VecAdd_kernel")); // Allocate input vectors h_A and h_B in host memory h_A = (float *)malloc(size); - - if (h_A == 0) - { - Cleanup(false); - } + if (h_A == 0) { Cleanup(false); } h_B = (float *)malloc(size); - - if (h_B == 0) - { - Cleanup(false); - } + if (h_B == 0) { Cleanup(false); } h_C = (float *)malloc(size); - - if (h_C == 0) - { - Cleanup(false); - } + if (h_C == 0) { Cleanup(false); } // Initialize input vectors RandomInit(h_A, N); RandomInit(h_B, N); // Allocate vectors in device memory - error = cuMemAlloc(&d_A, size); - - if (error != CUDA_SUCCESS) - { - Cleanup(false); - } - - error = cuMemAlloc(&d_B, size); - - if (error != CUDA_SUCCESS) - { - Cleanup(false); - } - - error = cuMemAlloc(&d_C, size); - - if (error != CUDA_SUCCESS) - { - Cleanup(false); - } + checkCudaErrors(cuMemAlloc(&d_A, size)); + checkCudaErrors(cuMemAlloc(&d_B, size)); + checkCudaErrors(cuMemAlloc(&d_C, size)); // Copy vectors from host memory to device memory - error = cuMemcpyHtoD(d_A, h_A, size); - - if (error != CUDA_SUCCESS) - { - Cleanup(false); - } - - error = cuMemcpyHtoD(d_B, h_B, size); - - if (error != CUDA_SUCCESS) - { - Cleanup(false); - } + checkCudaErrors(cuMemcpyHtoD(d_A, h_A, size)); + checkCudaErrors(cuMemcpyHtoD(d_B, h_B, size)); #if 1 @@ -302,15 +233,10 @@ void *args[] = { &d_A, &d_B, &d_C, &N }; // Launch the CUDA kernel - error = cuLaunchKernel(vecAdd_kernel, blocksPerGrid, 1, 1, + checkCudaErrors(cuLaunchKernel(vecAdd_kernel, blocksPerGrid, 1, 1, threadsPerBlock, 1, 1, 0, - NULL, args, NULL); - - if (error != CUDA_SUCCESS) - { - Cleanup(false); - } + NULL, args, NULL)); } else { @@ -331,15 +257,10 @@ int blocksPerGrid = (N + threadsPerBlock - 1) / threadsPerBlock; // Launch the CUDA kernel - error = cuLaunchKernel(vecAdd_kernel, blocksPerGrid, 1, 1, + checkCudaErrors(cuLaunchKernel(vecAdd_kernel, blocksPerGrid, 1, 1, threadsPerBlock, 1, 1, 0, - NULL, NULL, argBuffer); - - if (error != CUDA_SUCCESS) - { - Cleanup(false); - } + NULL, NULL, argBuffer)); } #else @@ -369,36 +290,20 @@ int blocksPerGrid = (N + threadsPerBlock - 1) / threadsPerBlock; // Launch the CUDA kernel - error = cuLaunchKernel(vecAdd_kernel, blocksPerGrid, 1, 1, + checkCudaErrors(cuLaunchKernel(vecAdd_kernel, blocksPerGrid, 1, 1, threadsPerBlock, 1, 1, 0, 0, - NULL, (void **)&kernel_launch_config); - - if (error != CUDA_SUCCESS) - { - Cleanup(false); - } + NULL, (void **)&kernel_launch_config)); } #endif #ifdef _DEBUG - error = cuCtxSynchronize(); - - if (error != CUDA_SUCCESS) - { - Cleanup(false); - } - + checkCudaErrors(cuCtxSynchronize()); #endif // Copy result from device memory to host memory // h_C contains the result in host memory - error = cuMemcpyDtoH(h_C, d_C, size); - - if (error != CUDA_SUCCESS) - { - Cleanup(false); - } + checkCudaErrors(cuMemcpyDtoH(h_C, d_C, size)); // Verify result int i;