Mercurial > hg > Members > Moririn
diff src/test/twice.cu @ 292:2bc63a22dd21
add twice
author | ikkun |
---|---|
date | Thu, 09 Feb 2017 19:51:32 +0900 |
parents | src/test/main.cc@87128b876c63 |
children |
line wrap: on
line diff
// src/test/twice.cu
//
// Driver-API stream test: loads the "multiply" function from multiply.ptx,
// launches it num_exec times with the work distributed round-robin over a
// configurable number of CUDA streams, copies every result back and checks
// it on the host against A[i] * B.
//
// Usage: twice [--stream N | -s N]     (N >= 1, default 1)

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>

#include <vector>

// NOTE(review): <cuda.h> is kept ahead of helper_cuda.h exactly as in the
// original; presumably checkCudaErrors needs it to accept CUresult -- confirm
// before reordering.
#include <cuda.h>

#include <cuda_runtime.h>
#include "helper_cuda.h"

#define LENGTH (10)   // grid size in blocks (x dimension) used for every launch
#define THREAD (10)   // block size in threads (x dimension) used for every launch

// Returns the current wall-clock time in seconds (microsecond resolution),
// as reported by gettimeofday().
double
getTime() {
    struct timeval tv;
    gettimeofday(&tv, NULL);
    return tv.tv_sec + (double)tv.tv_usec * 1e-6;
}

// Verifies that C[i] == A[i] * B for all LENGTH*THREAD elements.
//
// Prints "multiply failure." and stops at the first mismatch, otherwise
// prints "success.".  Returns true when every element matched so the caller
// can turn a mismatch into a failing exit status.
//
// The comparison is intentionally exact: with the inputs generated in main()
// every product is an integer no larger than 1099 * 16, which a float
// represents exactly.
bool
check_data(const float* A, float B, const float* C) {
    for (int i = 0; i < LENGTH * THREAD; i++) {
        if (A[i] * B != C[i]) {
            puts("multiply failure.");
            return false;
        }
    }
    puts("success.");
    return true;
}

// Debug helper: prints the LENGTH*THREAD elements of C, one per line.
void print_result(const float* C) {
    for (int i = 0; i < LENGTH * THREAD; i++) {
        printf("%f\n", C[i]);
    }
}

// Entry point.
//
// Options:
//   --stream N, -s N   number of CUDA streams to spread the work over
//
// Returns EXIT_SUCCESS when every kernel result verified, EXIT_FAILURE on a
// bad command line or on any result mismatch.  CUDA API errors are handled by
// checkCudaErrors().
int main(int argc, char* argv[]) {
    int num_stream = 1;        // number of streams
    const int num_exec = 16;   // number of executed kernels

    for (int i = 1; i < argc; i++) {
        if (strcmp(argv[i], "--stream") == 0 || strcmp(argv[i], "-s") == 0) {
            // The option needs a value; without this guard the terminating
            // NULL of argv would be handed to atoi() and the loop would then
            // step past the end of argv.
            if (i + 1 >= argc) {
                fprintf(stderr, "%s requires a value\n", argv[i]);
                return EXIT_FAILURE;
            }
            num_stream = atoi(argv[++i]);
        }
    }

    // Every copy and launch below indexes stream[i % num_stream], so at least
    // one stream must exist.
    if (num_stream < 1) {
        fprintf(stderr, "number of streams must be a positive integer\n");
        return EXIT_FAILURE;
    }

    const int    num_elem = LENGTH * THREAD;
    const size_t vec_size = num_elem * sizeof(float);

    // initialize and load kernel
    CUdevice   device;
    CUcontext  context;
    CUmodule   module;
    CUfunction function;
    std::vector<CUstream> stream(num_stream);

    checkCudaErrors(cuInit(0));
    checkCudaErrors(cuDeviceGet(&device, 0));
    checkCudaErrors(cuCtxCreate(&context, CU_CTX_SCHED_SPIN, device));
    checkCudaErrors(cuModuleLoad(&module, "multiply.ptx"));
    checkCudaErrors(cuModuleGetFunction(&function, module, "multiply"));
    for (int i = 0; i < num_stream; i++)
        checkCudaErrors(cuStreamCreate(&stream[i], 0));

    // memory allocate: one shared input vector, plus one scalar and one
    // output vector per kernel execution
    CUdeviceptr devA;
    std::vector<CUdeviceptr> devB(num_exec);
    std::vector<CUdeviceptr> devOut(num_exec);

    checkCudaErrors(cuMemAlloc(&devA, vec_size));
    for (int i = 0; i < num_exec; i++) {
        checkCudaErrors(cuMemAlloc(&devB[i], sizeof(float)));
        checkCudaErrors(cuMemAlloc(&devOut[i], vec_size));
    }

    // input buffers: A = 1000, 1001, ...   B = 1, 2, ..., num_exec
    std::vector<float> A(num_elem);
    std::vector<float> B(num_exec);

    for (int i = 0; i < num_elem; i++)
        A[i] = (float)(i + 1000);
    for (int i = 0; i < num_exec; i++)
        B[i] = (float)(i + 1);

    // output buffers, one per kernel execution
    std::vector<std::vector<float> > result(num_exec, std::vector<float>(num_elem));

    // Synchronous data transfer(host to device)
    checkCudaErrors(cuMemcpyHtoD(devA, &A[0], vec_size));

    // Asynchronous data transfer(host to device).  B is fully initialised
    // above and is not written again, so the host source of a pending copy is
    // never modified.
    for (int i = 0; i < num_exec; i++) {
        checkCudaErrors(cuMemcpyHtoDAsync(devB[i], &B[i], sizeof(float),
                                          stream[i % num_stream]));
    }

    // Asynchronous launch kernel.  Execution i uses the same stream as the
    // upload of B[i], so stream ordering places the launch after that copy.
    for (int i = 0; i < num_exec; i++) {
        void* args[] = {&devA, &devB[i], &devOut[i]};
        checkCudaErrors(cuLaunchKernel(function,
                                       LENGTH, 1, 1,
                                       THREAD, 1, 1,
                                       0, stream[i % num_stream], args, NULL));
    }

    // Asynchronous data transfer(device to host), again on the stream that
    // ran execution i.
    for (int i = 0; i < num_exec; i++) {
        checkCudaErrors(cuMemcpyDtoHAsync(&result[i][0], devOut[i], vec_size,
                                          stream[i % num_stream]));
    }

    // wait for stream
    for (int i = 0; i < num_stream; i++)
        checkCudaErrors(cuStreamSynchronize(stream[i]));

    // Timing hook (disabled): record `double start = getTime();` before the
    // transfers and enable the line below.
    //printf("%0.6f\n",getTime()-start);

    bool all_ok = true;
    for (int i = 0; i < num_exec; i++) {
        if (!check_data(&A[0], (float)(i + 1), &result[i][0]))
            all_ok = false;
    }

    // memory release
    checkCudaErrors(cuMemFree(devA));
    for (int i = 0; i < num_exec; i++) {
        checkCudaErrors(cuMemFree(devB[i]));
        checkCudaErrors(cuMemFree(devOut[i]));
    }
    for (int i = 0; i < num_stream; i++)
        checkCudaErrors(cuStreamDestroy(stream[i]));
    checkCudaErrors(cuModuleUnload(module));
    checkCudaErrors(cuCtxDestroy(context));

    return all_ok ? EXIT_SUCCESS : EXIT_FAILURE;
}