Mercurial > hg > Game > Cerium
changeset 1977:126b28fdae50 draft
fix cuda sample
author | Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp> |
---|---|
date | Tue, 04 Mar 2014 18:11:06 +0900 |
parents | a8f4227d6a21 |
children | 8fbe022126e1 |
files | example/Cuda/main.cc example/Cuda/multiply.cu |
diffstat | 2 files changed, 18 insertions(+), 14 deletions(-) [+] |
line wrap: on
line diff
--- a/example/Cuda/main.cc Mon Mar 03 19:12:02 2014 +0900 +++ b/example/Cuda/main.cc Tue Mar 04 18:11:06 2014 +0900 @@ -4,7 +4,7 @@ #include <cuda.h> #define LENGTH 10000 -#define THREAD 1000 +#define THREAD 100 static double getTime() { @@ -81,16 +81,16 @@ // Synchronous data transfer(host to device) cuMemcpyHtoD(devA, A, LENGTH*THREAD*sizeof(float)); - + // Asynchronous data transfer(host to device) int cur = 0; - for (int i=0;i<num_exec;i++,cur++) { - if (num_stream <= cur) - cur = 0; - B[i] = (float)(i+1); - cuMemcpyHtoDAsync(devB[i], &B[i], sizeof(float), stream[cur]); - } + // for (int i=0;i<num_exec;i++,cur++) { + // if (num_stream <= cur) + // cur = 0; + // B[i] = (float)(i+1); + // cuMemcpyHtoDAsync(devB[i], &B[i], sizeof(float), stream[cur]); + // } cur = 0; @@ -98,22 +98,25 @@ for (int i=0;i<num_exec;i++,cur++) { if (num_stream <= cur) cur=0; + B[i] = (float)(i+1); + cuMemcpyHtoDAsync(devB[i], &B[i], sizeof(float), stream[cur]); void* args[] = {&devA, &devB[i], &devOut[i]}; cuLaunchKernel(function, LENGTH, 1, 1, THREAD, 1, 1, - 0, 0, args, NULL); + 0, stream[cur], args, NULL); + cuMemcpyDtoHAsync(result[i], devOut[i], LENGTH*THREAD*sizeof(float), stream[cur]); } cur = 0; // Asynchronous data transfer(device to host) - for (int i=0;i<num_exec;i++,cur++) { - if (num_stream <= cur) - cur = 0; - cuMemcpyDtoHAsync(result[i], devOut[i], LENGTH*THREAD*sizeof(float), stream[cur]); - } + // for (int i=0;i<num_exec;i++,cur++) { + // if (num_stream <= cur) + // cur = 0; + // cuMemcpyDtoHAsync(result[i], devOut[i], LENGTH*THREAD*sizeof(float), stream[cur]); + // } // wait for stream for (int i=0;i<num_stream;i++)