Mercurial > hg > Game > Cerium
changeset 1975:4cf85b48ab9e draft
running fft with CudaScheduler, but wrong result
author | Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp> |
---|---|
date | Wed, 26 Feb 2014 19:39:15 +0900 |
parents | 9ebee99a9aef |
children | a8f4227d6a21 |
files | TaskManager/Gpu/GpuScheduler.cc TaskManager/Gpu/GpuScheduler.h example/fft/Makefile.cuda example/fft/cuda/butterfly.cu example/fft/cuda/highPassFilter.cu example/fft/cuda/norm.cu example/fft/cuda/spinFact.cu example/fft/cuda/task_init.cc example/fft/cuda/transpose.cu example/fft/main.cc |
diffstat | 10 files changed, 153 insertions(+), 8 deletions(-) [+] |
line wrap: on
line diff
--- a/TaskManager/Gpu/GpuScheduler.cc Wed Feb 26 16:24:28 2014 +0900 +++ b/TaskManager/Gpu/GpuScheduler.cc Wed Feb 26 19:39:15 2014 +0900 @@ -294,7 +294,7 @@ int i0 = flag[cur].flip ? i+1 : i ; // flip use memin buffer and memout event ret = clEnqueueReadBuffer(command_queue[cur], mem[cur].buf[i0], CL_FALSE, 0, - output_buf->size, output_buf->addr, 0, NULL, &memout[cur].event[i]); + output_buf->size, output_buf->addr, 0, NULL,&memout[cur].event[i]); if (ret<0) { gpuTaskError(cur,tasklist,ret); continue; } } cur++; // wait write[cur+1]
--- a/TaskManager/Gpu/GpuScheduler.h Wed Feb 26 16:24:28 2014 +0900 +++ b/TaskManager/Gpu/GpuScheduler.h Wed Feb 26 19:39:15 2014 +0900 @@ -15,7 +15,7 @@ extern TaskObject gpu_task_list[MAX_TASK_OBJECT]; -#define STAGE 1 +#define STAGE 8 class GpuScheduler : public MainScheduler { public:
--- a/example/fft/Makefile.cuda Wed Feb 26 16:24:28 2014 +0900 +++ b/example/fft/Makefile.cuda Wed Feb 26 19:39:15 2014 +0900 @@ -18,7 +18,7 @@ CUDA_SRCS = $(filter-out $(CUDA_TASK_DIR)/$(CUDA_SRCS_EXCLUDE),$(CUDA_SRCS_TMP)) CUDA_OBJS = $(CUDA_SRCS:.cu=.ptx) -CFLAGS += -D__CERIUM_CUDA__ +CFLAGS += -D__CERIUM_CUDA__ -DGPU LIBS += `sdl-config --libs` -lCudaManager -F/Library/Frameworks -framework CUDA INCLUDE += -I/Developer/NVIDIA/CUDA-5.5/include
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/example/fft/cuda/butterfly.cu Wed Feb 26 19:39:15 2014 +0900 @@ -0,0 +1,56 @@ +extern "C" { + __global__ void + butterfly(long* param, float* x_in, float* w, float* x_out) + { + unsigned long gid = blockIdx.x*blockDim.x+threadIdx.x; // (unsigned long)s->get_param(0); + unsigned long nid = blockIdx.y*blockDim.y+threadIdx.y; // (unsigned long)s->get_param(1); + + long n = param[0]; + long direction_flag = param[1]; + long iter = param[2]; + + int butterflySize = 1 << (iter-1); + int butterflyGrpDist = 1 << iter; + int butterflyGrpNum = n >> iter; + int butterflyGrpBase = (gid >> (iter-1))*(butterflyGrpDist); + int butterflyGrpOffset = gid & (butterflySize-1); + + int a = nid * n + butterflyGrpBase + butterflyGrpOffset; + int b = a + butterflySize; + + int l = butterflyGrpNum * butterflyGrpOffset; + + float xa[2], xb[2], xbxx[2], xbyy[2], wab[2], wayx[2], wbyx[2], resa[2], resb[2]; + + xa[0] = x_in[2*a]; + xa[1] = x_in[2*a+1]; + xb[0] = x_in[2*b]; + xb[1] = x_in[2*b+1]; + xbxx[0] = xbxx[1] = xb[0]; + xbyy[0] = xbyy[1] = xb[1]; + + wab[0] = w[2*l]; + if(direction_flag == 0x80000000) { + wab[1] = -w[2*l+1]; + } else { + wab[1] = w[2*l+1]; + } + + wayx[0] = -wab[1]; + wayx[1] = wab[0]; + + wbyx[0] = wab[1]; + wbyx[1] = -wab[0]; + + resa[0] = xa[0] + xbxx[0]*wab[0] + xbyy[0]*wayx[0]; + resa[1] = xa[1] + xbxx[1]*wab[1] + xbyy[1]*wayx[1]; + + resb[0] = xa[0] - xbxx[0]*wab[0] + xbyy[0]*wbyx[0]; + resb[1] = xa[1] - xbxx[1]*wab[1] + xbyy[1]*wbyx[1]; + + x_out[2*a] = resa[0]; + x_out[2*a+1] = resa[1]; + x_out[2*b] = resb[0]; + x_out[2*b+1] = resb[1]; + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/example/fft/cuda/highPassFilter.cu Wed Feb 26 19:39:15 2014 +0900 @@ -0,0 +1,42 @@ +extern "C" { + __global__ void + highPassFilter(long* param, float* in, float* image) + { + unsigned long xgid = blockIdx.x*blockDim.x+threadIdx.x; // (unsigned long)s->get_param(0); + unsigned long ygid = blockIdx.y*blockDim.y+threadIdx.y; // (unsigned long)s->get_param(1); + + long n = param[0]; + long radius = param[1]; + + int n_2[2]; + n_2[0] = n_2[1] = n>>1; + + int mask[2]; + mask[0] = mask[1] = n-1; + + int gid[2]; + gid[0] = (xgid + n_2[0]) & mask[0]; + gid[1] = (ygid + n_2[1]) & mask[1]; + + int diff[2]; + diff[0] = n_2[0] - gid[0]; + diff[1] = n_2[1] - gid[1]; + + int diff2[2]; + diff2[0] = diff[0] * diff[0]; + diff2[1] = diff[1] * diff[1]; + + int dist2 = diff2[0] + diff2[1]; + + int window[2]; + + if (dist2 < radius*radius) { + window[0] = window[1] = (int)0L; + } else { + window[0] = window[1] = (int)-1L; + } + + image[(ygid*n+xgid)*2] = (float)((int)in[(ygid*n+xgid)*2] & window[0]); + image[(ygid*n+xgid)*2+1] = (float)((int)in[(ygid*n+xgid)*2+1] & window[1]); + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/example/fft/cuda/norm.cu Wed Feb 26 19:39:15 2014 +0900 @@ -0,0 +1,13 @@ +extern "C" { + __global__ void + norm(long* param, float* in_x,float* out_x) + { + unsigned long gid = blockIdx.x*blockDim.x+threadIdx.x; // (unsigned long)s->get_param(0); + unsigned long nid = blockIdx.y*blockDim.y+threadIdx.y; //(unsigned long)s->get_param(1); + + long n = param[0]; + + out_x[(nid*n+gid)*2] = in_x[(nid*n+gid)*2] / (float)n; + out_x[(nid*n+gid)*2+1] = in_x[(nid*n+gid)*2+1] / (float)n; + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/example/fft/cuda/spinFact.cu Wed Feb 26 19:39:15 2014 +0900 @@ -0,0 +1,21 @@ +extern "C" { +#include <math.h> + +#define PI 3.14159265358979323846 +#define PI_2 1.57079632679489661923 + + __global__ void + spinFact(long* param, float* w) + { + unsigned long i = blockIdx.x*blockDim.x+threadIdx.x; // (unsigned long)s->get_param(0); + + long n = param[0]; + + float angle[2]; + angle[0] = (float)(2*i*PI/(float)n); + angle[1] = (float)((2*i*PI/(float)n) + PI_2); + + w[2*i] = cos(angle[0]); + w[2*i+1] = cos(angle[1]); + } +}
--- a/example/fft/cuda/task_init.cc Wed Feb 26 16:24:28 2014 +0900 +++ b/example/fft/cuda/task_init.cc Wed Feb 26 19:39:15 2014 +0900 @@ -10,5 +10,5 @@ CudaSchedRegister(BIT_REVERSE, "cuda/bitReverse.ptx", "bitReverse"); CudaSchedRegister(BUTTERFLY, "cuda/butterfly.ptx", "butterfly"); CudaSchedRegister(TRANSPOSE, "cuda/transpose.ptx", "transpose"); - CudaSchedRegister(HIGH_PASS_FILTER, "gpu/highPassFilter.ptx", "highPassFilter"); + CudaSchedRegister(HIGH_PASS_FILTER, "cuda/highPassFilter.ptx", "highPassFilter"); }
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/example/fft/cuda/transpose.cu Wed Feb 26 19:39:15 2014 +0900 @@ -0,0 +1,16 @@ +extern "C" { + __global__ void + transpose(long* param, float* src, float* dst) + { + unsigned long xgid = blockIdx.x*blockDim.x+threadIdx.x; // (unsigned long)s->get_param(0); + unsigned long ygid = blockIdx.y*blockDim.y*threadIdx.y; // (unsigned long)s->get_param(1); + + long n = param[0]; + + unsigned int iid = ygid * n + xgid; + unsigned int oid = xgid * n + ygid; + + dst[2*oid] = src[2*iid]; + dst[2*oid+1] = src[2*iid+1]; + } +}
--- a/example/fft/main.cc Wed Feb 26 16:24:28 2014 +0900 +++ b/example/fft/main.cc Wed Feb 26 19:39:15 2014 +0900 @@ -125,7 +125,7 @@ brev->set_param(1,n); brev->set_inData(0, src, length_src*sizeof(cl_float2)); brev->set_outData(0, dst, length_dst*sizeof(cl_float2)); - brev->set_cpu(SPE_ANY); + brev->set_cpu(spe_cpu); brev->wait_for(waitTask); brev->iterate(gws[0],gws[1]); @@ -140,7 +140,6 @@ bfly->set_inData(0, dst, length_dst*sizeof(cl_float2)); bfly->set_inData(1, spin, sizeof(cl_float2)*(n/2)); bfly->set_outData(0,dst,length_dst*sizeof(cl_float2)); - //bfly->flip(); bfly->set_cpu(spe_cpu); bfly->wait_for(waitTask); bfly->iterate(gws[0],gws[1]); @@ -153,7 +152,6 @@ norm->set_inData(0,dst,length_dst*sizeof(cl_float2)); norm->set_outData(0, dst, length_dst*sizeof(cl_float2)); norm->set_param(0,n); - // norm->flip(); norm->set_cpu(spe_cpu); norm->wait_for(waitTask); norm->iterate(gws[0],gws[1]); @@ -253,7 +251,6 @@ setWorkSize(gws,lws,n,n); hpfl->set_inData(0,rm,length_r*sizeof(cl_float2)); hpfl->set_outData(0, rm, length_r*sizeof(cl_float2)); - // hpfl->flip(); hpfl->set_param(0,n); hpfl->set_param(1,(long)radius); hpfl->set_cpu(spe_cpu);