Mercurial > hg > Game > Cerium
changeset 1835:144e573b030b draft
fix fft
author | Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp> |
---|---|
date | Fri, 20 Dec 2013 03:00:32 +0900 |
parents | 9f5dbb845689 |
children | 56692133c5fb |
files | example/fft/gpu/bitReverse.cl example/fft/gpu/butterfly.cl example/fft/gpu/highPassFilter.cl example/fft/gpu/norm.cl example/fft/gpu/spinFact.cl example/fft/gpu/transpose.cl example/fft/main.cc example/fft/ppe/bitReverse.cc example/fft/ppe/butterfly.cc example/fft/ppe/highPassFilter.cc example/fft/ppe/norm.cc example/fft/ppe/spinFact.cc example/fft/ppe/transpose.cc example/word_count/main.cc |
diffstat | 14 files changed, 39 insertions(+), 35 deletions(-) [+] |
line wrap: on
line diff
--- a/example/fft/gpu/bitReverse.cl Thu Dec 19 11:11:41 2013 +0900
+++ b/example/fft/gpu/bitReverse.cl Fri Dec 20 03:00:32 2013 +0900
@@ -4,8 +4,8 @@
     unsigned long gid = (unsigned long)get_global_id(0);
     unsigned long nid = (unsigned long)get_global_id(1);
-    unsigned long m = (unsigned long)param[3];
-    unsigned long n = (unsigned long)param[4];
+    unsigned long m = (unsigned long)param[0];
+    unsigned long n = (unsigned long)param[1];
     unsigned int j = gid;
     j = (j & 0x55555555) << 1 | (j & 0xAAAAAAAA) >> 1;
--- a/example/fft/gpu/butterfly.cl Thu Dec 19 11:11:41 2013 +0900
+++ b/example/fft/gpu/butterfly.cl Fri Dec 20 03:00:32 2013 +0900
@@ -4,9 +4,9 @@
     unsigned long gid = (unsigned long)get_global_id(0);
     unsigned long nid = (unsigned long)get_global_id(1);
-    long n = param[3];
-    unsigned long direction_flag = (unsigned long)param[4];
-    long iter = param[5];
+    long n = param[0];
+    unsigned long direction_flag = (unsigned long)param[1];
+    long iter = param[2];
     int butterflySize = 1 << (iter-1);
     int butterflyGrpDist = 1 << iter;
--- a/example/fft/gpu/highPassFilter.cl Thu Dec 19 11:11:41 2013 +0900
+++ b/example/fft/gpu/highPassFilter.cl Fri Dec 20 03:00:32 2013 +0900
@@ -4,8 +4,8 @@
     unsigned long xgid = (unsigned long)get_global_id(0);
     unsigned long ygid = (unsigned long)get_global_id(1);
-    long n = param[3];
-    long radius = param[4];
+    long n = param[0];
+    long radius = param[1];
     int2 n_2;
     n_2.x = n_2.y = n>>1;
--- a/example/fft/gpu/norm.cl Thu Dec 19 11:11:41 2013 +0900
+++ b/example/fft/gpu/norm.cl Fri Dec 20 03:00:32 2013 +0900
@@ -4,7 +4,7 @@
     unsigned long gid = (unsigned long)get_global_id(0);
     unsigned long nid = (unsigned long)get_global_id(1);
-    long n = param[3];
+    long n = param[0];
     out_x[nid*n+gid].x = in_x[nid*n+gid].x / (float)n;
     out_x[nid*n+gid].y = in_x[nid*n+gid].y / (float)n;
--- a/example/fft/gpu/spinFact.cl Thu Dec 19 11:11:41 2013 +0900
+++ b/example/fft/gpu/spinFact.cl Fri Dec 20 03:00:32 2013 +0900
@@ -5,7 +5,7 @@
 void spinFact(__constant long *param, __global float2 *w) {
     unsigned long i = (unsigned long)get_global_id(0);
-    long n =param[3];
+    long n =param[0];
     float2 angle;
     angle.x = (float)(2*i*PI/(float)n);
--- a/example/fft/gpu/transpose.cl Thu Dec 19 11:11:41 2013 +0900
+++ b/example/fft/gpu/transpose.cl Fri Dec 20 03:00:32 2013 +0900
@@ -4,7 +4,7 @@
     unsigned long xgid = (unsigned long)get_global_id(0);
     unsigned long ygid = (unsigned long)get_global_id(1);
-    long n = (long)param[3];
+    long n = (long)param[0];
     unsigned int iid = ygid * n + xgid;
     unsigned int oid = xgid * n + ygid;
--- a/example/fft/main.cc Thu Dec 19 11:11:41 2013 +0900
+++ b/example/fft/main.cc Fri Dec 20 03:00:32 2013 +0900
@@ -121,8 +121,8 @@
     HTask* brev = manager->create_task(BIT_REVERSE);
     setWorkSize(gws,lws,n,n);
-    brev->set_param(3,m);
-    brev->set_param(4,n);
+    brev->set_param(0,m);
+    brev->set_param(1,n);
     brev->set_inData(0, src, length_src*sizeof(cl_float2));
     brev->set_outData(0, dst, length_dst*sizeof(cl_float2));
     brev->set_cpu(spe_cpu);
@@ -134,12 +134,13 @@
     setWorkSize(gws,lws,n/2,n);
     for(int iter=1;iter<=m;iter++) {
         HTask* bfly = manager->create_task(BUTTERFLY);
-        bfly->set_param(3,n);
-        bfly->set_param(4,direction_flag);
-        bfly->set_param(5,(long)iter);
+        bfly->set_param(0,n);
+        bfly->set_param(1,direction_flag);
+        bfly->set_param(2,(long)iter);
         bfly->set_inData(0, dst, length_dst*sizeof(cl_float2));
         bfly->set_inData(1, spin, sizeof(cl_float2)*(n/2));
         bfly->set_outData(0,dst,length_dst*sizeof(cl_float2));
+        //bfly->flip();
         bfly->set_cpu(spe_cpu);
         bfly->wait_for(waitTask);
         bfly->iterate(gws[0],gws[1]);
@@ -151,7 +152,8 @@
     HTask *norm = manager->create_task(NORMALIZATION);
     norm->set_inData(0,dst,length_dst*sizeof(cl_float2));
     norm->set_outData(0, dst, length_dst*sizeof(cl_float2));
-    norm->set_param(3,n);
+    norm->set_param(0,n);
+    norm->flip();
     norm->set_cpu(spe_cpu);
     norm->wait_for(waitTask);
     norm->iterate(gws[0],gws[1]);
@@ -224,7 +226,7 @@
     int length_w = n / 2;
     HTask* sfac = manager->create_task(SPIN_FACT);
     sfac->set_outData(0, wm, length_w*sizeof(cl_float2));
-    sfac->set_param(3,n);
+    sfac->set_param(0,n);
     sfac->set_cpu(spe_cpu);
     sfac->iterate(gws[0]);
@@ -237,7 +239,7 @@
     HTask* first_trns = manager->create_task(TRANSPOSE);
     first_trns->set_inData(0,rm,length_r*sizeof(cl_float2));
     first_trns->set_outData(0,xm,length_r*sizeof(cl_float2));
-    first_trns->set_param(3,n);
+    first_trns->set_param(0,n);
     first_trns->set_cpu(spe_cpu);
     first_trns->wait_for(waitTask);
     first_trns->iterate(gws[0],gws[1]);
@@ -251,8 +253,9 @@
     setWorkSize(gws,lws,n,n);
     hpfl->set_inData(0,rm,length_r*sizeof(cl_float2));
     hpfl->set_outData(0, rm, length_r*sizeof(cl_float2));
-    hpfl->set_param(3,n);
-    hpfl->set_param(4,(long)radius);
+    hpfl->flip();
+    hpfl->set_param(0,n);
+    hpfl->set_param(1,(long)radius);
     hpfl->set_cpu(spe_cpu);
     hpfl->wait_for(waitTask);
     hpfl->iterate(gws[0],gws[1]);
@@ -267,7 +270,7 @@
     HTask* second_trns = manager->create_task(TRANSPOSE);
     second_trns->set_inData(0,xm,length_r*sizeof(cl_float2));
     second_trns->set_outData(0,rm,length_r*sizeof(cl_float2));
-    second_trns->set_param(3,n);
+    second_trns->set_param(0,n);
     second_trns->set_cpu(spe_cpu);
     second_trns->wait_for(waitTask);
     second_trns->iterate(gws[0],gws[1]);
--- a/example/fft/ppe/bitReverse.cc Thu Dec 19 11:11:41 2013 +0900
+++ b/example/fft/ppe/bitReverse.cc Fri Dec 20 03:00:32 2013 +0900
@@ -17,10 +17,11 @@
     unsigned long gid = s->x; // (unsigned long)s->get_param(0);
     unsigned long nid = s->y; // (unsigned long)s->get_param(1);
-    unsigned long m = (unsigned long)s->get_param(3);
-    unsigned long n = (unsigned long)s->get_param(4);
     unsigned int j = gid;
+    unsigned long m = (unsigned long)s->get_param(0);
+    unsigned long n = (unsigned long)s->get_param(1);
+
     j = (j & 0x55555555) << 1 | (j & 0xAAAAAAAA) >> 1;
     j = (j & 0x33333333) << 2 | (j & 0xCCCCCCCC) >> 2;
     j = (j & 0x0F0F0F0F) << 4 | (j & 0xF0F0F0F0) >> 4;
--- a/example/fft/ppe/butterfly.cc Thu Dec 19 11:11:41 2013 +0900
+++ b/example/fft/ppe/butterfly.cc Fri Dec 20 03:00:32 2013 +0900
@@ -18,9 +18,9 @@
     unsigned long gid = s->x; // (unsigned long)s->get_param(0);
     unsigned long nid = s->y; // (unsigned long)s->get_param(1);
-    long n = (long)s->get_param(3);
-    unsigned long direction_flag = (unsigned long)s->get_param(4);
-    long iter = (long)s->get_param(5);
+    long n = (long)s->get_param(0);
+    unsigned long direction_flag = (unsigned long)s->get_param(1);
+    long iter = (long)s->get_param(2);
     int butterflySize = 1 << (iter-1);
     int butterflyGrpDist = 1 << iter;
--- a/example/fft/ppe/highPassFilter.cc Thu Dec 19 11:11:41 2013 +0900
+++ b/example/fft/ppe/highPassFilter.cc Fri Dec 20 03:00:32 2013 +0900
@@ -11,14 +11,14 @@
 highPassFilter(SchedTask* s,void* rbuf,void* wbuf) {
     cl_float2* in = (cl_float2*)s->get_input(rbuf,0);
-
+
     cl_float2* image = (cl_float2*)s->get_output(wbuf,0);
     unsigned long xgid = s->x; // (unsigned long)s->get_param(0);
     unsigned long ygid = s->y; // (unsigned long)s->get_param(1);
-    long n = (long)s->get_param(3);
-    long radius = (long)s->get_param(4);
+    long n = (long)s->get_param(0);
+    long radius = (long)s->get_param(1);
     cl_int2 n_2;
     n_2.x = n_2.y = n>>1;
--- a/example/fft/ppe/norm.cc Thu Dec 19 11:11:41 2013 +0900
+++ b/example/fft/ppe/norm.cc Fri Dec 20 03:00:32 2013 +0900
@@ -16,8 +16,8 @@
     unsigned long gid = s->x; // (unsigned long)s->get_param(0);
     unsigned long nid = s->y; //(unsigned long)s->get_param(1);
-
-    long n = (long)s->get_param(3);
+
+    long n = (long)s->get_param(0);
     out_x[nid*n+gid].x = in_x[nid*n+gid].x / (float)n;
     out_x[nid*n+gid].y = in_x[nid*n+gid].y / (float)n;
--- a/example/fft/ppe/spinFact.cc Thu Dec 19 11:11:41 2013 +0900
+++ b/example/fft/ppe/spinFact.cc Fri Dec 20 03:00:32 2013 +0900
@@ -19,7 +19,7 @@
     unsigned long i = s->x; // (unsigned long)s->get_param(0);
-    long n = (long)s->get_param(3);
+    long n = (long)s->get_param(0);
     cl_float2 angle;
     angle.x = (float)(2*i*PI/(float)n);
--- a/example/fft/ppe/transpose.cc Thu Dec 19 11:11:41 2013 +0900
+++ b/example/fft/ppe/transpose.cc Fri Dec 20 03:00:32 2013 +0900
@@ -18,7 +18,7 @@
     unsigned long xgid = s->x; // (unsigned long)s->get_param(0);
     unsigned long ygid = s->y; // (unsigned long)s->get_param(1);
-    long n = (long)s->get_param(3);
+    long n = (long)s->get_param(0);
     unsigned int iid = ygid * n + xgid;
     unsigned int oid = xgid * n + ygid;
--- a/example/word_count/main.cc Thu Dec 19 11:11:41 2013 +0900
+++ b/example/word_count/main.cc Fri Dec 20 03:00:32 2013 +0900
@@ -283,7 +283,7 @@
         (memaddr)&w->self,sizeof(memaddr),0,0);
     w->t_print = t_print;
-    for(int i = 0;i<4;i++) {
+    for(int i=0;i<4;i++) {
         /* Task を task_blocks ずつ起動する Task */
         /* serialize されていると仮定する... */
         HTaskPtr t_exec = manager->create_task(RUN_TASK_BLOCKS,
@@ -327,7 +327,7 @@
             array_task_num = atoi(argv[i+1]);
             i++;
         } else if (strcmp(argv[i], "-g") == 0 ) {
-            spe_cpu = GPU_0;
+            spe_cpu = GPU_ANY;
         } else if (strcmp(argv[i], "-i") == 0) {
             use_iterate = 1;
             use_task_array = 0;