changeset 1835:144e573b030b draft

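fix fft: index task parameters from 0 instead of 3 (GPU kernels, PPE tasks and main.cc), call flip() on the norm and highPassFilter tasks, and make word_count's -g option select GPU_ANY instead of GPU_0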
author Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
date Fri, 20 Dec 2013 03:00:32 +0900
parents 9f5dbb845689
children 56692133c5fb
files example/fft/gpu/bitReverse.cl example/fft/gpu/butterfly.cl example/fft/gpu/highPassFilter.cl example/fft/gpu/norm.cl example/fft/gpu/spinFact.cl example/fft/gpu/transpose.cl example/fft/main.cc example/fft/ppe/bitReverse.cc example/fft/ppe/butterfly.cc example/fft/ppe/highPassFilter.cc example/fft/ppe/norm.cc example/fft/ppe/spinFact.cc example/fft/ppe/transpose.cc example/word_count/main.cc
diffstat 14 files changed, 39 insertions(+), 35 deletions(-) [+]
line wrap: on
line diff
--- a/example/fft/gpu/bitReverse.cl	Thu Dec 19 11:11:41 2013 +0900
+++ b/example/fft/gpu/bitReverse.cl	Fri Dec 20 03:00:32 2013 +0900
@@ -4,8 +4,8 @@
     unsigned long gid = (unsigned long)get_global_id(0);
     unsigned long nid = (unsigned long)get_global_id(1);
     
-    unsigned long m = (unsigned long)param[3];
-    unsigned long n = (unsigned long)param[4];
+    unsigned long m = (unsigned long)param[0];
+    unsigned long n = (unsigned long)param[1];
     unsigned int j = gid;
     
     j = (j & 0x55555555) << 1 | (j & 0xAAAAAAAA) >> 1;
--- a/example/fft/gpu/butterfly.cl	Thu Dec 19 11:11:41 2013 +0900
+++ b/example/fft/gpu/butterfly.cl	Fri Dec 20 03:00:32 2013 +0900
@@ -4,9 +4,9 @@
     unsigned long gid = (unsigned long)get_global_id(0);
     unsigned long nid = (unsigned long)get_global_id(1);
     
-    long n = param[3];
-    unsigned long direction_flag = (unsigned long)param[4];
-    long iter = param[5];
+    long n = param[0];
+    unsigned long direction_flag = (unsigned long)param[1];
+    long iter = param[2];
 
     int butterflySize = 1 << (iter-1);
     int butterflyGrpDist = 1 << iter;
--- a/example/fft/gpu/highPassFilter.cl	Thu Dec 19 11:11:41 2013 +0900
+++ b/example/fft/gpu/highPassFilter.cl	Fri Dec 20 03:00:32 2013 +0900
@@ -4,8 +4,8 @@
     unsigned long xgid = (unsigned long)get_global_id(0);
     unsigned long ygid = (unsigned long)get_global_id(1);
 
-    long n      = param[3];
-    long radius = param[4];
+    long n      = param[0];
+    long radius = param[1];
 
     int2 n_2;
     n_2.x = n_2.y = n>>1;
--- a/example/fft/gpu/norm.cl	Thu Dec 19 11:11:41 2013 +0900
+++ b/example/fft/gpu/norm.cl	Fri Dec 20 03:00:32 2013 +0900
@@ -4,7 +4,7 @@
     unsigned long gid = (unsigned long)get_global_id(0);
     unsigned long nid = (unsigned long)get_global_id(1);
 
-    long n = param[3];
+    long n = param[0];
 
     out_x[nid*n+gid].x = in_x[nid*n+gid].x / (float)n;
     out_x[nid*n+gid].y = in_x[nid*n+gid].y / (float)n;
--- a/example/fft/gpu/spinFact.cl	Thu Dec 19 11:11:41 2013 +0900
+++ b/example/fft/gpu/spinFact.cl	Fri Dec 20 03:00:32 2013 +0900
@@ -5,7 +5,7 @@
 void spinFact(__constant long *param, __global float2 *w)
 {
     unsigned long i = (unsigned long)get_global_id(0);
-    long n =param[3];
+    long n =param[0];
 
     float2 angle;
     angle.x = (float)(2*i*PI/(float)n);
--- a/example/fft/gpu/transpose.cl	Thu Dec 19 11:11:41 2013 +0900
+++ b/example/fft/gpu/transpose.cl	Fri Dec 20 03:00:32 2013 +0900
@@ -4,7 +4,7 @@
     unsigned long xgid = (unsigned long)get_global_id(0);
     unsigned long ygid = (unsigned long)get_global_id(1);
 
-    long n = (long)param[3];
+    long n = (long)param[0];
 
     unsigned int iid = ygid * n + xgid;
     unsigned int oid = xgid * n + ygid;
--- a/example/fft/main.cc	Thu Dec 19 11:11:41 2013 +0900
+++ b/example/fft/main.cc	Fri Dec 20 03:00:32 2013 +0900
@@ -121,8 +121,8 @@
 
     HTask* brev = manager->create_task(BIT_REVERSE);
     setWorkSize(gws,lws,n,n);
-    brev->set_param(3,m);
-    brev->set_param(4,n);
+    brev->set_param(0,m);
+    brev->set_param(1,n);
     brev->set_inData(0, src, length_src*sizeof(cl_float2));
     brev->set_outData(0, dst, length_dst*sizeof(cl_float2));
     brev->set_cpu(spe_cpu);
@@ -134,12 +134,13 @@
     setWorkSize(gws,lws,n/2,n);
     for(int iter=1;iter<=m;iter++) {
         HTask* bfly = manager->create_task(BUTTERFLY);
-        bfly->set_param(3,n);
-        bfly->set_param(4,direction_flag);
-        bfly->set_param(5,(long)iter);
+        bfly->set_param(0,n);
+        bfly->set_param(1,direction_flag);
+        bfly->set_param(2,(long)iter);
         bfly->set_inData(0, dst, length_dst*sizeof(cl_float2));
         bfly->set_inData(1, spin, sizeof(cl_float2)*(n/2));
         bfly->set_outData(0,dst,length_dst*sizeof(cl_float2));
+        //bfly->flip();
         bfly->set_cpu(spe_cpu);
         bfly->wait_for(waitTask);
         bfly->iterate(gws[0],gws[1]);
@@ -151,7 +152,8 @@
         HTask *norm = manager->create_task(NORMALIZATION);
         norm->set_inData(0,dst,length_dst*sizeof(cl_float2));
         norm->set_outData(0, dst, length_dst*sizeof(cl_float2));
-        norm->set_param(3,n);
+        norm->set_param(0,n);
+        norm->flip();
         norm->set_cpu(spe_cpu);
         norm->wait_for(waitTask);
         norm->iterate(gws[0],gws[1]);
@@ -224,7 +226,7 @@
     int length_w = n / 2;
     HTask* sfac = manager->create_task(SPIN_FACT);
     sfac->set_outData(0, wm, length_w*sizeof(cl_float2));
-    sfac->set_param(3,n);
+    sfac->set_param(0,n);
     sfac->set_cpu(spe_cpu);
     sfac->iterate(gws[0]);
 
@@ -237,7 +239,7 @@
     HTask* first_trns = manager->create_task(TRANSPOSE);
     first_trns->set_inData(0,rm,length_r*sizeof(cl_float2));
     first_trns->set_outData(0,xm,length_r*sizeof(cl_float2));
-    first_trns->set_param(3,n);
+    first_trns->set_param(0,n);
     first_trns->set_cpu(spe_cpu);
     first_trns->wait_for(waitTask);
     first_trns->iterate(gws[0],gws[1]);
@@ -251,8 +253,9 @@
     setWorkSize(gws,lws,n,n);
     hpfl->set_inData(0,rm,length_r*sizeof(cl_float2));
     hpfl->set_outData(0, rm, length_r*sizeof(cl_float2));
-    hpfl->set_param(3,n);
-    hpfl->set_param(4,(long)radius);
+    hpfl->flip();
+    hpfl->set_param(0,n);
+    hpfl->set_param(1,(long)radius);
     hpfl->set_cpu(spe_cpu);
     hpfl->wait_for(waitTask);
     hpfl->iterate(gws[0],gws[1]);
@@ -267,7 +270,7 @@
     HTask* second_trns = manager->create_task(TRANSPOSE);
     second_trns->set_inData(0,xm,length_r*sizeof(cl_float2));
     second_trns->set_outData(0,rm,length_r*sizeof(cl_float2));
-    second_trns->set_param(3,n);
+    second_trns->set_param(0,n);
     second_trns->set_cpu(spe_cpu);
     second_trns->wait_for(waitTask);
     second_trns->iterate(gws[0],gws[1]);
--- a/example/fft/ppe/bitReverse.cc	Thu Dec 19 11:11:41 2013 +0900
+++ b/example/fft/ppe/bitReverse.cc	Fri Dec 20 03:00:32 2013 +0900
@@ -17,10 +17,11 @@
     unsigned long gid = s->x; // (unsigned long)s->get_param(0);
     unsigned long nid = s->y; // (unsigned long)s->get_param(1);
     
-    unsigned long m = (unsigned long)s->get_param(3);
-    unsigned long n = (unsigned long)s->get_param(4);
     unsigned int j = gid;
 
+    unsigned long m = (unsigned long)s->get_param(0);
+    unsigned long n = (unsigned long)s->get_param(1);
+
     j = (j & 0x55555555) << 1 | (j & 0xAAAAAAAA) >> 1;
     j = (j & 0x33333333) << 2 | (j & 0xCCCCCCCC) >> 2;
     j = (j & 0x0F0F0F0F) << 4 | (j & 0xF0F0F0F0) >> 4;
--- a/example/fft/ppe/butterfly.cc	Thu Dec 19 11:11:41 2013 +0900
+++ b/example/fft/ppe/butterfly.cc	Fri Dec 20 03:00:32 2013 +0900
@@ -18,9 +18,9 @@
     unsigned long gid = s->x; // (unsigned long)s->get_param(0);
     unsigned long nid = s->y; // (unsigned long)s->get_param(1);
     
-    long n = (long)s->get_param(3);
-    unsigned long direction_flag = (unsigned long)s->get_param(4);
-    long iter = (long)s->get_param(5);
+    long n = (long)s->get_param(0);
+    unsigned long direction_flag = (unsigned long)s->get_param(1);
+    long iter = (long)s->get_param(2);
 
     int butterflySize = 1 << (iter-1);
     int butterflyGrpDist = 1 << iter;
--- a/example/fft/ppe/highPassFilter.cc	Thu Dec 19 11:11:41 2013 +0900
+++ b/example/fft/ppe/highPassFilter.cc	Fri Dec 20 03:00:32 2013 +0900
@@ -11,14 +11,14 @@
 highPassFilter(SchedTask* s,void* rbuf,void* wbuf)
 {
     cl_float2* in = (cl_float2*)s->get_input(rbuf,0);
-
+    
     cl_float2* image = (cl_float2*)s->get_output(wbuf,0);
 
     unsigned long xgid = s->x; // (unsigned long)s->get_param(0);
     unsigned long ygid = s->y; // (unsigned long)s->get_param(1);
 
-    long n = (long)s->get_param(3);
-    long radius = (long)s->get_param(4);
+    long n = (long)s->get_param(0);
+    long radius = (long)s->get_param(1);
 
     cl_int2 n_2;
     n_2.x = n_2.y = n>>1;
--- a/example/fft/ppe/norm.cc	Thu Dec 19 11:11:41 2013 +0900
+++ b/example/fft/ppe/norm.cc	Fri Dec 20 03:00:32 2013 +0900
@@ -16,8 +16,8 @@
 
     unsigned long gid = s->x; // (unsigned long)s->get_param(0);
     unsigned long nid = s->y; //(unsigned long)s->get_param(1);
-
-    long n = (long)s->get_param(3);
+    
+    long n = (long)s->get_param(0);
 
     out_x[nid*n+gid].x = in_x[nid*n+gid].x / (float)n;
     out_x[nid*n+gid].y = in_x[nid*n+gid].y / (float)n;
--- a/example/fft/ppe/spinFact.cc	Thu Dec 19 11:11:41 2013 +0900
+++ b/example/fft/ppe/spinFact.cc	Fri Dec 20 03:00:32 2013 +0900
@@ -19,7 +19,7 @@
 
     unsigned long i = s->x; // (unsigned long)s->get_param(0);
     
-    long n = (long)s->get_param(3);
+    long n = (long)s->get_param(0);
 
     cl_float2 angle;
     angle.x = (float)(2*i*PI/(float)n);
--- a/example/fft/ppe/transpose.cc	Thu Dec 19 11:11:41 2013 +0900
+++ b/example/fft/ppe/transpose.cc	Fri Dec 20 03:00:32 2013 +0900
@@ -18,7 +18,7 @@
     unsigned long xgid = s->x; // (unsigned long)s->get_param(0);
     unsigned long ygid = s->y; // (unsigned long)s->get_param(1);
 
-    long n = (long)s->get_param(3);
+    long n = (long)s->get_param(0);
 
     unsigned int iid = ygid * n + xgid;
     unsigned int oid = xgid * n + ygid;
--- a/example/word_count/main.cc	Thu Dec 19 11:11:41 2013 +0900
+++ b/example/word_count/main.cc	Fri Dec 20 03:00:32 2013 +0900
@@ -283,7 +283,7 @@
                                    (memaddr)&w->self,sizeof(memaddr),0,0);
     w->t_print = t_print;
     
-    for(int i = 0;i<4;i++) {
+    for(int i=0;i<4;i++) {
         /* Task を task_blocks ずつ起動する Task */
         /* serialize されていると仮定する... */
         HTaskPtr t_exec = manager->create_task(RUN_TASK_BLOCKS,
@@ -327,7 +327,7 @@
             array_task_num = atoi(argv[i+1]);
             i++;
         } else if (strcmp(argv[i], "-g") == 0 ) {
-            spe_cpu = GPU_0;
+            spe_cpu = GPU_ANY;
         } else if (strcmp(argv[i], "-i") == 0) {
             use_iterate = 1;
             use_task_array = 0;