changeset 1625:6ff0c34c8a3c draft

Fix fft example: use iterate() instead of spawning one task per index
author Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
date Wed, 29 May 2013 21:42:47 +0900
parents c0482a63d811
children 4fed76f4d101
files TaskManager/kernel/ppe/CpuThreads.cc example/fft/main.cc example/fft/ppe/bitReverse.cc example/fft/ppe/butterfly.cc example/fft/ppe/spinFact.cc
diffstat 5 files changed, 38 insertions(+), 67 deletions(-) [+]
line wrap: on
line diff
--- a/TaskManager/kernel/ppe/CpuThreads.cc	Tue May 28 17:57:33 2013 +0900
+++ b/TaskManager/kernel/ppe/CpuThreads.cc	Wed May 29 21:42:47 2013 +0900
@@ -101,7 +101,7 @@
 
 void
 CpuThreads::spawn_task(int id, TaskListPtr p) {
-    p->self->flag.dim_count = 1;
+    p->self->flag.dim_count = 1; // dim_count always starts at the minimum CPU count, which is 1.
     if (p->dim>0) {
         int dim_count = (p->x)*(p->y)*(p->z);
         if (cpu_num < dim_count) {
--- a/example/fft/main.cc	Tue May 28 17:57:33 2013 +0900
+++ b/example/fft/main.cc	Wed May 29 21:42:47 2013 +0900
@@ -44,6 +44,7 @@
     gettimeofday(&tv, NULL);
     return tv.tv_sec + (double)tv.tv_usec*1e-6;
 }
+
 const char *usr_help_str = "Usage: ./fft [option]\n \
 options\n\
   -cpu     Number of SPE used (default 1)\n\
@@ -72,7 +73,6 @@
 
 int fftCore(TaskManager *manager,cl_float2 *dst, cl_float2 *src, cl_float2 *spin, int m_, enum Mode direction)
 {
-    int iter;
     unsigned int* flag = new unsigned int[1];
     switch (direction) {
     case forward:flag[0] = 0x00000000; break;
@@ -86,47 +86,27 @@
     int length_dst = n[0]*n[0];
     int length_src = n[0]*n[0];
     cl_uint dimension = 2;
-    HTask* brev;
-    int i,j;
+
+    HTask* brev = manager->create_task(BIT_REVERSE);
     setWorkSize(gws,lws,n[0],n[0]);
-    for(i=0;i<gws[0];i++){
-        for(j=0;j<gws[1];j++){
-            brev = manager->create_task(BIT_REVERSE);
-            brev->set_param(0,(memaddr)length_src);
-            brev->set_param(1,(memaddr)i);
-            brev->set_param(2,(memaddr)j);
-            brev->set_inData(0, src, length_src*sizeof(cl_float2));
-            brev->set_inData(1, m,sizeof(int));
-            brev->set_inData(2, n,sizeof(int));
-            brev->set_outData(0, dst, length_dst*sizeof(cl_float2));
-            brev->set_cpu(spe_cpu);
-            brev->spawn();
-        }
-    }
-    exit(0);
-    HTask* bfly;
-    setWorkSize(gws,lws,n[0]/2,n[0]);
+    brev->set_inData(0, src, length_src*sizeof(cl_float2));
+    brev->set_inData(1, m,sizeof(int));
+    brev->set_inData(2, n,sizeof(int));
+    brev->set_outData(0, dst, length_dst*sizeof(cl_float2));
+    brev->set_cpu(spe_cpu);
+    brev->iterate(gws[0],gws[1]);
 
-    for (iter=1; iter<=m_;iter++) {
-        for(i=0;i<gws[0];i++){
-            for(j=0;i<gws[1];j++){
-                bfly = manager->create_task(BUTTERFLY);
-                bfly->set_param(0,(memaddr)length_dst);
-                bfly->set_param(1,(memaddr)iter);
-                bfly->set_param(2,(memaddr)i);
-                bfly->set_param(3,(memaddr)j);
-                bfly->set_inData(0, dst, length_dst*sizeof(cl_float2));
-                bfly->set_inData(1, spin, sizeof(cl_float2)*(n[0]/2));
-                bfly->set_inData(2, m,sizeof(int));
-                bfly->set_inData(3, n,sizeof(int));
-                bfly->set_inData(4, flag,sizeof(int));
-                bfly->set_outData(0,dst,length_dst*sizeof(cl_float2));
-                bfly->set_cpu(spe_cpu);
-                bfly->wait_for(brev);
-                bfly->spawn();
-            }
-        }
-    }
+    HTask* bfly = manager->create_task(BUTTERFLY);
+    setWorkSize(gws,lws,n[0]/2,n[0]);
+    bfly->set_inData(0, dst, length_dst*sizeof(cl_float2));
+    bfly->set_inData(1, spin, sizeof(cl_float2)*(n[0]/2));
+    bfly->set_inData(2, m,sizeof(int));
+    bfly->set_inData(3, n,sizeof(int));
+    bfly->set_inData(4, flag,sizeof(int));
+    bfly->set_outData(0,dst,length_dst*sizeof(cl_float2));
+    bfly->set_cpu(spe_cpu);
+    bfly->wait_for(brev);
+    bfly->iterate(m_,gws[0],gws[1]);
     
     if (direction == inverse) { 
         HTask *norm = manager->create_task(NORMALIZATION);
@@ -140,7 +120,6 @@
         norm->set_inData(0, n,sizeof(int));
         norm->set_outData(0, dst, length_dst*sizeof(cl_float2));
         norm->set_cpu(spe_cpu);
-        norm->nd_range();
         norm->flip();
         norm->wait_for(bfly);
         norm->spawn();
@@ -182,7 +161,6 @@
     cl_float2 *xm = (cl_float2 *)malloc(n[0] * n[0] * sizeof(cl_float2));
     cl_float2 *rm = (cl_float2 *)malloc(n[0] * n[0] * sizeof(cl_float2));
     cl_float2 *wm = (cl_float2 *)malloc(n[0] / 2 * sizeof(cl_float2));
-    int i,j;
     /*
      * [cl_float2]
      * typedef union
@@ -198,27 +176,22 @@
      * #endif
      * } cl_float2;
      */
-    for (i=0; i<n[0]; i++) {
-        for (j=0; j < n[0]; j++) {
+    for (int i=0; i<n[0]; i++) {
+        for (int j=0; j < n[0]; j++) {
             ((float*)xm)[(2*n[0]*j)+2*i+0] = (float)ipgm.buf[n[0]*j+i];
             ((float*)xm)[(2*n[0]*j)+2*i+1] = (float)0;
         }
     }
         
     // Create spin factor
-    setWorkSize(gws,lws,n[0]/2,1); // Todo:setWorkSize(ndr,n[0]/2,1);でできるように
     int length_w = n[0] / 2;
-    HTask* sfac;
-    for(i=0;i<gws[0];i++){
-        sfac = manager->create_task(SPIN_FACT);
-        sfac->set_param(0, (memaddr)length_w);
-        sfac->set_param(1,(memaddr)i);
-        sfac->set_inData(0, n, sizeof(int));
-        sfac->set_outData(0, wm, length_w*sizeof(cl_float2));
-        sfac->set_cpu(spe_cpu);
-        sfac->nd_range();
-        sfac->spawn();
-    }
+    HTask* sfac = manager->create_task(SPIN_FACT);
+    setWorkSize(gws,lws,n[0]/2,1);
+    sfac->set_inData(0, n, sizeof(int));
+    sfac->set_outData(0, wm, length_w*sizeof(cl_float2));
+    sfac->set_cpu(spe_cpu);
+    sfac->iterate(gws[0]);
+
     // Butterfly Operation
     fftCore(manager, rm, xm, wm, m[0], forward);
     
@@ -240,7 +213,6 @@
         trns[i]->set_inData(1, n,sizeof(int));
         trns[i]->set_outData(0, xm, length_r*sizeof(cl_float2));
         trns[i]->set_cpu(spe_cpu);
-        trns[i]->nd_range();
     }
     trns[0]->wait_for(sfac);
     trns[0]->spawn();
@@ -262,7 +234,6 @@
     hpfl->set_inData(1, radius,sizeof(int));
     hpfl->set_outData(0, rm, length_r*sizeof(cl_float2));
     hpfl->set_cpu(spe_cpu);
-    hpfl->nd_range();
     hpfl->wait_for(trns[0]);
     hpfl->spawn();
     // Inverse FFT
@@ -282,8 +253,8 @@
 
     float* ampd;
     ampd = (float*)malloc(n[0]*n[0]*sizeof(float));
-    for (i=0; i < n[0]; i++) {
-        for (j=0; j < n[0]; j++) {
+    for (int i=0; i < n[0]; i++) {
+        for (int j=0; j < n[0]; j++) {
             ampd[n[0]*((i))+((j))] = (AMP(((float*)xm)[(2*n[0]*i)+2*j], ((float*)xm)[(2*n[0]*i)+2*j+1]));
         }
     }
--- a/example/fft/ppe/bitReverse.cc	Tue May 28 17:57:33 2013 +0900
+++ b/example/fft/ppe/bitReverse.cc	Wed May 29 21:42:47 2013 +0900
@@ -16,8 +16,8 @@
     int* n = (int*)s->get_inputAddr(2);
     cl_float2* dst = (cl_float2*)s->get_outputAddr(0);
     
-    unsigned long gid = (unsigned long)s->get_param(1);
-    unsigned long nid = (unsigned long)s->get_param(2);
+    unsigned long gid = (unsigned long)s->get_param(0);
+    unsigned long nid = (unsigned long)s->get_param(1);
     unsigned int j = gid;
     
     j = (j & 0x55555555) << 1 | (j & 0xAAAAAAAA) >> 1;
--- a/example/fft/ppe/butterfly.cc	Tue May 28 17:57:33 2013 +0900
+++ b/example/fft/ppe/butterfly.cc	Wed May 29 21:42:47 2013 +0900
@@ -17,9 +17,9 @@
     
     cl_float2* x_out = (cl_float2*)s->get_outputAddr(0);
 
-    long iter = (long)s->get_inputAddr(1);
-    unsigned long gid = (unsigned long)s->get_param(2);
-    unsigned long nid = (unsigned long)s->get_param(3);
+    long iter = (long)s->get_param(0) + 1;
+    unsigned long gid = (unsigned long)s->get_param(1);
+    unsigned long nid = (unsigned long)s->get_param(2);
 
     int butterflySize = 1 << (iter-1);
     int butterflyGrpDist = 1 << iter;
--- a/example/fft/ppe/spinFact.cc	Tue May 28 17:57:33 2013 +0900
+++ b/example/fft/ppe/spinFact.cc	Wed May 29 21:42:47 2013 +0900
@@ -19,7 +19,7 @@
 
     cl_float2* w = (cl_float2*)s->get_output(wbuf,0);
 
-    unsigned long i = (unsigned long)s->get_param(1);
+    unsigned long i = (unsigned long)s->get_param(0);
     cl_float2 angle;
     angle.x = (float)(2*i*PI/(float)n[0]);
     angle.y = (float)((2*i*PI/(float)n[0]) + PI_2);