changeset 1566:7d307bac94a6 draft

fft
author Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
date Sat, 16 Mar 2013 18:36:50 +0900
parents 4ef32873b5be
children 67f362c6325c
files example/add/ppe/Add.cc example/fft/Func.h example/fft/main.cc example/fft/ppe/bitReverse.cc example/fft/ppe/butterfly.cc example/fft/ppe/highPassFilter.cc example/fft/ppe/norm.cc example/fft/ppe/spinFact.cc example/fft/ppe/transpose.cc example/fft/task_init.cc
diffstat 10 files changed, 62 insertions(+), 53 deletions(-) [+]
line wrap: on
line diff
--- a/example/add/ppe/Add.cc	Fri Mar 15 07:00:34 2013 +0900
+++ b/example/add/ppe/Add.cc	Sat Mar 16 18:36:50 2013 +0900
@@ -10,12 +10,10 @@
 run(SchedTask *s,void *rbuf, void *wbuf)
 {
     float *A,*B,*C;
-
+    
     A = (float*)s->get_input(rbuf, 0);
     B = (float*)s->get_input(rbuf, 1);
     C = (float*)s->get_output(wbuf, 0);
-    
     *C=*A+*B;
-    
     return 0;
 }
--- a/example/fft/Func.h	Fri Mar 15 07:00:34 2013 +0900
+++ b/example/fft/Func.h	Sat Mar 16 18:36:50 2013 +0900
@@ -1,9 +1,11 @@
 enum {
 #include "SysTasks.h"
+    SET_ID,
     SPIN_FACT,
     BIT_REVERSE,
     NORMALIZATION,
     BUTTERFLY,
     TRANSEPOSE,
     HIGH_PASS_FILTER,
+#include "GlobalID.h"
 };
--- a/example/fft/main.cc	Fri Mar 15 07:00:34 2013 +0900
+++ b/example/fft/main.cc	Sat Mar 16 18:36:50 2013 +0900
@@ -74,23 +74,20 @@
 int fftCore(TaskManager *manager,cl_float2 *dst, cl_float2 *src, cl_float2 *spin, int m_, enum Mode direction)
 {
     int *iter = new int[1];
-    unsigned int flag;
+    unsigned int* flag = new unsigned int[1];
     switch (direction) {
-    case forward:flag = 0x00000000; break;
-    case inverse:flag = 0x80000000; break;
+    case forward:flag[0] = 0x00000000; break;
+    case inverse:flag[0] = 0x80000000; break;
     }
-    
-    int *n = new int[1];
-    int *m = new int[1];
-    m[0]=m_;
+    int* n = new int[1];
+    int* m = new int[1];
+    m[0] = m_;
     n[0] = 1<<m[0];
-
     size_t gws[2],lws[2];
-
     int length_dst = n[0]*n[0];
     int length_src = n[0]*n[0];
     HTask *brev = manager->create_task(BIT_REVERSE);
-    setWorkSize(gws,lws,n[0],n[0]);
+    //    setWorkSize(gws,lws,n[0],n[0]);
     cl_uint dimension = 2;
     brev->set_param(0,(memaddr)length_src);
     brev->set_param(1,(memaddr)dimension);
@@ -104,9 +101,9 @@
     brev->set_outData(0, dst, length_dst*sizeof(cl_float2));
     brev->set_cpu(spe_cpu);
     brev->nd_range();
-
+    
     HTaskPtr *bfly = (HTask**)manager->allocate(sizeof(HTask*)*m_);
-    setWorkSize(gws,lws,n[0]/2,n[0]);
+    //    setWorkSize(gws,lws,n[0]/2,n[0]);
 
     for (iter[0]=1; iter[0]<=m_;iter[0]++) {
         bfly[iter[0]-1] = manager->create_task(BUTTERFLY);
@@ -132,10 +129,14 @@
             bfly[iter[0]-1]->wait_for(bfly[iter[0]-2]);
         }
     }
+    brev->spawn();
+    for (int i=0;i<m_;i++) {
+        bfly[i]->spawn();
+    }
     
     if (direction == inverse) { 
         HTask *norm = manager->create_task(NORMALIZATION);
-        setWorkSize(gws,lws,n[0],n[0]);
+        //        setWorkSize(gws,lws,n[0],n[0]);
         norm->set_param(0,(memaddr)length_dst);
         norm->set_param(1,(memaddr)dimension);
         norm->set_param(2,(memaddr)gws[0]);
@@ -150,11 +151,10 @@
         norm->wait_for(bfly[m[0]-1]);
         norm->spawn();
     }
-    brev->spawn();
-    for (int i=0;i<m_;i++) {
-        bfly[i]->spawn();
-    }
-    
+    // brev->spawn();
+    // for (int i=0;i<m_;i++) {
+    //     bfly[i]->spawn();
+    // }
     return 0;
 }
 
@@ -216,6 +216,10 @@
         }
     }
     
+    HTask* setid = manager->create_task(SET_ID);
+    setid->set_cpu(spe_cpu);
+    setid->spawn();
+    
     // Create spin factor 
     HTask *sfac = manager->create_task(SPIN_FACT);
     int length_w = n[0] / 2;
@@ -228,12 +232,13 @@
     ndr->lws[0] = lws[0];
     ndr->lws[1] = lws[1];
     sfac->set_param(0,ndr);
-
+    
     sfac->set_param(1, (memaddr)length_w);
     sfac->set_inData(0, n, sizeof(int));
     sfac->set_outData(0, wm, length_w*sizeof(cl_float2));
     sfac->set_cpu(spe_cpu);
     sfac->nd_range();
+    sfac->wait_for(setid);
     sfac->spawn();
 
     // Butterfly Operation 
@@ -281,7 +286,7 @@
     hpfl->set_cpu(spe_cpu);
     hpfl->nd_range();
     hpfl->wait_for(trns[0]);
-
+    hpfl->spawn();
     // Inverse FFT
 
     // Butterfly Operation
@@ -331,7 +336,7 @@
     if (err<0) {
         fprintf(stderr, "Failed to read image file.\n");
         exit(1);
-    }    
+    }
     st_time = getTime();
     run_start(manager, ipgm);
     manager->set_TMend(TMend);
--- a/example/fft/ppe/bitReverse.cc	Fri Mar 15 07:00:34 2013 +0900
+++ b/example/fft/ppe/bitReverse.cc	Sat Mar 16 18:36:50 2013 +0900
@@ -4,20 +4,20 @@
 #else
 #include <CL/cl.h>
 #endif
+#include "Func.h"
 
 SchedDefineTask1(bitReverse,bitReverse);
 
 static int
 bitReverse(SchedTask* s, void* rbuf, void* wbuf)
 {
-    cl_float2* src = (cl_float2*)s->get_input(rbuf,0);
-    int* m = (int*)s->get_input(rbuf,1);
-    int* n = (int*)s->get_input(rbuf,2);
-    cl_float2* dst = (cl_float2*)s->get_output(wbuf,0);
-    unsigned int* gid = (unsigned int*)s->global_get(0);
-    unsigned int* nid = (unsigned int*)s->global_get(1);
+    cl_float2* src = (cl_float2*)s->get_inputAddr(0);
+    int* m = (int*)s->get_inputAddr(1);
+    int* n = (int*)s->get_inputAddr(2);
+    cl_float2* dst = (cl_float2*)s->get_outputAddr(0);
+    unsigned int* gid = (unsigned int*)s->global_get(FIRSTID);
+    unsigned int* nid = (unsigned int*)s->global_get(SECONDID);
     unsigned int j = gid[0];
-    
     j = (j & 0x55555555) << 1 | (j & 0xAAAAAAAA) >> 1;
     j = (j & 0x33333333) << 2 | (j & 0xCCCCCCCC) >> 2;
     j = (j & 0x0F0F0F0F) << 4 | (j & 0xF0F0F0F0) >> 4;
--- a/example/fft/ppe/butterfly.cc	Fri Mar 15 07:00:34 2013 +0900
+++ b/example/fft/ppe/butterfly.cc	Sat Mar 16 18:36:50 2013 +0900
@@ -4,20 +4,20 @@
 #else
 #include <CL/cl.h>
 #endif
-
+#include "Func.h"
 SchedDefineTask1(butterfly,butterfly);
 
 static int
 butterfly(SchedTask* s,void* rbuf,void* wbuf)
 {
-    cl_float2* x = (cl_float2*)s->get_input(rbuf,0);
-    cl_float2* w = (cl_float2*)s->get_input(rbuf,1);
-    int* n = (int*)s->get_input(rbuf,3);
-    int* iter = (int*)s->get_input(rbuf,4);
-    cl_uint* flag = (cl_uint*)s->get_input(rbuf,5);
+    cl_float2* x = (cl_float2*)s->get_inputAddr(0);
+    cl_float2* w = (cl_float2*)s->get_inputAddr(1);
+    int* n = (int*)s->get_inputAddr(3);
+    int* iter = (int*)s->get_inputAddr(4);
+    unsigned int* flag = (unsigned int*)s->get_inputAddr(5);
     
-    unsigned int* gid = (unsigned int*)s->global_get(0);
-    unsigned int* nid = (unsigned int*)s->global_get(1);
+    unsigned int* gid = (unsigned int*)s->global_get(FIRSTID);
+    unsigned int* nid = (unsigned int*)s->global_get(SECONDID);
 
     int butterflySize = 1 << (iter[0]-1);
     int butterflyGrpDist = 1 << iter[0];
--- a/example/fft/ppe/highPassFilter.cc	Fri Mar 15 07:00:34 2013 +0900
+++ b/example/fft/ppe/highPassFilter.cc	Sat Mar 16 18:36:50 2013 +0900
@@ -4,7 +4,7 @@
 #else
 #include <CL/cl.h>
 #endif
-
+#include "Func.h"
 SchedDefineTask1(highPassFilter,highPassFilter);
 
 static int
@@ -13,8 +13,8 @@
     int* n = (int*)s->get_input(rbuf,0);
     int* radius = (int*)s->get_input(rbuf,1);
     cl_float2* image = (cl_float2*)s->get_output(wbuf,0);
-    unsigned int* xgid = (unsigned int*)s->global_get(0);
-    unsigned int* ygid = (unsigned int*)s->global_get(1);
+    unsigned int* xgid = (unsigned int*)s->global_get(FIRSTID);
+    unsigned int* ygid = (unsigned int*)s->global_get(SECONDID);
 
     cl_int2 n_2;
     n_2.x = n_2.y = n[0]>>1;
--- a/example/fft/ppe/norm.cc	Fri Mar 15 07:00:34 2013 +0900
+++ b/example/fft/ppe/norm.cc	Sat Mar 16 18:36:50 2013 +0900
@@ -4,16 +4,16 @@
 #else
 #include <CL/cl.h>
 #endif
-
+#include "Func.h"
 SchedDefineTask1(norm,norm);
 
 static int
 norm(SchedTask* s, void* rbuf,void* wbuf)
 {
-    cl_float2* x = (cl_float2*)s->get_input(rbuf,0);
-    int* n = (int*)s->get_input(rbuf,1);
-    unsigned int* gid = (unsigned int*)s->global_get(0);
-    unsigned int* nid = (unsigned int*)s->global_get(1);
+    cl_float2* x = (cl_float2*)s->get_inputAddr(0);
+    int* n = (int*)s->get_inputAddr(1);
+    unsigned int* gid = (unsigned int*)s->global_get(FIRSTID);
+    unsigned int* nid = (unsigned int*)s->global_get(SECONDID);
     
     x[nid[0]*n[0]+gid[0]].x = x[nid[0]*n[0]+gid[0]].x / (float)n[0];
     x[nid[0]*n[0]+gid[0]].y = x[nid[0]*n[0]+gid[0]].y / (float)n[0];
--- a/example/fft/ppe/spinFact.cc	Fri Mar 15 07:00:34 2013 +0900
+++ b/example/fft/ppe/spinFact.cc	Sat Mar 16 18:36:50 2013 +0900
@@ -5,6 +5,7 @@
 #else
 #include <CL/cl.h>
 #endif
+#include "Func.h"
 
 #define PI 3.14159265358979323846
 #define PI_2 1.57079632679489661923
@@ -16,7 +17,7 @@
 {
     int* n = (int*)s->get_input(rbuf,0);
     cl_float2* w = (cl_float2*)s->get_output(wbuf,0);
-    unsigned int* i = (unsigned int*)s->global_get(0);
+    unsigned int* i = (unsigned int*)s->global_get(FIRSTID);
     cl_float2 angle;
     angle.x = (float)(2*i[0]*PI/(float)n[0]);
     angle.y = (float)((2*i[0]*PI/(float)n[0]) + PI_2);
--- a/example/fft/ppe/transpose.cc	Fri Mar 15 07:00:34 2013 +0900
+++ b/example/fft/ppe/transpose.cc	Sat Mar 16 18:36:50 2013 +0900
@@ -4,17 +4,18 @@
 #else
 #include <CL/cl.h>
 #endif
+#include "Func.h"
 
 SchedDefineTask1(transpose,transpose);
 
 static int
 transpose(SchedTask* s,void* rbuf,void* wbuf)
 {
-    cl_float2* src = (cl_float2*)s->get_input(rbuf,0);
-    int* n = (int*)s->get_input(rbuf,1);
-    cl_float2* dst = (cl_float2*)s->get_output(wbuf,0);
-    unsigned int* xgid = (unsigned int*)s->global_get(0);
-    unsigned int* ygid = (unsigned int*)s->global_get(1);
+    cl_float2* src = (cl_float2*)s->get_inputAddr(0);
+    int* n = (int*)s->get_inputAddr(1);
+    cl_float2* dst = (cl_float2*)s->get_outputAddr(0);
+    unsigned int* xgid = (unsigned int*)s->global_get(FIRSTID);
+    unsigned int* ygid = (unsigned int*)s->global_get(SECONDID);
 
     unsigned int iid = ygid[0] * n[0] + xgid[0];
     unsigned int oid = xgid[0] * n[0] + ygid[0];
--- a/example/fft/task_init.cc	Fri Mar 15 07:00:34 2013 +0900
+++ b/example/fft/task_init.cc	Sat Mar 16 18:36:50 2013 +0900
@@ -3,6 +3,7 @@
 #include "GpuScheduler.h"
 
 #ifndef __CERIUM_GPU__
+SchedExternTask(setid);
 SchedExternTask(spinFact);
 SchedExternTask(bitReverse);
 SchedExternTask(norm);
@@ -22,6 +23,7 @@
     GpuSchedRegister(TRANSEPOSE, "gpu/fft.cl", "transpose");
     GpuSchedRegister(HIGH_PASS_FILTER, "gpu/fft.cl", "highPassFilter");
 #else
+    SchedRegisterTask(SET_ID,setid);
     SchedRegisterTask(SPIN_FACT,spinFact);
     SchedRegisterTask(NORMALIZATION, norm);
     SchedRegisterTask(BIT_REVERSE, bitReverse);