changeset 1643:6c0b6947c231 draft

fix fft
author Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
date Sat, 22 Jun 2013 18:10:55 +0900
parents b975f29893c7
children b9f46cc511d3
files TaskManager/kernel/ppe/SynchronizedMailManager.cc TaskManager/test/SetCpuTest/Makefile.gpu example/basic/main.cc example/fft/main.cc example/fft/ppe/bitReverse.cc example/fft/ppe/butterfly.cc example/fft/ppe/highPassFilter.cc example/fft/ppe/norm.cc example/fft/ppe/setid.cc example/fft/ppe/setid.h example/fft/ppe/spinFact.cc example/fft/ppe/transpose.cc example/fft/task_init.cc
diffstat 13 files changed, 105 insertions(+), 131 deletions(-) [+]
line wrap: on
line diff
--- a/TaskManager/kernel/ppe/SynchronizedMailManager.cc	Tue Jun 18 21:42:58 2013 +0900
+++ b/TaskManager/kernel/ppe/SynchronizedMailManager.cc	Sat Jun 22 18:10:55 2013 +0900
@@ -24,9 +24,9 @@
 
 SynchronizedMailManager::~SynchronizedMailManager()
 {
-	free(queue);
-	delete queue_remain;
-	delete queue_count;
+    free(queue);
+    delete queue_remain;
+    delete queue_count;
 }
 
 int 
@@ -38,7 +38,7 @@
 void 
 SynchronizedMailManager::send(memaddr data)
 {
-	queue_remain->sem_p();	//資源-1
+    queue_remain->sem_p();	//資源-1
 
     queue[write++] = data;
     //maskの範囲を超えた場合、0に戻す
@@ -50,13 +50,13 @@
 memaddr 
 SynchronizedMailManager::recv()
 {
-	queue_count->sem_p();		//資源-1
-
+    queue_count->sem_p();		//資源-1
+        
     memaddr data;
-	data = queue[read++];
-	read &= mask;
+    data = queue[read++];
+    read &= mask;
 
-	queue_remain->sem_v();	//資源+1
+    queue_remain->sem_v();	//資源+1
 
     return data;
 }
--- a/TaskManager/test/SetCpuTest/Makefile.gpu	Tue Jun 18 21:42:58 2013 +0900
+++ b/TaskManager/test/SetCpuTest/Makefile.gpu	Sat Jun 22 18:10:55 2013 +0900
@@ -35,13 +35,13 @@
 	cpus=16;./$(TARGET) -cpu $$cpus
 	cpus=24;./$(TARGET) -cpu $$cpus
 
-	cpus=0;./$(TARGET) -cpu $$cpus -gpu
-	cpus=1;./$(TARGET) -cpu $$cpus -gpu
-	cpus=2;./$(TARGET) -cpu $$cpus -gpu
-	cpus=4;./$(TARGET) -cpu $$cpus -gpu	
-	cpus=8;./$(TARGET) -cpu $$cpus -gpu
-	cpus=16;./$(TARGET) -cpu $$cpus -gpu
-	cpus=24;./$(TARGET) -cpu $$cpus -gpu
+	cpus=0;./$(TARGET) -cpu $$cpus -gpu $$cpus
+	cpus=1;./$(TARGET) -cpu $$cpus -gpu $$cpus
+	cpus=2;./$(TARGET) -cpu $$cpus -gpu $$cpus
+	cpus=4;./$(TARGET) -cpu $$cpus -gpu $$cpus
+	cpus=8;./$(TARGET) -cpu $$cpus -gpu $$cpus
+	cpus=16;./$(TARGET) -cpu $$cpus -gpu $$cpus
+	cpus=24;./$(TARGET) -cpu $$cpus -gpu $$cpus
 
 debug: $(TARGET)
 	sudo ppu-gdb ./$(TARGET) 
--- a/example/basic/main.cc	Tue Jun 18 21:42:58 2013 +0900
+++ b/example/basic/main.cc	Sat Jun 22 18:10:55 2013 +0900
@@ -91,6 +91,7 @@
     /*
      * set_post() で ppe task を渡せるようにしたい
      */
+    twice->set_cpu(SPE_ANY);
 
     // add Active Queue
     twice->set_post(twice_result, (void*)data, 0);
--- a/example/fft/main.cc	Tue Jun 18 21:42:58 2013 +0900
+++ b/example/fft/main.cc	Sat Jun 22 18:10:55 2013 +0900
@@ -71,17 +71,14 @@
     return 0;
 }
 
-int fftCore(TaskManager *manager,cl_float2 *dst, cl_float2 *src, cl_float2 *spin, int m_, enum Mode direction)
+int fftCore(TaskManager *manager,cl_float2 *dst, cl_float2 *src, cl_float2 *spin, int m, enum Mode direction)
 {
-    unsigned int* flag = new unsigned int[1];
+    unsigned int direction_flag;
     switch (direction) {
-    case forward:flag[0] = 0x00000000; break;
-    case inverse:flag[0] = 0x80000000; break;
+    case forward:direction_flag = 0x00000000; break;
+    case inverse:direction_flag = 0x80000000; break;
     }
-    int n;
-    int m;
-    m = m_;
-    n = 1<<m;
+    int n = 1<<m;
     size_t gws[2],lws[2];
     int length_dst = n*n;
     int length_src = n*n;
@@ -89,33 +86,32 @@
     HTask* brev = manager->create_task(BIT_REVERSE);
     setWorkSize(gws,lws,n,n);
     brev->set_inData(0, src, length_src*sizeof(cl_float2));
-    brev->set_inData(1, (memaddr)m,sizeof(int));
-    brev->set_inData(2, (memaddr)n,sizeof(int));
     brev->set_outData(0, dst, length_dst*sizeof(cl_float2));
+    brev->set_param(3,m);
+    brev->set_param(4,n);
     brev->set_cpu(spe_cpu);
     brev->iterate(gws[0],gws[1]);
 
     HTask* bfly = manager->create_task(BUTTERFLY);
-    setWorkSize(gws,lws,n[0]/2,n[0]);
+    setWorkSize(gws,lws,n/2,n);
     bfly->set_inData(0, dst, length_dst*sizeof(cl_float2));
-    bfly->set_inData(1, spin, sizeof(cl_float2)*(n[0]/2));
-    bfly->set_inData(2, m,sizeof(int));
-    bfly->set_inData(3, n,sizeof(int));
-    bfly->set_inData(4, flag,sizeof(int));
+    bfly->set_inData(1, spin, sizeof(cl_float2)*(n/2));
     bfly->set_outData(0,dst,length_dst*sizeof(cl_float2));
+    bfly->set_param(3,n);
+    bfly->set_param(4,direction_flag);
     bfly->set_cpu(spe_cpu);
     bfly->wait_for(brev);
-    bfly->iterate(gws[0],gws[1],m_);
+    bfly->iterate(gws[0],gws[1],m);
     
     if (direction == inverse) { 
         HTask *norm = manager->create_task(NORMALIZATION);
-        setWorkSize(gws,lws,n[0],n[0]);
-        norm->set_inData(0, n,sizeof(int));
+        setWorkSize(gws,lws,n,n);
         norm->set_outData(0, dst, length_dst*sizeof(cl_float2));
+        norm->set_param(3,n);
         norm->set_cpu(spe_cpu);
         norm->flip();
         norm->wait_for(bfly);
-        norm->iterate(gws[0],lws[0]);
+        norm->iterate(gws[0],gws[0]);
     }
     return 0;
 }
@@ -133,7 +129,7 @@
         }
     }
     if ( (argc == 1)||(filename==0)) {
-        printf("Usage: ./fft -file [image filename] -cpu or -gpu \n");
+        printf("Usage: ./fft -file [image filename] -cpu or -gpu\n");
         exit(-1);
     }
 
@@ -143,16 +139,14 @@
 void
 run_start(TaskManager *manager,pgm_t ipgm)
 {
-    int *n = new int[1];
-    n[0] = ipgm.width;
-    int *m = new int[1];
-    m[0] = (cl_int)(log((double)n[0])/log(2.0));
-    size_t *gws = new size_t[3];
-    size_t *lws = new size_t[3];
+    int n = ipgm.width;
+    int m = (cl_int)(log((double)n)/log(2.0));
+    size_t *gws = new size_t[2];
+    size_t *lws = new size_t[2];
 
-    cl_float2 *xm = (cl_float2 *)malloc(n[0] * n[0] * sizeof(cl_float2));
-    cl_float2 *rm = (cl_float2 *)malloc(n[0] * n[0] * sizeof(cl_float2));
-    cl_float2 *wm = (cl_float2 *)malloc(n[0] / 2 * sizeof(cl_float2));
+    cl_float2 *xm = (cl_float2 *)malloc(n * n * sizeof(cl_float2));
+    cl_float2 *rm = (cl_float2 *)malloc(n * n * sizeof(cl_float2));
+    cl_float2 *wm = (cl_float2 *)malloc(n / 2 * sizeof(cl_float2));
     /*
      * [cl_float2]
      * typedef union
@@ -168,79 +162,78 @@
      * #endif
      * } cl_float2;
      */
-    for (int i=0; i<n[0]; i++) {
-        for (int j=0; j < n[0]; j++) {
-            ((float*)xm)[(2*n[0]*j)+2*i+0] = (float)ipgm.buf[n[0]*j+i];
-            ((float*)xm)[(2*n[0]*j)+2*i+1] = (float)0;
+    for (int i=0; i<n; i++) {
+        for (int j=0; j < n; j++) {
+            ((float*)xm)[(2*n*j)+2*i+0] = (float)ipgm.buf[n*j+i];
+            ((float*)xm)[(2*n*j)+2*i+1] = (float)0;
         }
     }
         
     // Create spin factor
-    int length_w = n[0] / 2;
+    int length_w = n / 2;
     HTask* sfac = manager->create_task(SPIN_FACT);
-    setWorkSize(gws,lws,n[0]/2,1);
-    sfac->set_inData(0, n, sizeof(int));
+    setWorkSize(gws,lws,n/2,1);
     sfac->set_outData(0, wm, length_w*sizeof(cl_float2));
+    sfac->set_param(3,n);
     sfac->set_cpu(spe_cpu);
-    sfac->iterate(gws[0]);
+    sfac->iterate(gws[0],gws[1]);
 
     // Butterfly Operation
-    fftCore(manager, rm, xm, wm, m[0], forward);
+    fftCore(manager, rm, xm, wm, m, forward);
     
     HTaskPtr *trns = (HTask**)manager->allocate(sizeof(HTask*)*2);
 
     // Transpose matrix 
-    int length_r =n[0] * n[0];
-    setWorkSize(gws,lws,n[0]/2,1);
+    int length_r =n*n;
+    setWorkSize(gws,lws,n/2,1);
     for (int i=0;i<2;i++) {
         trns[i]= manager->create_task(TRANSEPOSE);
         trns[i]->set_inData(0, rm, length_r*sizeof(cl_float2));
-        trns[i]->set_inData(1, n,sizeof(int));
         trns[i]->set_outData(0, xm, length_r*sizeof(cl_float2));
+        trns[i]->set_param(3,n);
         trns[i]->set_cpu(spe_cpu);
     }
     trns[0]->wait_for(sfac);
-    trns[0]->iterate(gws[0],lws[0]);
+    trns[0]->iterate(gws[0],gws[1]);
     // Butterfly Operation 
-    fftCore(manager, rm, xm, wm, m[0], forward);
+    fftCore(manager, rm, xm, wm, m, forward);
 
     // Apply high-pass filter
     HTask *hpfl = manager->create_task(HIGH_PASS_FILTER);
-    cl_int *radius = new cl_int[1];
-    radius[0] = n[0]/8;
-    setWorkSize(gws,lws,n[0]/2,1);
-    hpfl->set_inData(0, n,sizeof(int));
-    hpfl->set_inData(1, radius,sizeof(int));
+    cl_int radius = n/8;
+    setWorkSize(gws,lws,n/2,1);
     hpfl->set_outData(0, rm, length_r*sizeof(cl_float2));
+    hpfl->set_param(3,n);
+    hpfl->set_param(4,radius);
     hpfl->set_cpu(spe_cpu);
     hpfl->wait_for(trns[0]);
-    hpfl->iterate(gws[0],lws[0]);
+    hpfl->iterate(gws[0],gws[1]);
     // Inverse FFT
 
     // Butterfly Operation
-    fftCore(manager,xm, rm, wm, m[0], inverse);
+    fftCore(manager,xm, rm, wm, m, inverse);
 
     // Transpose matrix
-    setWorkSize(gws,lws,n[0],n[0]);
-    trns[1]->iterate(gws[0],lws[0]);
+    setWorkSize(gws,lws,n,n);
+    trns[1]->iterate(gws[0],gws[1]);
 
     // Butterfly Operation
 
-    fftCore(manager,xm, rm, wm, m[0], inverse);
+    fftCore(manager,xm, rm, wm, m, inverse);
 
     // Read data from memory buffer
     // spawn and wait 
 
     float* ampd;
-    ampd = (float*)malloc(n[0]*n[0]*sizeof(float));
-    for (int i=0; i < n[0]; i++) {
-        for (int j=0; j < n[0]; j++) {
-            ampd[n[0]*((i))+((j))] = (AMP(((float*)xm)[(2*n[0]*i)+2*j], ((float*)xm)[(2*n[0]*i)+2*j+1]));
+    ampd = (float*)malloc(n*n*sizeof(float));
+    for (int i=0; i < n; i++) {
+        for (int j=0; j < n; j++) {
+            ampd[n*((i))+((j))] = (AMP(((float*)xm)[(2*n*i)+2*j], ((float*)xm)[(2*n*i)+2*j+1]));
         }
     }
     pgm_t opgm;
-    opgm.width = n[0];
-    opgm.height = n[0];
+    opgm.width = n;
+    opgm.height = n;
     normalizeF2PGM(&opgm, ampd);
     free(ampd);
 
--- a/example/fft/ppe/bitReverse.cc	Tue Jun 18 21:42:58 2013 +0900
+++ b/example/fft/ppe/bitReverse.cc	Sat Jun 22 18:10:55 2013 +0900
@@ -12,12 +12,13 @@
 bitReverse(SchedTask* s, void* rbuf, void* wbuf)
 {
     cl_float2* src = (cl_float2*)s->get_inputAddr(0);
-    int* m = (int*)s->get_inputAddr(1);
-    int* n = (int*)s->get_inputAddr(2);
     cl_float2* dst = (cl_float2*)s->get_outputAddr(0);
     
     unsigned long gid = (unsigned long)s->get_param(0);
     unsigned long nid = (unsigned long)s->get_param(1);
+    
+    unsigned long m = (unsigned long)s->get_param(3);
+    unsigned long n = (unsigned long)s->get_param(4);
     unsigned int j = gid;
     
     j = (j & 0x55555555) << 1 | (j & 0xAAAAAAAA) >> 1;
@@ -26,9 +27,8 @@
     j = (j & 0x00FF00FF) << 8 | (j & 0xFF00FF00) >> 8;
     j = (j & 0x0000FFFF) << 16 | (j & 0xFFFF0000) >> 16;
 
-    j >>= (32-m[0]);
+    j >>= (32-m);
 
-    dst[nid*n[0]+j] = src[nid*n[0]+gid];
-
+    dst[nid*n+j] = src[nid*n+gid];
     return 0;
 }
--- a/example/fft/ppe/butterfly.cc	Tue Jun 18 21:42:58 2013 +0900
+++ b/example/fft/ppe/butterfly.cc	Sat Jun 22 18:10:55 2013 +0900
@@ -12,8 +12,6 @@
 {
     cl_float2* x_in = (cl_float2*)s->get_inputAddr(0);
     cl_float2* w = (cl_float2*)s->get_inputAddr(1);
-    int* n = (int*)s->get_inputAddr(3);
-    unsigned int* flag = (unsigned int*)s->get_inputAddr(4);
     
     cl_float2* x_out = (cl_float2*)s->get_outputAddr(0);
 
@@ -21,13 +19,16 @@
     unsigned long nid = (unsigned long)s->get_param(1);
     long iter = (long)s->get_param(2) + 1;
 
+    long n = (long)s->get_param(3);
+    long direction_flag = (long)s->get_param(4);
+
     int butterflySize = 1 << (iter-1);
     int butterflyGrpDist = 1 << iter;
-    int butterflyGrpNum = n[0] >> iter;
+    int butterflyGrpNum = n >> iter;
     int butterflyGrpBase = (gid >> (iter-1))*(butterflyGrpDist);
     int butterflyGrpOffset = gid & (butterflySize-1);
 
-    int a = nid * n[0] + butterflyGrpBase + butterflyGrpOffset;
+    int a = nid * n + butterflyGrpBase + butterflyGrpOffset;
     int b = a + butterflySize;
 
     int l = butterflyGrpNum * butterflyGrpOffset;
@@ -40,7 +41,7 @@
     xbyy.x = xbyy.y = xb.y;
     
     wab.x = (cl_float)((cl_uint)w[l].x ^ 0x0);
-    wab.y = (cl_float)((cl_uint)w[l].y ^ flag[0]);
+    wab.y = (cl_float)((cl_uint)w[l].y ^ direction_flag);
 
     wayx.x = (cl_float)((cl_uint)wab.y ^ 0x80000000);
     wayx.y = (cl_float)((cl_uint)wab.x ^ 0x0);
--- a/example/fft/ppe/highPassFilter.cc	Tue Jun 18 21:42:58 2013 +0900
+++ b/example/fft/ppe/highPassFilter.cc	Sat Jun 22 18:10:55 2013 +0900
@@ -10,19 +10,19 @@
 static int
 highPassFilter(SchedTask* s,void* rbuf,void* wbuf)
 {
-    int* n = (int*)s->get_inputAddr(0);
-    int* radius = (int*)s->get_inputAddr(1);
-
     cl_float2* image = (cl_float2*)s->get_outputAddr(0);
 
     unsigned long xgid = (unsigned long)s->get_param(0);
     unsigned long ygid = (unsigned long)s->get_param(1);
 
+    long n = (long)s->get_param(3);
+    long radius = (long)s->get_param(4);
+
     cl_int2 n_2;
-    n_2.x = n_2.y = n[0]>>1;
+    n_2.x = n_2.y = n>>1;
     
     cl_int2 mask;
-    mask.x = mask.y = n[0]-1;
+    mask.x = mask.y = n-1;
 
     cl_int2 gid;
     gid.x = (xgid + n_2.x) & mask.x;
@@ -40,14 +40,14 @@
 
     cl_int2 window;
 
-    if (dist2 < radius[0]*radius[0]) {
+    if (dist2 < radius*radius) {
         window.x = window.y = (int)0L;
     } else {
         window.x = window.y = (int)-1L;
     }
 
-    image[ygid*n[0]+xgid].x = (float)((int)image[ygid*n[0]+xgid].x & window.x);
-    image[ygid*n[0]+xgid].y = (float)((int)image[ygid*n[0]+xgid].y & window.y);
+    image[ygid*n+xgid].x = (float)((int)image[ygid*n+xgid].x & window.x);
+    image[ygid*n+xgid].y = (float)((int)image[ygid*n+xgid].y & window.y);
 
     return 0;
 }
--- a/example/fft/ppe/norm.cc	Tue Jun 18 21:42:58 2013 +0900
+++ b/example/fft/ppe/norm.cc	Sat Jun 22 18:10:55 2013 +0900
@@ -10,15 +10,15 @@
 static int
 norm(SchedTask* s, void* rbuf,void* wbuf)
 {
-    int* n = (int*)s->get_inputAddr(0);
-
     cl_float2* x = (cl_float2*)s->get_outputAddr(0);
 
-    unsigned int* gid = (unsigned int*)s->global_get(FIRSTID);
-    unsigned int* nid = (unsigned int*)s->global_get(SECONDID);
-    
-    x[nid[0]*n[0]+gid[0]].x = x[nid[0]*n[0]+gid[0]].x / (float)n[0];
-    x[nid[0]*n[0]+gid[0]].y = x[nid[0]*n[0]+gid[0]].y / (float)n[0];
+    unsigned long gid = (unsigned long)s->get_param(0);
+    unsigned long nid = (unsigned long)s->get_param(1);
+
+    long n = (long)s->get_param(3);
+
+    x[nid*n+gid].x = x[nid*n+gid].x / (float)n;
+    x[nid*n+gid].y = x[nid*n+gid].y / (float)n;
 
     return 0;
 }
--- a/example/fft/ppe/setid.cc	Tue Jun 18 21:42:58 2013 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,14 +0,0 @@
-#include "setid.h"
-#include "Func.h"
-
-SchedDefineTask1(setid,setid);
-
-static int
-setid(SchedTask* s,void* rbuf,void* wbuf)
-{
-    s->global_alloc(FIRSTID,sizeof(int));
-
-    s->global_alloc(SECONDID,sizeof(int));
-    
-    return 0;
-}
--- a/example/fft/ppe/setid.h	Tue Jun 18 21:42:58 2013 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,8 +0,0 @@
-#ifndef INCLUDED_TASK_SET_ID
-#define INCLUDED_TASK_SET_ID
-
-#ifndef INCLUDED_SCHED_TASK
-#include "SchedTask.h"
-#endif
-
-#endif
--- a/example/fft/ppe/spinFact.cc	Tue Jun 18 21:42:58 2013 +0900
+++ b/example/fft/ppe/spinFact.cc	Sat Jun 22 18:10:55 2013 +0900
@@ -15,14 +15,15 @@
 static int
 spinFact(SchedTask* s,void* rbuf,void* wbuf)
 {
-    int* n = (int*)s->get_input(rbuf,0);
-
-    cl_float2* w = (cl_float2*)s->get_output(wbuf,0);
+    cl_float2* w = (cl_float2*)s->get_outputAddr(0);
 
     unsigned long i = (unsigned long)s->get_param(0);
+    
+    long n = (long)s->get_param(3);
+
     cl_float2 angle;
-    angle.x = (float)(2*i*PI/(float)n[0]);
-    angle.y = (float)((2*i*PI/(float)n[0]) + PI_2);
+    angle.x = (float)(2*i*PI/(float)n);
+    angle.y = (float)((2*i*PI/(float)n) + PI_2);
 
     w[i].x = cos(angle.x);
     w[i].y = cos(angle.y);
--- a/example/fft/ppe/transpose.cc	Tue Jun 18 21:42:58 2013 +0900
+++ b/example/fft/ppe/transpose.cc	Sat Jun 22 18:10:55 2013 +0900
@@ -12,15 +12,16 @@
 transpose(SchedTask* s,void* rbuf,void* wbuf)
 {
     cl_float2* src = (cl_float2*)s->get_inputAddr(0);
-    int* n = (int*)s->get_inputAddr(1);
 
     cl_float2* dst = (cl_float2*)s->get_outputAddr(0);
 
     unsigned long xgid = (unsigned long)s->get_param(0);
     unsigned long ygid = (unsigned long)s->get_param(1);
 
-    unsigned int iid = ygid * n[0] + xgid;
-    unsigned int oid = xgid * n[0] + ygid;
+    long n = (long)s->get_param(3);
+
+    unsigned int iid = ygid * n + xgid;
+    unsigned int oid = xgid * n + ygid;
 
     dst[oid] = src[iid];
 
--- a/example/fft/task_init.cc	Tue Jun 18 21:42:58 2013 +0900
+++ b/example/fft/task_init.cc	Sat Jun 22 18:10:55 2013 +0900
@@ -23,7 +23,6 @@
     GpuSchedRegister(TRANSEPOSE, "gpu/fft.cl", "transpose");
     GpuSchedRegister(HIGH_PASS_FILTER, "gpu/fft.cl", "highPassFilter");
 #else
-    SchedRegisterTask(SET_ID,setid);
     SchedRegisterTask(SPIN_FACT,spinFact);
     SchedRegisterTask(NORMALIZATION, norm);
     SchedRegisterTask(BIT_REVERSE, bitReverse);