changeset 1579:7418c7aef534 draft

fft
author Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
date Mon, 25 Mar 2013 23:27:04 +0900
parents a039ff7b0a45
children 806b4658ced6
files example/fft/main.cc example/fft/ppe/bitReverse.cc example/fft/ppe/butterfly.cc example/fft/ppe/setid.cc example/fft/ppe/spinFact.cc example/multiply/main.cc example/word_count/main.cc
diffstat 7 files changed, 81 insertions(+), 100 deletions(-) [+]
line wrap: on
line diff
--- a/example/fft/main.cc	Sun Mar 24 21:13:10 2013 +0900
+++ b/example/fft/main.cc	Mon Mar 25 23:27:04 2013 +0900
@@ -26,7 +26,6 @@
 static double ed_time;
 void TMend(TaskManager *);
 int ndrange_flag;
-
 cl_device_id device_id = NULL;
 cl_context context = NULL;
 cl_command_queue queue = NULL;
@@ -73,7 +72,7 @@
 
 int fftCore(TaskManager *manager,cl_float2 *dst, cl_float2 *src, cl_float2 *spin, int m_, enum Mode direction)
 {
-    int *iter = new int[1];
+    int iter;
     unsigned int* flag = new unsigned int[1];
     switch (direction) {
     case forward:flag[0] = 0x00000000; break;
@@ -86,54 +85,48 @@
     size_t gws[2],lws[2];
     int length_dst = n[0]*n[0];
     int length_src = n[0]*n[0];
-    HTask *brev = manager->create_task(BIT_REVERSE);
+    cl_uint dimension = 2;
+    HTask* brev;
+    int i,j;
     setWorkSize(gws,lws,n[0],n[0]);
-    cl_uint dimension = 2;
-
-    brev->set_param(0,(memaddr)length_src);
-    brev->set_param(1,(memaddr)dimension);
-    brev->set_param(2,(memaddr)gws[0]);
-    brev->set_param(3,(memaddr)gws[1]);
-    brev->set_param(4,(memaddr)lws[0]);
-    brev->set_param(5,(memaddr)lws[1]);
-    brev->set_inData(0, src, length_src*sizeof(cl_float2));
-    brev->set_inData(1, m,sizeof(int));
-    brev->set_inData(2, n,sizeof(int));
-    brev->set_outData(0, dst, length_dst*sizeof(cl_float2));
-    brev->set_cpu(spe_cpu);
-    brev->nd_range();
+    for(i=0;i<gws[0];i++){ // spawn one BIT_REVERSE task per (i,j) pair of the work size
+        for(j=0;j<gws[1];j++){
+            brev = manager->create_task(BIT_REVERSE);
+            brev->set_param(0,(memaddr)length_src);
+            brev->set_param(1,(memaddr)i); // read back as gid in ppe/bitReverse.cc
+            brev->set_param(2,(memaddr)j); // read back as nid in ppe/bitReverse.cc
+            brev->set_inData(0, src, length_src*sizeof(cl_float2));
+            brev->set_inData(1, m,sizeof(int));
+            brev->set_inData(2, n,sizeof(int));
+            brev->set_outData(0, dst, length_dst*sizeof(cl_float2));
+            brev->set_cpu(spe_cpu);
+            brev->spawn(); // brev is reassigned each pass, so only the last task's handle survives the loop
+        }
+    }
     
-    HTaskPtr *bfly = (HTask**)manager->allocate(sizeof(HTask*)*m_);
+    HTask* bfly;
     setWorkSize(gws,lws,n[0]/2,n[0]);
 
-    for (iter[0]=1; iter[0]<=m_;iter[0]++) {
-        bfly[iter[0]-1] = manager->create_task(BUTTERFLY);
-        bfly[iter[0]-1]->set_param(0,(memaddr)length_dst);
-        bfly[iter[0]-1]->set_param(1,(memaddr)dimension);
-        bfly[iter[0]-1]->set_param(2,(memaddr)gws[0]);
-        bfly[iter[0]-1]->set_param(3,(memaddr)gws[1]);
-        bfly[iter[0]-1]->set_param(4,(memaddr)lws[0]);
-        bfly[iter[0]-1]->set_param(5,(memaddr)lws[1]);
-        bfly[iter[0]-1]->set_inData(0, dst, length_dst*sizeof(cl_float2));
-        bfly[iter[0]-1]->set_inData(1, spin, sizeof(cl_float2)*(n[0]/2));
-        bfly[iter[0]-1]->set_inData(2, m,sizeof(int));
-        bfly[iter[0]-1]->set_inData(3, n,sizeof(int));
-        bfly[iter[0]-1]->set_inData(4, iter,sizeof(int));
-        bfly[iter[0]-1]->set_inData(5, flag,sizeof(int));
-        bfly[iter[0]-1]->set_outData(0,dst,length_dst*sizeof(cl_float2));
-        bfly[iter[0]-1]->set_cpu(spe_cpu);
-        bfly[iter[0]-1]->nd_range();
-        bfly[iter[0]-1]->flip();
-        if (iter[0]==1) {
-            bfly[iter[0]-1]->wait_for(brev);
-        } else {
-            bfly[iter[0]-1]->wait_for(bfly[iter[0]-2]);
+    for (iter=1; iter<=m_;iter++) { // one BUTTERFLY task per stage and per (i,j) pair
+        for(i=0;i<gws[0];i++){
+            for(j=0;j<gws[1];j++){ // bound the inner loop on j, the counter it increments
+                bfly = manager->create_task(BUTTERFLY);
+                bfly->set_param(0,(memaddr)length_dst);
+                bfly->set_param(1,(memaddr)iter); // stage number, passed by value as a task parameter
+                bfly->set_param(2,(memaddr)i); // read back as gid in ppe/butterfly.cc
+                bfly->set_param(3,(memaddr)j); // read back as nid in ppe/butterfly.cc
+                bfly->set_inData(0, dst, length_dst*sizeof(cl_float2));
+                bfly->set_inData(1, spin, sizeof(cl_float2)*(n[0]/2));
+                bfly->set_inData(2, m,sizeof(int));
+                bfly->set_inData(3, n,sizeof(int));
+                bfly->set_inData(4, flag,sizeof(int));
+                bfly->set_outData(0,dst,length_dst*sizeof(cl_float2));
+                bfly->set_cpu(spe_cpu);
+                bfly->wait_for(brev); // NOTE(review): brev is only the last BIT_REVERSE task and stages no longer wait on each other - confirm ordering
+                bfly->spawn();
+            }
         }
     }
-    brev->spawn();
-    for (int i=0;i<m_;i++) {
-        bfly[i]->spawn();
-    }
     
     if (direction == inverse) { 
         HTask *norm = manager->create_task(NORMALIZATION);
@@ -149,7 +142,7 @@
         norm->set_cpu(spe_cpu);
         norm->nd_range();
         norm->flip();
-        norm->wait_for(bfly[m[0]-1]);
+        norm->wait_for(bfly);
         norm->spawn();
     }
     return 0;
@@ -189,6 +182,7 @@
     cl_float2 *xm = (cl_float2 *)malloc(n[0] * n[0] * sizeof(cl_float2));
     cl_float2 *rm = (cl_float2 *)malloc(n[0] * n[0] * sizeof(cl_float2));
     cl_float2 *wm = (cl_float2 *)malloc(n[0] / 2 * sizeof(cl_float2));
+    int i,j;
     /*
      * [cl_float2]
      * typedef union
@@ -204,39 +198,27 @@
      * #endif
      * } cl_float2;
      */
-    int i, j;
-    for (i=0; i < n[0]; i++) {
+    for (i=0; i<n[0]; i++) {
         for (j=0; j < n[0]; j++) {
             ((float*)xm)[(2*n[0]*j)+2*i+0] = (float)ipgm.buf[n[0]*j+i];
             ((float*)xm)[(2*n[0]*j)+2*i+1] = (float)0;
         }
     }
-    
-    HTask* setid = manager->create_task(SET_ID);
-    setid->set_cpu(spe_cpu);
-    setid->spawn();
-    
-    // Create spin factor 
-    HTask *sfac = manager->create_task(SPIN_FACT);
-    int length_w = n[0] / 2;
+        
+    // Create spin factor
     setWorkSize(gws,lws,n[0]/2,1); // Todo:setWorkSize(ndr,n[0]/2,1);でできるように
-    
-    ND_RANGE_T *ndr = new ND_RANGE_T;
-    ndr->dimension = 1;
-    ndr->gws[0] = gws[0];
-    ndr->gws[1] = gws[1];
-    ndr->lws[0] = lws[0];
-    ndr->lws[1] = lws[1];
-
-    sfac->set_param(0,ndr);
-    sfac->set_param(1, (memaddr)length_w);
-    sfac->set_inData(0, n, sizeof(int));
-    sfac->set_outData(0, wm, length_w*sizeof(cl_float2));
-    sfac->set_cpu(spe_cpu);
-    sfac->nd_range();
-    sfac->wait_for(setid);
-    sfac->spawn();
-
+    int length_w = n[0] / 2; // number of spin factors; also the size of the wm buffer
+    HTask* sfac;
+    for(i=0;i<gws[0];i++){ // spawn one SPIN_FACT task per index i
+        sfac = manager->create_task(SPIN_FACT);
+        sfac->set_param(0, (memaddr)length_w);
+        sfac->set_param(1,(memaddr)i); // read back as the element index in ppe/spinFact.cc
+        sfac->set_inData(0, n, sizeof(int));
+        sfac->set_outData(0, wm, length_w*sizeof(cl_float2));
+        sfac->set_cpu(spe_cpu);
+        sfac->nd_range(); // NOTE(review): the other per-index loops in this changeset dropped nd_range() - confirm it is still wanted
+        sfac->spawn();
+    }
     // Butterfly Operation 
     fftCore(manager, rm, xm, wm, m[0], forward);
     
--- a/example/fft/ppe/bitReverse.cc	Sun Mar 24 21:13:10 2013 +0900
+++ b/example/fft/ppe/bitReverse.cc	Mon Mar 25 23:27:04 2013 +0900
@@ -14,13 +14,12 @@
     cl_float2* src = (cl_float2*)s->get_inputAddr(0);
     int* m = (int*)s->get_inputAddr(1);
     int* n = (int*)s->get_inputAddr(2);
-
     cl_float2* dst = (cl_float2*)s->get_outputAddr(0);
-
-    unsigned int* gid = (unsigned int*)s->global_get(FIRSTID);
-    unsigned int* nid = (unsigned int*)s->global_get(SECONDID);
-
-    unsigned int j = gid[0];
+    
+    unsigned long gid = (unsigned long)s->get_param(1);
+    unsigned long nid = (unsigned long)s->get_param(2);
+    unsigned int j = gid;
+    
     j = (j & 0x55555555) << 1 | (j & 0xAAAAAAAA) >> 1;
     j = (j & 0x33333333) << 2 | (j & 0xCCCCCCCC) >> 2;
     j = (j & 0x0F0F0F0F) << 4 | (j & 0xF0F0F0F0) >> 4;
@@ -29,7 +28,7 @@
 
     j >>= (32-m[0]);
 
-    dst[nid[0]*n[0]+j] = src[nid[0]*n[0]+gid[0]];
+    dst[nid*n[0]+j] = src[nid*n[0]+gid];
 
     return 0;
 }
--- a/example/fft/ppe/butterfly.cc	Sun Mar 24 21:13:10 2013 +0900
+++ b/example/fft/ppe/butterfly.cc	Mon Mar 25 23:27:04 2013 +0900
@@ -13,21 +13,21 @@
     cl_float2* x_in = (cl_float2*)s->get_inputAddr(0);
     cl_float2* w = (cl_float2*)s->get_inputAddr(1);
     int* n = (int*)s->get_inputAddr(3);
-    int* iter = (int*)s->get_inputAddr(4);
-    unsigned int* flag = (unsigned int*)s->get_inputAddr(5);
+    unsigned int* flag = (unsigned int*)s->get_inputAddr(4);
     
     cl_float2* x_out = (cl_float2*)s->get_outputAddr(0);
 
-    unsigned int* gid = (unsigned int*)s->global_get(FIRSTID);
-    unsigned int* nid = (unsigned int*)s->global_get(SECONDID);
+    long iter = (long)s->get_param(1); // stage number: main.cc sends it with set_param(1, ...), not as input data
+    unsigned long gid = (unsigned long)s->get_param(2);
+    unsigned long nid = (unsigned long)s->get_param(3);
 
-    int butterflySize = 1 << (iter[0]-1);
-    int butterflyGrpDist = 1 << iter[0];
-    int butterflyGrpNum = n[0] >> iter[0];
-    int butterflyGrpBase = (gid[0] >> (iter[0]-1))*(butterflyGrpDist);
-    int butterflyGrpOffset = gid[0] & (butterflySize-1);
+    int butterflySize = 1 << (iter-1);
+    int butterflyGrpDist = 1 << iter;
+    int butterflyGrpNum = n[0] >> iter;
+    int butterflyGrpBase = (gid >> (iter-1))*(butterflyGrpDist);
+    int butterflyGrpOffset = gid & (butterflySize-1);
 
-    int a = nid[0] * n[0] + butterflyGrpBase + butterflyGrpOffset;
+    int a = nid * n[0] + butterflyGrpBase + butterflyGrpOffset;
     int b = a + butterflySize;
 
     int l = butterflyGrpNum * butterflyGrpOffset;
--- a/example/fft/ppe/setid.cc	Sun Mar 24 21:13:10 2013 +0900
+++ b/example/fft/ppe/setid.cc	Mon Mar 25 23:27:04 2013 +0900
@@ -7,7 +7,8 @@
 setid(SchedTask* s,void* rbuf,void* wbuf)
 {
     s->global_alloc(FIRSTID,sizeof(int));
+
     s->global_alloc(SECONDID,sizeof(int));
-
+    
     return 0;
 }
--- a/example/fft/ppe/spinFact.cc	Sun Mar 24 21:13:10 2013 +0900
+++ b/example/fft/ppe/spinFact.cc	Mon Mar 25 23:27:04 2013 +0900
@@ -19,14 +19,13 @@
 
     cl_float2* w = (cl_float2*)s->get_output(wbuf,0);
 
-    unsigned int* i = (unsigned int*)s->global_get(FIRSTID);
+    unsigned long i = (unsigned long)s->get_param(1);
     cl_float2 angle;
-    printf("%d\n",((unsigned int*)s->global_get(SECONDID))[0]);
-    angle.x = (float)(2*i[0]*PI/(float)n[0]);
-    angle.y = (float)((2*i[0]*PI/(float)n[0]) + PI_2);
+    angle.x = (float)(2*i*PI/(float)n[0]);
+    angle.y = (float)((2*i*PI/(float)n[0]) + PI_2);
 
-    w[i[0]].x = cos(angle.x);
-    w[i[0]].y = cos(angle.y);
+    w[i].x = cos(angle.x);
+    w[i].y = cos(angle.y);
 
     return 0;
 }
--- a/example/multiply/main.cc	Sun Mar 24 21:13:10 2013 +0900
+++ b/example/multiply/main.cc	Mon Mar 25 23:27:04 2013 +0900
@@ -75,10 +75,10 @@
 
     // set ND_RANGE parameter
     // 初期値は全部1になっている
-    ndr = new ND_RANGE_T;
+    //    ndr = new ND_RANGE_T;
     
-    ndr->gws[0] = length;
-    manager->set_NDRange(ndr);
+    //    ndr->gws[0] = length;
+    //    manager->set_NDRange(ndr);
 
     multiply = manager->create_task(MULTIPLY_TASK);
     multiply->nd_range();
--- a/example/word_count/main.cc	Sun Mar 24 21:13:10 2013 +0900
+++ b/example/word_count/main.cc	Mon Mar 25 23:27:04 2013 +0900
@@ -1,4 +1,4 @@
-#include <stdio.h>
+#include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <sys/mman.h>