changeset 1571:9832a5eb2027 draft

merge
author Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
date Mon, 18 Mar 2013 15:28:54 +0900
parents b3f865f339f6
children 0bd4598f5a65
files example/fft/main.cc example/fft/ppe/bitReverse.cc example/fft/ppe/butterfly.cc example/fft/ppe/highPassFilter.cc example/fft/ppe/norm.cc example/fft/ppe/spinFact.cc example/fft/ppe/transpose.cc example/multiply/main.cc
diffstat 8 files changed, 39 insertions(+), 20 deletions(-) [+]
line wrap: on
line diff
--- a/example/fft/main.cc	Mon Mar 18 08:18:21 2013 +0900
+++ b/example/fft/main.cc	Mon Mar 18 15:28:54 2013 +0900
@@ -87,8 +87,9 @@
     int length_dst = n[0]*n[0];
     int length_src = n[0]*n[0];
     HTask *brev = manager->create_task(BIT_REVERSE);
-    //    setWorkSize(gws,lws,n[0],n[0]);
+    setWorkSize(gws,lws,n[0],n[0]);
     cl_uint dimension = 2;
+
     brev->set_param(0,(memaddr)length_src);
     brev->set_param(1,(memaddr)dimension);
     brev->set_param(2,(memaddr)gws[0]);
@@ -103,7 +104,7 @@
     brev->nd_range();
     
     HTaskPtr *bfly = (HTask**)manager->allocate(sizeof(HTask*)*m_);
-    //    setWorkSize(gws,lws,n[0]/2,n[0]);
+    setWorkSize(gws,lws,n[0]/2,n[0]);
 
     for (iter[0]=1; iter[0]<=m_;iter[0]++) {
         bfly[iter[0]-1] = manager->create_task(BUTTERFLY);
@@ -113,13 +114,13 @@
         bfly[iter[0]-1]->set_param(3,(memaddr)gws[1]);
         bfly[iter[0]-1]->set_param(4,(memaddr)lws[0]);
         bfly[iter[0]-1]->set_param(5,(memaddr)lws[1]);
-        
         bfly[iter[0]-1]->set_inData(0, dst, length_dst*sizeof(cl_float2));
         bfly[iter[0]-1]->set_inData(1, spin, sizeof(cl_float2)*(n[0]/2));
         bfly[iter[0]-1]->set_inData(2, m,sizeof(int));
         bfly[iter[0]-1]->set_inData(3, n,sizeof(int));
         bfly[iter[0]-1]->set_inData(4, iter,sizeof(int));
         bfly[iter[0]-1]->set_inData(5, flag,sizeof(int));
+        bfly[iter[0]-1]->set_outData(0,dst,length_dst*sizeof(cl_float2));
         bfly[iter[0]-1]->set_cpu(spe_cpu);
         bfly[iter[0]-1]->nd_range();
         bfly[iter[0]-1]->flip();
@@ -136,25 +137,21 @@
     
     if (direction == inverse) { 
         HTask *norm = manager->create_task(NORMALIZATION);
-        //        setWorkSize(gws,lws,n[0],n[0]);
+        setWorkSize(gws,lws,n[0],n[0]);
         norm->set_param(0,(memaddr)length_dst);
         norm->set_param(1,(memaddr)dimension);
         norm->set_param(2,(memaddr)gws[0]);
         norm->set_param(3,(memaddr)gws[1]);
         norm->set_param(4,(memaddr)lws[0]);
         norm->set_param(5,(memaddr)lws[1]);
-        norm->set_inData(0, dst, length_dst*sizeof(cl_float2));
-        norm->set_inData(1, n,sizeof(int));
+        norm->set_inData(0, n,sizeof(int));
+        norm->set_outData(0, dst, length_dst*sizeof(cl_float2));
         norm->set_cpu(spe_cpu);
         norm->nd_range();
         norm->flip();
         norm->wait_for(bfly[m[0]-1]);
         norm->spawn();
     }
-    // brev->spawn();
-    // for (int i=0;i<m_;i++) {
-    //     bfly[i]->spawn();
-    // }
     return 0;
 }
 
@@ -207,7 +204,6 @@
      * #endif
      * } cl_float2;
      */
-
     int i, j;
     for (i=0; i < n[0]; i++) {
         for (j=0; j < n[0]; j++) {
@@ -231,8 +227,8 @@
     ndr->gws[1] = gws[1];
     ndr->lws[0] = lws[0];
     ndr->lws[1] = lws[1];
+
     sfac->set_param(0,ndr);
-    
     sfac->set_param(1, (memaddr)length_w);
     sfac->set_inData(0, n, sizeof(int));
     sfac->set_outData(0, wm, length_w*sizeof(cl_float2));
--- a/example/fft/ppe/bitReverse.cc	Mon Mar 18 08:18:21 2013 +0900
+++ b/example/fft/ppe/bitReverse.cc	Mon Mar 18 15:28:54 2013 +0900
@@ -14,9 +14,12 @@
     cl_float2* src = (cl_float2*)s->get_inputAddr(0);
     int* m = (int*)s->get_inputAddr(1);
     int* n = (int*)s->get_inputAddr(2);
+
     cl_float2* dst = (cl_float2*)s->get_outputAddr(0);
+
     unsigned int* gid = (unsigned int*)s->global_get(FIRSTID);
     unsigned int* nid = (unsigned int*)s->global_get(SECONDID);
+
     unsigned int j = gid[0];
     j = (j & 0x55555555) << 1 | (j & 0xAAAAAAAA) >> 1;
     j = (j & 0x33333333) << 2 | (j & 0xCCCCCCCC) >> 2;
--- a/example/fft/ppe/butterfly.cc	Mon Mar 18 08:18:21 2013 +0900
+++ b/example/fft/ppe/butterfly.cc	Mon Mar 18 15:28:54 2013 +0900
@@ -10,12 +10,14 @@
 static int
 butterfly(SchedTask* s,void* rbuf,void* wbuf)
 {
-    cl_float2* x = (cl_float2*)s->get_inputAddr(0);
+    cl_float2* x_in = (cl_float2*)s->get_inputAddr(0);
     cl_float2* w = (cl_float2*)s->get_inputAddr(1);
     int* n = (int*)s->get_inputAddr(3);
     int* iter = (int*)s->get_inputAddr(4);
     unsigned int* flag = (unsigned int*)s->get_inputAddr(5);
     
+    cl_float2* x_out = (cl_float2*)s->get_outputAddr(0);
+
     unsigned int* gid = (unsigned int*)s->global_get(FIRSTID);
     unsigned int* nid = (unsigned int*)s->global_get(SECONDID);
 
@@ -32,8 +34,8 @@
 
     cl_float2 xa, xb, xbxx, xbyy, wab, wayx, wbyx, resa, resb;
 
-    xa = x[a];
-    xb = x[b];
+    xa = x_in[a];
+    xb = x_in[b];
     xbxx.x = xbxx.y = xb.x;
     xbyy.x = xbyy.y = xb.y;
     
@@ -52,8 +54,8 @@
     resb.x = xa.x - xbxx.x*wab.x + xbyy.x*wbyx.x;
     resb.y = xa.y - xbxx.y*wab.y + xbyy.y*wbyx.y;
 
-    x[a] = resa;
-    x[b] = resb;
+    x_out[a] = resa;
+    x_out[b] = resb;
 
     return 0;
 }
--- a/example/fft/ppe/highPassFilter.cc	Mon Mar 18 08:18:21 2013 +0900
+++ b/example/fft/ppe/highPassFilter.cc	Mon Mar 18 15:28:54 2013 +0900
@@ -12,7 +12,9 @@
 {
     int* n = (int*)s->get_input(rbuf,0);
     int* radius = (int*)s->get_input(rbuf,1);
+
     cl_float2* image = (cl_float2*)s->get_output(wbuf,0);
+
     unsigned int* xgid = (unsigned int*)s->global_get(FIRSTID);
     unsigned int* ygid = (unsigned int*)s->global_get(SECONDID);
 
@@ -43,6 +45,7 @@
     } else {
         window.x = window.y = (int)-1L;
     }
+
     image[ygid[0]*n[0]+xgid[0]].x = (float)((int)image[ygid[0]*n[0]+xgid[0]].x & window.x);
     image[ygid[0]*n[0]+xgid[0]].y = (float)((int)image[ygid[0]*n[0]+xgid[0]].y & window.y);
 
--- a/example/fft/ppe/norm.cc	Mon Mar 18 08:18:21 2013 +0900
+++ b/example/fft/ppe/norm.cc	Mon Mar 18 15:28:54 2013 +0900
@@ -10,8 +10,10 @@
 static int
 norm(SchedTask* s, void* rbuf,void* wbuf)
 {
-    cl_float2* x = (cl_float2*)s->get_inputAddr(0);
-    int* n = (int*)s->get_inputAddr(1);
+    int* n = (int*)s->get_inputAddr(0);
+
+    cl_float2* x = (cl_float2*)s->get_outputAddr(0);
+
     unsigned int* gid = (unsigned int*)s->global_get(FIRSTID);
     unsigned int* nid = (unsigned int*)s->global_get(SECONDID);
     
--- a/example/fft/ppe/spinFact.cc	Mon Mar 18 08:18:21 2013 +0900
+++ b/example/fft/ppe/spinFact.cc	Mon Mar 18 15:28:54 2013 +0900
@@ -16,9 +16,12 @@
 spinFact(SchedTask* s,void* rbuf,void* wbuf)
 {
     int* n = (int*)s->get_input(rbuf,0);
+
     cl_float2* w = (cl_float2*)s->get_output(wbuf,0);
+
     unsigned int* i = (unsigned int*)s->global_get(FIRSTID);
     cl_float2 angle;
+    printf("%d\n",((unsigned int*)s->global_get(SECONDID))[0]);
     angle.x = (float)(2*i[0]*PI/(float)n[0]);
     angle.y = (float)((2*i[0]*PI/(float)n[0]) + PI_2);
 
--- a/example/fft/ppe/transpose.cc	Mon Mar 18 08:18:21 2013 +0900
+++ b/example/fft/ppe/transpose.cc	Mon Mar 18 15:28:54 2013 +0900
@@ -13,7 +13,9 @@
 {
     cl_float2* src = (cl_float2*)s->get_inputAddr(0);
     int* n = (int*)s->get_inputAddr(1);
+
     cl_float2* dst = (cl_float2*)s->get_outputAddr(0);
+
     unsigned int* xgid = (unsigned int*)s->global_get(FIRSTID);
     unsigned int* ygid = (unsigned int*)s->global_get(SECONDID);
 
--- a/example/multiply/main.cc	Mon Mar 18 08:18:21 2013 +0900
+++ b/example/multiply/main.cc	Mon Mar 18 15:28:54 2013 +0900
@@ -71,16 +71,24 @@
     }
     st_time = getTime();
 
+<<<<<<< local
+    ND_RANGE_T_PTR ndr = new ND_RANGE_T;
+    ndr->dimension = 1;
+    ndr->gws[0] = length; ndr->gws[1] = 1; ndr->gws[2] = 1;
+    ndr->lws[0] = 1;   ndr->lws[1] = 1; ndr->lws[2] = 1;
+    //    manager->set_NDRange(ndr);
+=======
     // set ND_RANGE parameter
     // 初期値は全部1になっている
     ndr = new ND_RANGE_T;
     
     ndr->gws[0] = length;
     manager->set_NDRange(ndr);
+>>>>>>> other
 
     multiply = manager->create_task(MULTIPLY_TASK);
     multiply->nd_range();
-    multiply->set_cpu(GPU_0);
+    multiply->set_cpu(SPE_ANY);
 
     /**
      * Set of Input Data