changeset 1560:3df1868130cb draft

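fix fft ppe example: read indices via global_get() instead of get_cpuid(), use .x/.y instead of .hi/.lo vector members, and include SchedTask.h in main.cc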
author Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
date Mon, 11 Mar 2013 01:20:01 +0900
parents c60caac2f345
children 948bafd61d96
files example/fft/main.cc example/fft/ppe/bitReverse.cc example/fft/ppe/butterfly.cc example/fft/ppe/highPassFilter.cc example/fft/ppe/norm.cc example/fft/ppe/spinFact.cc example/fft/ppe/transpose.cc
diffstat 7 files changed, 50 insertions(+), 51 deletions(-) [+]
line wrap: on
line diff
--- a/example/fft/main.cc	Sat Mar 09 21:23:40 2013 +0900
+++ b/example/fft/main.cc	Mon Mar 11 01:20:01 2013 +0900
@@ -6,6 +6,7 @@
 #include <sys/time.h>
 #include "TaskManager.h"
 #include "GpuScheduler.h"
+#include "SchedTask.h"
 #include "Func.h"
 #ifdef __APPLE__
 #include <OpenCL/opencl.h>
--- a/example/fft/ppe/bitReverse.cc	Sat Mar 09 21:23:40 2013 +0900
+++ b/example/fft/ppe/bitReverse.cc	Mon Mar 11 01:20:01 2013 +0900
@@ -14,11 +14,9 @@
     int* m = (int*)s->get_input(rbuf,1);
     int* n = (int*)s->get_input(rbuf,2);
     cl_float2* dst = (cl_float2*)s->get_output(wbuf,0);
-    //    unsigned int gid = (unsigned int)s->get_param(2);
-    unsigned int gid = (unsigned int)s->get_cpuid();
-    //    unsigned int nid = (unsigned int)s->get_param(3);
-    unsigned int nid = (unsigned int)s->get_cpuid();
-    unsigned int j = gid;
+    unsigned int* gid = (unsigned int*)s->global_get(0);
+    unsigned int* nid = (unsigned int*)s->global_get(1);
+    unsigned int j = gid[0];
     
     j = (j & 0x55555555) << 1 | (j & 0xAAAAAAAA) >> 1;
     j = (j & 0x33333333) << 2 | (j & 0xCCCCCCCC) >> 2;
@@ -28,7 +26,7 @@
 
     j >>= (32-m[0]);
 
-    dst[nid*n[0]+j] = src[nid*n[0]+gid];
+    dst[nid[0]*n[0]+j] = src[nid[0]*n[0]+gid[0]];
 
     return 0;
 }
--- a/example/fft/ppe/butterfly.cc	Sat Mar 09 21:23:40 2013 +0900
+++ b/example/fft/ppe/butterfly.cc	Mon Mar 11 01:20:01 2013 +0900
@@ -16,16 +16,16 @@
     int* iter = (int*)s->get_input(rbuf,4);
     cl_uint* flag = (cl_uint*)s->get_input(rbuf,5);
     
-    unsigned int gid = (unsigned int)s->get_cpuid();
-    unsigned int nid = (unsigned int)s->get_cpuid();
+    unsigned int* gid = (unsigned int*)s->global_get(0);
+    unsigned int* nid = (unsigned int*)s->global_get(1);
 
     int butterflySize = 1 << (iter[0]-1);
     int butterflyGrpDist = 1 << iter[0];
     int butterflyGrpNum = n[0] >> iter[0];
-    int butterflyGrpBase = (gid >> (iter[0]-1))*(butterflyGrpDist);
-    int butterflyGrpOffset = gid & (butterflySize-1);
+    int butterflyGrpBase = (gid[0] >> (iter[0]-1))*(butterflyGrpDist);
+    int butterflyGrpOffset = gid[0] & (butterflySize-1);
 
-    int a = nid * n[0] + butterflyGrpBase + butterflyGrpOffset;
+    int a = nid[0] * n[0] + butterflyGrpBase + butterflyGrpOffset;
     int b = a + butterflySize;
 
     int l = butterflyGrpNum * butterflyGrpOffset;
@@ -34,23 +34,23 @@
 
     xa = x[a];
     xb = x[b];
-    xbxx.hi = xbxx.lo = xb.hi;
-    xbyy.hi = xbyy.lo = xb.lo;
+    xbxx.x = xbxx.y = xb.x;
+    xbyy.x = xbyy.y = xb.y;
     
-    wab.hi = (cl_float)((cl_uint)w[l].hi ^ 0x0);
-    wab.lo = (cl_float)((cl_uint)w[l].lo ^ flag[0]);
+    wab.x = (cl_float)((cl_uint)w[l].x ^ 0x0);
+    wab.y = (cl_float)((cl_uint)w[l].y ^ flag[0]);
 
-    wayx.hi = (cl_float)((cl_uint)wab.lo ^ 0x80000000);
-    wayx.lo = (cl_float)((cl_uint)wab.hi ^ 0x0);
+    wayx.x = (cl_float)((cl_uint)wab.y ^ 0x80000000);
+    wayx.y = (cl_float)((cl_uint)wab.x ^ 0x0);
 
-    wbyx.hi = (cl_float)((cl_uint)wab.lo ^ 0x0);
-    wbyx.lo = (cl_float)((cl_uint)wab.hi ^ 0x80000000);
+    wbyx.x = (cl_float)((cl_uint)wab.y ^ 0x0);
+    wbyx.y = (cl_float)((cl_uint)wab.x ^ 0x80000000);
 
-    resa.hi = xa.hi + xbxx.hi*wab.hi + xbyy.hi*wayx.hi;
-    resa.lo = xa.lo + xbxx.lo*wab.lo + xbyy.lo*wayx.lo;
+    resa.x = xa.x + xbxx.x*wab.x + xbyy.x*wayx.x;
+    resa.y = xa.y + xbxx.y*wab.y + xbyy.y*wayx.y;
 
-    resb.hi = xa.hi - xbxx.hi*wab.hi + xbyy.hi*wbyx.hi;
-    resb.lo = xa.lo - xbxx.lo*wab.lo + xbyy.lo*wbyx.lo;
+    resb.x = xa.x - xbxx.x*wab.x + xbyy.x*wbyx.x;
+    resb.y = xa.y - xbxx.y*wab.y + xbyy.y*wbyx.y;
 
     x[a] = resa;
     x[b] = resb;
--- a/example/fft/ppe/highPassFilter.cc	Sat Mar 09 21:23:40 2013 +0900
+++ b/example/fft/ppe/highPassFilter.cc	Mon Mar 11 01:20:01 2013 +0900
@@ -13,38 +13,38 @@
     int* n = (int*)s->get_input(rbuf,0);
     int* radius = (int*)s->get_input(rbuf,1);
     cl_float2* image = (cl_float2*)s->get_output(wbuf,0);
-    unsigned int xgid = (unsigned int)s->get_cpuid();
-    unsigned int ygid = (unsigned int)s->get_cpuid();
+    unsigned int* xgid = (unsigned int*)s->global_get(0);
+    unsigned int* ygid = (unsigned int*)s->global_get(1);
 
     cl_int2 n_2;
-    n_2.hi = n_2.lo = n[0]>>1;
+    n_2.x = n_2.y = n[0]>>1;
     
     cl_int2 mask;
-    mask.hi = mask.lo = n[0]-1;
+    mask.x = mask.y = n[0]-1;
 
     cl_int2 gid;
-    gid.hi = (xgid + n_2.hi) & mask.hi;
-    gid.lo = (ygid + n_2.lo) & mask.lo;
+    gid.x = (xgid[0] + n_2.x) & mask.x;
+    gid.y = (ygid[0] + n_2.y) & mask.y;
 
     cl_int2 diff;
-    diff.hi = n_2.hi - gid.hi;
-    diff.lo = n_2.lo - gid.lo;
+    diff.x = n_2.x - gid.x;
+    diff.y = n_2.y - gid.y;
     
     cl_int2 diff2;
-    diff2.hi = diff.hi * diff.hi;
-    diff2.lo = diff.lo * diff.lo;
+    diff2.x = diff.x * diff.x;
+    diff2.y = diff.y * diff.y;
 
-    int dist2 = diff2.hi + diff2.lo;
+    int dist2 = diff2.x + diff2.y;
 
     cl_int2 window;
 
     if (dist2 < radius[0]*radius[0]) {
-        window.hi = window.lo = (int)0L;
+        window.x = window.y = (int)0L;
     } else {
-        window.hi = window.lo = (int)-1L;
+        window.x = window.y = (int)-1L;
     }
-    image[ygid*n[0]+xgid].hi = (float)((int)image[ygid*n[0]+xgid].hi & window.hi);
-    image[ygid*n[0]+xgid].lo = (float)((int)image[ygid*n[0]+xgid].lo & window.lo);
+    image[ygid[0]*n[0]+xgid[0]].x = (float)((int)image[ygid[0]*n[0]+xgid[0]].x & window.x);
+    image[ygid[0]*n[0]+xgid[0]].y = (float)((int)image[ygid[0]*n[0]+xgid[0]].y & window.y);
 
     return 0;
 }
--- a/example/fft/ppe/norm.cc	Sat Mar 09 21:23:40 2013 +0900
+++ b/example/fft/ppe/norm.cc	Mon Mar 11 01:20:01 2013 +0900
@@ -12,11 +12,11 @@
 {
     cl_float2* x = (cl_float2*)s->get_input(rbuf,0);
     int* n = (int*)s->get_input(rbuf,1);
-    unsigned int gid = (unsigned int)s->get_cpuid();
-    unsigned int nid = (unsigned int)s->get_cpuid();
+    unsigned int* gid = (unsigned int*)s->global_get(0);
+    unsigned int* nid = (unsigned int*)s->global_get(1);
     
-    x[nid*n[0]+gid].hi = x[nid*n[0]+gid].hi / (float)n[0];
-    x[nid*n[0]+gid].lo = x[nid*n[0]+gid].lo / (float)n[0];
+    x[nid[0]*n[0]+gid[0]].x = x[nid[0]*n[0]+gid[0]].x / (float)n[0];
+    x[nid[0]*n[0]+gid[0]].y = x[nid[0]*n[0]+gid[0]].y / (float)n[0];
 
     return 0;
 }
--- a/example/fft/ppe/spinFact.cc	Sat Mar 09 21:23:40 2013 +0900
+++ b/example/fft/ppe/spinFact.cc	Mon Mar 11 01:20:01 2013 +0900
@@ -16,13 +16,13 @@
 {
     int* n = (int*)s->get_input(rbuf,0);
     cl_float2* w = (cl_float2*)s->get_output(wbuf,0);
-    unsigned int i = (unsigned int)s->get_cpuid();
+    unsigned int* i = (unsigned int*)s->global_get(0);
     cl_float2 angle;
-    angle.hi = (float)(2*i*PI/(float)n[0]);
-    angle.lo = (float)((2*i*PI/(float)n[0]) + PI_2);
+    angle.x = (float)(2*i[0]*PI/(float)n[0]);
+    angle.y = (float)((2*i[0]*PI/(float)n[0]) + PI_2);
 
-    w[i].hi = cos(angle.hi);
-    w[i].lo = cos(angle.lo);
+    w[i[0]].x = cos(angle.x);
+    w[i[0]].y = cos(angle.y);
 
     return 0;
 }
--- a/example/fft/ppe/transpose.cc	Sat Mar 09 21:23:40 2013 +0900
+++ b/example/fft/ppe/transpose.cc	Mon Mar 11 01:20:01 2013 +0900
@@ -13,11 +13,11 @@
     cl_float2* src = (cl_float2*)s->get_input(rbuf,0);
     int* n = (int*)s->get_input(rbuf,1);
     cl_float2* dst = (cl_float2*)s->get_output(wbuf,0);
-    unsigned int xgid = (unsigned int)s->get_cpuid();
-    unsigned int ygid = (unsigned int)s->get_cpuid();
+    unsigned int* xgid = (unsigned int*)s->global_get(0);
+    unsigned int* ygid = (unsigned int*)s->global_get(1);
 
-    unsigned int iid = ygid * n[0] + xgid;
-    unsigned int oid = xgid * n[0] + ygid;
+    unsigned int iid = ygid[0] * n[0] + xgid[0];
+    unsigned int oid = xgid[0] * n[0] + ygid[0];
 
     dst[oid] = src[iid];