Mercurial > hg > Game > Cerium
changeset 1560:3df1868130cb draft
fix fft ppe example
author | Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp> |
---|---|
date | Mon, 11 Mar 2013 01:20:01 +0900 |
parents | c60caac2f345 |
children | 948bafd61d96 |
files | example/fft/main.cc example/fft/ppe/bitReverse.cc example/fft/ppe/butterfly.cc example/fft/ppe/highPassFilter.cc example/fft/ppe/norm.cc example/fft/ppe/spinFact.cc example/fft/ppe/transpose.cc |
diffstat | 7 files changed, 50 insertions(+), 51 deletions(-) [+] |
line wrap: on
line diff
--- a/example/fft/main.cc Sat Mar 09 21:23:40 2013 +0900 +++ b/example/fft/main.cc Mon Mar 11 01:20:01 2013 +0900 @@ -6,6 +6,7 @@ #include <sys/time.h> #include "TaskManager.h" #include "GpuScheduler.h" +#include "SchedTask.h" #include "Func.h" #ifdef __APPLE__ #include <OpenCL/opencl.h>
--- a/example/fft/ppe/bitReverse.cc Sat Mar 09 21:23:40 2013 +0900 +++ b/example/fft/ppe/bitReverse.cc Mon Mar 11 01:20:01 2013 +0900 @@ -14,11 +14,9 @@ int* m = (int*)s->get_input(rbuf,1); int* n = (int*)s->get_input(rbuf,2); cl_float2* dst = (cl_float2*)s->get_output(wbuf,0); - // unsigned int gid = (unsigned int)s->get_param(2); - unsigned int gid = (unsigned int)s->get_cpuid(); - // unsigned int nid = (unsigned int)s->get_param(3); - unsigned int nid = (unsigned int)s->get_cpuid(); - unsigned int j = gid; + unsigned int* gid = (unsigned int*)s->global_get(0); + unsigned int* nid = (unsigned int*)s->global_get(1); + unsigned int j = gid[0]; j = (j & 0x55555555) << 1 | (j & 0xAAAAAAAA) >> 1; j = (j & 0x33333333) << 2 | (j & 0xCCCCCCCC) >> 2; @@ -28,7 +26,7 @@ j >>= (32-m[0]); - dst[nid*n[0]+j] = src[nid*n[0]+gid]; + dst[nid[0]*n[0]+j] = src[nid[0]*n[0]+gid[0]]; return 0; }
--- a/example/fft/ppe/butterfly.cc Sat Mar 09 21:23:40 2013 +0900 +++ b/example/fft/ppe/butterfly.cc Mon Mar 11 01:20:01 2013 +0900 @@ -16,16 +16,16 @@ int* iter = (int*)s->get_input(rbuf,4); cl_uint* flag = (cl_uint*)s->get_input(rbuf,5); - unsigned int gid = (unsigned int)s->get_cpuid(); - unsigned int nid = (unsigned int)s->get_cpuid(); + unsigned int* gid = (unsigned int*)s->global_get(0); + unsigned int* nid = (unsigned int*)s->global_get(1); int butterflySize = 1 << (iter[0]-1); int butterflyGrpDist = 1 << iter[0]; int butterflyGrpNum = n[0] >> iter[0]; - int butterflyGrpBase = (gid >> (iter[0]-1))*(butterflyGrpDist); - int butterflyGrpOffset = gid & (butterflySize-1); + int butterflyGrpBase = (gid[0] >> (iter[0]-1))*(butterflyGrpDist); + int butterflyGrpOffset = gid[0] & (butterflySize-1); - int a = nid * n[0] + butterflyGrpBase + butterflyGrpOffset; + int a = nid[0] * n[0] + butterflyGrpBase + butterflyGrpOffset; int b = a + butterflySize; int l = butterflyGrpNum * butterflyGrpOffset; @@ -34,23 +34,23 @@ xa = x[a]; xb = x[b]; - xbxx.hi = xbxx.lo = xb.hi; - xbyy.hi = xbyy.lo = xb.lo; + xbxx.x = xbxx.y = xb.x; + xbyy.x = xbyy.y = xb.y; - wab.hi = (cl_float)((cl_uint)w[l].hi ^ 0x0); - wab.lo = (cl_float)((cl_uint)w[l].lo ^ flag[0]); + wab.x = (cl_float)((cl_uint)w[l].x ^ 0x0); + wab.y = (cl_float)((cl_uint)w[l].y ^ flag[0]); - wayx.hi = (cl_float)((cl_uint)wab.lo ^ 0x80000000); - wayx.lo = (cl_float)((cl_uint)wab.hi ^ 0x0); + wayx.x = (cl_float)((cl_uint)wab.y ^ 0x80000000); + wayx.y = (cl_float)((cl_uint)wab.x ^ 0x0); - wbyx.hi = (cl_float)((cl_uint)wab.lo ^ 0x0); - wbyx.lo = (cl_float)((cl_uint)wab.hi ^ 0x80000000); + wbyx.x = (cl_float)((cl_uint)wab.y ^ 0x0); + wbyx.y = (cl_float)((cl_uint)wab.x ^ 0x80000000); - resa.hi = xa.hi + xbxx.hi*wab.hi + xbyy.hi*wayx.hi; - resa.lo = xa.lo + xbxx.lo*wab.lo + xbyy.lo*wayx.lo; + resa.x = xa.x + xbxx.x*wab.x + xbyy.x*wayx.x; + resa.y = xa.y + xbxx.y*wab.y + xbyy.y*wayx.y; - resb.hi = xa.hi - xbxx.hi*wab.hi + xbyy.hi*wbyx.hi; - resb.lo = xa.lo - xbxx.lo*wab.lo + xbyy.lo*wbyx.lo; + resb.x = xa.x - xbxx.x*wab.x + xbyy.x*wbyx.x; + resb.y = xa.y - xbxx.y*wab.y + xbyy.y*wbyx.y; x[a] = resa; x[b] = resb;
--- a/example/fft/ppe/highPassFilter.cc Sat Mar 09 21:23:40 2013 +0900 +++ b/example/fft/ppe/highPassFilter.cc Mon Mar 11 01:20:01 2013 +0900 @@ -13,38 +13,38 @@ int* n = (int*)s->get_input(rbuf,0); int* radius = (int*)s->get_input(rbuf,1); cl_float2* image = (cl_float2*)s->get_output(wbuf,0); - unsigned int xgid = (unsigned int)s->get_cpuid(); - unsigned int ygid = (unsigned int)s->get_cpuid(); + unsigned int* xgid = (unsigned int*)s->global_get(0); + unsigned int* ygid = (unsigned int*)s->global_get(1); cl_int2 n_2; - n_2.hi = n_2.lo = n[0]>>1; + n_2.x = n_2.y = n[0]>>1; cl_int2 mask; - mask.hi = mask.lo = n[0]-1; + mask.x = mask.y = n[0]-1; cl_int2 gid; - gid.hi = (xgid + n_2.hi) & mask.hi; - gid.lo = (ygid + n_2.lo) & mask.lo; + gid.x = (xgid[0] + n_2.x) & mask.x; + gid.y = (ygid[0] + n_2.y) & mask.y; cl_int2 diff; - diff.hi = n_2.hi - gid.hi; - diff.lo = n_2.lo - gid.lo; + diff.x = n_2.x - gid.x; + diff.y = n_2.y - gid.y; cl_int2 diff2; - diff2.hi = diff.hi * diff.hi; - diff2.lo = diff.lo * diff.lo; + diff2.x = diff.x * diff.x; + diff2.y = diff.y * diff.y; - int dist2 = diff2.hi + diff2.lo; + int dist2 = diff2.x + diff2.y; cl_int2 window; if (dist2 < radius[0]*radius[0]) { - window.hi = window.lo = (int)0L; + window.x = window.y = (int)0L; } else { - window.hi = window.lo = (int)-1L; + window.x = window.y = (int)-1L; } - image[ygid*n[0]+xgid].hi = (float)((int)image[ygid*n[0]+xgid].hi & window.hi); - image[ygid*n[0]+xgid].lo = (float)((int)image[ygid*n[0]+xgid].lo & window.lo); + image[ygid[0]*n[0]+xgid[0]].x = (float)((int)image[ygid[0]*n[0]+xgid[0]].x & window.x); + image[ygid[0]*n[0]+xgid[0]].y = (float)((int)image[ygid[0]*n[0]+xgid[0]].y & window.y); return 0; }
--- a/example/fft/ppe/norm.cc Sat Mar 09 21:23:40 2013 +0900 +++ b/example/fft/ppe/norm.cc Mon Mar 11 01:20:01 2013 +0900 @@ -12,11 +12,11 @@ { cl_float2* x = (cl_float2*)s->get_input(rbuf,0); int* n = (int*)s->get_input(rbuf,1); - unsigned int gid = (unsigned int)s->get_cpuid(); - unsigned int nid = (unsigned int)s->get_cpuid(); + unsigned int* gid = (unsigned int*)s->global_get(0); + unsigned int* nid = (unsigned int*)s->global_get(1); - x[nid*n[0]+gid].hi = x[nid*n[0]+gid].hi / (float)n[0]; - x[nid*n[0]+gid].lo = x[nid*n[0]+gid].lo / (float)n[0]; + x[nid[0]*n[0]+gid[0]].x = x[nid[0]*n[0]+gid[0]].x / (float)n[0]; + x[nid[0]*n[0]+gid[0]].y = x[nid[0]*n[0]+gid[0]].y / (float)n[0]; return 0; }
--- a/example/fft/ppe/spinFact.cc Sat Mar 09 21:23:40 2013 +0900 +++ b/example/fft/ppe/spinFact.cc Mon Mar 11 01:20:01 2013 +0900 @@ -16,13 +16,13 @@ { int* n = (int*)s->get_input(rbuf,0); cl_float2* w = (cl_float2*)s->get_output(wbuf,0); - unsigned int i = (unsigned int)s->get_cpuid(); + unsigned int* i = (unsigned int*)s->global_get(0); cl_float2 angle; - angle.hi = (float)(2*i*PI/(float)n[0]); - angle.lo = (float)((2*i*PI/(float)n[0]) + PI_2); + angle.x = (float)(2*i[0]*PI/(float)n[0]); + angle.y = (float)((2*i[0]*PI/(float)n[0]) + PI_2); - w[i].hi = cos(angle.hi); - w[i].lo = cos(angle.lo); + w[i[0]].x = cos(angle.x); + w[i[0]].y = cos(angle.y); return 0; }
--- a/example/fft/ppe/transpose.cc Sat Mar 09 21:23:40 2013 +0900 +++ b/example/fft/ppe/transpose.cc Mon Mar 11 01:20:01 2013 +0900 @@ -13,11 +13,11 @@ cl_float2* src = (cl_float2*)s->get_input(rbuf,0); int* n = (int*)s->get_input(rbuf,1); cl_float2* dst = (cl_float2*)s->get_output(wbuf,0); - unsigned int xgid = (unsigned int)s->get_cpuid(); - unsigned int ygid = (unsigned int)s->get_cpuid(); + unsigned int* xgid = (unsigned int*)s->global_get(0); + unsigned int* ygid = (unsigned int*)s->global_get(1); - unsigned int iid = ygid * n[0] + xgid; - unsigned int oid = xgid * n[0] + ygid; + unsigned int iid = ygid[0] * n[0] + xgid[0]; + unsigned int oid = xgid[0] * n[0] + ygid[0]; dst[oid] = src[iid];