Mercurial > hg > Game > Cerium
changeset 1673:2c3adce7eb40 draft
fix fft on gpu
author | Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Thu, 18 Jul 2013 17:16:43 +0900 |
parents | 32bc4ea3557f |
children | 614c60736bfd |
files | example/fft/gpu/bitReverse.cl example/fft/gpu/butterfly.cl example/fft/gpu/fft.cl example/fft/gpu/highPassFilter.cl example/fft/gpu/norm.cl example/fft/gpu/spinFact.cl example/fft/gpu/transpose.cl example/fft/main.cc example/fft/output.pgm example/fft/task_init.cc |
diffstat | 10 files changed, 157 insertions(+), 123 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/example/fft/gpu/bitReverse.cl Thu Jul 18 17:16:43 2013 +0900 @@ -0,0 +1,20 @@ +__kernel +void bitReverse(__constant long *param, __global float2 *src,__global float2 *dst) +{ + unsigned long gid = (unsigned long)get_global_id(0); + unsigned long nid = (unsigned long)get_global_id(1); + + unsigned long m = (unsigned long)param[3]; + unsigned long n = (unsigned long)param[4]; + unsigned int j = gid; + + j = (j & 0x55555555) << 1 | (j & 0xAAAAAAAA) >> 1; + j = (j & 0x33333333) << 2 | (j & 0xCCCCCCCC) >> 2; + j = (j & 0x0F0F0F0F) << 4 | (j & 0xF0F0F0F0) >> 4; + j = (j & 0x00FF00FF) << 8 | (j & 0xFF00FF00) >> 8; + j = (j & 0x0000FFFF) << 16 | (j & 0xFFFF0000) >> 16; + + j >>= (32-m); + + dst[nid*n+j] = src[nid*n+gid]; +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/example/fft/gpu/butterfly.cl Thu Jul 18 17:16:43 2013 +0900 @@ -0,0 +1,50 @@ +__kernel +void butterfly(__constant long *param, __global float2 *x_in, __global float2 *w, __global float2 *x_out) +{ + unsigned long gid = (unsigned long)get_global_id(0); + unsigned long nid = (unsigned long)get_global_id(1); + + long n = param[3]; + unsigned long direction_flag = (unsigned long)param[4]; + long iter = param[5]; + + int butterflySize = 1 << (iter-1); + int butterflyGrpDist = 1 << iter; + int butterflyGrpNum = n >> iter; + int butterflyGrpBase = (gid >> (iter-1))*(butterflyGrpDist); + int butterflyGrpOffset = gid & (butterflySize-1); + + int a = nid * n + butterflyGrpBase + butterflyGrpOffset; + int b = a + butterflySize; + + int l = butterflyGrpNum * butterflyGrpOffset; + + float2 xa, xb, xbxx, xbyy, wab, wayx, wbyx, resa, resb; + + xa = x_in[a]; + xb = x_in[b]; + xbxx.x = xbxx.y = xb.x; + xbyy.x = xbyy.y = xb.y; + + wab.x = w[l].x; + if(direction_flag == 0x80000000) { + wab.y = -w[l].y; + } else { + wab.y = w[l].y; + } + + wayx.x = -wab.y; + wayx.y = wab.x; + + wbyx.x = wab.y; + wbyx.y = -wab.x; + + resa.x = xa.x + xbxx.x*wab.x + xbyy.x*wayx.x; + resa.y = xa.y + xbxx.y*wab.y + xbyy.y*wayx.y; + + resb.x = xa.x - xbxx.x*wab.x + xbyy.x*wbyx.x; + resb.y = xa.y - xbxx.y*wab.y + xbyy.y*wbyx.y; + + x_out[a] = resa; + x_out[b] = resb; +}
--- a/example/fft/gpu/fft.cl Thu Jul 18 11:20:51 2013 +0900 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,116 +0,0 @@ -#define PI 3.14159265358979323846 -#define PI_2 1.57079632679489661923 - -__kernel void spinFact(__constant long *param,__global float2* w) -{ - long n = param[3]; - unsigned int i = get_global_id(0); - - float2 angle = (float2)(2*i*PI/(float)n,(2*i*PI/(float)n)+PI_2); - w[i] = cos(angle); -} - -__kernel void bitReverse(__constant int *param, __global float2 *dst, __global float2 *src) { - int m = param[3]; - int n = param[4]; - unsigned int gid = get_global_id(0); - unsigned int nid = get_global_id(1); - - unsigned int j = gid; - j = (j & 0x55555555) << 1 | (j & 0xAAAAAAAA) >> 1; - j = (j & 0x33333333) << 2 | (j & 0xCCCCCCCC) >> 2; - j = (j & 0x0F0F0F0F) << 4 | (j & 0xF0F0F0F0) >> 4; - j = (j & 0x00FF00FF) << 8 | (j & 0xFF00FF00) >> 8; - j = (j & 0x0000FFFF) << 16 | (j & 0xFFFF0000) >> 16; - - j >>= (32-m); - - dst[nid*n+j] = src[nid*n+gid]; -} - -__kernel void norm(__constant int *param, __global float2 *x,__global float2 *x_out) -{ - int n = param[3]; - unsigned int gid = get_global_id(0); - - - unsigned int nid = get_global_id(1); - - x_out[nid*n+gid] = x[nid*n+gid] / (float2)((float)n, (float)n); -} - -__kernel void butterfly(__constant int *param, __global float2 *x, __global float2* w,__global float2 *x_out) -{ - int n = param[3]; - uint flag = param[4]; - int iter = param[5]; - - unsigned int gid = get_global_id(0); - unsigned int nid = get_global_id(1); - - int butterflySize = 1 << (iter-1); - int butterflyGrpDist = 1 << iter; - int butterflyGrpNum = n >> iter; - int butterflyGrpBase = (gid >> (iter-1))*(butterflyGrpDist); - int butterflyGrpOffset = gid & (butterflySize-1); - - int a = nid * n + butterflyGrpBase + butterflyGrpOffset; - int b = a + butterflySize; - - int l = butterflyGrpNum * butterflyGrpOffset; - - float2 xa, xb, xbxx, xbyy, wab, wayx, wbyx, resa, resb; - - xa = x[a]; - xb = x[b]; - xbxx = xb.xx; - xbyy = xb.yy; - - wab = as_float2(as_uint2(w[l]) ^ (uint2)(0x0, flag)); - wayx = as_float2(as_uint2(wab.yx) ^ (uint2)(0x80000000, 0x0)); - wbyx = as_float2(as_uint2(wab.yx) ^ (uint2)(0x0, 0x80000000)); - - resa = xa + xbxx*wab + xbyy*wayx; - resb = xa - xbxx*wab + xbyy*wbyx; - - x_out[a] = resa; - x_out[b] = resb; -} - -__kernel void transpose(__constant int *param, __global float2* src,__global float2 *dst) -{ - int n = param[3]; - unsigned int xgid = get_global_id(0); - unsigned int ygid = get_global_id(1); - - unsigned int iid = ygid * n + xgid; - unsigned int oid = xgid * n + ygid; - - dst[oid] = src[iid]; -} - -__kernel void highPassFilter(__constant int *param, __global float2* image, __global float2* image_out) -{ - int n = param[3]; - int radius = param[4]; - unsigned int xgid = get_global_id(0); - unsigned int ygid = get_global_id(1); - - int2 n_2 = (int2)(n>>1, n>>1); - int2 mask = (int2)(n-1, n-1); - - int2 gid = ((int2)(xgid, ygid) + n_2) & mask; - - int2 diff = n_2 - gid; - int2 diff2 = diff * diff; - int dist2 = diff2.x + diff2.y; - - int2 window; - - if (dist2 < radius*radius) { - window = (int2)(0L, 0L); - } else { - window = (int2)(-1L, -1L); - } - image_out[ygid*n+xgid] = as_float2(as_int2(image[ygid*n+xgid]) & window); -}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/example/fft/gpu/highPassFilter.cl Thu Jul 18 17:16:43 2013 +0900 @@ -0,0 +1,40 @@ +__kernel +void highPassFilter(__constant long *param, __global float2 *in,__global float2 *image) +{ + unsigned long xgid = (unsigned long)get_global_id(0); + unsigned long ygid = (unsigned long)get_global_id(1); + + long n = param[3]; + long radius = param[4]; + + int2 n_2; + n_2.x = n_2.y = n>>1; + + int2 mask; + mask.x = mask.y = n-1; + + int2 gid; + gid.x = (xgid + n_2.x) & mask.x; + gid.y = (ygid + n_2.y) & mask.y; + + int2 diff; + diff.x = n_2.x - gid.x; + diff.y = n_2.y - gid.y; + + int2 diff2; + diff2.x = diff.x * diff.x; + diff2.y = diff.y * diff.y; + + int dist2 = diff2.x + diff2.y; + + int2 window; + + if (dist2 < radius*radius) { + window.x = window.y = (int)0L; + } else { + window.x = window.y = (int)-1L; + } + + image[ygid*n+xgid].x = (float)((int)in[ygid*n+xgid].x & window.x); + image[ygid*n+xgid].y = (float)((int)in[ygid*n+xgid].y & window.y); +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/example/fft/gpu/norm.cl Thu Jul 18 17:16:43 2013 +0900 @@ -0,0 +1,11 @@ +__kernel void +norm(__constant long *param, __global float2 *in_x, __global float2 *out_x) +{ + unsigned long gid = (unsigned long)get_global_id(0); + unsigned long nid = (unsigned long)get_global_id(1); + + long n = param[3]; + + out_x[nid*n+gid].x = in_x[nid*n+gid].x / (float)n; + out_x[nid*n+gid].y = in_x[nid*n+gid].y / (float)n; +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/example/fft/gpu/spinFact.cl Thu Jul 18 17:16:43 2013 +0900 @@ -0,0 +1,16 @@ +#define PI 3.14159265358979323846 +#define PI_2 1.57079632679489661923 + +__kernel +void spinFact(__constant long *param, __global float2 *w) +{ + unsigned long i = (unsigned long)get_global_id(0); + long n =param[3]; + + float2 angle; + angle.x = (float)(2*i*PI/(float)n); + angle.y = (float)((2*i*PI/(float)n) + PI_2); + + w[i].x = cos(angle.x); + w[i].y = cos(angle.y); +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/example/fft/gpu/transpose.cl Thu Jul 18 17:16:43 2013 +0900 @@ -0,0 +1,13 @@ +__kernel +void transpose(__constant long *param, __global float2 *src, __global float2 *dst) +{ + unsigned long xgid = (unsigned long)get_global_id(0); + unsigned long ygid = (unsigned long)get_global_id(1); + + long n = (long)param[3]; + + unsigned int iid = ygid * n + xgid; + unsigned int oid = xgid * n + ygid; + + dst[oid] = src[iid]; +}
--- a/example/fft/main.cc Thu Jul 18 11:20:51 2013 +0900 +++ b/example/fft/main.cc Thu Jul 18 17:16:43 2013 +0900 @@ -107,7 +107,7 @@ HTask* fftCore(TaskManager *manager,cl_float2 *dst, cl_float2 *src, cl_float2 *spin, long m, enum Mode direction,HTask* waitTask) { - unsigned int direction_flag; + long direction_flag; switch (direction) { case forward:direction_flag = 0x00000000; break; case inverse:direction_flag = 0x80000000; break;
--- a/example/fft/task_init.cc Thu Jul 18 11:20:51 2013 +0900 +++ b/example/fft/task_init.cc Thu Jul 18 17:16:43 2013 +0900 @@ -16,12 +16,12 @@ task_init(void) { #ifdef __CERIUM_GPU__ - GpuSchedRegister(SPIN_FACT, "gpu/fft.cl", "spinFact"); - GpuSchedRegister(BIT_REVERSE, "gpu/fft.cl", "bitReverse"); - GpuSchedRegister(NORMALIZATION, "gpu/fft.cl", "norm"); - GpuSchedRegister(BUTTERFLY, "gpu/fft.cl", "butterfly"); - GpuSchedRegister(TRANSPOSE, "gpu/fft.cl", "transpose"); - GpuSchedRegister(HIGH_PASS_FILTER, "gpu/fft.cl", "highPassFilter"); + GpuSchedRegister(SPIN_FACT, "gpu/spinFact.cl", "spinFact"); + GpuSchedRegister(BIT_REVERSE, "gpu/bitReverse.cl", "bitReverse"); + GpuSchedRegister(NORMALIZATION, "gpu/norm.cl", "norm"); + GpuSchedRegister(BUTTERFLY, "gpu/butterfly.cl", "butterfly"); + GpuSchedRegister(TRANSPOSE, "gpu/transpose.cl", "transpose"); + GpuSchedRegister(HIGH_PASS_FILTER, "gpu/highPassFilter.cl", "highPassFilter"); #else SchedRegisterTask(SPIN_FACT,spinFact); SchedRegisterTask(NORMALIZATION, norm);