Mercurial > hg > Members > yuuhi > OpenCL
diff fft_fixstart/fft.cl @ 3:f3cfea46e585
add fft_fixstar sample
author | Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Mon, 04 Feb 2013 02:59:58 +0900 |
parents | |
children | a664602e1819 |
line wrap: on
line diff
/*
 * fft_fixstart/fft.cl
 *
 * OpenCL C kernels used by the FFT sample: twiddle-factor generation,
 * bit-reversal permutation, normalisation, butterfly stage, matrix
 * transpose and a frequency-domain high-pass mask.
 *
 * All buffers hold float2 elements; the butterfly treats .x as the real
 * part and .y as the imaginary part of a complex number.
 */

/*
 * Single-precision constants (note the f suffix). Unsuffixed literals are
 * double in OpenCL C, which either pulls the twiddle computation into
 * double precision or triggers a diagnostic on devices without fp64.
 */
#define PI   3.14159265358979323846f
#define PI_2 1.57079632679489661923f

/*
 * Twiddle-factor table.
 *
 * Work-item i (global id, dimension 0) writes
 *     w[i] = (cos(t), cos(t + PI/2)),  t = 2*PI*i/n
 * cos(t + PI/2) is mathematically -sin(t), so w[i] is cos(t) - j*sin(t).
 *
 *   w  output table, one float2 per work-item
 *   n  divisor of the angle (NOTE(review): presumably the FFT length and
 *      the table is indexed 0..n/2-1 by butterfly() - confirm with host)
 */
__kernel void spinFact(__global float2 *w, int n)
{
    unsigned int i = get_global_id(0);

    /* Evaluated entirely in float: ((2*i) * PI) / n. */
    float theta = (float)(2 * i) * PI / (float)n;

    w[i] = cos((float2)(theta, theta + PI_2));
}

/*
 * Bit-reversal permutation of each row.
 *
 * Work-item (gid, nid) copies src[nid*n + gid] to dst[nid*n + j], where j
 * is gid with its low m bits reversed.
 *
 *   dst  destination buffer
 *   src  source buffer (read only)
 *   m    number of index bits kept after the reversal
 *        (NOTE(review): presumably n == 1 << m - confirm with host)
 *   n    row stride in elements
 */
__kernel void bitReverse(__global float2 *dst, __global const float2 *src, int m, int n)
{
    unsigned int gid = get_global_id(0);
    unsigned int nid = get_global_id(1);

    /* Reverse all 32 bits by swapping ever larger groups of bits. */
    unsigned int j = gid;
    j = (j & 0x55555555) << 1  | (j & 0xAAAAAAAA) >> 1;
    j = (j & 0x33333333) << 2  | (j & 0xCCCCCCCC) >> 2;
    j = (j & 0x0F0F0F0F) << 4  | (j & 0xF0F0F0F0) >> 4;
    j = (j & 0x00FF00FF) << 8  | (j & 0xFF00FF00) >> 8;
    j = (j & 0x0000FFFF) << 16 | (j & 0xFFFF0000) >> 16;

    /* The reversed bits sit at the top of the word; move them down. */
    j >>= (32 - m);

    dst[nid * n + j] = src[nid * n + gid];
}

/*
 * Divide every element of x by n, in place.
 *
 * Work-item (gid, nid) scales x[nid*n + gid]; both components are divided
 * by (float)n.
 */
__kernel void norm(__global float2 *x, int n)
{
    unsigned int gid = get_global_id(0);
    unsigned int nid = get_global_id(1);

    unsigned int idx = nid * n + gid;
    float2 divisor = (float2)((float)n, (float)n);

    x[idx] = x[idx] / divisor;
}

/*
 * One butterfly stage, in place.
 *
 * Work-item (gid, nid) combines the pair x[a], x[b] of row nid:
 *     x[a] = x[a] + x[b] * tw
 *     x[b] = x[a] - x[b] * tw
 * where "*" is the complex product and tw is w[l] with the bits of `flag`
 * XOR-ed into its .y component.
 *
 *   x     data buffer, row stride n
 *   w     twiddle table (read only), indexed by l below
 *   m     unused by this kernel
 *   n     row stride in elements
 *   iter  stage number; pairs are 1 << (iter-1) elements apart
 *   flag  bit pattern XOR-ed into tw.y. 0x80000000 flips its sign (complex
 *         conjugate), 0 leaves it unchanged.
 *         NOTE(review): presumably selects forward vs. inverse transform -
 *         confirm with host code.
 */
__kernel void butterfly(__global float2 *x, __global const float2 *w, int m, int n, int iter, uint flag)
{
    unsigned int gid = get_global_id(0);
    unsigned int nid = get_global_id(1);

    int butterflySize      = 1 << (iter - 1);  /* distance between x[a] and x[b]   */
    int butterflyGrpDist   = 1 << iter;        /* elements covered by one group    */
    int butterflyGrpNum    = n >> iter;        /* groups per row; stride into w    */
    int butterflyGrpBase   = (gid >> (iter - 1)) * (butterflyGrpDist);
    int butterflyGrpOffset = gid & (butterflySize - 1);

    int a = nid * n + butterflyGrpBase + butterflyGrpOffset;
    int b = a + butterflySize;

    int l = butterflyGrpNum * butterflyGrpOffset;

    float2 xa, xb, xbxx, xbyy, wab, wayx, wbyx, resa, resb;

    xa = x[a];
    xb = x[b];
    xbxx = xb.xx;
    xbyy = xb.yy;

    /* Sign changes are done by XOR-ing the IEEE sign bit, not by negation. */
    wab  = as_float2(as_uint2(w[l]) ^ (uint2)(0x0, flag));            /* ( wr,  wi) */
    wayx = as_float2(as_uint2(wab.yx) ^ (uint2)(0x80000000, 0x0));    /* (-wi,  wr) */
    wbyx = as_float2(as_uint2(wab.yx) ^ (uint2)(0x0, 0x80000000));    /* ( wi, -wr) */

    /*
     * xb*wab = (xb.x*wr - xb.y*wi, xb.x*wi + xb.y*wr)
     *        = xbxx*wab + xbyy*wayx
     * and -(xb*wab) = -xbxx*wab + xbyy*wbyx.
     */
    resa = xa + xbxx * wab + xbyy * wayx;
    resb = xa - xbxx * wab + xbyy * wbyx;

    x[a] = resa;
    x[b] = resb;
}

/*
 * Transpose an n x n matrix: dst[x*n + y] = src[y*n + x] for work-item
 * (x, y) = (global id 0, global id 1).
 *
 *   dst  destination buffer
 *   src  source buffer (read only)
 *   n    row stride of both buffers
 */
__kernel void transpose(__global float2 *dst, __global const float2 *src, int n)
{
    unsigned int xgid = get_global_id(0);
    unsigned int ygid = get_global_id(1);

    unsigned int iid = ygid * n + xgid;
    unsigned int oid = xgid * n + ygid;

    dst[oid] = src[iid];
}

/*
 * In-place high-pass mask over an n x n image.
 *
 * Each work-item (xgid, ygid) shifts its coordinate by n/2 with wrap-around
 * and measures the squared distance of the shifted point from (n/2, n/2).
 * Elements closer than `radius` are zeroed, all others are left untouched.
 *
 *   image   buffer to filter, row stride n
 *   n       image width/height. The wrap-around uses `& (n-1)`, which is a
 *           modulo only when n is a power of two - assumed, confirm with host.
 *   radius  cut-off radius in elements
 */
__kernel void highPassFilter(__global float2 *image, int n, int radius)
{
    unsigned int xgid = get_global_id(0);
    unsigned int ygid = get_global_id(1);

    int2 n_2  = (int2)(n >> 1, n >> 1);
    int2 mask = (int2)(n - 1, n - 1);

    int2 gid = ((int2)(xgid, ygid) + n_2) & mask;

    int2 diff  = n_2 - gid;
    int2 diff2 = diff * diff;
    int dist2  = diff2.x + diff2.y;

    /* All-zero bits clear the element, all-one bits keep it unchanged. */
    int keep = (dist2 < radius * radius) ? 0 : -1;
    int2 window = (int2)(keep, keep);

    unsigned int idx = ygid * n + xgid;

    image[idx] = as_float2(as_int2(image[idx]) & window);
}