Mercurial > hg > Game > Cerium
changeset 1731:dc7dd1eaf6de draft
add file
author | Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp> |
---|---|
date | Thu, 31 Oct 2013 21:07:30 +0900 |
parents | 2e8931baef7f |
children | 6acaf7f6ad97 |
files | example/fft/gpu/bitReverse.cc example/fft/gpu/bitReverse.h example/fft/gpu/bitReverse.o example/fft/gpu/butterfly.cc example/fft/gpu/butterfly.h example/fft/gpu/butterfly.o example/fft/gpu/gpu_task_init.o example/fft/gpu/highPassFilter.cc example/fft/gpu/highPassFilter.h example/fft/gpu/highPassFilter.o example/fft/gpu/norm.cc example/fft/gpu/norm.h example/fft/gpu/norm.o example/fft/gpu/spinFact.cc example/fft/gpu/spinFact.h example/fft/gpu/spinFact.o example/fft/gpu/task_init.cc example/fft/gpu/task_init.o example/fft/gpu/transpose.cc example/fft/gpu/transpose.h example/fft/gpu/transpose.o example/fft/ppe/task_init.cc |
diffstat | 22 files changed, 336 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/example/fft/gpu/bitReverse.cc Thu Oct 31 21:07:30 2013 +0900
@@ -0,0 +1,56 @@
+#include "bitReverse.h"
+#ifdef __APPLE__
+#include <OpenCL/opencl.h>
+#else
+#include <CL/cl.h>
+#endif
+#include "Func.h"
+
+SchedDefineTask1(bitReverse,bitReverse);
+
+/*
+ * Bit-reversal permutation step of the FFT.
+ *
+ * Copies element gid of row nid in input buffer 0 to the slot of the same
+ * row in output buffer 0 whose column is gid with its low m bits reversed.
+ *
+ *   s->x          gid, column index within the row
+ *   s->y          nid, row index
+ *   get_param(3)  m, number of significant index bits
+ *   get_param(4)  n, row stride in elements
+ *
+ * NOTE(review): presumably n == 1 << m; confirm against the caller.
+ * rbuf and wbuf are not used. Always returns 0.
+ */
+static int
+bitReverse(SchedTask* s, void* rbuf, void* wbuf)
+{
+    cl_float2* src = (cl_float2*)s->get_inputAddr(0);
+    cl_float2* dst = (cl_float2*)s->get_outputAddr(0);
+
+    unsigned long gid = s->x;
+    unsigned long nid = s->y;
+
+    unsigned long m = (unsigned long)s->get_param(3);
+    unsigned long n = (unsigned long)s->get_param(4);
+
+    // Reverse all 32 bits by swapping progressively wider groups of bits.
+    unsigned int j = gid;
+    j = (j & 0x55555555) << 1 | (j & 0xAAAAAAAA) >> 1;
+    j = (j & 0x33333333) << 2 | (j & 0xCCCCCCCC) >> 2;
+    j = (j & 0x0F0F0F0F) << 4 | (j & 0xF0F0F0F0) >> 4;
+    j = (j & 0x00FF00FF) << 8 | (j & 0xFF00FF00) >> 8;
+    j = (j & 0x0000FFFF) << 16 | (j & 0xFFFF0000) >> 16;
+
+    // Keep the top m bits of the reversed word. A shift count of 32 or more
+    // is undefined for a 32-bit operand, so m == 0 is handled explicitly
+    // instead of evaluating j >> 32. For m >= 32 no shift is applied.
+    if (m == 0) {
+        j = 0;
+    } else if (m < 32) {
+        j >>= (32 - m);
+    }
+
+    dst[nid*n+j] = src[nid*n+gid];
+    return 0;
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/example/fft/gpu/bitReverse.h Thu Oct 31 21:07:30 2013 +0900
@@ -0,0 +1,11 @@
+// Header included by bitReverse.cc; its only content is SchedTask.h.
+#if !defined(INCLUDED_TASK_BITREVERSE)
+#define INCLUDED_TASK_BITREVERSE
+
+// Skip the include when INCLUDED_SCHED_TASK is already defined
+// (presumably SchedTask.h's own include guard).
+#if !defined(INCLUDED_SCHED_TASK)
+#include "SchedTask.h"
+#endif
+
+#endif // INCLUDED_TASK_BITREVERSE
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/example/fft/gpu/butterfly.cc Thu Oct 31 21:07:30 2013 +0900
@@ -0,0 +1,71 @@
+#include "butterfly.h"
+#ifdef __APPLE__
+#include <OpenCL/opencl.h>
+#else
+#include <CL/cl.h>
+#endif
+#include "Func.h"
+SchedDefineTask1(butterfly,butterfly);
+
+/*
+ * One radix-2 butterfly of FFT stage iter on row nid.
+ *
+ * Reads the pair (lo, hi = lo + half) from input buffer 0, multiplies the
+ * upper element by a twiddle factor from input buffer 1 and writes
+ * lower + product and lower - product to the same two slots of output
+ * buffer 0.
+ *
+ *   s->x          gid, selects the butterfly within the row
+ *   s->y          nid, row index
+ *   get_param(3)  n, row stride in elements
+ *   get_param(4)  direction flag; 0x80000000 conjugates the twiddle factor
+ *   get_param(5)  iter, stage number (the shifts by iter-1 need iter >= 1)
+ *
+ * rbuf and wbuf are not used. Always returns 0.
+ */
+static int
+butterfly(SchedTask* s,void* rbuf,void* wbuf)
+{
+    cl_float2* input   = (cl_float2*)s->get_inputAddr(0);
+    cl_float2* twiddle = (cl_float2*)s->get_inputAddr(1);
+    cl_float2* output  = (cl_float2*)s->get_outputAddr(0);
+
+    unsigned long gid = s->x;
+    unsigned long nid = s->y;
+
+    long n = (long)s->get_param(3);
+    unsigned long direction_flag = (unsigned long)s->get_param(4);
+    long iter = (long)s->get_param(5);
+
+    int half   = 1 << (iter-1);               // distance between the pair
+    int span   = 1 << iter;                   // elements per butterfly group
+    int groups = n >> iter;                   // also the twiddle stride
+    int base   = (gid >> (iter-1))*(span);    // first element of the group
+    int offset = gid & (half-1);              // position inside the group
+
+    int lo = nid * n + base + offset;
+    int hi = lo + half;
+    int tw = groups * offset;
+
+    cl_float2 xa = input[lo];
+    cl_float2 xb = input[hi];
+
+    // Twiddle factor (wr + i*wi), conjugated for direction 0x80000000.
+    float wr = twiddle[tw].x;
+    float wi = twiddle[tw].y;
+    if (direction_flag == 0x80000000) {
+        wi = -wi;
+    }
+
+    // Complex multiply-add written out per component: xa +/- xb * w.
+    cl_float2 sum, dif;
+    sum.x = xa.x + xb.x*wr + xb.y*(-wi);
+    sum.y = xa.y + xb.x*wi + xb.y*wr;
+    dif.x = xa.x - xb.x*wr + xb.y*wi;
+    dif.y = xa.y - xb.x*wi + xb.y*(-wr);
+
+    output[lo] = sum;
+    output[hi] = dif;
+
+    return 0;
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/example/fft/gpu/butterfly.h Thu Oct 31 21:07:30 2013 +0900
@@ -0,0 +1,11 @@
+// Header included by butterfly.cc; its only content is SchedTask.h.
+#ifndef INCLUDED_TASK_BUTTERFLY
+#define INCLUDED_TASK_BUTTERFLY
+
+// Skip the include when INCLUDED_SCHED_TASK is already defined
+// (presumably SchedTask.h's own include guard).
+#ifndef INCLUDED_SCHED_TASK
+#include "SchedTask.h"
+#endif
+
+#endif // INCLUDED_TASK_BUTTERFLY
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/example/fft/gpu/highPassFilter.cc Thu Oct 31 21:07:30 2013 +0900
@@ -0,0 +1,67 @@
+#include "highPassFilter.h"
+#ifdef __APPLE__
+#include <OpenCL/opencl.h>
+#else
+#include <CL/cl.h>
+#endif
+#include "Func.h"
+SchedDefineTask1(highPassFilter,highPassFilter);
+
+/*
+ * High-pass filter for element (xgid, ygid) of a row-major n-wide image.
+ *
+ * The element's coordinates are shifted by n/2 and wrapped with the mask
+ * n-1; elements whose shifted position lies closer than radius to (n/2, n/2)
+ * are written as zero, all others are copied unchanged from input buffer 0
+ * to output buffer 0.
+ *
+ *   s->x          xgid, column index
+ *   s->y          ygid, row index
+ *   get_param(3)  n, row stride; the n-1 mask assumes a power of two
+ *   get_param(4)  radius of the zeroed disc, in elements
+ *
+ * rbuf and wbuf are not used. Always returns 0.
+ */
+static int
+highPassFilter(SchedTask* s,void* rbuf,void* wbuf)
+{
+    cl_float2* in = (cl_float2*)s->get_inputAddr(0);
+    cl_float2* image = (cl_float2*)s->get_outputAddr(0);
+
+    unsigned long xgid = s->x;
+    unsigned long ygid = s->y;
+
+    long n = (long)s->get_param(3);
+    long radius = (long)s->get_param(4);
+
+    cl_int2 n_2;
+    n_2.x = n_2.y = n>>1;
+
+    cl_int2 mask;
+    mask.x = mask.y = n-1;
+
+    // Shift by half the image size with wrap-around.
+    cl_int2 gid;
+    gid.x = (xgid + n_2.x) & mask.x;
+    gid.y = (ygid + n_2.y) & mask.y;
+
+    cl_int2 diff;
+    diff.x = n_2.x - gid.x;
+    diff.y = n_2.y - gid.y;
+
+    int dist2 = diff.x * diff.x + diff.y * diff.y;
+
+    unsigned long idx = ygid*n+xgid;
+
+    // Select the whole float value instead of masking an integer cast of it:
+    // (float)((int)v & -1) drops the fractional part of v and is undefined
+    // when v does not fit in an int, so passed elements were being altered.
+    if (dist2 < radius*radius) {
+        image[idx].x = 0.0f;
+        image[idx].y = 0.0f;
+    } else {
+        image[idx] = in[idx];
+    }
+
+    return 0;
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/example/fft/gpu/highPassFilter.h Thu Oct 31 21:07:30 2013 +0900
@@ -0,0 +1,11 @@
+// Header included by highPassFilter.cc; its only content is SchedTask.h.
+#if !defined(INCLUDED_TASK_HIGHPASSFILTER)
+#define INCLUDED_TASK_HIGHPASSFILTER
+
+// Skip the include when INCLUDED_SCHED_TASK is already defined
+// (presumably SchedTask.h's own include guard).
+#if !defined(INCLUDED_SCHED_TASK)
+#include "SchedTask.h"
+#endif
+
+#endif // INCLUDED_TASK_HIGHPASSFILTER
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/example/fft/gpu/norm.cc Thu Oct 31 21:07:30 2013 +0900
@@ -0,0 +1,39 @@
+#include "norm.h"
+#ifdef __APPLE__
+#include <OpenCL/opencl.h>
+#else
+#include <CL/cl.h>
+#endif
+#include "Func.h"
+SchedDefineTask1(norm,norm);
+
+/*
+ * Divides element gid of row nid by n.
+ *
+ * Both components of the element are read from input buffer 0, divided by
+ * (float)n and stored at the same index of output buffer 0.
+ *
+ *   s->x          gid, column index within the row
+ *   s->y          nid, row index
+ *   get_param(3)  n, row stride and divisor
+ *
+ * rbuf and wbuf are not used. Always returns 0.
+ */
+static int
+norm(SchedTask* s, void* rbuf,void* wbuf)
+{
+    cl_float2* source = (cl_float2*)s->get_inputAddr(0);
+    cl_float2* result = (cl_float2*)s->get_outputAddr(0);
+
+    unsigned long gid = s->x;
+    unsigned long nid = s->y;
+    long n = (long)s->get_param(3);
+
+    const unsigned long at = nid*n+gid;
+    const float divisor = (float)n;
+
+    result[at].x = source[at].x / divisor;
+    result[at].y = source[at].y / divisor;
+
+    return 0;
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/example/fft/gpu/norm.h Thu Oct 31 21:07:30 2013 +0900
@@ -0,0 +1,11 @@
+// Header included by norm.cc; its only content is SchedTask.h.
+#if !defined(INCLUDED_TASK_NORM)
+#define INCLUDED_TASK_NORM
+
+// Skip the include when INCLUDED_SCHED_TASK is already defined
+// (presumably SchedTask.h's own include guard).
+#if !defined(INCLUDED_SCHED_TASK)
+#include "SchedTask.h"
+#endif
+
+#endif // INCLUDED_TASK_NORM
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/example/fft/gpu/spinFact.cc Thu Oct 31 21:07:30 2013 +0900
@@ -0,0 +1,41 @@
+#include <math.h>
+#include "spinFact.h"
+#ifdef __APPLE__
+#include <OpenCL/opencl.h>
+#else
+#include <CL/cl.h>
+#endif
+#include "Func.h"
+
+#define PI 3.14159265358979323846
+#define PI_2 1.57079632679489661923
+
+SchedDefineTask1(spinFact,spinFact);
+
+/*
+ * Computes entry i of the spin-factor table in output buffer 0.
+ *
+ * With theta = 2*PI*i/n the entry is (cos(theta), cos(theta + PI/2)).
+ *
+ *   s->x          i, index of the entry
+ *   get_param(3)  n, divisor of the angle
+ *
+ * rbuf and wbuf are not used. Always returns 0.
+ */
+static int
+spinFact(SchedTask* s,void* rbuf,void* wbuf)
+{
+    cl_float2* table = (cl_float2*)s->get_outputAddr(0);
+
+    unsigned long i = s->x;
+    long n = (long)s->get_param(3);
+
+    // Both angles are rounded to float before cos() is applied.
+    float theta   = (float)(2*i*PI/(float)n);
+    float quarter = (float)((2*i*PI/(float)n) + PI_2);
+
+    table[i].x = cos(theta);
+    table[i].y = cos(quarter);
+
+    return 0;
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/example/fft/gpu/spinFact.h Thu Oct 31 21:07:30 2013 +0900
@@ -0,0 +1,11 @@
+// Header included by spinFact.cc; its only content is SchedTask.h.
+#if !defined(INCLUDED_TASK_SPINFACT)
+#define INCLUDED_TASK_SPINFACT
+
+// Skip the include when INCLUDED_SCHED_TASK is already defined
+// (presumably SchedTask.h's own include guard).
+#if !defined(INCLUDED_SCHED_TASK)
+#include "SchedTask.h"
+#endif
+
+#endif // INCLUDED_TASK_SPINFACT
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/example/fft/gpu/task_init.cc Thu Oct 31 21:07:30 2013 +0900
@@ -0,0 +1,23 @@
+#include "Func.h"
+#include "Scheduler.h"
+
+// Tasks defined with SchedDefineTask1 in the sibling .cc files.
+SchedExternTask(spinFact);
+SchedExternTask(norm);
+SchedExternTask(bitReverse);
+SchedExternTask(butterfly);
+SchedExternTask(transpose);
+SchedExternTask(highPassFilter);
+
+// Registers every FFT task under its task id
+// (the ids are presumably declared in Func.h).
+void
+task_init(void)
+{
+    SchedRegisterTask(SPIN_FACT,spinFact);
+    SchedRegisterTask(NORMALIZATION, norm);
+    SchedRegisterTask(BIT_REVERSE, bitReverse);
+    SchedRegisterTask(BUTTERFLY, butterfly);
+    SchedRegisterTask(TRANSPOSE, transpose);
+    SchedRegisterTask(HIGH_PASS_FILTER, highPassFilter);
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/example/fft/gpu/transpose.cc Thu Oct 31 21:07:30 2013 +0900
@@ -0,0 +1,40 @@
+#include "transpose.h"
+#ifdef __APPLE__
+#include <OpenCL/opencl.h>
+#else
+#include <CL/cl.h>
+#endif
+#include "Func.h"
+
+SchedDefineTask1(transpose,transpose);
+
+/*
+ * Moves one element of an n-wide row-major matrix to its transposed slot.
+ *
+ * The element at row ygid, column xgid of input buffer 0 is stored at row
+ * xgid, column ygid of output buffer 0.
+ *
+ *   s->x          xgid, source column
+ *   s->y          ygid, source row
+ *   get_param(3)  n, row stride in elements
+ *
+ * rbuf and wbuf are not used. Always returns 0.
+ */
+static int
+transpose(SchedTask* s,void* rbuf,void* wbuf)
+{
+    cl_float2* src = (cl_float2*)s->get_inputAddr(0);
+    cl_float2* dst = (cl_float2*)s->get_outputAddr(0);
+
+    unsigned long col = s->x;
+    unsigned long row = s->y;
+    long n = (long)s->get_param(3);
+
+    // The flat indices are stored as unsigned int.
+    unsigned int from = row * n + col;
+    unsigned int to   = col * n + row;
+
+    dst[to] = src[from];
+
+    return 0;
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/example/fft/gpu/transpose.h Thu Oct 31 21:07:30 2013 +0900
@@ -0,0 +1,11 @@
+// Header included by transpose.cc; its only content is SchedTask.h.
+#if !defined(INCLUDED_TASK_TRANSPOSE)
+#define INCLUDED_TASK_TRANSPOSE
+
+// Skip the include when INCLUDED_SCHED_TASK is already defined
+// (presumably SchedTask.h's own include guard).
+#if !defined(INCLUDED_SCHED_TASK)
+#include "SchedTask.h"
+#endif
+
+#endif // INCLUDED_TASK_TRANSPOSE
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/example/fft/ppe/task_init.cc Thu Oct 31 21:07:30 2013 +0900
@@ -0,0 +1,29 @@
+#include "Func.h"
+#include "Scheduler.h"
+
+// Tasks registered below; their definitions live outside this file.
+SchedExternTask(spinFact);
+SchedExternTask(norm);
+SchedExternTask(bitReverse);
+SchedExternTask(butterfly);
+SchedExternTask(transpose);
+SchedExternTask(highPassFilter);
+
+// Registers every FFT task under its task id
+// (the ids are presumably declared in Func.h).
+void
+task_init(void)
+{
+    SchedRegisterTask(SPIN_FACT,spinFact);
+    SchedRegisterTask(NORMALIZATION, norm);
+    SchedRegisterTask(BIT_REVERSE, bitReverse);
+    SchedRegisterTask(BUTTERFLY, butterfly);
+    SchedRegisterTask(TRANSPOSE, transpose);
+    SchedRegisterTask(HIGH_PASS_FILTER, highPassFilter);
+}
+
+// Does nothing; the body is empty.
+void
+gpu_task_init()
+{
+}