view example/fft/gpu/butterfly.cc @ 1731:dc7dd1eaf6de draft

add file
author Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
date Thu, 31 Oct 2013 21:07:30 +0900
parents
children
line wrap: on
line source

#include "butterfly.h"
#ifdef __APPLE__
#include <OpenCL/opencl.h>
#else
#include <CL/cl.h>
#endif
#include "Func.h"
// NOTE(review): SchedDefineTask1 is a macro defined outside this file
// (presumably via one of the project headers included above). It is invoked
// with the name `butterfly` twice; presumably it declares the static
// butterfly() function defined below and exposes it to the task scheduler
// under that name -- confirm against the macro definition.
SchedDefineTask1(butterfly,butterfly);

/*
 * butterfly -- process one pair of complex elements for the work item
 * described by the task `s`.
 *
 * Values taken from the task:
 *   input 0    source array of cl_float2 (.x is used as the real part,
 *              .y as the imaginary part)
 *   input 1    factor table of cl_float2, indexed by
 *              (n >> iter) * (offset inside the group)
 *   output 0   destination array of cl_float2
 *   s->x       position of the work item inside a row
 *   s->y       row index; a row starts at element s->y * n
 *   param 3    n
 *   param 4    direction flag; when it equals 0x80000000 the imaginary
 *              part of the factor is negated before use
 *   param 5    iter; the two elements of a pair are 1 << (iter-1) apart
 *
 * With w being the (possibly conjugated) factor, the function stores
 *   out[a] = in[a] + in[b] * w
 *   out[b] = in[a] - in[b] * w
 * where `*` is complex multiplication. Both inputs are read before either
 * output is written.
 *
 * rbuf and wbuf are not used. The return value is always 0.
 *
 * NOTE(review): iter is shifted as (iter-1), so it is assumed to be >= 1
 * -- confirm with the code that spawns this task.
 */
static int
butterfly(SchedTask* s,void* rbuf,void* wbuf)
{
    cl_float2* src = (cl_float2*)s->get_inputAddr(0);
    cl_float2* factors = (cl_float2*)s->get_inputAddr(1);
    cl_float2* dst = (cl_float2*)s->get_outputAddr(0);

    const unsigned long col = s->x;
    const unsigned long row = s->y;

    const long n = (long)s->get_param(3);
    const unsigned long direction_flag = (unsigned long)s->get_param(4);
    const long iter = (long)s->get_param(5);

    /* Geometry of the current stage. */
    const int half = 1 << (iter-1);                 /* distance between the pair   */
    const int span = 1 << iter;                     /* distance between groups     */
    const int groups = n >> iter;                   /* number of groups in a row   */
    const int base = (col >> (iter-1)) * span;      /* first element of our group  */
    const int offset = col & (half-1);              /* position inside the group   */

    /* Indices of the two elements and of the factor to apply. */
    const int a = row * n + base + offset;
    const int b = a + half;
    const int l = groups * offset;

    /* Load everything before storing anything. */
    const cl_float2 xa = src[a];
    const cl_float2 xb = src[b];

    const float w_re = factors[l].x;
    float w_im = factors[l].y;
    if (direction_flag == 0x80000000) {
        w_im = -w_im;
    }

    /*
     * xb * w = (xb.x*w_re - xb.y*w_im) + i*(xb.x*w_im + xb.y*w_re).
     * The subtractions are written as additions of a negated factor so the
     * floating-point operations are the same ones, in the same order.
     */
    cl_float2 sum, diff;

    sum.x = xa.x + xb.x*w_re + xb.y*(-w_im);
    sum.y = xa.y + xb.x*w_im + xb.y*w_re;

    diff.x = xa.x - xb.x*w_re + xb.y*w_im;
    diff.y = xa.y - xb.x*w_im + xb.y*(-w_re);

    dst[a] = sum;
    dst[b] = diff;

    return 0;
}