view example/fft/gpu/butterfly.cl @ 2069:26aa08c9a1de draft default tip

cuda example fix
author Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date Sun, 12 Feb 2017 10:04:55 +0900
parents 144e573b030b
children
line wrap: on
line source

/*
 * butterfly - one radix-2 butterfly pass over a batch of complex rows.
 *
 * Every work-item processes one pair of elements (a, b) of one row and
 * writes  x_out[a] = x_in[a] + x_in[b] * tw
 *         x_out[b] = x_in[a] - x_in[b] * tw
 * where float2 values are treated as complex numbers (.x real, .y imaginary)
 * and tw is the twiddle factor read from w.
 *
 * Work-item layout:
 *   get_global_id(0) - index of the pair inside a row; only values below
 *                      n/2 do any work, larger ones return immediately.
 *   get_global_id(1) - row index; consecutive rows are n elements apart.
 *
 * Parameters:
 *   param[0] - n, the row length (also the row stride) in elements.
 *   param[1] - direction flag; when it equals 0x80000000 the imaginary part
 *              of the twiddle factor is negated (presumably selecting the
 *              inverse transform - confirm against the host code).
 *   param[2] - iter, the pass number; elements of a pair are 2^(iter-1)
 *              apart.
 *   x_in     - input samples.
 *   w        - twiddle table, read at index (n >> iter) * offset-in-group.
 *   x_out    - output samples, written at the same two indices read from x_in.
 *
 * NOTE(review): the index math assumes n is a power of two and
 * 1 <= iter <= log2(n); neither is checked here.
 */
__kernel
void butterfly(__constant long *param, __global float2 *x_in, __global float2 *w, __global float2 *x_out)
{
    const unsigned long gid = (unsigned long)get_global_id(0);
    const unsigned long nid = (unsigned long)get_global_id(1);

    const long n = param[0];
    const unsigned long direction_flag = (unsigned long)param[1];
    const long iter = param[2];

    // A row holds exactly n/2 pairs. Work-items beyond that (e.g. from a
    // rounded-up global size) would index past the end of their row.
    if (gid >= (unsigned long)(n >> 1)) {
        return;
    }

    // All index arithmetic is done in 64 bits so that nid * n cannot be
    // truncated for large batches.
    const long pairDist    = 1L << (iter - 1);   // distance between a and b
    const long groupSpan   = pairDist << 1;      // elements per butterfly group (2^iter)
    const long twStride    = n >> iter;          // step through the twiddle table
    const long groupBase   = (long)(gid >> (iter - 1)) * groupSpan;
    const long groupOffset = (long)gid & (pairDist - 1);

    const long a = (long)nid * n + groupBase + groupOffset;
    const long b = a + pairDist;

    const float2 xa = x_in[a];
    const float2 xb = x_in[b];

    // Load the twiddle factor once; negate its imaginary part when the
    // direction flag is set.
    // NOTE(review): a flag that was sign-extended from a 32-bit int on the
    // host would not compare equal here - verify how param[1] is filled.
    float2 tw = w[twStride * groupOffset];
    if (direction_flag == 0x80000000) {
        tw.y = -tw.y;
    }

    // Complex multiply-add, keeping the left-to-right summation order of the
    // original expressions so rounding is unchanged.
    float2 resa, resb;
    resa.x = xa.x + xb.x * tw.x - xb.y * tw.y;
    resa.y = xa.y + xb.x * tw.y + xb.y * tw.x;

    resb.x = xa.x - xb.x * tw.x + xb.y * tw.y;
    resb.y = xa.y - xb.x * tw.y - xb.y * tw.x;

    x_out[a] = resa;
    x_out[b] = resb;
}