view example/fft/gpu/bitReverse.cc @ 1786:ba6ffc679a8f draft

minor fix
author Masataka Kohagura <e085726@ie.u-ryukyu.ac.jp>
date Sat, 30 Nov 2013 21:06:44 +0900
parents dc7dd1eaf6de
children
line wrap: on
line source

#include "bitReverse.h"
#ifdef __APPLE__
#include <OpenCL/opencl.h>
#else
#include <CL/cl.h>
#endif
#include "Func.h"

// NOTE(review): SchedDefineTask1 is a macro defined outside this file; it
// presumably declares/registers the bitReverse function below as a scheduler
// task under the name bitReverse -- confirm against the macro definition.
SchedDefineTask1(bitReverse,bitReverse);

/*
 * Bit-reversal permutation step (part of the FFT example).
 *
 * Copies a single element from input buffer 0 to output buffer 0.  The
 * element is read at index  s->y * n + s->x  and written at index
 * s->y * n + rev(s->x), where rev() reverses the low m bits of s->x.
 *
 *   s    - task context; provides the input/output buffers, the indices
 *          s->x and s->y, and the parameters m (param 3) and n (param 4).
 *   rbuf - unused.
 *   wbuf - unused.
 *
 * Returns 0 in all cases.
 *
 * The reversal is carried out on a 32-bit value, so only 1 <= m <= 32 gives a
 * meaningful permutation.  m is presumably log2(n) -- TODO confirm against the
 * caller that sets the parameters.
 */
static int
bitReverse(SchedTask* s, void* rbuf, void* wbuf)
{
    cl_float2* src = (cl_float2*)s->get_inputAddr(0);
    cl_float2* dst = (cl_float2*)s->get_outputAddr(0);

    unsigned long gid = s->x;
    unsigned long nid = s->y;

    unsigned long m = (unsigned long)s->get_param(3);
    unsigned long n = (unsigned long)s->get_param(4);

    // Work on the low 32 bits of gid only.
    unsigned int j = gid;

    // Reverse all 32 bits by swapping progressively larger groups:
    // adjacent bits, bit pairs, nibbles, bytes, then 16-bit halves.
    j = (j & 0x55555555) << 1 | (j & 0xAAAAAAAA) >> 1;
    j = (j & 0x33333333) << 2 | (j & 0xCCCCCCCC) >> 2;
    j = (j & 0x0F0F0F0F) << 4 | (j & 0xF0F0F0F0) >> 4;
    j = (j & 0x00FF00FF) << 8 | (j & 0xFF00FF00) >> 8;
    j = (j & 0x0000FFFF) << 16 | (j & 0xFFFF0000) >> 16;

    // Move the reversed bits down so that only the low m bits remain.
    // Shifting a 32-bit value by 32 or more is undefined behaviour, so the
    // shift is only performed when its count is in 0..31.  For m == 0 the
    // reversal of zero bits is 0; m > 32 is unsupported and also yields 0
    // rather than invoking undefined behaviour.
    if (m >= 1 && m <= 32) {
        j >>= (32 - m);
    } else {
        j = 0;
    }

    dst[nid*n+j] = src[nid*n+gid];
    return 0;
}