view example/fft/spe/transpose.cc @ 2069:26aa08c9a1de draft default tip

cuda example fix
author Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date Sun, 12 Feb 2017 10:04:55 +0900
parents 786ab4ad682e
children
line wrap: on
line source

#include "transpose.h"
#ifdef __APPLE__
#include <OpenCL/opencl.h>
#else
#include <CL/cl.h>
#endif

// Invokes the SchedDefineTask1 macro with `transpose` for both arguments.
// The macro is defined outside this file; presumably it declares/registers
// the transpose() task function defined below -- TODO confirm against the
// scheduler headers.
SchedDefineTask1(transpose,transpose);

/*
 * Task body: copy one cl_float2 element from the input array to the output
 * array, using swapped index roles for the read and the write position.
 *
 *   s    - scheduler task handle; supplies the buffers and the cpu id
 *   rbuf - read buffer; input 0 is the cl_float2 source array, input 1 is an
 *          int array whose first element is the multiplier used when forming
 *          the linear indices
 *   wbuf - write buffer; output 0 is the cl_float2 destination array
 *
 * Always returns 0.
 *
 * NOTE(review): both index components are obtained from s->get_cpuid().
 * If the two calls yield the same value, the read and write indices are
 * identical and only that single element is copied in place. Confirm that
 * this is the intended source for both coordinates.
 */
static int
transpose(SchedTask* s,void* rbuf,void* wbuf)
{
    cl_float2* in = (cl_float2*)s->get_input(rbuf,0);
    int* size = (int*)s->get_input(rbuf,1);
    cl_float2* out = (cl_float2*)s->get_output(wbuf,0);

    // Two separate queries, kept in the same order as the original code.
    unsigned int col = (unsigned int)s->get_cpuid();
    unsigned int row = (unsigned int)s->get_cpuid();

    // Same value the implicit int -> unsigned conversion produced before.
    const unsigned int stride = (unsigned int)*size;

    const unsigned int from = row * stride + col;
    const unsigned int to = col * stride + row;

    out[to] = in[from];

    return 0;
}