view example/fft/main.cc @ 1643:6c0b6947c231 draft

fix fft
author Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
date Sat, 22 Jun 2013 18:10:55 +0900
parents fbb4757d82ee
children ab6b11476e02
line wrap: on
line source

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/time.h>
#include "TaskManager.h"
#include "GpuScheduler.h"
#include "SchedTask.h"
#include "Func.h"
#ifdef __APPLE__
#include <OpenCL/opencl.h>
#else
#include <CL/cl.h>
#endif
#include "pgm.h"
extern void task_init();

#define PI 3.14159265358979

#define MAX_SOURCE_SIZE (0x100000)

#define AMP(a, b) (sqrt((a)*(a)+(b)*(b)))

static double st_time;
static double ed_time;
void TMend(TaskManager *);
int ndrange_flag;
cl_device_id device_id = NULL;
cl_context context = NULL;
cl_command_queue queue = NULL;
cl_program program = NULL;
CPU_TYPE spe_cpu = SPE_ANY;

enum Mode {
    forward = 0,
    inverse = 1
};

static double
getTime()
{
    struct timeval tv;
    gettimeofday(&tv, NULL);
    return tv.tv_sec + (double)tv.tv_usec*1e-6;
}

const char *usr_help_str = "Usage: ./fft [option]\n \
options\n\
  -cpu     Number of SPE used (default 1)\n\
  -l, --length  Sorted number of data (default 1200)\n\
  -h, --help    Print this message";

int setWorkSize(size_t* gws, size_t* lws, cl_int x, cl_int y)
{
    switch(y) {
    case 1:
        gws[0] = x;
        gws[1] = 1;
        lws[0] = 1;
        lws[1] = 1;
        break;
    default:
        gws[0] = x;
        gws[1] = y;
        lws[0] = 1;
        lws[1] = 1;
        break;
    }

    return 0;
}

int fftCore(TaskManager *manager,cl_float2 *dst, cl_float2 *src, cl_float2 *spin, int m, enum Mode direction)
{
    unsigned int direction_flag;
    switch (direction) {
    case forward:direction_flag = 0x00000000; break;
    case inverse:direction_flag = 0x80000000; break;
    }
    int n = 1<<m;
    size_t gws[2],lws[2];
    int length_dst = n*n;
    int length_src = n*n;

    HTask* brev = manager->create_task(BIT_REVERSE);
    setWorkSize(gws,lws,n,n);
    brev->set_inData(0, src, length_src*sizeof(cl_float2));
    brev->set_outData(0, dst, length_dst*sizeof(cl_float2));
    brev->set_param(3,m);
    brev->set_param(4,n);
    brev->set_cpu(spe_cpu);
    brev->iterate(gws[0],gws[1]);

    HTask* bfly = manager->create_task(BUTTERFLY);
    setWorkSize(gws,lws,n/2,n);
    bfly->set_inData(0, dst, length_dst*sizeof(cl_float2));
    bfly->set_inData(1, spin, sizeof(cl_float2)*(n/2));
    bfly->set_outData(0,dst,length_dst*sizeof(cl_float2));
    bfly->set_param(3,n);
    bfly->set_param(4,direction_flag);
    bfly->set_cpu(spe_cpu);
    bfly->wait_for(brev);
    bfly->iterate(gws[0],gws[1],m);
    
    if (direction == inverse) { 
        HTask *norm = manager->create_task(NORMALIZATION);
        setWorkSize(gws,lws,n,n);
        norm->set_outData(0, dst, length_dst*sizeof(cl_float2));
        norm->set_param(3,n);
        norm->set_cpu(spe_cpu);
        norm->flip();
        norm->wait_for(bfly);
        norm->iterate(gws[0],gws[0]);
    }
    return 0;
}

char *
init(int argc, char**argv){

    char *filename = 0;

    for (int i = 1; argv[i]; ++i) {
        if (strcmp(argv[i], "-file") == 0) {
            filename = argv[i+1];
        }  else if (strcmp(argv[i], "-g") == 0) {
            spe_cpu = GPU_0;
        }
    }
    if ( (argc == 1)||(filename==0)) {
        printf("Usage: ./fft -file [image filename] -cpu or -gpu\n");
        exit(-1);
    }

    return filename;
}

void
run_start(TaskManager *manager,pgm_t ipgm)
{
    int n = ipgm.width;
    int m = (cl_int)(log((double)n)/log(2.0));
    size_t *gws = new size_t[2];
    size_t *lws = new size_t[2];

    cl_float2 *xm = (cl_float2 *)malloc(n * n * sizeof(cl_float2));
    cl_float2 *rm = (cl_float2 *)malloc(n * n * sizeof(cl_float2));
    cl_float2 *wm = (cl_float2 *)malloc(n / 2 * sizeof(cl_float2));
    /*
     * [cl_float2]
     * typedef union
     * {
     * cl_float  CL_ALIGNED(8) s[2];
     * #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
     * __extension__ struct{ cl_float  x, y; };
     * __extension__ struct{ cl_float  s0, s1; };
     * __extension__ struct{ cl_float  lo, hi; };
     * #endif
     * #if defined( __CL_FLOAT2__) 
     * __cl_float2     v2;
     * #endif
     * } cl_float2;
     */
    for (int i=0; i<n; i++) {
        for (int j=0; j < n; j++) {
            ((float*)xm)[(2*n*j)+2*i+0] = (float)ipgm.buf[n*j+i];
            ((float*)xm)[(2*n*j)+2*i+1] = (float)0;
        }
    }
        
    // Create spin factor
    int length_w = n / 2;
    HTask* sfac = manager->create_task(SPIN_FACT);
    setWorkSize(gws,lws,n/2,1);
    sfac->set_outData(0, wm, length_w*sizeof(cl_float2));
    sfac->set_param(3,n);
    sfac->set_cpu(spe_cpu);
    sfac->iterate(gws[0],gws[1]);

    // Butterfly Operation
    fftCore(manager, rm, xm, wm, m, forward);
    
    HTaskPtr *trns = (HTask**)manager->allocate(sizeof(HTask*)*2);

    // Transpose matrix 
    int length_r =n*n;
    setWorkSize(gws,lws,n/2,1);
    for (int i=0;i<2;i++) {
        trns[i]= manager->create_task(TRANSEPOSE);
        trns[i]->set_inData(0, rm, length_r*sizeof(cl_float2));
        trns[i]->set_outData(0, xm, length_r*sizeof(cl_float2));
        trns[i]->set_param(3,n);
        trns[i]->set_cpu(spe_cpu);
    }
    trns[0]->wait_for(sfac);
    trns[0]->iterate(gws[0],gws[1]);
    // Butterfly Operation 
    fftCore(manager, rm, xm, wm, m, forward);

    // Apply high-pass filter
    HTask *hpfl = manager->create_task(HIGH_PASS_FILTER);
    cl_int radius = n/8;
    setWorkSize(gws,lws,n/2,1);
    hpfl->set_outData(0, rm, length_r*sizeof(cl_float2));
    hpfl->set_param(3,n);
    hpfl->set_param(4,radius);
    hpfl->set_cpu(spe_cpu);
    hpfl->wait_for(trns[0]);
    hpfl->iterate(gws[0],gws[1]);
    // Inverse FFT

    // Butterfly Operation
    fftCore(manager,xm, rm, wm, m, inverse);

    // Transpose matrix
    setWorkSize(gws,lws,n,n);
    trns[1]->iterate(gws[0],gws[1]);

    // Butterfly Operation

    fftCore(manager,xm, rm, wm, m, inverse);

    // Read data from memory buffer
    // spawn and wait 

    float* ampd;
    ampd = (float*)malloc(n*n*sizeof(float));
    for (int i=0; i < n; i++) {
        for (int j=0; j < n; j++) {
            ampd[n*((i))+((j))] = (AMP(((float*)xm)[(2*n*i)+2*j], ((float*)xm)[(2*n*i)+2*j+1]));
        }
    }
    pgm_t opgm;
    opgm.width = n;
    opgm.height = n;
    normalizeF2PGM(&opgm, ampd);
    free(ampd);

    // Write out image
    writePGM(&opgm, "output.pgm");

    // Finalizations
    destroyPGM(&ipgm);
    destroyPGM(&opgm);

    free(wm);
    free(rm);
    free(xm);
}

int TMmain(TaskManager *manager, int argc, char** argv) {
    task_init();
    char * pgm_file = init(argc,argv);
    pgm_t ipgm;
    /* Read image */
    int err = readPGM(&ipgm, pgm_file);
    if (err<0) {
        fprintf(stderr, "Failed to read image file.\n");
        exit(1);
    }
    st_time = getTime();
    run_start(manager, ipgm);
    manager->set_TMend(TMend);
    return 0;
}

void
TMend(TaskManager *manager)
{
    ed_time = getTime();
    fprintf(stdout, "image out put succeeded.\n");
    printf("Time: %0.6f\n",ed_time-st_time);
}