view example/fft/main.cc @ 1592:afcb4a3f3526 draft

merge
author Masa <e085726@ie.u-ryukyu.ac.jp>
date Mon, 01 Apr 2013 18:00:35 +0900
parents 8ee897303cd0
children 6ff0c34c8a3c
line wrap: on
line source

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/time.h>
#include "TaskManager.h"
#include "GpuScheduler.h"
#include "SchedTask.h"
#include "Func.h"
#ifdef __APPLE__
#include <OpenCL/opencl.h>
#else
#include <CL/cl.h>
#endif
#include "pgm.h"
extern void task_init();

#define PI 3.14159265358979

#define MAX_SOURCE_SIZE (0x100000)

#define AMP(a, b) (sqrt((a)*(a)+(b)*(b)))

static double st_time;
static double ed_time;
void TMend(TaskManager *);
int ndrange_flag;
cl_device_id device_id = NULL;
cl_context context = NULL;
cl_command_queue queue = NULL;
cl_program program = NULL;
CPU_TYPE spe_cpu = SPE_ANY;

enum Mode {
    forward = 0,
    inverse = 1
};

static double
getTime()
{
    struct timeval tv;
    gettimeofday(&tv, NULL);
    return tv.tv_sec + (double)tv.tv_usec*1e-6;
}
const char *usr_help_str = "Usage: ./fft [option]\n \
options\n\
  -cpu     Number of SPE used (default 1)\n\
  -l, --length  Sorted number of data (default 1200)\n\
  -h, --help    Print this message";

int setWorkSize(size_t* gws, size_t* lws, cl_int x, cl_int y)
{
    switch(y) {
    case 1:
        gws[0] = x;
        gws[1] = 1;
        lws[0] = 1;
        lws[1] = 1;
        break;
    default:
        gws[0] = x;
        gws[1] = y;
        lws[0] = 1;
        lws[1] = 1;
        break;
    }

    return 0;
}

int fftCore(TaskManager *manager,cl_float2 *dst, cl_float2 *src, cl_float2 *spin, int m_, enum Mode direction)
{
    int iter;
    unsigned int* flag = new unsigned int[1];
    switch (direction) {
    case forward:flag[0] = 0x00000000; break;
    case inverse:flag[0] = 0x80000000; break;
    }
    int* n = new int[1];
    int* m = new int[1];
    m[0] = m_;
    n[0] = 1<<m[0];
    size_t gws[2],lws[2];
    int length_dst = n[0]*n[0];
    int length_src = n[0]*n[0];
    cl_uint dimension = 2;
    HTask* brev;
    int i,j;
    setWorkSize(gws,lws,n[0],n[0]);
    for(i=0;i<gws[0];i++){
        for(j=0;j<gws[1];j++){
            brev = manager->create_task(BIT_REVERSE);
            brev->set_param(0,(memaddr)length_src);
            brev->set_param(1,(memaddr)i);
            brev->set_param(2,(memaddr)j);
            brev->set_inData(0, src, length_src*sizeof(cl_float2));
            brev->set_inData(1, m,sizeof(int));
            brev->set_inData(2, n,sizeof(int));
            brev->set_outData(0, dst, length_dst*sizeof(cl_float2));
            brev->set_cpu(spe_cpu);
            brev->spawn();
        }
    }
    exit(0);
    HTask* bfly;
    setWorkSize(gws,lws,n[0]/2,n[0]);

    for (iter=1; iter<=m_;iter++) {
        for(i=0;i<gws[0];i++){
            for(j=0;i<gws[1];j++){
                bfly = manager->create_task(BUTTERFLY);
                bfly->set_param(0,(memaddr)length_dst);
                bfly->set_param(1,(memaddr)iter);
                bfly->set_param(2,(memaddr)i);
                bfly->set_param(3,(memaddr)j);
                bfly->set_inData(0, dst, length_dst*sizeof(cl_float2));
                bfly->set_inData(1, spin, sizeof(cl_float2)*(n[0]/2));
                bfly->set_inData(2, m,sizeof(int));
                bfly->set_inData(3, n,sizeof(int));
                bfly->set_inData(4, flag,sizeof(int));
                bfly->set_outData(0,dst,length_dst*sizeof(cl_float2));
                bfly->set_cpu(spe_cpu);
                bfly->wait_for(brev);
                bfly->spawn();
            }
        }
    }
    
    if (direction == inverse) { 
        HTask *norm = manager->create_task(NORMALIZATION);
        setWorkSize(gws,lws,n[0],n[0]);
        norm->set_param(0,(memaddr)length_dst);
        norm->set_param(1,(memaddr)dimension);
        norm->set_param(2,(memaddr)gws[0]);
        norm->set_param(3,(memaddr)gws[1]);
        norm->set_param(4,(memaddr)lws[0]);
        norm->set_param(5,(memaddr)lws[1]);
        norm->set_inData(0, n,sizeof(int));
        norm->set_outData(0, dst, length_dst*sizeof(cl_float2));
        norm->set_cpu(spe_cpu);
        norm->nd_range();
        norm->flip();
        norm->wait_for(bfly);
        norm->spawn();
    }
    return 0;
}

char *
init(int argc, char**argv){

    char *filename = 0;

    for (int i = 1; argv[i]; ++i) {
        if (strcmp(argv[i], "-file") == 0) {
            filename = argv[i+1];
        }  else if (strcmp(argv[i], "-g") == 0) {
            spe_cpu = GPU_0;
        }
    }
    if ( (argc == 1)||(filename==0)) {
        printf("Usage: ./fft -file [image filename] -cpu or -gpu \n");
        exit(-1);
    }

    return filename;
}

void
run_start(TaskManager *manager,pgm_t ipgm)
{
    int dimension;
    int *n = new int[1];
    n[0] = ipgm.width;
    int *m = new int[1];
    m[0] = (cl_int)(log((double)n[0])/log(2.0));
    size_t *gws = new size_t[3];
    size_t *lws = new size_t[3];

    cl_float2 *xm = (cl_float2 *)malloc(n[0] * n[0] * sizeof(cl_float2));
    cl_float2 *rm = (cl_float2 *)malloc(n[0] * n[0] * sizeof(cl_float2));
    cl_float2 *wm = (cl_float2 *)malloc(n[0] / 2 * sizeof(cl_float2));
    int i,j;
    /*
     * [cl_float2]
     * typedef union
     * {
     * cl_float  CL_ALIGNED(8) s[2];
     * #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
     * __extension__ struct{ cl_float  x, y; };
     * __extension__ struct{ cl_float  s0, s1; };
     * __extension__ struct{ cl_float  lo, hi; };
     * #endif
     * #if defined( __CL_FLOAT2__) 
     * __cl_float2     v2;
     * #endif
     * } cl_float2;
     */
    for (i=0; i<n[0]; i++) {
        for (j=0; j < n[0]; j++) {
            ((float*)xm)[(2*n[0]*j)+2*i+0] = (float)ipgm.buf[n[0]*j+i];
            ((float*)xm)[(2*n[0]*j)+2*i+1] = (float)0;
        }
    }
        
    // Create spin factor
    setWorkSize(gws,lws,n[0]/2,1); // Todo:setWorkSize(ndr,n[0]/2,1);でできるように
    int length_w = n[0] / 2;
    HTask* sfac;
    for(i=0;i<gws[0];i++){
        sfac = manager->create_task(SPIN_FACT);
        sfac->set_param(0, (memaddr)length_w);
        sfac->set_param(1,(memaddr)i);
        sfac->set_inData(0, n, sizeof(int));
        sfac->set_outData(0, wm, length_w*sizeof(cl_float2));
        sfac->set_cpu(spe_cpu);
        sfac->nd_range();
        sfac->spawn();
    }
    // Butterfly Operation
    fftCore(manager, rm, xm, wm, m[0], forward);
    
    HTaskPtr *trns = (HTask**)manager->allocate(sizeof(HTask*)*2);

    // Transpose matrix 
    int length_r =n[0] * n[0];
    setWorkSize(gws,lws,n[0]/2,1);
    dimension = 2;
    for (int i=0;i<2;i++) {
        trns[i]= manager->create_task(TRANSEPOSE);
        trns[i]->set_param(0, (memaddr)length_r);
        trns[i]->set_param(1,(memaddr)dimension);
        trns[i]->set_param(2,(memaddr)gws[0]);
        trns[i]->set_param(3,(memaddr)gws[1]);
        trns[i]->set_param(4,(memaddr)lws[0]);
        trns[i]->set_param(5,(memaddr)lws[1]);
        trns[i]->set_inData(0, rm, length_r*sizeof(cl_float2));
        trns[i]->set_inData(1, n,sizeof(int));
        trns[i]->set_outData(0, xm, length_r*sizeof(cl_float2));
        trns[i]->set_cpu(spe_cpu);
        trns[i]->nd_range();
    }
    trns[0]->wait_for(sfac);
    trns[0]->spawn();
    // Butterfly Operation 
    fftCore(manager, rm, xm, wm, m[0], forward);

    // Apply high-pass filter
    HTask *hpfl = manager->create_task(HIGH_PASS_FILTER);
    cl_int *radius = new cl_int[1];
    radius[0] = n[0]/8;
    setWorkSize(gws,lws,n[0]/2,1);
    hpfl->set_param(0, (memaddr)length_r);
    hpfl->set_param(1,(memaddr)dimension);
    hpfl->set_param(2,(memaddr)gws[0]);
    hpfl->set_param(3,(memaddr)gws[1]);
    hpfl->set_param(4,(memaddr)lws[0]);
    hpfl->set_param(5,(memaddr)lws[1]);
    hpfl->set_inData(0, n,sizeof(int));
    hpfl->set_inData(1, radius,sizeof(int));
    hpfl->set_outData(0, rm, length_r*sizeof(cl_float2));
    hpfl->set_cpu(spe_cpu);
    hpfl->nd_range();
    hpfl->wait_for(trns[0]);
    hpfl->spawn();
    // Inverse FFT

    // Butterfly Operation
    fftCore(manager,xm, rm, wm, m[0], inverse);

    // Transpose matrix
    trns[1]->spawn();

    // Butterfly Operation

    fftCore(manager,xm, rm, wm, m[0], inverse);

    // Read data from memory buffer
    // spawn and wait 

    float* ampd;
    ampd = (float*)malloc(n[0]*n[0]*sizeof(float));
    for (i=0; i < n[0]; i++) {
        for (j=0; j < n[0]; j++) {
            ampd[n[0]*((i))+((j))] = (AMP(((float*)xm)[(2*n[0]*i)+2*j], ((float*)xm)[(2*n[0]*i)+2*j+1]));
        }
    }
    pgm_t opgm;
    opgm.width = n[0];
    opgm.height = n[0];
    normalizeF2PGM(&opgm, ampd);
    free(ampd);

    // Write out image
    writePGM(&opgm, "output.pgm");

    // Finalizations
    destroyPGM(&ipgm);
    destroyPGM(&opgm);

    free(wm);
    free(rm);
    free(xm);
}

int TMmain(TaskManager *manager, int argc, char** argv) {
    task_init();
    char * pgm_file = init(argc,argv);
    pgm_t ipgm;
    /* Read image */
    int err = readPGM(&ipgm, pgm_file);
    if (err<0) {
        fprintf(stderr, "Failed to read image file.\n");
        exit(1);
    }
    st_time = getTime();
    run_start(manager, ipgm);
    manager->set_TMend(TMend);
    return 0;
}

void
TMend(TaskManager *manager)
{
    ed_time = getTime();
    fprintf(stdout, "image out put succeeded.\n");
    printf("Time: %0.6f\n",ed_time-st_time);
}