view example/fft/main.cc @ 2069:26aa08c9a1de draft default tip

cuda example fix
author Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date Sun, 12 Feb 2017 10:04:55 +0900
parents 892c77a1529f
children
line wrap: on
line source

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/time.h>
#include "TaskManager.h"
#include "GpuScheduler.h"
#include "SchedTask.h"
#include "Func.h"
#ifdef __APPLE__
#include <OpenCL/opencl.h>
#else
#include <CL/cl.h>
#endif
#include "pgm.h"
// Task registration entry points, defined outside this file; called from TMmain().
extern void task_init();
#ifdef GPU
extern void gpu_task_init();
#endif
// NOTE(review): PI and MAX_SOURCE_SIZE are not referenced in the visible part
// of this file — confirm whether they are still needed.
#define PI 3.14159265358979

#define MAX_SOURCE_SIZE (0x100000)

// Magnitude of the complex value (a, b). Each argument is evaluated twice,
// so do not pass expressions with side effects.
#define AMP(a, b) (sqrt((a)*(a)+(b)*(b)))

// Timestamps in seconds: st_time is taken in TMmain(), ed_time in TMend().
static double st_time;
static double ed_time;
void TMend(TaskManager *);
// NOTE(review): these OpenCL handles are not used anywhere in the visible
// part of this file — confirm whether they are referenced elsewhere.
cl_device_id device_id = NULL;
cl_context context = NULL;
cl_command_queue queue = NULL;
cl_program program = NULL;
// CPU type passed to every task via set_cpu(); init() changes it for -g / -any.
CPU_TYPE spe_cpu = SPE_ANY;
// Set by the -flip option in init(); when true, tasks additionally call flip().
bool flip_flag   = false;

// Work buffers allocated in run_start() and released in output():
//   xm, rm : n*n complex matrices, wm : n/2 complex spin factors.
cl_float2* xm;
cl_float2* rm;
cl_float2* wm;
// Input image: read in TMmain() via readPGM(), destroyed in output().
pgm_t ipgm;

/* Transform direction selector passed to fftCore(). */
enum Mode {
    forward,    /* == 0 */
    inverse     /* == 1 */
};

/*
 * Return the current wall-clock time from gettimeofday() as seconds,
 * with the microsecond part folded into the fraction.
 */
static double
getTime()
{
    struct timeval now;
    gettimeofday(&now, NULL);

    const double fraction = (double)now.tv_usec * 1e-6;
    return now.tv_sec + fraction;
}

/*
 * Convert the complex result held in the global xm into an amplitude image,
 * write it to "output.pgm", and release the global image and work buffers
 * (ipgm, wm, rm, xm).
 *
 * The image is treated as n x n with n = ipgm.width.
 * If the temporary amplitude buffer cannot be allocated, an error is printed
 * and no image is written; the global resources are still released.
 */
void
output()
{
    int n = ipgm.width;
    // Count elements in size_t so the index arithmetic cannot overflow int.
    size_t count = (size_t)n * (size_t)n;
    pgm_t opgm;
    int wrote = 0;

    float *ampd = (float*)malloc(count * sizeof(float));
    if (ampd == NULL) {
        fprintf(stderr, "Failed to allocate amplitude buffer.\n");
    } else {
        // xm is viewed as interleaved floats: [2k] = real, [2k+1] = imaginary.
        const float *cplx = (const float*)xm;
        for (size_t k = 0; k < count; k++) {
            const float re = cplx[2*k];
            const float im = cplx[2*k+1];
            ampd[k] = AMP(re, im);
        }

        opgm.width = n;
        opgm.height = n;
        // NOTE(review): normalizeF2PGM comes from pgm.h; presumably it fills
        // opgm's pixel buffer from ampd — confirm against its definition.
        normalizeF2PGM(&opgm, ampd);
        free(ampd);

        // Write out image
        writePGM(&opgm, "output.pgm");
        wrote = 1;
    }

    // Finalizations
    destroyPGM(&ipgm);
    if (wrote) {
        // opgm is only initialised on the success path above.
        destroyPGM(&opgm);
    }

    free(wm);
    free(rm);
    free(xm);
    // Clear the globals so a later access cannot touch freed memory.
    wm = NULL;
    rm = NULL;
    xm = NULL;
}

// Help text for the program.
// NOTE(review): this string is not referenced in the visible part of this
// file (init() prints its own usage line), and the options it lists
// (-cpu, -l/--length, -h/--help) are not the ones init() parses
// (-file, -g, -any, -flip). Confirm whether it is consumed elsewhere before
// editing it.
const char *usr_help_str = "Usage: ./fft [option]\n \
options\n\
  -cpu     Number of SPE used (default 1)\n\
  -l, --length  Sorted number of data (default 1200)\n\
  -h, --help    Print this message";

/*
 * Fill in the global (gws) and local (lws) work sizes for a task.
 *
 * gws receives {x, y}; lws is always {1, 1}. Both arrays must have room for
 * two elements. Always returns 0.
 */
int setWorkSize(size_t* gws, size_t* lws, cl_int x, cl_int y)
{
    // A y of 1 needs no special handling: storing y already yields 1.
    gws[0] = x;
    gws[1] = y;

    lws[0] = lws[1] = 1;

    return 0;
}

/*
 * Queue the task chain for one pass of 1-D FFTs over an n x n matrix
 * (n = 2^m): a BIT_REVERSE task from src to dst, then m BUTTERFLY tasks
 * in place on dst, and for the inverse direction a NORMALIZATION task.
 *
 * manager   : task manager used to create the tasks
 * dst       : output matrix, n*n cl_float2 elements
 * src       : input matrix, n*n cl_float2 elements
 * spin      : spin factor table, n/2 cl_float2 elements
 * m         : log2 of the matrix dimension
 * direction : forward or inverse
 * waitTask  : task the first queued task waits for
 * last      : when true, the NORMALIZATION task is not flipped even if
 *             flip_flag is set
 *
 * Each task waits for the previous one. Returns the last task queued so the
 * caller can chain further work behind it.
 */
HTask*
fftCore(TaskManager *manager,cl_float2 *dst, cl_float2 *src, cl_float2 *spin, long m, enum Mode direction, HTask* waitTask, bool last)
{
    // Initialised up front so an unexpected enum value cannot leave the flag
    // indeterminate; such a value is treated as forward.
    long direction_flag = 0x00000000;
    switch (direction) {
    case forward: direction_flag = 0x00000000; break;
    case inverse: direction_flag = 0x80000000; break;
    default: break;
    }

    // Shift in long, not int, so the width of the result matches n.
    long n = 1L<<m;
    size_t gws[2],lws[2];
    // Byte size of one n x n complex matrix, computed without narrowing to int.
    size_t matrix_bytes = (size_t)n*(size_t)n*sizeof(cl_float2);

    HTask* brev = manager->create_task(BIT_REVERSE);
    setWorkSize(gws,lws,n,n);
    brev->set_param(0,m);
    brev->set_param(1,n);
    brev->set_inData(0, src, matrix_bytes);
    brev->set_outData(0, dst, matrix_bytes);
    brev->set_cpu(spe_cpu);
    if (flip_flag)brev->flip();
    brev->wait_for(waitTask);
    brev->iterate(gws[0],gws[1]);

    waitTask = brev;

    // One butterfly task per stage; each stage waits for the previous one.
    setWorkSize(gws,lws,n/2,n);
    for(int iter=1;iter<=m;iter++) {
        HTask* bfly = manager->create_task(BUTTERFLY);
        bfly->set_param(0,n);
        bfly->set_param(1,direction_flag);
        bfly->set_param(2,(long)iter);
        bfly->set_inData(0, dst, matrix_bytes);
        bfly->set_inData(1, spin, sizeof(cl_float2)*(n/2));
        bfly->set_outData(0, dst, matrix_bytes);
        bfly->set_cpu(spe_cpu);
        if (flip_flag)bfly->flip();
        bfly->wait_for(waitTask);
        bfly->iterate(gws[0],gws[1]);
        waitTask = bfly;
    }

    if (direction == inverse) {
        setWorkSize(gws,lws,n,n);
        HTask *norm = manager->create_task(NORMALIZATION);
        norm->set_inData(0, dst, matrix_bytes);
        // The final normalization of the whole pipeline is never flipped.
        if ( (!last)&&flip_flag )
            norm->flip();
        norm->set_outData(0, dst, matrix_bytes);
        norm->set_param(0,n);
        norm->set_cpu(spe_cpu);
        norm->wait_for(waitTask);
        norm->iterate(gws[0],gws[1]);

        waitTask = norm;
    }
    return waitTask;
}

/*
 * Parse the command line.
 *
 *   -file <name>  image file to process (required)
 *   -g            run tasks with spe_cpu = GPU_0
 *   -any          run tasks with spe_cpu = ANY_ANY
 *   -flip         set flip_flag
 *
 * Unrecognised arguments are ignored. Prints a usage line and exits with
 * status -1 when no arguments or no file name were given.
 * Returns the file name (a pointer into argv).
 */
char *
init(int argc, char**argv){

    char *filename = 0;

    for (char **arg = argv + 1; *arg; ++arg) {
        const char *opt = *arg;

        if (!strcmp(opt, "-file")) {
            // The value is whatever follows; it is NULL when -file is last.
            filename = arg[1];
        } else if (!strcmp(opt, "-g")) {
            spe_cpu = GPU_0;
        } else if (!strcmp(opt, "-any")) {
            spe_cpu = ANY_ANY;
        } else if (!strcmp(opt, "-flip")) {
            flip_flag = true;
        }
    }

    if (argc == 1 || !filename) {
        printf("Usage: ./fft -file [image filename] -cpu or -gpu\n");
        exit(-1);
    }

    return filename;
}

/*
 * Allocate the global work buffers (xm, rm, wm), load the image into xm as
 * complex values, and queue the whole task pipeline:
 *
 *   spin factors -> FFT pass -> transpose -> FFT pass -> high-pass filter
 *   -> inverse FFT pass -> transpose -> inverse FFT pass
 *
 * The image is treated as n x n with n = ipgm.width; the FFT stage count is
 * floor(log2(n)). Exits with status 1 if a work buffer cannot be allocated.
 * The buffers are released later by output().
 */
void
run_start(TaskManager *manager,pgm_t ipgm)
{
    long n = ipgm.width;

    // Integer log2: avoids the rounding risk of truncating log(n)/log(2).
    long m = 0;
    for (long v = n; v > 1; v >>= 1) {
        m++;
    }

    // Stack arrays: nothing to free on return.
    size_t gws[2];
    size_t lws[2];

    xm = (cl_float2 *)malloc(n * n * sizeof(cl_float2));
    rm = (cl_float2 *)malloc(n * n * sizeof(cl_float2));
    wm = (cl_float2 *)malloc(n / 2 * sizeof(cl_float2));
    if (xm == NULL || rm == NULL || wm == NULL) {
        fprintf(stderr, "Failed to allocate FFT work buffers.\n");
        exit(1);
    }

    HTask* waitTask;

    // Each cl_float2 is viewed as two consecutive floats: the pixel value
    // goes into the first (real part), zero into the second (imaginary part).
    // Rows are the outer loop so both buffers are walked in memory order.
    float *xm_f = (float*)xm;
    for (long row = 0; row < n; row++) {
        for (long col = 0; col < n; col++) {
            xm_f[(2*n*row)+2*col+0] = (float)ipgm.buf[n*row+col];
            xm_f[(2*n*row)+2*col+1] = (float)0;
        }
    }

    // Create spin factor
    setWorkSize(gws,lws,n/2,1);
    int length_w = n / 2;
    HTask* sfac = manager->create_task(SPIN_FACT);
    sfac->set_outData(0, wm, length_w*sizeof(cl_float2));
    sfac->set_param(0,n);
    sfac->set_cpu(spe_cpu);
    if (flip_flag)sfac->flip();
    sfac->iterate(gws[0]);

    // Butterfly Operation
    waitTask = fftCore(manager, rm, xm, wm, m, forward, sfac, false);

    // Transpose matrix
    int length_r =n*n;
    setWorkSize(gws,lws,n,n);
    HTask* first_trns = manager->create_task(TRANSPOSE);
    first_trns->set_inData(0,rm,length_r*sizeof(cl_float2));
    first_trns->set_outData(0,xm,length_r*sizeof(cl_float2));
    first_trns->set_param(0,n);
    first_trns->set_cpu(spe_cpu);
    if (flip_flag)first_trns->flip();
    first_trns->wait_for(waitTask);
    first_trns->iterate(gws[0],gws[1]);

    // Butterfly Operation
    waitTask = fftCore(manager, rm, xm, wm, m, forward, first_trns, false);

    // Apply high-pass filter
    HTask *hpfl = manager->create_task(HIGH_PASS_FILTER);
    cl_int radius = n/8;
    setWorkSize(gws,lws,n,n);
    hpfl->set_inData(0,rm,length_r*sizeof(cl_float2));
    hpfl->set_outData(0, rm, length_r*sizeof(cl_float2));
    hpfl->set_param(0,n);
    hpfl->set_param(1,(long)radius);
    hpfl->set_cpu(spe_cpu);
    if (flip_flag) hpfl->flip();
    hpfl->wait_for(waitTask);
    hpfl->iterate(gws[0],gws[1]);

    // Inverse FFT

    // Butterfly Operation
    waitTask = fftCore(manager,xm, rm, wm, m, inverse, hpfl, false);

    // Transpose matrix
    setWorkSize(gws,lws,n,n);
    HTask* second_trns = manager->create_task(TRANSPOSE);
    second_trns->set_inData(0,xm,length_r*sizeof(cl_float2));
    second_trns->set_outData(0,rm,length_r*sizeof(cl_float2));
    second_trns->set_param(0,n);
    second_trns->set_cpu(spe_cpu);
    if (flip_flag)second_trns->flip();
    second_trns->wait_for(waitTask);
    second_trns->iterate(gws[0],gws[1]);

    // Butterfly Operation: last pass, so its normalization is not flipped.
    waitTask = fftCore(manager,xm, rm, wm, m, inverse, second_trns, true);
}

/*
 * Application entry point invoked with the task manager.
 *
 * Registers the tasks, parses the command line, loads the input image,
 * queues the FFT pipeline, records the start time and installs TMend as the
 * manager's end callback. Exits with status 1 if the image cannot be read.
 * Returns 0.
 */
int TMmain(TaskManager *manager, int argc, char** argv) {
    task_init();
#ifdef GPU
    gpu_task_init();
#endif

    char *image_path = init(argc, argv);

    /* Read image */
    if (readPGM(&ipgm, image_path) < 0) {
        fprintf(stderr, "Failed to read image file.\n");
        exit(1);
    }

    run_start(manager, ipgm);

    // The start timestamp is taken after the tasks have been queued.
    st_time = getTime();
    manager->set_TMend(TMend);

    return 0;
}

/*
 * End callback installed by TMmain(): records the end time, writes the
 * result image via output(), and prints the elapsed seconds
 * (ed_time - st_time) to stdout. The manager argument is not used.
 */
void
TMend(TaskManager *manager)
{
    ed_time = getTime();

    output();

    const double elapsed = ed_time - st_time;
    printf("%0.6f\n", elapsed);
}