Mercurial > hg > Game > Cerium
view example/fft/main.cc @ 1581:8ee897303cd0 draft
fix multi_dimention
author | Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp> |
---|---|
date | Sat, 30 Mar 2013 18:29:54 +0900 |
parents | 7418c7aef534 |
children | 6ff0c34c8a3c |
line wrap: on
line source
#include <stdio.h> #include <stdlib.h> #include <math.h> #include <sys/stat.h> #include <fcntl.h> #include <sys/time.h> #include "TaskManager.h" #include "GpuScheduler.h" #include "SchedTask.h" #include "Func.h" #ifdef __APPLE__ #include <OpenCL/opencl.h> #else #include <CL/cl.h> #endif #include "pgm.h" extern void task_init(); #define PI 3.14159265358979 #define MAX_SOURCE_SIZE (0x100000) #define AMP(a, b) (sqrt((a)*(a)+(b)*(b))) static double st_time; static double ed_time; void TMend(TaskManager *); int ndrange_flag; cl_device_id device_id = NULL; cl_context context = NULL; cl_command_queue queue = NULL; cl_program program = NULL; CPU_TYPE spe_cpu = SPE_ANY; enum Mode { forward = 0, inverse = 1 }; static double getTime() { struct timeval tv; gettimeofday(&tv, NULL); return tv.tv_sec + (double)tv.tv_usec*1e-6; } const char *usr_help_str = "Usage: ./fft [option]\n \ options\n\ -cpu Number of SPE used (default 1)\n\ -l, --length Sorted number of data (default 1200)\n\ -h, --help Print this message"; int setWorkSize(size_t* gws, size_t* lws, cl_int x, cl_int y) { switch(y) { case 1: gws[0] = x; gws[1] = 1; lws[0] = 1; lws[1] = 1; break; default: gws[0] = x; gws[1] = y; lws[0] = 1; lws[1] = 1; break; } return 0; } int fftCore(TaskManager *manager,cl_float2 *dst, cl_float2 *src, cl_float2 *spin, int m_, enum Mode direction) { int iter; unsigned int* flag = new unsigned int[1]; switch (direction) { case forward:flag[0] = 0x00000000; break; case inverse:flag[0] = 0x80000000; break; } int* n = new int[1]; int* m = new int[1]; m[0] = m_; n[0] = 1<<m[0]; size_t gws[2],lws[2]; int length_dst = n[0]*n[0]; int length_src = n[0]*n[0]; cl_uint dimension = 2; HTask* brev; int i,j; setWorkSize(gws,lws,n[0],n[0]); for(i=0;i<gws[0];i++){ for(j=0;j<gws[1];j++){ brev = manager->create_task(BIT_REVERSE); brev->set_param(0,(memaddr)length_src); brev->set_param(1,(memaddr)i); brev->set_param(2,(memaddr)j); brev->set_inData(0, src, length_src*sizeof(cl_float2)); brev->set_inData(1, m,sizeof(int)); brev->set_inData(2, n,sizeof(int)); brev->set_outData(0, dst, length_dst*sizeof(cl_float2)); brev->set_cpu(spe_cpu); brev->spawn(); } } exit(0); HTask* bfly; setWorkSize(gws,lws,n[0]/2,n[0]); for (iter=1; iter<=m_;iter++) { for(i=0;i<gws[0];i++){ for(j=0;i<gws[1];j++){ bfly = manager->create_task(BUTTERFLY); bfly->set_param(0,(memaddr)length_dst); bfly->set_param(1,(memaddr)iter); bfly->set_param(2,(memaddr)i); bfly->set_param(3,(memaddr)j); bfly->set_inData(0, dst, length_dst*sizeof(cl_float2)); bfly->set_inData(1, spin, sizeof(cl_float2)*(n[0]/2)); bfly->set_inData(2, m,sizeof(int)); bfly->set_inData(3, n,sizeof(int)); bfly->set_inData(4, flag,sizeof(int)); bfly->set_outData(0,dst,length_dst*sizeof(cl_float2)); bfly->set_cpu(spe_cpu); bfly->wait_for(brev); bfly->spawn(); } } } if (direction == inverse) { HTask *norm = manager->create_task(NORMALIZATION); setWorkSize(gws,lws,n[0],n[0]); norm->set_param(0,(memaddr)length_dst); norm->set_param(1,(memaddr)dimension); norm->set_param(2,(memaddr)gws[0]); norm->set_param(3,(memaddr)gws[1]); norm->set_param(4,(memaddr)lws[0]); norm->set_param(5,(memaddr)lws[1]); norm->set_inData(0, n,sizeof(int)); norm->set_outData(0, dst, length_dst*sizeof(cl_float2)); norm->set_cpu(spe_cpu); norm->nd_range(); norm->flip(); norm->wait_for(bfly); norm->spawn(); } return 0; } char * init(int argc, char**argv){ char *filename = 0; for (int i = 1; argv[i]; ++i) { if (strcmp(argv[i], "-file") == 0) { filename = argv[i+1]; } else if (strcmp(argv[i], "-g") == 0) { spe_cpu = GPU_0; } } if ( (argc == 1)||(filename==0)) { printf("Usage: ./fft -file [image filename] -cpu or -gpu \n"); exit(-1); } return filename; } void run_start(TaskManager *manager,pgm_t ipgm) { int dimension; int *n = new int[1]; n[0] = ipgm.width; int *m = new int[1]; m[0] = (cl_int)(log((double)n[0])/log(2.0)); size_t *gws = new size_t[3]; size_t *lws = new size_t[3]; cl_float2 *xm = (cl_float2 *)malloc(n[0] * n[0] * sizeof(cl_float2)); cl_float2 *rm = (cl_float2 *)malloc(n[0] * n[0] * sizeof(cl_float2)); cl_float2 *wm = (cl_float2 *)malloc(n[0] / 2 * sizeof(cl_float2)); int i,j; /* * [cl_float2] * typedef union * { * cl_float CL_ALIGNED(8) s[2]; * #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) * __extension__ struct{ cl_float x, y; }; * __extension__ struct{ cl_float s0, s1; }; * __extension__ struct{ cl_float lo, hi; }; * #endif * #if defined( __CL_FLOAT2__) * __cl_float2 v2; * #endif * } cl_float2; */ for (i=0; i<n[0]; i++) { for (j=0; j < n[0]; j++) { ((float*)xm)[(2*n[0]*j)+2*i+0] = (float)ipgm.buf[n[0]*j+i]; ((float*)xm)[(2*n[0]*j)+2*i+1] = (float)0; } } // Create spin factor setWorkSize(gws,lws,n[0]/2,1); // Todo:setWorkSize(ndr,n[0]/2,1);でできるように int length_w = n[0] / 2; HTask* sfac; for(i=0;i<gws[0];i++){ sfac = manager->create_task(SPIN_FACT); sfac->set_param(0, (memaddr)length_w); sfac->set_param(1,(memaddr)i); sfac->set_inData(0, n, sizeof(int)); sfac->set_outData(0, wm, length_w*sizeof(cl_float2)); sfac->set_cpu(spe_cpu); sfac->nd_range(); sfac->spawn(); } // Butterfly Operation fftCore(manager, rm, xm, wm, m[0], forward); HTaskPtr *trns = (HTask**)manager->allocate(sizeof(HTask*)*2); // Transpose matrix int length_r =n[0] * n[0]; setWorkSize(gws,lws,n[0]/2,1); dimension = 2; for (int i=0;i<2;i++) { trns[i]= manager->create_task(TRANSEPOSE); trns[i]->set_param(0, (memaddr)length_r); trns[i]->set_param(1,(memaddr)dimension); trns[i]->set_param(2,(memaddr)gws[0]); trns[i]->set_param(3,(memaddr)gws[1]); trns[i]->set_param(4,(memaddr)lws[0]); trns[i]->set_param(5,(memaddr)lws[1]); trns[i]->set_inData(0, rm, length_r*sizeof(cl_float2)); trns[i]->set_inData(1, n,sizeof(int)); trns[i]->set_outData(0, xm, length_r*sizeof(cl_float2)); trns[i]->set_cpu(spe_cpu); trns[i]->nd_range(); } trns[0]->wait_for(sfac); trns[0]->spawn(); // Butterfly Operation fftCore(manager, rm, xm, wm, m[0], forward); // Apply high-pass filter HTask *hpfl = manager->create_task(HIGH_PASS_FILTER); cl_int *radius = new cl_int[1]; radius[0] = n[0]/8; setWorkSize(gws,lws,n[0]/2,1); hpfl->set_param(0, (memaddr)length_r); hpfl->set_param(1,(memaddr)dimension); hpfl->set_param(2,(memaddr)gws[0]); hpfl->set_param(3,(memaddr)gws[1]); hpfl->set_param(4,(memaddr)lws[0]); hpfl->set_param(5,(memaddr)lws[1]); hpfl->set_inData(0, n,sizeof(int)); hpfl->set_inData(1, radius,sizeof(int)); hpfl->set_outData(0, rm, length_r*sizeof(cl_float2)); hpfl->set_cpu(spe_cpu); hpfl->nd_range(); hpfl->wait_for(trns[0]); hpfl->spawn(); // Inverse FFT // Butterfly Operation fftCore(manager,xm, rm, wm, m[0], inverse); // Transpose matrix trns[1]->spawn(); // Butterfly Operation fftCore(manager,xm, rm, wm, m[0], inverse); // Read data from memory buffer // spawn and wait float* ampd; ampd = (float*)malloc(n[0]*n[0]*sizeof(float)); for (i=0; i < n[0]; i++) { for (j=0; j < n[0]; j++) { ampd[n[0]*((i))+((j))] = (AMP(((float*)xm)[(2*n[0]*i)+2*j], ((float*)xm)[(2*n[0]*i)+2*j+1])); } } pgm_t opgm; opgm.width = n[0]; opgm.height = n[0]; normalizeF2PGM(&opgm, ampd); free(ampd); // Write out image writePGM(&opgm, "output.pgm"); // Finalizations destroyPGM(&ipgm); destroyPGM(&opgm); free(wm); free(rm); free(xm); } int TMmain(TaskManager *manager, int argc, char** argv) { task_init(); char * pgm_file = init(argc,argv); pgm_t ipgm; /* Read image */ int err = readPGM(&ipgm, pgm_file); if (err<0) { fprintf(stderr, "Failed to read image file.\n"); exit(1); } st_time = getTime(); run_start(manager, ipgm); manager->set_TMend(TMend); return 0; } void TMend(TaskManager *manager) { ed_time = getTime(); fprintf(stdout, "image out put succeeded.\n"); printf("Time: %0.6f\n",ed_time-st_time); }