Mercurial > hg > Game > Cerium
view example/fft/main.cc @ 2069:26aa08c9a1de draft default tip
cuda example fix
author | Shinji KONO <kono@ie.u-ryukyu.ac.jp> |
---|---|
date | Sun, 12 Feb 2017 10:04:55 +0900 |
parents | 892c77a1529f |
children |
line wrap: on
line source
#include <stdio.h> #include <stdlib.h> #include <math.h> #include <sys/stat.h> #include <fcntl.h> #include <sys/time.h> #include "TaskManager.h" #include "GpuScheduler.h" #include "SchedTask.h" #include "Func.h" #ifdef __APPLE__ #include <OpenCL/opencl.h> #else #include <CL/cl.h> #endif #include "pgm.h" extern void task_init(); #ifdef GPU extern void gpu_task_init(); #endif #define PI 3.14159265358979 #define MAX_SOURCE_SIZE (0x100000) #define AMP(a, b) (sqrt((a)*(a)+(b)*(b))) static double st_time; static double ed_time; void TMend(TaskManager *); cl_device_id device_id = NULL; cl_context context = NULL; cl_command_queue queue = NULL; cl_program program = NULL; CPU_TYPE spe_cpu = SPE_ANY; bool flip_flag = false; cl_float2* xm; cl_float2* rm; cl_float2* wm; pgm_t ipgm; enum Mode { forward = 0, inverse = 1 }; static double getTime() { struct timeval tv; gettimeofday(&tv, NULL); return tv.tv_sec + (double)tv.tv_usec*1e-6; } void output() { int n = ipgm.width; float* ampd; ampd = (float*)malloc(n*n*sizeof(float)); for (int i=0; i < n; i++) { for (int j=0; j < n; j++) { ampd[n*((i))+((j))] = (AMP(((float*)xm)[(2*n*i)+2*j], ((float*)xm)[(2*n*i)+2*j+1])); } } pgm_t opgm; opgm.width = n; opgm.height = n; normalizeF2PGM(&opgm, ampd); free(ampd); // Write out image writePGM(&opgm, "output.pgm"); // Finalizations destroyPGM(&ipgm); destroyPGM(&opgm); free(wm); free(rm); free(xm); } const char *usr_help_str = "Usage: ./fft [option]\n \ options\n\ -cpu Number of SPE used (default 1)\n\ -l, --length Sorted number of data (default 1200)\n\ -h, --help Print this message"; int setWorkSize(size_t* gws, size_t* lws, cl_int x, cl_int y) { switch(y) { case 1: gws[0] = x; gws[1] = 1; lws[0] = 1; lws[1] = 1; break; default: gws[0] = x; gws[1] = y; lws[0] = 1; lws[1] = 1; break; } return 0; } HTask* fftCore(TaskManager *manager,cl_float2 *dst, cl_float2 *src, cl_float2 *spin, long m, enum Mode direction, HTask* waitTask, bool last) { long direction_flag; switch (direction) { case forward:direction_flag = 0x00000000; break; case inverse:direction_flag = 0x80000000; break; } long n = 1<<m; size_t gws[2],lws[2]; int length_dst = n*n; int length_src = n*n; HTask* brev = manager->create_task(BIT_REVERSE); setWorkSize(gws,lws,n,n); brev->set_param(0,m); brev->set_param(1,n); brev->set_inData(0, src, length_src*sizeof(cl_float2)); brev->set_outData(0, dst, length_dst*sizeof(cl_float2)); brev->set_cpu(spe_cpu); if (flip_flag)brev->flip(); brev->wait_for(waitTask); brev->iterate(gws[0],gws[1]); waitTask = brev; setWorkSize(gws,lws,n/2,n); for(int iter=1;iter<=m;iter++) { HTask* bfly = manager->create_task(BUTTERFLY); bfly->set_param(0,n); bfly->set_param(1,direction_flag); bfly->set_param(2,(long)iter); bfly->set_inData(0, dst, length_dst*sizeof(cl_float2)); bfly->set_inData(1, spin, sizeof(cl_float2)*(n/2)); bfly->set_outData(0, dst,length_dst*sizeof(cl_float2)); bfly->set_cpu(spe_cpu); if (flip_flag)bfly->flip(); bfly->wait_for(waitTask); bfly->iterate(gws[0],gws[1]); waitTask = bfly; } if (direction == inverse) { setWorkSize(gws,lws,n,n); HTask *norm = manager->create_task(NORMALIZATION); norm->set_inData(0, dst,length_dst*sizeof(cl_float2)); if ( (!last)&&flip_flag ) norm->flip(); norm->set_outData(0, dst, length_dst*sizeof(cl_float2)); norm->set_param(0,n); norm->set_cpu(spe_cpu); norm->wait_for(waitTask); norm->iterate(gws[0],gws[1]); waitTask = norm; } return waitTask; } char * init(int argc, char**argv){ char *filename = 0; // printf("%s ",argv[4]); for (int i = 1; argv[i]; ++i) { if (strcmp(argv[i], "-file") == 0) { filename = argv[i+1]; } else if (strcmp(argv[i], "-g") == 0) { spe_cpu = GPU_0; } else if (strcmp(argv[i], "-any") == 0) { spe_cpu = ANY_ANY; } else if (strcmp(argv[i], "-flip") == 0) { flip_flag = true; } } if ( (argc == 1)||(filename==0)) { printf("Usage: ./fft -file [image filename] -cpu or -gpu\n"); exit(-1); } return filename; } void run_start(TaskManager *manager,pgm_t ipgm) { long n = ipgm.width; long m = (cl_int)(log((double)n)/log(2.0)); size_t *gws = new size_t[2]; size_t *lws = new size_t[2]; xm = (cl_float2 *)malloc(n * n * sizeof(cl_float2)); rm = (cl_float2 *)malloc(n * n * sizeof(cl_float2)); wm = (cl_float2 *)malloc(n / 2 * sizeof(cl_float2)); HTask* waitTask; /* * [cl_float2] * typedef union * { * cl_float CL_ALIGNED(8) s[2]; * #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) * __extension__ struct{ cl_float x, y; }; * __extension__ struct{ cl_float s0, s1; }; * __extension__ struct{ cl_float lo, hi; }; * #endif * #if defined( __CL_FLOAT2__) * __cl_float2 v2; * #endif * } cl_float2; */ for (int i=0; i<n; i++) { for (int j=0; j < n; j++) { ((float*)xm)[(2*n*j)+2*i+0] = (float)ipgm.buf[n*j+i]; ((float*)xm)[(2*n*j)+2*i+1] = (float)0; } } // Create spin factor setWorkSize(gws,lws,n/2,1); int length_w = n / 2; HTask* sfac = manager->create_task(SPIN_FACT); sfac->set_outData(0, wm, length_w*sizeof(cl_float2)); sfac->set_param(0,n); sfac->set_cpu(spe_cpu); if (flip_flag)sfac->flip(); sfac->iterate(gws[0]); // Butterfly Operation waitTask = fftCore(manager, rm, xm, wm, m, forward, sfac, false); // Transpose matrix int length_r =n*n; setWorkSize(gws,lws,n,n); HTask* first_trns = manager->create_task(TRANSPOSE); first_trns->set_inData(0,rm,length_r*sizeof(cl_float2)); first_trns->set_outData(0,xm,length_r*sizeof(cl_float2)); first_trns->set_param(0,n); first_trns->set_cpu(spe_cpu); if (flip_flag)first_trns->flip(); first_trns->wait_for(waitTask); first_trns->iterate(gws[0],gws[1]); // Butterfly Operation waitTask = fftCore(manager, rm, xm, wm, m, forward, first_trns, false); // Apply high-pass filter HTask *hpfl = manager->create_task(HIGH_PASS_FILTER); cl_int radius = n/8; setWorkSize(gws,lws,n,n); hpfl->set_inData(0,rm,length_r*sizeof(cl_float2)); hpfl->set_outData(0, rm, length_r*sizeof(cl_float2)); hpfl->set_param(0,n); hpfl->set_param(1,(long)radius); hpfl->set_cpu(spe_cpu); if (flip_flag) hpfl->flip(); hpfl->wait_for(waitTask); hpfl->iterate(gws[0],gws[1]); // Inverse FFT // Butterfly Operation waitTask = fftCore(manager,xm, rm, wm, m, inverse, hpfl, false); // Transpose matrix setWorkSize(gws,lws,n,n); HTask* second_trns = manager->create_task(TRANSPOSE); second_trns->set_inData(0,xm,length_r*sizeof(cl_float2)); second_trns->set_outData(0,rm,length_r*sizeof(cl_float2)); second_trns->set_param(0,n); second_trns->set_cpu(spe_cpu); if (flip_flag)second_trns->flip(); second_trns->wait_for(waitTask); second_trns->iterate(gws[0],gws[1]); // Butterfly Operation waitTask = fftCore(manager,xm, rm, wm, m, inverse, second_trns, true); } int TMmain(TaskManager *manager, int argc, char** argv) { task_init(); #ifdef GPU gpu_task_init(); #endif char * pgm_file = init(argc,argv); /* Read image */ int err = readPGM(&ipgm, pgm_file); if (err<0) { fprintf(stderr, "Failed to read image file.\n"); exit(1); } run_start(manager, ipgm); st_time = getTime(); manager->set_TMend(TMend); return 0; } void TMend(TaskManager *manager) { ed_time = getTime(); output(); // fprintf(stdout, "image out put succeeded.\n"); printf("%0.6f\n",ed_time-st_time); }