Mercurial > hg > Members > yuuhi > OpenCL
view fft_fixstart/main.cc @ 9:ed3d4a769bf3
add divice type all
author | Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Tue, 05 Feb 2013 17:21:18 +0900 |
parents | 1b8da19bb31c |
children | e38bef2012bc |
line wrap: on
line source
#include <stdio.h> #include <stdlib.h> #include <math.h> #include <sys/stat.h> #include <fcntl.h> #include <sys/time.h> #ifdef __APPLE__ #include <OpenCL/opencl.h> #else #include <CL/cl.h> #endif #include "pgm.h" #define PI 3.14159265358979 #define MAX_SOURCE_SIZE (0x100000) #define AMP(a, b) (sqrt((a)*(a)+(b)*(b))) static double st_time; static double ed_time; cl_device_id device_id = NULL; cl_context context = NULL; cl_command_queue queue = NULL; cl_program program = NULL; cl_device_type device_type = CL_DEVICE_TYPE_GPU; enum Mode { forward = 0, inverse = 1 }; static double getTime() { struct timeval tv; gettimeofday(&tv, NULL); return tv.tv_sec + (double)tv.tv_usec*1e-6; } int setWorkSize(size_t* gws, size_t* lws, cl_int x, cl_int y) { switch(y) { case 1: gws[0] = x; gws[1] = 1; lws[0] = 1; lws[1] = 1; break; default: gws[0] = x; gws[1] = y; lws[0] = 1; lws[1] = 1; break; } return 0; } int fftCore(cl_mem dst, cl_mem src, cl_mem spin, cl_int m, enum Mode direction) { cl_int ret; cl_int iter; cl_uint flag; cl_int n = 1<<m; cl_event kernelDone; cl_kernel brev = NULL; cl_kernel bfly = NULL; cl_kernel norm = NULL; brev = clCreateKernel(program, "bitReverse", &ret); bfly = clCreateKernel(program, "butterfly", &ret); norm = clCreateKernel(program, "norm", &ret); size_t gws[2]; size_t lws[2]; switch (direction) { case forward:flag = 0x00000000; break; case inverse:flag = 0x80000000; break; } ret = clSetKernelArg(brev, 0, sizeof(cl_mem), (void *)&dst); ret = clSetKernelArg(brev, 1, sizeof(cl_mem), (void *)&src); ret = clSetKernelArg(brev, 2, sizeof(cl_int), (void *)&m); ret = clSetKernelArg(brev, 3, sizeof(cl_int), (void *)&n); ret = clSetKernelArg(bfly, 0, sizeof(cl_mem), (void *)&dst); ret = clSetKernelArg(bfly, 1, sizeof(cl_mem), (void *)&spin); ret = clSetKernelArg(bfly, 2, sizeof(cl_int), (void *)&m); ret = clSetKernelArg(bfly, 3, sizeof(cl_int), (void *)&n); ret = clSetKernelArg(bfly, 5, sizeof(cl_uint), (void *)&flag); ret = clSetKernelArg(norm, 0, sizeof(cl_mem), (void *)&dst); ret = clSetKernelArg(norm, 1, sizeof(cl_int), (void *)&n); /* Reverse bit ordering */ setWorkSize(gws, lws, n, n); ret = clEnqueueTask(queue, brev, 0, NULL, NULL); /* Perform Butterfly Operations*/ setWorkSize(gws, lws, n/2, n); for (iter=1; iter <= m; iter++) { ret = clSetKernelArg(bfly, 4, sizeof(cl_int), (void *)&iter); ret = clEnqueueTask(queue, bfly, 0, NULL, NULL); ret = clWaitForEvents(1, &kernelDone); } if (direction == inverse) { setWorkSize(gws, lws, n, n); ret = clEnqueueTask(queue, norm, 0, NULL, NULL); ret = clWaitForEvents(1, &kernelDone); } ret = clReleaseKernel(bfly); ret = clReleaseKernel(brev); ret = clReleaseKernel(norm); return 0; } char * init(int argc, char**argv){ char *filename = 0; for (int i = 1; argv[i]; ++i) { if (strcmp(argv[i], "-file") == 0) { filename = argv[i+1]; } else if (strcmp(argv[i], "-cpu") == 0) { device_type = CL_DEVICE_TYPE_CPU; } else if (strcmp(argv[i], "-gpu") == 0) { device_type = CL_DEVICE_TYPE_GPU; } else if (strcmp(argv[i], "-all") == 0) { device_type = CL_DEVICE_TYPE_ALL; } } if ( (argc == 1)||(filename==0)) { printf("Usage: ./fft -file [image filename] -cpu or -gpu \n"); exit(-1); } return filename; } int main(int argc, char** argv) { cl_mem xmobj = NULL; cl_mem rmobj = NULL; cl_mem wmobj = NULL; cl_kernel sfac = NULL; cl_kernel trns = NULL; cl_kernel hpfl = NULL; cl_platform_id platform_id = NULL; cl_uint ret_num_devices; cl_uint ret_num_platforms; cl_int ret; cl_float2 *xm; cl_float2 *rm; cl_float2 *wm; pgm_t ipgm; pgm_t opgm; const char fileName[] = "./fft.cl"; size_t source_size; char *source_str; cl_int i, j; cl_int n; cl_int m; size_t gws[2]; size_t lws[2]; /* Load kernel source code */ int fd = open(fileName, O_RDONLY); if (fd<0) { fprintf(stderr, "Failed to load kernel %s.\n",fileName); exit(1); } struct stat stats; fstat(fd, &stats); off_t size = stats.st_size; if (size<=0) { fprintf(stderr, "Failed to load kernel.\n"); exit(1); } source_str = (char*)alloca(size); source_size = read(fd, source_str, size); close( fd ); char * pgm_file = init(argc,argv); /* Read image */ int err = readPGM(&ipgm, pgm_file); if (err<0) { fprintf(stderr, "Failed to read image file.\n"); exit(1); } n = ipgm.width; m = (cl_int)(log((double)n)/log(2.0)); xm = (cl_float2 *)malloc(n * n * sizeof(cl_float2)); rm = (cl_float2 *)malloc(n * n * sizeof(cl_float2)); wm = (cl_float2 *)malloc(n / 2 * sizeof(cl_float2)); for (i=0; i < n; i++) { for (j=0; j < n; j++) { ((float*)xm)[(2*n*j)+2*i+0] = (float)ipgm.buf[n*j+i]; ((float*)xm)[(2*n*j)+2*i+1] = (float)0; } } /* Get platform/device */ ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms); ret = clGetDeviceIDs( platform_id, device_type, 1, &device_id, &ret_num_devices); /* Create OpenCL context */ context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret); /* Create Command queue */ queue = clCreateCommandQueue(context, device_id, 0, &ret); st_time = getTime(); /* Create Buffer Objects */ xmobj = clCreateBuffer(context, CL_MEM_READ_WRITE, n*n*sizeof(cl_float2), NULL, &ret); rmobj = clCreateBuffer(context, CL_MEM_READ_WRITE, n*n*sizeof(cl_float2), NULL, &ret); wmobj = clCreateBuffer(context, CL_MEM_READ_WRITE, (n/2)*sizeof(cl_float2), NULL, &ret); /* Transfer data to memory buffer */ ret = clEnqueueWriteBuffer(queue, xmobj, CL_TRUE, 0, n*n*sizeof(cl_float2), xm, 0, NULL, NULL); /* Create kernel program from source */ program = clCreateProgramWithSource(context, 1, (const char **)&source_str, (const size_t *)&source_size, &ret); /* Build kernel program */ ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL); if (ret<0) { size_t size; clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, 0, NULL, &size); char *log = new char[size]; clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, size, log, NULL); printf("%s ",log); exit (ret); } /* Create OpenCL Kernel */ sfac = clCreateKernel(program, "spinFact", &ret); trns = clCreateKernel(program, "transpose", &ret); hpfl = clCreateKernel(program, "highPassFilter", &ret); /* Create spin factor */ ret = clSetKernelArg(sfac, 0, sizeof(cl_mem), (void *)&wmobj); ret = clSetKernelArg(sfac, 1, sizeof(cl_int), (void *)&n); setWorkSize(gws, lws, n/2, 1); ret = clEnqueueTask(queue, sfac, 0, NULL, NULL); /* Butterfly Operation */ fftCore(rmobj, xmobj, wmobj, m, forward); /* Transpose matrix */ ret = clSetKernelArg(trns, 0, sizeof(cl_mem), (void *)&xmobj); ret = clSetKernelArg(trns, 1, sizeof(cl_mem), (void *)&rmobj); ret = clSetKernelArg(trns, 2, sizeof(cl_int), (void *)&n); setWorkSize(gws, lws, n, n); ret = clEnqueueTask(queue, trns, 0, NULL, NULL); /* Butterfly Operation */ fftCore(rmobj, xmobj, wmobj, m, forward); /* Apply high-pass filter */ cl_int radius = n/8; ret = clSetKernelArg(hpfl, 0, sizeof(cl_mem), (void *)&rmobj); ret = clSetKernelArg(hpfl, 1, sizeof(cl_int), (void *)&n); ret = clSetKernelArg(hpfl, 2, sizeof(cl_int), (void *)&radius); setWorkSize(gws, lws, n, n); ret = clEnqueueTask(queue, hpfl, 0, NULL, NULL); /* Inverse FFT */ /* Butterfly Operation */ fftCore(xmobj, rmobj, wmobj, m, inverse); /* Transpose matrix */ ret = clSetKernelArg(trns, 0, sizeof(cl_mem), (void *)&rmobj); ret = clSetKernelArg(trns, 1, sizeof(cl_mem), (void *)&xmobj); setWorkSize(gws, lws, n, n); ret = clEnqueueTask(queue, trns, 0, NULL, NULL); /* Butterfly Operation */ fftCore(xmobj, rmobj, wmobj, m, inverse); /* Read data from memory buffer */ ret = clEnqueueReadBuffer(queue, xmobj, CL_TRUE, 0, n*n*sizeof(cl_float2), xm, 0, NULL, NULL); /* */ float* ampd; ampd = (float*)malloc(n*n*sizeof(float)); for (i=0; i < n; i++) { for (j=0; j < n; j++) { ampd[n*((i))+((j))] = (AMP(((float*)xm)[(2*n*i)+2*j], ((float*)xm)[(2*n*i)+2*j+1])); } } opgm.width = n; opgm.height = n; normalizeF2PGM(&opgm, ampd); free(ampd); ret = clFlush(queue); ret = clFinish(queue); ed_time = getTime(); /* Write out image */ writePGM(&opgm, "output.pgm"); /* Finalizations*/ ret = clFlush(queue); ret = clFinish(queue); ret = clReleaseKernel(hpfl); ret = clReleaseKernel(trns); ret = clReleaseKernel(sfac); ret = clReleaseProgram(program); ret = clReleaseMemObject(xmobj); ret = clReleaseMemObject(rmobj); ret = clReleaseMemObject(wmobj); ret = clReleaseCommandQueue(queue); ret = clReleaseContext(context); destroyPGM(&ipgm); destroyPGM(&opgm); free(wm); free(rm); free(xm); fprintf(stdout, "image out put succeeded.\n"); printf("Time: %0.6f\n",ed_time-st_time); return 0; }