Mercurial > hg > Game > Cerium
changeset 2008:2c8eab01cc78 draft
implement fft using cuda
author | Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp> |
---|---|
date | Tue, 03 Jun 2014 18:10:19 +0900 |
parents | bc2121b09cbc |
children | 113b1edd2a9a |
files | example/cuda_fft/main.cc example/cuda_fft/pgm.h |
diffstat | 2 files changed, 243 insertions(+), 50 deletions(-) [+] |
line wrap: on
line diff
--- a/example/cuda_fft/main.cc Tue Jun 03 16:02:06 2014 +0900 +++ b/example/cuda_fft/main.cc Tue Jun 03 18:10:19 2014 +0900 @@ -2,6 +2,7 @@ #include <sys/time.h> #include <string.h> #include <cuda.h> +#include <vector_types.h> #include "pgm.h" @@ -16,16 +17,6 @@ inverse = 1 }; -struct int2 { - int x; - int y; -}; - -struct float2 { - float x; - float y; -}; - CUmodule module; static double @@ -40,12 +31,12 @@ { switch(y) { case 1: - block = x; - thread = 1; + *block = x; + *thread = 1; break; default: - block = x; - thread = y; + *block = x; + *thread = y; break; } @@ -66,37 +57,38 @@ setWorkSize(&block, &thread, n, n); CUfunction bitReverse; - cuModuleGetFunction(bitReverse, module, "bitReverse"); + cuModuleGetFunction(&bitReverse, module, "bitReverse"); - void* kernel_args[] = {&dst, &src, &m, &n}; + void* bitReverse_args[] = {&dst, &src, &m, &n}; cuLaunchKernel(bitReverse, block, 1, 1, thread, 1, 1, - 0, NULL, kernel_args, NULL); + 0, NULL, bitReverse_args, NULL); CUfunction butterfly; - cuModuleGetFunction(butterfly, module, "butterfly"); + cuModuleGetFunction(&butterfly, module, "butterfly"); setWorkSize(&block, &thread, n/2, n); + void* butterfly_args[] = {&dst, &spin, &m, &n, 0, &flag}; for (int i=1;i<=m;i++) { - kernel_args[] = {&dst, &spin, &m, &n, &i, &flag}; + butterfly_args[4] = &i; cuLaunchKernel(butterfly, block, 1, 1, thread, 1, 1, - 0, NULL, kernel_args, NULL); + 0, NULL, butterfly_args, NULL); } CUfunction norm; - cuModuleGetFunction(norm, module, "norm"); + cuModuleGetFunction(&norm, module, "norm"); + void* norm_args[] = {&dst, &m}; if (direction == inverse) { setWorkSize(&block, &thread, n, n); - kernel_args[] = {&dst, &m}; cuLaunchKernel(norm, block, 1, 1, thread, 1, 1, - 0, NULL, kernel_args, NULL); + 0, NULL, norm_args, NULL); } return 0; @@ -132,7 +124,7 @@ cuModuleLoad(&module, "fft.ptx"); - char* pgm_file = init(argc, argv); + char* pgm_file = init(args, argv); pgm_t ipgm; int err = readPGM(&ipgm, pgm_file); @@ -171,74 +163,88 @@ cuMemcpyHtoD(xmobj, xm, n*n*sizeof(float2)); CUfunction spinFact; - cuModuleGetFunction(spinFact, module, "spinFact"); + cuModuleGetFunction(&spinFact, module, "spinFact"); int block, thread; setWorkSize(&block, &thread, n/2, 1); - void* kernel_args[] = {&xmobj, &n}; + void* spinFact_args[] = {&xmobj, &n}; cuLaunchKernel(spinFact, block, 1, 1, thread, 1, 1, - 0, NULL, kernel_args, NULL); + 0, NULL, spinFact_args, NULL); fftCore(rmobj, xmobj, wmobj, m, forward); - CUfunction transfer; - cuModuleGetFunction(transfer, module, "transfer"); + CUfunction transpose; + cuModuleGetFunction(&transpose, module, "transpose"); setWorkSize(&block, &thread, n, n); - kernel_args[] = {&xmobj, &rmobj, &n}; - cuLaunchKernel(transfer, + void* transpose_args[] = {&xmobj, &rmobj, &n}; + cuLaunchKernel(transpose, block, 1, 1, thread, 1, 1, - 0, NULL, kernel_args, NULL); + 0, NULL, transpose_args, NULL); fftCore(rmobj, xmobj, wmobj, m, forward); CUfunction highPassFilter; - cuModuleGetFunction(transfer, module, "highPassFilter"); + cuModuleGetFunction(&highPassFilter, module, "highPassFilter"); setWorkSize(&block, &thread, n, n); int radius = n/8; - kernel_args[] = {&rmobj, &n, &radius}; + void*highPassFilter_args[] = {&rmobj, &n, &radius}; cuLaunchKernel(highPassFilter, block, 1, 1, thread, 1, 1, - 0, NULL, kernel_args, NULL); + 0, NULL, highPassFilter_args, NULL); fftCore(xmobj, rmobj, wmobj, m, inverse); setWorkSize(&block, &thread, n, n); - kernel_args[] = {&rmobj, &xmobj}; - cuLaunchKernel(transfer, + void* transpose2_args[] = {&rmobj, &xmobj, &n}; + cuLaunchKernel(transpose, block, 1, 1, thread, 1, 1, - 0, NULL, kernel_args, NULL); + 0, NULL, transpose2_args, NULL); fftCore(xmobj, rmobj, wmobj, m, inverse); + cuMemcpyDtoH(xm, xmobj, n*n*sizeof(float2)); + + float* ampd; + ampd = (float*)malloc(n*n*sizeof(float)); + + for (int i=0;i<n*n;i++) + ampd[i] = (AMP(xm[i].x, xm[i].y)); + opgm.width = n; + opgm.height = n; + normalizeF2PGM(&opgm, ampd); + free(ampd); + + ed_time = getTime(); + + writePGM(&opgm, "output.pgm"); // memory release - cuMemFree(devA); - for (int i=0;i<num_exec;i++) { - cuMemFree(devB[i]); - cuMemFree(devOut[i]); - } - for (int i=0;i<num_stream;i++) - cuStreamDestroy(stream[i]); + cuMemFree(xmobj); + cuMemFree(rmobj); + cuMemFree(wmobj); cuModuleUnload(module); cuCtxDestroy(context); - delete[] A; - delete[] B; - for (int i=0;i<num_exec;i++) - delete[] result[i]; - delete[] result; + destroyPGM(&ipgm); + destroyPGM(&opgm); + + free(xm); + free(rm); + free(wm); + + printf("Time: %0.6f\n", ed_time-st_time); return 0; }
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/example/cuda_fft/pgm.h Tue Jun 03 18:10:19 2014 +0900 @@ -0,0 +1,187 @@ +#ifndef _PGM_H_ +#define _PGM_H_ + +#include <math.h> +#include <string.h> + +#define PGM_MAGIC "P5" + +#ifdef _WIN32 +#define STRTOK_R(ptr, del, saveptr) strtok_s(ptr, del, saveptr) +#else +#define STRTOK_R(ptr, del, saveptr) strtok_r(ptr, del, saveptr) +#endif + +typedef struct _pgm_t { + int width; + int height; + unsigned char *buf; +} pgm_t; + +int readPGM(pgm_t* pgm, const char* filename) +{ + char *token, *pc, *saveptr; + char *buf; + size_t bufsize; + char del[] = " \t\n"; + unsigned char *dot; + + long begin, end; + int filesize; + int i, w, h, luma, pixs; + + + FILE* fp; + if ((fp = fopen(filename, "rb"))==NULL) { + fprintf(stderr, "Failed to open file\n"); + return -1; + } + + fseek(fp, 0, SEEK_SET); + begin = ftell(fp); + fseek(fp, 0, SEEK_END); + end = ftell(fp); + filesize = (int)(end - begin); + + buf = (char*)malloc(filesize * sizeof(char)); + fseek(fp, 0, SEEK_SET); + bufsize = fread(buf, filesize * sizeof(char), 1, fp); + + fclose(fp); + + token = (char *)STRTOK_R(buf, del, &saveptr); + if (strncmp(token, PGM_MAGIC, 2) != 0) { + return -1; + } + + token = (char *)STRTOK_R(NULL, del, &saveptr); + if (token[0] == '#' ) { + token = (char *)STRTOK_R(NULL, "\n", &saveptr); + token = (char *)STRTOK_R(NULL, del, &saveptr); + } + + w = strtoul(token, &pc, 10); + token = (char *)STRTOK_R(NULL, del, &saveptr); + h = strtoul(token, &pc, 10); + token = (char *)STRTOK_R(NULL, del, &saveptr); + luma = strtoul(token, &pc, 10); + + token = pc + 1; + pixs = w * h; + + pgm->buf = (unsigned char *)malloc(pixs * sizeof(unsigned char)); + + dot = pgm->buf; + + for (i=0; i< pixs; i++, dot++) { + *dot = *token++; + } + + pgm->width = w; + pgm->height = h; + + return 0; +} + +int writePGM(pgm_t* pgm, const char* filename) +{ + int i, w, h, pixs; + FILE* fp; + unsigned char* dot; + + w = pgm->width; + h = pgm->height; + pixs = w * h; + + if ((fp = fopen(filename, "wb+")) ==NULL) { + fprintf(stderr, "Failed to open file\n"); + return -1; + } + + fprintf (fp, "%s\n%d %d\n255\n", PGM_MAGIC, w, h); + + dot = pgm->buf; + + for (i=0; i<pixs; i++, dot++) { + putc((unsigned char)*dot, fp); + } + + fclose(fp); + + return 0; +} + +int normalizeD2PGM(pgm_t* pgm, double* x) +{ + int i, j, w, h; + + w = pgm->width; + h = pgm->height; + + pgm->buf = (unsigned char*)malloc(w * h * sizeof(unsigned char)); + + double min = 0; + double max = 0; + for (i=0; i < h; i++) { + for (j=0; j < w; j++) { + if (max < x[i*w+j]) + max = x[i*w+j]; + if (min > x[i*w+j]) + min = x[i*w+j]; + } + } + + for (i=0; i < h; i++) { + for (j=0; j < w; j++) { + if((max-min)!=0) + pgm->buf[i*w+j] = (unsigned char)(255*(x[i*w+j]-min)/(max-min)); + else + pgm->buf[i*w+j]= 0; + } + } + + return 0; +} + +int normalizeF2PGM(pgm_t* pgm, float* x) +{ + int i, j, w, h; + + w = pgm->width; + h = pgm->height; + + pgm->buf = (unsigned char*)malloc(w * h * sizeof(unsigned char)); + + float min = 0; + float max = 0; + for (i=0; i < h; i++) { + for (j=0; j < w; j++) { + if (max < x[i*w+j]) + max = x[i*w+j]; + if (min > x[i*w+j]) + min = x[i*w+j]; + } + } + + for (i=0; i < h; i++) { + for (j=0; j < w; j++) { + if((max-min)!=0) + pgm->buf[i*w+j] = (unsigned char)(255*(x[i*w+j]-min)/(max-min)); + else + pgm->buf[i*w+j]= 0; + } + } + + return 0; +} + +int destroyPGM(pgm_t* pgm) +{ + if (pgm->buf) { + free(pgm->buf); + } + + return 0; +} + +#endif /* _PGM_H_ */