Mercurial > hg > Game > Cerium
annotate example/cuda_fft/main.cc @ 2048:6796d85f3d6b draft
remove error
author | Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Thu, 28 Jan 2016 00:05:49 +0900 |
parents | faaea4e1ce1c |
children |
rev | line source |
---|---|
2006
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
1 #include <stdio.h> |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
2 #include <sys/time.h> |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
3 #include <string.h> |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
4 #include <cuda.h> |
2008
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
5 #include <vector_types.h> |
2006
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
6 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
7 #include "pgm.h" |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
8 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
/* Numeric constants. PI and MAX_SOURCE_SIZE are not referenced in the visible
 * part of this file; they are kept so any other user still finds them. */
#define PI 3.14159265358979
#define MAX_SOURCE_SIZE (0x100000)

/*
 * Magnitude of the complex value a + bi, i.e. sqrt(a^2 + b^2).
 * Both components must be squared; the earlier form added b unsquared.
 * Each argument is evaluated twice, so pass side-effect-free expressions.
 */
#define AMP(a, b) (sqrt((a)*(a)+(b)*(b)))
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
12 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
/* Timestamps in seconds, as returned by getTime(), taken by main() at the
 * start and end of the timed section. */
static double st_time, ed_time;

/* Transform direction selector passed to fftCore(). */
enum Mode { forward = 0, inverse = 1 };
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
19 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
20 CUmodule module; |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
21 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
/*
 * Returns the current time of day from gettimeofday() as a single double:
 * whole seconds plus the microsecond part scaled to seconds.
 */
static double
getTime() {
    struct timeval now;
    gettimeofday(&now, NULL);

    double micros = (double)now.tv_usec;
    return now.tv_sec + micros*1e-6;
}
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
28 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
/*
 * Chooses the launch grid for an x-by-y problem.
 *
 * xblocks, yblocks: outputs receiving the grid width and height.
 * x, y:             requested extents.
 *
 * Always returns 0. A one-row request (y == 1) is handled as its own case,
 * although both cases currently produce the grid (x, y).
 */
int
setWorkSize(int* xblocks, int* yblocks, int x, int y)
{
    *xblocks = x;

    if (y == 1) {
        *yblocks = 1;
    } else {
        *yblocks = y;
    }

    return 0;
}
2010 | 45 |
2006
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
46 int |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
47 fftCore(CUdeviceptr dst, CUdeviceptr src, CUdeviceptr spin, int m, enum Mode direction) |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
48 { |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
49 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
50 unsigned int flag; |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
51 switch (direction) { |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
52 case forward:flag = 0x00000000; break; |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
53 case inverse:flag = 0x80000000; break; |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
54 } |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
55 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
56 int n = 1<<m; |
2010 | 57 int xblocks, yblocks; |
58 setWorkSize(&xblocks, &yblocks, n, n); | |
2006
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
59 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
60 CUfunction bitReverse; |
2008
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
61 cuModuleGetFunction(&bitReverse, module, "bitReverse"); |
2006
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
62 |
2008
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
63 void* bitReverse_args[] = {&dst, &src, &m, &n}; |
2006
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
64 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
65 cuLaunchKernel(bitReverse, |
2010 | 66 xblocks, yblocks, 1, |
67 1, 1, 1, | |
2008
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
68 0, NULL, bitReverse_args, NULL); |
2006
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
69 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
70 CUfunction butterfly; |
2008
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
71 cuModuleGetFunction(&butterfly, module, "butterfly"); |
2006
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
72 |
2010 | 73 setWorkSize(&xblocks, &yblocks, n/2, n); |
2008
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
74 void* butterfly_args[] = {&dst, &spin, &m, &n, 0, &flag}; |
2006
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
75 for (int i=1;i<=m;i++) { |
2008
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
76 butterfly_args[4] = &i; |
2006
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
77 cuLaunchKernel(butterfly, |
2010 | 78 xblocks, yblocks, 1, |
79 1, 1, 1, | |
2008
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
80 0, NULL, butterfly_args, NULL); |
2006
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
81 } |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
82 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
83 CUfunction norm; |
2008
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
84 cuModuleGetFunction(&norm, module, "norm"); |
2006
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
85 |
2010 | 86 void* norm_args[] = {&dst, &n}; |
2006
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
87 if (direction == inverse) { |
2010 | 88 setWorkSize(&xblocks, &yblocks, n, n); |
2006
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
89 cuLaunchKernel(norm, |
2010 | 90 xblocks, yblocks, 1, |
91 1, 1, 1, | |
2008
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
92 0, NULL, norm_args, NULL); |
2006
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
93 } |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
94 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
95 return 0; |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
96 } |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
97 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
/*
 * Scans the command line for "-file <name>" and returns <name>.
 *
 * argc, argv: the arguments given to main(); argv must end with a NULL entry.
 *
 * When "-file" occurs more than once the last occurrence wins. If no
 * arguments were given, or no file name was found, the usage text is printed
 * and the process exits with status -1.
 */
char*
init(int argc, char**argv){

    char *path = NULL;

    char **cursor = argv + 1;
    while (*cursor != NULL) {
        if (!strcmp(*cursor, "-file")) {
            // The entry after the option is the name (NULL if "-file" is last).
            path = cursor[1];
        }
        ++cursor;
    }

    if (argc == 1 || path == NULL) {
        printf("Usage: ./fft -file [image filename] \n");
        exit(-1);
    }

    return path;
}
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
116 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
117 int main(int args, char* argv[]) { |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
118 cuInit(0); |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
119 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
120 CUdevice device; |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
121 cuDeviceGet(&device, 0); |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
122 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
123 CUcontext context; |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
124 cuCtxCreate(&context, CU_CTX_SCHED_SPIN, device); |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
125 |
2011 | 126 cuModuleLoad(&module, "fft.ptx"); |
2006
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
127 |
2008
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
128 char* pgm_file = init(args, argv); |
2006
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
129 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
130 pgm_t ipgm; |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
131 int err = readPGM(&ipgm, pgm_file); |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
132 if (err<0) { |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
133 fprintf(stderr, "Failed to read image file.\n"); |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
134 exit(1); |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
135 } |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
136 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
137 int n = ipgm.width; |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
138 int m = (int)(log((double)n)/log(2.0)); |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
139 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
140 pgm_t opgm; |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
141 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
142 float2* xm = (float2*)malloc(n*n*sizeof(float2)); |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
143 float2* rm = (float2*)malloc(n*n*sizeof(float2)); |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
144 float2* wm = (float2*)malloc(n/2*sizeof(float2)); |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
145 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
146 for (int i=0; i<n*n; i++) { |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
147 xm[i].x = (float)ipgm.buf[i]; |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
148 xm[i].y = (float)0; |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
149 } |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
150 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
151 st_time = getTime(); |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
152 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
153 // memory allocate |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
154 CUdeviceptr xmobj; |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
155 cuMemAlloc(&xmobj, n*n*sizeof(float2)); |
2010 | 156 |
2006
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
157 CUdeviceptr rmobj; |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
158 cuMemAlloc(&rmobj, n*n*sizeof(float2)); |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
159 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
160 CUdeviceptr wmobj; |
2010 | 161 cuMemAlloc(&wmobj, n/2*sizeof(float2)); |
162 | |
163 CUfunction spinFact; | |
164 cuModuleGetFunction(&spinFact, module, "spinFact"); | |
165 | |
166 int xblocks, yblocks; | |
167 setWorkSize(&xblocks, &yblocks, n/2, 1); | |
2006
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
168 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
169 // Synchronous data transfer(host to device) |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
170 cuMemcpyHtoD(xmobj, xm, n*n*sizeof(float2)); |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
171 |
2010 | 172 void* spinFact_args[] = {&wmobj, &n}; |
2006
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
173 cuLaunchKernel(spinFact, |
2010 | 174 xblocks, yblocks, 1, |
175 1, 1, 1, | |
2008
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
176 0, NULL, spinFact_args, NULL); |
2006
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
177 |
2010 | 178 |
2006
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
179 fftCore(rmobj, xmobj, wmobj, m, forward); |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
180 |
2008
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
181 CUfunction transpose; |
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
182 cuModuleGetFunction(&transpose, module, "transpose"); |
2006
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
183 |
2010 | 184 setWorkSize(&xblocks, &yblocks, n, n); |
2006
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
185 |
2008
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
186 void* transpose_args[] = {&xmobj, &rmobj, &n}; |
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
187 cuLaunchKernel(transpose, |
2010 | 188 xblocks, yblocks, 1, |
189 1, 1, 1, | |
2008
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
190 0, NULL, transpose_args, NULL); |
2006
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
191 |
2010 | 192 |
2006
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
193 fftCore(rmobj, xmobj, wmobj, m, forward); |
2010 | 194 |
2006
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
195 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
196 CUfunction highPassFilter; |
2008
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
197 cuModuleGetFunction(&highPassFilter, module, "highPassFilter"); |
2006
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
198 |
2010 | 199 setWorkSize(&xblocks, &yblocks, n, n); |
2006
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
200 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
201 int radius = n/8; |
2008
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
202 void*highPassFilter_args[] = {&rmobj, &n, &radius}; |
2006
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
203 cuLaunchKernel(highPassFilter, |
2010 | 204 xblocks, yblocks, 1, |
205 1, 1, 1, | |
2008
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
206 0, NULL, highPassFilter_args, NULL); |
2006
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
207 |
2010 | 208 |
2006
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
209 fftCore(xmobj, rmobj, wmobj, m, inverse); |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
210 |
2010 | 211 setWorkSize(&xblocks, &yblocks, n, n); |
2006
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
212 |
2008
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
213 void* transpose2_args[] = {&rmobj, &xmobj, &n}; |
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
214 cuLaunchKernel(transpose, |
2010 | 215 xblocks, yblocks, 1, |
216 1, 1, 1, | |
2008
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
217 0, NULL, transpose2_args, NULL); |
2006
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
218 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
219 fftCore(xmobj, rmobj, wmobj, m, inverse); |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
220 |
2010 | 221 |
2008
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
222 cuMemcpyDtoH(xm, xmobj, n*n*sizeof(float2)); |
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
223 |
2010 | 224 cuStreamSynchronize(NULL); |
225 | |
2008
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
226 float* ampd; |
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
227 ampd = (float*)malloc(n*n*sizeof(float)); |
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
228 |
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
229 for (int i=0;i<n*n;i++) |
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
230 ampd[i] = (AMP(xm[i].x, xm[i].y)); |
2006
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
231 |
2008
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
232 opgm.width = n; |
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
233 opgm.height = n; |
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
234 normalizeF2PGM(&opgm, ampd); |
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
235 free(ampd); |
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
236 |
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
237 ed_time = getTime(); |
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
238 |
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
239 writePGM(&opgm, "output.pgm"); |
2006
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
240 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
241 // memory release |
2008
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
242 cuMemFree(xmobj); |
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
243 cuMemFree(rmobj); |
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
244 cuMemFree(wmobj); |
2006
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
245 cuModuleUnload(module); |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
246 cuCtxDestroy(context); |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
247 |
2008
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
248 destroyPGM(&ipgm); |
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
249 destroyPGM(&opgm); |
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
250 |
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
251 free(xm); |
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
252 free(rm); |
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
253 free(wm); |
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
254 |
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
255 printf("Time: %0.6f\n", ed_time-st_time); |
2006
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
256 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
257 return 0; |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
258 } |