Mercurial > hg > Game > Cerium
annotate example/cuda_fft/main.cc @ 2008:2c8eab01cc78 draft
implement fft using cuda
author | Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp> |
---|---|
date | Tue, 03 Jun 2014 18:10:19 +0900 (2014-06-03) |
parents | bc2121b09cbc |
children | 6fced32f85fd |
rev | line source |
---|---|
2006
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
1 #include <stdio.h> |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
2 #include <sys/time.h> |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
3 #include <string.h> |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
4 #include <cuda.h> |
2008
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
5 #include <vector_types.h> |
2006
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
6 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
7 #include "pgm.h" |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
8 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
9 #define PI 3.14159265358979 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
10 #define MAX_SOURCE_SIZE (0x100000) |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
/*
 * Magnitude of the complex value (a + bi): sqrt(a*a + b*b).
 * Both arguments are evaluated twice, so do not pass expressions with
 * side effects.
 */
#define AMP(a, b) (sqrt((a)*(a)+(b)*(b)))
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
12 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
13 static double st_time; |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
14 static double ed_time; |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
/* Transform direction selector accepted by fftCore(). */
enum Mode {
    forward,    /* value 0: forward transform */
    inverse     /* value 1: inverse transform */
};
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
19 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
20 CUmodule module; |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
21 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
/*
 * Current wall-clock time from gettimeofday(), expressed in seconds
 * with the microsecond part folded in as a fraction.
 */
static double
getTime(void) {
    struct timeval now;
    gettimeofday(&now, NULL);

    double micros = (double)now.tv_usec;
    return now.tv_sec + micros * 1e-6;
}
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
28 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
/*
 * Store a launch configuration through the two output pointers.
 *
 *   block  - receives x
 *   thread - receives y
 *
 * Always returns 0.  The y == 1 case needs no special handling: storing y
 * already yields a thread count of 1.
 */
int
setWorkSize(int* block, int* thread, int x, int y)
{
    *block = x;
    *thread = y;
    return 0;
}
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
45 int |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
46 fftCore(CUdeviceptr dst, CUdeviceptr src, CUdeviceptr spin, int m, enum Mode direction) |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
47 { |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
48 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
49 unsigned int flag; |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
50 switch (direction) { |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
51 case forward:flag = 0x00000000; break; |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
52 case inverse:flag = 0x80000000; break; |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
53 } |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
54 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
55 int n = 1<<m; |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
56 int block, thread; |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
57 setWorkSize(&block, &thread, n, n); |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
58 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
59 CUfunction bitReverse; |
2008
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
60 cuModuleGetFunction(&bitReverse, module, "bitReverse"); |
2006
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
61 |
2008
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
62 void* bitReverse_args[] = {&dst, &src, &m, &n}; |
2006
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
63 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
64 cuLaunchKernel(bitReverse, |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
65 block, 1, 1, |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
66 thread, 1, 1, |
2008
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
67 0, NULL, bitReverse_args, NULL); |
2006
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
68 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
69 CUfunction butterfly; |
2008
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
70 cuModuleGetFunction(&butterfly, module, "butterfly"); |
2006
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
71 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
72 setWorkSize(&block, &thread, n/2, n); |
2008
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
73 void* butterfly_args[] = {&dst, &spin, &m, &n, 0, &flag}; |
2006
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
74 for (int i=1;i<=m;i++) { |
2008
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
75 butterfly_args[4] = &i; |
2006
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
76 cuLaunchKernel(butterfly, |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
77 block, 1, 1, |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
78 thread, 1, 1, |
2008
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
79 0, NULL, butterfly_args, NULL); |
2006
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
80 } |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
81 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
82 CUfunction norm; |
2008
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
83 cuModuleGetFunction(&norm, module, "norm"); |
2006
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
84 |
2008
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
85 void* norm_args[] = {&dst, &m}; |
2006
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
86 if (direction == inverse) { |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
87 setWorkSize(&block, &thread, n, n); |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
88 cuLaunchKernel(norm, |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
89 block, 1, 1, |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
90 thread, 1, 1, |
2008
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
91 0, NULL, norm_args, NULL); |
2006
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
92 } |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
93 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
94 return 0; |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
95 } |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
96 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
/*
 * Parse the command line and return the argument that follows "-file".
 * When "-file" appears more than once the last occurrence wins.
 *
 *   argc, argv - as received by main()
 *
 * Returns a pointer into argv (not a copy).  If no file name was supplied
 * the usage text is printed and the process exits with status -1.
 *
 * The scan is bounded by argc so that argv is never read past its last
 * element, even when argc is 0.
 */
char*
init(int argc, char**argv){

    char *filename = NULL;

    for (int i = 1; i < argc; ++i) {
        if (strcmp(argv[i], "-file") == 0) {
            /* A trailing "-file" with no value resets the result to NULL. */
            filename = (i + 1 < argc) ? argv[i+1] : NULL;
        }
    }

    if (filename == NULL) {
        printf("Usage: ./fft -file [image filename] \n");
        exit(-1);
    }

    return filename;
}
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
115 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
116 int main(int args, char* argv[]) { |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
117 cuInit(0); |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
118 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
119 CUdevice device; |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
120 cuDeviceGet(&device, 0); |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
121 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
122 CUcontext context; |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
123 cuCtxCreate(&context, CU_CTX_SCHED_SPIN, device); |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
124 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
125 cuModuleLoad(&module, "fft.ptx"); |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
126 |
2008
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
127 char* pgm_file = init(args, argv); |
2006
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
128 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
129 pgm_t ipgm; |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
130 int err = readPGM(&ipgm, pgm_file); |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
131 if (err<0) { |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
132 fprintf(stderr, "Failed to read image file.\n"); |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
133 exit(1); |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
134 } |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
135 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
136 int n = ipgm.width; |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
137 int m = (int)(log((double)n)/log(2.0)); |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
138 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
139 pgm_t opgm; |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
140 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
141 float2* xm = (float2*)malloc(n*n*sizeof(float2)); |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
142 float2* rm = (float2*)malloc(n*n*sizeof(float2)); |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
143 float2* wm = (float2*)malloc(n/2*sizeof(float2)); |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
144 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
145 for (int i=0; i<n*n; i++) { |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
146 xm[i].x = (float)ipgm.buf[i]; |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
147 xm[i].y = (float)0; |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
148 } |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
149 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
150 st_time = getTime(); |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
151 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
152 // memory allocate |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
153 CUdeviceptr xmobj; |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
154 cuMemAlloc(&xmobj, n*n*sizeof(float2)); |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
155 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
156 CUdeviceptr rmobj; |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
157 cuMemAlloc(&rmobj, n*n*sizeof(float2)); |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
158 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
159 CUdeviceptr wmobj; |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
160 cuMemAlloc(&wmobj, (n/2)*sizeof(float2)); |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
161 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
162 // Synchronous data transfer(host to device) |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
163 cuMemcpyHtoD(xmobj, xm, n*n*sizeof(float2)); |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
164 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
165 CUfunction spinFact; |
2008
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
166 cuModuleGetFunction(&spinFact, module, "spinFact"); |
2006
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
167 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
168 int block, thread; |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
169 setWorkSize(&block, &thread, n/2, 1); |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
170 |
2008
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
171 void* spinFact_args[] = {&xmobj, &n}; |
2006
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
172 cuLaunchKernel(spinFact, |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
173 block, 1, 1, |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
174 thread, 1, 1, |
2008
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
175 0, NULL, spinFact_args, NULL); |
2006
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
176 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
177 fftCore(rmobj, xmobj, wmobj, m, forward); |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
178 |
2008
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
179 CUfunction transpose; |
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
180 cuModuleGetFunction(&transpose, module, "transpose"); |
2006
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
181 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
182 setWorkSize(&block, &thread, n, n); |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
183 |
2008
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
184 void* transpose_args[] = {&xmobj, &rmobj, &n}; |
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
185 cuLaunchKernel(transpose, |
2006
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
186 block, 1, 1, |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
187 thread, 1, 1, |
2008
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
188 0, NULL, transpose_args, NULL); |
2006
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
189 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
190 fftCore(rmobj, xmobj, wmobj, m, forward); |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
191 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
192 CUfunction highPassFilter; |
2008
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
193 cuModuleGetFunction(&highPassFilter, module, "highPassFilter"); |
2006
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
194 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
195 setWorkSize(&block, &thread, n, n); |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
196 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
197 int radius = n/8; |
2008
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
198 void*highPassFilter_args[] = {&rmobj, &n, &radius}; |
2006
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
199 cuLaunchKernel(highPassFilter, |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
200 block, 1, 1, |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
201 thread, 1, 1, |
2008
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
202 0, NULL, highPassFilter_args, NULL); |
2006
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
203 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
204 fftCore(xmobj, rmobj, wmobj, m, inverse); |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
205 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
206 setWorkSize(&block, &thread, n, n); |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
207 |
2008
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
208 void* transpose2_args[] = {&rmobj, &xmobj, &n}; |
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
209 cuLaunchKernel(transpose, |
2006
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
210 block, 1, 1, |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
211 thread, 1, 1, |
2008
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
212 0, NULL, transpose2_args, NULL); |
2006
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
213 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
214 fftCore(xmobj, rmobj, wmobj, m, inverse); |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
215 |
2008
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
216 cuMemcpyDtoH(xm, xmobj, n*n*sizeof(float2)); |
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
217 |
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
218 float* ampd; |
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
219 ampd = (float*)malloc(n*n*sizeof(float)); |
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
220 |
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
221 for (int i=0;i<n*n;i++) |
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
222 ampd[i] = (AMP(xm[i].x, xm[i].y)); |
2006
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
223 |
2008
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
224 opgm.width = n; |
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
225 opgm.height = n; |
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
226 normalizeF2PGM(&opgm, ampd); |
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
227 free(ampd); |
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
228 |
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
229 ed_time = getTime(); |
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
230 |
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
231 writePGM(&opgm, "output.pgm"); |
2006
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
232 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
233 // memory release |
2008
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
234 cuMemFree(xmobj); |
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
235 cuMemFree(rmobj); |
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
236 cuMemFree(wmobj); |
2006
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
237 cuModuleUnload(module); |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
238 cuCtxDestroy(context); |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
239 |
2008
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
240 destroyPGM(&ipgm); |
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
241 destroyPGM(&opgm); |
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
242 |
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
243 free(xm); |
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
244 free(rm); |
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
245 free(wm); |
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
246 |
2c8eab01cc78
implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
2007
diff
changeset
|
247 printf("Time: %0.6f\n", ed_time-st_time); |
2006
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
248 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
249 return 0; |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
250 } |