annotate example/cuda_fft/main.cc @ 2048:6796d85f3d6b draft

remove error
author Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
date Thu, 28 Jan 2016 00:05:49 +0900
parents faaea4e1ce1c
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1 #include <stdio.h>
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2 #include <sys/time.h>
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3 #include <string.h>
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
4 #include <cuda.h>
2008
2c8eab01cc78 implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2007
diff changeset
5 #include <vector_types.h>
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
6
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
7 #include "pgm.h"
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
8
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
9 #define PI 3.14159265358979
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
10 #define MAX_SOURCE_SIZE (0x100000)
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
/*
 * Magnitude of the complex value a + b*i: sqrt(a^2 + b^2).
 * Both terms must be squared; adding the raw imaginary part instead
 * produces a wrong amplitude and NaN whenever b is negative enough.
 * Arguments are evaluated more than once, so pass side-effect-free
 * expressions only.
 */
#define AMP(a, b) (sqrt((a)*(a)+(b)*(b)))
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
12
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
/* Direction of the transform requested from fftCore(). */
enum Mode {
    forward = 0,
    inverse = 1
};

/*
 * Timestamps taken with getTime() around the timed section of main();
 * their difference is what main() prints as "Time:".
 */
static double st_time;
static double ed_time;
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
19
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
20 CUmodule module;
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
21
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
/*
 * Return the current time of day as reported by gettimeofday(),
 * expressed in seconds with the microsecond field folded in as the
 * fractional part.
 */
static double
getTime()
{
    struct timeval now;
    gettimeofday(&now, NULL);

    double whole = now.tv_sec;
    double fraction = (double)now.tv_usec * 1e-6;
    return whole + fraction;
}
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
28
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
/*
 * Choose the launch grid for an x-by-y index space.
 *
 * xblocks  receives the grid width  (always x)
 * yblocks  receives the grid height (1 for a one-row space, otherwise y)
 *
 * Always returns 0.
 */
int
setWorkSize(int* xblocks, int* yblocks, int x, int y)
{
    /* The grid width does not depend on how many rows there are. */
    *xblocks = x;

    if (y == 1) {
        *yblocks = 1;
    } else {
        *yblocks = y;
    }

    return 0;
}
2010
6fced32f85fd wrong result
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2008
diff changeset
45
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
46 int
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
47 fftCore(CUdeviceptr dst, CUdeviceptr src, CUdeviceptr spin, int m, enum Mode direction)
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
48 {
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
49
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
50 unsigned int flag;
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
51 switch (direction) {
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
52 case forward:flag = 0x00000000; break;
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
53 case inverse:flag = 0x80000000; break;
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
54 }
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
55
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
56 int n = 1<<m;
2010
6fced32f85fd wrong result
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2008
diff changeset
57 int xblocks, yblocks;
6fced32f85fd wrong result
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2008
diff changeset
58 setWorkSize(&xblocks, &yblocks, n, n);
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
59
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
60 CUfunction bitReverse;
2008
2c8eab01cc78 implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2007
diff changeset
61 cuModuleGetFunction(&bitReverse, module, "bitReverse");
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
62
2008
2c8eab01cc78 implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2007
diff changeset
63 void* bitReverse_args[] = {&dst, &src, &m, &n};
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
64
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
65 cuLaunchKernel(bitReverse,
2010
6fced32f85fd wrong result
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2008
diff changeset
66 xblocks, yblocks, 1,
6fced32f85fd wrong result
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2008
diff changeset
67 1, 1, 1,
2008
2c8eab01cc78 implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2007
diff changeset
68 0, NULL, bitReverse_args, NULL);
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
69
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
70 CUfunction butterfly;
2008
2c8eab01cc78 implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2007
diff changeset
71 cuModuleGetFunction(&butterfly, module, "butterfly");
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
72
2010
6fced32f85fd wrong result
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2008
diff changeset
73 setWorkSize(&xblocks, &yblocks, n/2, n);
2008
2c8eab01cc78 implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2007
diff changeset
74 void* butterfly_args[] = {&dst, &spin, &m, &n, 0, &flag};
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
75 for (int i=1;i<=m;i++) {
2008
2c8eab01cc78 implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2007
diff changeset
76 butterfly_args[4] = &i;
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
77 cuLaunchKernel(butterfly,
2010
6fced32f85fd wrong result
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2008
diff changeset
78 xblocks, yblocks, 1,
6fced32f85fd wrong result
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2008
diff changeset
79 1, 1, 1,
2008
2c8eab01cc78 implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2007
diff changeset
80 0, NULL, butterfly_args, NULL);
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
81 }
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
82
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
83 CUfunction norm;
2008
2c8eab01cc78 implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2007
diff changeset
84 cuModuleGetFunction(&norm, module, "norm");
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
85
2010
6fced32f85fd wrong result
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2008
diff changeset
86 void* norm_args[] = {&dst, &n};
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
87 if (direction == inverse) {
2010
6fced32f85fd wrong result
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2008
diff changeset
88 setWorkSize(&xblocks, &yblocks, n, n);
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
89 cuLaunchKernel(norm,
2010
6fced32f85fd wrong result
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2008
diff changeset
90 xblocks, yblocks, 1,
6fced32f85fd wrong result
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2008
diff changeset
91 1, 1, 1,
2008
2c8eab01cc78 implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2007
diff changeset
92 0, NULL, norm_args, NULL);
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
93 }
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
94
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
95 return 0;
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
96 }
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
97
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
/*
 * Extract the image path from the command line.
 *
 * Scans argv[1..] up to the terminating NULL entry; each "-file" option
 * takes the following entry as the path, and the last occurrence wins.
 * If no arguments were given or no path was found, prints the usage
 * line and terminates the process with exit(-1).
 *
 * Returns a pointer into argv (not a copy).
 */
char*
init(int argc, char**argv){
    char *path = NULL;
    int idx = 1;

    while (argv[idx] != NULL) {
        if (!strcmp(argv[idx], "-file")) {
            path = argv[idx + 1];
        }
        idx++;
    }

    if (argc == 1 || path == NULL) {
        printf("Usage: ./fft -file [image filename] \n");
        exit(-1);
    }

    return path;
}
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
116
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
117 int main(int args, char* argv[]) {
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
118 cuInit(0);
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
119
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
120 CUdevice device;
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
121 cuDeviceGet(&device, 0);
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
122
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
123 CUcontext context;
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
124 cuCtxCreate(&context, CU_CTX_SCHED_SPIN, device);
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
125
2011
faaea4e1ce1c minor change
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2010
diff changeset
126 cuModuleLoad(&module, "fft.ptx");
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
127
2008
2c8eab01cc78 implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2007
diff changeset
128 char* pgm_file = init(args, argv);
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
129
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
130 pgm_t ipgm;
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
131 int err = readPGM(&ipgm, pgm_file);
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
132 if (err<0) {
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
133 fprintf(stderr, "Failed to read image file.\n");
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
134 exit(1);
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
135 }
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
136
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
137 int n = ipgm.width;
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
138 int m = (int)(log((double)n)/log(2.0));
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
139
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
140 pgm_t opgm;
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
141
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
142 float2* xm = (float2*)malloc(n*n*sizeof(float2));
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
143 float2* rm = (float2*)malloc(n*n*sizeof(float2));
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
144 float2* wm = (float2*)malloc(n/2*sizeof(float2));
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
145
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
146 for (int i=0; i<n*n; i++) {
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
147 xm[i].x = (float)ipgm.buf[i];
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
148 xm[i].y = (float)0;
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
149 }
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
150
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
151 st_time = getTime();
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
152
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
153 // memory allocate
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
154 CUdeviceptr xmobj;
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
155 cuMemAlloc(&xmobj, n*n*sizeof(float2));
2010
6fced32f85fd wrong result
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2008
diff changeset
156
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
157 CUdeviceptr rmobj;
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
158 cuMemAlloc(&rmobj, n*n*sizeof(float2));
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
159
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
160 CUdeviceptr wmobj;
2010
6fced32f85fd wrong result
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2008
diff changeset
161 cuMemAlloc(&wmobj, n/2*sizeof(float2));
6fced32f85fd wrong result
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2008
diff changeset
162
6fced32f85fd wrong result
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2008
diff changeset
163 CUfunction spinFact;
6fced32f85fd wrong result
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2008
diff changeset
164 cuModuleGetFunction(&spinFact, module, "spinFact");
6fced32f85fd wrong result
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2008
diff changeset
165
6fced32f85fd wrong result
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2008
diff changeset
166 int xblocks, yblocks;
6fced32f85fd wrong result
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2008
diff changeset
167 setWorkSize(&xblocks, &yblocks, n/2, 1);
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
168
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
169 // Synchronous data transfer(host to device)
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
170 cuMemcpyHtoD(xmobj, xm, n*n*sizeof(float2));
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
171
2010
6fced32f85fd wrong result
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2008
diff changeset
172 void* spinFact_args[] = {&wmobj, &n};
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
173 cuLaunchKernel(spinFact,
2010
6fced32f85fd wrong result
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2008
diff changeset
174 xblocks, yblocks, 1,
6fced32f85fd wrong result
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2008
diff changeset
175 1, 1, 1,
2008
2c8eab01cc78 implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2007
diff changeset
176 0, NULL, spinFact_args, NULL);
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
177
2010
6fced32f85fd wrong result
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2008
diff changeset
178
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
179 fftCore(rmobj, xmobj, wmobj, m, forward);
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
180
2008
2c8eab01cc78 implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2007
diff changeset
181 CUfunction transpose;
2c8eab01cc78 implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2007
diff changeset
182 cuModuleGetFunction(&transpose, module, "transpose");
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
183
2010
6fced32f85fd wrong result
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2008
diff changeset
184 setWorkSize(&xblocks, &yblocks, n, n);
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
185
2008
2c8eab01cc78 implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2007
diff changeset
186 void* transpose_args[] = {&xmobj, &rmobj, &n};
2c8eab01cc78 implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2007
diff changeset
187 cuLaunchKernel(transpose,
2010
6fced32f85fd wrong result
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2008
diff changeset
188 xblocks, yblocks, 1,
6fced32f85fd wrong result
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2008
diff changeset
189 1, 1, 1,
2008
2c8eab01cc78 implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2007
diff changeset
190 0, NULL, transpose_args, NULL);
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
191
2010
6fced32f85fd wrong result
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2008
diff changeset
192
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
193 fftCore(rmobj, xmobj, wmobj, m, forward);
2010
6fced32f85fd wrong result
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2008
diff changeset
194
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
195
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
196 CUfunction highPassFilter;
2008
2c8eab01cc78 implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2007
diff changeset
197 cuModuleGetFunction(&highPassFilter, module, "highPassFilter");
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
198
2010
6fced32f85fd wrong result
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2008
diff changeset
199 setWorkSize(&xblocks, &yblocks, n, n);
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
200
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
201 int radius = n/8;
2008
2c8eab01cc78 implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2007
diff changeset
202 void*highPassFilter_args[] = {&rmobj, &n, &radius};
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
203 cuLaunchKernel(highPassFilter,
2010
6fced32f85fd wrong result
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2008
diff changeset
204 xblocks, yblocks, 1,
6fced32f85fd wrong result
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2008
diff changeset
205 1, 1, 1,
2008
2c8eab01cc78 implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2007
diff changeset
206 0, NULL, highPassFilter_args, NULL);
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
207
2010
6fced32f85fd wrong result
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2008
diff changeset
208
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
209 fftCore(xmobj, rmobj, wmobj, m, inverse);
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
210
2010
6fced32f85fd wrong result
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2008
diff changeset
211 setWorkSize(&xblocks, &yblocks, n, n);
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
212
2008
2c8eab01cc78 implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2007
diff changeset
213 void* transpose2_args[] = {&rmobj, &xmobj, &n};
2c8eab01cc78 implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2007
diff changeset
214 cuLaunchKernel(transpose,
2010
6fced32f85fd wrong result
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2008
diff changeset
215 xblocks, yblocks, 1,
6fced32f85fd wrong result
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2008
diff changeset
216 1, 1, 1,
2008
2c8eab01cc78 implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2007
diff changeset
217 0, NULL, transpose2_args, NULL);
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
218
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
219 fftCore(xmobj, rmobj, wmobj, m, inverse);
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
220
2010
6fced32f85fd wrong result
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2008
diff changeset
221
2008
2c8eab01cc78 implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2007
diff changeset
222 cuMemcpyDtoH(xm, xmobj, n*n*sizeof(float2));
2c8eab01cc78 implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2007
diff changeset
223
2010
6fced32f85fd wrong result
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2008
diff changeset
224 cuStreamSynchronize(NULL);
6fced32f85fd wrong result
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2008
diff changeset
225
2008
2c8eab01cc78 implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2007
diff changeset
226 float* ampd;
2c8eab01cc78 implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2007
diff changeset
227 ampd = (float*)malloc(n*n*sizeof(float));
2c8eab01cc78 implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2007
diff changeset
228
2c8eab01cc78 implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2007
diff changeset
229 for (int i=0;i<n*n;i++)
2c8eab01cc78 implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2007
diff changeset
230 ampd[i] = (AMP(xm[i].x, xm[i].y));
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
231
2008
2c8eab01cc78 implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2007
diff changeset
232 opgm.width = n;
2c8eab01cc78 implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2007
diff changeset
233 opgm.height = n;
2c8eab01cc78 implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2007
diff changeset
234 normalizeF2PGM(&opgm, ampd);
2c8eab01cc78 implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2007
diff changeset
235 free(ampd);
2c8eab01cc78 implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2007
diff changeset
236
2c8eab01cc78 implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2007
diff changeset
237 ed_time = getTime();
2c8eab01cc78 implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2007
diff changeset
238
2c8eab01cc78 implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2007
diff changeset
239 writePGM(&opgm, "output.pgm");
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
240
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
241 // memory release
2008
2c8eab01cc78 implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2007
diff changeset
242 cuMemFree(xmobj);
2c8eab01cc78 implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2007
diff changeset
243 cuMemFree(rmobj);
2c8eab01cc78 implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2007
diff changeset
244 cuMemFree(wmobj);
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
245 cuModuleUnload(module);
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
246 cuCtxDestroy(context);
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
247
2008
2c8eab01cc78 implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2007
diff changeset
248 destroyPGM(&ipgm);
2c8eab01cc78 implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2007
diff changeset
249 destroyPGM(&opgm);
2c8eab01cc78 implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2007
diff changeset
250
2c8eab01cc78 implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2007
diff changeset
251 free(xm);
2c8eab01cc78 implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2007
diff changeset
252 free(rm);
2c8eab01cc78 implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2007
diff changeset
253 free(wm);
2c8eab01cc78 implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2007
diff changeset
254
2c8eab01cc78 implement fft using cuda
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2007
diff changeset
255 printf("Time: %0.6f\n", ed_time-st_time);
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
256
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
257 return 0;
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
258 }