annotate example/cuda_fft/fft.cu @ 2048:6796d85f3d6b draft

remove error
author Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
date Thu, 28 Jan 2016 00:05:49 +0900
parents faaea4e1ce1c
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1 extern "C" {
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2
2007
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
3 #define PI 3.14159265358979323846
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
4 #define PI_2 1.57079632679489661923
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
5
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
6 __global__ void
2007
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
7 bitReverse(float2* dst, float2* src, int m, int n)
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
8 {
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
9 unsigned long gid = blockIdx.x*blockDim.x+threadIdx.x; // (unsigned long)s->get_param(0);
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
10 unsigned long nid = blockIdx.y*blockDim.y+threadIdx.y; // (unsigned long)s->get_param(1);
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
11
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
12 unsigned int j = gid;
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
13 j = (j & 0x55555555) << 1 | (j & 0xAAAAAAAA) >> 1;
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
14 j = (j & 0x33333333) << 2 | (j & 0xCCCCCCCC) >> 2;
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
15 j = (j & 0x0F0F0F0F) << 4 | (j & 0xF0F0F0F0) >> 4;
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
16 j = (j & 0x00FF00FF) << 8 | (j & 0xFF00FF00) >> 8;
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
17 j = (j & 0x0000FFFF) << 16 | (j & 0xFFFF0000) >> 16;
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
18
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
19 j >>= (32-m);
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
20
2011
faaea4e1ce1c minor change
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2010
diff changeset
21 dst[nid*n+j] = src[nid*n+gid];
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
22 }
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
23
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
24 __global__ void
2007
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
25 butterfly(float2* x, float2* w, int m, int n, int iter, unsigned int flag)
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
26 {
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
27 unsigned long gid = blockIdx.x*blockDim.x+threadIdx.x; // (unsigned long)s->get_param(0);
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
28 unsigned long nid = blockIdx.y*blockDim.y+threadIdx.y; // (unsigned long)s->get_param(1);
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
29
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
30 int butterflySize = 1 << (iter-1);
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
31 int butterflyGrpDist = 1 << iter;
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
32 int butterflyGrpNum = n >> iter;
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
33 int butterflyGrpBase = (gid >> (iter-1))*(butterflyGrpDist);
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
34 int butterflyGrpOffset = gid & (butterflySize-1);
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
35
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
36 int a = nid * n + butterflyGrpBase + butterflyGrpOffset;
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
37 int b = a + butterflySize;
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
38
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
39 int l = butterflyGrpNum * butterflyGrpOffset;
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
40
2007
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
41 float2 xa, xb, xbxx, xbyy, wab, wayx, wbyx, resa, resb;
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
42
2011
faaea4e1ce1c minor change
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2010
diff changeset
43 xa = x[a];
faaea4e1ce1c minor change
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2010
diff changeset
44 xb = x[b];
2007
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
45 xbxx.x = xbxx.y = xb.x;
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
46 xbyy.x = xbyy.y = xb.y;
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
47
2011
faaea4e1ce1c minor change
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2010
diff changeset
48 wab.x = (float)((unsigned int)w[l].x ^ (unsigned int)0x0);
2007
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
49 wab.y = (float)((unsigned int)w[l].y ^ (unsigned int)flag);
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
50
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
51 wayx.x = (float)((unsigned int)wab.y ^ (unsigned int)0x80000000);
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
52 wayx.y = (float)((unsigned int)wab.x ^ (unsigned int)0x0);
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
53
2007
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
54 wbyx.x = (float)((unsigned int)wab.y ^ (unsigned int)0x0);
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
55 wbyx.y = (float)((unsigned int)wab.x ^ (unsigned int)0x80000000);
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
56
2007
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
57 resa.x = xa.x + xbxx.x*wab.x + xbyy.x*wayx.x;
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
58 resa.y = xa.y + xbxx.y*wab.y + xbyy.y*wayx.y;
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
59
2007
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
60 resb.x = xa.x - xbxx.x*wab.x + xbyy.x*wbyx.x;
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
61 resb.y = xa.y - xbxx.y*wab.y + xbyy.y*wbyx.y;
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
62
2011
faaea4e1ce1c minor change
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2010
diff changeset
63 x[a] = resa;
faaea4e1ce1c minor change
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2010
diff changeset
64 x[b] = resb;
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
65 }
2007
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
66
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
67 __global__ void
2007
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
68 highPassFilter(float2* image, int n, int radius)
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
69 {
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
70 unsigned long xgid = blockIdx.x*blockDim.x+threadIdx.x; // (unsigned long)s->get_param(0);
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
71 unsigned long ygid = blockIdx.y*blockDim.y+threadIdx.y; // (unsigned long)s->get_param(1);
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
72
2007
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
73 int2 n_2;
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
74 n_2.x = n_2.y = n>>1;
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
75
2007
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
76 int2 mask;
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
77 mask.x = mask.y = n-1;
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
78
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
79 int2 gid;
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
80 gid.x = (xgid + n_2.x) & mask.x;
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
81 gid.y = (ygid + n_2.y) & mask.y;
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
82
2007
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
83 int2 diff;
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
84 diff.x = n_2.x - gid.x;
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
85 diff.y = n_2.y - gid.y;
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
86
2007
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
87 int2 diff2;
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
88 diff2.x = diff.x * diff.x;
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
89 diff2.y = diff.y * diff.y;
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
90
2007
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
91 int dist2 = diff2.x + diff2.y;
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
92
2007
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
93 int2 window;
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
94
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
95 if (dist2 < radius*radius) {
2007
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
96 window.x = window.y = (int)0L;
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
97 } else {
2007
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
98 window.x = window.y = (int)-1L;
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
99 }
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
100
2007
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
101 image[ygid*n+xgid].x = (float)((int)image[ygid*n+xgid].x & window.x);
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
102 image[ygid*n+xgid].y = (float)((int)image[ygid*n+xgid].y & window.y);
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
103 }
2007
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
104
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
105 __global__ void
2007
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
106 norm(float2* x, int n)
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
107 {
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
108 unsigned long gid = blockIdx.x*blockDim.x+threadIdx.x; // (unsigned long)s->get_param(0);
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
109 unsigned long nid = blockIdx.y*blockDim.y+threadIdx.y; //(unsigned long)s->get_param(1);
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
110
2007
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
111 x[nid*n+gid].x = x[nid*n+gid].x / (float)n;
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
112 x[nid*n+gid].y = x[nid*n+gid].y / (float)n;
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
113 }
2007
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
114
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
115
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
116
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
117 __global__ void
2007
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
118 spinFact(float2* w, int n)
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
119 {
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
120 unsigned long i = blockIdx.x*blockDim.x+threadIdx.x; // (unsigned long)s->get_param(0);
2007
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
121 float2 angle;
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
122 angle.x = (float)(2*i*PI/(float)n);
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
123 angle.y = (float)((2*i*PI/(float)n) + PI_2);
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
124
2007
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
125 w[i].x = cos(angle.x);
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
126 w[i].y = cos(angle.y);
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
127 }
2007
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
128
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
129 __global__ void
2007
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
130 transpose(float2* dst, float2* src, int n)
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
131 {
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
132 unsigned long xgid = blockIdx.x*blockDim.x+threadIdx.x; // (unsigned long)s->get_param(0);
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
133 unsigned long ygid = blockIdx.y*blockDim.y*threadIdx.y; // (unsigned long)s->get_param(1);
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
134
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
135 unsigned int iid = ygid * n + xgid;
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
136 unsigned int oid = xgid * n + ygid;
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
137
2011
faaea4e1ce1c minor change
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2010
diff changeset
138 dst[oid] = src[iid];
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
139 }
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
140 }