annotate example/cuda_fft/fft.cu @ 2007:bc2121b09cbc draft

kernel done
author Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
date Tue, 03 Jun 2014 16:02:06 +0900
parents f6aa6d6a3fa2
children 6fced32f85fd
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1 extern "C" {
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2
2007
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
3 #define PI 3.14159265358979323846
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
4 #define PI_2 1.57079632679489661923
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
5
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
6 __global__ void
2007
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
7 bitReverse(float2* dst, float2* src, int m, int n)
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
8 {
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
9 unsigned long gid = blockIdx.x*blockDim.x+threadIdx.x; // (unsigned long)s->get_param(0);
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
10 unsigned long nid = blockIdx.y*blockDim.y+threadIdx.y; // (unsigned long)s->get_param(1);
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
11
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
12 unsigned int j = gid;
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
13 j = (j & 0x55555555) << 1 | (j & 0xAAAAAAAA) >> 1;
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
14 j = (j & 0x33333333) << 2 | (j & 0xCCCCCCCC) >> 2;
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
15 j = (j & 0x0F0F0F0F) << 4 | (j & 0xF0F0F0F0) >> 4;
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
16 j = (j & 0x00FF00FF) << 8 | (j & 0xFF00FF00) >> 8;
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
17 j = (j & 0x0000FFFF) << 16 | (j & 0xFFFF0000) >> 16;
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
18
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
19 j >>= (32-m);
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
20
2007
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
21 dst[nid*n+j].x = src[nid*n+gid].x;
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
22 dst[nid*n+j].y = src[nid*n+gid].y;
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
23 }
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
24
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
25 __global__ void
2007
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
26 butterfly(float2* x, float2* w, int m, int n, int iter, unsigned int flag)
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
27 {
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
28 unsigned long gid = blockIdx.x*blockDim.x+threadIdx.x; // (unsigned long)s->get_param(0);
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
29 unsigned long nid = blockIdx.y*blockDim.y+threadIdx.y; // (unsigned long)s->get_param(1);
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
30
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
31 int butterflySize = 1 << (iter-1);
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
32 int butterflyGrpDist = 1 << iter;
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
33 int butterflyGrpNum = n >> iter;
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
34 int butterflyGrpBase = (gid >> (iter-1))*(butterflyGrpDist);
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
35 int butterflyGrpOffset = gid & (butterflySize-1);
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
36
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
37 int a = nid * n + butterflyGrpBase + butterflyGrpOffset;
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
38 int b = a + butterflySize;
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
39
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
40 int l = butterflyGrpNum * butterflyGrpOffset;
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
41
2007
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
42 float2 xa, xb, xbxx, xbyy, wab, wayx, wbyx, resa, resb;
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
43
2007
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
44 xa.x = x[a].x;
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
45 xa.y = x[a].y;
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
46 xb.x = x[b].x;
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
47 xb.y = x[b].y;
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
48 xbxx.x = xbxx.y = xb.x;
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
49 xbyy.x = xbyy.y = xb.y;
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
50
2007
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
51 wab.x = (float)((unsigned int)w[l].x);
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
52 wab.y = (float)((unsigned int)w[l].y ^ (unsigned int)flag);
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
53
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
54 wayx.x = (float)((unsigned int)wab.y ^ (unsigned int)0x80000000);
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
55 wayx.y = (float)((unsigned int)wab.x ^ (unsigned int)0x0);
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
56
2007
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
57 wbyx.x = (float)((unsigned int)wab.y ^ (unsigned int)0x0);
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
58 wbyx.y = (float)((unsigned int)wab.x ^ (unsigned int)0x80000000);
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
59
2007
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
60 resa.x = xa.x + xbxx.x*wab.x + xbyy.x*wayx.x;
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
61 resa.y = xa.y + xbxx.y*wab.y + xbyy.y*wayx.y;
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
62
2007
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
63 resb.x = xa.x - xbxx.x*wab.x + xbyy.x*wbyx.x;
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
64 resb.y = xa.y - xbxx.y*wab.y + xbyy.y*wbyx.y;
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
65
2007
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
66 x[a].x = resa.x;
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
67 x[a].y = resa.y;
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
68 x[b].x = resb.x;
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
69 x[b].y = resb.y;
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
70 }
2007
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
71
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
72 __global__ void
2007
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
73 highPassFilter(float2* image, int n, int radius)
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
74 {
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
75 unsigned long xgid = blockIdx.x*blockDim.x+threadIdx.x; // (unsigned long)s->get_param(0);
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
76 unsigned long ygid = blockIdx.y*blockDim.y+threadIdx.y; // (unsigned long)s->get_param(1);
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
77
2007
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
78 int2 n_2;
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
79 n_2.x = n_2.y = n>>1;
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
80
2007
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
81 int2 mask;
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
82 mask.x = mask.y = n-1;
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
83
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
84 int2 gid;
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
85 gid.x = (xgid + n_2.x) & mask.x;
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
86 gid.y = (ygid + n_2.y) & mask.y;
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
87
2007
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
88 int2 diff;
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
89 diff.x = n_2.x - gid.x;
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
90 diff.y = n_2.y - gid.y;
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
91
2007
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
92 int2 diff2;
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
93 diff2.x = diff.x * diff.x;
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
94 diff2.y = diff.y * diff.y;
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
95
2007
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
96 int dist2 = diff2.x + diff2.y;
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
97
2007
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
98 int2 window;
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
99
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
100 if (dist2 < radius*radius) {
2007
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
101 window.x = window.y = (int)0L;
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
102 } else {
2007
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
103 window.x = window.y = (int)-1L;
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
104 }
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
105
2007
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
106 image[ygid*n+xgid].x = (float)((int)image[ygid*n+xgid].x & window.x);
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
107 image[ygid*n+xgid].y = (float)((int)image[ygid*n+xgid].y & window.y);
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
108 }
2007
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
109
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
110 __global__ void
2007
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
111 norm(float2* x, int n)
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
112 {
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
113 unsigned long gid = blockIdx.x*blockDim.x+threadIdx.x; // (unsigned long)s->get_param(0);
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
114 unsigned long nid = blockIdx.y*blockDim.y+threadIdx.y; //(unsigned long)s->get_param(1);
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
115
2007
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
116 x[nid*n+gid].x = x[nid*n+gid].x / (float)n;
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
117 x[nid*n+gid].y = x[nid*n+gid].y / (float)n;
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
118 }
2007
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
119
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
120
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
121
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
122 __global__ void
2007
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
123 spinFact(float2* w, int n)
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
124 {
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
125 unsigned long i = blockIdx.x*blockDim.x+threadIdx.x; // (unsigned long)s->get_param(0);
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
126
2007
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
127 float2 angle;
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
128 angle.x = (float)(2*i*PI/(float)n);
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
129 angle.y = (float)((2*i*PI/(float)n) + PI_2);
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
130
2007
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
131 w[i].x = cos(angle.x);
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
132 w[i].y = cos(angle.y);
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
133 }
2007
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
134
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
135 __global__ void
2007
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
136 transpose(float2* dst, float2* src, int n)
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
137 {
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
138 unsigned long xgid = blockIdx.x*blockDim.x+threadIdx.x; // (unsigned long)s->get_param(0);
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
139 unsigned long ygid = blockIdx.y*blockDim.y*threadIdx.y; // (unsigned long)s->get_param(1);
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
140
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
141 unsigned int iid = ygid * n + xgid;
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
142 unsigned int oid = xgid * n + ygid;
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
143
2007
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
144 dst[oid].x = src[iid].x;
bc2121b09cbc kernel done
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 2006
diff changeset
145 dst[oid].y = src[iid].y;
2006
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
146 }
f6aa6d6a3fa2 add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
147 }