Mercurial > hg > Game > Cerium
annotate example/many_task/sort.cc @ 1519:9a5f87f4b60f draft
add spe_cpu for gpu
author | Shinji KONO <kono@ie.u-ryukyu.ac.jp> |
---|---|
date | Sat, 10 Nov 2012 18:21:16 +0900 |
parents | e544f9747169 |
children | 027d99ecb50e |
rev | line source |
---|---|
227 | 1 #include "TaskManager.h" |
573 | 2 #include "SchedTask.h" |
227 | 3 #include "sort.h" |
4 #include "Func.h" | |
934
83b64b7a51bd
sort fix ( not working now )
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
932
diff
changeset
|
5 #include <string.h> |
227 | 6 |
945
9ed1c4a877ca
sort example fix ( simple task accepts one param and more compatible with old task)
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
941
diff
changeset
|
7 extern int get_split_num(int len, int num); |
940
e01b551f25d6
unknown dead lock still...
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
939
diff
changeset
|
8 extern int all; // allocate task at once |
1519 | 9 extern CPU_TYPE spe_cpu ; |
227 | 10 |
11 /** | |
12 * 一つの block にある data の数が MAX_BLOCK_SIZE 超えないような | |
13 * len の分割数を返す | |
14 * | |
15 * @param len sort する data の総数 | |
16 * @param num 使用する SPE の数 | |
17 * | |
18 * @return data の分割数 | |
19 * | |
20 * TODO: | |
21 * len が num 以下とか考えてません | |
22 */ | |
945
9ed1c4a877ca
sort example fix ( simple task accepts one param and more compatible with old task)
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
941
diff
changeset
|
23 int |
227 | 24 get_split_num(int len, int num) |
25 { | |
26 if (len / num < MAX_BLOCK_SIZE) { | |
1515 | 27 return num; |
227 | 28 } else { |
1515 | 29 // 切り上げ |
30 return (len + MAX_BLOCK_SIZE - 1) / MAX_BLOCK_SIZE; | |
227 | 31 } |
1515 | 32 } |
227 | 33 |
934
83b64b7a51bd
sort fix ( not working now )
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
932
diff
changeset
|
34 |
227 | 35 /** |
36 * btask が全て終了したら、再び sort_start を実行する | |
37 * @param d 生成された btask の数 | |
38 */ | |
935 | 39 |
945
9ed1c4a877ca
sort example fix ( simple task accepts one param and more compatible with old task)
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
941
diff
changeset
|
40 SchedDefineTask1(SortSimple, sort_start ); |
935 | 41 |
42 static int | |
945
9ed1c4a877ca
sort example fix ( simple task accepts one param and more compatible with old task)
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
941
diff
changeset
|
43 sort_start(SchedTask *manager, void *d, void *e) |
227 | 44 { |
945
9ed1c4a877ca
sort example fix ( simple task accepts one param and more compatible with old task)
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
941
diff
changeset
|
45 Sort *s = (Sort*)manager->get_param(0); |
9ed1c4a877ca
sort example fix ( simple task accepts one param and more compatible with old task)
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
941
diff
changeset
|
46 int half_num = s->split_num-1; |
9ed1c4a877ca
sort example fix ( simple task accepts one param and more compatible with old task)
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
941
diff
changeset
|
47 static int sort_count = s->split_num; // sort 完了に必要な回数 |
934
83b64b7a51bd
sort fix ( not working now )
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
932
diff
changeset
|
48 |
83b64b7a51bd
sort fix ( not working now )
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
932
diff
changeset
|
49 // 一つのタスクで sort する data 数 |
945
9ed1c4a877ca
sort example fix ( simple task accepts one param and more compatible with old task)
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
941
diff
changeset
|
50 int block_num = (s->data_length + s->split_num -1)/s->split_num; |
934
83b64b7a51bd
sort fix ( not working now )
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
932
diff
changeset
|
51 int half_block_num = block_num/2; |
83b64b7a51bd
sort fix ( not working now )
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
932
diff
changeset
|
52 |
945
9ed1c4a877ca
sort example fix ( simple task accepts one param and more compatible with old task)
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
941
diff
changeset
|
53 int last_block_num = s->data_length - (s->split_num-1)*block_num; |
934
83b64b7a51bd
sort fix ( not working now )
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
932
diff
changeset
|
54 int last_half_block_num = half_block_num+(last_block_num/2); |
83b64b7a51bd
sort fix ( not working now )
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
932
diff
changeset
|
55 |
932
53ad3a61b40b
sort test (add swap())
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
674
diff
changeset
|
56 if (--sort_count < 0) { |
1515 | 57 return 0; |
932
53ad3a61b40b
sort test (add swap())
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
674
diff
changeset
|
58 } |
53ad3a61b40b
sort test (add swap())
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
674
diff
changeset
|
59 |
53ad3a61b40b
sort test (add swap())
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
674
diff
changeset
|
60 |
1516
e544f9747169
fix gpu kernel source
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
1515
diff
changeset
|
61 |
945
9ed1c4a877ca
sort example fix ( simple task accepts one param and more compatible with old task)
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
941
diff
changeset
|
62 for (int i = 0; i < s->split_num-1; i++) { |
1515 | 63 s->fsort[i] = manager->create_task(QUICK_SORT, |
64 (memaddr)&s->data[i*block_num], sizeof(Data)*block_num, | |
65 (memaddr)&s->data[i*block_num], sizeof(Data)*block_num); | |
1516
e544f9747169
fix gpu kernel source
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
1515
diff
changeset
|
66 |
1515 | 67 s->fsort[i]->flip(); |
1516
e544f9747169
fix gpu kernel source
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
1515
diff
changeset
|
68 |
1515 | 69 if (i>0 && s->bsort[i-1]) { |
70 s->fsort[i]->wait_for(s->bsort[i-1]); | |
71 } | |
72 if (i<s->split_num-2 && s->bsort[i]) { | |
1516
e544f9747169
fix gpu kernel source
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
1515
diff
changeset
|
73 s->fsort[i]->wait_for(s->bsort[i]); |
1515 | 74 } |
1519 | 75 s->fsort[i]->set_cpu(spe_cpu); |
1515 | 76 s->fsort[i]->set_param(0,(memaddr)block_num); |
932
53ad3a61b40b
sort test (add swap())
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
674
diff
changeset
|
77 } |
53ad3a61b40b
sort test (add swap())
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
674
diff
changeset
|
78 |
53ad3a61b40b
sort test (add swap())
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
674
diff
changeset
|
79 // 最後の block は端数なので last_block_num を使う |
53ad3a61b40b
sort test (add swap())
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
674
diff
changeset
|
80 { |
1515 | 81 int i = s->split_num-1; |
932
53ad3a61b40b
sort test (add swap())
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
674
diff
changeset
|
82 |
1515 | 83 s->fsort[i] = manager->create_task(QUICK_SORT, |
84 (memaddr)&s->data[i*block_num], sizeof(Data)*last_block_num, | |
85 (memaddr)&s->data[i*block_num], sizeof(Data)*last_block_num); | |
86 s->fsort[i]->flip(); | |
87 if (i>0 && s->bsort[i-1]) { | |
88 s->fsort[i]->wait_for(s->bsort[i-1]); | |
89 } | |
1519 | 90 s->fsort[i]->set_cpu(spe_cpu); |
1515 | 91 s->fsort[i]->set_param(0,(memaddr)last_block_num); |
932
53ad3a61b40b
sort test (add swap())
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
674
diff
changeset
|
92 } |
53ad3a61b40b
sort test (add swap())
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
674
diff
changeset
|
93 |
945
9ed1c4a877ca
sort example fix ( simple task accepts one param and more compatible with old task)
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
941
diff
changeset
|
94 if (s->split_num > 1) { |
932
53ad3a61b40b
sort test (add swap())
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
674
diff
changeset
|
95 |
1515 | 96 for (int i = 0; i < half_num-1; i++) { |
97 if (s->bsort[i]) manager->free_htask(s->bsort[i]); | |
98 s->bsort[i] = manager->create_task(QUICK_SORT, | |
99 (memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*block_num, | |
100 (memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*block_num); | |
101 s->bsort[i]->flip(); | |
1519 | 102 s->bsort[i]->set_cpu(spe_cpu); |
1515 | 103 s->bsort[i]->set_param(0,(memaddr)block_num); |
104 } | |
932
53ad3a61b40b
sort test (add swap())
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
674
diff
changeset
|
105 |
1515 | 106 { |
107 int i = half_num-1; | |
932
53ad3a61b40b
sort test (add swap())
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
674
diff
changeset
|
108 |
1515 | 109 if (s->bsort[i]) manager->free_htask(s->bsort[i]); |
110 s->bsort[i] = manager->create_task(QUICK_SORT, | |
111 (memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*last_half_block_num, | |
112 (memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*last_half_block_num); | |
113 s->bsort[i]->flip(); | |
1519 | 114 s->bsort[i]->set_cpu(spe_cpu); |
1515 | 115 s->bsort[i]->set_param(0,(memaddr)last_half_block_num); |
116 } | |
117 | |
118 for (int i = 0; i < half_num; i++) { | |
119 s->bsort[i]->wait_for(s->fsort[i]); | |
120 s->bsort[i]->wait_for(s->fsort[i+1]); | |
121 s->bsort[i]->no_auto_free(); | |
122 s->bsort[i]->spawn(); | |
123 } | |
932
53ad3a61b40b
sort test (add swap())
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
674
diff
changeset
|
124 } |
53ad3a61b40b
sort test (add swap())
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
674
diff
changeset
|
125 |
945
9ed1c4a877ca
sort example fix ( simple task accepts one param and more compatible with old task)
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
941
diff
changeset
|
126 HTaskPtr restart = manager->create_task(SortSimple,0,0,0,0); |
9ed1c4a877ca
sort example fix ( simple task accepts one param and more compatible with old task)
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
941
diff
changeset
|
127 restart->set_param(0,(memaddr)s); |
9ed1c4a877ca
sort example fix ( simple task accepts one param and more compatible with old task)
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
941
diff
changeset
|
128 if (!all) restart->wait_for(s->fsort[0]); |
9ed1c4a877ca
sort example fix ( simple task accepts one param and more compatible with old task)
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
941
diff
changeset
|
129 for (int i = 0; i < s->split_num; i++) { |
1515 | 130 s->fsort[i]->spawn(); |
932
53ad3a61b40b
sort test (add swap())
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
674
diff
changeset
|
131 } |
954 | 132 if (sort_count == 1) { |
1515 | 133 // last loop wait for all task |
134 // we should not need this? | |
135 for (int i = 0; i < half_num; i++) { | |
136 restart->wait_for(s->bsort[i]); | |
137 s->bsort[i]->auto_free(); | |
138 } | |
954 | 139 } |
935 | 140 restart->spawn(); |
1516
e544f9747169
fix gpu kernel source
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
1515
diff
changeset
|
141 |
945
9ed1c4a877ca
sort example fix ( simple task accepts one param and more compatible with old task)
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
941
diff
changeset
|
142 return 0; |
934
83b64b7a51bd
sort fix ( not working now )
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
932
diff
changeset
|
143 } |
227 | 144 |
935 | 145 |
651 | 146 /* end */ |