Mercurial > hg > Game > Cerium
annotate example/many_task/gpu/sort_test.cc @ 2048:6796d85f3d6b draft
remove error
author | Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Thu, 28 Jan 2016 00:05:49 +0900 |
parents | fac06524090b |
children |
rev | line source |
---|---|
1520 | 1 #include <stdlib.h> |
2 #include <OpenCL/opencl.h> | |
3 #include <stdio.h> | |
4 #include <fcntl.h> | |
5 #include <string.h> | |
6 #include <sys/time.h> | |
7 #include <sys/stat.h> | |
8 #include "sort.h" | |
9 #define DEFAULT 432 | |
10 | |
11 extern int data_length; | |
12 extern DataPtr data; | |
13 | |
14 // For timing measurements | |
15 static double st_time; | |
16 static double ed_time; | |
17 static int length = DEFAULT; | |
18 | |
1538
fac06524090b
add gpu task wordcount. But not work print
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
1520
diff
changeset
|
19 |
1520 | 20 int |
21 init(int argc, char **argv) | |
22 { | |
23 for (int i = 1; argv[i]; ++i) { | |
24 if (strcmp(argv[i], "--length") == 0 || strcmp(argv[i], "-l") == 0) { | |
25 length = atoi(argv[++i]); | |
26 } | |
27 } | |
28 | |
29 return 0; | |
30 } | |
31 | |
32 int | |
33 get_split_num(int len, int num) | |
34 { | |
35 if (len / num < MAX_BLOCK_SIZE) { | |
36 return num; | |
37 } else { | |
38 // 切り上げ | |
39 return (len + MAX_BLOCK_SIZE - 1) / MAX_BLOCK_SIZE; | |
40 } | |
41 } | |
42 | |
43 | |
/*
 * Return the current gettimeofday() reading as seconds in a double
 * (whole seconds plus the microsecond part scaled by 1e-6).
 */
static double
getTime()
{
    struct timeval now;
    gettimeofday(&now, NULL);

    double whole_seconds = (double)now.tv_sec;
    double fraction = (double)now.tv_usec * 1e-6;
    return whole_seconds + fraction;
}
51 | |
52 void | |
53 show( Data *data, int size ) | |
54 { | |
55 puts("-----------------------------------------------"); | |
56 for(int i=0; i<=size; i++) printf("data[%02d].index = %d\n", i, data[i].index); | |
57 puts("-----------------------------------------------"); | |
58 return; | |
59 } | |
60 | |
61 Sort sorter; | |
62 | |
63 static void | |
64 check_data() | |
65 { | |
66 for(int i=0; i< sorter.data_length-1;i++) { | |
67 if (sorter.data[i].index>sorter.data[i+1].index) { | |
68 printf("Data are not sorted at %d. %d > %d \n",i, sorter.data[i].index,sorter.data[i+1].index); | |
69 return; | |
70 } | |
71 } | |
72 printf("Data are sorted\n"); | |
73 } | |
74 | |
/*
 * Initialisation hook called from main() before the data is generated.
 * Currently does nothing.
 */
void
gpu_init()
{
    // Intentionally empty.
}
fac06524090b
add gpu task wordcount. But not work print
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
1520
diff
changeset
|
80 |
fac06524090b
add gpu task wordcount. But not work print
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
1520
diff
changeset
|
81 void |
fac06524090b
add gpu task wordcount. But not work print
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
1520
diff
changeset
|
82 sort_start(Sort s){ |
fac06524090b
add gpu task wordcount. But not work print
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
1520
diff
changeset
|
83 |
fac06524090b
add gpu task wordcount. But not work print
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
1520
diff
changeset
|
84 Sort sorter = s; |
fac06524090b
add gpu task wordcount. But not work print
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
1520
diff
changeset
|
85 int length = sorter.data_length; |
fac06524090b
add gpu task wordcount. But not work print
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
1520
diff
changeset
|
86 cl_platform_id platform_id = NULL; |
fac06524090b
add gpu task wordcount. But not work print
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
1520
diff
changeset
|
87 cl_uint ret_num_platforms = NULL; |
fac06524090b
add gpu task wordcount. But not work print
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
1520
diff
changeset
|
88 cl_device_id device_id = NULL; |
fac06524090b
add gpu task wordcount. But not work print
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
1520
diff
changeset
|
89 cl_uint ret_num_devices = NULL; |
fac06524090b
add gpu task wordcount. But not work print
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
1520
diff
changeset
|
90 cl_int ret = NULL; |
fac06524090b
add gpu task wordcount. But not work print
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
1520
diff
changeset
|
91 |
1520 | 92 clGetPlatformIDs(1, &platform_id, &ret_num_platforms); |
93 clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, | |
94 &ret_num_devices); | |
95 | |
1538
fac06524090b
add gpu task wordcount. But not work print
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
1520
diff
changeset
|
96 cl_context context = clCreateContext( NULL, 1, &device_id, NULL, NULL, &ret); |
fac06524090b
add gpu task wordcount. But not work print
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
1520
diff
changeset
|
97 cl_command_queue command_queue = clCreateCommandQueue(context, device_id, 0, &ret); |
1520 | 98 |
99 //ファイルオープン | |
100 | |
101 const char* filename = "QuickSort.cl"; | |
102 const char* functionname = "quick_sort"; | |
103 | |
104 int fp = open(filename, O_RDONLY); | |
105 | |
106 if (!fp) { | |
107 fprintf(stderr, "Failed to load kernel.\n"); | |
108 exit(1); | |
109 } | |
110 | |
111 struct stat stats; | |
112 fstat(fp,&stats); | |
113 off_t size = stats.st_size; | |
114 | |
115 if (!size) { | |
116 fprintf(stderr, "Failed to load kernel.\n"); | |
117 exit(1); | |
118 } | |
119 | |
120 char *kernel_src_str = new char[size]; | |
121 size_t kernel_code_size = read(fp, kernel_src_str, size); | |
122 close(fp); | |
123 | |
1538
fac06524090b
add gpu task wordcount. But not work print
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
1520
diff
changeset
|
124 cl_program program = clCreateProgramWithSource(context, 1, (const char **)&kernel_src_str, |
1520 | 125 (const size_t *)&kernel_code_size, &ret); |
126 clBuildProgram(program, 1, &device_id, NULL, NULL, NULL); | |
1538
fac06524090b
add gpu task wordcount. But not work print
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
1520
diff
changeset
|
127 cl_kernel kernel = clCreateKernel(program,functionname, &ret); |
1520 | 128 |
129 //メモリバッファの作成 | |
130 cl_mem mem_count = clCreateBuffer(context, CL_MEM_READ_ONLY,sizeof(int),NULL, &ret); | |
131 cl_mem mem_data = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(Data)*length, NULL, &ret); | |
132 | |
133 st_time = getTime(); | |
134 | |
135 //メモリバッファに入力データを書き込み | |
136 ret = clEnqueueWriteBuffer(command_queue, mem_count, CL_TRUE, 0, | |
137 sizeof(int), &length, 0, NULL, NULL); | |
138 ret = clEnqueueWriteBuffer(command_queue, mem_data, CL_TRUE, 0, | |
139 sizeof(Data)*length, sorter.data, 0, NULL, NULL); | |
140 | |
141 //print_data(data, count, "before"); | |
142 clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&mem_count); | |
143 clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&mem_data); | |
144 | |
1538
fac06524090b
add gpu task wordcount. But not work print
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
1520
diff
changeset
|
145 cl_event ev = NULL; |
fac06524090b
add gpu task wordcount. But not work print
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
1520
diff
changeset
|
146 |
1520 | 147 ret = clEnqueueTask(command_queue, kernel, 0, NULL, &ev); |
148 | |
149 //メモリバッファから結果を取得 | |
150 ret = clEnqueueReadBuffer(command_queue, mem_data, CL_TRUE, 0,sizeof(Data)*length, sorter.data, 1, &ev, NULL); | |
151 clFlush(command_queue); | |
152 ed_time = getTime(); | |
153 show(sorter.data, length-1); | |
154 check_data(); | |
155 printf("Time: %0.6f\n",ed_time-st_time); | |
156 | |
157 clReleaseKernel(kernel); | |
158 clReleaseProgram(program); | |
159 clReleaseMemObject(mem_data); | |
160 clReleaseEvent(ev); | |
161 clReleaseCommandQueue(command_queue); | |
162 clReleaseContext(context); | |
163 | |
164 } | |
165 | |
166 int main(int argc, char *argv[]) { | |
167 | |
168 // 無効な引数ならデフォルトの値として432を設定 | |
169 | |
170 | |
171 if (argc>1) { | |
172 if (init(argc,argv) < 0) { | |
173 return -1; | |
174 } | |
175 } | |
176 | |
177 gpu_init(); | |
178 | |
179 sorter.data = new Data[length]; | |
180 sorter.data_length = length; | |
181 sorter.split_num = get_split_num(sorter.data_length, 1); // (length, cpu_num) | |
182 | |
183 for (int i = 0; i < length; i++) { | |
184 sorter.data[i].index = rand()%10000; | |
185 sorter.data[i].ptr = i; | |
186 } | |
187 | |
188 sort_start(sorter); | |
189 return 0; | |
190 } |