Mercurial > hg > CbC > CbC_gcc
comparison libcpp/makeucnid.c @ 0:a06113de4d67
first commit
author | kent <kent@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Fri, 17 Jul 2009 14:47:48 +0900 |
parents | |
children | 04ced10e8804 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:a06113de4d67 |
---|---|
1 /* Make ucnid.h from various sources. | |
2 Copyright (C) 2005, 2009 Free Software Foundation, Inc. | |
3 | |
4 This program is free software; you can redistribute it and/or modify it | |
5 under the terms of the GNU General Public License as published by the | |
6 Free Software Foundation; either version 3, or (at your option) any | |
7 later version. | |
8 | |
9 This program is distributed in the hope that it will be useful, | |
10 but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 GNU General Public License for more details. | |
13 | |
14 You should have received a copy of the GNU General Public License | |
15 along with this program; see the file COPYING3. If not see | |
16 <http://www.gnu.org/licenses/>. */ | |
17 | |
18 /* Run this program as | |
19 ./makeucnid ucnid.tab UnicodeData.txt DerivedNormalizationProps.txt \ | |
20 > ucnid.h | |
21 */ | |
22 | |
23 #include <stdio.h> | |
24 #include <string.h> | |
25 #include <ctype.h> | |
26 #include <stdbool.h> | |
27 #include <stdlib.h> | |
28 | |
/* Property bits kept for each code point in flags[].  */
enum {
  C99 = 1 << 0,           /* Listed under [C99] in ucnid.tab.  */
  CXX = 1 << 1,           /* Listed under [CXX] in ucnid.tab.  */
  digit = 1 << 2,         /* General category starts with 'N'.  */
  not_NFC = 1 << 3,       /* NFC_QC is N.  */
  not_NFKC = 1 << 4,      /* NFKC_QC is N.  */
  maybe_not_NFC = 1 << 5  /* NFC_QC is M.  */
};
37 | |
/* Tables indexed by code point, covering 0 through 0xFFFF.  */

/* Bitwise OR of the property bits for each code point.  */
static unsigned int flags[0x10000];

/* Up to two code points of the decomposition recorded by read_table;
   left as zeros when nothing was recorded.  */
static unsigned short decomp[0x10000][2];

/* Canonical combining class read from UnicodeData.txt.  */
static unsigned char combining_value[0x10000];
41 | |
/* Print the message S and a newline on stderr, then terminate the
   program with exit status 1.  Does not return.  */

static void
fail (const char *s)
{
  fputs (s, stderr);
  fputc ('\n', stderr);
  exit (1);
}
50 | |
51 /* Read ucnid.tab and set the C99 and CXX flags in header[]. */ | |
52 | |
53 static void | |
54 read_ucnid (const char *fname) | |
55 { | |
56 FILE *f = fopen (fname, "r"); | |
57 unsigned fl = 0; | |
58 | |
59 if (!f) | |
60 fail ("opening ucnid.tab"); | |
61 for (;;) | |
62 { | |
63 char line[256]; | |
64 | |
65 if (!fgets (line, sizeof (line), f)) | |
66 break; | |
67 if (strcmp (line, "[C99]\n") == 0) | |
68 fl = C99; | |
69 else if (strcmp (line, "[CXX]\n") == 0) | |
70 fl = CXX; | |
71 else if (isxdigit (line[0])) | |
72 { | |
73 char *l = line; | |
74 while (*l) | |
75 { | |
76 unsigned long start, end; | |
77 char *endptr; | |
78 start = strtoul (l, &endptr, 16); | |
79 if (endptr == l || (*endptr != '-' && ! isspace (*endptr))) | |
80 fail ("parsing ucnid.tab [1]"); | |
81 l = endptr; | |
82 if (*l != '-') | |
83 end = start; | |
84 else | |
85 { | |
86 end = strtoul (l + 1, &endptr, 16); | |
87 if (end < start) | |
88 fail ("parsing ucnid.tab, end before start"); | |
89 l = endptr; | |
90 if (! isspace (*l)) | |
91 fail ("parsing ucnid.tab, junk after range"); | |
92 } | |
93 while (isspace (*l)) | |
94 l++; | |
95 if (end > 0xFFFF) | |
96 fail ("parsing ucnid.tab, end too large"); | |
97 while (start <= end) | |
98 flags[start++] |= fl; | |
99 } | |
100 } | |
101 } | |
102 if (ferror (f)) | |
103 fail ("reading ucnid.tab"); | |
104 fclose (f); | |
105 } | |
106 | |
107 /* Read UnicodeData.txt and set the 'digit' flag, and | |
108 also fill in the 'decomp' table to be the decompositions of | |
109 characters for which both the character decomposed and all the code | |
110 points in the decomposition are either C99 or CXX. */ | |
111 | |
112 static void | |
113 read_table (char *fname) | |
114 { | |
115 FILE * f = fopen (fname, "r"); | |
116 | |
117 if (!f) | |
118 fail ("opening UnicodeData.txt"); | |
119 for (;;) | |
120 { | |
121 char line[256]; | |
122 unsigned long codepoint, this_decomp[4]; | |
123 char *l; | |
124 int i; | |
125 int decomp_useful; | |
126 | |
127 if (!fgets (line, sizeof (line), f)) | |
128 break; | |
129 codepoint = strtoul (line, &l, 16); | |
130 if (l == line || *l != ';') | |
131 fail ("parsing UnicodeData.txt, reading code point"); | |
132 if (codepoint > 0xffff || ! (flags[codepoint] & (C99 | CXX))) | |
133 continue; | |
134 | |
135 do { | |
136 l++; | |
137 } while (*l != ';'); | |
138 /* Category value; things starting with 'N' are numbers of some | |
139 kind. */ | |
140 if (*++l == 'N') | |
141 flags[codepoint] |= digit; | |
142 | |
143 do { | |
144 l++; | |
145 } while (*l != ';'); | |
146 /* Canonical combining class; in NFC/NFKC, they must be increasing | |
147 (or zero). */ | |
148 if (! isdigit (*++l)) | |
149 fail ("parsing UnicodeData.txt, combining class not number"); | |
150 combining_value[codepoint] = strtoul (l, &l, 10); | |
151 if (*l++ != ';') | |
152 fail ("parsing UnicodeData.txt, junk after combining class"); | |
153 | |
154 /* Skip over bidi value. */ | |
155 do { | |
156 l++; | |
157 } while (*l != ';'); | |
158 | |
159 /* Decomposition mapping. */ | |
160 decomp_useful = flags[codepoint]; | |
161 if (*++l == '<') /* Compatibility mapping. */ | |
162 continue; | |
163 for (i = 0; i < 4; i++) | |
164 { | |
165 if (*l == ';') | |
166 break; | |
167 if (!isxdigit (*l)) | |
168 fail ("parsing UnicodeData.txt, decomposition format"); | |
169 this_decomp[i] = strtoul (l, &l, 16); | |
170 decomp_useful &= flags[this_decomp[i]]; | |
171 while (isspace (*l)) | |
172 l++; | |
173 } | |
174 if (i > 2) /* Decomposition too long. */ | |
175 fail ("parsing UnicodeData.txt, decomposition too long"); | |
176 if (decomp_useful) | |
177 while (--i >= 0) | |
178 decomp[codepoint][i] = this_decomp[i]; | |
179 } | |
180 if (ferror (f)) | |
181 fail ("reading UnicodeData.txt"); | |
182 fclose (f); | |
183 } | |
184 | |
185 /* Read DerivedNormalizationProps.txt and set the flags that say whether | |
186 a character is in NFC, NFKC, or is context-dependent. */ | |
187 | |
188 static void | |
189 read_derived (const char *fname) | |
190 { | |
191 FILE * f = fopen (fname, "r"); | |
192 | |
193 if (!f) | |
194 fail ("opening DerivedNormalizationProps.txt"); | |
195 for (;;) | |
196 { | |
197 char line[256]; | |
198 unsigned long start, end; | |
199 char *l; | |
200 bool not_NFC_p, not_NFKC_p, maybe_not_NFC_p; | |
201 | |
202 if (!fgets (line, sizeof (line), f)) | |
203 break; | |
204 not_NFC_p = (strstr (line, "; NFC_QC; N") != NULL); | |
205 not_NFKC_p = (strstr (line, "; NFKC_QC; N") != NULL); | |
206 maybe_not_NFC_p = (strstr (line, "; NFC_QC; M") != NULL); | |
207 if (! not_NFC_p && ! not_NFKC_p && ! maybe_not_NFC_p) | |
208 continue; | |
209 | |
210 start = strtoul (line, &l, 16); | |
211 if (l == line) | |
212 fail ("parsing DerivedNormalizationProps.txt, reading start"); | |
213 if (start > 0xffff) | |
214 continue; | |
215 if (*l == '.' && l[1] == '.') | |
216 end = strtoul (l + 2, &l, 16); | |
217 else | |
218 end = start; | |
219 | |
220 while (start <= end) | |
221 flags[start++] |= ((not_NFC_p ? not_NFC : 0) | |
222 | (not_NFKC_p ? not_NFKC : 0) | |
223 | (maybe_not_NFC_p ? maybe_not_NFC : 0) | |
224 ); | |
225 } | |
226 if (ferror (f)) | |
227 fail ("reading DerivedNormalizationProps.txt"); | |
228 fclose (f); | |
229 } | |
230 | |
231 /* Write out the table. | |
232 The table consists of two words per entry. The first word is the flags | |
233 for the unicode code points up to and including the second word. */ | |
234 | |
235 static void | |
236 write_table (void) | |
237 { | |
238 unsigned i; | |
239 unsigned last_flag = flags[0]; | |
240 bool really_safe = decomp[0][0] == 0; | |
241 unsigned char last_combine = combining_value[0]; | |
242 | |
243 for (i = 1; i <= 65536; i++) | |
244 if (i == 65536 | |
245 || (flags[i] != last_flag && ((flags[i] | last_flag) & (C99 | CXX))) | |
246 || really_safe != (decomp[i][0] == 0) | |
247 || combining_value[i] != last_combine) | |
248 { | |
249 printf ("{ %s|%s|%s|%s|%s|%s|%s, %3d, %#06x },\n", | |
250 last_flag & C99 ? "C99" : " 0", | |
251 last_flag & digit ? "DIG" : " 0", | |
252 last_flag & CXX ? "CXX" : " 0", | |
253 really_safe ? "CID" : " 0", | |
254 last_flag & not_NFC ? " 0" : "NFC", | |
255 last_flag & not_NFKC ? " 0" : "NKC", | |
256 last_flag & maybe_not_NFC ? "CTX" : " 0", | |
257 combining_value[i - 1], | |
258 i - 1); | |
259 last_flag = flags[i]; | |
260 last_combine = combining_value[0]; | |
261 really_safe = decomp[i][0] == 0; | |
262 } | |
263 } | |
264 | |
265 /* Print out the huge copyright notice.
   The fixed text in copyright[] is written to stdout with puts, which
   appends a newline of its own after the text.  The text is itself a
   C comment: the Free Software Foundation GPL notice followed by the
   Unicode, Inc. copyright and permission notice.  */ | |
266 | |
267 static void | |
268 write_copyright (void) | |
269 { | |
270 static const char copyright[] = "\ | |
271 /* Unicode characters and various properties.\n\ | |
272 Copyright (C) 2003, 2005 Free Software Foundation, Inc.\n\ | |
273 \n\ | |
274 This program is free software; you can redistribute it and/or modify it\n\ | |
275 under the terms of the GNU General Public License as published by the\n\ | |
276 Free Software Foundation; either version 3, or (at your option) any\n\ | |
277 later version.\n\ | |
278 \n\ | |
279 This program is distributed in the hope that it will be useful,\n\ | |
280 but WITHOUT ANY WARRANTY; without even the implied warranty of\n\ | |
281 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n\ | |
282 GNU General Public License for more details.\n\ | |
283 \n\ | |
284 You should have received a copy of the GNU General Public License\n\ | |
285 along with this program; see the file COPYING3. If not see\n\ | |
286 <http://www.gnu.org/licenses/>.\n\ | |
287 \n\ | |
288 \n\ | |
289 Copyright (C) 1991-2005 Unicode, Inc. All rights reserved.\n\ | |
290 Distributed under the Terms of Use in\n\ | |
291 http://www.unicode.org/copyright.html.\n\ | |
292 \n\ | |
293 Permission is hereby granted, free of charge, to any person\n\ | |
294 obtaining a copy of the Unicode data files and any associated\n\ | |
295 documentation (the \"Data Files\") or Unicode software and any\n\ | |
296 associated documentation (the \"Software\") to deal in the Data Files\n\ | |
297 or Software without restriction, including without limitation the\n\ | |
298 rights to use, copy, modify, merge, publish, distribute, and/or\n\ | |
299 sell copies of the Data Files or Software, and to permit persons to\n\ | |
300 whom the Data Files or Software are furnished to do so, provided\n\ | |
301 that (a) the above copyright notice(s) and this permission notice\n\ | |
302 appear with all copies of the Data Files or Software, (b) both the\n\ | |
303 above copyright notice(s) and this permission notice appear in\n\ | |
304 associated documentation, and (c) there is clear notice in each\n\ | |
305 modified Data File or in the Software as well as in the\n\ | |
306 documentation associated with the Data File(s) or Software that the\n\ | |
307 data or software has been modified.\n\ | |
308 \n\ | |
309 THE DATA FILES AND SOFTWARE ARE PROVIDED \"AS IS\", WITHOUT WARRANTY\n\ | |
310 OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE\n\ | |
311 WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n\ | |
312 NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE\n\ | |
313 COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR\n\ | |
314 ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY\n\ | |
315 DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,\n\ | |
316 WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS\n\ | |
317 ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE\n\ | |
318 OF THE DATA FILES OR SOFTWARE.\n\ | |
319 \n\ | |
320 Except as contained in this notice, the name of a copyright holder\n\ | |
321 shall not be used in advertising or otherwise to promote the sale,\n\ | |
322 use or other dealings in these Data Files or Software without prior\n\ | |
323 written authorization of the copyright holder. */\n"; | |
324 | |
325 puts (copyright); | |
326 } | |
327 | |
/* Main program.

   Expects exactly three arguments: the paths of ucnid.tab,
   UnicodeData.txt and DerivedNormalizationProps.txt, in that order.
   The copyright notice and the generated table are written to stdout.
   Returns 0 on success; every error ends the program through fail.  */

int
main (int argc, char **argv)
{
  /* Too many arguments are just as wrong as too few, so report the
     expected command line rather than guessing which it was.  */
  if (argc != 4)
    fail ("usage: makeucnid ucnid.tab UnicodeData.txt"
          " DerivedNormalizationProps.txt > ucnid.h");
  read_ucnid (argv[1]);
  read_table (argv[2]);
  read_derived (argv[3]);

  write_copyright ();
  write_table ();

  /* The output is normally redirected into a header file; a failed
     write must not leave a truncated file behind unnoticed.  */
  if (fflush (stdout) != 0 || ferror (stdout))
    fail ("writing output");
  return 0;
}