annotate findUnicode.pl @ 3:0285a01caf93

fix
author Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date Sat, 15 Jun 2013 18:23:21 +0900
parents 7f5d4dad9d6b
children 8da43f287516
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
diff changeset
#!/usr/bin/perl

# find used unicode
#
# Reads text from the files named on the command line (or from STDIN when
# none are given) and counts every character whose code point is at least 128
# and at most 12288 (U+3000).  Then, for each file in the current directory
# matching [0-9]*.bdf, it prints the counted code points that have no
# "ENCODING <n>" line in that file, in ascending numeric order.
use strict;
use warnings;
use utf8;
use open qw(:std :utf8); # input/output default encoding will be UTF-8, it looks like default

# Code points strictly above this value are skipped as CJKV.
# Note that U+3000 itself is NOT skipped (the comparison is ">").
my $CJKV_LIMIT = 0x3000;    # 12288

my %used;    # code point (decimal) => number of occurrences in the input

while (my $line = <>) {
    for my $ch (split //, $line) {
        my $cp = ord $ch;
        next if $cp < 128;            # plain ASCII is never reported
        next if $cp > $CJKV_LIMIT;    # ignore CJKV
        $used{$cp}++;
    }
}

for my $bdf (glob('[0-9]*.bdf')) {
    # Only the ASCII "ENCODING <n>" lines are of interest, so read the font
    # as raw bytes instead of through the default :utf8 layer set above.
    my $fh;
    unless (open $fh, '<:raw', $bdf) {
        # Without this check an unreadable font would silently look empty
        # and every used character would be reported as missing from it.
        warn "cannot open $bdf: $!\n";
        next;
    }

    my %has;    # code point (decimal) => 1 for every glyph the font declares
    while (my $line = <$fh>) {
        $has{$1} = 1 if $line =~ /^ENCODING\s+(\d+)/;
    }
    close $fh;

    # Report, in numeric order, each used code point the font does not declare.
    for my $cp (sort { $a <=> $b } grep { !exists $has{$_} } keys %used) {
        printf "%s %d 0x%x is not in %s\n", chr($cp), $cp, $cp, $bdf;
    }
}