Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purposes.
1 #!/usr/bin/perl
2 #
3 # This Source Code Form is subject to the terms of the Mozilla Public
4 # License, v. 2.0. If a copy of the MPL was not distributed with this
5 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 ######################################################################
8 #
9 # Initial global variable
10 #
11 ######################################################################
# Scratch globals set up at start-up.  No other statement visible in this
# script reads them.
($ui, $li) = (0, 0);
%utot = ();
######################################################################
#
# Open the input and output files
#
# The handles stay barewords because the rest of the script refers to
# them by name (UNICODATA, CLASS, SIMP, OUT, HEADER).  Every path is
# relative to the current working directory.  The three-argument form
# of open keeps the mode separate from the path, and $! is appended to
# each message so the real cause of a failure is reported.
#
######################################################################

# Input: the Unicode character database.
open(UNICODATA, "<", "../../unicharutil/tools/UnicodeData-Latest.txt")
    || die "cannot find UnicodeData-Latest.txt: $!";

# Input: the JIS X 4051 class file.
open(CLASS, "<", "jisx4051class.txt")
    || die "cannot find jisx4051class.txt: $!";

# Input: the JIS X 4051 class simplified mapping.
open(SIMP, "<", "jisx4051simp.txt")
    || die "cannot find jisx4051simp.txt: $!";

# Output: the HTML analysis report.
open(OUT, ">", "anzx4051.html")
    || die "cannot open output anzx4051.html file: $!";

# Output: the generated C header.
open(HEADER, ">", "../src/jisx4051class.h")
    || die "cannot open output ../src/jisx4051class.h file: $!";
######################################################################
#
# Write the fixed preamble of the HTML report (license, title, heading)
#
######################################################################
$hthmlheader = <<"END_OF_HTML";
<!-- This Source Code Form is subject to the terms of the Mozilla Public
- License, v. 2.0. If a copy of the MPL was not distributed with this
- file, You can obtain one at http://mozilla.org/MPL/2.0/. -->
<HTML>
<HEAD>
<TITLE>
Analysis of JIS X 4051 to Unicode General Category Mapping
</TITLE>
</HEAD>
<BODY>
<H1>
Analysis of JIS X 4051 to Unicode General Category Mapping
</H1>
END_OF_HTML

# The report body is appended to OUT later by printreport().
print OUT $hthmlheader;
######################################################################
#
# Write the license and "generated file" notice of the C header
#
######################################################################
$npl = <<"END_OF_NPL";
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
/*
DO NOT EDIT THIS DOCUMENT !!! THIS DOCUMENT IS GENERATED BY
mozilla/intl/lwbrk/tools/anzx4051.pl
*/
END_OF_NPL

# The lookup tables are appended to HEADER later by printarray().
print HEADER $npl;
# Data loaded from the Unicode database, keyed by the code point exactly
# as it is spelled in that file (field 0):
%gcat = ();         # code point -> General Category (field 2)
%dcat = ();         # code point -> first character of that category
%gcount = ();       # General Category -> number of database lines
%dcount = ();       # first category character -> number of database lines

# Data loaded from the simplified mapping file:
%simp = ();         # field 0 -> field 1 (the simplified class)
%sccount = ();      # simplified class -> number of mapping lines

# Data built while reading the JIS X 4051 class file:
%occ = ();          # numeric code point -> "simplified class | class"
%rangecount = ();   # " HH:simplified class" -> code points counted, where
                    # HH is the first two digits of DecToHex(code point)
105 ######################################################################
106 #
107 # Process the file line by line
108 #
109 ######################################################################
# Read the Unicode character database.  Each line is a ';'-separated
# record; only field 0 (the code point as written in the file) and
# field 2 (the General Category) are used.
while (my $line = <UNICODATA>) {
    # chomp only removes a trailing newline, so a final line without one
    # keeps its last character (chop would delete it).
    chomp $line;

    # A blank line carries no record; skipping it keeps an empty-string
    # key out of %gcat, %gcount and %dcount.
    next unless $line =~ /\S/;

    my @field    = split(/;/, $line);
    my $code     = $field[0];               # the unicode value
    my $category = $field[2];
    my $letter   = substr($category, 0, 1);

    $gcat{$code} = $category;
    $dcat{$code} = $letter;
    $gcount{$category}++;
    $dcount{$letter}++;
}

# Close the handle that was opened as UNICODATA.
close(UNICODATA);
# Read the simplified mapping.  Each line is ';'-separated: field 0 is the
# class name used by the class file, field 1 the simplified class.
while (my $line = <SIMP>) {
    # Field 1 is consumed below, so the line must lose only its newline:
    # chop would delete a data character from a final line without one.
    chomp $line;

    # A blank line would otherwise add an empty-string key to %sccount.
    next unless $line =~ /\S/;

    my @field = split(/;/, $line);

    $simp{$field[0]} = $field[1];
    $sccount{$field[1]}++;
}
close(SIMP);
# GetClass($u)
#
# Return the General Category string for the numeric code point $u.
#
# The value recorded in %gcat under DecToHex($u) is used when there is one.
# Otherwise a few hard-coded ranges are answered directly:
#   0x3400 - 0x9fa5  "Han"
#   0xac00 - 0xd7a3  "Lo"
#   0xd800 - 0xdfff  "Cs"
#   0xe000 - 0xf8ff  "Co"
# For any other code point a warning is printed to standard output and an
# empty string is returned explicitly, instead of whatever printf returned.
sub GetClass{
    my ($u) = @_;
    my $hex = DecToHex($u);

    # Lexical on purpose: the package variable $g must not be overwritten.
    my $category = $gcat{$hex};
    return $category if defined($category) && $category ne "";

    return "Han" if 0x3400 <= $u && $u <= 0x9fa5;
    return "Lo"  if 0xac00 <= $u && $u <= 0xd7a3;
    return "Cs"  if 0xd800 <= $u && $u <= 0xdfff;
    return "Co"  if 0xe000 <= $u && $u <= 0xf8ff;

    printf "WARNING !!!! Cannot find General Category for U+%s \n" , $hex;
    return "";
}
# GetDClass($u)
#
# Return the one-letter category for the numeric code point $u.
#
# The value recorded in %dcat under DecToHex($u) is used when there is one.
# Otherwise a few hard-coded ranges are answered directly:
#   0x3400 - 0x9fa5  "Han"
#   0xac00 - 0xd7a3  "L"
#   0xd800 - 0xf8ff  "C"
# For any other code point a warning is printed to standard output and an
# empty string is returned explicitly, instead of whatever printf returned.
sub GetDClass{
    my ($u) = @_;
    my $hex = DecToHex($u);

    # Lexical on purpose: the package variable $g must not be overwritten.
    my $letter = $dcat{$hex};
    return $letter if defined($letter) && $letter ne "";

    return "Han" if 0x3400 <= $u && $u <= 0x9fa5;
    return "L"   if 0xac00 <= $u && $u <= 0xd7a3;

    # The original four adjacent ranges (0xd800-0xdb7f, 0xdb80-0xdbff,
    # 0xdc00-0xdfff, 0xe000-0xf8ff) all answered "C".
    return "C"   if 0xd800 <= $u && $u <= 0xf8ff;

    printf "WARNING !!!! Cannot find Detailed General Category for U+%s \n" , $hex;
    return "";
}
# DecToHex($d)
#
# Format a number as upper-case hexadecimal, zero-padded to at least four
# digits (larger values simply use more digits).
sub DecToHex{
    my $value = shift;
    return sprintf("%04X", $value);
}
# Tallies filled while reading the class file.  Each key is the simplified
# class immediately followed by a category string:
#   %gtotal - simplified class . GetClass(code point)
#   %dtotal - simplified class . GetDClass(code point)
%gtotal = ();
%dtotal = ();

# Read the JIS X 4051 class file.  Each line is ';'-separated:
#   field 0 - first code point of the range (hex)
#   field 1 - last code point of the range (hex); empty for a single one
#   field 2 - class name, looked up in %simp
while (my $line = <CLASS>) {
    # Remove only the newline; chop would delete a data character from a
    # final line that has none.
    chomp $line;

    # A blank line would otherwise be read as "U+0000 with an empty class".
    next unless $line =~ /\S/;

    my @field = split(/;/, $line);
    my $class = $field[2];

    # Classes whose name starts with "a" are not processed.
    next if substr($class, 0, 1) eq "a";

    my $sc    = $simp{$class};
    my $first = hex($field[0]);
    my $last  = ($field[1] eq "") ? $first : hex($field[1]);

    for my $code ($first .. $last) {
        # The first line that mentions a code point wins; later lines that
        # cover it again are ignored silently.
        #   printf "WARNING !! Conflicting definition!!! U+%s -> [%s] [%s | %s]\n",
        #       DecToHex($code), $occ{$code}, $class, $sc;
        next if exists($occ{$code});

        $occ{$code} = $sc . " | " . $class;

        my $gclass = GetClass($code);
        my $dclass = GetDClass($code);
        $gtotal{$sc . $gclass}++;
        $dtotal{$sc . $dclass}++;

        # Per-page tally: the key is a space, the first two digits of the
        # hex code, a colon, and the simplified class.
        my $hex = DecToHex($code);
        $rangecount{" " . substr($hex, 0, 2) . ":" . $sc}++;
    }
}

#print %gtotal;
#print %dtotal;
# printreport()
#
# Write two HTML tables to the OUT handle.
#
# Table 1 has one row per simplified class (keys of %sccount).  Its columns
# are the one-letter categories (keys of %dcount, counts from %dtotal), a
# "Total" column summing those, and the General Categories (keys of
# %gcount, counts from %gtotal).
#
# Table 2 has one row per two-digit page 00..4E and one column per
# simplified class, with counts from %rangecount.  A row is written only
# when its counts sum to a non-zero value.
# NOTE(review): pages 4F and above are never listed -- confirm intended.
#
# Takes no arguments; the return value is not meaningful.  All working
# variables are lexical so no package global is modified.
sub printreport
{
    # Sort each key set once; every row uses the same column order.
    my @letters    = sort(keys %dcount);
    my @categories = sort(keys %gcount);
    my @classes    = sort(keys %sccount);

    # ---- Table 1: simplified class by category --------------------------
    print OUT "<TABLE BORDER=3>\n";
    print OUT "<TR BGCOLOR=blue><TH><TH>\n";
    print OUT "<TD BGCOLOR=red>$_</TD>\n" for @letters;
    print OUT "<TD BGCOLOR=white>Total</TD>\n";
    print OUT "<TD BGCOLOR=yellow>$_</TD>\n" for @categories;
    print OUT "</TR>\n";

    for my $sc (@classes) {
        print OUT "<TR><TH>$sc<TH>\n";

        my $total = 0;
        for my $letter (@letters) {
            # A missing tally prints as an empty cell and adds nothing.
            my $count = $dtotal{$sc . $letter};
            $total += $count;
            print OUT "<TD>$count</TD>\n";
        }

        print OUT "<TD BGCOLOR=white>$total</TD>\n";

        for my $category (@categories) {
            my $count = $gtotal{$sc . $category};
            print OUT "<TD>$count</TD>\n";
        }

        print OUT "</TR>\n";
    }
    print OUT "</TABLE>\n";

    # ---- Table 2: page by simplified class ------------------------------
    print OUT "<TABLE BORDER=3>\n";
    print OUT "<TR BGCOLOR=blue><TH><TH>\n";
    print OUT "<TD BGCOLOR=red>$_</TD>\n" for @classes;
    print OUT "</TR>\n";

    for my $page (0 .. 0x4e) {
        my $r    = sprintf("%02X", $page);
        my $seen = 0;
        my $row  = "<TR><TH>" . $r . "<TH>\n";

        for my $sc (@classes) {
            my $count = $rangecount{" " . $r . ":" . $sc};
            $row .= sprintf("<TD>%s</TD>\n", $count);
            $seen += $count;
        }

        $row .= "</TR>\n";

        # Pages with nothing counted are left out of the table.
        print OUT $row if $seen != 0;
    }
    print OUT "</TABLE>\n";
}
printreport();
# printarray($r, $def)
#
# Append one C array, gLBClass<$r>, to the HEADER handle and print a
# "[$r || $def]" progress line to standard output.
#
#   $r   - two hex digits naming a page; the page covers the 256 code
#          points starting at hex($r) * 256.
#   $def - text emitted for a code point that has no entry in %occ.
#
# The array has 32 words, each written as "0x" followed by eight pieces of
# text, one per code point, with the highest code point of the eight
# first.  For a code point present in %occ the piece is the SECOND
# character of its stored string.
# NOTE(review): this assumes the simplified class is two characters with
# the digit second -- confirm against jisx4051simp.txt.
#
# All working variables are lexical so no package global is modified.
sub printarray
{
    my($r, $def) = @_;
    printf "[%s || %s]\n", $r, $def;

    my $base = hex($r) * 256;
    printf HEADER "static const uint32_t gLBClass%s[32] = {\n", $r;

    for (my $offset = 0; $offset < 256; $offset += 8)
    {
        my $first = $base + $offset;

        # Offsets 7 down to 0: the last code point of the group becomes
        # the leading digit of the word.
        my $digits = join("", map {
            my $code = $first + $_;
            exists($occ{$code}) ? substr($occ{$code}, 1, 1) : $def;
        } reverse(0 .. 7));

        printf HEADER "0x%s", $digits;
        printf HEADER ", // U+%04X - U+%04X\n", $first, $first + 7;
    }
    print HEADER "};\n\n";
}
# Emit one table per page, in this fixed order.  Each entry is
# [ page as two hex digits, text used for code points without a class ].
for my $table (
    [ "00", "7" ],
    [ "20", "7" ],
    [ "21", "7" ],
    [ "30", "5" ],
    [ "0E", "8" ],
    [ "17", "7" ],
) {
    printarray(@$table);
}
346 #print %rangecount;
348 ######################################################################
349 #
350 # Close files
351 #
352 ######################################################################
# Buffered output is flushed at close, so a write failure (for example a
# full disk) is only reported there.  Check the two output handles so a
# truncated header or report cannot pass unnoticed.
close(HEADER) || die "cannot close output ../src/jisx4051class.h file: $!";
close(CLASS);
close(OUT) || die "cannot close output anzx4051.html file: $!";