#!/usr/bin/perl
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

# Reads NormalizationTest.txt from the current directory and writes
# NormalizationData.h, a C++ header that contains:
#
#   * versionText[]       - the version taken from the input's first-line
#                           "# NormalizationTest-<version>.txt" comment;
#   * Part<N>TestData[]   - one array of testcaseLine per "@Part<N>" header.
#                           Each record holds the first five ';'-separated
#                           columns of a test line as UTF-16 wide-string
#                           literals, followed by the textual description
#                           taken from the line's trailing comment.  Every
#                           array ends with an all-empty terminator record.
#
# The testcaseLine type itself is not defined here; the code that includes
# the generated header is expected to declare it.

use strict;
use warnings;

my $input_name  = 'NormalizationTest.txt';
my $output_name = 'NormalizationData.h';    # e.g. "test.txt" while debugging

open(my $in, '<', $input_name)
    or die "Cannot find $input_name ($!). The latest version should be available from\n http://www.unicode.org/Public/UNIDATA/NormalizationTest.txt\n";

open(my $out, '>', $output_name)
    or die "Cannot create output file $output_name ($!)\n";

# Non-interpolating heredoc: the banner is copied to the header verbatim.
print {$out} <<'END_OF_MPL';
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
/*
DO NOT EDIT THIS DOCUMENT !!! THIS DOCUMENT IS GENERATED BY
mozilla/intl/unicharutil/tools/genNormalizationData.pl
*/
END_OF_MPL

# XXX This code assumes that wchar_t is 16-bit unsigned, which is currently
# true on Windows, Linux and Mac (with |g++ -fshort-wchar|).
# To make it work where that assumption doesn't hold, one could generate
# one huge array containing all the strings as 16-bit units (including
# the 0 terminator) and initialize the array of testcaseLine with pointers
# into the huge array.

# True while a "static testcaseLine PartNTestData[] = {" is still waiting
# for its terminator record and closing brace.
my $array_open = 0;

LINE:
while (my $line = <$in>) {
    # Strip the line ending only (LF or CRLF); unlike chop, this never eats
    # a real character when the last line has no trailing newline.
    $line =~ s/\r?\n\z//;

    # Version banner, e.g. "# NormalizationTest-15.1.0.txt".
    if ($line =~ /^# NormalizationTest-(.+)\.txt/) {
        my $version = c_escape($1);
        print {$out} "static char versionText[] = \"$version\";\n";
        next LINE;
    }

    # "@PartN" starts a new array; finish the previous one first.
    if ($line =~ /^\@Part(.)/) {
        my $part = $1;
        close_array($out) if $array_open;
        print {$out} "\n";
        print {$out} "static testcaseLine Part${part}TestData[] = \n";
        print {$out} "{\n";
        $array_open = 1;
        next LINE;
    }

    # Ordinary comments and blank lines carry no test data.  (A blank line
    # would otherwise become an all-empty record, i.e. a bogus terminator.)
    next LINE if $line =~ /^\#/ or $line !~ /\S/;

    die "$input_name line $.: test case found before any \@Part header\n"
        unless $array_open;

    # Separate the data columns from the trailing "# (...) NAME" comment
    # BEFORE splitting on ';': the comment shows the sample characters
    # themselves, and those may include ';' (e.g. U+037E normalizes to
    # U+003B), so counting ';'-separated fields across the whole line
    # picks the wrong field as the description.
    my ($data, $comment) = split /#/, $line, 2;
    my @columns = split /;/, $data;

    print {$out} " {\n";
    for my $index (0 .. 4) {
        # A missing column is emitted as an empty literal.
        my $column = defined $columns[$index] ? $columns[$index] : '';
        print {$out} ' L"', utf16_escapes($column), "\",\n";
    }
    print {$out} ' "', c_escape(description_of($comment)), "\"\n";
    print {$out} " },\n";
}

close_array($out) if $array_open;

# Buffered write errors (disk full, ...) only show up when closing.
close($out) or die "Cannot finish writing $output_name ($!)\n";
close($in);

# Writes the all-empty terminator record and the closing brace of the
# array currently being emitted to the filehandle $fh.
sub close_array {
    my ($fh) = @_;

    print {$fh} " {\n";
    print {$fh} " L\"\",\n" for 1 .. 5;
    print {$fh} " \"\",\n";
    print {$fh} " },\n";
    print {$fh} "};\n";
    return;
}

# Converts one column of whitespace-separated hexadecimal code points
# (e.g. "0044 0307") into the body of a C wide-string literal made of
# \xHHHH escapes, one per UTF-16 code unit.  Returns '' for an empty column.
sub utf16_escapes {
    my ($column) = @_;

    my $escaped = '';
    for my $hex_digits (split ' ', $column) {
        my $code_point = hex $hex_digits;
        if ($code_point < 0x10000) {
            # BMP code point: a single code unit.
            $escaped .= sprintf '\\x%04X', $code_point;
        }
        else {
            # Supplementary code point: high/low surrogate pair.
            my $offset = $code_point - 0x10000;
            $escaped .= sprintf '\\x%04X\\x%04X',
                0xD800 + ($offset >> 10),
                0xDC00 + ($offset & 0x03FF);
        }
    }
    return $escaped;
}

# Extracts the description from a test line's trailing comment, which has
# the form " (c1; c2; c3; c4; c5; ) DESCRIPTION".  The description is the
# text after the LAST ") ", so sample characters such as ';' or ')' inside
# the parentheses cannot confuse it.  Returns '' when there is no comment
# or it does not have that shape.
sub description_of {
    my ($comment) = @_;

    return '' unless defined $comment;
    return $comment =~ /\A.*\) (.*)\z/s ? $1 : '';
}

# Escapes '"' and '\' so that $text can sit inside a C string literal.
sub c_escape {
    my ($text) = @_;

    $text =~ s/(["\\])/\\$1/g;
    return $text;
}