Thu, 22 Jan 2015 13:21:57 +0100
Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6
#!/usr/bin/perl
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

# Reads NormalizationTest.txt from the current directory and writes
# NormalizationData.h, a C/C++ header containing:
#   - versionText: the version taken from the "# NormalizationTest-<v>.txt"
#     header line of the input;
#   - one "static testcaseLine Part<N>TestData[]" array per "@Part<N>" line.
#     Each entry holds the first five semicolon-separated fields of a data
#     line as wide string literals of 16-bit code units, followed by the
#     description taken from the trailing comment. Every array ends with an
#     all-empty sentinel entry.

use strict;
use warnings;

# Convert one field (space-separated hexadecimal code points) into the body
# of a wide string literal made of \xNNNN escapes.
sub utf16_escapes {
  my ($field) = @_;
  my $escaped = '';
  foreach my $hex_digits (split(/ /, $field)) {
    my $cp = hex($hex_digits);
    if ($cp < 0x10000) {
      # BMP codepoint
      $escaped .= sprintf("\\x%04X", $cp);
    } else {
      # non-BMP codepoint, convert to surrogate pair
      # (0xD7C0 == 0xD800 - (0x10000 >> 10))
      $escaped .= sprintf("\\x%04X\\x%04X",
                          ($cp >> 10) + 0xD7C0,
                          ($cp & 0x03FF) | 0xDC00);
    }
  }
  return $escaped;
}

# Write the all-empty sentinel entry and close the current array.
sub print_terminator {
  my ($fh) = @_;
  print {$fh} " {\n",
              " L\"\",\n" x 5,
              " \"\",\n",
              " },\n",
              "};\n";
}

open(my $in, '<', 'NormalizationTest.txt')
  or die "Cannot find NormalizationTest.txt. The latest version should be available from\n http://www.unicode.org/Public/UNIDATA/NormalizationTest.txt\n";

open(my $out, '>', 'NormalizationData.h')
  or die "Cannot create output file NormalizationData.h\n";

print {$out} <<'END_OF_MPL';
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
/*
DO NOT EDIT THIS DOCUMENT !!! THIS DOCUMENT IS GENERATED BY
mozilla/intl/unicharutil/tools/genNormalizationData.pl
*/
END_OF_MPL

# XXX This code assumes that wchar_t is 16-bit unsigned, which is currently
# true on Windows, Linux and Mac (with |g++ -fshort-wchar|).
# To make it work where that assumption doesn't hold, one could generate
# one huge array containing all the strings as 16-bit units (including
# the 0 terminator) and initialize the array of testcaseLine with pointers
# into the huge array.

# True once an array has been opened and still needs its terminator.
my $table_open = 0;

while (my $line = <$in>) {
  # chomp (not chop): only strip a real line ending, so a final line without
  # a newline keeps its last character. Also drop the CR of CRLF input.
  chomp $line;
  $line =~ s/\r\z//;

  if ($line =~ /^# NormalizationTest-(.+)\.txt/) {
    print {$out} "static char versionText[] = \"$1\";\n";
    next;
  }

  if ($line =~ /^\@Part(.)/) {
    my $part = $1;
    # Close the previous array, whatever its id was, before opening a new one.
    print_terminator($out) if $table_open;
    print {$out} "\n";
    print {$out} "static testcaseLine Part${part}TestData[] = \n";
    print {$out} "{\n";
    $table_open = 1;
    next;
  }

  # Skip comments, and blank lines: a blank line would otherwise produce an
  # all-empty entry that is indistinguishable from the sentinel.
  next if $line =~ /^\#/ || $line !~ /\S/;

  # Separate the data fields from the trailing comment first. The comment
  # lists the characters themselves and may contain ';', so it must not take
  # part in the field split.
  my ($data, $comment) = split(/#/, $line, 2);
  my @fields = split(/;/, $data);

  print {$out} " {\n";
  for my $i (0 .. 4) {
    my $field = defined $fields[$i] ? $fields[$i] : '';
    print {$out} " L\"", utf16_escapes($field), "\",\n";
  }

  # The description is whatever follows the last ") " of the comment.
  my $description = '';
  if (defined $comment && $comment =~ /^.*\) (.*)$/) {
    $description = $1;
  }
  print {$out} " \"$description\"\n";
  print {$out} " },\n";
}

print_terminator($out) if $table_open;

# Buffered write errors (e.g. disk full) only surface at close time.
close($out) or die "Cannot finish writing NormalizationData.h: $!\n";
close($in);