intl/unicharutil/tests/genNormalizationData.pl

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

#!/usr/bin/perl
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

# Reads NormalizationTest.txt from the current directory and writes
# NormalizationData.h, a C++ header containing:
#   - versionText[]: the version taken from the "# NormalizationTest-X.txt"
#     header line of the input;
#   - one "static testcaseLine Part<N>TestData[]" array per "@Part<N>" line,
#     each entry holding the first five ';'-separated columns of a test line
#     as wide string literals plus the textual description from the trailing
#     comment. Every array ends with an all-empty terminator entry.

use strict;
use warnings;

open(my $in, '<', 'NormalizationTest.txt')
  || die "Cannot find NormalizationTest.txt. The latest version should be available from\n http://www.unicode.org/Public/UNIDATA/NormalizationTest.txt\n";

open(my $out, '>', 'NormalizationData.h')
#open(my $out, '>', 'test.txt')
  || die "Cannot create output file NormalizationData.h\n";

# Non-interpolating heredoc: the header text is emitted verbatim.
my $mpl = <<'END_OF_MPL';
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
/*
DO NOT EDIT THIS DOCUMENT !!! THIS DOCUMENT IS GENERATED BY
mozilla/intl/unicharutil/tools/genNormalizationData.pl
*/
END_OF_MPL

print {$out} $mpl;

# XXX This code assumes that wchar_t is 16-bit unsigned, which is currently
# true on Windows, Linux and Mac (with |g++ -fshort-wchar|).
# To make it work where that assumption doesn't hold, one could generate
# one huge array containing all the strings as 16-bit units (including
# the 0 terminator) and initialize the array of testcaseLine with pointers
# into the huge array.

# Writes the all-empty entry that marks the end of a testcaseLine array,
# followed by the closing brace of the array itself.
sub print_terminator {
    my ($fh) = @_;
    print {$fh} " {\n";
    print {$fh} " L\"\",\n" for 1 .. 5;
    print {$fh} " \"\",\n";
    print {$fh} " },\n";
    print {$fh} "};\n";
    return;
}

# Converts one column of the test file (space-separated hexadecimal code
# points) into the body of a wide string literal made of \xNNNN escapes.
# Code points above U+FFFF are written as a UTF-16 surrogate pair.
sub utf16_escapes {
    my ($column) = @_;
    my $literal = '';
    # split ' ' ignores leading/trailing blanks, so stray spaces around a
    # column never turn into a bogus \x0000 unit.
    for my $hex (split ' ', $column) {
        my $cp = hex $hex;
        if ($cp < 0x10000) {
            # BMP codepoint
            $literal .= sprintf '\\x%04X', $cp;
        }
        else {
            # non-BMP codepoint, convert to surrogate pair
            # (0xD7C0 == 0xD800 - (0x10000 >> 10))
            $literal .= sprintf '\\x%04X\\x%04X',
                ($cp >> 10) + 0xD7C0,
                ($cp & 0x03FF) | 0xDC00;
        }
    }
    return $literal;
}

# True while a "static testcaseLine ...[] = {" has been opened and still
# needs its terminator entry and closing brace.
my $array_open = 0;

while (my $line = <$in>) {
    # Strip the line ending only (LF or CRLF); unlike chop this never eats a
    # real character when the last line is unterminated.
    $line =~ s/\r?\n\z//;

    if ($line =~ /^# NormalizationTest-(.+)\.txt/) {
        print {$out} "static char versionText[] = \"$1\";\n";
    }
    elsif ($line =~ /^\@Part(.)/) {
        my $part = $1;
        # Close the array of the previous part before starting a new one.
        print_terminator($out) if $array_open;
        print {$out} "\n";
        print {$out} "static testcaseLine Part${part}TestData[] = \n";
        print {$out} "{\n";
        $array_open = 1;
    }
    else {
        # Skip comment lines and blank lines. A blank line would otherwise
        # produce an all-empty entry, indistinguishable from the terminator.
        next if $line =~ /^\s*(?:#|$)/;

        # Separate the data columns from the trailing "# (...; ) NAME"
        # comment first: the sample glyphs inside the comment may themselves
        # be ';' or '#', so the comment must not be split on ';'.
        my ($data, $comment) = split /#/, $line, 2;
        my @columns = split /;/, $data;

        print {$out} " {\n";
        for my $index (0 .. 4) {
            my $column = defined $columns[$index] ? $columns[$index] : '';
            print {$out} " L\"", utf16_escapes($column), "\",\n";
        }

        # The description is whatever follows the last ") " of the comment
        # (the greedy .* skips over any ')' glyphs listed before it).
        my $description = '';
        if (defined $comment && $comment =~ /^.*\) (.*)$/) {
            $description = $1;
        }
        # Keep the generated C string literal well-formed.
        $description =~ s/(["\\])/\\$1/g;

        print {$out} " \"$description\"\n";
        print {$out} " },\n";
    }
}

print_terminator($out) if $array_open;

# Buffered write errors only surface at close time, so check it.
close($out) || die "Cannot write output file NormalizationData.h: $!\n";
close($in);

mercurial