intl/unicharutil/tests/genNormalizationData.pl

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

     #!/usr/bin/perl
     #
     # This Source Code Form is subject to the terms of the Mozilla Public
     # License, v. 2.0. If a copy of the MPL was not distributed with this
     # file, You can obtain one at http://mozilla.org/MPL/2.0/.

     # Reads NormalizationTest.txt from the current directory and writes
     # NormalizationData.h into the current directory.  The generated header
     # contains:
     #   - versionText, taken from the "# NormalizationTest-<version>.txt" line;
     #   - one "static testcaseLine Part<N>TestData[]" array per "@Part<N>" line,
     #     holding one entry per test line (five wide string literals followed
     #     by a narrow description string) and terminated by an all-empty entry.

     use strict;
     use warnings;

     my $INPUT_NAME  = 'NormalizationTest.txt';
     my $OUTPUT_NAME = 'NormalizationData.h';

     # Number of code point columns on every test line.
     my $COLUMN_COUNT = 5;

     # Banner written verbatim at the top of the generated header.  The fifth
     # entry deliberately keeps its trailing space so the output stays
     # byte-identical to what earlier versions of this script produced.
     # NOTE(review): the banner names a tools/ path for this script; confirm it
     # still matches where the script actually lives.
     my @HEADER_LINES = (
         '/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */',
         '/* This Source Code Form is subject to the terms of the Mozilla Public',
         ' * License, v. 2.0. If a copy of the MPL was not distributed with this',
         ' * file, You can obtain one at http://mozilla.org/MPL/2.0/. */',
         '/* ',
         '    DO NOT EDIT THIS DOCUMENT !!! THIS DOCUMENT IS GENERATED BY',
         '    mozilla/intl/unicharutil/tools/genNormalizationData.pl',
         ' */',
     );

     # Write the all-empty entry that terminates a testcaseLine array, followed
     # by the closing brace of the array initializer.
     #   $fh - output filehandle
     sub print_end_of_table {
         my ($fh) = @_;
         print $fh "  {\n";
         print $fh "    L\"\",\n" for 1 .. $COLUMN_COUNT;
         print $fh "    \"\",\n";
         print $fh "  },\n";
         print $fh "};\n";
         return;
     }

     # Turn one column of a test line into the body of a C wide string literal.
     #   $column - space-separated hexadecimal code points (may be empty)
     # Returns a string of \xNNNN escapes, one per 16-bit unit.
     sub wide_literal_body {
         my ($column) = @_;
         my $body = '';
         foreach my $hex_digits (split(/ /, $column)) {
             my $cp = hex($hex_digits);
             if ($cp < 0x10000) {
                 # BMP code point: a single 16-bit unit.
                 $body .= sprintf("\\x%04X", $cp);
             } else {
                 # Non-BMP code point: convert to a surrogate pair.
                 # 0xD7C0 is 0xD800 - (0x10000 >> 10), so the add both removes
                 # the 0x10000 offset and applies the high-surrogate base.
                 $body .= sprintf("\\x%04X\\x%04X",
                                  ($cp >> 10) + 0xD7C0,
                                  ($cp & 0x03FF) | 0xDC00);
             }
         }
         return $body;
     }

     # Extract the human-readable description that ends a test line, i.e. the
     # text after the closing "; ) " of the sample-character column.
     #   $line - one test line with its line terminator already removed
     # Returns the description escaped for use inside a C string literal, or an
     # empty string when the line carries no such comment.
     sub test_description {
         my ($line) = @_;
         my $description = '';
         # The greedy prefix anchors on the LAST "; ) ".  Counting ';'-separated
         # fields instead breaks when a sample character is itself a semicolon
         # (expected for e.g. U+037E GREEK QUESTION MARK, which decomposes to ';').
         if ($line =~ /^.*; \) (.*)$/) {
             $description = $1;
         }
         $description =~ s/(["\\])/\\$1/g;
         return $description;
     }

     open(my $in, '<', $INPUT_NAME)
         or die "Cannot find NormalizationTest.txt. The latest version should be available from\n"
              . " http://www.unicode.org/Public/UNIDATA/NormalizationTest.txt\n";
     open(my $out, '>', $OUTPUT_NAME)
         or die "Cannot create output file NormalizationData.h\n";

     print {$out} join("\n", @HEADER_LINES), "\n";

     # XXX This code assumes that wchar_t is 16-bit unsigned, which is currently
     #      true on Windows, Linux and Mac (with |g++ -fshort-wchar|).
     #      To make it work where that assumption doesn't hold, one could generate
     #      one huge array containing all the strings as 16-bit units (including
     #      the 0 terminator) and initialize the array of testcaseLine with pointers
     #      into the huge array.

     while (my $line = <$in>) {
         # Strip LF or CRLF only; never eat a real character when the last line
         # is unterminated.
         $line =~ s/\r?\n\z//;

         if ($line =~ /^# NormalizationTest-(.+)\.txt/) {
             print $out "static char versionText[] = \"$1\";\n";
         } elsif ($line =~ /^\@Part(.)/) {
             my $part = $1;
             # Every part except the first must close the array opened by the
             # previous part.
             print_end_of_table($out) if $part ne '0';
             print $out "\n";
             print $out "static testcaseLine Part${part}TestData[] = \n";
             print $out "{\n";
         } elsif ($line =~ /^\#/ || $line !~ /\S/) {
             # Comment or blank line.  A blank line must not be emitted as a
             # test case: it would be indistinguishable from the end-of-table
             # entry.
             next;
         } else {
             my @columns = split(/;/, $line);
             print $out "  {\n";
             for my $index (0 .. $COLUMN_COUNT - 1) {
                 my $column = defined $columns[$index] ? $columns[$index] : '';
                 print $out "    L\"", wide_literal_body($column), "\",\n";
             }
             print $out "    \"", test_description($line), "\"\n";
             print $out "  },\n";
         }
     }

     # Terminate the array opened by the last @Part line.
     print_end_of_table($out);

     # Buffered write errors only surface at close, so check it.
     close($out) or die "Cannot finish writing NormalizationData.h: $!\n";
     close($in);

mercurial