intl/unicharutil/tests/genNormalizationData.pl

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/unicharutil/tests/genNormalizationData.pl	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,93 @@
     1.4 +#!/usr/bin/perl 
     1.5 +#
     1.6 +# This Source Code Form is subject to the terms of the Mozilla Public
     1.7 +# License, v. 2.0. If a copy of the MPL was not distributed with this
     1.8 +# file, You can obtain one at http://mozilla.org/MPL/2.0/.
     1.9 +
    1.10 +open ( TEXTFILE , "< NormalizationTest.txt")
    1.11 +    || die "Cannot find NormalizationTest.txt. The latest version should be available from\n http://www.unicode.org/Public/UNIDATA/NormalizationTest.txt\n";
    1.12 +
    1.13 +open ( OUT , "> NormalizationData.h")
    1.14 +#open ( OUT , "> test.txt")
    1.15 +    || die "Cannot create output file NormalizationData.h\n";
    1.16 +
    1.17 +$mpl = <<END_OF_MPL;
    1.18 +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
    1.19 +/* This Source Code Form is subject to the terms of the Mozilla Public
    1.20 + * License, v. 2.0. If a copy of the MPL was not distributed with this
    1.21 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
    1.22 +/* 
    1.23 +    DO NOT EDIT THIS DOCUMENT !!! THIS DOCUMENT IS GENERATED BY
    1.24 +    mozilla/intl/unicharutil/tools/genNormalizationData.pl
    1.25 + */
    1.26 +END_OF_MPL
    1.27 +
    1.28 +print OUT $mpl;
    1.29 +
    1.30 +# XXX This code assumes that wchar_t is 16-bit unsigned, which is currently
    1.31 +#      true on Windows, Linux and Mac (with |g++ -fshort-wchar|).
    1.32 +#      To make it work where that assumption doesn't hold, one could generate
    1.33 +#      one huge array containing all the strings as 16-bit units (including
    1.34 +#      the 0 terminator) and initialize the array of testcaseLine with pointers
    1.35 +#      into the huge array.
    1.36 +
    1.37 +while(<TEXTFILE>) {
    1.38 +    chop;
    1.39 +    if (/^# NormalizationTest-(.+)\.txt/) {
    1.40 +	print OUT "static char versionText[] = \"$1\";\n";
    1.41 +    } elsif (/^\@Part(.)/) {
    1.42 +	if ($1 != "0") {
    1.43 +	    print OUT "  {\n";
    1.44 +	    print OUT "    L\"\",\n";
    1.45 +	    print OUT "    L\"\",\n";
    1.46 +	    print OUT "    L\"\",\n";
    1.47 +	    print OUT "    L\"\",\n";
    1.48 +	    print OUT "    L\"\",\n";
    1.49 +	    print OUT "    \"\",\n";
    1.50 +	    print OUT "  },\n";
    1.51 +	    print OUT "};\n";
    1.52 +	}
    1.53 +	print OUT "\n";
    1.54 +	print OUT "static testcaseLine Part$1TestData[] = \n";
    1.55 +	print OUT "{\n";
    1.56 +    } else {
    1.57 +	unless (/^\#/) {
    1.58 +	    @cases = split(/;/ , $_);
    1.59 +	    print OUT "  {\n";
    1.60 +	    for ($case = 0; $case < 5; ++$case) {
    1.61 +		$c = $cases[$case];
    1.62 +		print OUT "    L\"";
    1.63 +		@codepoints = split(/ / , $c);
    1.64 +		foreach (@codepoints) {
    1.65 +		    $cp = hex($_);
    1.66 +		    if ($cp < 0x10000) {
    1.67 +                      # BMP codepoint
    1.68 +			printf OUT "\\x%04X", $cp;
    1.69 +		    } else {
    1.70 +                      # non-BMP codepoint, convert to surrogate pair
    1.71 +			printf OUT "\\x%04X\\x%04X",
    1.72 +			           ($cp >> 10) + 0xD7C0,
    1.73 +			           ($cp & 0x03FF) | 0xDC00;
    1.74 +		    }
    1.75 +		}
    1.76 +		print OUT "\",\n";
    1.77 +	    }
    1.78 +	    $description = $cases[10];
    1.79 +	    $description =~ s/^ \) //;
    1.80 +	    print OUT "    \"$description\"\n";
    1.81 +	    print OUT "  },\n";
    1.82 +	}
    1.83 +    }
    1.84 +}
    1.85 + 
    1.86 +print OUT "  {\n";
    1.87 +print OUT "    L\"\",\n";
    1.88 +print OUT "    L\"\",\n";
    1.89 +print OUT "    L\"\",\n";
    1.90 +print OUT "    L\"\",\n";
    1.91 +print OUT "    L\"\",\n";
    1.92 +print OUT "    \"\",\n";
    1.93 +print OUT "  },\n";
    1.94 +print OUT "};\n";
    1.95 +close (OUT);
    1.96 +close (TEXTFILE);

mercurial