#!/usr/bin/perl
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

# Reads NormalizationTest.txt from the current directory and writes
# NormalizationData.h, a C++ header holding one testcaseLine table per
# "@PartN" section of the input. Every data row of the input becomes one
# table entry: five wide-string literals (the five ';'-separated columns,
# encoded as 16-bit code units) followed by the textual description.

use strict;
use warnings;

my $input_name  = 'NormalizationTest.txt';
my $output_name = 'NormalizationData.h';

open(my $in, '<', $input_name)
  or die "Cannot find $input_name ($!). The latest version should be available from\n http://www.unicode.org/Public/UNIDATA/NormalizationTest.txt\n";

open(my $out, '>', $output_name)
  or die "Cannot create output file $output_name: $!\n";

my $mpl = <<'END_OF_MPL';
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
/*
 DO NOT EDIT THIS DOCUMENT !!! THIS DOCUMENT IS GENERATED BY
 mozilla/intl/unicharutil/tools/genNormalizationData.pl
 */
END_OF_MPL

print {$out} $mpl;

# XXX This code assumes that wchar_t is 16-bit unsigned, which is currently
# true on Windows, Linux and Mac (with |g++ -fshort-wchar|).
# To make it work where that assumption doesn't hold, one could generate
# one huge array containing all the strings as 16-bit units (including
# the 0 terminator) and initialize the array of testcaseLine with pointers
# into the huge array.

# Returns the text that ends a table: an all-empty sentinel entry followed
# by the closing brace of the array initializer.
sub table_terminator {
    return join '',
        " {\n",
        (" L\"\",\n") x 5,
        " \"\",\n",
        " },\n",
        "};\n";
}

# Converts one column (space-separated hexadecimal code points) into the
# \xNNNN escapes of a wide-string literal. Code points outside the BMP are
# written as a surrogate pair. An undefined column yields an empty string.
sub utf16_escapes {
    my ($column) = @_;
    return '' unless defined $column;

    my $escapes = '';
    for my $hex_digits (split ' ', $column) {
        my $cp = hex($hex_digits);
        if ($cp < 0x10000) {
            # BMP codepoint
            $escapes .= sprintf "\\x%04X", $cp;
        }
        else {
            # non-BMP codepoint, convert to surrogate pair
            # (0xD7C0 is 0xD800 - (0x10000 >> 10))
            $escapes .= sprintf "\\x%04X\\x%04X",
                ($cp >> 10) + 0xD7C0,
                ($cp & 0x03FF) | 0xDC00;
        }
    }
    return $escapes;
}

# Extracts the description from the trailing comment of a data row. The
# comment is expected to look like "(sample; sample; ... ) DESCRIPTION", so
# the description is whatever follows the last ") ". The samples themselves
# may contain ';', '#' or ')', which is why the comment is not split on ';'.
sub description_of {
    my ($comment) = @_;
    return '' unless defined $comment;

    my $description = $comment =~ /^.*\) (.*)$/ ? $1 : '';

    # Keep the emitted C string literal well-formed whatever the text is.
    $description =~ s/(["\\])/\\$1/g;
    return $description;
}

my $table_open = 0;

LINE:
while (my $line = <$in>) {
    # Strip the line terminator only (LF or CRLF); chop would also eat the
    # last real character of a final line that lacks a newline.
    $line =~ s/\r?\n\z//;

    if ($line =~ /^# NormalizationTest-(.+)\.txt/) {
        print {$out} "static char versionText[] = \"$1\";\n";
        next LINE;
    }

    if ($line =~ /^\@Part(.)/) {
        my $part = $1;

        # Close the previous table, if any, before starting the next one.
        print {$out} table_terminator() if $table_open;

        print {$out} "\n";
        print {$out} "static testcaseLine Part${part}TestData[] = \n";
        print {$out} "{\n";
        $table_open = 1;
        next LINE;
    }

    # Comment lines carry no test data. A blank line must be skipped too:
    # it would otherwise become an all-empty entry, which is exactly the
    # end-of-table sentinel.
    next LINE if $line =~ /^\#/;
    next LINE if $line !~ /\S/;

    # Separate the data columns from the trailing '#' comment before
    # splitting on ';', so that punctuation in the comment cannot shift
    # the columns.
    my ($data, $comment) = split /#/, $line, 2;
    my @columns = split /;/, $data;

    print {$out} " {\n";
    for my $index (0 .. 4) {
        print {$out} " L\"", utf16_escapes($columns[$index]), "\",\n";
    }
    my $description = description_of($comment);
    print {$out} " \"$description\"\n";
    print {$out} " },\n";
}

print {$out} table_terminator() if $table_open;

# Buffered write errors only surface when the handle is closed.
close($out) or die "Cannot finish writing $output_name: $!\n";
close($in);