Sat, 03 Jan 2015 20:18:00 +0100
Conditionally enable double-key logic depending on private browsing mode
or the privacy.thirdparty.isolate preference, and implement it in
GetCookieStringCommon and FindCookie, where it counts.
Some reservations remain about how to convince FindCookie callers to test
the condition and pass a nullptr when double-key logic is disabled.
michael@0 | 1 | #!/usr/bin/env perl |
michael@0 | 2 | |
michael@0 | 3 | # This Source Code Form is subject to the terms of the Mozilla Public |
michael@0 | 4 | # License, v. 2.0. If a copy of the MPL was not distributed with this |
michael@0 | 5 | # file, You can obtain one at http://mozilla.org/MPL/2.0/. |
michael@0 | 6 | |
michael@0 | 7 | # This tool is used to prepare lookup tables of Unicode character properties |
michael@0 | 8 | # needed by gfx code to support text shaping operations. The properties are |
michael@0 | 9 | # read from the Unicode Character Database and compiled into multi-level arrays |
michael@0 | 10 | # for efficient lookup. |
michael@0 | 11 | # |
michael@0 | 12 | # To regenerate the tables in nsUnicodePropertyData.cpp: |
michael@0 | 13 | # |
michael@0 | 14 | # (1) Download the current Unicode data files from |
michael@0 | 15 | # |
michael@0 | 16 | # http://www.unicode.org/Public/UNIDATA/ |
michael@0 | 17 | # |
michael@0 | 18 | # NB: not all the files are actually needed; currently, we require |
michael@0 | 19 | # - UnicodeData.txt |
michael@0 | 20 | # - Scripts.txt |
michael@0 | 21 | # - EastAsianWidth.txt |
michael@0 | 22 | # - BidiMirroring.txt |
michael@0 | 23 | # - HangulSyllableType.txt |
michael@0 | 24 | # - ReadMe.txt (to record version/date of the UCD) |
michael@0 | 25 | # - Unihan_Variants.txt (from Unihan.zip) |
michael@0 | 26 | # though this may change if we find a need for additional properties. |
michael@0 | 27 | # |
michael@0 | 28 | # The Unicode data files listed above should be together in one directory. |
michael@0 | 29 | # We also require the file |
michael@0 | 30 | # http://www.unicode.org/Public/security/latest/xidmodifications.txt |
michael@0 | 31 | # This file should be in a sub-directory "security" immediately below the |
michael@0 | 32 | # directory containing the other Unicode data files. |
michael@0 | 33 | # |
michael@0 | 34 | # (2) Run this tool using a command line of the form |
michael@0 | 35 | # |
michael@0 | 36 | # perl genUnicodePropertyData.pl \ |
michael@0 | 37 | # /path/to/harfbuzz/src \ |
michael@0 | 38 | # /path/to/UCD-directory |
michael@0 | 39 | # |
michael@0 | 40 | # This will generate (or overwrite!) the files |
michael@0 | 41 | # |
michael@0 | 42 | # nsUnicodePropertyData.cpp |
michael@0 | 43 | # nsUnicodeScriptCodes.h |
michael@0 | 44 | # |
michael@0 | 45 | # in the current directory. |
michael@0 | 46 | |
michael@0 | 47 | use strict; |
michael@0 | 48 | use List::Util qw(first); |
michael@0 | 49 | |
# Exactly two arguments are required: the HarfBuzz source directory and the
# UCD directory. Anything else is a usage error, so report it on STDERR and
# exit with a non-zero status so calling scripts can detect the failure.
if (@ARGV != 2) {
    # Non-interpolating heredoc: in an interpolating one the trailing "\"
    # line-continuation markers below are consumed as escape sequences and
    # never reach the output.
    print STDERR <<'__EOT';
# Run this tool using a command line of the form
#
# perl genUnicodePropertyData.pl \
# /path/to/harfbuzz/src \
# /path/to/UCD-directory
#
# where harfbuzz/src is the directory containing harfbuzz .cc and .hh files,
# and UCD-directory is a directory containing the current Unicode Character
# Database files (UnicodeData.txt, etc), available from
# http://www.unicode.org/Public/UNIDATA/
#
# This will generate (or overwrite!) the files
#
# nsUnicodePropertyData.cpp
# nsUnicodeScriptCodes.h
#
# in the current directory.
__EOT
    exit 1;
}
michael@0 | 72 | |
# load HB_Script and HB_Category constants

# HB_SCRIPT_* constants are "tag" values rather than sequentially-allocated
# script codes of the kind used by Glib/Pango/etc. We therefore keep our own
# MOZ_SCRIPT_* script _codes_, compatible with those libraries, and map them
# to HB_SCRIPT_* _tags_ as needed.

# CHECK that this matches Pango source (as found for example at
# http://git.gnome.org/browse/pango/tree/pango/pango-script.h)
# for as many codes as that defines (currently up through Unicode 5.1)
# and the GLib enumeration
# http://developer.gnome.org/glib/2.30/glib-Unicode-Manipulation.html#GUnicodeScript
# (currently defined up through Unicode 6.0).
# Constants beyond these may be regarded as unstable for now, but we don't
# actually depend on the specific values.
my %scriptCode = do {
    # Script names in code order: the position in this list IS the script
    # code (COMMON is 0), so new names must only ever be appended.
    my @names = (
        qw(COMMON INHERITED ARABIC ARMENIAN BENGALI BOPOMOFO CHEROKEE COPTIC
           CYRILLIC DESERET DEVANAGARI ETHIOPIC GEORGIAN GOTHIC GREEK
           GUJARATI GURMUKHI HAN HANGUL HEBREW HIRAGANA KANNADA KATAKANA
           KHMER LAO LATIN MALAYALAM MONGOLIAN MYANMAR OGHAM OLD_ITALIC
           ORIYA RUNIC SINHALA SYRIAC TAMIL TELUGU THAANA THAI TIBETAN
           CANADIAN_ABORIGINAL YI TAGALOG HANUNOO BUHID TAGBANWA),
        # unicode 4.0 additions
        qw(BRAILLE CYPRIOT LIMBU OSMANYA SHAVIAN LINEAR_B TAI_LE UGARITIC),
        # unicode 4.1 additions
        qw(NEW_TAI_LUE BUGINESE GLAGOLITIC TIFINAGH SYLOTI_NAGRI OLD_PERSIAN
           KHAROSHTHI),
        # unicode 5.0 additions
        qw(UNKNOWN BALINESE CUNEIFORM PHOENICIAN PHAGS_PA NKO),
        # unicode 5.1 additions
        qw(KAYAH_LI LEPCHA REJANG SUNDANESE SAURASHTRA CHAM OL_CHIKI VAI
           CARIAN LYCIAN LYDIAN),
        # unicode 5.2 additions
        qw(AVESTAN BAMUM EGYPTIAN_HIEROGLYPHS IMPERIAL_ARAMAIC
           INSCRIPTIONAL_PAHLAVI INSCRIPTIONAL_PARTHIAN JAVANESE KAITHI LISU
           MEETEI_MAYEK OLD_SOUTH_ARABIAN OLD_TURKIC SAMARITAN TAI_THAM
           TAI_VIET),
        # unicode 6.0 additions
        qw(BATAK BRAHMI MANDAIC),
        # unicode 6.1 additions
        qw(CHAKMA MEROITIC_CURSIVE MEROITIC_HIEROGLYPHS MIAO SHARADA
           SORA_SOMPENG TAKRI),
    );
    # INVALID is the one code that sits outside the sequential numbering.
    ('INVALID', -1, map { ($names[$_], $_) } 0 .. $#names);
};
michael@0 | 201 | |
# State filled in by readHarfBuzzHeader. Both counters stay at -1 until the
# corresponding kind of constant has been seen in a header.
my $sc = -1;            # script code of the most recent HB_SCRIPT_* match
my $cc = -1;            # last general-category code handed out
my %catCode;            # HB general category name => sequential code
my @scriptCodeToTag;    # script code => HB_TAG argument list, e.g. 'L','a','t','n'
my @scriptCodeToName;   # script code => script name

# Scan one HarfBuzz header (a file name relative to the directory given in
# $ARGV[0]) for HB_SCRIPT_* tag definitions and HB_UNICODE_GENERAL_CATEGORY_*
# names, recording them in the tables above.
#
# Script names missing from %scriptCode are reported with a warning and
# skipped. General categories are numbered in the order they are encountered.
# Dies if the header cannot be opened.
sub readHarfBuzzHeader
{
    my ($file) = @_;
    my $path = "$ARGV[0]/$file";
    # Three-argument open with a lexical handle: the path comes from the
    # command line and must never be interpreted as an open mode.
    open(my $fh, '<', $path) or die "can't open harfbuzz header $path\n";
    while (my $line = <$fh>) {
        if ($line =~ m/HB_SCRIPT_([A-Z_]+)\s*=\s*HB_TAG\s*\(('.','.','.','.')\)\s*,/) {
            my ($name, $tag) = ($1, $2);
            unless (exists $scriptCode{$name}) {
                warn "unknown script name $name found in $file\n";
                next;
            }
            $sc = $scriptCode{$name};
            $scriptCodeToTag[$sc] = $tag;
            $scriptCodeToName[$sc] = $name;
        }
        if ($line =~ m/HB_UNICODE_GENERAL_CATEGORY_([A-Z_]+)/) {
            $catCode{$1} = ++$cc;
        }
    }
    close $fh;
}

readHarfBuzzHeader("hb-common.h");
readHarfBuzzHeader("hb-unicode.h");

die "didn't find HarfBuzz script codes\n" if $sc == -1;
die "didn't find HarfBuzz category codes\n" if $cc == -1;
michael@0 | 235 | |
# Identifier-modification types as named in xidmodifications.txt; the code
# assigned to each type is its position in this list.
my %xidmodCode = do {
    my @types = qw(
        inclusion recommended default-ignorable historic limited-use
        not-NFKC not-xid obsolete technical not-chars
    );
    map { ($types[$_], $_) } 0 .. $#types;
};

# Bidi category abbreviations as used in UnicodeData.txt; the code assigned
# to each one is its position in this list (kept as a string value).
my %bidicategoryCode = do {
    my @classes = (
        "L",    # Left-to-Right
        "R",    # Right-to-Left
        "EN",   # European Number
        "ES",   # European Number Separator
        "ET",   # European Number Terminator
        "AN",   # Arabic Number
        "CS",   # Common Number Separator
        "B",    # Paragraph Separator
        "S",    # Segment Separator
        "WS",   # Whitespace
        "ON",   # Other Neutrals
        "LRE",  # Left-to-Right Embedding
        "LRO",  # Left-to-Right Override
        "AL",   # Right-to-Left Arabic
        "RLE",  # Right-to-Left Embedding
        "RLO",  # Right-to-Left Override
        "PDF",  # Pop Directional Format
        "NSM",  # Non-Spacing Mark
        "BN",   # Boundary Neutral
    );
    map { ($classes[$_], "$_") } 0 .. $#classes;
};
michael@0 | 270 | |
# Per-character property arrays, indexed by code point. The ones given an
# initial value here cover the whole range 0 .. 0x10FFFF; @eaw, @mirror and
# @hangul start out empty and are only filled in where the data files say so.
my @script       = ($scriptCode{"UNKNOWN"}) x 0x110000;
my @category     = ($catCode{"UNASSIGNED"}) x 0x110000;
my @combining    = (0) x 0x110000;
my @eaw;
my @mirror;
my @hangul;
my @casemap      = (0) x 0x110000;
my @xidmod       = ($xidmodCode{"not-chars"}) x 0x110000;
my @numericvalue = (-1) x 0x110000;
my @hanVariant   = (0) x 0x110000;
my @bidicategory = ($bidicategoryCode{"L"}) x 0x110000;
my @fullWidth    = (0) x 0x110000;

# blocks where the default for bidi category is not L
$bidicategory[$_] = $bidicategoryCode{"AL"}
    for (0x0600..0x07BF, 0x08A0..0x08FF, 0xFB50..0xFDCF, 0xFDF0..0xFDFF,
         0xFE70..0xFEFF, 0x1EE00..0x0001EEFF);
$bidicategory[$_] = $bidicategoryCode{"R"}
    for (0x0590..0x05FF, 0x07C0..0x089F, 0xFB1D..0xFB4F,
         0x00010800..0x00010FFF, 0x0001E800..0x0001EDFF,
         0x0001EF00..0x0001EFFF);
$bidicategory[$_] = $bidicategoryCode{"ET"}
    for (0x20A0..0x20CF);
michael@0 | 306 | |
# Two-letter UCD General_Category abbreviation => the name used as a key in
# %catCode (the suffix of HB_UNICODE_GENERAL_CATEGORY_*).
my %ucd2hb = qw(
    Cc CONTROL
    Cf FORMAT
    Cn UNASSIGNED
    Co PRIVATE_USE
    Cs SURROGATE
    Ll LOWERCASE_LETTER
    Lm MODIFIER_LETTER
    Lo OTHER_LETTER
    Lt TITLECASE_LETTER
    Lu UPPERCASE_LETTER
    Mc SPACING_MARK
    Me ENCLOSING_MARK
    Mn NON_SPACING_MARK
    Nd DECIMAL_NUMBER
    Nl LETTER_NUMBER
    No OTHER_NUMBER
    Pc CONNECT_PUNCTUATION
    Pd DASH_PUNCTUATION
    Pe CLOSE_PUNCTUATION
    Pf FINAL_PUNCTUATION
    Pi INITIAL_PUNCTUATION
    Po OTHER_PUNCTUATION
    Ps OPEN_PUNCTUATION
    Sc CURRENCY_SYMBOL
    Sk MODIFIER_SYMBOL
    Sm MATH_SYMBOL
    So OTHER_SYMBOL
    Zl LINE_SEPARATOR
    Zp PARAGRAPH_SEPARATOR
    Zs SPACE_SEPARATOR
);
michael@0 | 339 | |
# read ReadMe.txt
# Every line of the UCD ReadMe (minus its newline) becomes part of the
# version information collected in @versionInfo.
my @versionInfo;
{
    # Three-argument open with a lexical handle: the directory comes from the
    # command line and must never be interpreted as an open mode.
    open(my $fh, '<', "$ARGV[1]/ReadMe.txt") or die "can't open Unicode ReadMe.txt file\n";
    chomp(@versionInfo = <$fh>);
    close $fh;
}
michael@0 | 348 | |
# Flag bits OR-ed into a @casemap entry to record which kind of case mapping
# the entry holds. The remaining bits hold (code point XOR mapped code point).
# NOTE(review): $kCaseMapCharMask presumably selects that XOR value; its use
# is not visible in this part of the file.
my $kTitleToUpper = 0x80000000;
my $kUpperToLower = 0x40000000;
my $kLowerToTitle = 0x20000000;
my $kLowerToUpper = 0x10000000;
my $kCaseMapCharMask = 0x001fffff;

# read UnicodeData.txt
{
    # Record the properties that are handled identically for a single
    # character and for every member of a <..., First>/<..., Last> range.
    # Takes the code point and a reference to the split fields of the line.
    my $setCommonProps = sub {
        my ($usv, $fields) = @_;
        $category[$usv] = $catCode{$ucd2hb{$fields->[2]}};
        $combining[$usv] = $fields->[3];
        $bidicategory[$usv] = $bidicategoryCode{$fields->[4]};
        if (defined $fields->[7] && length $fields->[7]) {
            $numericvalue[$usv] = $fields->[7];
        }
        if ($fields->[1] =~ /CJK/) {
            # scalar element assignment (not a one-element array slice)
            $hanVariant[$usv] = 3;
        }
    };

    open(my $fh, '<', "$ARGV[1]/UnicodeData.txt") or die "can't open UCD file UnicodeData.txt\n";
    while (my $line = <$fh>) {
        chomp $line;
        my @fields = split /;/, $line;

        if ($fields[1] =~ /First/) {
            # A range is given as two consecutive lines; the second one must
            # exist and be the matching "Last" entry.
            my $first = hex $fields[0];
            my $lastLine = <$fh>;
            @fields = defined $lastLine ? split(/;/, $lastLine) : ();
            unless (defined $fields[1] && $fields[1] =~ /Last/) {
                die "didn't find Last code for range!\n";
            }
            my $last = hex $fields[0];
            $setCommonProps->($_, \@fields) for $first .. $last;
            next;
        }

        my $usv = hex $fields[0];
        $setCommonProps->($usv, \@fields);

        my $upper = hex $fields[12];
        my $lower = hex $fields[13];
        my $title = hex $fields[14];
        # we only store one mapping for each character,
        # but also record what kind of mapping it is
        if ($upper && $lower) {
            $casemap[$usv] |= $kTitleToUpper;
            $casemap[$usv] |= ($usv ^ $upper);
        }
        elsif ($lower) {
            $casemap[$usv] |= $kUpperToLower;
            $casemap[$usv] |= ($usv ^ $lower);
        }
        elsif ($title && ($title != $upper)) {
            $casemap[$usv] |= $kLowerToTitle;
            $casemap[$usv] |= ($usv ^ $title);
        }
        elsif ($upper) {
            $casemap[$usv] |= $kLowerToUpper;
            $casemap[$usv] |= ($usv ^ $upper);
        }

        # <narrow>/<wide> decompositions pair a character with its
        # counterpart; @fullWidth is indexed by the narrow member of the pair.
        if ($fields[5] =~ /^<narrow>/) {
            my $wideChar = hex(substr($fields[5], 9));
            die "didn't expect supplementary-plane values here" if $usv > 0xffff || $wideChar > 0xffff;
            $fullWidth[$usv] = $wideChar;
        }
        elsif ($fields[5] =~ /^<wide>/) {
            my $narrowChar = hex(substr($fields[5], 7));
            die "didn't expect supplementary-plane values here" if $usv > 0xffff || $narrowChar > 0xffff;
            $fullWidth[$narrowChar] = $usv;
        }
    }
    close $fh;
}
michael@0 | 426 | |
# read Scripts.txt
{
    # Three-argument open with a lexical handle: the directory comes from the
    # command line and must never be interpreted as an open mode.
    open(my $fh, '<', "$ARGV[1]/Scripts.txt") or die "can't open UCD file Scripts.txt\n";

    # Copy the file header, up to and including its "Date:" line, into the
    # version information, preceded by a blank separator line.
    push @versionInfo, "";
    while (my $line = <$fh>) {
        chomp $line;
        push @versionInfo, $line;
        last if $line =~ /Date:/;
    }

    # Data lines look like "XXXX ; Name" or "XXXX..YYYY ; Name".
    while (my $line = <$fh>) {
        next unless $line =~ m/([0-9A-F]{4,6})(?:\.\.([0-9A-F]{4,6}))*\s+;\s+([^ ]+)/;
        my ($first, $last, $name) = ($1, $2, uc($3));
        warn "unknown script $name" unless exists $scriptCode{$name};
        my $code = $scriptCode{$name};
        my $start = hex $first;
        my $end = defined $last ? hex $last : $start;
        $script[$_] = $code for $start .. $end;
    }
    close $fh;
}
michael@0 | 448 | |
# read EastAsianWidth.txt
my %eawCode = (
    'A'  => 0, # ; Ambiguous
    'F'  => 1, # ; Fullwidth
    'H'  => 2, # ; Halfwidth
    'N'  => 3, # ; Neutral
    'NA' => 4, # ; Narrow
    'W'  => 5  # ; Wide
);
{
    # Three-argument open with a lexical handle: the directory comes from the
    # command line and must never be interpreted as an open mode.
    open(my $fh, '<', "$ARGV[1]/EastAsianWidth.txt") or die "can't open UCD file EastAsianWidth.txt\n";

    # Copy the file header, up to and including its "Date:" line, into the
    # version information, preceded by a blank separator line.
    push @versionInfo, "";
    while (my $line = <$fh>) {
        chomp $line;
        push @versionInfo, $line;
        last if $line =~ /Date:/;
    }

    # Data lines look like "XXXX;W" or "XXXX..YYYY;W", optionally followed by
    # a "#" comment that is stripped before matching.
    while (my $line = <$fh>) {
        $line =~ s/#.*//;
        next unless $line =~ m/([0-9A-F]{4,6})(?:\.\.([0-9A-F]{4,6}))*\s*;\s*([^ ]+)/;
        my ($first, $last, $width) = ($1, $2, uc($3));
        warn "unknown EAW code $width" unless exists $eawCode{$width};
        my $code = $eawCode{$width};
        my $start = hex $first;
        my $end = defined $last ? hex $last : $start;
        $eaw[$_] = $code for $start .. $end;
    }
    close $fh;
}
michael@0 | 479 | |
# read BidiMirroring.txt
# @offsets lists the distinct (mirror - character) code point differences;
# @mirror stores, per character, an index into it. Entry 0 is the offset 0.
my @offsets = (0);

{
    # Three-argument open with a lexical handle: the directory comes from the
    # command line and must never be interpreted as an open mode.
    open(my $fh, '<', "$ARGV[1]/BidiMirroring.txt") or die "can't open UCD file BidiMirroring.txt\n";

    # Copy the file header, up to and including its "Date:" line, into the
    # version information, preceded by a blank separator line.
    push @versionInfo, "";
    while (my $line = <$fh>) {
        chomp $line;
        push @versionInfo, $line;
        last if $line =~ /Date:/;
    }

    while (my $line = <$fh>) {
        $line =~ s/#.*//;
        next unless $line =~ m/([0-9A-F]{4,6});\s*([0-9A-F]{4,6})/;
        my $usv = hex $1;
        my $mirrorOffset = hex($2) - $usv;
        my $offsetIndex = first { $offsets[$_] == $mirrorOffset } 0 .. $#offsets;
        # Must test definedness: index 0 is a valid hit, and comparing it
        # numerically against undef would report it as "not found".
        unless (defined $offsetIndex) {
            die "too many offset codes\n" if scalar @offsets == 31;
            push @offsets, $mirrorOffset;
            $offsetIndex = $#offsets;
        }
        $mirror[$usv] = $offsetIndex;
    }
    close $fh;
}
michael@0 | 505 | |
# read HangulSyllableType.txt
# Bit values per syllable type: LV is L|V and LVT is L|V|T.
my %hangulType = (
    'L'   => 0x01,
    'V'   => 0x02,
    'T'   => 0x04,
    'LV'  => 0x03,
    'LVT' => 0x07
);
{
    # Three-argument open with a lexical handle: the directory comes from the
    # command line and must never be interpreted as an open mode.
    open(my $fh, '<', "$ARGV[1]/HangulSyllableType.txt") or die "can't open UCD file HangulSyllableType.txt\n";

    # Copy the file header, up to and including its "Date:" line, into the
    # version information, preceded by a blank separator line.
    push @versionInfo, "";
    while (my $line = <$fh>) {
        chomp $line;
        push @versionInfo, $line;
        last if $line =~ /Date:/;
    }

    # Data lines look like "XXXX ; LV" or "XXXX..YYYY ; LVT", optionally
    # followed by a "#" comment that is stripped before matching.
    while (my $line = <$fh>) {
        $line =~ s/#.*//;
        next unless $line =~ m/([0-9A-F]{4,6})(?:\.\.([0-9A-F]{4,6}))*\s*;\s*([^ ]+)/;
        my ($first, $last, $type) = ($1, $2, uc($3));
        warn "unknown Hangul syllable type" unless exists $hangulType{$type};
        my $code = $hangulType{$type};
        my $start = hex $first;
        my $end = defined $last ? hex $last : $start;
        $hangul[$_] = $code for $start .. $end;
    }
    close $fh;
}
michael@0 | 535 | |
# read xidmodifications.txt
{
    # Three-argument open with a lexical handle: the directory comes from the
    # command line and must never be interpreted as an open mode.
    open(my $fh, '<', "$ARGV[1]/security/xidmodifications.txt") or die "can't open UCD file xidmodifications.txt\n";

    # Copy the file header, up to and including its "Generated:" line, into
    # the version information, leaving out any line that carries a UTF-8
    # byte order mark.
    push @versionInfo, "";
    while (my $line = <$fh>) {
        chomp $line;
        push @versionInfo, $line unless $line =~ /\xef\xbb\xbf/;
        last if $line =~ /Generated:/;
    }

    # Data lines look like "XXXX[..YYYY] ; status ; type"; only the third
    # field (the modification type) is used.
    while (my $line = <$fh>) {
        next unless $line =~ m/([0-9A-F]{4,6})(?:\.\.([0-9A-F]{4,6}))*\s+;\s+[^ ]+\s+;\s+([^ ]+)/;
        my ($first, $last, $type) = ($1, $2, $3);
        warn "unknown Identifier Modification $type" unless exists $xidmodCode{$type};
        my $code = $xidmodCode{$type};
        my $start = hex $first;
        my $end = defined $last ? hex $last : $start;
        $xidmod[$_] = $code for $start .. $end;
    }
    close $fh;
}
# special case U+30FB KATAKANA MIDDLE DOT -- see bug 857490
$xidmod[0x30FB] = 1;
michael@0 | 561 | |
# read Unihan_Variants.txt
# Entries for one character are assumed to be on consecutive lines (TODO
# confirm against the data file); the state below accumulates which variant
# kinds have been seen for the character currently being read.
my $savedusv = 0;   # character being accumulated; 0 means "none yet"
my $hasTC = 0;      # a kTraditionalVariant entry was seen for it
my $hasSC = 0;      # a kSimplifiedVariant entry was seen for it
{
    # Three-argument open with a lexical handle: the directory comes from the
    # command line and must never be interpreted as an open mode.
    open(my $fh, '<', "$ARGV[1]/Unihan_Variants.txt") or die "can't open UCD file Unihan_Variants.txt (from Unihan.zip)\n";

    # Copy the file header, up to and including its "Date:" line, into the
    # version information, preceded by a blank separator line.
    push @versionInfo, "";
    while (my $line = <$fh>) {
        chomp $line;
        push @versionInfo, $line;
        last if $line =~ /Date:/;
    }

    # Store the result for the accumulated character: 1 if it has only a
    # Traditional variant, 2 if only a Simplified one; otherwise its existing
    # @hanVariant value is left alone.
    my $flush = sub {
        return if $savedusv == 0;
        if ($hasTC && !$hasSC) {
            $hanVariant[$savedusv] = 1;
        } elsif (!$hasTC && $hasSC) {
            $hanVariant[$savedusv] = 2;
        }
    };

    while (my $line = <$fh>) {
        chomp $line;
        next unless $line =~ m/U\+([0-9A-F]{4,6})\s+k([^ ]+)Variant/;
        my $usv = hex $1;
        my $kind = $2;
        if ($usv != $savedusv) {
            $flush->();
            $savedusv = $usv;
            $hasTC = 0;
            $hasSC = 0;
        }
        $hasTC = 1 if $kind eq "Traditional";
        $hasSC = 1 if $kind eq "Simplified";
    }
    # The final character in the file is never followed by a different one,
    # so it has to be flushed explicitly.
    $flush->();
    close $fh;
}
michael@0 | 597 | |
# Start both output files. Each begins with the same banner: the license
# block followed by a comment recording the generation time and the version
# information gathered from the UCD data files.
my $timestamp = scalar gmtime;

open DATA_TABLES, "> nsUnicodePropertyData.cpp" or die "unable to open nsUnicodePropertyData.cpp for output";

my $licenseBlock = <<'__LICENSE';

/* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/*
* Derived from the Unicode Character Database by genUnicodePropertyData.pl
*
* For Unicode terms of use, see http://www.unicode.org/terms_of_use.html
*/
__LICENSE

my $versionInfo = join("\n", @versionInfo);

my $generatedBanner = <<__END;
$licenseBlock
/*
* Created on $timestamp from UCD data files with version info:
*

$versionInfo

*
* * * * * This file contains MACHINE-GENERATED DATA, do not edit! * * * * *
*/

__END

# The data file continues with the includes its tables need.
print DATA_TABLES $generatedBanner, <<__END;
#include <stdint.h>
#include "harfbuzz/hb.h"

__END

open HEADER, "> nsUnicodeScriptCodes.h" or die "unable to open nsUnicodeScriptCodes.h for output";

# The header continues with the opening of its include guard.
print HEADER $generatedBanner, <<__END;
#ifndef NS_UNICODE_SCRIPT_CODES
#define NS_UNICODE_SCRIPT_CODES

__END
michael@0 | 652 | |
# Emit the script-code => HarfBuzz tag table, one HB_TAG(...) per line with
# commas between entries.
print DATA_TABLES "static const uint32_t sScriptCodeToTag[] = {\n";
if (@scriptCodeToTag) {
    print DATA_TABLES join(",\n", map { sprintf(" HB_TAG(%s)", $_) } @scriptCodeToTag), "\n";
}
print DATA_TABLES "};\n\n";

# Running total reported on STDERR once the tables have been generated.
our $totalData = 0;

# Emit the mirroring offsets collected in @offsets, in the same layout.
print DATA_TABLES "static const int16_t sMirrorOffsets[] = {\n";
if (@offsets) {
    print DATA_TABLES join(",\n", map { " $_" } @offsets), "\n";
}
print DATA_TABLES "};\n\n";

print HEADER "#pragma pack(1)\n\n";
michael@0 | 670 | |
# Format the nsCharProps1 initializer for one code point: mirror offset
# index, Hangul type and combining class, in struct field order.
sub sprintCharProps1
{
    my ($usv) = @_;
    my @props = ($mirror[$usv], $hangul[$usv], $combining[$usv]);
    return sprintf("{%d,%d,%d}, ", @props);
}
genTables(
    "CharProp1",
    "struct nsCharProps1 {\n unsigned char mMirrorOffsetIndex:5;\n unsigned char mHangulType:3;\n unsigned char mCombiningClass:8;\n};",
    "nsCharProps1", 11, 5, \&sprintCharProps1, 1, 2, 1
);
michael@0 | 678 | |
# Format the nsCharProps2 initializer for one code point: script, East Asian
# width, general category, bidi category, xidmod and numeric value, in struct
# field order.
sub sprintCharProps2
{
    my ($usv) = @_;
    my @props = (
        $script[$usv],
        $eaw[$usv],
        $category[$usv],
        $bidicategory[$usv],
        $xidmod[$usv],
        $numericvalue[$usv],
    );
    return sprintf("{%d,%d,%d,%d,%d,%d},", @props);
}
genTables(
    "CharProp2",
    "struct nsCharProps2 {\n unsigned char mScriptCode:8;\n unsigned char mEAW:3;\n unsigned char mCategory:5;\n unsigned char mBidiCategory:5;\n unsigned char mXidmod:4;\n signed char mNumericValue:5;\n unsigned char mHanVariant:2;\n};",
    "nsCharProps2", 11, 5, \&sprintCharProps2, 16, 4, 1
);

print HEADER "#pragma pack()\n\n";
michael@0 | 690 | |
# Pack the 2-bit Han variant values of four consecutive code points, starting
# at the one given, into a single byte (first character in the lowest bits)
# and format it as a hex literal.
sub sprintHanVariants
{
    my ($baseUsv) = @_;
    my $packed = 0;
    for my $slot (0 .. 3) {
        $packed |= $hanVariant[$baseUsv + $slot] << (2 * $slot);
    }
    return sprintf("0x%02x,", $packed);
}
genTables("HanVariant", "", "uint8_t", 9, 7, \&sprintHanVariants, 2, 1, 4);
michael@0 | 703 | |
# Format the @fullWidth entry for one code point as a 4-digit hex literal.
sub sprintFullWidth
{
    my ($usv) = @_;
    my $entry = $fullWidth[$usv];
    return sprintf("0x%04x,", $entry);
}
genTables("FullWidth", "", "uint16_t", 10, 6, \&sprintFullWidth, 0, 2, 1);
michael@0 | 710 | |
# Format the case-mapping word stored for one code point as an eight-digit
# hexadecimal literal followed by a comma.
#
#   $usv  - code point used to index @casemap
sub sprintCasemap
{
  my ($codePoint) = @_;
  my $mapping = $casemap[$codePoint];
  return sprintf("0x%08x,", $mapping);
}
# Generate the CaseMap lookup tables: 11 index bits, 5 char bits,
# planes 0..1, 4 bytes per entry, one code point per entry.
genTables(
  "CaseMap", "", "uint32_t",
  11, 5,
  \&sprintCasemap,
  1, 4, 1
);

# Report the size accumulated across all genTables calls made so far.
print STDERR "Total data = $totalData\n";

# Publish the case-mapping constants next to the tables so the consumer of
# the generated file can use the same values this script used.
foreach my $constant (
  [ "kTitleToUpper",    $kTitleToUpper ],
  [ "kUpperToLower",    $kUpperToLower ],
  [ "kLowerToTitle",    $kLowerToTitle ],
  [ "kLowerToUpper",    $kLowerToUpper ],
  [ "kCaseMapCharMask", $kCaseMapCharMask ],
) {
  printf DATA_TABLES "const uint32_t %s = 0x%08x;\n", @$constant;
}
# Blank line that terminates the group of constants.
printf DATA_TABLES "\n";
michael@0 | 725 | |
# Emit one de-duplicated, multi-level lookup table as C source.
#
# Every plane from 0 to $maxPlane is split into (1 << $indexBits) pages of
# (1 << $charBits) code points.  Pages with identical text are stored once
# and referenced by index; per-plane page maps are de-duplicated the same way.
#
# Parameters:
#   $prefix        - name fragment used in the emitted identifiers
#                    (k<prefix>MaxPlane, s<prefix>Planes, s<prefix>Pages,
#                    s<prefix>Values, ...)
#   $typedef       - C declaration text written to HEADER, or '' for none
#   $type          - C element type of the s<prefix>Values array
#   $indexBits     - log2 of the number of pages per plane
#   $charBits      - log2 of the number of code points per page
#   $func          - code ref called with a code point; returns the text of
#                    one table entry INCLUDING one trailing character, since
#                    the last character of each page is chopped off
#   $maxPlane      - highest plane covered; 0 suppresses the plane table
#   $bytesPerEntry - size of one entry, used only for the size report
#   $charsPerEntry - number of consecutive code points covered by one entry
#
# Side effects: writes to the DATA_TABLES and HEADER filehandles, prints a
# size report to STDERR, and adds the table size to $totalData.
# Dies if $maxPlane is 0 yet more than one page map was produced.
#
# NOTE(review): presumably $indexBits + $charBits is expected to be 16 so
# that the pages exactly cover a plane - confirm against callers.
sub genTables
{
  my ($prefix, $typedef, $type, $indexBits, $charBits, $func, $maxPlane, $bytesPerEntry, $charsPerEntry) = @_;

  print DATA_TABLES "#define k${prefix}MaxPlane $maxPlane\n";
  print DATA_TABLES "#define k${prefix}IndexBits $indexBits\n";
  print DATA_TABLES "#define k${prefix}CharBits $charBits\n";

  my $indexLen = 1 << $indexBits;
  my $charsPerPage = 1 << $charBits;
  my %charIndex = ();      # page text     -> index into @char
  my %pageMapIndex = ();   # packed pagemap -> index into @pageMap
  my @pageMap = ();        # unique packed page maps, in first-seen order
  my @char = ();           # unique page texts, in first-seen order

  # One byte per plane above plane 0: the page-map index for that plane.
  my $planeMap = "\x00" x $maxPlane;
  foreach my $plane (0 .. $maxPlane) {
    # Two bytes (one packed 'S') per page.  The parentheses are required:
    # 'x' and '*' have equal precedence and associate left, so without them
    # the repeated string would be multiplied as a number instead of the
    # repeat count being doubled.
    my $pageMap = "\x00" x ($indexLen * 2);
    foreach my $page (0 .. $indexLen - 1) {
      my $charValues = "";
      for (my $ch = 0; $ch < $charsPerPage; $ch += $charsPerEntry) {
        my $usv = $plane * 0x10000 + $page * $charsPerPage + $ch;
        $charValues .= $func->($usv);
      }
      # Drop the trailing character supplied by the formatter.
      chop $charValues;

      unless (exists $charIndex{$charValues}) {
        $charIndex{$charValues} = scalar keys %charIndex;
        $char[$charIndex{$charValues}] = $charValues;
      }
      substr($pageMap, $page * 2, 2) = pack('S', $charIndex{$charValues});
    }

    unless (exists $pageMapIndex{$pageMap}) {
      $pageMapIndex{$pageMap} = scalar keys %pageMapIndex;
      $pageMap[$pageMapIndex{$pageMap}] = $pageMap;
    }
    # Plane 0 has no slot in the plane table.
    if ($plane > 0) {
      substr($planeMap, $plane - 1, 1) = pack('C', $pageMapIndex{$pageMap});
    }
  }

  if ($maxPlane) {
    print DATA_TABLES "static const uint8_t s${prefix}Planes[$maxPlane] = {";
    print DATA_TABLES join(',', map { sprintf("%d", $_) } unpack('C*', $planeMap));
    print DATA_TABLES "};\n\n";
  }

  my $chCount = scalar @char;
  # Page-map entries are indices into the values array; widen them to
  # 16 bits once there are more than 255 unique pages.
  my $pmBits = $chCount > 255 ? 16 : 8;
  my $pmCount = scalar @pageMap;
  if ($maxPlane == 0) {
    die "there should only be one pageMap entry!" if $pmCount > 1;
    print DATA_TABLES "static const uint${pmBits}_t s${prefix}Pages[$indexLen] = {\n";
  } else {
    print DATA_TABLES "static const uint${pmBits}_t s${prefix}Pages[$pmCount][$indexLen] = {\n";
  }
  for (my $i = 0; $i < scalar @pageMap; ++$i) {
    print DATA_TABLES $maxPlane > 0 ? " {" : " ";
    print DATA_TABLES join(',', map { sprintf("%d", $_) } unpack('S*', $pageMap[$i]));
    print DATA_TABLES $maxPlane > 0 ? ($i < $#pageMap ? "},\n" : "}\n") : "\n";
  }
  print DATA_TABLES "};\n\n";

  print HEADER "$typedef\n\n" if $typedef ne '';

  my $pageLen = $charsPerPage / $charsPerEntry;
  print DATA_TABLES "static const $type s${prefix}Values[$chCount][$pageLen] = {\n";
  for (my $i = 0; $i < scalar @char; ++$i) {
    print DATA_TABLES " {";
    print DATA_TABLES $char[$i];
    print DATA_TABLES $i < $#char ? "},\n" : "}\n";
  }
  print DATA_TABLES "};\n\n";

  # Size accounting: page maps + unique pages + plane table.
  my $dataSize = $pmCount * $indexLen * $pmBits/8 +
                 $chCount * $pageLen * $bytesPerEntry +
                 $maxPlane;
  $totalData += $dataSize;

  print STDERR "Data for $prefix = $dataSize\n";
}
michael@0 | 808 | |
# Finish the generated data-tables file with the machine-generated notice.
print DATA_TABLES <<"__END";
/*
* * * * * This file contains MACHINE-GENERATED DATA, do not edit! * * * * *
*/
__END

# Buffered write errors only surface when the handle is closed, so a failed
# close means the generated file may be truncated; stop instead of silently
# leaving a bad file behind.
close DATA_TABLES
  or die "error closing generated data tables file: $!\n";
michael@0 | 816 | |
# Emit the script-code enum: one MOZ_SCRIPT_<name> constant per entry of
# @scriptCodeToName, numbered by array index, followed by the entry count
# and an invalid-script sentinel.
print HEADER "enum {\n";
foreach my $code (0 .. $#scriptCodeToName) {
  print HEADER " MOZ_SCRIPT_", $scriptCodeToName[$code], " = ", $code, ",\n";
}
print HEADER "\n MOZ_NUM_SCRIPT_CODES = ", scalar @scriptCodeToName, ",\n";
print HEADER "\n MOZ_SCRIPT_INVALID = -1\n";
print HEADER "};\n\n";

# Close the preprocessor conditional opened earlier in the generated header
# and append the machine-generated notice.
print HEADER <<"__END";
#endif
/*
* * * * * This file contains MACHINE-GENERATED DATA, do not edit! * * * * *
*/
__END

# Buffered write errors only surface when the handle is closed, so a failed
# close means the generated header may be truncated; stop instead of silently
# leaving a bad file behind.
close HEADER
  or die "error closing generated header file: $!\n";
michael@0 | 833 |