Sat, 03 Jan 2015 20:18:00 +0100
Conditionally enable double key logic according to:
private browsing mode or privacy.thirdparty.isolate preference and
implement in GetCookieStringCommon and FindCookie where it counts...
With some reservations of how to convince FindCookie users to test
condition and pass a nullptr when disabling double key logic.
michael@0 | 1 | #!/usr/bin/perl |
michael@0 | 2 | # |
michael@0 | 3 | # This Source Code Form is subject to the terms of the Mozilla Public |
michael@0 | 4 | # License, v. 2.0. If a copy of the MPL was not distributed with this |
michael@0 | 5 | # file, You can obtain one at http://mozilla.org/MPL/2.0/. |
michael@0 | 6 | |
# Fixed preamble of the generated properties file: licence notice,
# "generated file" banner, the list name, the copyright-sign entry and the
# banner introducing the windows-1252 (0x80 - 0x9F) section.  It is printed
# verbatim at the top of the output.
$header = <<"END_OF_HEADER";
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

#
# THIS FILE IS GENERATED BY mozilla/intl/unicharutil/tools/gentransliterate.pl
# PLEASE DO NOT MODIFY THIS FILE BY HAND
#
entity.list.name=transliterate
entity.169=(c)
#
#
# Here are the windows-1252 characters from the range 0x80 - 0x9F
#
END_OF_HEADER
michael@0 | 23 | |
# Hand-maintained entries (windows-1252 punctuation, spaces, dashes,
# invisible operators, Hebrew punctuation).  Each entry is preceded by a
# single-'#' comment naming the character; those comments are stripped before
# the text is written out, while the '##' banner at the end is kept.
# This is an interpolating heredoc, so "\\u0020" below is emitted as \u0020.
$handcoded = <<"END_OF_HANDCODED";
# EURO SIGN
entity.8364=EUR
# SINGLE LOW-9 QUOTATION MARK
entity.8218=,
# LATIN SMALL LETTER F WITH HOOK
entity.402=f
# DOUBLE LOW-9 QUOTATION MARK
entity.8222="
# DAGGER
entity.8224=+
# DOUBLE DAGGER
entity.8225=++
# MODIFIER LETTER CIRCUMFLEX ACCENT
entity.710=^
# PER MILLE SIGN
entity.8240=0/00
# SINGLE LEFT-POINTING ANGLE QUOTATION MARK
entity.8249=<
# LATIN CAPITAL LIGATURE OE
entity.338=OE
# LEFT SINGLE QUOTATION MARK
entity.8216='
# RIGHT SINGLE QUOTATION MARK
entity.8217='
# LEFT DOUBLE QUOTATION MARK
entity.8220="
# RIGHT DOUBLE QUOTATION MARK
entity.8221="
# BULLET
entity.8226=.
# EN DASH
entity.8211=--
# EM DASH
entity.8212=---
# SMALL TILDE
entity.732=~
# SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
entity.8250=>
# LATIN SMALL LIGATURE OE
entity.339=oe
# U+2000 EN QUAD
entity.8192=\\u0020
# U+2001 EM QUAD
entity.8193=\\u0020
# U+2010 HYPHEN
entity.8208=-
# U+2011 NON-BREAKING HYPHEN
entity.8209=-
# U+2012 FIGURE DASH
entity.8210=-
# U+2015 HORIZONTAL BAR
entity.8213=--
# U+200B, ZERO WIDTH SPACE (a.k.a. InvisibleComma)
entity.8203=
# U+2061, ApplyFunction, character showing function application in presentation tagging
entity.8289=
# U+2062, InvisibleTimes, marks multiplication when it is understood without a mark
entity.8290=
# U+2146, DifferentialD, d for use in differentials, e.g., within integrals
entity.8518=d
# U+2212, MINUS SIGN, official Unicode minus sign
entity.8722=-
# Hebrew punctuation
# U+05BE HEBREW PUNCTUATION MAQAF
entity.1470=-
# U+05C0 HEBREW PUNCTUATION PASEQ
entity.1472=|
# U+05C3 HEBREW PUNCTUATION SOF PASUQ
entity.1475=:
# U+05F3 HEBREW PUNCTUATION GERESH
entity.1523='
# U+05F4 HEBREW PUNCTUATION GERSHAYIM
entity.1524="
##
## End of hand coded section
## Below are generated from the unicode character database
##
END_OF_HANDCODED
michael@0 | 103 | |
# Decomposition lookup keyed by code point (4+ hex digits, as in field 0 of
# the Unicode data file); the value is that character's decomposition field.
# The script reads and writes %table, so initialise the hash (the previous
# "@table = ()" initialised an unrelated, unused array).
%table = ();
# Build an ASCII approximation of a Latin letter from its Unicode character
# name, e.g. "LATIN SMALL LETTER E WITH ACUTE" -> "e'".
#
# The result is, in order:
#   - an apostrophe, when the name contains "PRECEDED BY APOSTROPHE";
#   - the letters following "CAPITAL LETTER" (kept upper case) and/or
#     "SMALL LETTER" (lower-cased);
#   - one ASCII mark per recognised diacritic word, in name order.
# Words that are not in the mark table contribute nothing.
#
# All working state is lexical: the earlier version used the package
# variables $char, @f and $item and so overwrote the caller's @f.  Every word
# of the name is visited; the earlier "while ($item = shift @f)" loop stopped
# at the first empty or "0" word (for example after a doubled space).
#
#   $cmt     - character name (field 1 of the Unicode data file)
#   returns  - the ASCII approximation, possibly the empty string
sub FromLatinComment
{
    my ($cmt) = @_;

    # Diacritic word -> ASCII stand-in.  BREVE and CARON share "(".
    my %mark_for = (
        "DOT"        => ".",
        "DIAERESIS"  => "\"",
        "BREVE"      => "(",
        "ACUTE"      => "'",
        "GRAVE"      => "`",
        "TILDE"      => "~",
        "CARON"      => "(",
        "HOOK"       => "?",
        "CEDILLA"    => ",",
        "MACRON"     => "-",
        "CIRCUMFLEX" => "^",
        "RING"       => "*",
        "OGONEK"     => ";",
        "LINE"       => "_",
        "COMMA"      => ",",
        "STROKE"     => "/",
        "HORN"       => "+",
    );

    my $char = "";
    if ($cmt =~ /PRECEDED BY APOSTROPHE/) {
        $char = "'";
    }
    if ($cmt =~ /CAPITAL LETTER ([A-Z]*)/) {
        $char .= $1;
    }
    if ($cmt =~ /SMALL LETTER ([A-Z]*)/) {
        $char .= lc($1);
    }

    foreach my $word (split(/ /, $cmt)) {
        $char .= $mark_for{$word} if exists $mark_for{$word};
    }

    return $char;
}
# Print a diagnostic, prefixed with "WARNING: ", on the currently selected
# output handle.
#   $message - text to report (callers pass the offending data line)
sub warning
{
    my $message = $_[0];
    print "WARNING: $message \n";
}
# Debug variant of output(): writes one "entity.<decimal>=<string>" line to
# the currently selected handle instead of the OUT file, with no filtering.
#   $u, $cmt - code point (hex) and character name; accepted but not printed
#   $udec    - code point in decimal
#   $str     - transliteration text
sub doutput
{
    my (undef, undef, $udec, $str) = @_;
    # The "# U+xxxx name" comment line is deliberately not printed; it is
    # only useful while debugging.
    print "entity.$udec=$str\n";
}
# Write one "entity.<decimal>=<string>" line to the OUT file handle.
#
# A string that is free of \uXXXX escapes at or above U+0080 is written
# as is.  Otherwise the entry is retried with a string derived from the
# character name, but only for names containing LATIN and LETTER and not
# LONG; every other such entry is dropped without output.
#   $u     - code point (hex)
#   $cmt   - character name
#   $udec  - code point in decimal
#   $str   - candidate transliteration
sub output
{
    my ($u, $cmt, $udec, $str) = @_;

    unless (decomposeIntoNonASCII($str)) {
        # The "# U+xxxx name" comment line is deliberately not printed; it
        # is only useful while debugging.
        print OUT "entity.$udec=$str\n";
        return;
    }

    if ($cmt =~ /LATIN/ && $cmt =~ /LETTER/ && $cmt !~ /LONG/) {
        output($u, $cmt, $udec, FromLatinComment($cmt));
    }
}
michael@0 | 189 | |
# True when the string contains a \uXXXX escape (upper-case hex digits) whose
# value is U+0080 or above, i.e. something that is not plain ASCII.
sub decomposeIntoNonASCII
{
    my $text = $_[0];
    # First three digits: non-zero first digit, or non-zero second digit, or
    # "00" followed by 8-F; then any fourth digit.
    my $escape_above_ascii = qr/\\u([1-9A-F][0-9A-F][0-9A-F]|[0-9A-F][1-9A-F][0-9A-F]|00[8-9A-F])[0-9A-F]/;
    return $text =~ $escape_above_ascii;
}
michael@0 | 195 | |
# Replace the accent code points in a decomposition string (space-separated
# hex code points, optionally with a <tag>) by the code points of their ASCII
# look-alikes, so that later stages can emit plain ASCII.
#
# Only whole code points are replaced: a match must not be preceded or
# followed by another hex digit, so "0300" is folded but "10300" is left
# alone (the earlier unanchored substitutions rewrote it to "10060").
# The mapping lives in a lexical table; the earlier version set nine package
# globals ($grave, $acute, ...) on every call.
#
# Code points 00B0, 00B8, 0306, 0309, 030A and 030C have no mapping and are
# returned unchanged, as before.
#
#   $dec     - decomposition string
#   returns  - the string with the mapped code points substituted
sub foldcombining
{
    my ($dec) = @_;

    my $grave       = "0060";
    my $acute       = "0027";
    my $hat         = "005E";
    my $tilde       = "007E";
    my $overscore   = "002D";   # should be 00AF but we can only handle ASCII now
    my $umlaut      = "0022";   # should be 00A8 but we can only handle ASCII now
    my $doubleacute = "0022";
    my $dot         = "002E";
    my $doublegrave = "0060 0060";

    my %ascii_for = (
        "00A8" => $umlaut,
        "00AF" => $overscore,
        "00B4" => $acute,
        "00B7" => $dot,
        "0300" => $grave,
        "0301" => $acute,
        "0302" => $hat,
        "0303" => $tilde,
        "0304" => $overscore,
        "0305" => $overscore,
        "0307" => $dot,
        "0308" => $umlaut,
        "030B" => $doubleacute,
        "030D" => $acute,
        "030E" => $doubleacute,
        "030F" => $doublegrave,
    );

    # One pass is enough: no replacement value is itself a key.
    my $codes = join("|", sort keys %ascii_for);
    $dec =~ s/(?<![0-9A-F])($codes)(?![0-9A-F])/$ascii_for{$1}/g;

    return $dec;
}
# Look a code point up in %table and, when it has an entry, continue the
# lookup on its decomposition; a code point without an entry is returned
# unchanged.
#
# $t is lexical here: the earlier version used a package global that was
# shared by every level of the recursion.  The pointless /e on the tag
# removal is gone as well.
#
#   $dec     - code point as hex digits (the key format of %table)
#   returns  - $dec when it has no %table entry, otherwise the result of the
#              recursive lookup (see the note below)
sub rdecompose
{
    my ($dec) = @_;
    return $dec unless exists $table{$dec};

    my $t = $table{$dec};
    $t =~ s/<[a-zA-Z]*>//g;     # drop a formatting tag such as <compat>
    $t = foldcombining($t);

    # NOTE(review): this recurses on $table{$t}, i.e. on a lookup keyed by the
    # whole decomposition string, not on $t itself.  That lookup normally has
    # no entry, so the recursion receives undef and hands it straight back.
    # Kept as is because the intended behaviour is not clear from this file;
    # presumably each code point of $t should be decomposed in turn - confirm
    # before changing.
    return rdecompose($table{$t});
}
# Turn a decomposition field into output text.
#
#   $removeprefix - formatting tag to strip, e.g. "<compat>", or "" for none
#   $dec          - decomposition field: space-separated hex code points,
#                   possibly with the tag and surrounding literal text
#   returns       - ASCII characters for code points 0000-007F and \uXXXX
#                   escapes for everything else, with all spaces removed
#
# A lone 0020 or 005C is returned as the escape \u0020 / \u005C instead of
# the literal character.
#
# Changes from the earlier version:
#   - the tag is matched literally (\Q...\E) instead of being interpolated as
#     a regular expression;
#   - with an empty prefix nothing is stripped up front; before, the pattern
#     degenerated to a single space and removed every separator before the
#     code points were processed, which could splice digits of neighbouring
#     code points together;
#   - no package global ($k) is used for the slash.
sub decompose
{
    my ($removeprefix, $dec) = @_;

    if ($removeprefix ne "") {
        my $tag = $removeprefix . " ";
        $dec =~ s/\Q$tag\E//g;
    }

    return "\\u0020" if $dec eq "0020";
    return "\\u005C" if $dec eq "005C";

    $dec =~ s/2044/\//g;                                             # 2044 becomes a plain "/"
    $dec =~ s/([0-9A-F][0-9A-F][0-9A-F][0-9A-F])/rdecompose($1)/eg;  # look each code point up
    $dec =~ s/([0-9A-F][0-9A-F][0-9A-F][0-9A-F])/\\u$1/g;            # 0041 -> \u0041
    $dec =~ s/\\u00([0-7][0-9A-F])/pack("C",hex($1))/eg;             # \u0041 -> A
    $dec =~ s/ //g;                                                  # separators go last

    return $dec;
}
michael@0 | 268 | |
######################################################################
#
# Open the unicode database file
#
######################################################################
# The file is read twice below through this one handle, rewinding in between.
my $unicode_data_file = "UnicodeData-Latest.txt";
open(my $unicode_data, "<", $unicode_data_file)
    || die "cannot find $unicode_data_file: $!";

######################################################################
#
# Open the output file
#
######################################################################
# OUT stays a bareword handle because output() prints to it by name.
my $output_file = "../tables/transliterate.properties";
open(OUT, ">", $output_file)
    || die "cannot open output $output_file file: $!";

print OUT $header;

# remove comments from $handcoded (lines starting with a single '#';
# the '##' banner lines are kept)
$handcoded =~ s/^#[^#].*\n//mg;
print OUT $handcoded;

######################################################################
#
# First pass: collect decompositions into %table
#
######################################################################
while (my $line = <$unicode_data>) {
    chomp $line;
    my @fields = split(/;/, $line);
    # FIXME(review): $u is not assigned anywhere before this loop, so
    # hex($u) is 0, the test below never succeeds and %table stays empty.
    # This was presumably meant to be hex($fields[0]).  It is left alone on
    # purpose: filling %table would start exercising the table-hit path of
    # rdecompose() and change the generated file, so both need to be fixed
    # and the output re-checked together.
    my $udec = hex($u);
    if (($udec > 256) && ($fields[5] ne "")) {
        $table{$fields[0]} = $fields[5];
    }
}
seek($unicode_data, 0, 0)
    || die "cannot rewind $unicode_data_file: $!";

######################################################################
#
# Second pass: process the file line by line
#
######################################################################

# Formatting tags, in the order in which they are tested against the
# decomposition field; the first one found decides what happens.
my @known_tags = qw(wide narrow circle fraction small vertical super sub
                    font square compat isolated medial final initial noBreak);
# Tags whose decomposition is emitted as is once the tag is stripped.
# circle, super, sub and noBreak are handled individually below; every other
# known tag is ignored.
my %emit_plain = map { $_ => 1 } qw(wide fraction small font compat);

while (my $line = <$unicode_data>) {
    chomp $line;
    ######################################################################
    #
    # Get value from fields
    #
    ######################################################################
    my @fields = split(/;/, $line);
    my $u    = $fields[0];   # The unicode value
    my $cmt  = $fields[1];   # The comment
    my $dec  = $fields[5];   # The decomposed value
    my $d1   = $fields[6];
    my $d2   = $fields[7];
    my $d3   = $fields[8];
    my $udec = hex($u);

    # ASCII needs no transliteration
    next unless $udec > 128;

    if ($dec ne "") {
        # have decomposition
        if ($dec =~ /</) {
            # formatted decomposition
            my ($tag) = grep { index($dec, "<$_>") >= 0 } @known_tags;

            if (!defined $tag) {
                warning($line);
            } elsif ($tag eq "circle") {
                output($u, $cmt, $udec, decompose("<circle>", "(" . $dec . ")"));
            } elsif ($tag eq "super") {
                output($u, $cmt, $udec, "^(" . decompose("<super>", $dec) . ")");
            } elsif ($tag eq "sub") {
                output($u, $cmt, $udec, "v(" . decompose("<sub>", $dec) . ")");
            } elsif ($tag eq "noBreak") {
                # only the no-break space is handled
                if ($dec eq "<noBreak> 0020") {
                    output($u, $cmt, $udec, "\\u0020");
                }
            } elsif ($emit_plain{$tag}) {
                output($u, $cmt, $udec, decompose("<$tag>", $dec));
            }
            # narrow, vertical, square, isolated, medial, final, initial:
            # ignored
        } elsif ($cmt =~ /LATIN/) {
            # decomposition without format code; only LATIN characters are
            # handled, every other script is ignored
            $dec = foldcombining($dec);
            output($u, $cmt, $udec, decompose("", $dec));
        }
    } else {
        # do not have decomposition: fall back to the first non-empty of
        # fields 6, 7 and 8
        my ($left, $right) = ($cmt =~ /CIRCLED/) ? ("(", ")") : ("[", "]");
        if ($d1 ne "") {
            # are numeric characters
            output($u, $cmt, $udec, $d1);
        } elsif ($d2 ne "") {
            # circled characters get ( ), others [ ]
            output($u, $cmt, $udec, $left . $d2 . $right);
        } elsif ($d3 ne "") {
            output($u, $cmt, $udec, $left . $d3 . $right);
        }
        # otherwise: not numeric characters, nothing to emit
    }
}
######################################################################
#
# Close files
#
######################################################################
close($unicode_data);
# Buffered write errors only surface here, so check the result.
close(OUT) || die "cannot finish writing $output_file: $!";
michael@0 | 477 |