intl/uconv/tools/gen-big5hkscs-2001-mozilla.pl

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

michael@0 1 #!/usr/bin/perl -w
michael@0 2 #
michael@0 3 # gen-big5hkscs-2001-mozilla.pl
michael@0 4 # a Perl script that generates Big5-HKSCS <-> Unicode
michael@0 5 # conversion tables for Mozilla
michael@0 6 #
michael@0 7 # Author (of the original Perl script):
michael@0 8 # Anthony Fok <anthony@thizlinux.com> <foka@debian.org>
michael@0 9 # Copyright (C) 2001, 2002 ThizLinux Laboratory Ltd.
michael@0 10 # License: GNU General Public License, v2 or later.
michael@0 11 #
michael@0 12 # This version includes original C source code from
michael@0 13 # glibc-2.2.5/iconvdata/big5hkscs.c by Ulrich Drepper <drepper@redhat.com>
michael@0 14 # Roger So <roger.so@sw-linux.com>
michael@0 15 #
michael@0 16 # First attempt for Qt-2.3.x: 2001-09-21
michael@0 17 # A working version for Qt-2.3.x: 2001-10-30
michael@0 18 # Ported to glibc-2.2.5 with HKSCS-2001: 2002-03-21
michael@0 19 # Adapted to generate conversion tables for Mozilla: 2002-11-26
michael@0 20 # Adapted to generate conversion tables for Mozilla: 2002-11-30
michael@0 21 # Cleaned up the script somewhat: 2002-12-04
michael@0 22 # Minor revisions for submitting to Mozilla Bugzilla: 2002-12-10
michael@0 23 #
michael@0 24 # Notes:
michael@0 25 #
michael@0 26 # 1. The latest version of this script may be found in:
michael@0 27 # http://www.thizlinux.com/~anthony/hkscs/gen-glibc-big5hkscs.pl
michael@0 28 # http://people.debian.org/~foka/hkscs/gen-glibc-big5hkscs.pl
michael@0 29 # Or, better yet, e-mail me and ask for the latest version.
michael@0 30 #
michael@0 31 # 2. This script generates data from 3 tables:
michael@0 32 # a. http://www.microsoft.com/typography/unicode/950.txt
michael@0 33 # b. http://www.info.gov.hk/digital21/chi/hkscs/download/big5-iso.txt
michael@0 34 # c. http://www.info.gov.hk/digital21/chi/hkscs/download/big5cmp.txt
michael@0 35 #
michael@0 36 # Make sure your big5-iso.txt is the latest HKSCS-2001 version.
michael@0 37 #
michael@0 38 # 3. [glibc]: I have currently split the ucs_to_big5_hkscs_?[] tables into
michael@0 39 # different areas similar to the way Ulrich and Roger did it,
michael@0 40 # but extended for HKSCS-2001.
michael@0 41 #
michael@0 42 # 4. [Mozilla]: This script is very quick-and-dirty in some places.
michael@0 43 # Call either gen_mozilla_uf() or gen_mozilla_ut() to generate
michael@0 44 # the appropriate tables for feeding into "fromu" or "tou".
michael@0 45 #
michael@0 46 # 5. [CharMapML]: The comments regarding TW-BIG5 herein need to be organized.
michael@0 47 # Also, please make sure "$hkscs_mode = 0;" for TW-BIG5 mode.
michael@0 48 # Otherwise, this script would generate a HKSCS table.
michael@0 49 # (Yes, I know, I should clean up this script and make it more modular,
michael@0 50 # and with command-line options or whatnot. I'll do that later. :-)
michael@0 51 #
michael@0 52 # If you have any questions or concerns, please feel free to contact me
michael@0 53 # at Anthony Fok <anthony@thizlinux.com> or <foka@debian.org> :-)
michael@0 54 #
michael@0 55 # Last but not least, special thanks to ThizLinux Laboratory Ltd. (HK)
michael@0 56 # for their generous support in this work.
michael@0 57 #
michael@0 58
michael@0 59 # 1. UDA3, 0x8840 - 0x8dfe
michael@0 60 # 2. UDA2, 0x8e40 - 0xa0fe
michael@0 61 # 3. VDA, 0xc6a1 - 0xc8fe
michael@0 62
michael@0 63 #use Getopt::Std;
michael@0 64
# Conversion tables shared by every subroutine in this script.
# Keys and values are hexadecimal strings without a "0x" prefix.
my %b2u;    # Big5 code      -> Unicode code point
my %u2b;    # Unicode code point -> Big5 code

# Scratch variables reused by the readers and generators below.
my ( $unicode, $big5, $high, $low, $i, $count );

# Configuration switches (there are no command-line options; edit here).
my $debug = 0;         # true: report redefined mappings on STDERR
my $hkscs_mode = 1;    # true: build Big5-HKSCS; false: build TW-BIG5
my $kangxi = 0;        # true: adjust_radicals() also maps Big5 -> Kangxi
                       # Radicals (the Unicode -> Big5 side is always added)
my $use_range = 0;     # true (TW-BIG5 only): gen_charmapml() emits <range>
                       # tags for the user-defined areas
my $bmp_only = 1;      # true: read_hkscs_main() stores the third table
                       # column ($iso2000) in %b2u; false: the fourth
                       # column ($iso2001)

#
# Subroutine declarations (prototyped, so they can be called before their
# definitions further down the file)
#
sub read_cp950();
sub adjust_radicals();
sub read_hkscs_main();
sub read_hkscs_cmp();
sub post_tuning();
sub gen_charmapml();
sub gen_check_b2u();
sub gen_check_u2b();
sub gen_mozilla_uf();
sub gen_mozilla_ut();
sub gen_glibc();

###########################################################################
#
# Main program
#

# Step 1: Microsoft's CP950 is the base Big5 table.
read_cp950();

# Step 2: Kangxi Radicals.  The Big5 -> Unicode direction is only added
# when $kangxi is set.
adjust_radicals();

# Step 3: the HKSCS table (filtered according to $hkscs_mode), followed by
# the Big5 compatibility points when building Big5-HKSCS.
read_hkscs_main();
if ($hkscs_mode) {
    read_hkscs_cmp();
}

# Step 4: manual adjustments.
post_tuning();

# Step 5: emit exactly one of the following outputs.
#gen_charmapml();
gen_mozilla_uf();
#gen_mozilla_ut();
#gen_check_u2b();
#gen_glibc();

# End of program
exit(0);
michael@0 118
michael@0 119
michael@0 120 #############################################################################
michael@0 121 #
michael@0 122 # Subroutines
michael@0 123 #
michael@0 124
sub read_cp950() {

    # Load Microsoft's CP950 table from "950.txt" (current directory) as the
    # base Big5 mapping.
    #
    #   * DBCSTABLE sections populate %b2u (Big5 -> Unicode).
    #   * WCTABLE sections populate %u2b (Unicode -> Big5), except for the
    #     best-fit ("faked") entries filtered out below.
    #
    # Reading stops at the ENDCODEPAGE line.  Dies with the system error
    # message if the file cannot be opened.  Takes no arguments and returns
    # nothing useful; it works on the file-scoped tables and scratch
    # variables ($count, $high, $low, $unicode, $big5, $i).
    my $path = "950.txt";
    open( my $cp950, '<', $path ) or die "Cannot open $path: $!\n";

    # Individual Unicode code points whose CP950 WCTABLE entry is dropped.
    my %is_best_fit = map { $_ => 1 } (

        # U+00A2 CENT SIGN, U+00A3 POUND SIGN, U+00A5 YEN SIGN,
        # U+00B4 ACUTE ACCENT, U+00B5 MICRO SIGN, U+00B8 CEDILLA
        0xA2, 0xA3, 0xA5, 0xB4, 0xB5, 0xB8,

        # Assorted punctuation and symbols (the original author marked
        # U+0305 and U+203E with "???")
        0x0305, 0x2015, 0x2016, 0x2022, 0x2024, 0x2033, 0x203E, 0x2216,
        0x2218, 0x2263, 0x2307, 0x2609, 0x2641, 0x301C, 0x3030,

        # U+FF3E FULLWIDTH CIRCUMFLEX ACCENT, U+FF40 FULLWIDTH GRAVE ACCENT,
        # U+FF64 HALFWIDTH IDEOGRAPHIC COMMA
        0xFF3E, 0xFF40, 0xFF64,
    );

    # 0 = outside any table, 1 = inside a DBCSTABLE, 2 = inside a WCTABLE
    my $mode = 0;
    while (<$cp950>) {
        s/\r//;
        chomp;
        next if /^$/;
        last if /^ENDCODEPAGE/;

        if (/^DBCSTABLE (\d+)\s+;LeadByte = 0x([0-9a-f]{2})/) {

            # Section header: $count entries for lead byte $high follow.
            $mode = 1;
            ( $count, $high ) = ( $1, $2 );
            $i = 0;
            next;
        }
        if (/^WCTABLE (\d+)/) {
            $mode  = 2;
            $count = $1;
            $i     = 0;
            next;
        }
        next if $mode == 0;

        if ( $mode == 1 ) {

            # "<trail byte>\t<unicode>\t<comment>"; the comment is ignored.
            ( $low, $unicode ) = split "\t";
            $low     =~ s/^0x//;
            $unicode =~ s/^0x//;
            $big5 = $high . $low;
            $b2u{ uc($big5) } = uc($unicode);

            # Leave the section once all announced entries have been read.
            if ( ++$i == $count ) { $mode = 0; $count = 0; next; }
        }

        if ( $mode == 2 ) {

            # "<unicode>\t<big5>\t<comment>"; the comment is ignored.
            ( $unicode, $big5 ) = split "\t";
            $unicode =~ s/^0x//;
            $big5    =~ s/^0x//;
            my $u  = hex($unicode);
            my $b5 = hex($big5);

            # Skip Microsoft's over-generous Unicode -> Big5 mappings:
            my $best_fit =

              # "faked" accented Latin characters (single byte that is not
              # the same value as the code point)
              ( $b5 <= 0xFF && $b5 != $u )

              # "faked" Ideographic Annotation marks
              || ( $u >= 0x3192 && $u <= 0x319F )

              # "faked" Parenthesized Ideographs
              || ( $u >= 0x3220 && $u <= 0x3243 )

              # "faked" Circled Ideographs, except Circled Ideograph Correct
              || ( $u >= 0x3280 && $u <= 0x32B0 && $u != 0x32A3 )

              # individually listed code points
              || $is_best_fit{$u};

            $u2b{ uc($unicode) } = uc($big5) unless $best_fit;

            if ( ++$i == $count ) { $mode = 0; $count = 0; next; }
        }
    }
    close $cp950;
}
michael@0 211
sub adjust_radicals() {

    # Map B5+C6BF .. B5+C6D7 to the Kangxi Radicals block.
    #
    # The Unicode -> Big5 direction (%u2b) is always added; the
    # Big5 -> Unicode direction (%b2u) is added only when $kangxi is true.
    #
    # Background (from the original author's notes):
    #   * TW-BIG5 as drafted by Autrijus uses Kangxi Radicals whenever
    #     possible, while Big5-HKSCS tends towards the character in
    #     Unicode CJK Ideographs.
    #   * HKSCS does not explicitly define B5+C6CF, B5+C6D3, B5+C6D5 and
    #     B5+C6D7, but has these characters at B5+FBFD, B5+FCD3, B5+FEC1
    #     and B5+90C4, mapped to U+5EF4, U+65E0, U+7676 and U+96B6
    #     respectively.
    #   * HKSCS maps B5+C6CD to U+2F33 just like TW-BIG5, but it also maps
    #     B5+FBF4 to U+5E7A.

    # Flat list of (Big5 code, Kangxi Radical code point) pairs.
    my @radical_pairs = qw(
      C6BF 2F02
      C6C0 2F03
      C6C1 2F05
      C6C2 2F07
      C6C3 2F0C
      C6C4 2F0D
      C6C5 2F0E
      C6C6 2F13
      C6C7 2F16
      C6C8 2F19
      C6C9 2F1B
      C6CA 2F22
      C6CB 2F27
      C6CC 2F2E
      C6CD 2F33
      C6CE 2F34
      C6CF 2F35
      C6D0 2F39
      C6D1 2F3A
      C6D2 2F41
      C6D3 2F46
      C6D4 2F67
      C6D5 2F68
      C6D6 2FA1
      C6D7 2FAA
    );

    for ( my $n = 0 ; $n < @radical_pairs ; $n += 2 ) {
        my ( $code, $radical ) = @radical_pairs[ $n, $n + 1 ];
        $b2u{$code}    = $radical if $kangxi;
        $u2b{$radical} = $code;
    }
}
michael@0 277
sub read_hkscs_main() {

    # Merge the HKSCS table "big5-iso.txt" (current directory) into %b2u
    # and %u2b.
    #
    # Every line containing four hexadecimal fields is used: the Big5 code
    # followed by three Unicode columns, held here as $iso1993, $iso2000
    # and $iso2001.  Other lines are ignored.
    #
    #   * %b2u receives the $iso2000 column when $bmp_only is true and the
    #     $iso2001 column otherwise.
    #   * %u2b receives an entry for each of the three columns.
    #
    # Outside HKSCS mode only part of the table is taken (see below).
    # With $debug set, every redefinition is reported on STDERR.
    # Dies with the system error message if the file cannot be opened.
    my $path = "big5-iso.txt";
    open( my $table, '<', $path ) or die "Cannot open $path: $!\n";

    while (<$table>) {
        next
          unless
/([[:xdigit:]]{4})\s+([[:xdigit:]]{4})\s+([[:xdigit:]]{4})\s+([[:xdigit:]]{4,5})/;
        my ( $iso1993, $iso2000, $iso2001 );
        ( $big5, $iso1993, $iso2000, $iso2001 ) = ( $1, $2, $3, $4 );

        my $code = hex($big5);

        # For non-HKSCS mode, only take data in the VDA range (?).
        # Note that we don't go from B5+C6A1-B5+C6FE, but rather only
        # C6A1-C8D3 excluding C6BF-C6D7 (Kangxi Radicals), because
        # C8D4-C8FE are not assigned in TW-BIG5 if we are to follow the
        # Arphic PL Big-5 fonts.  (To be discussed)
        next
          unless $hkscs_mode
          || ( $code >= 0xC6A1
            && $code <= 0xC8D3
            && !( $code >= 0xC6BF && $code <= 0xC6D7 ) )
          || ( $code >= 0xF9D6 && $code <= 0xF9FE );

        print STDERR
          "B2U, 2000: $big5 redefined from U+$b2u{$big5} to U+$iso2000.\n"
          if $debug
          and defined( $b2u{$big5} )
          and $b2u{$big5} ne $iso2000;

        # B5+F9FE is mapped differently in TW-BIG5 and HKSCS, to
        # U+2593 (Dark Shade) and U+FFED (Halfwidth Black Square)
        # respectively, so the HKSCS value is not taken in TW-BIG5 mode.
        # Which is more correct?  (To be discussed)
        $b2u{$big5} = $bmp_only ? $iso2000 : $iso2001
          unless ( !$hkscs_mode and $code == 0xF9FE );

        # Register the reverse mapping for each column, oldest first so
        # that the diagnostics appear in the same order as the columns.
        for my $entry (
            [ "1993", $iso1993 ],
            [ "2000", $iso2000 ],
            [ "2001", $iso2001 ]
          )
        {
            my ( $edition, $uni ) = @$entry;

            print STDERR
              "${edition}: U+$uni redefined from $u2b{$uni} to $big5.\n"
              if $debug
              and defined( $u2b{$uni} )
              and $u2b{$uni} ne $big5;

            $u2b{$uni} = $big5;
        }
    }
    close $table;

}    # read_hkscs_main()
michael@0 341
michael@0 342
sub read_hkscs_cmp() {

    ###########################################################################
    # Add Big5 compatibility coding from "big5cmp.txt" (current directory).
    #
    # The table starts after the first line beginning with "=====" and ends
    # at the first subsequent line that begins with whitespace (a blank
    # line included).  Each table line holds two whitespace-separated Big5
    # codes: the compatibility code followed by the code it is equivalent
    # to.  For each pair:
    #
    #   * the compatibility code is re-pointed at the Unicode value of the
    #     equivalent code (%b2u), and
    #   * the Unicode value the compatibility code used to have is
    #     re-pointed at the equivalent code (%u2b).
    #
    # With $debug set, the state before and after is printed on STDERR.
    # Dies with the system error message if the file cannot be opened.
    #
    my $path = "big5cmp.txt";
    open( my $cmp, '<', $path ) or die "Cannot open $path: $!\n";

    my $in_table = 0;    # becomes true once the "=====" ruler has been seen
    while (<$cmp>) {
        if (/^=====/) { $in_table = 1; next; }
        next unless $in_table;
        last if /^\s+/;
        chomp;
        my ( $big5cmp, $big5 ) = split " ";

        $big5cmp = uc($big5cmp);
        $big5    = uc($big5);
        my $uni    = $b2u{$big5};
        my $unicmp = $b2u{$big5cmp};

        print STDERR
          "Was: U+$unicmp -> $u2b{$unicmp}, $big5cmp -> U+$b2u{$big5cmp}\t"
          if $debug;
        $b2u{$big5cmp} = $uni;
        $u2b{$unicmp}  = $big5;
        print STDERR
          "Now: U+$unicmp -> $u2b{$unicmp}, $big5cmp -> U+$b2u{$big5cmp}\n"
          if $debug;
    }
    close $cmp;
}    # read_hkscs_cmp();
michael@0 376
michael@0 377
sub post_tuning() {

    # Final manual adjustments applied after all tables have been read.

    # Single-byte range 0x00 .. 0x80: identity mapping, Big5 -> Unicode only.
    for my $byte ( 0x00 .. 0x80 ) {
        $unicode    = sprintf( "%04X", $byte );
        $big5       = $unicode;
        $b2u{$big5} = $unicode;
    }

    # Euro sign at B5+A3E1 (950.txt does not have it).
    ( $b2u{"A3E1"}, $u2b{"20AC"} ) = ( "20AC", "A3E1" );

    # The character for the ethnic group "Yi": the Unicode -> Big5
    # direction is always added.  (To be discussed)
    $u2b{"5F5E"} = "C255";

    # Everything below applies to TW-BIG5 only.
    unless ($hkscs_mode) {

        # Box drawing characters: align with Big-5E, which differs from
        # CP950 and HKSCS.  Each of these also exists in the B5+F9xx area
        # (F9F9, F9E9, F9EB, F9EA, F9FA, F9FB, F9FD, F9FC respectively).
        # (To be discussed)
        @u2b{qw(2550 255E 2561 256A 256D 256E 256F 2570)} =
          qw(A2A4 A2A5 A2A7 A2A6 A27E A2A1 A2A3 A2A2);

        # "Hangzhou" or "Suzhou" Chinese numerals 10, 20, 30.
        # (To be discussed)
        for my $pair ( [ "A2CC", "3038" ], [ "A2CD", "3039" ],
            [ "A2CE", "303A" ] )
        {
            my ( $code, $numeral ) = @$pair;
            $b2u{$code}    = $numeral;
            $u2b{$numeral} = $code;
        }

        # "Yi" again: the Big5 -> Unicode direction.
        $b2u{"C255"} = "5F5E";
    }

}    # post_tuning()
michael@0 423
michael@0 424
sub gen_charmapml() {

    ###########################################################################
    #
    # Print a CharMapML XML document for the current tables on STDOUT.
    #
    # The document id is "big5-hkscs-2001" in HKSCS mode and "tw-big5-2002"
    # otherwise.  Three groups of assignments are written:
    #
    #   <a>    round-trip mappings (%u2b and %b2u agree),
    #   <fub>  fallbacks from Unicode to bytes (only %u2b has the mapping),
    #   <fbu>  fallbacks from bytes to Unicode (only %b2u has the mapping).
    #
    # When $use_range is set in TW-BIG5 mode, U+E000 .. U+F6B0 is left out
    # of the <a> group and described with <range> tags instead.

    my $mapping_id = $hkscs_mode ? "big5-hkscs-2001" : "tw-big5-2002";

    print <<EOT;
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE characterMapping SYSTEM "http://www.unicode.org/unicode/reports/tr22/CharacterMapping.dtd">
<characterMapping id="$mapping_id" version="1">
<history>
<modified version="1" date="2002-11-30">
Trial version generated from 950.txt + part of big5-iso.txt (HKSCS-2001)
with Euro added, with CP950's excessive fub (fallbacks uni->big5) removed,
and with some other manual tweaking.
</modified>
</history>
<validity>
<state type="FIRST" next="VALID" s="0" e="80" max="FFFF"/>
<state type="FIRST" next="SECOND" s="81" e="FE" max="FFFF"/>
<state type="SECOND" next="VALID" s="40" e="7E" max="FFFF"/>
<state type="SECOND" next="VALID" s="A1" e="FE" max="FFFF"/>
</validity>
<assignments sub="3F">
EOT

    # %u2b is not modified in this subroutine, so one sorted key list
    # serves both Unicode-ordered passes.
    my @by_unicode = sort { hex($a) <=> hex($b) } keys %u2b;
    my $ranges_wanted = $use_range && !$hkscs_mode;

    print " <!-- One to one mappings -->\n";
    for my $ucs (@by_unicode) {
        my $code = $u2b{$ucs};
        my $cp   = hex($ucs);

        # Round-trip entries only.
        next unless defined( $b2u{$code} );
        next unless $ucs eq $b2u{$code};

        # The user-defined areas are covered by <range> tags further down.
        next if $ranges_wanted && $cp >= 0xE000 && $cp <= 0xF6B0;

        my $bytes =
          hex($code) <= 0xFF
          ? sprintf( "%02X", hex($code) )
          : substr( $code, 0, 2 ) . " " . substr( $code, 2, 2 );
        printf " <a u=\"%04X\" b=\"%s\"/>\n", $cp, $bytes;
    }

    print " <!-- Fallback mappings from Unicode to bytes -->\n";
    for my $ucs (@by_unicode) {
        my $code = $u2b{$ucs};
        next if defined( $b2u{$code} ) and hex($ucs) == hex( $b2u{$code} );
        if ( $ucs eq "F900" ) {
            print " <!-- CJK Compatibility Ideographs: U+F900 - U+FA6A.\n",
              " These are included in CP950 (Unicode->Big5 direction only).\n",
              " Should we include this area in TW-BIG5 or not? -->\n";
        }
        printf " <fub u=\"%04X\" b=\"%s %s\"/>\n", hex($ucs),
          substr( $code, 0, 2 ), substr( $code, 2, 2 );
    }

    # Collect the byte -> Unicode fallbacks keyed by Unicode value.  Codes
    # are visited in ascending Big5 order, so when several codes share a
    # Unicode value the highest one is kept.
    my %fbu;
    print " <!-- Fallback mappings from bytes to Unicode -->\n";
    for my $code ( sort { hex($a) <=> hex($b) } keys %b2u ) {
        my $ucs  = $b2u{$code};
        my $back = $u2b{$ucs};
        $fbu{$ucs} = $code
          unless defined($back) and hex($code) == hex($back);
    }
    for my $ucs ( sort { hex($a) <=> hex($b) } keys %fbu ) {
        my $code = $fbu{$ucs};
        printf " <fbu u=\"%04X\" b=\"%s %s\"/>\n", hex($ucs),
          substr( $code, 0, 2 ), substr( $code, 2, 2 );
    }

    if ($ranges_wanted) {
        print <<EOT;
<!-- Roundtrip-mappings that can be enumerated
Note: We can only use the <range> tag for TW-BIG5.
Big-5E and Big5-HKSCS have assigned characters in these areas,
and we will have to use the <a> and <fub> tags instead.
-->
<!-- User-Defined Area 1 (UDA1) -->
<range uFirst="E000" uLast="E310" bFirst="FA 40" bLast="FE FE" bMin="81 40" bMax="FE FE"/>
<!-- User-Defined Area 2 (UDA2) -->
<range uFirst="E311" uLast="EEB7" bFirst="8E 40" bLast="A0 FE" bMin="81 40" bMax="FE FE"/>
<!-- User-Defined Area 3 (UDA3) -->
<range uFirst="EEB8" uLast="F6B0" bFirst="81 40" bLast="8D FE" bMin="81 40" bMax="FE FE"/>
EOT
    }

    print <<EOT;
</assignments>
</characterMapping>
EOT

}    # gen_charmapml()
michael@0 540
sub gen_check_b2u() {

    ###########################################################################
    #
    # Dump %b2u as a raw table for verification and testing.
    #
    # One line per Big5 code, in ascending numeric order of the code,
    # formatted as "U+<unicode>: <big5>".  A leading "00" is stripped from
    # the Big5 code, so single-byte codes print as two hex digits.
    #
    my @codes = sort { hex($a) <=> hex($b) } keys %b2u;
    for my $code (@codes) {
        my $ucs = $b2u{$code};
        ( my $shown = $code ) =~ s/^00//;
        printf "U+%s: %s\n", $ucs, $shown;
    }
}
michael@0 557
sub gen_check_u2b() {

    # Dump %u2b as a raw table for verification and testing: one line per
    # Unicode code point, in ascending numeric order, formatted as
    # "U+<unicode>: <big5>" with a leading "00" stripped from the Big5 code.
    my @code_points = sort { hex($a) <=> hex($b) } keys %u2b;
    for my $ucs (@code_points) {
        ( my $shown = $u2b{$ucs} ) =~ s/^00//;
        printf "U+%s: %s\n", $ucs, $shown;
    }

}
michael@0 566
michael@0 567 ###########################################################################
michael@0 568 #
michael@0 569 # Codes for generating hkscs.ut and hkscs.uf files for Mozilla
michael@0 570 #
sub gen_mozilla_uf() {

    # hkscs.uf: print the Unicode -> Big5 table as
    # "0x<BIG5>\t0x<UNICODE>" lines, ordered by the string sort of the
    # Unicode keys.  Entries are left out when the Big5 code lies in
    # 0xA140-0xC6A0 or 0xC940-0xF9D5, is below 0x8140, or when the Unicode
    # value is above 0xFFFF.
    for my $ucs ( sort keys %u2b ) {
        my $code  = $u2b{$ucs};
        my $value = hex($code);

        next if $value >= 0xA140 && $value <= 0xC6A0;
        next if $value >= 0xC940 && $value <= 0xF9D5;
        next if $value < 0x8140;
        next if hex($ucs) > 0xFFFF;

        print "0x", uc($code), "\t0x", uc($ucs), "\n";
    }
}
michael@0 583
sub gen_mozilla_ut() {

    # hkscs.ut: print the Big5 -> Unicode table as
    # "0x<BIG5>\t0x<UNICODE>" lines, ordered by the string sort of the
    # Big5 keys.  Entries are left out when the Big5 code lies in
    # 0xA140-0xC6A0 or 0xC940-0xF9D5, or is below 0x8140.
    for my $code ( sort keys %b2u ) {
        my $value = hex($code);

        next if $value >= 0xA140 && $value <= 0xC6A0;
        next if $value < 0x8140;
        next if $value >= 0xC940 && $value <= 0xF9D5;

        print "0x", uc($code), "\t0x", uc( $b2u{$code} ), "\n";
    }
}
michael@0 594
michael@0 595
michael@0 596 ###########################################################################
michael@0 597
michael@0 598 sub gen_glibc() {
michael@0 599
michael@0 600 ##########################################################################
michael@0 601 #
michael@0 602 # Generate index for UCS4 to Big5-HKSCS conversion table
michael@0 603 #
michael@0 604 @index_array = ();
michael@0 605
michael@0 606 $mode = 0;
michael@0 607 $count = 0;
michael@0 608 for ( $uni = 0x81 ; $uni <= 0x2FFFF ; $uni++ ) {
michael@0 609 $unicode = sprintf( "%04X", $uni );
michael@0 610
michael@0 611 # print " /* U+$unicode */\t" if $low % 4 == 0;
michael@0 612 if ( defined( $u2b{$unicode} ) ) {
michael@0 613 if ( $mode == 0 ) {
michael@0 614 $range_start = $range_end = $uni;
michael@0 615
michael@0 616 # printf " { %7s, ", sprintf("0x%04X", $range_start);
michael@0 617 $mode = 1;
michael@0 618 }
michael@0 619 else {
michael@0 620 $range_end = $uni;
michael@0 621 }
michael@0 622 }
michael@0 623 elsif ( $mode == 1 and ( $uni - $range_end ) >= 0x80 ) {
michael@0 624
michael@0 625 # Start a new range if the gap is 0x80 or larger
michael@0 626 # printf "%7s, %5d },\n", sprintf("0x%04X", $range_end), $count;
michael@0 627 push @index_array, [ ( $range_start, $range_end, $count ) ];
michael@0 628 $count += $range_end - $range_start + 1;
michael@0 629 $mode = 0;
michael@0 630 }
michael@0 631 }
michael@0 632
michael@0 633 #
michael@0 634 # Note that $count and $range_end are used again as global variables
michael@0 635 # below
michael@0 636 #
michael@0 637
michael@0 638 ###########################################################################
michael@0 639 #
michael@0 640 # Start generating real C code...
michael@0 641 #
michael@0 642
michael@0 643 print <<'EOT';
michael@0 644 /* Mapping tables for Big5-HKSCS handling.
michael@0 645 Copyright (C) 1997, 1998, 2000, 2001, 2002 Free Software Foundation, Inc.
michael@0 646 This file is part of the GNU C Library.
michael@0 647 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
michael@0 648 Modified for Big5-HKSCS by Roger So <roger.so@sw-linux.com>, 2000.
michael@0 649 Updated for HKSCS-2001 by James Su <suzhe@turbolinux.com.cn>
michael@0 650 and Anthony Fok <anthony@thizlinux.com>, 2002
michael@0 651
michael@0 652 The GNU C Library is free software; you can redistribute it and/or
michael@0 653 modify it under the terms of the GNU Lesser General Public
michael@0 654 License as published by the Free Software Foundation; either
michael@0 655 version 2.1 of the License, or (at your option) any later version.
michael@0 656
michael@0 657 The GNU C Library is distributed in the hope that it will be useful,
michael@0 658 but WITHOUT ANY WARRANTY; without even the implied warranty of
michael@0 659 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
michael@0 660 Lesser General Public License for more details.
michael@0 661
michael@0 662 You should have received a copy of the GNU Lesser General Public
michael@0 663 License along with the GNU C Library; if not, write to the Free
michael@0 664 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
michael@0 665 02111-1307 USA. */
michael@0 666
michael@0 667 #include <dlfcn.h>
michael@0 668 #include <gconv.h>
michael@0 669 #include <stdint.h>
michael@0 670 #include <stdlib.h>
michael@0 671 #include <string.h>
michael@0 672 #include <wchar.h>
michael@0 673
michael@0 674
michael@0 675 /* Table for Big5-HKSCS to UCS conversion.
michael@0 676
michael@0 677 Original comments by Roger So when he updated the tables for HKSCS-1999:
michael@0 678
michael@0 679 With HKSCS mappings 0x8140-0xA0FE and 0xFA40-0xFEFE added; more info:
michael@0 680 http://www.digital21.gov.hk/eng/hkscs/index.html
michael@0 681 - spacehunt 07/01/2000
michael@0 682
michael@0 683 The BIG5-HKSCS mapping tables are generated from 950.txt, big5-iso.txt
michael@0 684 and big5cmp.txt using a Perl script while merging C source code from
michael@0 685 other developers. A copy of the source Perl script is available at:
michael@0 686
michael@0 687 http://www.thizlinux.com/~anthony/hkscs/gen-glibc-big5hkscs.pl
michael@0 688 http://people.debian.org/~foka/hkscs/gen-glibc-big5hkscs.pl
michael@0 689
michael@0 690 Revisions:
michael@0 691 2001-10-30 made codec for Qt
michael@0 692 2002-03-21 ported to glibc-2.2.5 and added HKSCS-2001
michael@0 693
michael@0 694 Todo:
michael@0 695 Use a hash for characters beyond BMP to save space and make it
michael@0 696 more efficient
michael@0 697
michael@0 698 - Anthony Fok <anthony@thizlinux.com> 21 Mar 2002
michael@0 699 On behalf of ThizLinux Laboratory Ltd., Hong Kong SAR, China
michael@0 700 */
michael@0 701
michael@0 702 EOT
michael@0 703
michael@0 704 ##########################################################################
michael@0 705 #
michael@0 706 # Generate Big5-HKSCS to Unicode conversion table
michael@0 707 #
michael@0 708
michael@0 709 ## print "Big5HKSCS to Unicode\n";
michael@0 710
michael@0 711 # for $high (0x81..0x8d, 0x8e..0xa0, 0xc6..0xc8, 0xf9, 0xfa..0xfe) {
michael@0 712
michael@0 713 $high_start = 0x88;
michael@0 714 $high_end = 0xfe;
michael@0 715
michael@0 716 print "static const uint16_t big5_hkscs_to_ucs[";
michael@0 717 print( ( $high_end - $high_start + 1 ) * 157 );
michael@0 718 print "] =\n{\n";
michael@0 719 for $high ( 0x88 .. 0xfe ) {
michael@0 720 for $low ( 0x40 .. 0x7e, 0xa1 .. 0xfe ) {
michael@0 721 if ( $low == 0x40 ) {
michael@0 722 print "\n" unless $high == $high_start;
michael@0 723 printf
michael@0 724 "\t/* Big5-HKSCS 0x%02X40..0x%02X7E, 0x%02XA1..0x%02XFE */\n",
michael@0 725 $high, $high, $high, $high;
michael@0 726 }
michael@0 727 elsif ( $low == 0xa1 ) {
michael@0 728 print "\t\t";
michael@0 729 }
michael@0 730 $big5 = sprintf( "%02X%02X", $high, $low );
michael@0 731 print "\t" if $low % 8 == 0;
michael@0 732 if ( defined( $b2u{$big5} ) ) {
michael@0 733 $unicode = $b2u{$big5};
michael@0 734 print "0x", $unicode, ",";
michael@0 735 }
michael@0 736 else {
michael@0 737 print "0x0000,"; # for glibc
michael@0 738 }
michael@0 739 print( ( $low % 8 == 7 or $low == 0x7e or $low == 0xfe )
michael@0 740 ? "\n"
michael@0 741 : "\t" );
michael@0 742 }
michael@0 743 }
michael@0 744 print "};\n\n";
michael@0 745
michael@0 746 ##########################################################################
michael@0 747 #
michael@0 748 # Generate Unicode to Big5-HKSCS conversion table
michael@0 749 #
michael@0 750 print "static const unsigned char ucs4_to_big5_hkscs[$count][2] =\n{\n";
michael@0 751 foreach $index (@index_array) {
michael@0 752 ( $start, $end ) = ( @$index[0], @$index[1] );
michael@0 753 printf( " /* U+%04X */\t", $start ) if ( $start % 4 != 0 );
michael@0 754 print "\t" x ( ( $start % 4 ) * 1.5 ) . " " x ( $start % 2 );
michael@0 755 for ( $i = $start ; $i <= $end ; $i++ ) {
michael@0 756 printf( " /* U+%04X */\t", $i ) if ( $i % 4 == 0 );
michael@0 757 $unicode = sprintf( "%04X", $i );
michael@0 758 if ( defined( $big5 = $u2b{$unicode} ) ) {
michael@0 759 if ( $big5 =~ /^00/ ) {
michael@0 760 print '"\x', substr( $big5, 2, 2 ), '\x00",';
michael@0 761 }
michael@0 762 else {
michael@0 763 print '"\x', substr( $big5, 0, 2 ), '\x',
michael@0 764 substr( $big5, 2, 2 ), '",';
michael@0 765 }
michael@0 766 }
michael@0 767 else {
michael@0 768 print '"\x00\x00",';
michael@0 769 }
michael@0 770 print( ( $i % 4 == 3 ) ? "\n" : " " ) unless $i == $end;
michael@0 771 }
michael@0 772 print $end == $range_end ? "\n" : "\n\n";
michael@0 773 }
michael@0 774 print "};\n\n";
michael@0 775
michael@0 776 ###########################################################################
michael@0 777
# Emits the C definition of from_ucs4_idx[]: one { from, to, offset } row
# per @index_array entry.  Elements 0 and 1 of an entry are printed as
# 0x%04X hex, element 2 as a decimal integer.  The C lookup code emitted
# further below uses from_ucs4_idx[i].offset + ch - from_ucs4_idx[i].from
# as the row index into ucs4_to_big5_hkscs.
# The struct declaration itself is fixed text; the heredoc contains no
# variables to interpolate.
michael@0 778 print <<EOT;
michael@0 779 static struct
michael@0 780 {
michael@0 781 /* Note: We are going to split this table so that we can use
michael@0 782 uint16_t for "from" and "to" again. Anthony Fok, 2002-03-21 */
michael@0 783 uint32_t from;
michael@0 784 uint32_t to;
michael@0 785 uint32_t offset;
michael@0 786 } from_ucs4_idx[] =
michael@0 787 {
michael@0 788 EOT
michael@0 789 foreach $index (@index_array) {
# %7s and %5d right-align the three columns.  NOTE(review): @$index[N] is
# a one-element array slice; it yields the same value as $index->[N] here.
michael@0 790 printf " { %7s, %7s, %5d },\n", sprintf( "0x%04X", @$index[0] ),
michael@0 791 sprintf( "0x%04X", @$index[1] ), @$index[2];
michael@0 792 }
michael@0 793 print "};\n\n";
michael@0 794
michael@0 795 #foreach $i (sort keys %b2u) {
michael@0 796 # print $b2u{$i} . ' ';
michael@0 797 #}
michael@0 798
# Emits the fixed tail of the generated C file.  The heredoc is
# single-quoted, so nothing is interpolated and the text (including the
# trailing-backslash macro continuations) is printed verbatim.  It defines
# the BODY macro twice, each followed by an include of <iconv/loop.c>, and
# ends with an include of <iconv/skeleton.c>:
#   - FROM_LOOP (from_big5), Big5-HKSCS -> UCS4.  Lead bytes 0x81..0xfe
#     take a trail byte in 0x40..0x7e or 0xa1..0xfe.  Lead bytes >= 0x88
#     index big5_hkscs_to_ucs (157 cells per lead byte) and a zero cell is
#     treated as illegal input; lead bytes 0x81..0x87 are mapped linearly
#     starting at 0xeeb8.
#   - TO_LOOP (to_big5), UCS4 -> Big5-HKSCS.  Scans from_ucs4_idx in order
#     for the range containing ch and reads ucs4_to_big5_hkscs; values
#     <= 0x80 are written as a single byte; a zero first byte (for
#     ch != 0) is handled as an unconvertible character.
michael@0 799 print <<'EOT';
michael@0 800 /* Definitions used in the body of the `gconv' function. */
michael@0 801 #define CHARSET_NAME "BIG5HKSCS//"
michael@0 802 #define FROM_LOOP from_big5
michael@0 803 #define TO_LOOP to_big5
michael@0 804 #define DEFINE_INIT 1
michael@0 805 #define DEFINE_FINI 1
michael@0 806 #define MIN_NEEDED_FROM 1
michael@0 807 #define MAX_NEEDED_FROM 2
michael@0 808 #define MIN_NEEDED_TO 4
michael@0 809
michael@0 810
michael@0 811 /* First define the conversion function from Big5-HKSCS to UCS4. */
michael@0 812 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
michael@0 813 #define MAX_NEEDED_INPUT MAX_NEEDED_FROM
michael@0 814 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
michael@0 815 #define LOOPFCT FROM_LOOP
michael@0 816 #define BODY \
michael@0 817 { \
michael@0 818 uint32_t ch = *inptr; \
michael@0 819 \
michael@0 820 if (ch >= 0x81 && ch <= 0xfe) \
michael@0 821 { \
michael@0 822 /* Two-byte character. First test whether the next character \
michael@0 823 is also available. */ \
michael@0 824 uint32_t ch2; \
michael@0 825 int idx; \
michael@0 826 \
michael@0 827 if (__builtin_expect (inptr + 1 >= inend, 0)) \
michael@0 828 { \
michael@0 829 /* The second character is not available. */ \
michael@0 830 result = __GCONV_INCOMPLETE_INPUT; \
michael@0 831 break; \
michael@0 832 } \
michael@0 833 \
michael@0 834 ch2 = inptr[1]; \
michael@0 835 /* See whether the second byte is in the correct range. */ \
michael@0 836 if ((ch2 >= 0x40 && ch2 <= 0x7e) || (ch2 >= 0xa1 && ch2 <= 0xfe)) \
michael@0 837 { \
michael@0 838 if (ch >= 0x88) \
michael@0 839 { \
michael@0 840 /* Look up the table */ \
michael@0 841 idx = (ch - 0x88) * 157 + ch2 - (ch2 <= 0x7e ? 0x40 : 0x62); \
michael@0 842 if ((ch = big5_hkscs_to_ucs[idx]) == 0) \
michael@0 843 { \
michael@0 844 /* This is illegal. */ \
michael@0 845 if (! ignore_errors_p ()) \
michael@0 846 { \
michael@0 847 result = __GCONV_ILLEGAL_INPUT; \
michael@0 848 break; \
michael@0 849 } \
michael@0 850 \
michael@0 851 ++inptr; \
michael@0 852 ++*irreversible; \
michael@0 853 continue; \
michael@0 854 } \
michael@0 855 } \
michael@0 856 else \
michael@0 857 { \
michael@0 858 /* 0x81..0x87 in UDA3, currently maps linearly to PUA */ \
michael@0 859 ch = (ch - 0x81) * 157 + ch2 - (ch2 <= 0x7e ? 0x40 : 0x62) \
michael@0 860 + 0xeeb8; \
michael@0 861 } \
michael@0 862 } \
michael@0 863 else \
michael@0 864 { \
michael@0 865 /* This is illegal. */ \
michael@0 866 if (! ignore_errors_p ()) \
michael@0 867 { \
michael@0 868 result = __GCONV_ILLEGAL_INPUT; \
michael@0 869 break; \
michael@0 870 } \
michael@0 871 \
michael@0 872 ++inptr; \
michael@0 873 ++*irreversible; \
michael@0 874 continue; \
michael@0 875 } \
michael@0 876 \
michael@0 877 inptr += 2; \
michael@0 878 } \
michael@0 879 else if (__builtin_expect (ch, 0) == 0xff) \
michael@0 880 { \
michael@0 881 result = __GCONV_ILLEGAL_INPUT; \
michael@0 882 break; \
michael@0 883 } \
michael@0 884 else /* 0x00 to 0x80 */ \
michael@0 885 ++inptr; \
michael@0 886 \
michael@0 887 put32 (outptr, ch); \
michael@0 888 outptr += 4; \
michael@0 889 }
michael@0 890 #define LOOP_NEED_FLAGS
michael@0 891 #include <iconv/loop.c>
michael@0 892
michael@0 893
michael@0 894 /* Next, define the other direction. */
michael@0 895 #define MIN_NEEDED_INPUT MIN_NEEDED_TO
michael@0 896 #define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM
michael@0 897 #define MAX_NEEDED_OUTPUT MAX_NEEDED_FROM
michael@0 898 #define LOOPFCT TO_LOOP
michael@0 899 #define BODY \
michael@0 900 { \
michael@0 901 uint32_t ch = get32 (inptr); \
michael@0 902 const unsigned char *cp = ""; \
michael@0 903 unsigned char b5ch[2] = "\0\0"; \
michael@0 904 int i; \
michael@0 905 \
michael@0 906 for (i = 0; \
michael@0 907 i < (int) (sizeof (from_ucs4_idx) / sizeof (from_ucs4_idx[0])); \
michael@0 908 ++i) \
michael@0 909 { \
michael@0 910 if (ch < from_ucs4_idx[i].from) \
michael@0 911 break; \
michael@0 912 if (from_ucs4_idx[i].to >= ch) \
michael@0 913 { \
michael@0 914 cp = ucs4_to_big5_hkscs[from_ucs4_idx[i].offset \
michael@0 915 + ch - from_ucs4_idx[i].from]; \
michael@0 916 break; \
michael@0 917 } \
michael@0 918 } \
michael@0 919 \
michael@0 920 if (ch <= 0x80) \
michael@0 921 { \
michael@0 922 b5ch[0] = ch; \
michael@0 923 cp = b5ch; \
michael@0 924 } \
michael@0 925 \
michael@0 926 if (cp[0] == '\0' && ch != 0) \
michael@0 927 { \
michael@0 928 UNICODE_TAG_HANDLER (ch, 4); \
michael@0 929 \
michael@0 930 /* Illegal character. */ \
michael@0 931 STANDARD_ERR_HANDLER (4); \
michael@0 932 } \
michael@0 933 else \
michael@0 934 { \
michael@0 935 /* See whether there is enough room for the second byte we write. */ \
michael@0 936 if (__builtin_expect (cp[1], '\1') != '\0' \
michael@0 937 && __builtin_expect (outptr + 1 >= outend, 0)) \
michael@0 938 { \
michael@0 939 /* We have not enough room. */ \
michael@0 940 result = __GCONV_FULL_OUTPUT; \
michael@0 941 break; \
michael@0 942 } \
michael@0 943 \
michael@0 944 *outptr++ = cp[0]; \
michael@0 945 if (cp[1] != '\0') \
michael@0 946 *outptr++ = cp[1]; \
michael@0 947 } \
michael@0 948 \
michael@0 949 inptr += 4; \
michael@0 950 }
michael@0 951 #define LOOP_NEED_FLAGS
michael@0 952 #include <iconv/loop.c>
michael@0 953
michael@0 954
michael@0 955 /* Now define the toplevel functions. */
michael@0 956 #include <iconv/skeleton.c>
michael@0 957 EOT
michael@0 958
michael@0 959 }

mercurial