intl/uconv/tools/gen-big5hkscs-2001-mozilla.pl

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/uconv/tools/gen-big5hkscs-2001-mozilla.pl	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,959 @@
     1.4 +#!/usr/bin/perl -w
     1.5 +#
     1.6 +#  gen-big5hkscs-2001-mozilla.pl
     1.7 +#      a Perl script that generates Big5-HKSCS <-> Unicode
     1.8 +#      conversion tables for Mozilla
     1.9 +#
    1.10 +#  Author (of the original Perl script):
    1.11 +#      Anthony Fok <anthony@thizlinux.com> <foka@debian.org>
    1.12 +#  Copyright (C) 2001, 2002 ThizLinux Laboratory Ltd.
    1.13 +#  License: GNU General Public License, v2 or later.
    1.14 +#
    1.15 +#  This version includes original C source code from
    1.16 +#  glibc-2.2.5/iconvdata/big5hkscs.c by Ulrich Drepper <drepper@redhat.com>
    1.17 +#  Roger So <roger.so@sw-linux.com>
    1.18 +#
    1.19 +#                         First attempt for Qt-2.3.x: 2001-09-21
    1.20 +#                     A working version for Qt-2.3.x: 2001-10-30
    1.21 +#              Ported to glibc-2.2.5 with HKSCS-2001: 2002-03-21
    1.22 +#  Adapted to generate conversion tables for Mozilla: 2002-11-26
    1.23 +#  Adapted to generate conversion tables for Mozilla: 2002-11-30
    1.24 +#                     Cleaned up the script somewhat: 2002-12-04
    1.25 +# Minor revisions for submitting to Mozilla Bugzilla: 2002-12-10
    1.26 +#
    1.27 +#  Notes:
    1.28 +#
    1.29 +#   1. The latest version of this script may be found in:
    1.30 +#          http://www.thizlinux.com/~anthony/hkscs/gen-glibc-big5hkscs.pl
    1.31 +#          http://people.debian.org/~foka/hkscs/gen-glibc-big5hkscs.pl
    1.32 +#      Or, better yet, e-mail me and ask for the latest version.
    1.33 +#
    1.34 +#   2. This script generates data from 3 tables:
    1.35 +#       a. http://www.microsoft.com/typography/unicode/950.txt
    1.36 +#       b. http://www.info.gov.hk/digital21/chi/hkscs/download/big5-iso.txt
    1.37 +#       c. http://www.info.gov.hk/digital21/chi/hkscs/download/big5cmp.txt
    1.38 +#
    1.39 +#      Make sure your big5-iso.txt is the latest HKSCS-2001 version.
    1.40 +#
    1.41 +#   3. [glibc]: I have currently split the ucs_to_big5_hkscs_?[] tables into
    1.42 +#       different areas similar to the way Ulrich and Roger did it,
    1.43 +#       but extended for HKSCS-2001.
    1.44 +#
    1.45 +#   4. [Mozilla]: This script is very quick-and-dirty in some places.
    1.46 +#       Call either gen_mozilla_uf() or gen_mozilla_ut() to generate
    1.47 +#       the appropriate tables for feeding into "fromu" or "tou".
    1.48 +#
    1.49 +#   5. [CharMapML]: The comments regarding TW-BIG5 herein need to be organized.
    1.50 +#       Also, please make sure "$hkscs_mode = 0;" for TW-BIG5 mode.
    1.51 +#       Otherwise, this script would generate a HKSCS table.
    1.52 +#       (Yes, I know, I should clean up this script and make it more modular,
    1.53 +#       and with command-line options or whatnot.  I'll do that later.  :-)
    1.54 +#
    1.55 +#  If you have any questions or concerns, please feel free to contact me
    1.56 +#  at Anthony Fok <anthony@thizlinux.com> or <foka@debian.org>  :-)
    1.57 +#
    1.58 +#  Last but not least, special thanks to ThizLinux Laboratory Ltd. (HK)
    1.59 +#  for their generous support in this work.
    1.60 +#
    1.61 +
    1.62 +# 1. UDA3, 0x8840 - 0x8dfe
    1.63 +# 2. UDA2, 0x8e40 - 0xa0fe
    1.64 +# 3. VDA,  0xc6a1 - 0xc8fe
    1.65 +
    1.66 +#use Getopt::Std;
    1.67 +
    1.68 +my ( %b2u, %u2b, $unicode, $big5, $high, $low, $i, $count );
    1.69 +# %b2u: Big5 -> Unicode, %u2b: Unicode -> Big5 (hex strings); the scalars are scratch shared by the subs.
    1.70 +my $debug = 0;          # nonzero: report redefined mappings on STDERR
    1.71 +my $hkscs_mode = 1;     # 1: build Big5-HKSCS tables, 0: build TW-BIG5 tables
    1.72 +my $kangxi = 0;         # nonzero: adjust_radicals() also sets the b2u direction
    1.73 +my $use_range  = 0;     # nonzero (TW-BIG5 only): gen_charmapml() emits <range> elements
    1.74 +my $bmp_only  = 1;      # nonzero: read_hkscs_main() stores $iso2000 in %b2u, else $iso2001
    1.75 +
    1.76 +#
    1.77 +# Subroutine declarations (empty prototypes; the definitions follow the main program)
    1.78 +#
    1.79 +sub read_cp950();
    1.80 +sub adjust_radicals();
    1.81 +sub read_hkscs_main();
    1.82 +sub read_hkscs_cmp();
    1.83 +sub post_tuning();
    1.84 +sub gen_charmapml();
    1.85 +sub gen_check_b2u();
    1.86 +sub gen_check_u2b();
    1.87 +sub gen_mozilla_uf();
    1.88 +sub gen_mozilla_ut();
    1.89 +sub gen_glibc();
    1.90 +
    1.91 +###########################################################################
    1.92 +#
    1.93 +# Main program: load the tables in order, tune them, then print one output.
    1.94 +#
    1.95 +
    1.96 +# First, read Microsoft's CP950 (950.txt) as base Big5.
    1.97 +read_cp950 ();
    1.98 +
    1.99 +# Add mappings to Kangxi Radicals.
   1.100 +# The b2u direction is added only if $kangxi is true (nonzero).
   1.101 +adjust_radicals ();
   1.102 +
   1.103 +# Then, read the HKSCS table (big5-iso.txt).
   1.104 +# The compatibility table (big5cmp.txt) is read only when $hkscs_mode is set.
   1.105 +read_hkscs_main ();
   1.106 +read_hkscs_cmp () if $hkscs_mode;
   1.107 +# Finally, apply the hand-made overrides.
   1.108 +post_tuning ();
   1.109 +
   1.110 +
   1.111 +# Then, choose one of the following generators (edit this list to switch output):
   1.112 +#gen_charmapml();
   1.113 +gen_mozilla_uf();
   1.114 +#gen_mozilla_ut();
   1.115 +#gen_check_u2b();
   1.116 +#gen_glibc();
   1.117 +
   1.118 +
   1.119 +# End of program
   1.120 +exit 0;
   1.121 +
   1.122 +
   1.123 +#############################################################################
   1.124 +#
   1.125 +#  Subroutines
   1.126 +#
   1.127 +
   1.128 +# Read Microsoft's CP950 table (950.txt) as the base Big5 mapping: DBCSTABLE
   1.129 +# rows fill %b2u, WCTABLE rows fill %u2b minus CP950's "faked" fallbacks.
   1.130 +sub read_cp950() {
   1.131 +    open( CP950, "950.txt" ) or die "Cannot open 950.txt: $!\n";
   1.132 +    my $mode = 0;    # 0: outside any table, 1: in a DBCSTABLE, 2: in WCTABLE
   1.133 +    while (<CP950>) {
   1.134 +        s/\r//;
   1.135 +        chomp;
   1.136 +        next if /^$/;
   1.137 +        last if /^ENDCODEPAGE/;
   1.138 +        if (/^DBCSTABLE (\d+)\s+;LeadByte = 0x([0-9a-f]{2})/) {
   1.139 +            $mode = 1;
   1.140 +            ( $count, $high ) = ( $1, $2 );
   1.141 +            $i = 0;
   1.142 +            next;
   1.143 +        }
   1.144 +        if (/^WCTABLE (\d+)/) {
   1.145 +            $mode  = 2;
   1.146 +            $count = $1;
   1.147 +            $i     = 0;
   1.148 +            next;
   1.149 +        }
   1.150 +        next if $mode == 0;
   1.151 +        if ( $mode == 1 ) {
   1.152 +            ( $low, $unicode ) = split "\t";    # any further field is ignored
   1.153 +            $low     =~ s/^0x//;
   1.154 +            $unicode =~ s/^0x//;
   1.155 +            $big5 = $high . $low;
   1.156 +            $b2u{ uc($big5) } = uc($unicode);
   1.157 +            if ( ++$i == $count ) { $mode = 0; $count = 0; next; }
   1.158 +        }
   1.159 +
   1.160 +        if ( $mode == 2 ) {
   1.161 +            ( $unicode, $big5 ) = split "\t";    # any further field is ignored
   1.162 +            $unicode =~ s/^0x//;
   1.163 +            $big5    =~ s/^0x//;
   1.164 +            my $u  = hex($unicode);
   1.165 +            my $mb = hex($big5);    # not named $b, which belongs to sort()
   1.166 +
   1.167 +            $u2b{ uc($unicode) } = uc($big5) unless
   1.168 +              # Skip Microsoft's over-generous (or over-zealous?) mappings
   1.169 +              # "Faked" accented latin characters
   1.170 +              ( $mb <= 0xFF and $mb != $u )
   1.171 +
   1.172 +              # "Faked" Ideographic Annotation ___ Mark
   1.173 +              or ( $u >= 0x3192 and $u <= 0x319F )
   1.174 +
   1.175 +              # "Faked" Parenthesized Ideograph ___
   1.176 +              or ( $u >= 0x3220 and $u <= 0x3243 )
   1.177 +
   1.178 +              # "Faked" Circled Ideograph ___ except Circled Ideograph Correct
   1.179 +              or ( $u >= 0x3280 and $u <= 0x32B0 and $u != 0x32A3 )
   1.180 +
   1.181 +              # Latin-1 symbols: cent, pound, yen, acute accent, micro, cedilla
   1.182 +              or ( $u == 0xA2
   1.183 +                or $u == 0xA3
   1.184 +                or $u == 0xA5
   1.185 +                or $u == 0xB4
   1.186 +                or $u == 0xB5
   1.187 +                or $u == 0xB8 )
   1.188 +
   1.189 +              # Assorted punctuation and symbols CP950 maps onto Big5 look-alikes
   1.190 +              or ( $u == 0x0305    # ???
   1.191 +                or $u == 0x2015
   1.192 +                or $u == 0x2016
   1.193 +                or $u == 0x2022
   1.194 +                or $u == 0x2024
   1.195 +                or $u == 0x2033
   1.196 +                or $u == 0x203E    # ???
   1.197 +                or $u == 0x2216
   1.198 +                or $u == 0x2218
   1.199 +                or $u == 0x2263
   1.200 +                or $u == 0x2307
   1.201 +                or $u == 0x2609
   1.202 +                or $u == 0x2641
   1.203 +                or $u == 0x301C
   1.204 +                or $u == 0x3030 )
   1.205 +
   1.206 +              # Fullwidth circumflex, fullwidth grave, halfwidth ideographic comma
   1.207 +              or ( $u == 0xFF3E or $u == 0xFF40 or $u == 0xFF64 );
   1.208 +
   1.209 +            if ( ++$i == $count ) { $mode = 0; $count = 0; next; }
   1.210 +        }
   1.211 +    }
   1.212 +    close CP950;
   1.213 +}
   1.214 +
   1.215 +sub adjust_radicals() {
   1.216 +    # Map Big5 C6BF-C6D7 to Kangxi Radicals: %u2b always, %b2u only if $kangxi.
   1.217 +    # B5+C6BF - B5+C6D7: Radicals (?)
   1.218 +
   1.219 +    # TW-BIG5 drafted by Autrijus uses Kangxi Radicals whenever possible.
   1.220 +    #
   1.221 +    #   Big5-HKSCS tends towards using the character in Unicode CJK Ideographs.
   1.222 +    #   Note that HKSCS does not explicitly define
   1.223 +    #       B5+C6CF, B5+C6D3, B5+C6D5, B5+C6D7 (廴, 无, 癶, 隶),
   1.224 +    #   but does have these characters at B5+FBFD, B5+FCD3, B5+FEC1, B5+90C4,
   1.225 +    #   mapped to U+5EF4, U+65E0, U+7676, U+96B6 respectively.
   1.226 +    #
   1.227 +    #   As for B5+C6CD (幺), HKSCS maps it to U+2F33 just like TW-BIG5.
   1.228 +    #   However, it also maps B5+FBF4 (幺) to U+5E7A.
   1.229 +    $b2u{"C6BF"} = "2F02" if $kangxi;
   1.230 +    $u2b{"2F02"} = "C6BF";              # Kangxi radical 3
   1.231 +    $b2u{"C6C0"} = "2F03" if $kangxi;
   1.232 +    $u2b{"2F03"} = "C6C0";              # Kangxi radical 4
   1.233 +    $b2u{"C6C1"} = "2F05" if $kangxi;
   1.234 +    $u2b{"2F05"} = "C6C1";              # Kangxi radical 6
   1.235 +    $b2u{"C6C2"} = "2F07" if $kangxi;
   1.236 +    $u2b{"2F07"} = "C6C2";              # Kangxi radical 8
   1.237 +    $b2u{"C6C3"} = "2F0C" if $kangxi;
   1.238 +    $u2b{"2F0C"} = "C6C3";              # Kangxi radical 13
   1.239 +    $b2u{"C6C4"} = "2F0D" if $kangxi;
   1.240 +    $u2b{"2F0D"} = "C6C4";              # Kangxi radical 14
   1.241 +    $b2u{"C6C5"} = "2F0E" if $kangxi;
   1.242 +    $u2b{"2F0E"} = "C6C5";              # Kangxi radical 15
   1.243 +    $b2u{"C6C6"} = "2F13" if $kangxi;
   1.244 +    $u2b{"2F13"} = "C6C6";              # Kangxi radical 20
   1.245 +    $b2u{"C6C7"} = "2F16" if $kangxi;
   1.246 +    $u2b{"2F16"} = "C6C7";              # Kangxi radical 23
   1.247 +    $b2u{"C6C8"} = "2F19" if $kangxi;
   1.248 +    $u2b{"2F19"} = "C6C8";              # Kangxi radical 26
   1.249 +    $b2u{"C6C9"} = "2F1B" if $kangxi;
   1.250 +    $u2b{"2F1B"} = "C6C9";              # Kangxi radical 28
   1.251 +    $b2u{"C6CA"} = "2F22" if $kangxi;
   1.252 +    $u2b{"2F22"} = "C6CA";              # Kangxi radical 35
   1.253 +    $b2u{"C6CB"} = "2F27" if $kangxi;
   1.254 +    $u2b{"2F27"} = "C6CB";              # Kangxi radical 40
   1.255 +    $b2u{"C6CC"} = "2F2E" if $kangxi;
   1.256 +    $u2b{"2F2E"} = "C6CC";              # Kangxi radical 47
   1.257 +    $b2u{"C6CD"} = "2F33" if $kangxi;
   1.258 +    $u2b{"2F33"} = "C6CD";              # Kangxi radical 52
   1.259 +    $b2u{"C6CE"} = "2F34" if $kangxi;
   1.260 +    $u2b{"2F34"} = "C6CE";              # Kangxi radical 53
   1.261 +    $b2u{"C6CF"} = "2F35" if $kangxi;
   1.262 +    $u2b{"2F35"} = "C6CF";              # Kangxi radical 54
   1.263 +    $b2u{"C6D0"} = "2F39" if $kangxi;
   1.264 +    $u2b{"2F39"} = "C6D0";              # Kangxi radical 58
   1.265 +    $b2u{"C6D1"} = "2F3A" if $kangxi;
   1.266 +    $u2b{"2F3A"} = "C6D1";              # Kangxi radical 59
   1.267 +    $b2u{"C6D2"} = "2F41" if $kangxi;
   1.268 +    $u2b{"2F41"} = "C6D2";              # Kangxi radical 66
   1.269 +    $b2u{"C6D3"} = "2F46" if $kangxi;
   1.270 +    $u2b{"2F46"} = "C6D3";              # Kangxi radical 71
   1.271 +    $b2u{"C6D4"} = "2F67" if $kangxi;
   1.272 +    $u2b{"2F67"} = "C6D4";              # Kangxi radical 104
   1.273 +    $b2u{"C6D5"} = "2F68" if $kangxi;
   1.274 +    $u2b{"2F68"} = "C6D5";              # Kangxi radical 105
   1.275 +    $b2u{"C6D6"} = "2FA1" if $kangxi;
   1.276 +    $u2b{"2FA1"} = "C6D6";              # Kangxi radical 162
   1.277 +    $b2u{"C6D7"} = "2FAA" if $kangxi;
   1.278 +    $u2b{"2FAA"} = "C6D7";              # Kangxi radical 171
   1.279 +}
   1.280 +
   1.281 +sub read_hkscs_main() {
   1.282 +    # Merge big5-iso.txt into %b2u / %u2b.  Each data row is a Big5 code plus three
   1.283 +    # Unicode columns ($iso1993, $iso2000, $iso2001); hex digits are upper-cased.
   1.284 +    open( B2U, "<big5-iso.txt" ) or die "Cannot open big5-iso.txt: $!\n";
   1.285 +    while (<B2U>) {
   1.286 +        next unless
   1.287 +/([[:xdigit:]]{4})\s+([[:xdigit:]]{4})\s+([[:xdigit:]]{4})\s+([[:xdigit:]]{4,5})/;
   1.288 +        my ( $iso1993, $iso2000, $iso2001 );
   1.289 +        ( $big5, $iso1993, $iso2000, $iso2001 ) = map { uc($_) } ( $1, $2, $3, $4 );
   1.290 +        my $code = hex($big5);
   1.291 +
   1.292 +        # For non-HKSCS mode, only take data in the VDA range (?)
   1.293 +        next unless $hkscs_mode
   1.294 +
   1.295 +          # Note that we don't go from B5+C6A1-B5+C8FE, but rather only
   1.296 +          # C6A1-C8D3 excluding C6BF-C6D7 (Kangxi Radicals)
   1.297 +          # because C8D4-C8FE are not assigned in TW-BIG5
   1.298 +          # if we are to follow Arphic PL Big-5 fonts.  (To be discussed)
   1.299 +          or
   1.300 +          ( $code >= 0xC6A1 && $code <= 0xC8D3 and !( $code >= 0xC6BF && $code <= 0xC6D7 ) )
   1.301 +          or ( $code >= 0xF9D6 && $code <= 0xF9FE );
   1.302 +
   1.303 +        print STDERR
   1.304 +          "B2U, 2000: $big5 redefined from U+$b2u{$big5} to U+$iso2000.\n"
   1.305 +          if $debug
   1.306 +          and defined( $b2u{$big5} )
   1.307 +          and $b2u{$big5} ne $iso2000;
   1.308 +
   1.309 +        $b2u{$big5} = $bmp_only ? $iso2000 : $iso2001
   1.310 +          unless !$hkscs_mode
   1.311 +          and $code == 0xF9FE;
   1.312 +
   1.313 +        # B5+F9FE is mapped differently in TW-BIG5 and HKSCS, to
   1.314 +        # U+2593 (Dark Shade) and U+FFED (Halfwidth Black Square) respectively.
   1.315 +        # Which is more correct?  I don't know!  (To be discussed)
   1.316 +
   1.317 +        print STDERR
   1.318 +          "1993: U+$iso1993 redefined from $u2b{$iso1993} to $big5.\n"
   1.319 +          if $debug
   1.320 +          and defined( $u2b{$iso1993} )
   1.321 +          and $u2b{$iso1993} ne $big5;
   1.322 +
   1.323 +        $u2b{$iso1993} = $big5;
   1.324 +
   1.325 +        print STDERR
   1.326 +          "2000: U+$iso2000 redefined from $u2b{$iso2000} to $big5.\n"
   1.327 +          if $debug
   1.328 +          and defined( $u2b{$iso2000} )
   1.329 +          and $u2b{$iso2000} ne $big5;
   1.330 +
   1.331 +        $u2b{$iso2000} = $big5;
   1.332 +
   1.333 +        print STDERR
   1.334 +          "2001: U+$iso2001 redefined from $u2b{$iso2001} to $big5.\n"
   1.335 +          if $debug
   1.336 +          and defined( $u2b{$iso2001} )
   1.337 +          and $u2b{$iso2001} ne $big5;
   1.338 +
   1.339 +        $u2b{$iso2001} = $big5;
   1.340 +    }
   1.341 +    close B2U;
   1.342 +
   1.343 +}    # read_hkscs_main()
   1.344 +
   1.345 +
   1.346 +sub read_hkscs_cmp() {
   1.347 +    ###########################################################################
   1.348 +    # Add Big5 compatibility coding: for each ($big5cmp, $big5) row after the "====="
   1.349 +    # line, $big5cmp decodes like $big5 and its old Unicode value encodes to $big5.
   1.350 +    # Stephan, here is the code segment that you may want to implement
   1.351 +    # in your convertbig5hkscs2001.pl
   1.352 +    open( B5CMP, "<big5cmp.txt" ) or die "Cannot open big5cmp.txt: $!\n";
   1.353 +    my $mode = 0;    # set to 1 once the "=====" line has been seen
   1.354 +    while (<B5CMP>) {
   1.355 +        if (/^=====/) { $mode = 1; next; }
   1.356 +        next if $mode == 0;
   1.357 +        last if $mode == 1 and /^\s+/;
   1.358 +        chomp;
   1.359 +        my ( $big5cmp, $big5 ) = split " ";
   1.360 +        next unless defined $big5;    # row with fewer than two fields
   1.361 +        $big5cmp = uc($big5cmp);
   1.362 +        $big5    = uc($big5);
   1.363 +        my $uni    = $b2u{$big5};
   1.364 +        my $unicmp = $b2u{$big5cmp};
   1.365 +        # A code missing from %b2u would store undef or create an empty %u2b key.
   1.366 +        warn("big5cmp.txt: skipping $big5cmp -> $big5\n"), next
   1.367 +          unless defined $uni and defined $unicmp;
   1.368 +        print STDERR
   1.369 +          "Was: U+$unicmp -> $u2b{$unicmp}, $big5cmp -> U+$b2u{$big5cmp}\t"
   1.370 +          if $debug;
   1.371 +        $b2u{$big5cmp} = $uni;
   1.372 +        $u2b{$unicmp}  = $big5;
   1.373 +        print STDERR
   1.374 +          "Now:  U+$unicmp -> $u2b{$unicmp}, $big5cmp -> U+$b2u{$big5cmp}\n"
   1.375 +          if $debug;
   1.376 +    }
   1.377 +    close B5CMP;
   1.378 +}    # read_hkscs_cmp();
   1.379 +
   1.380 +
   1.381 +sub post_tuning() {
   1.382 +    # Apply hand-made overrides to %b2u / %u2b once all tables have been read.
   1.383 +    # And finally, fine-tuning...
   1.384 +    for $i ( 0x00 .. 0x80 ) {    # identity %b2u entries for 0000-0080
   1.385 +        $big5 = $unicode = sprintf( "%04X", $i );
   1.386 +        $b2u{$big5} = $unicode;
   1.387 +    }
   1.388 +
   1.389 +    # Add the Euro sign at B5+A3E1 (I wonder why this 950.txt doesn't have it.)
   1.390 +    $b2u{"A3E1"} = "20AC";
   1.391 +    $u2b{"20AC"} = "A3E1";
   1.392 +
   1.393 +    # Box drawing characters:
   1.394 +    # Align with Big-5E (To be discussed, as it differs from CP950 and HKSCS)
   1.395 +    # Only the Unicode -> Big5 direction is changed, and only in TW-BIG5 mode.
   1.396 +    if ( !$hkscs_mode ) {
   1.397 +        $u2b{"2550"} = "A2A4";    # encode to B5+A2A4 (also at B5+F9F9)
   1.398 +        $u2b{"255E"} = "A2A5";    # encode to B5+A2A5 (also at B5+F9E9)
   1.399 +        $u2b{"2561"} = "A2A7";    # encode to B5+A2A7 (also at B5+F9EB)
   1.400 +        $u2b{"256A"} = "A2A6";    # encode to B5+A2A6 (also at B5+F9EA)
   1.401 +        $u2b{"256D"} = "A27E";    # encode to B5+A27E (also at B5+F9FA)
   1.402 +        $u2b{"256E"} = "A2A1";    # encode to B5+A2A1 (also at B5+F9FB)
   1.403 +        $u2b{"256F"} = "A2A3";    # encode to B5+A2A3 (also at B5+F9FD)
   1.404 +        $u2b{"2570"} = "A2A2";    # encode to B5+A2A2 (also at B5+F9FC)
   1.405 +    }
   1.406 +
   1.407 +    # "Hangzhou" or "Suzhou" Chinese numerals 10, 20, 30 (U+3038, U+3039, U+303A)
   1.408 +    # (To be discussed)
   1.409 +    if ( !$hkscs_mode ) {
   1.410 +        $b2u{"A2CC"} = "3038";
   1.411 +        $u2b{"3038"} = "A2CC";
   1.412 +        $b2u{"A2CD"} = "3039";
   1.413 +        $u2b{"3039"} = "A2CD";
   1.414 +        $b2u{"A2CE"} = "303A";
   1.415 +        $u2b{"303A"} = "A2CE";
   1.416 +    }
   1.417 +
   1.418 +    # The character for ethnic group "Yi" (U+5F5E, B5+C255):
   1.419 +    # (To be discussed)
   1.420 +    $u2b{"5F5E"} = "C255";    # Always add this.
   1.421 +    if ( !$hkscs_mode ) {     # the decoding direction changes only for TW-BIG5
   1.422 +        $b2u{"C255"} = "5F5E";
   1.423 +    }
   1.424 +
   1.425 +}    # post_tuning()
   1.426 +
   1.427 +
   1.428 +sub gen_charmapml() {
   1.429 +    # Print the %u2b / %b2u tables as a CharMapML (tr22) XML document.
   1.430 +    ###########################################################################
   1.431 +    #
   1.432 +    #  Codes for generating CharMapML XML file
   1.433 +    #  First the XML declaration and DOCTYPE.
   1.434 +    print <<EOT;
   1.435 +<?xml version="1.0" encoding="UTF-8" ?>
   1.436 +<!DOCTYPE characterMapping SYSTEM "http://www.unicode.org/unicode/reports/tr22/CharacterMapping.dtd">
   1.437 +EOT
   1.438 +    # The two headers below differ only in the characterMapping id.
   1.439 +    if ($hkscs_mode) {
   1.440 +        print <<EOT;
   1.441 +<characterMapping id="big5-hkscs-2001" version="1">
   1.442 + <history>
   1.443 +  <modified version="1" date="2002-11-30">
   1.444 +   Trial version generated from 950.txt + part of big5-iso.txt (HKSCS-2001)
   1.445 +   with Euro added, with CP950's excessive fub (fallbacks uni->big5) removed,
   1.446 +   and with some other manual tweaking.
   1.447 +  </modified>
   1.448 + </history>
   1.449 +EOT
   1.450 +    }
   1.451 +    else {
   1.452 +        print <<EOT;
   1.453 +<characterMapping id="tw-big5-2002" version="1">
   1.454 + <history>
   1.455 +  <modified version="1" date="2002-11-30">
   1.456 +   Trial version generated from 950.txt + part of big5-iso.txt (HKSCS-2001)
   1.457 +   with Euro added, with CP950's excessive fub (fallbacks uni->big5) removed,
   1.458 +   and with some other manual tweaking.
   1.459 +  </modified>
   1.460 + </history>
   1.461 +EOT
   1.462 +    }
   1.463 +    # Valid byte sequences: 00-80 alone, or lead 81-FE with trail 40-7E / A1-FE.
   1.464 +    print <<EOT;
   1.465 + <validity>
   1.466 +  <state type="FIRST" next="VALID" s="0" e="80" max="FFFF"/>
   1.467 +  <state type="FIRST" next="SECOND" s="81" e="FE" max="FFFF"/>
   1.468 +  <state type="SECOND" next="VALID" s="40" e="7E" max="FFFF"/>
   1.469 +  <state type="SECOND" next="VALID" s="A1" e="FE" max="FFFF"/>
   1.470 + </validity>
   1.471 + <assignments sub="3F">
   1.472 +EOT
   1.473 +    print "  <!-- One to one mappings -->\n";
   1.474 +    for $unicode ( sort { hex($a) <=> hex($b) } keys %u2b ) {
   1.475 +        $big5 = $u2b{$unicode};
   1.476 +        $u    = hex($unicode);
   1.477 +        next    # emit <a> only for round-trip pairs ...
   1.478 +          unless defined( $b2u{$big5} )
   1.479 +          and $unicode eq $b2u{$big5}
   1.480 +          and    # ... and leave U+E000-F6B0 to the <range> elements when those are used
   1.481 +          not( $use_range and !$hkscs_mode and $u >= 0xE000 && $u <= 0xF6B0 );
   1.482 +        printf "  <a u=\"%04X\" ", $u;
   1.483 +        if ( hex($big5) <= 0xFF ) {    # single-byte code: print one byte
   1.484 +            printf "b=\"%02X\"/>\n", hex($big5);
   1.485 +        }
   1.486 +        else {                         # double-byte code: print lead and trail bytes
   1.487 +            printf "b=\"%s %s\"/>\n", substr( $big5, 0, 2 ),
   1.488 +              substr( $big5, 2, 2 );
   1.489 +        }
   1.490 +    }
   1.491 +    # <fub>: Unicode values whose Big5 code does not decode back to them.
   1.492 +    print "  <!-- Fallback mappings from Unicode to bytes -->\n";
   1.493 +    for $unicode ( sort { hex($a) <=> hex($b) } keys %u2b ) {
   1.494 +        $big5 = $u2b{$unicode};
   1.495 +        next if defined( $b2u{$big5} ) and hex($unicode) == hex( $b2u{$big5} );
   1.496 +        if ( $unicode eq "F900" ) {    # announce the compatibility-ideograph run once
   1.497 +            print "  <!-- CJK Compatibility Ideographs: U+F900 - U+FA6A.\n";
   1.498 +            print
   1.499 +"       These are included in CP950 (Unicode->Big5 direction only).\n";
   1.500 +            print "       Should we include this area in TW-BIG5 or not? -->\n";
   1.501 +        }
   1.502 +        printf "  <fub u=\"%04X\" b=\"%s %s\"/>\n", hex($unicode),
   1.503 +          substr( $big5, 0, 2 ), substr( $big5, 2, 2 );
   1.504 +    }
   1.505 +    # <fbu>: Big5 codes whose Unicode value does not encode back to them.
   1.506 +    my %fbu;    # NOTE(review): keyed by Unicode value, so only one Big5 code survives per value - intended?
   1.507 +    print "  <!-- Fallback mappings from bytes to Unicode -->\n";
   1.508 +    for $big5 ( sort { hex($a) <=> hex($b) } keys %b2u ) {
   1.509 +        $unicode = $b2u{$big5};
   1.510 +        if ( !defined( $u2b{$unicode} ) or hex($big5) != hex( $u2b{$unicode} ) )
   1.511 +        {
   1.512 +            $fbu{$unicode} = $big5;
   1.513 +        }
   1.514 +    }
   1.515 +    for $unicode ( sort { hex($a) <=> hex($b) } keys %fbu ) {
   1.516 +        $big5 = $fbu{$unicode};
   1.517 +        printf "  <fbu u=\"%04X\" b=\"%s %s\"/>\n", hex($unicode),
   1.518 +          substr( $big5, 0, 2 ), substr( $big5, 2, 2 );
   1.519 +    }
   1.520 +    # With $use_range in TW-BIG5 mode, the user-defined areas are emitted as <range>s.
   1.521 +    if ( $use_range and !$hkscs_mode ) {
   1.522 +        print <<EOT;
   1.523 +  <!-- Roundtrip-mappings that can be enumerated
   1.524 +       Note: We can only use the <range> tag for TW-BIG5.
   1.525 +             Big-5E and Big5-HKSCS have assigned characters in these areas,
   1.526 +	     and we will have to use the <a> and <fub> tags instead.
   1.527 +    -->
   1.528 +  <!-- User-Defined Area 1 (UDA1) -->
   1.529 +  <range uFirst="E000" uLast="E310"  bFirst="FA 40" bLast="FE FE" bMin="81 40" bMax="FE FE"/>
   1.530 +  <!-- User-Defined Area 2 (UDA2) -->
   1.531 +  <range uFirst="E311" uLast="EEB7"  bFirst="8E 40" bLast="A0 FE" bMin="81 40" bMax="FE FE"/>
   1.532 +  <!-- User-Defined Area 3 (UDA3) -->
   1.533 +  <range uFirst="EEB8" uLast="F6B0"  bFirst="81 40" bLast="8D FE" bMin="81 40" bMax="FE FE"/>
   1.534 +EOT
   1.535 +    }
   1.536 +    # Close the <assignments> element and the document.
   1.537 +    print <<EOT;
   1.538 + </assignments>
   1.539 +</characterMapping>
   1.540 +EOT
   1.541 +
   1.542 +}    # gen_charmapml()
   1.543 +
   1.544 +sub gen_check_b2u() {
   1.545 +    # Print every %b2u entry as "U+<unicode>: <big5>", ordered by Big5 code.
   1.546 +    ###########################################################################
   1.547 +    #
   1.548 +    #  Codes for generating a raw table for verification and testing
   1.549 +    #  (the commented-out prints below are leftover spot checks)
   1.550 +    # #print $u2b{"F7D1"}, "\n";
   1.551 +    # print $b2u{$u2b{"F7D1"}}, "\n";
   1.552 +    # print "FA59 -> U+", $b2u{"FA59"}, "\n";
   1.553 +    # A leading "00" is stripped so that codes such as 0041 print as 41.
   1.554 +    foreach $big5 ( sort { hex($a) <=> hex($b) } keys %b2u ) {
   1.555 +        $unicode = $b2u{$big5};
   1.556 +        $big5 =~ s/^00//;
   1.557 +        print "U+", $unicode, ": ", $big5, "\n";
   1.558 +    }
   1.559 +}
   1.560 +# Print every %u2b entry as "U+<unicode>: <big5>", ordered by Unicode value.
   1.561 +sub gen_check_u2b() {
   1.562 +    foreach $unicode ( sort { hex($a) <=> hex($b) } keys %u2b ) {
   1.563 +        $big5 = $u2b{$unicode};
   1.564 +        $big5 =~ s/^00//;    # drop a leading "00" from the Big5 code
   1.565 +        print "U+", $unicode, ": ", $big5, "\n";
   1.566 +    }
   1.567 +
   1.568 +}
   1.569 +
   1.570 +###########################################################################
   1.571 +#
   1.572 +#  Codes for generating hkscs.ut and hkscs.uf files for Mozilla
   1.573 +#
   1.574 +sub gen_mozilla_uf() {
   1.575 +    # hkscs.uf: print "0x<big5>\t0x<unicode>" for each %u2b entry that passes the filters below.
   1.576 +    foreach $unicode ( sort keys %u2b ) {    # plain string sort of the hex keys
   1.577 +        $big5 = $u2b{$unicode};
   1.578 +	my $b = hex($big5);
   1.579 +        print "0x", uc($big5), "\t0x", uc($unicode), "\n"
   1.580 +          unless ( $b >= 0xA140 and $b <= 0xC6A0 )    # skip B5+A140-C6A0 ...
   1.581 +          or ( $b >= 0xC940 and $b <= 0xF9D5 )    # ... and B5+C940-F9D5 (presumably left to the base Big5 table; TODO confirm)
   1.582 +          or ( $b < 0x8140 )                      # skip everything below B5+8140
   1.583 +          or ( hex($unicode) > 0xFFFF );          # skip Unicode values beyond U+FFFF
   1.584 +    }
   1.585 +}
   1.586 +
   1.587 +sub gen_mozilla_ut() {
   1.588 +    # hkscs.ut: print "0x<big5>\t0x<unicode>" for each %b2u entry that passes the filters below.
   1.589 +    foreach $big5 ( sort keys %b2u ) {    # plain string sort of the hex keys
   1.590 +        my $b = hex($big5);
   1.591 +        print "0x", uc($big5), "\t0x", uc( $b2u{$big5} ), "\n"
   1.592 +          unless ( $b >= 0xA140 and $b <= 0xC6A0 )    # skip B5+A140-C6A0
   1.593 +	  or ( $b < 0x8140 )    # skip everything below B5+8140
   1.594 +          or ( $b >= 0xC940 and $b <= 0xF9D5 );    # skip B5+C940-F9D5; NOTE(review): no U+FFFF cap here, unlike gen_mozilla_uf()
   1.595 +    }
   1.596 +}
   1.597 +
   1.598 +
   1.599 +###########################################################################
   1.600 +
   1.601 +sub gen_glibc() {
   1.602 +
   1.603 +    ##########################################################################
   1.604 +    #
   1.605 +    #   Generate index for UCS4 to Big5-HKSCS conversion table
   1.606 +    #
   1.607 +    @index_array = ();
   1.608 +
   1.609 +    $mode  = 0;
   1.610 +    $count = 0;
   1.611 +    for ( $uni = 0x81 ; $uni <= 0x2FFFF ; $uni++ ) {
   1.612 +        $unicode = sprintf( "%04X", $uni );
   1.613 +
   1.614 +        # print "  /* U+$unicode */\t" if $low % 4 == 0;
   1.615 +        if ( defined( $u2b{$unicode} ) ) {
   1.616 +            if ( $mode == 0 ) {
   1.617 +                $range_start = $range_end = $uni;
   1.618 +
   1.619 +                # printf "  { %7s, ", sprintf("0x%04X", $range_start);
   1.620 +                $mode = 1;
   1.621 +            }
   1.622 +            else {
   1.623 +                $range_end = $uni;
   1.624 +            }
   1.625 +        }
   1.626 +        elsif ( $mode == 1 and ( $uni - $range_end ) >= 0x80 ) {
   1.627 +
   1.628 +            # Start a new range if the gap is 0x80 or larger
   1.629 +            # printf "%7s, %5d },\n", sprintf("0x%04X", $range_end), $count;
   1.630 +            push @index_array, [ ( $range_start, $range_end, $count ) ];
   1.631 +            $count += $range_end - $range_start + 1;
   1.632 +            $mode = 0;
   1.633 +        }
   1.634 +    }
   1.635 +
   1.636 +    #
   1.637 +    #  Note that $count and $range_end are used again as global variables
   1.638 +    #  below
   1.639 +    #
   1.640 +
   1.641 +    ###########################################################################
   1.642 +    #
   1.643 +    #  Start generating real C code...
   1.644 +    #
   1.645 +
   1.646 +    print <<'EOT';
   1.647 +/* Mapping tables for Big5-HKSCS handling.
   1.648 +   Copyright (C) 1997, 1998, 2000, 2001, 2002 Free Software Foundation, Inc.
   1.649 +   This file is part of the GNU C Library.
   1.650 +   Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
   1.651 +   Modified for Big5-HKSCS by Roger So <roger.so@sw-linux.com>, 2000.
   1.652 +   Updated for HKSCS-2001 by James Su <suzhe@turbolinux.com.cn>
   1.653 +                         and Anthony Fok <anthony@thizlinux.com>, 2002
   1.654 +
   1.655 +   The GNU C Library is free software; you can redistribute it and/or
   1.656 +   modify it under the terms of the GNU Lesser General Public
   1.657 +   License as published by the Free Software Foundation; either
   1.658 +   version 2.1 of the License, or (at your option) any later version.
   1.659 +
   1.660 +   The GNU C Library is distributed in the hope that it will be useful,
   1.661 +   but WITHOUT ANY WARRANTY; without even the implied warranty of
   1.662 +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   1.663 +   Lesser General Public License for more details.
   1.664 +
   1.665 +   You should have received a copy of the GNU Lesser General Public
   1.666 +   License along with the GNU C Library; if not, write to the Free
   1.667 +   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   1.668 +   02111-1307 USA.  */
   1.669 +
   1.670 +#include <dlfcn.h>
   1.671 +#include <gconv.h>
   1.672 +#include <stdint.h>
   1.673 +#include <stdlib.h>
   1.674 +#include <string.h>
   1.675 +#include <wchar.h>
   1.676 +
   1.677 +
   1.678 +/* Table for Big5-HKSCS to UCS conversion.
   1.679 +
   1.680 +   Original comments by Roger So when he updated the tables for HKSCS-1999:
   1.681 +
   1.682 +     With HKSCS mappings 0x8140-0xA0FE and 0xFA40-0xFEFE added; more info:
   1.683 +     http://www.digital21.gov.hk/eng/hkscs/index.html
   1.684 +       - spacehunt 07/01/2000
   1.685 +
   1.686 +   The BIG5-HKSCS mapping tables are generated from 950.txt, big5-iso.txt
   1.687 +   and big5cmp.txt using a Perl script while merging C source code from
   1.688 +   other developers.  A copy of the source Perl script is available at:
   1.689 +
   1.690 +      http://www.thizlinux.com/~anthony/hkscs/gen-glibc-big5hkscs.pl
   1.691 +      http://people.debian.org/~foka/hkscs/gen-glibc-big5hkscs.pl
   1.692 +
   1.693 +  Revisions:
   1.694 +    2001-10-30  made codec for Qt
   1.695 +    2002-03-21  ported to glibc-2.2.5 and added HKSCS-2001
   1.696 +
   1.697 +  Todo:
   1.698 +    Use a hash for characters beyond BMP to save space and make it
   1.699 +    more efficient
   1.700 +
   1.701 +   - Anthony Fok <anthony@thizlinux.com>  21 Mar 2002
   1.702 +     On behalf of ThizLinux Laboratory Ltd., Hong Kong SAR, China
   1.703 +*/
   1.704 +
   1.705 +EOT
   1.706 +
   1.707 +    ##########################################################################
   1.708 +    #
   1.709 +    # Generate Big5-HKSCS to Unicode conversion table
   1.710 +    #
   1.711 +
   1.712 +    ## print "Big5HKSCS to Unicode\n";
   1.713 +
   1.714 +    # for $high (0x81..0x8d, 0x8e..0xa0, 0xc6..0xc8, 0xf9, 0xfa..0xfe) {
   1.715 +
   1.716 +    $high_start = 0x88;
   1.717 +    $high_end   = 0xfe;
   1.718 +
   1.719 +    print "static const uint16_t big5_hkscs_to_ucs[";
   1.720 +    print( ( $high_end - $high_start + 1 ) * 157 );
   1.721 +    print "] =\n{\n";
   1.722 +    for $high ( 0x88 .. 0xfe ) {
   1.723 +        for $low ( 0x40 .. 0x7e, 0xa1 .. 0xfe ) {
   1.724 +            if ( $low == 0x40 ) {
   1.725 +                print "\n" unless $high == $high_start;
   1.726 +                printf
   1.727 +                  "\t/* Big5-HKSCS 0x%02X40..0x%02X7E, 0x%02XA1..0x%02XFE */\n",
   1.728 +                  $high, $high, $high, $high;
   1.729 +            }
   1.730 +            elsif ( $low == 0xa1 ) {
   1.731 +                print "\t\t";
   1.732 +            }
   1.733 +            $big5 = sprintf( "%02X%02X", $high, $low );
   1.734 +            print "\t" if $low % 8 == 0;
   1.735 +            if ( defined( $b2u{$big5} ) ) {
   1.736 +                $unicode = $b2u{$big5};
   1.737 +                print "0x", $unicode, ",";
   1.738 +            }
   1.739 +            else {
   1.740 +                print "0x0000,";    # for glibc
   1.741 +            }
   1.742 +            print( ( $low % 8 == 7 or $low == 0x7e or $low == 0xfe ) 
   1.743 +                ? "\n"
   1.744 +                : "\t" );
   1.745 +        }
   1.746 +    }
   1.747 +    print "};\n\n";
   1.748 +
   1.749 +    ##########################################################################
   1.750 +    #
   1.751 +    # Generate Unicode to Big5-HKSCS conversion table
   1.752 +    # (one 2-byte C string per code point of every @index_array run; all-zero bytes = no mapping)
   1.753 +    print "static const unsigned char ucs4_to_big5_hkscs[$count][2] =\n{\n";    # $count is set outside this section; presumably the total entry count -- TODO confirm
   1.754 +    foreach $index (@index_array) {
   1.755 +        ( $start, $end ) = ( @$index[0], @$index[1] );    # first and last code point of the run (inclusive)
   1.756 +        printf( "  /* U+%04X */\t", $start ) if ( $start % 4 != 0 );    # label a run that does not start on a multiple of 4
   1.757 +        print "\t" x ( ( $start % 4 ) * 1.5 ) . "    " x ( $start % 2 );    # leading padding; apparently aligns the first entry within its 4-entry row
   1.758 +        for ( $i = $start ; $i <= $end ; $i++ ) {
   1.759 +            printf( "  /* U+%04X */\t", $i ) if ( $i % 4 == 0 );    # every row of four entries starts with a U+XXXX label
   1.760 +            $unicode = sprintf( "%04X", $i );    # %u2b is looked up by upper-case hex code point
   1.761 +            if ( defined( $big5 = $u2b{$unicode} ) ) {
   1.762 +                if ( $big5 =~ /^00/ ) {    # value starts with 00: emit its last two hex digits first, then a zero byte
   1.763 +                    print '"\x', substr( $big5, 2, 2 ), '\x00",';
   1.764 +                }
   1.765 +                else {    # otherwise emit the two hex-digit pairs in order
   1.766 +                    print '"\x', substr( $big5, 0, 2 ), '\x',
   1.767 +                      substr( $big5, 2, 2 ), '",';
   1.768 +                }
   1.769 +            }
   1.770 +            else {    # no mapping for this code point
   1.771 +                print '"\x00\x00",';
   1.772 +            }
   1.773 +            print( ( $i % 4 == 3 ) ? "\n" : " " ) unless $i == $end;    # row break after every 4th entry; the end of the run is handled below
   1.774 +        }
   1.775 +        print $end == $range_end ? "\n" : "\n\n";    # extra blank line between runs; $range_end is set outside this section (presumably the end of the last run -- TODO confirm)
   1.776 +    }
   1.777 +    print "};\n\n";
   1.778 +
   1.779 +    ###########################################################################
   1.780 +    # Emit from_ucs4_idx: one { from, to, offset } row per @index_array entry.
   1.781 +    print <<EOT;
   1.782 +static struct
   1.783 +{
   1.784 +    /* Note: We are going to split this table so that we can use
   1.785 +       uint16_t for "from" and "to" again.  Anthony Fok, 2002-03-21 */
   1.786 +    uint32_t from;
   1.787 +    uint32_t to;
   1.788 +    uint32_t offset;
   1.789 +} from_ucs4_idx[] =
   1.790 +{
   1.791 +EOT
   1.792 +    foreach $index (@index_array) {
   1.793 +        printf "    { %7s, %7s, %5d },\n", sprintf( "0x%04X", @$index[0] ),    # from: element 0, as hex
   1.794 +          sprintf( "0x%04X", @$index[1] ), @$index[2];    # to: element 1, as hex; offset: element 2, in decimal
   1.795 +    }
   1.796 +    print "};\n\n";
   1.797 +    # Disabled debugging aid: prints every value stored in %b2u, in key order.
   1.798 +    #foreach $i (sort keys %b2u) {
   1.799 +    #    print $b2u{$i} . ' ';
   1.800 +    #}
   1.801 +
   1.802 +    print <<'EOT';
   1.803 +/* Definitions used in the body of the `gconv' function.  */
   1.804 +#define CHARSET_NAME		"BIG5HKSCS//"
   1.805 +#define FROM_LOOP		from_big5
   1.806 +#define TO_LOOP			to_big5
   1.807 +#define DEFINE_INIT		1
   1.808 +#define DEFINE_FINI		1
   1.809 +#define MIN_NEEDED_FROM		1
   1.810 +#define MAX_NEEDED_FROM		2
   1.811 +#define MIN_NEEDED_TO		4
   1.812 +
   1.813 +
   1.814 +/* First define the conversion function from Big5-HKSCS to UCS4.  */
   1.815 +#define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
   1.816 +#define MAX_NEEDED_INPUT	MAX_NEEDED_FROM
   1.817 +#define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
   1.818 +#define LOOPFCT			FROM_LOOP
   1.819 +#define BODY \
   1.820 +  {									      \
   1.821 +    uint32_t ch = *inptr;						      \
   1.822 +									      \
   1.823 +    if (ch >= 0x81 && ch <= 0xfe)					      \
   1.824 +      {									      \
   1.825 +	/* Two-byte character.  First test whether the next character	      \
   1.826 +	   is also available.  */					      \
   1.827 +	uint32_t ch2;							      \
   1.828 +	int idx;							      \
   1.829 +									      \
   1.830 +	if (__builtin_expect (inptr + 1 >= inend, 0))			      \
   1.831 +	  {								      \
   1.832 +	    /* The second character is not available.  */		      \
   1.833 +	    result = __GCONV_INCOMPLETE_INPUT;				      \
   1.834 +	    break;							      \
   1.835 +	  }								      \
   1.836 +									      \
   1.837 +	ch2 = inptr[1];							      \
   1.838 +	/* See whether the second byte is in the correct range.  */	      \
   1.839 +	if ((ch2 >= 0x40 && ch2 <= 0x7e) || (ch2 >= 0xa1 && ch2 <= 0xfe))     \
   1.840 +	  {								      \
   1.841 +	    if (ch >= 0x88)						      \
   1.842 +	      {								      \
   1.843 +		/* Look up the table */					      \
   1.844 +		idx = (ch - 0x88) * 157 + ch2 - (ch2 <= 0x7e ? 0x40 : 0x62);  \
   1.845 +		if ((ch = big5_hkscs_to_ucs[idx]) == 0)			      \
   1.846 +		  {							      \
   1.847 +		    /* This is illegal.  */				      \
   1.848 +		    if (! ignore_errors_p ())				      \
   1.849 +		      {							      \
   1.850 +			result = __GCONV_ILLEGAL_INPUT;			      \
   1.851 +			break;						      \
   1.852 +		      }							      \
   1.853 +									      \
   1.854 +		    ++inptr;						      \
   1.855 +		    ++*irreversible;					      \
   1.856 +		    continue;						      \
   1.857 +		  }							      \
   1.858 +	      }								      \
   1.859 +	    else							      \
   1.860 +	      {								      \
   1.861 +		/* 0x81..0x87 in UDA3, currently maps linearly to PUA */      \
   1.862 +		ch = (ch - 0x81) * 157 + ch2 - (ch2 <= 0x7e ? 0x40 : 0x62)    \
   1.863 +		      + 0xeeb8;						      \
   1.864 +	      }								      \
   1.865 +	  }								      \
   1.866 +	else								      \
   1.867 +	  {								      \
   1.868 +	    /* This is illegal.  */					      \
   1.869 +	    if (! ignore_errors_p ())					      \
   1.870 +	      {								      \
   1.871 +		result = __GCONV_ILLEGAL_INPUT;				      \
   1.872 +		break;							      \
   1.873 +	      }								      \
   1.874 +									      \
   1.875 +	    ++inptr;							      \
   1.876 +	    ++*irreversible;						      \
   1.877 +	    continue;							      \
   1.878 +	  }								      \
   1.879 +									      \
   1.880 +	inptr += 2;							      \
   1.881 +      }									      \
   1.882 +    else if (__builtin_expect (ch, 0) == 0xff)				      \
   1.883 +      {									      \
   1.884 +	result = __GCONV_ILLEGAL_INPUT;					      \
   1.885 +	break;								      \
   1.886 +      }									      \
   1.887 +    else  /* 0x00 to 0x80 */						      \
   1.888 +      ++inptr;								      \
   1.889 +									      \
   1.890 +    put32 (outptr, ch);							      \
   1.891 +    outptr += 4;							      \
   1.892 +  }
   1.893 +#define LOOP_NEED_FLAGS
   1.894 +#include <iconv/loop.c>
   1.895 +
   1.896 +
   1.897 +/* Next, define the other direction.  */
   1.898 +#define MIN_NEEDED_INPUT	MIN_NEEDED_TO
   1.899 +#define MIN_NEEDED_OUTPUT	MIN_NEEDED_FROM
   1.900 +#define MAX_NEEDED_OUTPUT	MAX_NEEDED_FROM
   1.901 +#define LOOPFCT			TO_LOOP
   1.902 +#define BODY \
   1.903 +  {									      \
   1.904 +    uint32_t ch = get32 (inptr);					      \
   1.905 +    const unsigned char *cp = "";						      \
   1.906 +    unsigned char b5ch[2] = "\0\0";					      \
   1.907 +    int i;								      \
   1.908 +    									      \
   1.909 +    for (i = 0;								      \
   1.910 +	 i < (int) (sizeof (from_ucs4_idx) / sizeof (from_ucs4_idx[0]));      \
   1.911 +	 ++i)								      \
   1.912 +      {									      \
   1.913 +	if (ch < from_ucs4_idx[i].from)					      \
   1.914 +	  break;							      \
   1.915 +	if (from_ucs4_idx[i].to >= ch)					      \
   1.916 +	  {								      \
   1.917 +	    cp = ucs4_to_big5_hkscs[from_ucs4_idx[i].offset		      \
   1.918 +			  + ch - from_ucs4_idx[i].from];		      \
   1.919 +	    break;							      \
   1.920 +	  }								      \
   1.921 +      }									      \
   1.922 +									      \
   1.923 +    if (ch <= 0x80)							      \
   1.924 +      {									      \
   1.925 +	b5ch[0] = ch;							      \
   1.926 +	cp = b5ch;							      \
   1.927 +      }									      \
   1.928 +									      \
   1.929 +    if (cp[0] == '\0' && ch != 0)					      \
   1.930 +      {									      \
   1.931 +	UNICODE_TAG_HANDLER (ch, 4);					      \
   1.932 +									      \
   1.933 +	/* Illegal character.  */					      \
   1.934 +	STANDARD_ERR_HANDLER (4);					      \
   1.935 +      }									      \
   1.936 +    else								      \
   1.937 +      {									      \
   1.938 +	/* See whether there is enough room for the second byte we write.  */ \
   1.939 +	if (__builtin_expect (cp[1], '\1') != '\0'			      \
   1.940 +	    && __builtin_expect (outptr + 1 >= outend, 0))		      \
   1.941 +	  {								      \
   1.942 +	    /* We have not enough room.  */				      \
   1.943 +	    result = __GCONV_FULL_OUTPUT;				      \
   1.944 +	    break;							      \
   1.945 +	  }								      \
   1.946 +									      \
   1.947 +	*outptr++ = cp[0];						      \
   1.948 +	if (cp[1] != '\0')						      \
   1.949 +	  *outptr++ = cp[1];						      \
   1.950 +      }									      \
   1.951 +									      \
   1.952 +    inptr += 4;								      \
   1.953 +  }
   1.954 +#define LOOP_NEED_FLAGS
   1.955 +#include <iconv/loop.c>
   1.956 +
   1.957 +
   1.958 +/* Now define the toplevel functions.  */
   1.959 +#include <iconv/skeleton.c>
   1.960 +EOT
   1.961 +
   1.962 +}

mercurial