The Tor Browser: diff intl/icu/source/common/ucnvmbcs.c

     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/common/ucnvmbcs.c	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,5661 @@
     1.4 +/*
     1.5 +******************************************************************************
     1.6 +*
     1.7 +*   Copyright (C) 2000-2013, International Business Machines
     1.8 +*   Corporation and others.  All Rights Reserved.
     1.9 +*
    1.10 +******************************************************************************
    1.11 +*   file name:  ucnvmbcs.c
    1.12 +*   encoding:   US-ASCII
    1.13 +*   tab size:   8 (not used)
    1.14 +*   indentation:4
    1.15 +*
    1.16 +*   created on: 2000jul03
    1.17 +*   created by: Markus W. Scherer
    1.18 +*
    1.19 +*   The current code in this file replaces the previous implementation
    1.20 +*   of conversion code from multi-byte codepages to Unicode and back.
    1.21 +*   This implementation supports the following:
    1.22 +*   - legacy variable-length codepages with up to 4 bytes per character
    1.23 +*   - all Unicode code points (up to 0x10ffff)
    1.24 +*   - efficient distinction of unassigned vs. illegal byte sequences
    1.25 +*   - it is possible in fromUnicode() to directly deal with simple
    1.26 +*     stateful encodings (used for EBCDIC_STATEFUL)
    1.27 +*   - it is possible to convert Unicode code points
    1.28 +*     to a single zero byte (but not as a fallback except for SBCS)
    1.29 +*
    1.30 +*   Remaining limitations in fromUnicode:
    1.31 +*   - byte sequences must not have leading zero bytes
    1.32 +*   - except for SBCS codepages: no fallback mapping from Unicode to a zero byte
    1.33 +*   - limitation to up to 4 bytes per character
    1.34 +*
    1.35 +*   ICU 2.8 (late 2003) adds a secondary data structure which lifts some of these
    1.36 +*   limitations and adds m:n character mappings and other features.
    1.37 +*   See ucnv_ext.h for details.
    1.38 +*
    1.39 +*   Change history: 
    1.40 +*
    1.41 +*    5/6/2001       Ram       Moved  MBCS_SINGLE_RESULT_FROM_U,MBCS_STAGE_2_FROM_U,
    1.42 +*                             MBCS_VALUE_2_FROM_STAGE_2, MBCS_VALUE_4_FROM_STAGE_2
    1.43 +*                             macros to ucnvmbcs.h file
    1.44 +*/
    1.45 +
    1.46 +#include "unicode/utypes.h"
    1.47 +
    1.48 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
    1.49 +
    1.50 +#include "unicode/ucnv.h"
    1.51 +#include "unicode/ucnv_cb.h"
    1.52 +#include "unicode/udata.h"
    1.53 +#include "unicode/uset.h"
    1.54 +#include "unicode/utf8.h"
    1.55 +#include "unicode/utf16.h"
    1.56 +#include "ucnv_bld.h"
    1.57 +#include "ucnvmbcs.h"
    1.58 +#include "ucnv_ext.h"
    1.59 +#include "ucnv_cnv.h"
    1.60 +#include "cmemory.h"
    1.61 +#include "cstring.h"
    1.62 +#include "cmutex.h"
    1.63 +
    1.64 +/* control optimizations according to the platform */
    1.65 +#define MBCS_UNROLL_SINGLE_TO_BMP 1
    1.66 +#define MBCS_UNROLL_SINGLE_FROM_BMP 0
    1.67 +
    1.68 +/*
    1.69 + * _MBCSHeader versions 5.3 & 4.3
    1.70 + * (Note that the _MBCSHeader version is in addition to the converter formatVersion.)
    1.71 + *
    1.72 + * This version is optional. Version 5 is used for incompatible data format changes.
    1.73 + * makeconv will continue to generate version 4 files if possible.
    1.74 + *
    1.75 + * Changes from version 4:
    1.76 + *
    1.77 + * The main difference is an additional _MBCSHeader field with
    1.78 + * - the length (number of uint32_t) of the _MBCSHeader
    1.79 + * - flags for further incompatible data format changes
    1.80 + * - flags for further, backward compatible data format changes
    1.81 + *
    1.82 + * The MBCS_OPT_FROM_U flag indicates that most of the fromUnicode data is omitted from
    1.83 + * the file and needs to be reconstituted at load time.
    1.84 + * This requires a utf8Friendly format with an additional mbcsIndex table for fast
    1.85 + * (and UTF-8-friendly) fromUnicode conversion for Unicode code points up to maxFastUChar.
    1.86 + * (For details about these structures see below, and see ucnvmbcs.h.)
    1.87 + *
    1.88 + *   utf8Friendly also implies that the fromUnicode mappings are stored in ascending order
    1.89 + *   of the Unicode code points. (This requires that the .ucm file has the |0 etc.
    1.90 + *   precision markers for all mappings.)
    1.91 + *
    1.92 + *   All fallbacks have been moved to the extension table, leaving only roundtrips in the
    1.93 + *   omitted data that can be reconstituted from the toUnicode data.
    1.94 + *
    1.95 + *   Of the stage 2 table, the part corresponding to maxFastUChar and below is omitted.
    1.96 + *   With only roundtrip mappings in the base fromUnicode data, this part is fully
    1.97 + *   redundant with the mbcsIndex and will be reconstituted from that (also using the
    1.98 + *   stage 1 table which contains the information about how stage 2 was compacted).
    1.99 + *
   1.100 + *   The rest of the stage 2 table, the part for code points above maxFastUChar,
   1.101 + *   is stored in the file and will be appended to the reconstituted part.
   1.102 + *
   1.103 + *   The entire fromUBytes array is omitted from the file and will be reconstituted.
   1.104 + *   This is done by enumerating all toUnicode roundtrip mappings, performing
   1.105 + *   each mapping (using the stage 1 and reconstituted stage 2 tables) and
   1.106 + *   writing instead of reading the byte values.
   1.107 + *
   1.108 + * _MBCSHeader version 4.3
   1.109 + *
   1.110 + * Change from version 4.2:
   1.111 + * - Optional utf8Friendly data structures, with 64-entry stage 3 block
   1.112 + *   allocation for parts of the BMP, and an additional mbcsIndex in non-SBCS
   1.113 + *   files which can be used instead of stages 1 & 2.
   1.114 + *   Faster lookups for roundtrips from most commonly used characters,
   1.115 + *   and lookups from UTF-8 byte sequences with a natural bit distribution.
   1.116 + *   See ucnvmbcs.h for more details.
   1.117 + *
   1.118 + * Change from version 4.1:
   1.119 + * - Added an optional extension table structure at the end of the .cnv file.
   1.120 + *   It is present if the upper bits of the header flags field contains a non-zero
   1.121 + *   byte offset to it.
   1.122 + *   Files that contain only an extension table and no base table
   1.123 + *   use the special outputType MBCS_OUTPUT_EXT_ONLY.
   1.124 + *   These contain the base table name between the MBCS header and the extension
   1.125 + *   data.
   1.126 + *
   1.127 + * Change from version 4.0:
   1.128 + * - Replace header.reserved with header.fromUBytesLength so that all
   1.129 + *   fields in the data have length.
   1.130 + *
   1.131 + * Changes from version 3 (for performance improvements):
   1.132 + * - new bit distribution for state table entries
   1.133 + * - reordered action codes
   1.134 + * - new data structure for single-byte fromUnicode
   1.135 + *   + stage 2 only contains indexes
   1.136 + *   + stage 3 stores 16 bits per character with classification bits 15..8
   1.137 + * - no multiplier for stage 1 entries
   1.138 + * - stage 2 for non-single-byte codepages contains the index and the flags in
   1.139 + *   one 32-bit value
   1.140 + * - 2-byte and 4-byte fromUnicode results are stored directly as 16/32-bit integers
   1.141 + *
   1.142 + * For more details about old versions of the MBCS data structure, see
   1.143 + * the corresponding versions of this file.
   1.144 + *
   1.145 + * Converting stateless codepage data ---------------------------------------***
   1.146 + * (or codepage data with simple states) to Unicode.
   1.147 + *
   1.148 + * Data structure and algorithm for converting from complex legacy codepages
   1.149 + * to Unicode. (Designed before 2000-may-22.)
   1.150 + *
   1.151 + * The basic idea is that the structure of legacy codepages can be described
   1.152 + * with state tables.
   1.153 + * When reading a byte stream, each input byte causes a state transition.
   1.154 + * Some transitions result in the output of a code point, some result in
   1.155 + * "unassigned" or "illegal" output.
   1.156 + * This is used here for character conversion.
   1.157 + *
   1.158 + * The data structure begins with a state table consisting of a row
   1.159 + * per state, with 256 entries (columns) per row for each possible input
   1.160 + * byte value.
   1.161 + * Each entry is 32 bits wide, with two formats distinguished by
   1.162 + * the sign bit (bit 31):
   1.163 + *
   1.164 + * One format for transitional entries (bit 31 not set) for non-final bytes, and
   1.165 + * one format for final entries (bit 31 set).
   1.166 + * Both formats contain the number of the next state in the same bit
   1.167 + * positions.
   1.168 + * State 0 is the initial state.
   1.169 + *
   1.170 + * Most of the time, the offset values of subsequent states are added
   1.171 + * up to a scalar value. This value will eventually be the index of
   1.172 + * the Unicode code point in a table that follows the state table.
   1.173 + * The effect is that the code points for final state table rows
   1.174 + * are contiguous. The code points of final state rows follow each other
   1.175 + * in the order of the references to those final states by previous
   1.176 + * states, etc.
   1.177 + *
   1.178 + * For some terminal states, the offset is itself the output Unicode
   1.179 + * code point (16 bits for a BMP code point or 20 bits for a supplementary
   1.180 + * code point, stored as code point minus 0x10000 so that 20 bits are enough).
   1.181 + * For others, the code point in the Unicode table is stored with either
   1.182 + * one or two code units: one for BMP code points, two for a pair of
   1.183 + * surrogates.
   1.184 + * All code points for a final state entry take up the same number of code
   1.185 + * units, regardless of whether they all actually _use_ the same number
   1.186 + * of code units. This is necessary for simple array access.
   1.187 + *
   1.188 + * An additional feature comes in with what in ICU is called "fallback"
   1.189 + * mappings:
   1.190 + *
   1.191 + * In addition to round-trippable, precise, 1:1 mappings, there are often
   1.192 + * mappings defined between similar, though not the same, characters.
   1.193 + * Typically, such mappings occur only in fromUnicode mapping tables because
   1.194 + * Unicode has a superset repertoire of most other codepages. However, it
   1.195 + * is possible to provide such mappings in the toUnicode tables, too.
   1.196 + * In this case, the fallback mappings are partly integrated into the
   1.197 + * general state tables because the structure of the encoding includes their
   1.198 + * byte sequences.
   1.199 + * For final entries in an initial state, fallback mappings are stored in
   1.200 + * the entry itself like with roundtrip mappings.
   1.201 + * For other final entries, they are stored in the code units table if
   1.202 + * the entry is for a pair of code units.
   1.203 + * For single-unit results in the code units table, there is no space to
   1.204 + * alternatively hold a fallback mapping; in this case, the code unit
   1.205 + * is stored as U+fffe (unassigned), and the fallback mapping needs to
   1.206 + * be looked up by the scalar offset value in a separate table.
   1.207 + *
   1.208 + * "Unassigned" state entries really mean "structurally unassigned",
   1.209 + * i.e., such a byte sequence will never have a mapping result.
   1.210 + *
   1.211 + * The interpretation of the bits in each entry is as follows:
   1.212 + *
   1.213 + * Bit 31 not set, not a terminal entry ("transitional"):
   1.214 + * 30..24 next state
   1.215 + * 23..0  offset delta, to be added up
   1.216 + *
   1.217 + * Bit 31 set, terminal ("final") entry:
   1.218 + * 30..24 next state (regardless of action code)
   1.219 + * 23..20 action code:
   1.220 + *        action codes 0 and 1 result in precise-mapping Unicode code points
   1.221 + *        0  valid byte sequence
   1.222 + *           19..16 not used, 0
   1.223 + *           15..0  16-bit Unicode BMP code point
   1.224 + *                  never U+fffe or U+ffff
   1.225 + *        1  valid byte sequence
   1.226 + *           19..0  20-bit Unicode supplementary code point
   1.227 + *                  never U+fffe or U+ffff
   1.228 + *
   1.229 + *        action codes 2 and 3 result in fallback (unidirectional-mapping) Unicode code points
   1.230 + *        2  valid byte sequence (fallback)
   1.231 + *           19..16 not used, 0
   1.232 + *           15..0  16-bit Unicode BMP code point as fallback result
   1.233 + *        3  valid byte sequence (fallback)
   1.234 + *           19..0  20-bit Unicode supplementary code point as fallback result
   1.235 + *
   1.236 + *        action codes 4 and 5 may result in roundtrip/fallback/unassigned/illegal results
   1.237 + *        depending on the code units they result in
   1.238 + *        4  valid byte sequence
   1.239 + *           19..9  not used, 0
   1.240 + *            8..0  final offset delta
   1.241 + *                  pointing to one 16-bit code unit which may be
   1.242 + *                  fffe  unassigned -- look for a fallback for this offset
   1.243 + *                  ffff  illegal
   1.244 + *        5  valid byte sequence
   1.245 + *           19..9  not used, 0
   1.246 + *            8..0  final offset delta
   1.247 + *                  pointing to two 16-bit code units
   1.248 + *                  (typically UTF-16 surrogates)
   1.249 + *                  the result depends on the first code unit as follows:
   1.250 + *                  0000..d7ff  roundtrip BMP code point (1st alone)
   1.251 + *                  d800..dbff  roundtrip surrogate pair (1st, 2nd)
   1.252 + *                  dc00..dfff  fallback surrogate pair (1st-400, 2nd)
   1.253 + *                  e000        roundtrip BMP code point (2nd alone)
   1.254 + *                  e001        fallback BMP code point (2nd alone)
   1.255 + *                  fffe        unassigned
   1.256 + *                  ffff        illegal
   1.257 + *           (the final offset deltas are at most 255 * 2,
   1.258 + *            times 2 because of storing code unit pairs)
   1.259 + *
   1.260 + *        6  unassigned byte sequence
   1.261 + *           19..16 not used, 0
   1.262 + *           15..0  16-bit Unicode BMP code point U+fffe (new with version 2)
   1.263 + *                  this does not contain a final offset delta because the main
   1.264 + *                  purpose of this action code is to save scalar offset values;
   1.265 + *                  therefore, fallback values cannot be assigned to byte
   1.266 + *                  sequences that result in this action code
   1.267 + *        7  illegal byte sequence
   1.268 + *           19..16 not used, 0
   1.269 + *           15..0  16-bit Unicode BMP code point U+ffff (new with version 2)
   1.270 + *        8  state change only
   1.271 + *           19..0  not used, 0
   1.272 + *           useful for state changes in simple stateful encodings,
   1.273 + *           at Shift-In/Shift-Out codes
   1.274 + *
   1.275 + *
   1.276 + *        9..15 reserved for future use
   1.277 + *           current implementations will only perform a state change
   1.278 + *           and ignore bits 19..0
   1.279 + *
   1.280 + * An encoding with contiguous ranges of unassigned byte sequences, like
   1.281 + * Shift-JIS and especially EUC-TW, can be stored efficiently by having
   1.282 + * at least two states for the trail bytes:
   1.283 + * One trail byte state that results in code points, and one that only
   1.284 + * has "unassigned" and "illegal" terminal states.
   1.285 + *
   1.286 + * Note: partly by accident, this data structure supports simple stateful
   1.287 + * encodings without any additional logic.
   1.288 + * Currently, only simple Shift-In/Shift-Out schemes are handled with
   1.289 + * appropriate state tables (especially EBCDIC_STATEFUL!).
   1.290 + *
   1.291 + * MBCS version 2 added:
   1.292 + * unassigned and illegal action codes have U+fffe and U+ffff
   1.293 + * instead of unused bits; this is useful for _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP()
   1.294 + *
   1.295 + * Converting from Unicode to codepage bytes --------------------------------***
   1.296 + *
   1.297 + * The conversion data structure for fromUnicode is designed for the known
   1.298 + * structure of Unicode. It maps from 21-bit code points (0..0x10ffff) to
   1.299 + * a sequence of 1..4 bytes, in addition to a flag that indicates if there is
   1.300 + * a roundtrip mapping.
   1.301 + *
   1.302 + * The lookup is done with a 3-stage trie, using 11/6/4 bits for stage 1/2/3
   1.303 + * like in the character properties table.
   1.304 + * The beginning of the trie is at offsetFromUTable, the beginning of stage 3
   1.305 + * with the resulting bytes is at offsetFromUBytes.
   1.306 + *
   1.307 + * Beginning with version 4, single-byte codepages have a significantly different
   1.308 + * trie compared to other codepages.
   1.309 + * In all cases, the entry in stage 1 is directly the index of the block of
   1.310 + * 64 entries in stage 2.
   1.311 + *
   1.312 + * Single-byte lookup:
   1.313 + *
   1.314 + * Stage 2 only contains 16-bit indexes directly to the 16-blocks in stage 3.
   1.315 + * Stage 3 contains one 16-bit word per result:
   1.316 + * Bits 15..8 indicate the kind of result:
   1.317 + *    f  roundtrip result
   1.318 + *    c  fallback result from private-use code point
   1.319 + *    8  fallback result from other code points
   1.320 + *    0  unassigned
   1.321 + * Bits 7..0 contain the codepage byte. A zero byte is always possible.
   1.322 + *
   1.323 + * In version 4.3, the runtime code can build an sbcsIndex for a utf8Friendly
   1.324 + * file. For 2-byte UTF-8 byte sequences and some 3-byte sequences the lookup
   1.325 + * becomes a 2-stage (single-index) trie lookup with 6 bits for stage 3.
   1.326 + * ASCII code points can be looked up with a linear array access into stage 3.
   1.327 + * See maxFastUChar and other details in ucnvmbcs.h.
   1.328 + *
   1.329 + * Multi-byte lookup:
   1.330 + *
   1.331 + * Stage 2 contains a 32-bit word for each 16-block in stage 3:
   1.332 + * Bits 31..16 contain flags for which stage 3 entries contain roundtrip results
   1.333 + *             test: MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c)
   1.334 + *             If this test is false, then a non-zero result will be interpreted as
   1.335 + *             a fallback mapping.
   1.336 + * Bits 15..0  contain the index to stage 3, which must be multiplied by 16*(bytes per char)
   1.337 + *
   1.338 + * Stage 3 contains 2, 3, or 4 bytes per result.
   1.339 + * 2 or 4 bytes are stored as uint16_t/uint32_t in platform endianness,
   1.340 + * while 3 bytes are stored as bytes in big-endian order.
   1.341 + * Leading zero bytes are ignored, and the number of bytes is counted.
   1.342 + * A zero byte mapping result is possible as a roundtrip result.
   1.343 + * For some output types, the actual result is processed from this;
   1.344 + * see ucnv_MBCSFromUnicodeWithOffsets().
   1.345 + *
   1.346 + * Note that stage 1 always contains 0x440=1088 entries (0x440==0x110000>>10),
   1.347 + * or (version 3 and up) for BMP-only codepages, it contains 64 entries.
   1.348 + *
   1.349 + * In version 4.3, a utf8Friendly file contains an mbcsIndex table.
   1.350 + * For 2-byte UTF-8 byte sequences and most 3-byte sequences the lookup
   1.351 + * becomes a 2-stage (single-index) trie lookup with 6 bits for stage 3.
   1.352 + * ASCII code points can be looked up with a linear array access into stage 3.
   1.353 + * See maxFastUChar, mbcsIndex and other details in ucnvmbcs.h.
   1.354 + *
   1.355 + * In version 3, stage 2 blocks may overlap by multiples of the multiplier
   1.356 + * for compaction.
   1.357 + * In version 4, stage 2 blocks (and for single-byte codepages, stage 3 blocks)
   1.358 + * may overlap by any number of entries.
   1.359 + *
   1.360 + * MBCS version 2 added:
   1.361 + * the converter checks for known output types, which allows
   1.362 + * adding new ones without crashing an unaware converter
   1.363 + */
   1.364 +
   1.365 +static const UConverterImpl _SBCSUTF8Impl;
   1.366 +static const UConverterImpl _DBCSUTF8Impl;
   1.367 +
   1.368 +/* GB 18030 data ------------------------------------------------------------ */
   1.369 +
   1.370 +/* helper macros for linear values for GB 18030 four-byte sequences */
   1.371 +#define LINEAR_18030(a, b, c, d) ((((a)*10+(b))*126L+(c))*10L+(d))
   1.372 +
   1.373 +#define LINEAR_18030_BASE LINEAR_18030(0x81, 0x30, 0x81, 0x30)
   1.374 +
   1.375 +#define LINEAR(x) LINEAR_18030(x>>24, (x>>16)&0xff, (x>>8)&0xff, x&0xff)
   1.376 +
   1.377 +/*
   1.378 + * Some ranges of GB 18030 where both the Unicode code points and the
   1.379 + * GB four-byte sequences are contiguous and are handled algorithmically by
   1.380 + * the special callback functions below.
   1.381 + * The values are start & end of Unicode & GB codes.
   1.382 + *
   1.383 + * Note that single surrogates are not mapped by GB 18030
   1.384 + * as of the re-released mapping tables from 2000-nov-30.
   1.385 + */
   1.386 +static const uint32_t
   1.387 +gb18030Ranges[14][4]={
   1.388 +    {0x10000, 0x10FFFF, LINEAR(0x90308130), LINEAR(0xE3329A35)},
   1.389 +    {0x9FA6, 0xD7FF, LINEAR(0x82358F33), LINEAR(0x8336C738)},
   1.390 +    {0x0452, 0x1E3E, LINEAR(0x8130D330), LINEAR(0x8135F436)},
   1.391 +    {0x1E40, 0x200F, LINEAR(0x8135F438), LINEAR(0x8136A531)},
   1.392 +    {0xE865, 0xF92B, LINEAR(0x8336D030), LINEAR(0x84308534)},
   1.393 +    {0x2643, 0x2E80, LINEAR(0x8137A839), LINEAR(0x8138FD38)},
   1.394 +    {0xFA2A, 0xFE2F, LINEAR(0x84309C38), LINEAR(0x84318537)},
   1.395 +    {0x3CE1, 0x4055, LINEAR(0x8231D438), LINEAR(0x8232AF32)},
   1.396 +    {0x361B, 0x3917, LINEAR(0x8230A633), LINEAR(0x8230F237)},
   1.397 +    {0x49B8, 0x4C76, LINEAR(0x8234A131), LINEAR(0x8234E733)},
   1.398 +    {0x4160, 0x4336, LINEAR(0x8232C937), LINEAR(0x8232F837)},
   1.399 +    {0x478E, 0x4946, LINEAR(0x8233E838), LINEAR(0x82349638)},
   1.400 +    {0x44D7, 0x464B, LINEAR(0x8233A339), LINEAR(0x8233C931)},
   1.401 +    {0xFFE6, 0xFFFF, LINEAR(0x8431A234), LINEAR(0x8431A439)}
   1.402 +};
   1.403 +
   1.404 +/* bit flag for UConverter.options indicating GB 18030 special handling */
   1.405 +#define _MBCS_OPTION_GB18030 0x8000
   1.406 +
   1.407 +/* bit flag for UConverter.options indicating KEIS,JEF,JIF special handling */
   1.408 +#define _MBCS_OPTION_KEIS 0x01000
   1.409 +#define _MBCS_OPTION_JEF  0x02000
   1.410 +#define _MBCS_OPTION_JIPS 0x04000
   1.411 +
   1.412 +#define KEIS_SO_CHAR_1 0x0A
   1.413 +#define KEIS_SO_CHAR_2 0x42
   1.414 +#define KEIS_SI_CHAR_1 0x0A
   1.415 +#define KEIS_SI_CHAR_2 0x41
   1.416 +
   1.417 +#define JEF_SO_CHAR 0x28
   1.418 +#define JEF_SI_CHAR 0x29
   1.419 +
   1.420 +#define JIPS_SO_CHAR_1 0x1A
   1.421 +#define JIPS_SO_CHAR_2 0x70
   1.422 +#define JIPS_SI_CHAR_1 0x1A
   1.423 +#define JIPS_SI_CHAR_2 0x71
   1.424 +
   1.425 +enum SISO_Option {
   1.426 +    SI,
   1.427 +    SO
   1.428 +};
   1.429 +typedef enum SISO_Option SISO_Option;
   1.430 +
   1.431 +static int32_t getSISOBytes(SISO_Option option, uint32_t cnvOption, uint8_t *value) {
   1.432 +    int32_t SISOLength = 0;
   1.433 +
   1.434 +    switch (option) {
   1.435 +        case SI:
   1.436 +            if ((cnvOption&_MBCS_OPTION_KEIS)!=0) {
   1.437 +                value[0] = KEIS_SI_CHAR_1;
   1.438 +                value[1] = KEIS_SI_CHAR_2;
   1.439 +                SISOLength = 2;
   1.440 +            } else if ((cnvOption&_MBCS_OPTION_JEF)!=0) {
   1.441 +                value[0] = JEF_SI_CHAR;
   1.442 +                SISOLength = 1;
   1.443 +            } else if ((cnvOption&_MBCS_OPTION_JIPS)!=0) {
   1.444 +                value[0] = JIPS_SI_CHAR_1;
   1.445 +                value[1] = JIPS_SI_CHAR_2;
   1.446 +                SISOLength = 2;
   1.447 +            } else {
   1.448 +                value[0] = UCNV_SI;
   1.449 +                SISOLength = 1;
   1.450 +            }
   1.451 +            break;
   1.452 +        case SO:
   1.453 +            if ((cnvOption&_MBCS_OPTION_KEIS)!=0) {
   1.454 +                value[0] = KEIS_SO_CHAR_1;
   1.455 +                value[1] = KEIS_SO_CHAR_2;
   1.456 +                SISOLength = 2;
   1.457 +            } else if ((cnvOption&_MBCS_OPTION_JEF)!=0) {
   1.458 +                value[0] = JEF_SO_CHAR;
   1.459 +                SISOLength = 1;
   1.460 +            } else if ((cnvOption&_MBCS_OPTION_JIPS)!=0) {
   1.461 +                value[0] = JIPS_SO_CHAR_1;
   1.462 +                value[1] = JIPS_SO_CHAR_2;
   1.463 +                SISOLength = 2;
   1.464 +            } else {
   1.465 +                value[0] = UCNV_SO;
   1.466 +                SISOLength = 1;
   1.467 +            }
   1.468 +            break;
   1.469 +        default:
   1.470 +            /* Should never happen. */
   1.471 +            break;
   1.472 +    }
   1.473 +
   1.474 +    return SISOLength;
   1.475 +}
   1.476 +
   1.477 +/* Miscellaneous ------------------------------------------------------------ */
   1.478 +
   1.479 +/**
   1.480 + * Callback from ucnv_MBCSEnumToUnicode(), takes 32 mappings from
   1.481 + * consecutive sequences of bytes, starting from the one encoded in value,
   1.482 + * to Unicode code points. (Multiple mappings to reduce per-function call overhead.)
   1.483 + * Does not currently support m:n mappings or reverse fallbacks.
   1.484 + * This function will not be called for sequences of bytes with leading zeros.
   1.485 + *
   1.486 + * @param context an opaque pointer, as passed into ucnv_MBCSEnumToUnicode()
   1.487 + * @param value contains 1..4 bytes of the first byte sequence, right-aligned
   1.488 + * @param codePoints resulting Unicode code points, or negative if a byte sequence does
   1.489 + *        not map to anything
   1.490 + * @return TRUE to continue enumeration, FALSE to stop
   1.491 + */
   1.492 +typedef UBool U_CALLCONV
   1.493 +UConverterEnumToUCallback(const void *context, uint32_t value, UChar32 codePoints[32]);
   1.494 +
   1.495 +/* similar to ucnv_MBCSGetNextUChar() but recursive */
   1.496 +static UBool
   1.497 +enumToU(UConverterMBCSTable *mbcsTable, int8_t stateProps[],
   1.498 +        int32_t state, uint32_t offset,
   1.499 +        uint32_t value,
   1.500 +        UConverterEnumToUCallback *callback, const void *context,
   1.501 +        UErrorCode *pErrorCode) {
   1.502 +    UChar32 codePoints[32];
   1.503 +    const int32_t *row;
   1.504 +    const uint16_t *unicodeCodeUnits;
   1.505 +    UChar32 anyCodePoints;
   1.506 +    int32_t b, limit;
   1.507 +
   1.508 +    row=mbcsTable->stateTable[state];
   1.509 +    unicodeCodeUnits=mbcsTable->unicodeCodeUnits;
   1.510 +
   1.511 +    value<<=8;
   1.512 +    anyCodePoints=-1;  /* becomes non-negative if there is a mapping */
   1.513 +
   1.514 +    b=(stateProps[state]&0x38)<<2;
   1.515 +    if(b==0 && stateProps[state]>=0x40) {
   1.516 +        /* skip byte sequences with leading zeros because they are not stored in the fromUnicode table */
   1.517 +        codePoints[0]=U_SENTINEL;
   1.518 +        b=1;
   1.519 +    }
   1.520 +    limit=((stateProps[state]&7)+1)<<5;
   1.521 +    while(b<limit) {
   1.522 +        int32_t entry=row[b];
   1.523 +        if(MBCS_ENTRY_IS_TRANSITION(entry)) {
   1.524 +            int32_t nextState=MBCS_ENTRY_TRANSITION_STATE(entry);
   1.525 +            if(stateProps[nextState]>=0) {
   1.526 +                /* recurse to a state with non-ignorable actions */
   1.527 +                if(!enumToU(
   1.528 +                        mbcsTable, stateProps, nextState,
   1.529 +                        offset+MBCS_ENTRY_TRANSITION_OFFSET(entry),
   1.530 +                        value|(uint32_t)b,
   1.531 +                        callback, context,
   1.532 +                        pErrorCode)) {
   1.533 +                    return FALSE;
   1.534 +                }
   1.535 +            }
   1.536 +            codePoints[b&0x1f]=U_SENTINEL;
   1.537 +        } else {
   1.538 +            UChar32 c;
   1.539 +            int32_t action;
   1.540 +
   1.541 +            /*
   1.542 +             * An if-else-if chain provides more reliable performance for
   1.543 +             * the most common cases compared to a switch.
   1.544 +             */
   1.545 +            action=MBCS_ENTRY_FINAL_ACTION(entry);
   1.546 +            if(action==MBCS_STATE_VALID_DIRECT_16) {
   1.547 +                /* output BMP code point */
   1.548 +                c=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
   1.549 +            } else if(action==MBCS_STATE_VALID_16) {
   1.550 +                int32_t finalOffset=offset+MBCS_ENTRY_FINAL_VALUE_16(entry);
   1.551 +                c=unicodeCodeUnits[finalOffset];
   1.552 +                if(c<0xfffe) {
   1.553 +                    /* output BMP code point */
   1.554 +                } else {
   1.555 +                    c=U_SENTINEL;
   1.556 +                }
   1.557 +            } else if(action==MBCS_STATE_VALID_16_PAIR) {
   1.558 +                int32_t finalOffset=offset+MBCS_ENTRY_FINAL_VALUE_16(entry);
   1.559 +                c=unicodeCodeUnits[finalOffset++];
   1.560 +                if(c<0xd800) {
   1.561 +                    /* output BMP code point below 0xd800 */
   1.562 +                } else if(c<=0xdbff) {
   1.563 +                    /* output roundtrip or fallback supplementary code point */
   1.564 +                    c=((c&0x3ff)<<10)+unicodeCodeUnits[finalOffset]+(0x10000-0xdc00);
   1.565 +                } else if(c==0xe000) {
   1.566 +                    /* output roundtrip BMP code point above 0xd800 or fallback BMP code point */
   1.567 +                    c=unicodeCodeUnits[finalOffset];
   1.568 +                } else {
   1.569 +                    c=U_SENTINEL;
   1.570 +                }
   1.571 +            } else if(action==MBCS_STATE_VALID_DIRECT_20) {
   1.572 +                /* output supplementary code point */
   1.573 +                c=(UChar32)(MBCS_ENTRY_FINAL_VALUE(entry)+0x10000);
   1.574 +            } else {
   1.575 +                c=U_SENTINEL;
   1.576 +            }
   1.577 +
   1.578 +            codePoints[b&0x1f]=c;
   1.579 +            anyCodePoints&=c;
   1.580 +        }
   1.581 +        if(((++b)&0x1f)==0) {
   1.582 +            if(anyCodePoints>=0) {
   1.583 +                if(!callback(context, value|(uint32_t)(b-0x20), codePoints)) {
   1.584 +                    return FALSE;
   1.585 +                }
   1.586 +                anyCodePoints=-1;
   1.587 +            }
   1.588 +        }
   1.589 +    }
   1.590 +    return TRUE;
   1.591 +}
   1.592 +
   1.593 +/*
   1.594 + * Only called if stateProps[state]==-1.
   1.595 + * A recursive call may do stateProps[state]|=0x40 if this state is the target of an
   1.596 + * MBCS_STATE_CHANGE_ONLY.
   1.597 + */
   1.598 +static int8_t
   1.599 +getStateProp(const int32_t (*stateTable)[256], int8_t stateProps[], int state) {
   1.600 +    const int32_t *row;
   1.601 +    int32_t min, max, entry, nextState;
   1.602 +
   1.603 +    row=stateTable[state];
   1.604 +    stateProps[state]=0;
   1.605 +
   1.606 +    /* find first non-ignorable state */
   1.607 +    for(min=0;; ++min) {
   1.608 +        entry=row[min];
   1.609 +        nextState=MBCS_ENTRY_STATE(entry);
   1.610 +        if(stateProps[nextState]==-1) {
   1.611 +            getStateProp(stateTable, stateProps, nextState);
   1.612 +        }
   1.613 +        if(MBCS_ENTRY_IS_TRANSITION(entry)) {
   1.614 +            if(stateProps[nextState]>=0) {
   1.615 +                break;
   1.616 +            }
   1.617 +        } else if(MBCS_ENTRY_FINAL_ACTION(entry)<MBCS_STATE_UNASSIGNED) {
   1.618 +            break;
   1.619 +        }
   1.620 +        if(min==0xff) {
   1.621 +            stateProps[state]=-0x40;  /* (int8_t)0xc0 */
   1.622 +            return stateProps[state];
   1.623 +        }
   1.624 +    }
   1.625 +    stateProps[state]|=(int8_t)((min>>5)<<3);
   1.626 +
   1.627 +    /* find last non-ignorable state */
   1.628 +    for(max=0xff; min<max; --max) {
   1.629 +        entry=row[max];
   1.630 +        nextState=MBCS_ENTRY_STATE(entry);
   1.631 +        if(stateProps[nextState]==-1) {
   1.632 +            getStateProp(stateTable, stateProps, nextState);
   1.633 +        }
   1.634 +        if(MBCS_ENTRY_IS_TRANSITION(entry)) {
   1.635 +            if(stateProps[nextState]>=0) {
   1.636 +                break;
   1.637 +            }
   1.638 +        } else if(MBCS_ENTRY_FINAL_ACTION(entry)<MBCS_STATE_UNASSIGNED) {
   1.639 +            break;
   1.640 +        }
   1.641 +    }
   1.642 +    stateProps[state]|=(int8_t)(max>>5);
   1.643 +
   1.644 +    /* recurse further and collect direct-state information */
   1.645 +    while(min<=max) {
   1.646 +        entry=row[min];
   1.647 +        nextState=MBCS_ENTRY_STATE(entry);
   1.648 +        if(stateProps[nextState]==-1) {
   1.649 +            getStateProp(stateTable, stateProps, nextState);
   1.650 +        }
   1.651 +        if(MBCS_ENTRY_IS_FINAL(entry)) {
   1.652 +            stateProps[nextState]|=0x40;
   1.653 +            if(MBCS_ENTRY_FINAL_ACTION(entry)<=MBCS_STATE_FALLBACK_DIRECT_20) {
   1.654 +                stateProps[state]|=0x40;
   1.655 +            }
   1.656 +        }
   1.657 +        ++min;
   1.658 +    }
   1.659 +    return stateProps[state];
   1.660 +}
   1.661 +
   1.662 +/*
   1.663 + * Internal function enumerating the toUnicode data of an MBCS converter.
   1.664 + * Currently only used for reconstituting data for a MBCS_OPT_NO_FROM_U
   1.665 + * table, but could also be used for a future ucnv_getUnicodeSet() option
   1.666 + * that includes reverse fallbacks (after updating this function's implementation).
   1.667 + * Currently only handles roundtrip mappings.
   1.668 + * Does not currently handle extensions.
   1.669 + */
   1.670 +static void
   1.671 +ucnv_MBCSEnumToUnicode(UConverterMBCSTable *mbcsTable,
   1.672 +                       UConverterEnumToUCallback *callback, const void *context,
   1.673 +                       UErrorCode *pErrorCode) {
   1.674 +    /*
   1.675 +     * Properties for each state, to speed up the enumeration.
   1.676 +     * Ignorable actions are unassigned/illegal/state-change-only:
   1.677 +     * They do not lead to mappings.
   1.678 +     *
   1.679 +     * Bits 7..6:
   1.680 +     * 1 direct/initial state (stateful converters have multiple)
   1.681 +     * 0 non-initial state with transitions or with non-ignorable result actions
   1.682 +     * -1 final state with only ignorable actions
   1.683 +     *
   1.684 +     * Bits 5..3:
   1.685 +     * The lowest byte value with non-ignorable actions is
   1.686 +     * value<<5 (rounded down).
   1.687 +     *
   1.688 +     * Bits 2..0:
   1.689 +     * The highest byte value with non-ignorable actions is
   1.690 +     * (value<<5)&0x1f (rounded up).
   1.691 +     */
   1.692 +    int8_t stateProps[MBCS_MAX_STATE_COUNT];
   1.693 +    int32_t state;
   1.694 +
   1.695 +    uprv_memset(stateProps, -1, sizeof(stateProps));
   1.696 +
   1.697 +    /* recurse from state 0 and set all stateProps */
   1.698 +    getStateProp(mbcsTable->stateTable, stateProps, 0);
   1.699 +
   1.700 +    for(state=0; state<mbcsTable->countStates; ++state) {
   1.701 +        /*if(stateProps[state]==-1) {
   1.702 +            printf("unused/unreachable <icu:state> %d\n", state);
   1.703 +        }*/
   1.704 +        if(stateProps[state]>=0x40) {
   1.705 +            /* start from each direct state */
   1.706 +            enumToU(
   1.707 +                mbcsTable, stateProps, state, 0, 0,
   1.708 +                callback, context,
   1.709 +                pErrorCode);
   1.710 +        }
   1.711 +    }
   1.712 +}
   1.713 +
   1.714 +U_CFUNC void
   1.715 +ucnv_MBCSGetFilteredUnicodeSetForUnicode(const UConverterSharedData *sharedData,
   1.716 +                                         const USetAdder *sa,
   1.717 +                                         UConverterUnicodeSet which,
   1.718 +                                         UConverterSetFilter filter,
   1.719 +                                         UErrorCode *pErrorCode) {
   1.720 +    const UConverterMBCSTable *mbcsTable;
   1.721 +    const uint16_t *table;
   1.722 +
   1.723 +    uint32_t st3;
   1.724 +    uint16_t st1, maxStage1, st2;
   1.725 +
   1.726 +    UChar32 c;
   1.727 +
   1.728 +    /* enumerate the from-Unicode trie table */
   1.729 +    mbcsTable=&sharedData->mbcs;
   1.730 +    table=mbcsTable->fromUnicodeTable;
   1.731 +    if(mbcsTable->unicodeMask&UCNV_HAS_SUPPLEMENTARY) {
   1.732 +        maxStage1=0x440;
   1.733 +    } else {
   1.734 +        maxStage1=0x40;
   1.735 +    }
   1.736 +
   1.737 +    c=0; /* keep track of the current code point while enumerating */
   1.738 +
   1.739 +    if(mbcsTable->outputType==MBCS_OUTPUT_1) {
   1.740 +        const uint16_t *stage2, *stage3, *results;
   1.741 +        uint16_t minValue;
   1.742 +
   1.743 +        results=(const uint16_t *)mbcsTable->fromUnicodeBytes;
   1.744 +
   1.745 +        /*
   1.746 +         * Set a threshold variable for selecting which mappings to use.
   1.747 +         * See ucnv_MBCSSingleFromBMPWithOffsets() and
   1.748 +         * MBCS_SINGLE_RESULT_FROM_U() for details.
   1.749 +         */
   1.750 +        if(which==UCNV_ROUNDTRIP_SET) {
   1.751 +            /* use only roundtrips */
   1.752 +            minValue=0xf00;
   1.753 +        } else /* UCNV_ROUNDTRIP_AND_FALLBACK_SET */ {
   1.754 +            /* use all roundtrip and fallback results */
   1.755 +            minValue=0x800;
   1.756 +        }
   1.757 +
   1.758 +        for(st1=0; st1<maxStage1; ++st1) {
   1.759 +            st2=table[st1];
   1.760 +            if(st2>maxStage1) {
   1.761 +                stage2=table+st2;
   1.762 +                for(st2=0; st2<64; ++st2) {
   1.763 +                    if((st3=stage2[st2])!=0) {
   1.764 +                        /* read the stage 3 block */
   1.765 +                        stage3=results+st3;
   1.766 +
   1.767 +                        do {
   1.768 +                            if(*stage3++>=minValue) {
   1.769 +                                sa->add(sa->set, c);
   1.770 +                            }
   1.771 +                        } while((++c&0xf)!=0);
   1.772 +                    } else {
   1.773 +                        c+=16; /* empty stage 3 block */
   1.774 +                    }
   1.775 +                }
   1.776 +            } else {
   1.777 +                c+=1024; /* empty stage 2 block */
   1.778 +            }
   1.779 +        }
   1.780 +    } else {
   1.781 +        const uint32_t *stage2;
   1.782 +        const uint8_t *stage3, *bytes;
   1.783 +        uint32_t st3Multiplier;
   1.784 +        uint32_t value;
   1.785 +        UBool useFallback;
   1.786 +
   1.787 +        bytes=mbcsTable->fromUnicodeBytes;
   1.788 +
   1.789 +        useFallback=(UBool)(which==UCNV_ROUNDTRIP_AND_FALLBACK_SET);
   1.790 +
   1.791 +        switch(mbcsTable->outputType) {
   1.792 +        case MBCS_OUTPUT_3:
   1.793 +        case MBCS_OUTPUT_4_EUC:
   1.794 +            st3Multiplier=3;
   1.795 +            break;
   1.796 +        case MBCS_OUTPUT_4:
   1.797 +            st3Multiplier=4;
   1.798 +            break;
   1.799 +        default:
   1.800 +            st3Multiplier=2;
   1.801 +            break;
   1.802 +        }
   1.803 +
   1.804 +        for(st1=0; st1<maxStage1; ++st1) {
   1.805 +            st2=table[st1];
   1.806 +            if(st2>(maxStage1>>1)) {
   1.807 +                stage2=(const uint32_t *)table+st2;
   1.808 +                for(st2=0; st2<64; ++st2) {
   1.809 +                    if((st3=stage2[st2])!=0) {
   1.810 +                        /* read the stage 3 block */
   1.811 +                        stage3=bytes+st3Multiplier*16*(uint32_t)(uint16_t)st3;
   1.812 +
   1.813 +                        /* get the roundtrip flags for the stage 3 block */
   1.814 +                        st3>>=16;
   1.815 +
   1.816 +                        /*
   1.817 +                         * Add code points for which the roundtrip flag is set,
   1.818 +                         * or which map to non-zero bytes if we use fallbacks.
   1.819 +                         * See ucnv_MBCSFromUnicodeWithOffsets() for details.
   1.820 +                         */
   1.821 +                        switch(filter) {
   1.822 +                        case UCNV_SET_FILTER_NONE:
   1.823 +                            do {
   1.824 +                                if(st3&1) {
   1.825 +                                    sa->add(sa->set, c);
   1.826 +                                    stage3+=st3Multiplier;
   1.827 +                                } else if(useFallback) {
   1.828 +                                    uint8_t b=0;
   1.829 +                                    switch(st3Multiplier) {
   1.830 +                                    case 4:
   1.831 +                                        b|=*stage3++;
   1.832 +                                    case 3: /*fall through*/
   1.833 +                                        b|=*stage3++;
   1.834 +                                    case 2: /*fall through*/
   1.835 +                                        b|=stage3[0]|stage3[1];
   1.836 +                                        stage3+=2;
   1.837 +                                    default:
   1.838 +                                        break;
   1.839 +                                    }
   1.840 +                                    if(b!=0) {
   1.841 +                                        sa->add(sa->set, c);
   1.842 +                                    }
   1.843 +                                }
   1.844 +                                st3>>=1;
   1.845 +                            } while((++c&0xf)!=0);
   1.846 +                            break;
   1.847 +                        case UCNV_SET_FILTER_DBCS_ONLY:
   1.848 +                             /* Ignore single-byte results (<0x100). */
   1.849 +                            do {
   1.850 +                                if(((st3&1)!=0 || useFallback) && *((const uint16_t *)stage3)>=0x100) {
   1.851 +                                    sa->add(sa->set, c);
   1.852 +                                }
   1.853 +                                st3>>=1;
   1.854 +                                stage3+=2;  /* +=st3Multiplier */
   1.855 +                            } while((++c&0xf)!=0);
   1.856 +                            break;
   1.857 +                        case UCNV_SET_FILTER_2022_CN:
   1.858 +                             /* Only add code points that map to CNS 11643 planes 1 & 2 for non-EXT ISO-2022-CN. */
   1.859 +                            do {
   1.860 +                                if(((st3&1)!=0 || useFallback) && ((value=*stage3)==0x81 || value==0x82)) {
   1.861 +                                    sa->add(sa->set, c);
   1.862 +                                }
   1.863 +                                st3>>=1;
   1.864 +                                stage3+=3;  /* +=st3Multiplier */
   1.865 +                            } while((++c&0xf)!=0);
   1.866 +                            break;
   1.867 +                        case UCNV_SET_FILTER_SJIS:
   1.868 +                             /* Only add code points that map to Shift-JIS codes corresponding to JIS X 0208. */
   1.869 +                            do {
   1.870 +                                if(((st3&1)!=0 || useFallback) && (value=*((const uint16_t *)stage3))>=0x8140 && value<=0xeffc) {
   1.871 +                                    sa->add(sa->set, c);
   1.872 +                                }
   1.873 +                                st3>>=1;
   1.874 +                                stage3+=2;  /* +=st3Multiplier */
   1.875 +                            } while((++c&0xf)!=0);
   1.876 +                            break;
   1.877 +                        case UCNV_SET_FILTER_GR94DBCS:
   1.878 +                            /* Only add code points that map to ISO 2022 GR 94 DBCS codes (each byte A1..FE). */
   1.879 +                            do {
   1.880 +                                if( ((st3&1)!=0 || useFallback) &&
   1.881 +                                    (uint16_t)((value=*((const uint16_t *)stage3)) - 0xa1a1)<=(0xfefe - 0xa1a1) &&
   1.882 +                                    (uint8_t)(value-0xa1)<=(0xfe - 0xa1)
   1.883 +                                ) {
   1.884 +                                    sa->add(sa->set, c);
   1.885 +                                }
   1.886 +                                st3>>=1;
   1.887 +                                stage3+=2;  /* +=st3Multiplier */
   1.888 +                            } while((++c&0xf)!=0);
   1.889 +                            break;
   1.890 +                        case UCNV_SET_FILTER_HZ:
   1.891 +                            /* Only add code points that are suitable for HZ DBCS (lead byte A1..FD). */
   1.892 +                            do {
   1.893 +                                if( ((st3&1)!=0 || useFallback) &&
   1.894 +                                    (uint16_t)((value=*((const uint16_t *)stage3))-0xa1a1)<=(0xfdfe - 0xa1a1) &&
   1.895 +                                    (uint8_t)(value-0xa1)<=(0xfe - 0xa1)
   1.896 +                                ) {
   1.897 +                                    sa->add(sa->set, c);
   1.898 +                                }
   1.899 +                                st3>>=1;
   1.900 +                                stage3+=2;  /* +=st3Multiplier */
   1.901 +                            } while((++c&0xf)!=0);
   1.902 +                            break;
   1.903 +                        default:
   1.904 +                            *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
   1.905 +                            return;
   1.906 +                        }
   1.907 +                    } else {
   1.908 +                        c+=16; /* empty stage 3 block */
   1.909 +                    }
   1.910 +                }
   1.911 +            } else {
   1.912 +                c+=1024; /* empty stage 2 block */
   1.913 +            }
   1.914 +        }
   1.915 +    }
   1.916 +
   1.917 +    ucnv_extGetUnicodeSet(sharedData, sa, which, filter, pErrorCode);
   1.918 +}
   1.919 +
   1.920 +U_CFUNC void
   1.921 +ucnv_MBCSGetUnicodeSetForUnicode(const UConverterSharedData *sharedData,
   1.922 +                                 const USetAdder *sa,
   1.923 +                                 UConverterUnicodeSet which,
   1.924 +                                 UErrorCode *pErrorCode) {
   1.925 +    ucnv_MBCSGetFilteredUnicodeSetForUnicode(
   1.926 +        sharedData, sa, which,
   1.927 +        sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY ?
   1.928 +            UCNV_SET_FILTER_DBCS_ONLY :
   1.929 +            UCNV_SET_FILTER_NONE,
   1.930 +        pErrorCode);
   1.931 +}
   1.932 +
   1.933 +static void
   1.934 +ucnv_MBCSGetUnicodeSet(const UConverter *cnv,
   1.935 +                   const USetAdder *sa,
   1.936 +                   UConverterUnicodeSet which,
   1.937 +                   UErrorCode *pErrorCode) {
   1.938 +    if(cnv->options&_MBCS_OPTION_GB18030) {
   1.939 +        sa->addRange(sa->set, 0, 0xd7ff);
   1.940 +        sa->addRange(sa->set, 0xe000, 0x10ffff);
   1.941 +    } else {
   1.942 +        ucnv_MBCSGetUnicodeSetForUnicode(cnv->sharedData, sa, which, pErrorCode);
   1.943 +    }
   1.944 +}
   1.945 +
   1.946 +/* conversion extensions for input not in the main table -------------------- */
   1.947 +
   1.948 +/*
   1.949 + * Hardcoded extension handling for GB 18030.
   1.950 + * Definition of LINEAR macros and gb18030Ranges see near the beginning of the file.
   1.951 + *
   1.952 + * In the future, conversion extensions may handle m:n mappings and delta tables,
   1.953 + * see http://source.icu-project.org/repos/icu/icuhtml/trunk/design/conversion/conversion_extensions.html
   1.954 + *
   1.955 + * If an input character cannot be mapped, then these functions set an error
   1.956 + * code. The framework will then call the callback function.
   1.957 + */
   1.958 +
   1.959 +/*
   1.960 + * @return if(U_FAILURE) return the code point for cnv->fromUChar32
   1.961 + *         else return 0 after output has been written to the target
   1.962 + */
   1.963 +static UChar32
   1.964 +_extFromU(UConverter *cnv, const UConverterSharedData *sharedData,
   1.965 +          UChar32 cp,
   1.966 +          const UChar **source, const UChar *sourceLimit,
   1.967 +          uint8_t **target, const uint8_t *targetLimit,
   1.968 +          int32_t **offsets, int32_t sourceIndex,
   1.969 +          UBool flush,
   1.970 +          UErrorCode *pErrorCode) {
   1.971 +    const int32_t *cx;
   1.972 +
   1.973 +    cnv->useSubChar1=FALSE;
   1.974 +
   1.975 +    if( (cx=sharedData->mbcs.extIndexes)!=NULL &&
   1.976 +        ucnv_extInitialMatchFromU(
   1.977 +            cnv, cx,
   1.978 +            cp, source, sourceLimit,
   1.979 +            (char **)target, (char *)targetLimit,
   1.980 +            offsets, sourceIndex,
   1.981 +            flush,
   1.982 +            pErrorCode)
   1.983 +    ) {
   1.984 +        return 0; /* an extension mapping handled the input */
   1.985 +    }
   1.986 +
   1.987 +    /* GB 18030 */
   1.988 +    if((cnv->options&_MBCS_OPTION_GB18030)!=0) {
   1.989 +        const uint32_t *range;
   1.990 +        int32_t i;
   1.991 +
   1.992 +        range=gb18030Ranges[0];
   1.993 +        for(i=0; i<sizeof(gb18030Ranges)/sizeof(gb18030Ranges[0]); range+=4, ++i) {
   1.994 +            if(range[0]<=(uint32_t)cp && (uint32_t)cp<=range[1]) {
   1.995 +                /* found the Unicode code point, output the four-byte sequence for it */
   1.996 +                uint32_t linear;
   1.997 +                char bytes[4];
   1.998 +
   1.999 +                /* get the linear value of the first GB 18030 code in this range */
  1.1000 +                linear=range[2]-LINEAR_18030_BASE;
  1.1001 +
  1.1002 +                /* add the offset from the beginning of the range */
  1.1003 +                linear+=((uint32_t)cp-range[0]);
  1.1004 +
  1.1005 +                /* turn this into a four-byte sequence */
  1.1006 +                bytes[3]=(char)(0x30+linear%10); linear/=10;
  1.1007 +                bytes[2]=(char)(0x81+linear%126); linear/=126;
  1.1008 +                bytes[1]=(char)(0x30+linear%10); linear/=10;
  1.1009 +                bytes[0]=(char)(0x81+linear);
  1.1010 +
  1.1011 +                /* output this sequence */
  1.1012 +                ucnv_fromUWriteBytes(cnv,
  1.1013 +                                     bytes, 4, (char **)target, (char *)targetLimit,
  1.1014 +                                     offsets, sourceIndex, pErrorCode);
  1.1015 +                return 0;
  1.1016 +            }
  1.1017 +        }
  1.1018 +    }
  1.1019 +
  1.1020 +    /* no mapping */
  1.1021 +    *pErrorCode=U_INVALID_CHAR_FOUND;
  1.1022 +    return cp;
  1.1023 +}
  1.1024 +
  1.1025 +/*
  1.1026 + * Input sequence: cnv->toUBytes[0..length[
  1.1027 + * @return if(U_FAILURE) return the length (toULength, byteIndex) for the input
  1.1028 + *         else return 0 after output has been written to the target
  1.1029 + */
  1.1030 +static int8_t
  1.1031 +_extToU(UConverter *cnv, const UConverterSharedData *sharedData,
  1.1032 +        int8_t length,
  1.1033 +        const uint8_t **source, const uint8_t *sourceLimit,
  1.1034 +        UChar **target, const UChar *targetLimit,
  1.1035 +        int32_t **offsets, int32_t sourceIndex,
  1.1036 +        UBool flush,
  1.1037 +        UErrorCode *pErrorCode) {
  1.1038 +    const int32_t *cx;
  1.1039 +
  1.1040 +    if( (cx=sharedData->mbcs.extIndexes)!=NULL &&
  1.1041 +        ucnv_extInitialMatchToU(
  1.1042 +            cnv, cx,
  1.1043 +            length, (const char **)source, (const char *)sourceLimit,
  1.1044 +            target, targetLimit,
  1.1045 +            offsets, sourceIndex,
  1.1046 +            flush,
  1.1047 +            pErrorCode)
  1.1048 +    ) {
  1.1049 +        return 0; /* an extension mapping handled the input */
  1.1050 +    }
  1.1051 +
  1.1052 +    /* GB 18030 */
  1.1053 +    if(length==4 && (cnv->options&_MBCS_OPTION_GB18030)!=0) {
  1.1054 +        const uint32_t *range;
  1.1055 +        uint32_t linear;
  1.1056 +        int32_t i;
  1.1057 +
  1.1058 +        linear=LINEAR_18030(cnv->toUBytes[0], cnv->toUBytes[1], cnv->toUBytes[2], cnv->toUBytes[3]);
  1.1059 +        range=gb18030Ranges[0];
  1.1060 +        for(i=0; i<sizeof(gb18030Ranges)/sizeof(gb18030Ranges[0]); range+=4, ++i) {
  1.1061 +            if(range[2]<=linear && linear<=range[3]) {
  1.1062 +                /* found the sequence, output the Unicode code point for it */
  1.1063 +                *pErrorCode=U_ZERO_ERROR;
  1.1064 +
  1.1065 +                /* add the linear difference between the input and start sequences to the start code point */
  1.1066 +                linear=range[0]+(linear-range[2]);
  1.1067 +
  1.1068 +                /* output this code point */
  1.1069 +                ucnv_toUWriteCodePoint(cnv, linear, target, targetLimit, offsets, sourceIndex, pErrorCode);
  1.1070 +
  1.1071 +                return 0;
  1.1072 +            }
  1.1073 +        }
  1.1074 +    }
  1.1075 +
  1.1076 +    /* no mapping */
  1.1077 +    *pErrorCode=U_INVALID_CHAR_FOUND;
  1.1078 +    return length;
  1.1079 +}
  1.1080 +
  1.1081 +/* EBCDIC swap LF<->NL ------------------------------------------------------ */
  1.1082 +
  1.1083 +/*
  1.1084 + * This code modifies a standard EBCDIC<->Unicode mapping table for
  1.1085 + * OS/390 (z/OS) Unix System Services (Open Edition).
  1.1086 + * The difference is in the mapping of Line Feed and New Line control codes:
  1.1087 + * Standard EBCDIC maps
  1.1088 + *
  1.1089 + *   <U000A> \x25 |0
  1.1090 + *   <U0085> \x15 |0
  1.1091 + *
  1.1092 + * but OS/390 USS EBCDIC swaps the control codes for LF and NL,
  1.1093 + * mapping
  1.1094 + *
  1.1095 + *   <U000A> \x15 |0
  1.1096 + *   <U0085> \x25 |0
  1.1097 + *
  1.1098 + * This code modifies a loaded standard EBCDIC<->Unicode mapping table
  1.1099 + * by copying it into allocated memory and swapping the LF and NL values.
  1.1100 + * It allows to support the same EBCDIC charset in both versions without
  1.1101 + * duplicating the entire installed table.
  1.1102 + */
  1.1103 +
  1.1104 +/* standard EBCDIC codes */
  1.1105 +#define EBCDIC_LF 0x25
  1.1106 +#define EBCDIC_NL 0x15
  1.1107 +
  1.1108 +/* standard EBCDIC codes with roundtrip flag as stored in Unicode-to-single-byte tables */
  1.1109 +#define EBCDIC_RT_LF 0xf25
  1.1110 +#define EBCDIC_RT_NL 0xf15
  1.1111 +
  1.1112 +/* Unicode code points */
  1.1113 +#define U_LF 0x0a
  1.1114 +#define U_NL 0x85
  1.1115 +
  1.1116 +static UBool
  1.1117 +_EBCDICSwapLFNL(UConverterSharedData *sharedData, UErrorCode *pErrorCode) {
  1.1118 +    UConverterMBCSTable *mbcsTable;
  1.1119 +
  1.1120 +    const uint16_t *table, *results;
  1.1121 +    const uint8_t *bytes;
  1.1122 +
  1.1123 +    int32_t (*newStateTable)[256];
  1.1124 +    uint16_t *newResults;
  1.1125 +    uint8_t *p;
  1.1126 +    char *name;
  1.1127 +
  1.1128 +    uint32_t stage2Entry;
  1.1129 +    uint32_t size, sizeofFromUBytes;
  1.1130 +
  1.1131 +    mbcsTable=&sharedData->mbcs;
  1.1132 +
  1.1133 +    table=mbcsTable->fromUnicodeTable;
  1.1134 +    bytes=mbcsTable->fromUnicodeBytes;
  1.1135 +    results=(const uint16_t *)bytes;
  1.1136 +
  1.1137 +    /*
  1.1138 +     * Check that this is an EBCDIC table with SBCS portion -
  1.1139 +     * SBCS or EBCDIC_STATEFUL with standard EBCDIC LF and NL mappings.
  1.1140 +     *
  1.1141 +     * If not, ignore the option. Options are always ignored if they do not apply.
  1.1142 +     */
  1.1143 +    if(!(
  1.1144 +         (mbcsTable->outputType==MBCS_OUTPUT_1 || mbcsTable->outputType==MBCS_OUTPUT_2_SISO) &&
  1.1145 +         mbcsTable->stateTable[0][EBCDIC_LF]==MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, U_LF) &&
  1.1146 +         mbcsTable->stateTable[0][EBCDIC_NL]==MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, U_NL)
  1.1147 +    )) {
  1.1148 +        return FALSE;
  1.1149 +    }
  1.1150 +
  1.1151 +    if(mbcsTable->outputType==MBCS_OUTPUT_1) {
  1.1152 +        if(!(
  1.1153 +             EBCDIC_RT_LF==MBCS_SINGLE_RESULT_FROM_U(table, results, U_LF) &&
  1.1154 +             EBCDIC_RT_NL==MBCS_SINGLE_RESULT_FROM_U(table, results, U_NL)
  1.1155 +        )) {
  1.1156 +            return FALSE;
  1.1157 +        }
  1.1158 +    } else /* MBCS_OUTPUT_2_SISO */ {
  1.1159 +        stage2Entry=MBCS_STAGE_2_FROM_U(table, U_LF);
  1.1160 +        if(!(
  1.1161 +             MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, U_LF)!=0 &&
  1.1162 +             EBCDIC_LF==MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, U_LF)
  1.1163 +        )) {
  1.1164 +            return FALSE;
  1.1165 +        }
  1.1166 +
  1.1167 +        stage2Entry=MBCS_STAGE_2_FROM_U(table, U_NL);
  1.1168 +        if(!(
  1.1169 +             MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, U_NL)!=0 &&
  1.1170 +             EBCDIC_NL==MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, U_NL)
  1.1171 +        )) {
  1.1172 +            return FALSE;
  1.1173 +        }
  1.1174 +    }
  1.1175 +
  1.1176 +    if(mbcsTable->fromUBytesLength>0) {
  1.1177 +        /*
  1.1178 +         * We _know_ the number of bytes in the fromUnicodeBytes array
  1.1179 +         * starting with header.version 4.1.
  1.1180 +         */
  1.1181 +        sizeofFromUBytes=mbcsTable->fromUBytesLength;
  1.1182 +    } else {
  1.1183 +        /*
  1.1184 +         * Otherwise:
  1.1185 +         * There used to be code to enumerate the fromUnicode
  1.1186 +         * trie and find the highest entry, but it was removed in ICU 3.2
  1.1187 +         * because it was not tested and caused a low code coverage number.
  1.1188 +         * See Jitterbug 3674.
  1.1189 +         * This affects only some .cnv file formats with a header.version
  1.1190 +         * below 4.1, and only when swaplfnl is requested.
  1.1191 +         *
  1.1192 +         * ucnvmbcs.c revision 1.99 is the last one with the
  1.1193 +         * ucnv_MBCSSizeofFromUBytes() function.
  1.1194 +         */
  1.1195 +        *pErrorCode=U_INVALID_FORMAT_ERROR;
  1.1196 +        return FALSE;
  1.1197 +    }
  1.1198 +
  1.1199 +    /*
  1.1200 +     * The table has an appropriate format.
  1.1201 +     * Allocate and build
  1.1202 +     * - a modified to-Unicode state table
  1.1203 +     * - a modified from-Unicode output array
  1.1204 +     * - a converter name string with the swap option appended
  1.1205 +     */
  1.1206 +    size=
  1.1207 +        mbcsTable->countStates*1024+
  1.1208 +        sizeofFromUBytes+
  1.1209 +        UCNV_MAX_CONVERTER_NAME_LENGTH+20;
  1.1210 +    p=(uint8_t *)uprv_malloc(size);
  1.1211 +    if(p==NULL) {
  1.1212 +        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
  1.1213 +        return FALSE;
  1.1214 +    }
  1.1215 +
  1.1216 +    /* copy and modify the to-Unicode state table */
  1.1217 +    newStateTable=(int32_t (*)[256])p;
  1.1218 +    uprv_memcpy(newStateTable, mbcsTable->stateTable, mbcsTable->countStates*1024);
  1.1219 +
  1.1220 +    newStateTable[0][EBCDIC_LF]=MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, U_NL);
  1.1221 +    newStateTable[0][EBCDIC_NL]=MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, U_LF);
  1.1222 +
  1.1223 +    /* copy and modify the from-Unicode result table */
  1.1224 +    newResults=(uint16_t *)newStateTable[mbcsTable->countStates];
  1.1225 +    uprv_memcpy(newResults, bytes, sizeofFromUBytes);
  1.1226 +
  1.1227 +    /* conveniently, the table access macros work on the left side of expressions */
  1.1228 +    if(mbcsTable->outputType==MBCS_OUTPUT_1) {
  1.1229 +        MBCS_SINGLE_RESULT_FROM_U(table, newResults, U_LF)=EBCDIC_RT_NL;
  1.1230 +        MBCS_SINGLE_RESULT_FROM_U(table, newResults, U_NL)=EBCDIC_RT_LF;
  1.1231 +    } else /* MBCS_OUTPUT_2_SISO */ {
  1.1232 +        stage2Entry=MBCS_STAGE_2_FROM_U(table, U_LF);
  1.1233 +        MBCS_VALUE_2_FROM_STAGE_2(newResults, stage2Entry, U_LF)=EBCDIC_NL;
  1.1234 +
  1.1235 +        stage2Entry=MBCS_STAGE_2_FROM_U(table, U_NL);
  1.1236 +        MBCS_VALUE_2_FROM_STAGE_2(newResults, stage2Entry, U_NL)=EBCDIC_LF;
  1.1237 +    }
  1.1238 +
  1.1239 +    /* set the canonical converter name */
  1.1240 +    name=(char *)newResults+sizeofFromUBytes;
  1.1241 +    uprv_strcpy(name, sharedData->staticData->name);
  1.1242 +    uprv_strcat(name, UCNV_SWAP_LFNL_OPTION_STRING);
  1.1243 +
  1.1244 +    /* set the pointers */
  1.1245 +    umtx_lock(NULL);
  1.1246 +    if(mbcsTable->swapLFNLStateTable==NULL) {
  1.1247 +        mbcsTable->swapLFNLStateTable=newStateTable;
  1.1248 +        mbcsTable->swapLFNLFromUnicodeBytes=(uint8_t *)newResults;
  1.1249 +        mbcsTable->swapLFNLName=name;
  1.1250 +
  1.1251 +        newStateTable=NULL;
  1.1252 +    }
  1.1253 +    umtx_unlock(NULL);
  1.1254 +
  1.1255 +    /* release the allocated memory if another thread beat us to it */
  1.1256 +    if(newStateTable!=NULL) {
  1.1257 +        uprv_free(newStateTable);
  1.1258 +    }
  1.1259 +    return TRUE;
  1.1260 +}
  1.1261 +
  1.1262 +/* reconstitute omitted fromUnicode data ------------------------------------ */
  1.1263 +
  1.1264 +/* for details, compare with genmbcs.c MBCSAddFromUnicode() and transformEUC() */
  1.1265 +static UBool U_CALLCONV
  1.1266 +writeStage3Roundtrip(const void *context, uint32_t value, UChar32 codePoints[32]) {
  1.1267 +    UConverterMBCSTable *mbcsTable=(UConverterMBCSTable *)context;
  1.1268 +    const uint16_t *table;
  1.1269 +    uint32_t *stage2;
  1.1270 +    uint8_t *bytes, *p;
  1.1271 +    UChar32 c;
  1.1272 +    int32_t i, st3;
  1.1273 +
  1.1274 +    table=mbcsTable->fromUnicodeTable;
  1.1275 +    bytes=(uint8_t *)mbcsTable->fromUnicodeBytes;
  1.1276 +
  1.1277 +    /* for EUC outputTypes, modify the value like genmbcs.c's transformEUC() */
  1.1278 +    switch(mbcsTable->outputType) {
  1.1279 +    case MBCS_OUTPUT_3_EUC:
  1.1280 +        if(value<=0xffff) {
  1.1281 +            /* short sequences are stored directly */
  1.1282 +            /* code set 0 or 1 */
  1.1283 +        } else if(value<=0x8effff) {
  1.1284 +            /* code set 2 */
  1.1285 +            value&=0x7fff;
  1.1286 +        } else /* first byte is 0x8f */ {
  1.1287 +            /* code set 3 */
  1.1288 +            value&=0xff7f;
  1.1289 +        }
  1.1290 +        break;
  1.1291 +    case MBCS_OUTPUT_4_EUC:
  1.1292 +        if(value<=0xffffff) {
  1.1293 +            /* short sequences are stored directly */
  1.1294 +            /* code set 0 or 1 */
  1.1295 +        } else if(value<=0x8effffff) {
  1.1296 +            /* code set 2 */
  1.1297 +            value&=0x7fffff;
  1.1298 +        } else /* first byte is 0x8f */ {
  1.1299 +            /* code set 3 */
  1.1300 +            value&=0xff7fff;
  1.1301 +        }
  1.1302 +        break;
  1.1303 +    default:
  1.1304 +        break;
  1.1305 +    }
  1.1306 +
  1.1307 +    for(i=0; i<=0x1f; ++value, ++i) {
  1.1308 +        c=codePoints[i];
  1.1309 +        if(c<0) {
  1.1310 +            continue;
  1.1311 +        }
  1.1312 +
  1.1313 +        /* locate the stage 2 & 3 data */
  1.1314 +        stage2=((uint32_t *)table)+table[c>>10]+((c>>4)&0x3f);
  1.1315 +        p=bytes;
  1.1316 +        st3=(int32_t)(uint16_t)*stage2*16+(c&0xf);
  1.1317 +
  1.1318 +        /* write the codepage bytes into stage 3 */
  1.1319 +        switch(mbcsTable->outputType) {
  1.1320 +        case MBCS_OUTPUT_3:
  1.1321 +        case MBCS_OUTPUT_4_EUC:
  1.1322 +            p+=st3*3;
  1.1323 +            p[0]=(uint8_t)(value>>16);
  1.1324 +            p[1]=(uint8_t)(value>>8);
  1.1325 +            p[2]=(uint8_t)value;
  1.1326 +            break;
  1.1327 +        case MBCS_OUTPUT_4:
  1.1328 +            ((uint32_t *)p)[st3]=value;
  1.1329 +            break;
  1.1330 +        default:
  1.1331 +            /* 2 bytes per character */
  1.1332 +            ((uint16_t *)p)[st3]=(uint16_t)value;
  1.1333 +            break;
  1.1334 +        }
  1.1335 +
  1.1336 +        /* set the roundtrip flag */
  1.1337 +        *stage2|=(1UL<<(16+(c&0xf)));
  1.1338 +    }
  1.1339 +    return TRUE;
  1.1340 + }
  1.1341 +
  1.1342 +static void
  1.1343 +reconstituteData(UConverterMBCSTable *mbcsTable,
  1.1344 +                 uint32_t stage1Length, uint32_t stage2Length,
  1.1345 +                 uint32_t fullStage2Length,  /* lengths are numbers of units, not bytes */
  1.1346 +                 UErrorCode *pErrorCode) {
  1.1347 +    uint16_t *stage1;
  1.1348 +    uint32_t *stage2;
  1.1349 +    uint32_t dataLength=stage1Length*2+fullStage2Length*4+mbcsTable->fromUBytesLength;
  1.1350 +    mbcsTable->reconstitutedData=(uint8_t *)uprv_malloc(dataLength);
  1.1351 +    if(mbcsTable->reconstitutedData==NULL) {
  1.1352 +        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
  1.1353 +        return;
  1.1354 +    }
  1.1355 +    uprv_memset(mbcsTable->reconstitutedData, 0, dataLength);
  1.1356 +
  1.1357 +    /* copy existing data and reroute the pointers */
  1.1358 +    stage1=(uint16_t *)mbcsTable->reconstitutedData;
  1.1359 +    uprv_memcpy(stage1, mbcsTable->fromUnicodeTable, stage1Length*2);
  1.1360 +
  1.1361 +    stage2=(uint32_t *)(stage1+stage1Length);
  1.1362 +    uprv_memcpy(stage2+(fullStage2Length-stage2Length),
  1.1363 +                mbcsTable->fromUnicodeTable+stage1Length,
  1.1364 +                stage2Length*4);
  1.1365 +
  1.1366 +    mbcsTable->fromUnicodeTable=stage1;
  1.1367 +    mbcsTable->fromUnicodeBytes=(uint8_t *)(stage2+fullStage2Length);
  1.1368 +
  1.1369 +    /* indexes into stage 2 count from the bottom of the fromUnicodeTable */
  1.1370 +    stage2=(uint32_t *)stage1;
  1.1371 +
  1.1372 +    /* reconstitute the initial part of stage 2 from the mbcsIndex */
  1.1373 +    {
  1.1374 +        int32_t stageUTF8Length=((int32_t)mbcsTable->maxFastUChar+1)>>6;
  1.1375 +        int32_t stageUTF8Index=0;
  1.1376 +        int32_t st1, st2, st3, i;
  1.1377 +
  1.1378 +        for(st1=0; stageUTF8Index<stageUTF8Length; ++st1) {
  1.1379 +            st2=stage1[st1];
  1.1380 +            if(st2!=stage1Length/2) {
  1.1381 +                /* each stage 2 block has 64 entries corresponding to 16 entries in the mbcsIndex */
  1.1382 +                for(i=0; i<16; ++i) {
  1.1383 +                    st3=mbcsTable->mbcsIndex[stageUTF8Index++];
  1.1384 +                    if(st3!=0) {
  1.1385 +                        /* an stage 2 entry's index is per stage 3 16-block, not per stage 3 entry */
  1.1386 +                        st3>>=4;
  1.1387 +                        /*
  1.1388 +                         * 4 stage 2 entries point to 4 consecutive stage 3 16-blocks which are
  1.1389 +                         * allocated together as a single 64-block for access from the mbcsIndex
  1.1390 +                         */
  1.1391 +                        stage2[st2++]=st3++;
  1.1392 +                        stage2[st2++]=st3++;
  1.1393 +                        stage2[st2++]=st3++;
  1.1394 +                        stage2[st2++]=st3;
  1.1395 +                    } else {
  1.1396 +                        /* no stage 3 block, skip */
  1.1397 +                        st2+=4;
  1.1398 +                    }
  1.1399 +                }
  1.1400 +            } else {
  1.1401 +                /* no stage 2 block, skip */
  1.1402 +                stageUTF8Index+=16;
  1.1403 +            }
  1.1404 +        }
  1.1405 +    }
  1.1406 +
  1.1407 +    /* reconstitute fromUnicodeBytes with roundtrips from toUnicode data */
  1.1408 +    ucnv_MBCSEnumToUnicode(mbcsTable, writeStage3Roundtrip, mbcsTable, pErrorCode);
  1.1409 +}
  1.1410 +
  1.1411 +/* MBCS setup functions ----------------------------------------------------- */
  1.1412 +
/*
 * Load function for MBCS data: interprets the _MBCSHeader at raw and
 * fills in sharedData->mbcs (and possibly sharedData->impl).
 *
 * - Accepts header version 4, or version 5 with minor version >=3 and no
 *   unknown incompatible option bits; otherwise sets U_INVALID_TABLE_FORMAT.
 * - MBCS_OUTPUT_EXT_ONLY files: loads the base table named right after the
 *   header via ucnv_load(), copies the base UConverterMBCSTable, and may
 *   switch to the runtime-only MBCS_OUTPUT_DBCS_ONLY outputType.
 * - Files with a base table: validates the outputType, points the table
 *   fields into the raw data, sets up the utf8Friendly index data and the
 *   asciiRoundtrips bit set, and reconstitutes omitted fromUnicode data
 *   when the MBCS_OPT_NO_FROM_U option is set.
 * - With pArgs->onlyTestIsLoadable, returns right after the validation
 *   without setting up the table.
 *
 * Failures are reported through *pErrorCode.
 */
static void
ucnv_MBCSLoad(UConverterSharedData *sharedData,
          UConverterLoadArgs *pArgs,
          const uint8_t *raw,
          UErrorCode *pErrorCode) {
    UDataInfo info;
    UConverterMBCSTable *mbcsTable=&sharedData->mbcs;
    _MBCSHeader *header=(_MBCSHeader *)raw;
    uint32_t offset;
    uint32_t headerLength;      /* used below as a count of 4-byte units (headerLength*4 bytes) */
    UBool noFromU=FALSE;

    if(header->version[0]==4) {
        headerLength=MBCS_HEADER_V4_LENGTH;
    } else if(header->version[0]==5 && header->version[1]>=3 &&
              (header->options&MBCS_OPT_UNKNOWN_INCOMPATIBLE_MASK)==0) {
        /* version 5.3+: the header length and the noFromU flag come from the options field */
        headerLength=header->options&MBCS_OPT_LENGTH_MASK;
        noFromU=(UBool)((header->options&MBCS_OPT_NO_FROM_U)!=0);
    } else {
        *pErrorCode=U_INVALID_TABLE_FORMAT;
        return;
    }

    /* the low byte of the flags is the outputType */
    mbcsTable->outputType=(uint8_t)header->flags;
    if(noFromU && mbcsTable->outputType==MBCS_OUTPUT_1) {
        /* the noFromU option is rejected for MBCS_OUTPUT_1 */
        *pErrorCode=U_INVALID_TABLE_FORMAT;
        return;
    }

    /* extension data, header version 4.2 and higher */
    /* the remaining flags bits are a byte offset from raw; 0 means no extension data */
    offset=header->flags>>8;
    if(offset!=0) {
        mbcsTable->extIndexes=(const int32_t *)(raw+offset);
    }

    if(mbcsTable->outputType==MBCS_OUTPUT_EXT_ONLY) {
        UConverterLoadArgs args={ 0 };
        UConverterSharedData *baseSharedData;
        const int32_t *extIndexes;
        const char *baseName;

        /* extension-only file, load the base table and set values appropriately */
        if((extIndexes=mbcsTable->extIndexes)==NULL) {
            /* extension-only file without extension */
            *pErrorCode=U_INVALID_TABLE_FORMAT;
            return;
        }

        if(pArgs->nestedLoads!=1) {
            /* an extension table must not be loaded as a base table */
            *pErrorCode=U_INVALID_TABLE_FILE;
            return;
        }

        /* load the base table */
        /* the base table name is the string that directly follows the header */
        baseName=(const char *)header+headerLength*4;
        if(0==uprv_strcmp(baseName, sharedData->staticData->name)) {
            /* forbid loading this same extension-only file */
            *pErrorCode=U_INVALID_TABLE_FORMAT;
            return;
        }

        /* TODO parse package name out of the prefix of the base name in the extension .cnv file? */
        args.size=sizeof(UConverterLoadArgs);
        args.nestedLoads=2;
        args.onlyTestIsLoadable=pArgs->onlyTestIsLoadable;
        args.reserved=pArgs->reserved;
        args.options=pArgs->options;
        args.pkg=pArgs->pkg;
        args.name=baseName;
        baseSharedData=ucnv_load(&args, pErrorCode);
        if(U_FAILURE(*pErrorCode)) {
            return;
        }
        /* the base must be an MBCS converter that does not itself have a base table */
        if( baseSharedData->staticData->conversionType!=UCNV_MBCS ||
            baseSharedData->mbcs.baseSharedData!=NULL
        ) {
            ucnv_unload(baseSharedData);
            *pErrorCode=U_INVALID_TABLE_FORMAT;
            return;
        }
        if(pArgs->onlyTestIsLoadable) {
            /*
             * Exit as soon as we know that we can load the converter
             * and the format is valid and supported.
             * The worst that can happen in the following code is a memory
             * allocation error.
             */
            ucnv_unload(baseSharedData);
            return;
        }

        /* copy the base table data */
        uprv_memcpy(mbcsTable, &baseSharedData->mbcs, sizeof(UConverterMBCSTable));

        /* overwrite values with relevant ones for the extension converter */
        mbcsTable->baseSharedData=baseSharedData;
        mbcsTable->extIndexes=extIndexes;

        /*
         * It would be possible to share the swapLFNL data with a base converter,
         * but the generated name would have to be different, and the memory
         * would have to be free'd only once.
         * It is easier to just create the data for the extension converter
         * separately when it is requested.
         */
        mbcsTable->swapLFNLStateTable=NULL;
        mbcsTable->swapLFNLFromUnicodeBytes=NULL;
        mbcsTable->swapLFNLName=NULL;

        /*
         * The reconstitutedData must be deleted only when the base converter
         * is unloaded.
         */
        mbcsTable->reconstitutedData=NULL;

        /*
         * Set a special, runtime-only outputType if the extension converter
         * is a DBCS version of a base converter that also maps single bytes.
         */
        if( sharedData->staticData->conversionType==UCNV_DBCS ||
                (sharedData->staticData->conversionType==UCNV_MBCS &&
                 sharedData->staticData->minBytesPerChar>=2)
        ) {
            if(baseSharedData->mbcs.outputType==MBCS_OUTPUT_2_SISO) {
                /* the base converter is SI/SO-stateful */
                int32_t entry;

                /* get the dbcs state from the state table entry for SO=0x0e */
                entry=mbcsTable->stateTable[0][0xe];
                if( MBCS_ENTRY_IS_FINAL(entry) &&
                    MBCS_ENTRY_FINAL_ACTION(entry)==MBCS_STATE_CHANGE_ONLY &&
                    MBCS_ENTRY_FINAL_STATE(entry)!=0
                ) {
                    mbcsTable->dbcsOnlyState=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry);

                    mbcsTable->outputType=MBCS_OUTPUT_DBCS_ONLY;
                }
            } else if(
                baseSharedData->staticData->conversionType==UCNV_MBCS &&
                baseSharedData->staticData->minBytesPerChar==1 &&
                baseSharedData->staticData->maxBytesPerChar==2 &&
                mbcsTable->countStates<=127
            ) {
                /* non-stateful base converter, need to modify the state table */
                int32_t (*newStateTable)[256];
                int32_t *state;
                int32_t i, count;

                /* allocate a new state table and copy the base state table contents */
                /* each state is 256 int32_t entries = 1024 bytes; one more state is added below */
                count=mbcsTable->countStates;
                newStateTable=(int32_t (*)[256])uprv_malloc((count+1)*1024);
                if(newStateTable==NULL) {
                    ucnv_unload(baseSharedData);
                    *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
                    return;
                }

                uprv_memcpy(newStateTable, mbcsTable->stateTable, count*1024);

                /* change all final single-byte entries to go to a new all-illegal state */
                state=newStateTable[0];
                for(i=0; i<256; ++i) {
                    if(MBCS_ENTRY_IS_FINAL(state[i])) {
                        state[i]=MBCS_ENTRY_TRANSITION(count, 0);
                    }
                }

                /* build the new all-illegal state */
                state=newStateTable[count];
                for(i=0; i<256; ++i) {
                    state[i]=MBCS_ENTRY_FINAL(0, MBCS_STATE_ILLEGAL, 0);
                }
                /* stateTableOwned makes ucnv_MBCSUnload() free this copy */
                mbcsTable->stateTable=(const int32_t (*)[256])newStateTable;
                mbcsTable->countStates=(uint8_t)(count+1);
                mbcsTable->stateTableOwned=TRUE;

                mbcsTable->outputType=MBCS_OUTPUT_DBCS_ONLY;
            }
        }

        /*
         * unlike below for files with base tables, do not get the unicodeMask
         * from the sharedData; instead, use the base table's unicodeMask,
         * which we copied in the memcpy above;
         * this is necessary because the static data unicodeMask, especially
         * the UCNV_HAS_SUPPLEMENTARY flag, is part of the base table data
         */
    } else {
        /* conversion file with a base table; an additional extension table is optional */
        /* make sure that the output type is known */
        switch(mbcsTable->outputType) {
        case MBCS_OUTPUT_1:
        case MBCS_OUTPUT_2:
        case MBCS_OUTPUT_3:
        case MBCS_OUTPUT_4:
        case MBCS_OUTPUT_3_EUC:
        case MBCS_OUTPUT_4_EUC:
        case MBCS_OUTPUT_2_SISO:
            /* OK */
            break;
        default:
            *pErrorCode=U_INVALID_TABLE_FORMAT;
            return;
        }
        if(pArgs->onlyTestIsLoadable) {
            /*
             * Exit as soon as we know that we can load the converter
             * and the format is valid and supported.
             * The worst that can happen in the following code is a memory
             * allocation error.
             */
            return;
        }

        /* the state table follows the header; the toUnicode fallbacks follow the state table */
        mbcsTable->countStates=(uint8_t)header->countStates;
        mbcsTable->countToUFallbacks=header->countToUFallbacks;
        mbcsTable->stateTable=(const int32_t (*)[256])(raw+headerLength*4);
        mbcsTable->toUFallbacks=(const _MBCSToUFallback *)(mbcsTable->stateTable+header->countStates);
        mbcsTable->unicodeCodeUnits=(const uint16_t *)(raw+header->offsetToUCodeUnits);

        mbcsTable->fromUnicodeTable=(const uint16_t *)(raw+header->offsetFromUTable);
        mbcsTable->fromUnicodeBytes=(const uint8_t *)(raw+header->offsetFromUBytes);
        mbcsTable->fromUBytesLength=header->fromUBytesLength;

        /*
         * converter versions 6.1 and up contain a unicodeMask that is
         * used here to select the most efficient function implementations
         */
        info.size=sizeof(UDataInfo);
        udata_getInfo((UDataMemory *)sharedData->dataMemory, &info);
        if(info.formatVersion[0]>6 || (info.formatVersion[0]==6 && info.formatVersion[1]>=1)) {
            /* mask off possible future extensions to be safe */
            mbcsTable->unicodeMask=(uint8_t)(sharedData->staticData->unicodeMask&3);
        } else {
            /* for older versions, assume worst case: contains anything possible (prevent over-optimizations) */
            mbcsTable->unicodeMask=UCNV_HAS_SUPPLEMENTARY|UCNV_HAS_SURROGATES;
        }

        /*
         * _MBCSHeader.version 4.3 adds utf8Friendly data structures.
         * Check for the header version, SBCS vs. MBCS, and for whether the
         * data structures are optimized for code points as high as what the
         * runtime code is designed for.
         * The implementation does not handle mapping tables with entries for
         * unpaired surrogates.
         */
        if( header->version[1]>=3 &&
            (mbcsTable->unicodeMask&UCNV_HAS_SURROGATES)==0 &&
            (mbcsTable->countStates==1 ?
                (header->version[2]>=(SBCS_FAST_MAX>>8)) :
                (header->version[2]>=(MBCS_FAST_MAX>>8))
            )
        ) {
            mbcsTable->utf8Friendly=TRUE;

            if(mbcsTable->countStates==1) {
                /*
                 * SBCS: Stage 3 is allocated in 64-entry blocks for U+0000..SBCS_FAST_MAX or higher.
                 * Build a table with indexes to each block, to be used instead of
                 * the regular stage 1/2 table.
                 */
                int32_t i;
                /* i counts 64-code point blocks: i>>4 is the stage 1 index, (i<<2)&0x3c the stage 2 index */
                for(i=0; i<(SBCS_FAST_LIMIT>>6); ++i) {
                    mbcsTable->sbcsIndex[i]=mbcsTable->fromUnicodeTable[mbcsTable->fromUnicodeTable[i>>4]+((i<<2)&0x3c)];
                }
                /* set SBCS_FAST_MAX to reflect the reach of sbcsIndex[] even if header->version[2]>(SBCS_FAST_MAX>>8) */
                mbcsTable->maxFastUChar=SBCS_FAST_MAX;
            } else {
                /*
                 * MBCS: Stage 3 is allocated in 64-entry blocks for U+0000..MBCS_FAST_MAX or higher.
                 * The .cnv file is prebuilt with an additional stage table with indexes
                 * to each block.
                 */
                /* with noFromU, the mbcsIndex starts directly at fromUnicodeBytes */
                mbcsTable->mbcsIndex=(const uint16_t *)
                    (mbcsTable->fromUnicodeBytes+
                     (noFromU ? 0 : mbcsTable->fromUBytesLength));
                mbcsTable->maxFastUChar=(((UChar)header->version[2])<<8)|0xff;
            }
        }

        /* calculate a bit set of 4 ASCII characters per bit that round-trip to ASCII bytes */
        {
            uint32_t asciiRoundtrips=0xffffffff;
            int32_t i;

            /* clear the bit for a group of 4 if any of its bytes does not map directly to the same code point */
            for(i=0; i<0x80; ++i) {
                if(mbcsTable->stateTable[0][i]!=MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, i)) {
                    asciiRoundtrips&=~((uint32_t)1<<(i>>2));
                }
            }
            mbcsTable->asciiRoundtrips=asciiRoundtrips;
        }

        if(noFromU) {
            /* stage 1 length in 16-bit units: 0x440==0x110000>>10 with supplementary code points, else 0x40==0x10000>>10 */
            uint32_t stage1Length=
                mbcsTable->unicodeMask&UCNV_HAS_SUPPLEMENTARY ?
                    0x440 : 0x40;
            /* stored stage 2 length in 32-bit units: the table size minus the space taken by stage 1 */
            uint32_t stage2Length=
                (header->offsetFromUBytes-header->offsetFromUTable)/4-
                stage1Length/2;
            reconstituteData(mbcsTable, stage1Length, stage2Length, header->fullStage2Length, pErrorCode);
        }
    }

    /* Set the impl pointer here so that it is set for both extension-only and base tables. */
    if(mbcsTable->utf8Friendly) {
        if(mbcsTable->countStates==1) {
            sharedData->impl=&_SBCSUTF8Impl;
        } else {
            if(mbcsTable->outputType==MBCS_OUTPUT_2) {
                sharedData->impl=&_DBCSUTF8Impl;
            }
        }
    }

    if(mbcsTable->outputType==MBCS_OUTPUT_DBCS_ONLY || mbcsTable->outputType==MBCS_OUTPUT_2_SISO) {
        /*
         * MBCS_OUTPUT_DBCS_ONLY: No SBCS mappings, therefore ASCII does not roundtrip.
         * MBCS_OUTPUT_2_SISO: Bypass the ASCII fastpath to handle prevLength correctly.
         */
        mbcsTable->asciiRoundtrips=0;
    }
}
  1.1737 +
  1.1738 +static void
  1.1739 +ucnv_MBCSUnload(UConverterSharedData *sharedData) {
  1.1740 +    UConverterMBCSTable *mbcsTable=&sharedData->mbcs;
  1.1741 +
  1.1742 +    if(mbcsTable->swapLFNLStateTable!=NULL) {
  1.1743 +        uprv_free(mbcsTable->swapLFNLStateTable);
  1.1744 +    }
  1.1745 +    if(mbcsTable->stateTableOwned) {
  1.1746 +        uprv_free((void *)mbcsTable->stateTable);
  1.1747 +    }
  1.1748 +    if(mbcsTable->baseSharedData!=NULL) {
  1.1749 +        ucnv_unload(mbcsTable->baseSharedData);
  1.1750 +    }
  1.1751 +    if(mbcsTable->reconstitutedData!=NULL) {
  1.1752 +        uprv_free(mbcsTable->reconstitutedData);
  1.1753 +    }
  1.1754 +}
  1.1755 +
  1.1756 +static void
  1.1757 +ucnv_MBCSOpen(UConverter *cnv,
  1.1758 +              UConverterLoadArgs *pArgs,
  1.1759 +              UErrorCode *pErrorCode) {
  1.1760 +    UConverterMBCSTable *mbcsTable;
  1.1761 +    const int32_t *extIndexes;
  1.1762 +    uint8_t outputType;
  1.1763 +    int8_t maxBytesPerUChar;
  1.1764 +
  1.1765 +    if(pArgs->onlyTestIsLoadable) {
  1.1766 +        return;
  1.1767 +    }
  1.1768 +
  1.1769 +    mbcsTable=&cnv->sharedData->mbcs;
  1.1770 +    outputType=mbcsTable->outputType;
  1.1771 +
  1.1772 +    if(outputType==MBCS_OUTPUT_DBCS_ONLY) {
  1.1773 +        /* the swaplfnl option does not apply, remove it */
  1.1774 +        cnv->options=pArgs->options&=~UCNV_OPTION_SWAP_LFNL;
  1.1775 +    }
  1.1776 +
  1.1777 +    if((pArgs->options&UCNV_OPTION_SWAP_LFNL)!=0) {
  1.1778 +        /* do this because double-checked locking is broken */
  1.1779 +        UBool isCached;
  1.1780 +
  1.1781 +        umtx_lock(NULL);
  1.1782 +        isCached=mbcsTable->swapLFNLStateTable!=NULL;
  1.1783 +        umtx_unlock(NULL);
  1.1784 +
  1.1785 +        if(!isCached) {
  1.1786 +            if(!_EBCDICSwapLFNL(cnv->sharedData, pErrorCode)) {
  1.1787 +                if(U_FAILURE(*pErrorCode)) {
  1.1788 +                    return; /* something went wrong */
  1.1789 +                }
  1.1790 +
  1.1791 +                /* the option does not apply, remove it */
  1.1792 +                cnv->options=pArgs->options&=~UCNV_OPTION_SWAP_LFNL;
  1.1793 +            }
  1.1794 +        }
  1.1795 +    }
  1.1796 +
  1.1797 +    if(uprv_strstr(pArgs->name, "18030")!=NULL) {
  1.1798 +        if(uprv_strstr(pArgs->name, "gb18030")!=NULL || uprv_strstr(pArgs->name, "GB18030")!=NULL) {
  1.1799 +            /* set a flag for GB 18030 mode, which changes the callback behavior */
  1.1800 +            cnv->options|=_MBCS_OPTION_GB18030;
  1.1801 +        }
  1.1802 +    } else if((uprv_strstr(pArgs->name, "KEIS")!=NULL) || (uprv_strstr(pArgs->name, "keis")!=NULL)) {
  1.1803 +        /* set a flag for KEIS converter, which changes the SI/SO character sequence */
  1.1804 +        cnv->options|=_MBCS_OPTION_KEIS;
  1.1805 +    } else if((uprv_strstr(pArgs->name, "JEF")!=NULL) || (uprv_strstr(pArgs->name, "jef")!=NULL)) {
  1.1806 +        /* set a flag for JEF converter, which changes the SI/SO character sequence */
  1.1807 +        cnv->options|=_MBCS_OPTION_JEF;
  1.1808 +    } else if((uprv_strstr(pArgs->name, "JIPS")!=NULL) || (uprv_strstr(pArgs->name, "jips")!=NULL)) {
  1.1809 +        /* set a flag for JIPS converter, which changes the SI/SO character sequence */
  1.1810 +        cnv->options|=_MBCS_OPTION_JIPS;
  1.1811 +    }
  1.1812 +
  1.1813 +    /* fix maxBytesPerUChar depending on outputType and options etc. */
  1.1814 +    if(outputType==MBCS_OUTPUT_2_SISO) {
  1.1815 +        cnv->maxBytesPerUChar=3; /* SO+DBCS */
  1.1816 +    }
  1.1817 +
  1.1818 +    extIndexes=mbcsTable->extIndexes;
  1.1819 +    if(extIndexes!=NULL) {
  1.1820 +        maxBytesPerUChar=(int8_t)UCNV_GET_MAX_BYTES_PER_UCHAR(extIndexes);
  1.1821 +        if(outputType==MBCS_OUTPUT_2_SISO) {
  1.1822 +            ++maxBytesPerUChar; /* SO + multiple DBCS */
  1.1823 +        }
  1.1824 +
  1.1825 +        if(maxBytesPerUChar>cnv->maxBytesPerUChar) {
  1.1826 +            cnv->maxBytesPerUChar=maxBytesPerUChar;
  1.1827 +        }
  1.1828 +    }
  1.1829 +
  1.1830 +#if 0
  1.1831 +    /*
  1.1832 +     * documentation of UConverter fields used for status
  1.1833 +     * all of these fields are (re)set to 0 by ucnv_bld.c and ucnv_reset()
  1.1834 +     */
  1.1835 +
  1.1836 +    /* toUnicode */
  1.1837 +    cnv->toUnicodeStatus=0;     /* offset */
  1.1838 +    cnv->mode=0;                /* state */
  1.1839 +    cnv->toULength=0;           /* byteIndex */
  1.1840 +
  1.1841 +    /* fromUnicode */
  1.1842 +    cnv->fromUChar32=0;
  1.1843 +    cnv->fromUnicodeStatus=1;   /* prevLength */
  1.1844 +#endif
  1.1845 +}
  1.1846 +
  1.1847 +static const char *
  1.1848 +ucnv_MBCSGetName(const UConverter *cnv) {
  1.1849 +    if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0 && cnv->sharedData->mbcs.swapLFNLName!=NULL) {
  1.1850 +        return cnv->sharedData->mbcs.swapLFNLName;
  1.1851 +    } else {
  1.1852 +        return cnv->sharedData->staticData->name;
  1.1853 +    }
  1.1854 +}
  1.1855 +
  1.1856 +/* MBCS-to-Unicode conversion functions ------------------------------------- */
  1.1857 +
  1.1858 +static UChar32
  1.1859 +ucnv_MBCSGetFallback(UConverterMBCSTable *mbcsTable, uint32_t offset) {
  1.1860 +    const _MBCSToUFallback *toUFallbacks;
  1.1861 +    uint32_t i, start, limit;
  1.1862 +
  1.1863 +    limit=mbcsTable->countToUFallbacks;
  1.1864 +    if(limit>0) {
  1.1865 +        /* do a binary search for the fallback mapping */
  1.1866 +        toUFallbacks=mbcsTable->toUFallbacks;
  1.1867 +        start=0;
  1.1868 +        while(start<limit-1) {
  1.1869 +            i=(start+limit)/2;
  1.1870 +            if(offset<toUFallbacks[i].offset) {
  1.1871 +                limit=i;
  1.1872 +            } else {
  1.1873 +                start=i;
  1.1874 +            }
  1.1875 +        }
  1.1876 +
  1.1877 +        /* did we really find it? */
  1.1878 +        if(offset==toUFallbacks[start].offset) {
  1.1879 +            return toUFallbacks[start].codePoint;
  1.1880 +        }
  1.1881 +    }
  1.1882 +
  1.1883 +    return 0xfffe;
  1.1884 +}
  1.1885 +
/*
 * This version of ucnv_MBCSToUnicodeWithOffsets() is optimized for single-byte, single-state codepages.
 *
 * Converts bytes from pArgs->source up to pArgs->sourceLimit into UChars at
 * pArgs->target, looking up each byte in state 0 of the state table (the
 * swapLFNL state table if the swaplfnl option is set).
 * If pArgs->offsets is not NULL, the source index of each output UChar is
 * written there as well.
 *
 * The loop stops with *pErrorCode set to
 * - U_BUFFER_OVERFLOW_ERROR when the target is full
 *   (a pending trail surrogate is stored in cnv->UCharErrorBuffer),
 * - U_ILLEGAL_CHAR_FOUND for a byte with an illegal state table entry,
 * - whatever _extToU() reports for bytes that have no usable mapping
 *   in the state table.
 * The updated source, target and offsets pointers are written back to pArgs.
 */
static void
ucnv_MBCSSingleToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
                                UErrorCode *pErrorCode) {
    UConverter *cnv;
    const uint8_t *source, *sourceLimit;
    UChar *target;
    const UChar *targetLimit;
    int32_t *offsets;

    const int32_t (*stateTable)[256];

    int32_t sourceIndex;

    int32_t entry;
    UChar c;
    uint8_t action;

    /* set up the local pointers */
    cnv=pArgs->converter;
    source=(const uint8_t *)pArgs->source;
    sourceLimit=(const uint8_t *)pArgs->sourceLimit;
    target=pArgs->target;
    targetLimit=pArgs->targetLimit;
    offsets=pArgs->offsets;

    if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
        stateTable=(const int32_t (*)[256])cnv->sharedData->mbcs.swapLFNLStateTable;
    } else {
        stateTable=cnv->sharedData->mbcs.stateTable;
    }

    /* sourceIndex=-1 if the current character began in the previous buffer */
    /* (this single-byte version always starts at 0) */
    sourceIndex=0;

    /* conversion loop */
    while(source<sourceLimit) {
        /*
         * This following test is to see if available input would overflow the output.
         * It does not catch output of more than one code unit that
         * overflows as a result of a surrogate pair or callback output
         * from the last source byte.
         * Therefore, those situations also test for overflows and will
         * then break the loop, too.
         */
        if(target>=targetLimit) {
            /* target is full */
            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
            break;
        }

        /* only state 0 is used: every byte is looked up in the initial state */
        entry=stateTable[0][*source++];
        /* MBCS_ENTRY_IS_FINAL(entry) */

        /* test the most common case first */
        if(MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) {
            /* output BMP code point */
            *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
            if(offsets!=NULL) {
                *offsets++=sourceIndex;
            }

            /* normal end of action codes: prepare for a new character */
            ++sourceIndex;
            continue;
        }

        /*
         * An if-else-if chain provides more reliable performance for
         * the most common cases compared to a switch.
         */
        action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry));
        if(action==MBCS_STATE_VALID_DIRECT_20 ||
           (action==MBCS_STATE_FALLBACK_DIRECT_20 && UCNV_TO_U_USE_FALLBACK(cnv))
        ) {
            entry=MBCS_ENTRY_FINAL_VALUE(entry);
            /* output surrogate pair */
            /* lead surrogate from the upper bits of the value, trail surrogate from the low 10 bits */
            *target++=(UChar)(0xd800|(UChar)(entry>>10));
            if(offsets!=NULL) {
                *offsets++=sourceIndex;
            }
            c=(UChar)(0xdc00|(UChar)(entry&0x3ff));
            if(target<targetLimit) {
                *target++=c;
                if(offsets!=NULL) {
                    *offsets++=sourceIndex;
                }
            } else {
                /* target overflow */
                /* keep the trail surrogate in the converter's error buffer */
                cnv->UCharErrorBuffer[0]=c;
                cnv->UCharErrorBufferLength=1;
                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
                break;
            }

            ++sourceIndex;
            continue;
        } else if(action==MBCS_STATE_FALLBACK_DIRECT_16) {
            if(UCNV_TO_U_USE_FALLBACK(cnv)) {
                /* output BMP code point */
                *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
                if(offsets!=NULL) {
                    *offsets++=sourceIndex;
                }

                ++sourceIndex;
                continue;
            }
            /* fallbacks are not used: fall through to the extension mapping below */
        } else if(action==MBCS_STATE_UNASSIGNED) {
            /* just fall through */
        } else if(action==MBCS_STATE_ILLEGAL) {
            /* callback(illegal) */
            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
        } else {
            /* reserved, must never occur */
            ++sourceIndex;
            continue;
        }

        if(U_FAILURE(*pErrorCode)) {
            /* callback(illegal) */
            break;
        } else /* unassigned sequences indicated with byteIndex>0 */ {
            /* try an extension mapping */
            /* pArgs->source marks the position after the first byte, for the index update below */
            pArgs->source=(const char *)source;
            cnv->toUBytes[0]=*(source-1);
            cnv->toULength=_extToU(cnv, cnv->sharedData,
                                    1, &source, sourceLimit,
                                    &target, targetLimit,
                                    &offsets, sourceIndex,
                                    pArgs->flush,
                                    pErrorCode);
            /* advance by the first byte plus however far _extToU() moved source */
            sourceIndex+=1+(int32_t)(source-(const uint8_t *)pArgs->source);

            if(U_FAILURE(*pErrorCode)) {
                /* not mappable or buffer overflow */
                break;
            }
        }
    }

    /* write back the updated pointers */
    pArgs->source=(const char *)source;
    pArgs->target=target;
    pArgs->offsets=offsets;
}
  1.2032 +
  1.2033 +/*
  1.2034 + * This version of ucnv_MBCSSingleToUnicodeWithOffsets() is optimized for single-byte, single-state codepages
  1.2035 + * that only map to and from the BMP: a directly mapped source byte yields exactly one UChar, looked up in stateTable[0].
  1.2036 + * In addition to single-byte optimizations, the offset calculations
  1.2037 + * become much easier: offsets are written in batches. Bytes without a direct mapping are passed to _extToU(); illegal bytes set U_ILLEGAL_CHAR_FOUND. On return, pArgs->source/target/offsets are updated.
  1.2038 + */
  1.2039 +static void
  1.2040 +ucnv_MBCSSingleToBMPWithOffsets(UConverterToUnicodeArgs *pArgs,
  1.2041 +                            UErrorCode *pErrorCode) {
  1.2042 +    UConverter *cnv;
  1.2043 +    const uint8_t *source, *sourceLimit, *lastSource; /* lastSource: start of the source bytes whose offsets are still pending */
  1.2044 +    UChar *target;
  1.2045 +    int32_t targetCapacity, length; /* targetCapacity is clamped below to min(source bytes left, target units left) */
  1.2046 +    int32_t *offsets;
  1.2047 +
  1.2048 +    const int32_t (*stateTable)[256];
  1.2049 +
  1.2050 +    int32_t sourceIndex;
  1.2051 +
  1.2052 +    int32_t entry;
  1.2053 +    uint8_t action;
  1.2054 +
  1.2055 +    /* set up the local pointers */
  1.2056 +    cnv=pArgs->converter;
  1.2057 +    source=(const uint8_t *)pArgs->source;
  1.2058 +    sourceLimit=(const uint8_t *)pArgs->sourceLimit;
  1.2059 +    target=pArgs->target;
  1.2060 +    targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
  1.2061 +    offsets=pArgs->offsets;
  1.2062 +
  1.2063 +    if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
  1.2064 +        stateTable=(const int32_t (*)[256])cnv->sharedData->mbcs.swapLFNLStateTable;
  1.2065 +    } else {
  1.2066 +        stateTable=cnv->sharedData->mbcs.stateTable;
  1.2067 +    }
  1.2068 +
  1.2069 +    /* sourceIndex is the source offset recorded for the next output UChar; it always starts at 0 in this function */
  1.2070 +    sourceIndex=0;
  1.2071 +    lastSource=source;
  1.2072 +
  1.2073 +    /*
  1.2074 +     * since the conversion here is 1:1 UChar:uint8_t, we need only one counter
  1.2075 +     * for the minimum of the sourceLength and targetCapacity
  1.2076 +     */
  1.2077 +    length=(int32_t)(sourceLimit-source);
  1.2078 +    if(length<targetCapacity) {
  1.2079 +        targetCapacity=length;
  1.2080 +    }
  1.2081 +
  1.2082 +#if MBCS_UNROLL_SINGLE_TO_BMP
  1.2083 +    /* unrolling makes it faster on Pentium III/Windows 2000 */
  1.2084 +    /* unroll the loop with the most common case; re-entered via "goto unrolled" after each extension mapping */
  1.2085 +unrolled:
  1.2086 +    if(targetCapacity>=16) {
  1.2087 +        int32_t count, loops, oredEntries;
  1.2088 +
  1.2089 +        loops=count=targetCapacity>>4; /* number of complete 16-byte groups that fit */
  1.2090 +        do {
  1.2091 +            oredEntries=entry=stateTable[0][*source++];
  1.2092 +            *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
  1.2093 +            oredEntries|=entry=stateTable[0][*source++];
  1.2094 +            *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
  1.2095 +            oredEntries|=entry=stateTable[0][*source++];
  1.2096 +            *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
  1.2097 +            oredEntries|=entry=stateTable[0][*source++];
  1.2098 +            *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
  1.2099 +            oredEntries|=entry=stateTable[0][*source++];
  1.2100 +            *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
  1.2101 +            oredEntries|=entry=stateTable[0][*source++];
  1.2102 +            *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
  1.2103 +            oredEntries|=entry=stateTable[0][*source++];
  1.2104 +            *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
  1.2105 +            oredEntries|=entry=stateTable[0][*source++];
  1.2106 +            *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
  1.2107 +            oredEntries|=entry=stateTable[0][*source++];
  1.2108 +            *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
  1.2109 +            oredEntries|=entry=stateTable[0][*source++];
  1.2110 +            *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
  1.2111 +            oredEntries|=entry=stateTable[0][*source++];
  1.2112 +            *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
  1.2113 +            oredEntries|=entry=stateTable[0][*source++];
  1.2114 +            *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
  1.2115 +            oredEntries|=entry=stateTable[0][*source++];
  1.2116 +            *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
  1.2117 +            oredEntries|=entry=stateTable[0][*source++];
  1.2118 +            *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
  1.2119 +            oredEntries|=entry=stateTable[0][*source++];
  1.2120 +            *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
  1.2121 +            oredEntries|=entry=stateTable[0][*source++];
  1.2122 +            *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
  1.2123 +
  1.2124 +            /* were all 16 entries really valid? (tested once on the OR of all 16 entries) */
  1.2125 +            if(!MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(oredEntries)) {
  1.2126 +                /* no, return to the first of these 16; the per-byte loop below redoes them */
  1.2127 +                source-=16;
  1.2128 +                target-=16;
  1.2129 +                break;
  1.2130 +            }
  1.2131 +        } while(--count>0);
  1.2132 +        count=loops-count; /* count is now the number of 16-byte groups that were kept */
  1.2133 +        targetCapacity-=16*count;
  1.2134 +
  1.2135 +        if(offsets!=NULL) {
  1.2136 +            lastSource+=16*count; /* the offsets for these bytes are written right here */
  1.2137 +            while(count>0) {
  1.2138 +                *offsets++=sourceIndex++;
  1.2139 +                *offsets++=sourceIndex++;
  1.2140 +                *offsets++=sourceIndex++;
  1.2141 +                *offsets++=sourceIndex++;
  1.2142 +                *offsets++=sourceIndex++;
  1.2143 +                *offsets++=sourceIndex++;
  1.2144 +                *offsets++=sourceIndex++;
  1.2145 +                *offsets++=sourceIndex++;
  1.2146 +                *offsets++=sourceIndex++;
  1.2147 +                *offsets++=sourceIndex++;
  1.2148 +                *offsets++=sourceIndex++;
  1.2149 +                *offsets++=sourceIndex++;
  1.2150 +                *offsets++=sourceIndex++;
  1.2151 +                *offsets++=sourceIndex++;
  1.2152 +                *offsets++=sourceIndex++;
  1.2153 +                *offsets++=sourceIndex++;
  1.2154 +                --count;
  1.2155 +            }
  1.2156 +        }
  1.2157 +    }
  1.2158 +#endif
  1.2159 +
  1.2160 +    /* conversion loop: one byte at a time, for whatever the unrolled code did not handle */
  1.2161 +    while(targetCapacity > 0 && source < sourceLimit) {
  1.2162 +        entry=stateTable[0][*source++];
  1.2163 +        /* MBCS_ENTRY_IS_FINAL(entry) -- presumably always true here because the table has a single state */
  1.2164 +
  1.2165 +        /* test the most common case first */
  1.2166 +        if(MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) {
  1.2167 +            /* output BMP code point */
  1.2168 +            *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
  1.2169 +            --targetCapacity;
  1.2170 +            continue;
  1.2171 +        }
  1.2172 +
  1.2173 +        /*
  1.2174 +         * An if-else-if chain provides more reliable performance for
  1.2175 +         * the most common cases compared to a switch.
  1.2176 +         */
  1.2177 +        action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry));
  1.2178 +        if(action==MBCS_STATE_FALLBACK_DIRECT_16) {
  1.2179 +            if(UCNV_TO_U_USE_FALLBACK(cnv)) {
  1.2180 +                /* output BMP code point */
  1.2181 +                *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
  1.2182 +                --targetCapacity;
  1.2183 +                continue;
  1.2184 +            }
  1.2185 +        } else if(action==MBCS_STATE_UNASSIGNED) {
  1.2186 +            /* just fall through to the extension lookup below */
  1.2187 +        } else if(action==MBCS_STATE_ILLEGAL) {
  1.2188 +            /* callback(illegal) */
  1.2189 +            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
  1.2190 +        } else {
  1.2191 +            /* reserved, must never occur; the byte is consumed without producing output */
  1.2192 +            continue;
  1.2193 +        }
  1.2194 +
  1.2195 +        /* set offsets since the start or the last extension */
  1.2196 +        if(offsets!=NULL) {
  1.2197 +            int32_t count=(int32_t)(source-lastSource);
  1.2198 +
  1.2199 +            /* predecrement: do not set the offset for the callback-causing character */
  1.2200 +            while(--count>0) {
  1.2201 +                *offsets++=sourceIndex++;
  1.2202 +            }
  1.2203 +            /* offset and sourceIndex are now set for the current character */
  1.2204 +        }
  1.2205 +
  1.2206 +        if(U_FAILURE(*pErrorCode)) {
  1.2207 +            /* callback(illegal) */
  1.2208 +            break;
  1.2209 +        } else /* unassigned byte, or a fallback mapping while fallbacks are turned off */ {
  1.2210 +            /* try an extension mapping */
  1.2211 +            lastSource=source;
  1.2212 +            cnv->toUBytes[0]=*(source-1); /* the byte that had no direct mapping */
  1.2213 +            cnv->toULength=_extToU(cnv, cnv->sharedData,
  1.2214 +                                    1, &source, sourceLimit,
  1.2215 +                                    &target, pArgs->targetLimit,
  1.2216 +                                    &offsets, sourceIndex,
  1.2217 +                                    pArgs->flush,
  1.2218 +                                    pErrorCode);
  1.2219 +            sourceIndex+=1+(int32_t)(source-lastSource); /* the current byte plus however far _extToU() advanced source */
  1.2220 +
  1.2221 +            if(U_FAILURE(*pErrorCode)) {
  1.2222 +                /* not mappable or buffer overflow */
  1.2223 +                break;
  1.2224 +            }
  1.2225 +
  1.2226 +            /* recalculate the targetCapacity after an extension mapping */
  1.2227 +            targetCapacity=(int32_t)(pArgs->targetLimit-target);
  1.2228 +            length=(int32_t)(sourceLimit-source);
  1.2229 +            if(length<targetCapacity) {
  1.2230 +                targetCapacity=length;
  1.2231 +            }
  1.2232 +        }
  1.2233 +
  1.2234 +#if MBCS_UNROLL_SINGLE_TO_BMP
  1.2235 +        /* unrolling makes it faster on Pentium III/Windows 2000 */
  1.2236 +        goto unrolled;
  1.2237 +#endif
  1.2238 +    }
  1.2239 +
  1.2240 +    if(U_SUCCESS(*pErrorCode) && source<sourceLimit && target>=pArgs->targetLimit) {
  1.2241 +        /* target is full while source bytes remain */
  1.2242 +        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
  1.2243 +    }
  1.2244 +
  1.2245 +    /* set offsets since the start or the last callback */
  1.2246 +    if(offsets!=NULL) {
  1.2247 +        size_t count=source-lastSource;
  1.2248 +        while(count>0) {
  1.2249 +            *offsets++=sourceIndex++;
  1.2250 +            --count;
  1.2251 +        }
  1.2252 +    }
  1.2253 +
  1.2254 +    /* write back the updated pointers */
  1.2255 +    pArgs->source=(const char *)source;
  1.2256 +    pArgs->target=target;
  1.2257 +    pArgs->offsets=offsets;
  1.2258 +}
  1.2259 +
  1.2260 +static UBool
  1.2261 +hasValidTrailBytes(const int32_t (*stateTable)[256], uint8_t state) {
  1.2262 +    /*
  1.2263 +     * Returns TRUE if, starting in this state, at least one byte sequence reaches
  1.2264 +     * a final entry whose action is not MBCS_STATE_ILLEGAL; FALSE otherwise.
  1.2265 +     */
  1.2266 +    const int32_t *stateRow=stateTable[state];
  1.2267 +    int32_t byteValue, cell;
  1.2268 +
  1.2269 +    /* Quick probes first: 0xa1 and 0x41 are commonly valid byte values. */
  1.2270 +    cell=stateRow[0xa1];
  1.2271 +    if(!MBCS_ENTRY_IS_TRANSITION(cell) && MBCS_ENTRY_FINAL_ACTION(cell)!=MBCS_STATE_ILLEGAL) {
  1.2272 +        return TRUE;
  1.2273 +    }
  1.2274 +    cell=stateRow[0x41];
  1.2275 +    if(!MBCS_ENTRY_IS_TRANSITION(cell) && MBCS_ENTRY_FINAL_ACTION(cell)!=MBCS_STATE_ILLEGAL) {
  1.2276 +        return TRUE;
  1.2277 +    }
  1.2278 +
  1.2279 +    /* Pass 1: look for any final entry in this row that is not illegal. */
  1.2280 +    for(byteValue=0; byteValue<0x100; ++byteValue) {
  1.2281 +        cell=stateRow[byteValue];
  1.2282 +        if(!MBCS_ENTRY_IS_TRANSITION(cell) && MBCS_ENTRY_FINAL_ACTION(cell)!=MBCS_STATE_ILLEGAL) {
  1.2283 +            return TRUE;
  1.2284 +        }
  1.2285 +    }
  1.2286 +
  1.2287 +    /* Pass 2: descend into the states reached through transition entries. */
  1.2288 +    for(byteValue=0; byteValue<0x100; ++byteValue) {
  1.2289 +        cell=stateRow[byteValue];
  1.2290 +        if(MBCS_ENTRY_IS_TRANSITION(cell) &&
  1.2291 +           hasValidTrailBytes(stateTable, (uint8_t)MBCS_ENTRY_TRANSITION_STATE(cell))) {
  1.2292 +            return TRUE;
  1.2293 +        }
  1.2294 +    }
  1.2295 +    return FALSE;
  1.2296 +}
  1.2297 +
  1.2298 +/*
  1.2299 + * Is byte b a single/lead byte in this state?
  1.2300 + * Recurse for transition states, because here we don't want to say that
  1.2301 + * b is a lead byte if all byte sequences that start with b are illegal.
  1.2302 + */
  1.2303 +static UBool
  1.2304 +isSingleOrLead(const int32_t (*stateTable)[256], uint8_t state, UBool isDBCSOnly, uint8_t b) {
  1.2305 +    int32_t cell=stateTable[state][b];
  1.2306 +    uint8_t finalAction;
  1.2307 +
  1.2308 +    if(MBCS_ENTRY_IS_TRANSITION(cell)) {
  1.2309 +        /* lead byte: accept it only if some sequence continuing from it is not illegal */
  1.2310 +        return hasValidTrailBytes(stateTable, (uint8_t)MBCS_ENTRY_TRANSITION_STATE(cell));
  1.2311 +    }
  1.2312 +    finalAction=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(cell));
  1.2313 +    if(isDBCSOnly && finalAction==MBCS_STATE_CHANGE_ONLY) {
  1.2314 +        return FALSE;   /* SI/SO are illegal for DBCS-only conversion */
  1.2315 +    }
  1.2316 +    return finalAction!=MBCS_STATE_ILLEGAL;   /* single byte: anything but illegal counts */
  1.2317 +}
  1.2318 +
  1.2319 +U_CFUNC void
  1.2320 +ucnv_MBCSToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
  1.2321 +                          UErrorCode *pErrorCode) {
  1.2322 +    UConverter *cnv;
  1.2323 +    const uint8_t *source, *sourceLimit;
  1.2324 +    UChar *target;
  1.2325 +    const UChar *targetLimit;
  1.2326 +    int32_t *offsets;
  1.2327 +
  1.2328 +    const int32_t (*stateTable)[256];
  1.2329 +    const uint16_t *unicodeCodeUnits;
  1.2330 +
  1.2331 +    uint32_t offset;
  1.2332 +    uint8_t state;
  1.2333 +    int8_t byteIndex;
  1.2334 +    uint8_t *bytes;
  1.2335 +
  1.2336 +    int32_t sourceIndex, nextSourceIndex;
  1.2337 +
  1.2338 +    int32_t entry;
  1.2339 +    UChar c;
  1.2340 +    uint8_t action;
  1.2341 +
  1.2342 +    /* use optimized function if possible */
  1.2343 +    cnv=pArgs->converter;
  1.2344 +
  1.2345 +    if(cnv->preToULength>0) {
  1.2346 +        /*
  1.2347 +         * pass sourceIndex=-1 because we continue from an earlier buffer
  1.2348 +         * in the future, this may change with continuous offsets
  1.2349 +         */
  1.2350 +        ucnv_extContinueMatchToU(cnv, pArgs, -1, pErrorCode);
  1.2351 +
  1.2352 +        if(U_FAILURE(*pErrorCode) || cnv->preToULength<0) {
  1.2353 +            return;
  1.2354 +        }
  1.2355 +    }
  1.2356 +
  1.2357 +    if(cnv->sharedData->mbcs.countStates==1) {
  1.2358 +        if(!(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
  1.2359 +            ucnv_MBCSSingleToBMPWithOffsets(pArgs, pErrorCode);
  1.2360 +        } else {
  1.2361 +            ucnv_MBCSSingleToUnicodeWithOffsets(pArgs, pErrorCode);
  1.2362 +        }
  1.2363 +        return;
  1.2364 +    }
  1.2365 +
  1.2366 +    /* set up the local pointers */
  1.2367 +    source=(const uint8_t *)pArgs->source;
  1.2368 +    sourceLimit=(const uint8_t *)pArgs->sourceLimit;
  1.2369 +    target=pArgs->target;
  1.2370 +    targetLimit=pArgs->targetLimit;
  1.2371 +    offsets=pArgs->offsets;
  1.2372 +
  1.2373 +    if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
  1.2374 +        stateTable=(const int32_t (*)[256])cnv->sharedData->mbcs.swapLFNLStateTable;
  1.2375 +    } else {
  1.2376 +        stateTable=cnv->sharedData->mbcs.stateTable;
  1.2377 +    }
  1.2378 +    unicodeCodeUnits=cnv->sharedData->mbcs.unicodeCodeUnits;
  1.2379 +
  1.2380 +    /* get the converter state from UConverter */
  1.2381 +    offset=cnv->toUnicodeStatus;
  1.2382 +    byteIndex=cnv->toULength;
  1.2383 +    bytes=cnv->toUBytes;
  1.2384 +
  1.2385 +    /*
  1.2386 +     * if we are in the SBCS state for a DBCS-only converter,
  1.2387 +     * then load the DBCS state from the MBCS data
  1.2388 +     * (dbcsOnlyState==0 if it is not a DBCS-only converter)
  1.2389 +     */
  1.2390 +    if((state=(uint8_t)(cnv->mode))==0) {
  1.2391 +        state=cnv->sharedData->mbcs.dbcsOnlyState;
  1.2392 +    }
  1.2393 +
  1.2394 +    /* sourceIndex=-1 if the current character began in the previous buffer */
  1.2395 +    sourceIndex=byteIndex==0 ? 0 : -1;
  1.2396 +    nextSourceIndex=0;
  1.2397 +
  1.2398 +    /* conversion loop */
  1.2399 +    while(source<sourceLimit) {
  1.2400 +        /*
  1.2401 +         * This following test is to see if available input would overflow the output.
  1.2402 +         * It does not catch output of more than one code unit that
  1.2403 +         * overflows as a result of a surrogate pair or callback output
  1.2404 +         * from the last source byte.
  1.2405 +         * Therefore, those situations also test for overflows and will
  1.2406 +         * then break the loop, too.
  1.2407 +         */
  1.2408 +        if(target>=targetLimit) {
  1.2409 +            /* target is full */
  1.2410 +            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
  1.2411 +            break;
  1.2412 +        }
  1.2413 +
  1.2414 +        if(byteIndex==0) {
  1.2415 +            /* optimized loop for 1/2-byte input and BMP output */
  1.2416 +            if(offsets==NULL) {
  1.2417 +                do {
  1.2418 +                    entry=stateTable[state][*source];
  1.2419 +                    if(MBCS_ENTRY_IS_TRANSITION(entry)) {
  1.2420 +                        state=(uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry);
  1.2421 +                        offset=MBCS_ENTRY_TRANSITION_OFFSET(entry);
  1.2422 +
  1.2423 +                        ++source;
  1.2424 +                        if( source<sourceLimit &&
  1.2425 +                            MBCS_ENTRY_IS_FINAL(entry=stateTable[state][*source]) &&
  1.2426 +                            MBCS_ENTRY_FINAL_ACTION(entry)==MBCS_STATE_VALID_16 &&
  1.2427 +                            (c=unicodeCodeUnits[offset+MBCS_ENTRY_FINAL_VALUE_16(entry)])<0xfffe
  1.2428 +                        ) {
  1.2429 +                            ++source;
  1.2430 +                            *target++=c;
  1.2431 +                            state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
  1.2432 +                            offset=0;
  1.2433 +                        } else {
  1.2434 +                            /* set the state and leave the optimized loop */
  1.2435 +                            bytes[0]=*(source-1);
  1.2436 +                            byteIndex=1;
  1.2437 +                            break;
  1.2438 +                        }
  1.2439 +                    } else {
  1.2440 +                        if(MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) {
  1.2441 +                            /* output BMP code point */
  1.2442 +                            ++source;
  1.2443 +                            *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
  1.2444 +                            state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
  1.2445 +                        } else {
  1.2446 +                            /* leave the optimized loop */
  1.2447 +                            break;
  1.2448 +                        }
  1.2449 +                    }
  1.2450 +                } while(source<sourceLimit && target<targetLimit);
  1.2451 +            } else /* offsets!=NULL */ {
  1.2452 +                do {
  1.2453 +                    entry=stateTable[state][*source];
  1.2454 +                    if(MBCS_ENTRY_IS_TRANSITION(entry)) {
  1.2455 +                        state=(uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry);
  1.2456 +                        offset=MBCS_ENTRY_TRANSITION_OFFSET(entry);
  1.2457 +
  1.2458 +                        ++source;
  1.2459 +                        if( source<sourceLimit &&
  1.2460 +                            MBCS_ENTRY_IS_FINAL(entry=stateTable[state][*source]) &&
  1.2461 +                            MBCS_ENTRY_FINAL_ACTION(entry)==MBCS_STATE_VALID_16 &&
  1.2462 +                            (c=unicodeCodeUnits[offset+MBCS_ENTRY_FINAL_VALUE_16(entry)])<0xfffe
  1.2463 +                        ) {
  1.2464 +                            ++source;
  1.2465 +                            *target++=c;
  1.2466 +                            if(offsets!=NULL) {
  1.2467 +                                *offsets++=sourceIndex;
  1.2468 +                                sourceIndex=(nextSourceIndex+=2);
  1.2469 +                            }
  1.2470 +                            state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
  1.2471 +                            offset=0;
  1.2472 +                        } else {
  1.2473 +                            /* set the state and leave the optimized loop */
  1.2474 +                            ++nextSourceIndex;
  1.2475 +                            bytes[0]=*(source-1);
  1.2476 +                            byteIndex=1;
  1.2477 +                            break;
  1.2478 +                        }
  1.2479 +                    } else {
  1.2480 +                        if(MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) {
  1.2481 +                            /* output BMP code point */
  1.2482 +                            ++source;
  1.2483 +                            *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
  1.2484 +                            if(offsets!=NULL) {
  1.2485 +                                *offsets++=sourceIndex;
  1.2486 +                                sourceIndex=++nextSourceIndex;
  1.2487 +                            }
  1.2488 +                            state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
  1.2489 +                        } else {
  1.2490 +                            /* leave the optimized loop */
  1.2491 +                            break;
  1.2492 +                        }
  1.2493 +                    }
  1.2494 +                } while(source<sourceLimit && target<targetLimit);
  1.2495 +            }
  1.2496 +
  1.2497 +            /*
  1.2498 +             * these tests and break statements could be put inside the loop
  1.2499 +             * if C had "break outerLoop" like Java
  1.2500 +             */
  1.2501 +            if(source>=sourceLimit) {
  1.2502 +                break;
  1.2503 +            }
  1.2504 +            if(target>=targetLimit) {
  1.2505 +                /* target is full */
  1.2506 +                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
  1.2507 +                break;
  1.2508 +            }
  1.2509 +
  1.2510 +            ++nextSourceIndex;
  1.2511 +            bytes[byteIndex++]=*source++;
  1.2512 +        } else /* byteIndex>0 */ {
  1.2513 +            ++nextSourceIndex;
  1.2514 +            entry=stateTable[state][bytes[byteIndex++]=*source++];
  1.2515 +        }
  1.2516 +
  1.2517 +        if(MBCS_ENTRY_IS_TRANSITION(entry)) {
  1.2518 +            state=(uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry);
  1.2519 +            offset+=MBCS_ENTRY_TRANSITION_OFFSET(entry);
  1.2520 +            continue;
  1.2521 +        }
  1.2522 +
  1.2523 +        /* save the previous state for proper extension mapping with SI/SO-stateful converters */
  1.2524 +        cnv->mode=state;
  1.2525 +
  1.2526 +        /* set the next state early so that we can reuse the entry variable */
  1.2527 +        state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
  1.2528 +
  1.2529 +        /*
  1.2530 +         * An if-else-if chain provides more reliable performance for
  1.2531 +         * the most common cases compared to a switch.
  1.2532 +         */
  1.2533 +        action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry));
  1.2534 +        if(action==MBCS_STATE_VALID_16) {
  1.2535 +            offset+=MBCS_ENTRY_FINAL_VALUE_16(entry);
  1.2536 +            c=unicodeCodeUnits[offset];
  1.2537 +            if(c<0xfffe) {
  1.2538 +                /* output BMP code point */
  1.2539 +                *target++=c;
  1.2540 +                if(offsets!=NULL) {
  1.2541 +                    *offsets++=sourceIndex;
  1.2542 +                }
  1.2543 +                byteIndex=0;
  1.2544 +            } else if(c==0xfffe) {
  1.2545 +                if(UCNV_TO_U_USE_FALLBACK(cnv) && (entry=(int32_t)ucnv_MBCSGetFallback(&cnv->sharedData->mbcs, offset))!=0xfffe) {
  1.2546 +                    /* output fallback BMP code point */
  1.2547 +                    *target++=(UChar)entry;
  1.2548 +                    if(offsets!=NULL) {
  1.2549 +                        *offsets++=sourceIndex;
  1.2550 +                    }
  1.2551 +                    byteIndex=0;
  1.2552 +                }
  1.2553 +            } else {
  1.2554 +                /* callback(illegal) */
  1.2555 +                *pErrorCode=U_ILLEGAL_CHAR_FOUND;
  1.2556 +            }
  1.2557 +        } else if(action==MBCS_STATE_VALID_DIRECT_16) {
  1.2558 +            /* output BMP code point */
  1.2559 +            *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
  1.2560 +            if(offsets!=NULL) {
  1.2561 +                *offsets++=sourceIndex;
  1.2562 +            }
  1.2563 +            byteIndex=0;
  1.2564 +        } else if(action==MBCS_STATE_VALID_16_PAIR) {
  1.2565 +            offset+=MBCS_ENTRY_FINAL_VALUE_16(entry);
  1.2566 +            c=unicodeCodeUnits[offset++];
  1.2567 +            if(c<0xd800) {
  1.2568 +                /* output BMP code point below 0xd800 */
  1.2569 +                *target++=c;
  1.2570 +                if(offsets!=NULL) {
  1.2571 +                    *offsets++=sourceIndex;
  1.2572 +                }
  1.2573 +                byteIndex=0;
  1.2574 +            } else if(UCNV_TO_U_USE_FALLBACK(cnv) ? c<=0xdfff : c<=0xdbff) {
  1.2575 +                /* output roundtrip or fallback surrogate pair */
  1.2576 +                *target++=(UChar)(c&0xdbff);
  1.2577 +                if(offsets!=NULL) {
  1.2578 +                    *offsets++=sourceIndex;
  1.2579 +                }
  1.2580 +                byteIndex=0;
  1.2581 +                if(target<targetLimit) {
  1.2582 +                    *target++=unicodeCodeUnits[offset];
  1.2583 +                    if(offsets!=NULL) {
  1.2584 +                        *offsets++=sourceIndex;
  1.2585 +                    }
  1.2586 +                } else {
  1.2587 +                    /* target overflow */
  1.2588 +                    cnv->UCharErrorBuffer[0]=unicodeCodeUnits[offset];
  1.2589 +                    cnv->UCharErrorBufferLength=1;
  1.2590 +                    *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
  1.2591 +
  1.2592 +                    offset=0;
  1.2593 +                    break;
  1.2594 +                }
  1.2595 +            } else if(UCNV_TO_U_USE_FALLBACK(cnv) ? (c&0xfffe)==0xe000 : c==0xe000) {
  1.2596 +                /* output roundtrip BMP code point above 0xd800 or fallback BMP code point */
  1.2597 +                *target++=unicodeCodeUnits[offset];
  1.2598 +                if(offsets!=NULL) {
  1.2599 +                    *offsets++=sourceIndex;
  1.2600 +                }
  1.2601 +                byteIndex=0;
  1.2602 +            } else if(c==0xffff) {
  1.2603 +                /* callback(illegal) */
  1.2604 +                *pErrorCode=U_ILLEGAL_CHAR_FOUND;
  1.2605 +            }
  1.2606 +        } else if(action==MBCS_STATE_VALID_DIRECT_20 ||
  1.2607 +                  (action==MBCS_STATE_FALLBACK_DIRECT_20 && UCNV_TO_U_USE_FALLBACK(cnv))
  1.2608 +        ) {
  1.2609 +            entry=MBCS_ENTRY_FINAL_VALUE(entry);
  1.2610 +            /* output surrogate pair */
  1.2611 +            *target++=(UChar)(0xd800|(UChar)(entry>>10));
  1.2612 +            if(offsets!=NULL) {
  1.2613 +                *offsets++=sourceIndex;
  1.2614 +            }
  1.2615 +            byteIndex=0;
  1.2616 +            c=(UChar)(0xdc00|(UChar)(entry&0x3ff));
  1.2617 +            if(target<targetLimit) {
  1.2618 +                *target++=c;
  1.2619 +                if(offsets!=NULL) {
  1.2620 +                    *offsets++=sourceIndex;
  1.2621 +                }
  1.2622 +            } else {
  1.2623 +                /* target overflow */
  1.2624 +                cnv->UCharErrorBuffer[0]=c;
  1.2625 +                cnv->UCharErrorBufferLength=1;
  1.2626 +                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
  1.2627 +
  1.2628 +                offset=0;
  1.2629 +                break;
  1.2630 +            }
  1.2631 +        } else if(action==MBCS_STATE_CHANGE_ONLY) {
  1.2632 +            /*
  1.2633 +             * This serves as a state change without any output.
  1.2634 +             * It is useful for reading simple stateful encodings,
  1.2635 +             * for example using just Shift-In/Shift-Out codes.
  1.2636 +             * The 21 unused bits may later be used for more sophisticated
  1.2637 +             * state transitions.
  1.2638 +             */
  1.2639 +            if(cnv->sharedData->mbcs.dbcsOnlyState==0) {
  1.2640 +                byteIndex=0;
  1.2641 +            } else {
  1.2642 +                /* SI/SO are illegal for DBCS-only conversion */
  1.2643 +                state=(uint8_t)(cnv->mode); /* restore the previous state */
  1.2644 +
  1.2645 +                /* callback(illegal) */
  1.2646 +                *pErrorCode=U_ILLEGAL_CHAR_FOUND;
  1.2647 +            }
  1.2648 +        } else if(action==MBCS_STATE_FALLBACK_DIRECT_16) {
  1.2649 +            if(UCNV_TO_U_USE_FALLBACK(cnv)) {
  1.2650 +                /* output BMP code point */
  1.2651 +                *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
  1.2652 +                if(offsets!=NULL) {
  1.2653 +                    *offsets++=sourceIndex;
  1.2654 +                }
  1.2655 +                byteIndex=0;
  1.2656 +            }
  1.2657 +        } else if(action==MBCS_STATE_UNASSIGNED) {
  1.2658 +            /* just fall through */
  1.2659 +        } else if(action==MBCS_STATE_ILLEGAL) {
  1.2660 +            /* callback(illegal) */
  1.2661 +            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
  1.2662 +        } else {
  1.2663 +            /* reserved, must never occur */
  1.2664 +            byteIndex=0;
  1.2665 +        }
  1.2666 +
  1.2667 +        /* end of action codes: prepare for a new character */
  1.2668 +        offset=0;
  1.2669 +
  1.2670 +        if(byteIndex==0) {
  1.2671 +            sourceIndex=nextSourceIndex;
  1.2672 +        } else if(U_FAILURE(*pErrorCode)) {
  1.2673 +            /* callback(illegal) */
  1.2674 +            if(byteIndex>1) {
  1.2675 +                /*
  1.2676 +                 * Ticket 5691: consistent illegal sequences:
  1.2677 +                 * - We include at least the first byte in the illegal sequence.
  1.2678 +                 * - If any of the non-initial bytes could be the start of a character,
  1.2679 +                 *   we stop the illegal sequence before the first one of those.
  1.2680 +                 */
  1.2681 +                UBool isDBCSOnly=(UBool)(cnv->sharedData->mbcs.dbcsOnlyState!=0);
  1.2682 +                int8_t i;
  1.2683 +                for(i=1;
  1.2684 +                    i<byteIndex && !isSingleOrLead(stateTable, state, isDBCSOnly, bytes[i]);
  1.2685 +                    ++i) {}
  1.2686 +                if(i<byteIndex) {
  1.2687 +                    /* Back out some bytes. */
  1.2688 +                    int8_t backOutDistance=byteIndex-i;
  1.2689 +                    int32_t bytesFromThisBuffer=(int32_t)(source-(const uint8_t *)pArgs->source);
  1.2690 +                    byteIndex=i;  /* length of reported illegal byte sequence */
  1.2691 +                    if(backOutDistance<=bytesFromThisBuffer) {
  1.2692 +                        source-=backOutDistance;
  1.2693 +                    } else {
  1.2694 +                        /* Back out bytes from the previous buffer: Need to replay them. */
  1.2695 +                        cnv->preToULength=(int8_t)(bytesFromThisBuffer-backOutDistance);
  1.2696 +                        /* preToULength is negative! */
  1.2697 +                        uprv_memcpy(cnv->preToU, bytes+i, -cnv->preToULength);
  1.2698 +                        source=(const uint8_t *)pArgs->source;
  1.2699 +                    }
  1.2700 +                }
  1.2701 +            }
  1.2702 +            break;
  1.2703 +        } else /* unassigned sequences indicated with byteIndex>0 */ {
  1.2704 +            /* try an extension mapping */
  1.2705 +            pArgs->source=(const char *)source;
  1.2706 +            byteIndex=_extToU(cnv, cnv->sharedData,
  1.2707 +                              byteIndex, &source, sourceLimit,
  1.2708 +                              &target, targetLimit,
  1.2709 +                              &offsets, sourceIndex,
  1.2710 +                              pArgs->flush,
  1.2711 +                              pErrorCode);
  1.2712 +            sourceIndex=nextSourceIndex+=(int32_t)(source-(const uint8_t *)pArgs->source);
  1.2713 +
  1.2714 +            if(U_FAILURE(*pErrorCode)) {
  1.2715 +                /* not mappable or buffer overflow */
  1.2716 +                break;
  1.2717 +            }
  1.2718 +        }
  1.2719 +    }
  1.2720 +
  1.2721 +    /* set the converter state back into UConverter */
  1.2722 +    cnv->toUnicodeStatus=offset;
  1.2723 +    cnv->mode=state;
  1.2724 +    cnv->toULength=byteIndex;
  1.2725 +
  1.2726 +    /* write back the updated pointers */
  1.2727 +    pArgs->source=(const char *)source;
  1.2728 +    pArgs->target=target;
  1.2729 +    pArgs->offsets=offsets;
  1.2730 +}
  1.2731 +
  1.2732 +/*
  1.2733 + * This version of ucnv_MBCSGetNextUChar() is optimized for single-byte, single-state codepages.
  1.2734 + * We still need a conversion loop in case we find reserved action codes, which are to be ignored.
         + *
         + * Every byte is looked up in state 0 of the state table. Return value:
         + * - the code point for a byte with a roundtrip mapping (or a fallback mapping, if fallbacks are used)
         + * - UCNV_GET_NEXT_UCHAR_USE_TO_U for an unassigned byte; pArgs->source then points at that byte again
         + * - 0xffff with U_ILLEGAL_CHAR_FOUND for an illegal byte, which is recorded in cnv->toUBytes
         + * - 0xffff with U_INDEX_OUTOFBOUNDS_ERROR if the input ends without producing output
  1.2735 + */
  1.2736 +static UChar32
  1.2737 +ucnv_MBCSSingleGetNextUChar(UConverterToUnicodeArgs *pArgs,
  1.2738 +                        UErrorCode *pErrorCode) {
  1.2739 +    UConverter *cnv;
  1.2740 +    const int32_t (*stateTable)[256];
  1.2741 +    const uint8_t *source, *sourceLimit;
  1.2742 +
  1.2743 +    int32_t entry;
  1.2744 +    uint8_t action;
  1.2745 +
  1.2746 +    /* set up the local pointers */
  1.2747 +    cnv=pArgs->converter;
  1.2748 +    source=(const uint8_t *)pArgs->source;
  1.2749 +    sourceLimit=(const uint8_t *)pArgs->sourceLimit;
  1.2750 +    if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
  1.2751 +        stateTable=(const int32_t (*)[256])cnv->sharedData->mbcs.swapLFNLStateTable;
  1.2752 +    } else {
  1.2753 +        stateTable=cnv->sharedData->mbcs.stateTable;
  1.2754 +    }
  1.2755 +
  1.2756 +    /* conversion loop */
  1.2757 +    while(source<sourceLimit) {
  1.2758 +        entry=stateTable[0][*source++];
  1.2759 +        /* MBCS_ENTRY_IS_FINAL(entry) */
  1.2760 +
  1.2761 +        /* write back the updated pointer early so that we can return directly */
  1.2762 +        pArgs->source=(const char *)source;
  1.2763 +
  1.2764 +        if(MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) {
  1.2765 +            /* output BMP code point */
  1.2766 +            return (UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
  1.2767 +        }
  1.2768 +
  1.2769 +        /*
  1.2770 +         * An if-else-if chain provides more reliable performance for
  1.2771 +         * the most common cases compared to a switch.
  1.2772 +         */
  1.2773 +        action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry));
  1.2774 +        if( action==MBCS_STATE_VALID_DIRECT_20 ||
  1.2775 +            (action==MBCS_STATE_FALLBACK_DIRECT_20 && UCNV_TO_U_USE_FALLBACK(cnv))
  1.2776 +        ) {
  1.2777 +            /* output supplementary code point */
  1.2778 +            return (UChar32)(MBCS_ENTRY_FINAL_VALUE(entry)+0x10000);
  1.2779 +        } else if(action==MBCS_STATE_FALLBACK_DIRECT_16) {
  1.2780 +            if(UCNV_TO_U_USE_FALLBACK(cnv)) {
  1.2781 +                /* output BMP code point */
  1.2782 +                return (UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
  1.2783 +            }
  1.2784 +        } else if(action==MBCS_STATE_UNASSIGNED) {
  1.2785 +            /* just fall through */
  1.2786 +        } else if(action==MBCS_STATE_ILLEGAL) {
  1.2787 +            /* callback(illegal) */
  1.2788 +            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
  1.2789 +        } else {
  1.2790 +            /* reserved, must never occur */
  1.2791 +            continue;
  1.2792 +        }
  1.2793 +
  1.2794 +        if(U_FAILURE(*pErrorCode)) {
  1.2795 +            /* callback(illegal): record the offending byte, like ucnv_MBCSGetNextUChar() does */
         +            cnv->toUBytes[0]=*(source-1);
         +            cnv->toULength=1;
  1.2796 +            return 0xffff; /* not break: the code after the loop would replace the error with U_INDEX_OUTOFBOUNDS_ERROR */
  1.2797 +        } else /* unassigned sequence */ {
  1.2798 +            /* defer to the generic implementation */
  1.2799 +            pArgs->source=(const char *)source-1;
  1.2800 +            return UCNV_GET_NEXT_UCHAR_USE_TO_U;
  1.2801 +        }
  1.2802 +    }
  1.2803 +
  1.2804 +    /* no output because of empty input or only reserved (ignored) action codes */
  1.2805 +    *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
  1.2806 +    return 0xffff;
  1.2807 +}
  1.2808 +
  1.2809 +/*
  1.2810 + * Version of _MBCSToUnicodeWithOffsets() optimized for single-character
  1.2811 + * conversion without offset handling.
  1.2812 + * Returns the code point, or UCNV_GET_NEXT_UCHAR_USE_TO_U to defer, or 0xffff with *pErrorCode set to a failure.
  1.2813 + * When a character does not have a mapping to Unicode, then we return to the
  1.2814 + * generic ucnv_getNextUChar() code for extension/GB 18030 and error/callback
  1.2815 + * handling.
  1.2816 + * We also defer to the generic code in other complicated cases and have them
  1.2817 + * ultimately handled by _MBCSToUnicodeWithOffsets() itself.
  1.2818 + * On U_TRUNCATED_CHAR_FOUND and U_ILLEGAL_CHAR_FOUND the offending bytes are left in cnv->toUBytes.
  1.2819 + * All normal mappings and errors are handled here.
  1.2820 + */
  1.2821 +static UChar32
  1.2822 +ucnv_MBCSGetNextUChar(UConverterToUnicodeArgs *pArgs,
  1.2823 +                  UErrorCode *pErrorCode) {
  1.2824 +    UConverter *cnv;
  1.2825 +    const uint8_t *source, *sourceLimit, *lastSource;
  1.2826 +
  1.2827 +    const int32_t (*stateTable)[256];
  1.2828 +    const uint16_t *unicodeCodeUnits;
  1.2829 +
  1.2830 +    uint32_t offset;
  1.2831 +    uint8_t state;
  1.2832 +
  1.2833 +    int32_t entry;
  1.2834 +    UChar32 c;
  1.2835 +    uint8_t action;
  1.2836 +
  1.2837 +    /* use an optimized function, or the generic code, if possible */
  1.2838 +    cnv=pArgs->converter;
  1.2839 +
  1.2840 +    if(cnv->preToULength>0) {
  1.2841 +        /* use the generic code in ucnv_getNextUChar() to continue with a partial match */
  1.2842 +        return UCNV_GET_NEXT_UCHAR_USE_TO_U;
  1.2843 +    }
  1.2844 +
  1.2845 +    if(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SURROGATES) {
  1.2846 +        /*
  1.2847 +         * Using the generic ucnv_getNextUChar() code lets us deal correctly
  1.2848 +         * with the rare case of a codepage that maps single surrogates
  1.2849 +         * without adding the complexity to this already complicated function here.
  1.2850 +         */
  1.2851 +        return UCNV_GET_NEXT_UCHAR_USE_TO_U;
  1.2852 +    } else if(cnv->sharedData->mbcs.countStates==1) {
  1.2853 +        return ucnv_MBCSSingleGetNextUChar(pArgs, pErrorCode);
  1.2854 +    }
  1.2855 +
  1.2856 +    /* set up the local pointers */
  1.2857 +    source=lastSource=(const uint8_t *)pArgs->source; /* lastSource: start of the current character */
  1.2858 +    sourceLimit=(const uint8_t *)pArgs->sourceLimit;
  1.2859 +
  1.2860 +    if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
  1.2861 +        stateTable=(const int32_t (*)[256])cnv->sharedData->mbcs.swapLFNLStateTable;
  1.2862 +    } else {
  1.2863 +        stateTable=cnv->sharedData->mbcs.stateTable;
  1.2864 +    }
  1.2865 +    unicodeCodeUnits=cnv->sharedData->mbcs.unicodeCodeUnits;
  1.2866 +
  1.2867 +    /* get the converter state from UConverter */
  1.2868 +    offset=cnv->toUnicodeStatus;
  1.2869 +
  1.2870 +    /*
  1.2871 +     * if we are in the SBCS state for a DBCS-only converter,
  1.2872 +     * then load the DBCS state from the MBCS data
  1.2873 +     * (dbcsOnlyState==0 if it is not a DBCS-only converter)
  1.2874 +     */
  1.2875 +    if((state=(uint8_t)(cnv->mode))==0) {
  1.2876 +        state=cnv->sharedData->mbcs.dbcsOnlyState;
  1.2877 +    }
  1.2878 +
  1.2879 +    /* conversion loop */
  1.2880 +    c=U_SENTINEL; /* c<0 after the loop: no code point was produced */
  1.2881 +    while(source<sourceLimit) {
  1.2882 +        entry=stateTable[state][*source++];
  1.2883 +        if(MBCS_ENTRY_IS_TRANSITION(entry)) {
  1.2884 +            state=(uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry);
  1.2885 +            offset+=MBCS_ENTRY_TRANSITION_OFFSET(entry);
  1.2886 +
  1.2887 +            /* optimization for 1/2-byte input and BMP output */
  1.2888 +            if( source<sourceLimit &&
  1.2889 +                MBCS_ENTRY_IS_FINAL(entry=stateTable[state][*source]) &&
  1.2890 +                MBCS_ENTRY_FINAL_ACTION(entry)==MBCS_STATE_VALID_16 &&
  1.2891 +                (c=unicodeCodeUnits[offset+MBCS_ENTRY_FINAL_VALUE_16(entry)])<0xfffe
  1.2892 +            ) {
  1.2893 +                ++source;
  1.2894 +                state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
  1.2895 +                /* output BMP code point */
  1.2896 +                break;
  1.2897 +            }
  1.2898 +        } else {
  1.2899 +            /* save the previous state for proper extension mapping with SI/SO-stateful converters */
  1.2900 +            cnv->mode=state;
  1.2901 +
  1.2902 +            /* set the next state early so that we can reuse the entry variable */
  1.2903 +            state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
  1.2904 +
  1.2905 +            /*
  1.2906 +             * An if-else-if chain provides more reliable performance for
  1.2907 +             * the most common cases compared to a switch.
  1.2908 +             */
  1.2909 +            action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry));
  1.2910 +            if(action==MBCS_STATE_VALID_DIRECT_16) {
  1.2911 +                /* output BMP code point */
  1.2912 +                c=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
  1.2913 +                break;
  1.2914 +            } else if(action==MBCS_STATE_VALID_16) {
  1.2915 +                offset+=MBCS_ENTRY_FINAL_VALUE_16(entry);
  1.2916 +                c=unicodeCodeUnits[offset];
  1.2917 +                if(c<0xfffe) {
  1.2918 +                    /* output BMP code point */
  1.2919 +                    break;
  1.2920 +                } else if(c==0xfffe) {
  1.2921 +                    if(UCNV_TO_U_USE_FALLBACK(cnv) && (c=ucnv_MBCSGetFallback(&cnv->sharedData->mbcs, offset))!=0xfffe) {
  1.2922 +                        break;
  1.2923 +                    }
  1.2924 +                } else {
  1.2925 +                    /* callback(illegal) */
  1.2926 +                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
  1.2927 +                }
  1.2928 +            } else if(action==MBCS_STATE_VALID_16_PAIR) {
  1.2929 +                offset+=MBCS_ENTRY_FINAL_VALUE_16(entry);
  1.2930 +                c=unicodeCodeUnits[offset++];
  1.2931 +                if(c<0xd800) {
  1.2932 +                    /* output BMP code point below 0xd800 */
  1.2933 +                    break;
  1.2934 +                } else if(UCNV_TO_U_USE_FALLBACK(cnv) ? c<=0xdfff : c<=0xdbff) {
  1.2935 +                    /* output roundtrip or fallback supplementary code point */
  1.2936 +                    c=((c&0x3ff)<<10)+unicodeCodeUnits[offset]+(0x10000-0xdc00);
  1.2937 +                    break;
  1.2938 +                } else if(UCNV_TO_U_USE_FALLBACK(cnv) ? (c&0xfffe)==0xe000 : c==0xe000) {
  1.2939 +                    /* output roundtrip BMP code point above 0xd800 or fallback BMP code point */
  1.2940 +                    c=unicodeCodeUnits[offset];
  1.2941 +                    break;
  1.2942 +                } else if(c==0xffff) {
  1.2943 +                    /* callback(illegal) */
  1.2944 +                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
  1.2945 +                }
  1.2946 +            } else if(action==MBCS_STATE_VALID_DIRECT_20 ||
  1.2947 +                      (action==MBCS_STATE_FALLBACK_DIRECT_20 && UCNV_TO_U_USE_FALLBACK(cnv))
  1.2948 +            ) {
  1.2949 +                /* output supplementary code point */
  1.2950 +                c=(UChar32)(MBCS_ENTRY_FINAL_VALUE(entry)+0x10000);
  1.2951 +                break;
  1.2952 +            } else if(action==MBCS_STATE_CHANGE_ONLY) {
  1.2953 +                /*
  1.2954 +                 * This serves as a state change without any output.
  1.2955 +                 * It is useful for reading simple stateful encodings,
  1.2956 +                 * for example using just Shift-In/Shift-Out codes.
  1.2957 +                 * The 21 unused bits may later be used for more sophisticated
  1.2958 +                 * state transitions.
  1.2959 +                 */
  1.2960 +                if(cnv->sharedData->mbcs.dbcsOnlyState!=0) {
  1.2961 +                    /* SI/SO are illegal for DBCS-only conversion */
  1.2962 +                    state=(uint8_t)(cnv->mode); /* restore the previous state */
  1.2963 +
  1.2964 +                    /* callback(illegal) */
  1.2965 +                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
  1.2966 +                } /* else: a legal state change falls through to the "unassigned" deferral below */
  1.2967 +            } else if(action==MBCS_STATE_FALLBACK_DIRECT_16) {
  1.2968 +                if(UCNV_TO_U_USE_FALLBACK(cnv)) {
  1.2969 +                    /* output BMP code point */
  1.2970 +                    c=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
  1.2971 +                    break;
  1.2972 +                }
  1.2973 +            } else if(action==MBCS_STATE_UNASSIGNED) {
  1.2974 +                /* just fall through */
  1.2975 +            } else if(action==MBCS_STATE_ILLEGAL) {
  1.2976 +                /* callback(illegal) */
  1.2977 +                *pErrorCode=U_ILLEGAL_CHAR_FOUND;
  1.2978 +            } else {
  1.2979 +                /* reserved (must never occur): ignore the bytes and start a new character */
  1.2980 +                offset=0;
  1.2981 +                lastSource=source;
  1.2982 +                continue;
  1.2983 +            }
  1.2984 +
  1.2985 +            /* end of action codes: prepare for a new character */
  1.2986 +            offset=0;
  1.2987 +
  1.2988 +            if(U_FAILURE(*pErrorCode)) {
  1.2989 +                /* callback(illegal) */
  1.2990 +                break;
  1.2991 +            } else /* unassigned sequence */ {
  1.2992 +                /* defer to the generic implementation */
  1.2993 +                cnv->toUnicodeStatus=0;
  1.2994 +                cnv->mode=state;
  1.2995 +                pArgs->source=(const char *)lastSource; /* rewind to the start of this character */
  1.2996 +                return UCNV_GET_NEXT_UCHAR_USE_TO_U;
  1.2997 +            }
  1.2998 +        }
  1.2999 +    }
  1.3000 +
  1.3001 +    if(c<0) {
  1.3002 +        if(U_SUCCESS(*pErrorCode) && source==sourceLimit && lastSource<source) {
  1.3003 +            /* incomplete character byte sequence */
  1.3004 +            uint8_t *bytes=cnv->toUBytes;
  1.3005 +            cnv->toULength=(int8_t)(source-lastSource);
  1.3006 +            do {
  1.3007 +                *bytes++=*lastSource++;
  1.3008 +            } while(lastSource<source);
  1.3009 +            *pErrorCode=U_TRUNCATED_CHAR_FOUND;
  1.3010 +        } else if(U_FAILURE(*pErrorCode)) {
  1.3011 +            /* callback(illegal) */
  1.3012 +            /*
  1.3013 +             * Ticket 5691: consistent illegal sequences:
  1.3014 +             * - We include at least the first byte in the illegal sequence.
  1.3015 +             * - If any of the non-initial bytes could be the start of a character,
  1.3016 +             *   we stop the illegal sequence before the first one of those.
  1.3017 +             */
  1.3018 +            UBool isDBCSOnly=(UBool)(cnv->sharedData->mbcs.dbcsOnlyState!=0);
  1.3019 +            uint8_t *bytes=cnv->toUBytes;
  1.3020 +            *bytes++=*lastSource++;     /* first byte */
  1.3021 +            if(lastSource==source) {
  1.3022 +                cnv->toULength=1;
  1.3023 +            } else /* lastSource<source: multi-byte character */ {
  1.3024 +                int8_t i;
  1.3025 +                for(i=1;
  1.3026 +                    lastSource<source && !isSingleOrLead(stateTable, state, isDBCSOnly, *lastSource);
  1.3027 +                    ++i
  1.3028 +                ) {
  1.3029 +                    *bytes++=*lastSource++;
  1.3030 +                }
  1.3031 +                cnv->toULength=i;
  1.3032 +                source=lastSource; /* bytes after the reported sequence are read again by the next call */
  1.3033 +            }
  1.3034 +        } else {
  1.3035 +            /* no output because of empty input or only state changes */
  1.3036 +            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
  1.3037 +        }
  1.3038 +        c=0xffff; /* returned together with the failure code set above */
  1.3039 +    }
  1.3040 +
  1.3041 +    /* set the converter state back into UConverter, ready for a new character */
  1.3042 +    cnv->toUnicodeStatus=0;
  1.3043 +    cnv->mode=state;
  1.3044 +
  1.3045 +    /* write back the updated pointer */
  1.3046 +    pArgs->source=(const char *)source;
  1.3047 +    return c;
  1.3048 +}
  1.3049 +
  1.3050 +#if 0
  1.3051 +/*
  1.3052 + * Code disabled 2002dec09 (ICU 2.4) because it is not currently used in ICU. markus
  1.3053 + * Removal improves code coverage.
  1.3054 + */
  1.3055 +/**
  1.3056 + * This version of ucnv_MBCSSimpleGetNextUChar() is optimized for single-byte, single-state codepages.
  1.3057 + * It does not handle the EBCDIC swaplfnl option (set in UConverter).
  1.3058 + * It does not handle conversion extensions (_extToU()). Returns the code point for byte b, 0xfffe if unassigned (or fallback-only while fallbacks are off), 0xffff if illegal or reserved.
  1.3059 + */
  1.3060 +U_CFUNC UChar32
  1.3061 +ucnv_MBCSSingleSimpleGetNextUChar(UConverterSharedData *sharedData,
  1.3062 +                              uint8_t b, UBool useFallback) {
  1.3063 +    int32_t entry;
  1.3064 +    uint8_t action;
  1.3065 +
  1.3066 +    entry=sharedData->mbcs.stateTable[0][b]; /* single-state codepage: always look up in state 0 */
  1.3067 +    /* MBCS_ENTRY_IS_FINAL(entry) */
  1.3068 +
  1.3069 +    if(MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) {
  1.3070 +        /* output BMP code point */
  1.3071 +        return (UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
  1.3072 +    }
  1.3073 +
  1.3074 +    /*
  1.3075 +     * An if-else-if chain provides more reliable performance for
  1.3076 +     * the most common cases compared to a switch.
  1.3077 +     */
  1.3078 +    action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry));
  1.3079 +    if(action==MBCS_STATE_VALID_DIRECT_20) {
  1.3080 +        /* output supplementary code point */
  1.3081 +        return 0x10000+MBCS_ENTRY_FINAL_VALUE(entry);
  1.3082 +    } else if(action==MBCS_STATE_FALLBACK_DIRECT_16) {
  1.3083 +        if(!TO_U_USE_FALLBACK(useFallback)) {
  1.3084 +            return 0xfffe; /* fallback mapping not wanted: report as unassigned */
  1.3085 +        }
  1.3086 +        /* output BMP code point */
  1.3087 +        return (UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
  1.3088 +    } else if(action==MBCS_STATE_FALLBACK_DIRECT_20) {
  1.3089 +        if(!TO_U_USE_FALLBACK(useFallback)) {
  1.3090 +            return 0xfffe; /* fallback mapping not wanted: report as unassigned */
  1.3091 +        }
  1.3092 +        /* output supplementary code point */
  1.3093 +        return 0x10000+MBCS_ENTRY_FINAL_VALUE(entry);
  1.3094 +    } else if(action==MBCS_STATE_UNASSIGNED) {
  1.3095 +        return 0xfffe;
  1.3096 +    } else if(action==MBCS_STATE_ILLEGAL) {
  1.3097 +        return 0xffff;
  1.3098 +    } else {
  1.3099 +        /* reserved, must never occur */
  1.3100 +        return 0xffff;
  1.3101 +    }
  1.3102 +}
  1.3103 +#endif
  1.3104 +
  1.3105 +/*
  1.3106 + * This is a simple version of _MBCSGetNextUChar() that is used
  1.3107 + * by other converter implementations.
  1.3108 + * It only returns an "assigned" result if it consumes the entire input.
  1.3109 + * It does not use state from the converter, nor error codes.
  1.3110 + * It does not handle the EBCDIC swaplfnl option (set in UConverter).
  1.3111 + * It handles conversion extensions but not GB 18030.
  1.3112 + *
         + * sharedData   provides the state table, the Unicode code units and the extension indexes
         + * source       the bytes of exactly one character
         + * length       number of bytes in source; length<=0 is reported as illegal
         + * useFallback  whether fallback mappings may be returned
         + *
  1.3113 + * Return value:
  1.3114 + * U+fffe   unassigned
  1.3115 + * U+ffff   illegal (also: truncated input, input not fully consumed, state-change-only or reserved entries)
  1.3116 + * otherwise the Unicode code point
  1.3117 + */
  1.3118 +U_CFUNC UChar32
  1.3119 +ucnv_MBCSSimpleGetNextUChar(UConverterSharedData *sharedData,
  1.3120 +                        const char *source, int32_t length,
  1.3121 +                        UBool useFallback) {
  1.3122 +    const int32_t (*stateTable)[256];
  1.3123 +    const uint16_t *unicodeCodeUnits;
  1.3124 +
  1.3125 +    uint32_t offset;
  1.3126 +    uint8_t state, action;
  1.3127 +
  1.3128 +    UChar32 c;
  1.3129 +    int32_t i, entry;
  1.3130 +
  1.3131 +    if(length<=0) {
  1.3132 +        /* no input at all: "illegal" */
  1.3133 +        return 0xffff;
  1.3134 +    }
  1.3135 +
  1.3136 +#if 0
  1.3137 +/*
  1.3138 + * Code disabled 2002dec09 (ICU 2.4) because it is not currently used in ICU. markus
  1.3139 + * TODO In future releases, verify that this function is never called for SBCS
  1.3140 + * conversions, i.e., that sharedData->mbcs.countStates==1 is still true.
  1.3141 + * Removal improves code coverage.
  1.3142 + */
  1.3143 +    /* use optimized function if possible */
  1.3144 +    if(sharedData->mbcs.countStates==1) {
  1.3145 +        if(length==1) {
  1.3146 +            return ucnv_MBCSSingleSimpleGetNextUChar(sharedData, (uint8_t)*source, useFallback);
  1.3147 +        } else {
  1.3148 +            return 0xffff; /* illegal: more than a single byte for an SBCS converter */
  1.3149 +        }
  1.3150 +    }
  1.3151 +#endif
  1.3152 +
  1.3153 +    /* set up the local pointers */
  1.3154 +    stateTable=sharedData->mbcs.stateTable;
  1.3155 +    unicodeCodeUnits=sharedData->mbcs.unicodeCodeUnits;
  1.3156 +
  1.3157 +    /* converter state */
  1.3158 +    offset=0;
  1.3159 +    state=sharedData->mbcs.dbcsOnlyState;
  1.3160 +
  1.3161 +    /* conversion loop: i<length holds at every read because each transition checks i==length */
  1.3162 +    for(i=0;;) {
  1.3163 +        entry=stateTable[state][(uint8_t)source[i++]];
  1.3164 +        if(MBCS_ENTRY_IS_TRANSITION(entry)) {
  1.3165 +            state=(uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry);
  1.3166 +            offset+=MBCS_ENTRY_TRANSITION_OFFSET(entry);
  1.3167 +
  1.3168 +            if(i==length) {
  1.3169 +                return 0xffff; /* truncated character */
  1.3170 +            }
  1.3171 +        } else {
  1.3172 +            /*
  1.3173 +             * An if-else-if chain provides more reliable performance for
  1.3174 +             * the most common cases compared to a switch.
  1.3175 +             */
  1.3176 +            action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry));
  1.3177 +            if(action==MBCS_STATE_VALID_16) {
  1.3178 +                offset+=MBCS_ENTRY_FINAL_VALUE_16(entry);
  1.3179 +                c=unicodeCodeUnits[offset];
  1.3180 +                if(c!=0xfffe) {
  1.3181 +                    /* done */
  1.3182 +                } else if(TO_U_USE_FALLBACK(useFallback)) { /* this function has no cnv: use the useFallback parameter */
  1.3183 +                    c=ucnv_MBCSGetFallback(&sharedData->mbcs, offset);
  1.3184 +                }
  1.3185 +                /* else done with 0xfffe */
  1.3186 +                break;
  1.3187 +            } else if(action==MBCS_STATE_VALID_DIRECT_16) {
  1.3188 +                /* output BMP code point */
  1.3189 +                c=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
  1.3190 +                break;
  1.3191 +            } else if(action==MBCS_STATE_VALID_16_PAIR) {
  1.3192 +                offset+=MBCS_ENTRY_FINAL_VALUE_16(entry);
  1.3193 +                c=unicodeCodeUnits[offset++];
  1.3194 +                if(c<0xd800) {
  1.3195 +                    /* output BMP code point below 0xd800 */
  1.3196 +                } else if(TO_U_USE_FALLBACK(useFallback) ? c<=0xdfff : c<=0xdbff) {
  1.3197 +                    /* output roundtrip or fallback supplementary code point */
  1.3198 +                    c=(UChar32)(((c&0x3ff)<<10)+unicodeCodeUnits[offset]+(0x10000-0xdc00));
  1.3199 +                } else if(TO_U_USE_FALLBACK(useFallback) ? (c&0xfffe)==0xe000 : c==0xe000) {
  1.3200 +                    /* output roundtrip BMP code point above 0xd800 or fallback BMP code point */
  1.3201 +                    c=unicodeCodeUnits[offset];
  1.3202 +                } else if(c==0xffff) {
  1.3203 +                    return 0xffff;
  1.3204 +                } else {
  1.3205 +                    c=0xfffe;
  1.3206 +                }
  1.3207 +                break;
  1.3208 +            } else if(action==MBCS_STATE_VALID_DIRECT_20) {
  1.3209 +                /* output supplementary code point */
  1.3210 +                c=0x10000+MBCS_ENTRY_FINAL_VALUE(entry);
  1.3211 +                break;
  1.3212 +            } else if(action==MBCS_STATE_FALLBACK_DIRECT_16) {
  1.3213 +                if(!TO_U_USE_FALLBACK(useFallback)) {
  1.3214 +                    c=0xfffe;
  1.3215 +                    break;
  1.3216 +                }
  1.3217 +                /* output BMP code point */
  1.3218 +                c=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
  1.3219 +                break;
  1.3220 +            } else if(action==MBCS_STATE_FALLBACK_DIRECT_20) {
  1.3221 +                if(!TO_U_USE_FALLBACK(useFallback)) {
  1.3222 +                    c=0xfffe;
  1.3223 +                    break;
  1.3224 +                }
  1.3225 +                /* output supplementary code point */
  1.3226 +                c=0x10000+MBCS_ENTRY_FINAL_VALUE(entry);
  1.3227 +                break;
  1.3228 +            } else if(action==MBCS_STATE_UNASSIGNED) {
  1.3229 +                c=0xfffe;
  1.3230 +                break;
  1.3231 +            }
  1.3232 +
  1.3233 +            /*
  1.3234 +             * forbid MBCS_STATE_CHANGE_ONLY for this function,
  1.3235 +             * and MBCS_STATE_ILLEGAL and reserved action codes
  1.3236 +             */
  1.3237 +            return 0xffff;
  1.3238 +        }
  1.3239 +    }
  1.3240 +
  1.3241 +    if(i!=length) {
  1.3242 +        /* illegal for this function: not all input consumed */
  1.3243 +        return 0xffff;
  1.3244 +    }
  1.3245 +
  1.3246 +    if(c==0xfffe) {
  1.3247 +        /* try an extension mapping */
  1.3248 +        const int32_t *cx=sharedData->mbcs.extIndexes;
  1.3249 +        if(cx!=NULL) {
  1.3250 +            return ucnv_extSimpleMatchToU(cx, source, length, useFallback);
  1.3251 +        }
  1.3252 +    }
  1.3253 +
  1.3254 +    return c;
  1.3255 +}
  1.3256 +
  1.3257 +/* MBCS-from-Unicode conversion functions ----------------------------------- */
  1.3258 +
  1.3259 +/* This version of ucnv_MBCSFromUnicodeWithOffsets() is optimized for double-byte codepages. */
  1.3260 +static void
  1.3261 +ucnv_MBCSDoubleFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
  1.3262 +                                  UErrorCode *pErrorCode) {
  1.3263 +    UConverter *cnv;
  1.3264 +    const UChar *source, *sourceLimit;
  1.3265 +    uint8_t *target;
  1.3266 +    int32_t targetCapacity;
  1.3267 +    int32_t *offsets;
  1.3268 +
  1.3269 +    const uint16_t *table;
  1.3270 +    const uint16_t *mbcsIndex;
  1.3271 +    const uint8_t *bytes;
  1.3272 +
  1.3273 +    UChar32 c;
  1.3274 +
  1.3275 +    int32_t sourceIndex, nextSourceIndex;
  1.3276 +
  1.3277 +    uint32_t stage2Entry;
  1.3278 +    uint32_t asciiRoundtrips;
  1.3279 +    uint32_t value;
  1.3280 +    uint8_t unicodeMask;
  1.3281 +
  1.3282 +    /* use optimized function if possible */
  1.3283 +    cnv=pArgs->converter;
  1.3284 +    unicodeMask=cnv->sharedData->mbcs.unicodeMask;
  1.3285 +
  1.3286 +    /* set up the local pointers */
  1.3287 +    source=pArgs->source;
  1.3288 +    sourceLimit=pArgs->sourceLimit;
  1.3289 +    target=(uint8_t *)pArgs->target;
  1.3290 +    targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
  1.3291 +    offsets=pArgs->offsets;
  1.3292 +
  1.3293 +    table=cnv->sharedData->mbcs.fromUnicodeTable;
  1.3294 +    mbcsIndex=cnv->sharedData->mbcs.mbcsIndex;
  1.3295 +    if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
  1.3296 +        bytes=cnv->sharedData->mbcs.swapLFNLFromUnicodeBytes;
  1.3297 +    } else {
  1.3298 +        bytes=cnv->sharedData->mbcs.fromUnicodeBytes;
  1.3299 +    }
  1.3300 +    asciiRoundtrips=cnv->sharedData->mbcs.asciiRoundtrips;
  1.3301 +
  1.3302 +    /* get the converter state from UConverter */
  1.3303 +    c=cnv->fromUChar32;
  1.3304 +
  1.3305 +    /* sourceIndex=-1 if the current character began in the previous buffer */
  1.3306 +    sourceIndex= c==0 ? 0 : -1;
  1.3307 +    nextSourceIndex=0;
  1.3308 +
  1.3309 +    /* conversion loop */
  1.3310 +    if(c!=0 && targetCapacity>0) {
  1.3311 +        goto getTrail;
  1.3312 +    }
  1.3313 +
  1.3314 +    while(source<sourceLimit) {
  1.3315 +        /*
  1.3316 +         * This following test is to see if available input would overflow the output.
  1.3317 +         * It does not catch output of more than one byte that
  1.3318 +         * overflows as a result of a multi-byte character or callback output
  1.3319 +         * from the last source character.
  1.3320 +         * Therefore, those situations also test for overflows and will
  1.3321 +         * then break the loop, too.
  1.3322 +         */
  1.3323 +        if(targetCapacity>0) {
  1.3324 +            /*
  1.3325 +             * Get a correct Unicode code point:
  1.3326 +             * a single UChar for a BMP code point or
  1.3327 +             * a matched surrogate pair for a "supplementary code point".
  1.3328 +             */
  1.3329 +            c=*source++;
  1.3330 +            ++nextSourceIndex;
  1.3331 +            if(c<=0x7f && IS_ASCII_ROUNDTRIP(c, asciiRoundtrips)) {
  1.3332 +                *target++=(uint8_t)c;
  1.3333 +                if(offsets!=NULL) {
  1.3334 +                    *offsets++=sourceIndex;
  1.3335 +                    sourceIndex=nextSourceIndex;
  1.3336 +                }
  1.3337 +                --targetCapacity;
  1.3338 +                c=0;
  1.3339 +                continue;
  1.3340 +            }
  1.3341 +            /*
  1.3342 +             * utf8Friendly table: Test for <=0xd7ff rather than <=MBCS_FAST_MAX
  1.3343 +             * to avoid dealing with surrogates.
  1.3344 +             * MBCS_FAST_MAX must be >=0xd7ff.
  1.3345 +             */
  1.3346 +            if(c<=0xd7ff) {
  1.3347 +                value=DBCS_RESULT_FROM_MOST_BMP(mbcsIndex, (const uint16_t *)bytes, c);
  1.3348 +                /* There are only roundtrips (!=0) and no-mapping (==0) entries. */
  1.3349 +                if(value==0) {
  1.3350 +                    goto unassigned;
  1.3351 +                }
  1.3352 +                /* output the value */
  1.3353 +            } else {
  1.3354 +                /*
  1.3355 +                 * This also tests if the codepage maps single surrogates.
  1.3356 +                 * If it does, then surrogates are not paired but mapped separately.
  1.3357 +                 * Note that in this case unmatched surrogates are not detected.
  1.3358 +                 */
  1.3359 +                if(U16_IS_SURROGATE(c) && !(unicodeMask&UCNV_HAS_SURROGATES)) {
  1.3360 +                    if(U16_IS_SURROGATE_LEAD(c)) {
  1.3361 +getTrail:
  1.3362 +                        if(source<sourceLimit) {
  1.3363 +                            /* test the following code unit */
  1.3364 +                            UChar trail=*source;
  1.3365 +                            if(U16_IS_TRAIL(trail)) {
  1.3366 +                                ++source;
  1.3367 +                                ++nextSourceIndex;
  1.3368 +                                c=U16_GET_SUPPLEMENTARY(c, trail);
  1.3369 +                                if(!(unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
  1.3370 +                                    /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
  1.3371 +                                    /* callback(unassigned) */
  1.3372 +                                    goto unassigned;
  1.3373 +                                }
  1.3374 +                                /* convert this supplementary code point */
  1.3375 +                                /* exit this condition tree */
  1.3376 +                            } else {
  1.3377 +                                /* this is an unmatched lead code unit (1st surrogate) */
  1.3378 +                                /* callback(illegal) */
  1.3379 +                                *pErrorCode=U_ILLEGAL_CHAR_FOUND;
  1.3380 +                                break;
  1.3381 +                            }
  1.3382 +                        } else {
  1.3383 +                            /* no more input */
  1.3384 +                            break;
  1.3385 +                        }
  1.3386 +                    } else {
  1.3387 +                        /* this is an unmatched trail code unit (2nd surrogate) */
  1.3388 +                        /* callback(illegal) */
  1.3389 +                        *pErrorCode=U_ILLEGAL_CHAR_FOUND;
  1.3390 +                        break;
  1.3391 +                    }
  1.3392 +                }
  1.3393 +
  1.3394 +                /* convert the Unicode code point in c into codepage bytes */
  1.3395 +                stage2Entry=MBCS_STAGE_2_FROM_U(table, c);
  1.3396 +
  1.3397 +                /* get the bytes and the length for the output */
  1.3398 +                /* MBCS_OUTPUT_2 */
  1.3399 +                value=MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c);
  1.3400 +
  1.3401 +                /* is this code point assigned, or do we use fallbacks? */
  1.3402 +                if(!(MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c) ||
  1.3403 +                     (UCNV_FROM_U_USE_FALLBACK(cnv, c) && value!=0))
  1.3404 +                ) {
  1.3405 +                    /*
  1.3406 +                     * We allow a 0 byte output if the "assigned" bit is set for this entry.
  1.3407 +                     * There is no way with this data structure for fallback output
  1.3408 +                     * to be a zero byte.
  1.3409 +                     */
  1.3410 +
  1.3411 +unassigned:
  1.3412 +                    /* try an extension mapping */
  1.3413 +                    pArgs->source=source;
  1.3414 +                    c=_extFromU(cnv, cnv->sharedData,
  1.3415 +                                c, &source, sourceLimit,
  1.3416 +                                &target, target+targetCapacity,
  1.3417 +                                &offsets, sourceIndex,
  1.3418 +                                pArgs->flush,
  1.3419 +                                pErrorCode);
  1.3420 +                    nextSourceIndex+=(int32_t)(source-pArgs->source);
  1.3421 +
  1.3422 +                    if(U_FAILURE(*pErrorCode)) {
  1.3423 +                        /* not mappable or buffer overflow */
  1.3424 +                        break;
  1.3425 +                    } else {
  1.3426 +                        /* a mapping was written to the target, continue */
  1.3427 +
  1.3428 +                        /* recalculate the targetCapacity after an extension mapping */
  1.3429 +                        targetCapacity=(int32_t)(pArgs->targetLimit-(char *)target);
  1.3430 +
  1.3431 +                        /* normal end of conversion: prepare for a new character */
  1.3432 +                        sourceIndex=nextSourceIndex;
  1.3433 +                        continue;
  1.3434 +                    }
  1.3435 +                }
  1.3436 +            }
  1.3437 +
  1.3438 +            /* write the output character bytes from value and length */
  1.3439 +            /* from the first if in the loop we know that targetCapacity>0 */
  1.3440 +            if(value<=0xff) {
  1.3441 +                /* this is easy because we know that there is enough space */
  1.3442 +                *target++=(uint8_t)value;
  1.3443 +                if(offsets!=NULL) {
  1.3444 +                    *offsets++=sourceIndex;
  1.3445 +                }
  1.3446 +                --targetCapacity;
  1.3447 +            } else /* length==2 */ {
  1.3448 +                *target++=(uint8_t)(value>>8);
  1.3449 +                if(2<=targetCapacity) {
  1.3450 +                    *target++=(uint8_t)value;
  1.3451 +                    if(offsets!=NULL) {
  1.3452 +                        *offsets++=sourceIndex;
  1.3453 +                        *offsets++=sourceIndex;
  1.3454 +                    }
  1.3455 +                    targetCapacity-=2;
  1.3456 +                } else {
  1.3457 +                    if(offsets!=NULL) {
  1.3458 +                        *offsets++=sourceIndex;
  1.3459 +                    }
  1.3460 +                    cnv->charErrorBuffer[0]=(char)value;
  1.3461 +                    cnv->charErrorBufferLength=1;
  1.3462 +
  1.3463 +                    /* target overflow */
  1.3464 +                    targetCapacity=0;
  1.3465 +                    *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
  1.3466 +                    c=0;
  1.3467 +                    break;
  1.3468 +                }
  1.3469 +            }
  1.3470 +
  1.3471 +            /* normal end of conversion: prepare for a new character */
  1.3472 +            c=0;
  1.3473 +            sourceIndex=nextSourceIndex;
  1.3474 +            continue;
  1.3475 +        } else {
  1.3476 +            /* target is full */
  1.3477 +            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
  1.3478 +            break;
  1.3479 +        }
  1.3480 +    }
  1.3481 +
  1.3482 +    /* set the converter state back into UConverter */
  1.3483 +    cnv->fromUChar32=c;
  1.3484 +
  1.3485 +    /* write back the updated pointers */
  1.3486 +    pArgs->source=source;
  1.3487 +    pArgs->target=(char *)target;
  1.3488 +    pArgs->offsets=offsets;
  1.3489 +}
  1.3490 +
  1.3491 +/* This version of ucnv_MBCSFromUnicodeWithOffsets() is optimized for single-byte codepages: each code point with a usable table result is written as exactly one target byte, and all others are handed to _extFromU(). Errors are reported through *pErrorCode; on return pArgs->source/target/offsets and cnv->fromUChar32 are updated. */
  1.3492 +static void
  1.3493 +ucnv_MBCSSingleFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs, /* in/out: supplies converter, source/sourceLimit, target/targetLimit, offsets and flush; source, target and offsets are written back */
  1.3494 +                                  UErrorCode *pErrorCode) { /* set here to U_ILLEGAL_CHAR_FOUND or U_BUFFER_OVERFLOW_ERROR, or by _extFromU() */
  1.3495 +    UConverter *cnv;
  1.3496 +    const UChar *source, *sourceLimit;
  1.3497 +    uint8_t *target;
  1.3498 +    int32_t targetCapacity; /* bytes still free in the target */
  1.3499 +    int32_t *offsets; /* may be NULL; otherwise gets the source index for each byte written here */
  1.3500 +
  1.3501 +    const uint16_t *table;
  1.3502 +    const uint16_t *results; /* table and results are the two inputs of MBCS_SINGLE_RESULT_FROM_U() */
  1.3503 +
  1.3504 +    UChar32 c; /* current code unit or code point; loaded from and stored back to cnv->fromUChar32 */
  1.3505 +
  1.3506 +    int32_t sourceIndex, nextSourceIndex; /* index of the current character / of the next unread code unit, counted from the start of this buffer */
  1.3507 +
  1.3508 +    uint16_t value, minValue; /* table result for c; smallest result that is accepted as a mapping */
  1.3509 +    UBool hasSupplementary; /* non-zero if unicodeMask has UCNV_HAS_SUPPLEMENTARY */
  1.3510 +
  1.3511 +    /* set up the local pointers */
  1.3512 +    cnv=pArgs->converter;
  1.3513 +    source=pArgs->source;
  1.3514 +    sourceLimit=pArgs->sourceLimit;
  1.3515 +    target=(uint8_t *)pArgs->target;
  1.3516 +    targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
  1.3517 +    offsets=pArgs->offsets;
  1.3518 +
  1.3519 +    table=cnv->sharedData->mbcs.fromUnicodeTable;
  1.3520 +    if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) { /* with this option a separate results array is used */
  1.3521 +        results=(uint16_t *)cnv->sharedData->mbcs.swapLFNLFromUnicodeBytes;
  1.3522 +    } else {
  1.3523 +        results=(uint16_t *)cnv->sharedData->mbcs.fromUnicodeBytes;
  1.3524 +    }
  1.3525 +
  1.3526 +    if(cnv->useFallback) { /* NOTE(review): 0x800/0xc00 presumably test status bits stored above the 8-bit output byte - confirm against the table format */
  1.3527 +        /* use all roundtrip and fallback results */
  1.3528 +        minValue=0x800;
  1.3529 +    } else {
  1.3530 +        /* use only roundtrips and fallbacks from private-use characters */
  1.3531 +        minValue=0xc00;
  1.3532 +    }
  1.3533 +    hasSupplementary=(UBool)(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY);
  1.3534 +
  1.3535 +    /* get the converter state from UConverter */
  1.3536 +    c=cnv->fromUChar32;
  1.3537 +
  1.3538 +    /* sourceIndex=-1 if the current character began in the previous buffer */
  1.3539 +    sourceIndex= c==0 ? 0 : -1;
  1.3540 +    nextSourceIndex=0;
  1.3541 +
  1.3542 +    /* conversion loop */
  1.3543 +    if(c!=0 && targetCapacity>0) { /* a code unit is pending from the previous call: jump into the loop and look for its trail surrogate */
  1.3544 +        goto getTrail;
  1.3545 +    }
  1.3546 +
  1.3547 +    while(source<sourceLimit) {
  1.3548 +        /*
  1.3549 +         * This following test is to see if available input would overflow the output.
  1.3550 +         * It does not catch output of more than one byte that
  1.3551 +         * overflows as a result of a multi-byte character or callback output
  1.3552 +         * from the last source character.
  1.3553 +         * Therefore, those situations also test for overflows and will
  1.3554 +         * then break the loop, too.
  1.3555 +         */
  1.3556 +        if(targetCapacity>0) {
  1.3557 +            /*
  1.3558 +             * Get a correct Unicode code point:
  1.3559 +             * a single UChar for a BMP code point or
  1.3560 +             * a matched surrogate pair for a "supplementary code point".
  1.3561 +             */
  1.3562 +            c=*source++;
  1.3563 +            ++nextSourceIndex;
  1.3564 +            if(U16_IS_SURROGATE(c)) {
  1.3565 +                if(U16_IS_SURROGATE_LEAD(c)) {
  1.3566 +getTrail:
  1.3567 +                    if(source<sourceLimit) {
  1.3568 +                        /* test the following code unit */
  1.3569 +                        UChar trail=*source;
  1.3570 +                        if(U16_IS_TRAIL(trail)) {
  1.3571 +                            ++source;
  1.3572 +                            ++nextSourceIndex;
  1.3573 +                            c=U16_GET_SUPPLEMENTARY(c, trail);
  1.3574 +                            if(!hasSupplementary) { /* NOTE(review): ucnv_MBCSFromUnicodeWithOffsets() only selects this function when UCNV_HAS_SUPPLEMENTARY is set, so this branch looks unreachable from there - confirm there is no other caller */
  1.3575 +                                /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
  1.3576 +                                /* callback(unassigned) */
  1.3577 +                                goto unassigned;
  1.3578 +                            }
  1.3579 +                            /* convert this supplementary code point */
  1.3580 +                            /* exit this condition tree */
  1.3581 +                        } else {
  1.3582 +                            /* this is an unmatched lead code unit (1st surrogate) */
  1.3583 +                            /* callback(illegal) */
  1.3584 +                            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
  1.3585 +                            break;
  1.3586 +                        }
  1.3587 +                    } else {
  1.3588 +                        /* no more input: c still holds the lead surrogate and is stored in cnv->fromUChar32 after the loop */
  1.3589 +                        break;
  1.3590 +                    }
  1.3591 +                } else {
  1.3592 +                    /* this is an unmatched trail code unit (2nd surrogate) */
  1.3593 +                    /* callback(illegal) */
  1.3594 +                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
  1.3595 +                    break;
  1.3596 +                }
  1.3597 +            }
  1.3598 +
  1.3599 +            /* convert the Unicode code point in c into codepage bytes */
  1.3600 +            value=MBCS_SINGLE_RESULT_FROM_U(table, results, c);
  1.3601 +
  1.3602 +            /* is this code point assigned, or do we use fallbacks? */
  1.3603 +            if(value>=minValue) {
  1.3604 +                /* assigned, write the output character bytes from value and length */
  1.3605 +                /* length==1 */
  1.3606 +                /* this is easy because we know that there is enough space */
  1.3607 +                *target++=(uint8_t)value; /* only the low 8 bits of the result are output */
  1.3608 +                if(offsets!=NULL) {
  1.3609 +                    *offsets++=sourceIndex;
  1.3610 +                }
  1.3611 +                --targetCapacity;
  1.3612 +
  1.3613 +                /* normal end of conversion: prepare for a new character */
  1.3614 +                c=0;
  1.3615 +                sourceIndex=nextSourceIndex;
  1.3616 +            } else { /* unassigned */
  1.3617 +unassigned:
  1.3618 +                /* try an extension mapping (also entered via goto from the getTrail code above) */
  1.3619 +                pArgs->source=source; /* remember the position so that the code units consumed by _extFromU() can be counted below */
  1.3620 +                c=_extFromU(cnv, cnv->sharedData,
  1.3621 +                            c, &source, sourceLimit,
  1.3622 +                            &target, target+targetCapacity,
  1.3623 +                            &offsets, sourceIndex,
  1.3624 +                            pArgs->flush,
  1.3625 +                            pErrorCode);
  1.3626 +                nextSourceIndex+=(int32_t)(source-pArgs->source); /* account for any additional source units that _extFromU() consumed */
  1.3627 +
  1.3628 +                if(U_FAILURE(*pErrorCode)) {
  1.3629 +                    /* not mappable or buffer overflow */
  1.3630 +                    break;
  1.3631 +                } else {
  1.3632 +                    /* a mapping was written to the target, continue */
  1.3633 +
  1.3634 +                    /* recalculate the targetCapacity after an extension mapping */
  1.3635 +                    targetCapacity=(int32_t)(pArgs->targetLimit-(char *)target);
  1.3636 +
  1.3637 +                    /* normal end of conversion: prepare for a new character */
  1.3638 +                    sourceIndex=nextSourceIndex;
  1.3639 +                }
  1.3640 +            }
  1.3641 +        } else {
  1.3642 +            /* target is full */
  1.3643 +            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
  1.3644 +            break;
  1.3645 +        }
  1.3646 +    }
  1.3647 +
  1.3648 +    /* set the converter state back into UConverter */
  1.3649 +    cnv->fromUChar32=c;
  1.3650 +
  1.3651 +    /* write back the updated pointers */
  1.3652 +    pArgs->source=source;
  1.3653 +    pArgs->target=(char *)target;
  1.3654 +    pArgs->offsets=offsets;
  1.3655 +}
  1.3656 +
  1.3657 +/*
  1.3658 + * This version of ucnv_MBCSFromUnicode() is optimized for single-byte codepages
  1.3659 + * that map only to and from the BMP (surrogate pairs are never looked up in the table, only passed to _extFromU()).
  1.3660 + * In addition to single-byte/state optimizations, the offset calculations
  1.3661 + * become much easier: table-mapped code units and output bytes correspond 1:1, so offsets are written in runs (from lastSource up to source) instead of per byte.
  1.3662 + * It would be possible to use the sbcsIndex for UTF-8-friendly tables,
  1.3663 + * but measurements have shown that this diminishes performance
  1.3664 + * in more cases than it improves it.
  1.3665 + * See SVN revision 21013 (2007-feb-06) for the last version with #if switches
  1.3666 + * for various MBCS and SBCS optimizations.
  1.3667 + */
  1.3668 +static void
  1.3669 +ucnv_MBCSSingleFromBMPWithOffsets(UConverterFromUnicodeArgs *pArgs, /* in/out: supplies converter, source/sourceLimit, target/targetLimit, offsets and flush; source, target and offsets are written back */
  1.3670 +                              UErrorCode *pErrorCode) { /* set here to U_ILLEGAL_CHAR_FOUND, U_TRUNCATED_CHAR_FOUND or U_BUFFER_OVERFLOW_ERROR, or by _extFromU() */
  1.3671 +    UConverter *cnv;
  1.3672 +    const UChar *source, *sourceLimit, *lastSource; /* lastSource: first source unit whose offset has not been written yet */
  1.3673 +    uint8_t *target;
  1.3674 +    int32_t targetCapacity, length; /* targetCapacity is clamped to the remaining source length below; length is a scratch counter */
  1.3675 +    int32_t *offsets; /* may be NULL */
  1.3676 +
  1.3677 +    const uint16_t *table;
  1.3678 +    const uint16_t *results; /* table and results are the two inputs of MBCS_SINGLE_RESULT_FROM_U() */
  1.3679 +
  1.3680 +    UChar32 c; /* current code unit or code point; loaded from and stored back to cnv->fromUChar32 */
  1.3681 +
  1.3682 +    int32_t sourceIndex; /* source index that goes with the next offset to be written */
  1.3683 +
  1.3684 +    uint32_t asciiRoundtrips; /* consulted by IS_ASCII_ROUNDTRIP() for c<=0x7f */
  1.3685 +    uint16_t value, minValue; /* table result for c; smallest result that is accepted as a mapping */
  1.3686 +
  1.3687 +    /* set up the local pointers */
  1.3688 +    cnv=pArgs->converter;
  1.3689 +    source=pArgs->source;
  1.3690 +    sourceLimit=pArgs->sourceLimit;
  1.3691 +    target=(uint8_t *)pArgs->target;
  1.3692 +    targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
  1.3693 +    offsets=pArgs->offsets;
  1.3694 +
  1.3695 +    table=cnv->sharedData->mbcs.fromUnicodeTable;
  1.3696 +    if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) { /* with this option a separate results array is used */
  1.3697 +        results=(uint16_t *)cnv->sharedData->mbcs.swapLFNLFromUnicodeBytes;
  1.3698 +    } else {
  1.3699 +        results=(uint16_t *)cnv->sharedData->mbcs.fromUnicodeBytes;
  1.3700 +    }
  1.3701 +    asciiRoundtrips=cnv->sharedData->mbcs.asciiRoundtrips;
  1.3702 +
  1.3703 +    if(cnv->useFallback) { /* NOTE(review): 0x800/0xc00 presumably test status bits stored above the 8-bit output byte - confirm against the table format */
  1.3704 +        /* use all roundtrip and fallback results */
  1.3705 +        minValue=0x800;
  1.3706 +    } else {
  1.3707 +        /* use only roundtrips and fallbacks from private-use characters */
  1.3708 +        minValue=0xc00;
  1.3709 +    }
  1.3710 +
  1.3711 +    /* get the converter state from UConverter */
  1.3712 +    c=cnv->fromUChar32;
  1.3713 +
  1.3714 +    /* sourceIndex=-1 if the current character began in the previous buffer */
  1.3715 +    sourceIndex= c==0 ? 0 : -1;
  1.3716 +    lastSource=source;
  1.3717 +
  1.3718 +    /*
  1.3719 +     * since the conversion here is 1:1 UChar:uint8_t, we need only one counter
  1.3720 +     * for the minimum of the sourceLength and targetCapacity
  1.3721 +     */
  1.3722 +    length=(int32_t)(sourceLimit-source);
  1.3723 +    if(length<targetCapacity) {
  1.3724 +        targetCapacity=length;
  1.3725 +    }
  1.3726 +
  1.3727 +    /* conversion loop */
  1.3728 +    if(c!=0 && targetCapacity>0) { /* a code unit is pending from the previous call: jump into the loop and look for its trail surrogate */
  1.3729 +        goto getTrail;
  1.3730 +    }
  1.3731 +
  1.3732 +#if MBCS_UNROLL_SINGLE_FROM_BMP
  1.3733 +    /* unrolling makes it slower on Pentium III/Windows 2000?! */
  1.3734 +    /* unroll the loop with the most common case */
  1.3735 +unrolled:
  1.3736 +    if(targetCapacity>=4) {
  1.3737 +        int32_t count, loops;
  1.3738 +        uint16_t andedValues; /* bitwise AND of the four results of one group */
  1.3739 +
  1.3740 +        loops=count=targetCapacity>>2;
  1.3741 +        do {
  1.3742 +            c=*source++;
  1.3743 +            andedValues=value=MBCS_SINGLE_RESULT_FROM_U(table, results, c);
  1.3744 +            *target++=(uint8_t)value;
  1.3745 +            c=*source++;
  1.3746 +            andedValues&=value=MBCS_SINGLE_RESULT_FROM_U(table, results, c);
  1.3747 +            *target++=(uint8_t)value;
  1.3748 +            c=*source++;
  1.3749 +            andedValues&=value=MBCS_SINGLE_RESULT_FROM_U(table, results, c);
  1.3750 +            *target++=(uint8_t)value;
  1.3751 +            c=*source++;
  1.3752 +            andedValues&=value=MBCS_SINGLE_RESULT_FROM_U(table, results, c);
  1.3753 +            *target++=(uint8_t)value;
  1.3754 +
  1.3755 +            /* were all 4 entries really valid? */
  1.3756 +            if(andedValues<minValue) {
  1.3757 +                /* no, return to the first of these 4 (undoing the four speculative writes) and let the loop below handle them one by one */
  1.3758 +                source-=4;
  1.3759 +                target-=4;
  1.3760 +                break;
  1.3761 +            }
  1.3762 +        } while(--count>0);
  1.3763 +        count=loops-count; /* number of groups of 4 that were completed */
  1.3764 +        targetCapacity-=4*count;
  1.3765 +
  1.3766 +        if(offsets!=NULL) {
  1.3767 +            lastSource+=4*count; /* the offsets for these units are written right here */
  1.3768 +            while(count>0) {
  1.3769 +                *offsets++=sourceIndex++;
  1.3770 +                *offsets++=sourceIndex++;
  1.3771 +                *offsets++=sourceIndex++;
  1.3772 +                *offsets++=sourceIndex++;
  1.3773 +                --count;
  1.3774 +            }
  1.3775 +        }
  1.3776 +
  1.3777 +        c=0;
  1.3778 +    }
  1.3779 +#endif
  1.3780 +
  1.3781 +    while(targetCapacity>0) { /* targetCapacity never exceeds the remaining source length, so no separate source<sourceLimit test is needed before reading */
  1.3782 +        /*
  1.3783 +         * Get a correct Unicode code point:
  1.3784 +         * a single UChar for a BMP code point or
  1.3785 +         * a matched surrogate pair for a "supplementary code point".
  1.3786 +         */
  1.3787 +        c=*source++;
  1.3788 +        /*
  1.3789 +         * Do not immediately check for single surrogates:
  1.3790 +         * Assume that they are unassigned and check for them in that case.
  1.3791 +         * This speeds up the conversion of assigned characters.
  1.3792 +         */
  1.3793 +        /* convert the Unicode code point in c into codepage bytes */
  1.3794 +        if(c<=0x7f && IS_ASCII_ROUNDTRIP(c, asciiRoundtrips)) { /* ASCII shortcut: the code unit value itself is the output byte, no table lookup */
  1.3795 +            *target++=(uint8_t)c;
  1.3796 +            --targetCapacity;
  1.3797 +            c=0;
  1.3798 +            continue;
  1.3799 +        }
  1.3800 +        value=MBCS_SINGLE_RESULT_FROM_U(table, results, c);
  1.3801 +        /* is this code point assigned, or do we use fallbacks? */
  1.3802 +        if(value>=minValue) {
  1.3803 +            /* assigned, write the output character bytes from value and length */
  1.3804 +            /* length==1 */
  1.3805 +            /* this is easy because we know that there is enough space */
  1.3806 +            *target++=(uint8_t)value; /* no offset is written here; offsets are filled in later from lastSource */
  1.3807 +            --targetCapacity;
  1.3808 +
  1.3809 +            /* normal end of conversion: prepare for a new character */
  1.3810 +            c=0;
  1.3811 +            continue;
  1.3812 +        } else if(!U16_IS_SURROGATE(c)) {
  1.3813 +            /* normal, unassigned BMP character */
  1.3814 +        } else if(U16_IS_SURROGATE_LEAD(c)) {
  1.3815 +getTrail:
  1.3816 +            if(source<sourceLimit) {
  1.3817 +                /* test the following code unit */
  1.3818 +                UChar trail=*source;
  1.3819 +                if(U16_IS_TRAIL(trail)) {
  1.3820 +                    ++source;
  1.3821 +                    c=U16_GET_SUPPLEMENTARY(c, trail);
  1.3822 +                    /* this codepage does not map supplementary code points */
  1.3823 +                    /* callback(unassigned) */
  1.3824 +                } else {
  1.3825 +                    /* this is an unmatched lead code unit (1st surrogate) */
  1.3826 +                    /* callback(illegal) */
  1.3827 +                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
  1.3828 +                    break;
  1.3829 +                }
  1.3830 +            } else {
  1.3831 +                /* no more input: c still holds the lead surrogate and is stored in cnv->fromUChar32 after the loop */
  1.3832 +                if (pArgs->flush) { /* the caller says there is no further input, so the lead surrogate can never be completed */
  1.3833 +                    *pErrorCode=U_TRUNCATED_CHAR_FOUND;
  1.3834 +                }
  1.3835 +                break;
  1.3836 +            }
  1.3837 +        } else {
  1.3838 +            /* this is an unmatched trail code unit (2nd surrogate) */
  1.3839 +            /* callback(illegal) */
  1.3840 +            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
  1.3841 +            break;
  1.3842 +        }
  1.3843 +
  1.3844 +        /* c does not have a mapping */
  1.3845 +
  1.3846 +        /* get the number of code units for c to correctly advance sourceIndex */
  1.3847 +        length=U16_LENGTH(c);
  1.3848 +
  1.3849 +        /* set offsets since the start or the last extension */
  1.3850 +        if(offsets!=NULL) {
  1.3851 +            int32_t count=(int32_t)(source-lastSource);
  1.3852 +
  1.3853 +            /* do not set the offset for this character */
  1.3854 +            count-=length;
  1.3855 +
  1.3856 +            while(count>0) {
  1.3857 +                *offsets++=sourceIndex++;
  1.3858 +                --count;
  1.3859 +            }
  1.3860 +            /* offsets and sourceIndex are now set for the current character */
  1.3861 +        }
  1.3862 +
  1.3863 +        /* try an extension mapping */
  1.3864 +        lastSource=source; /* remember the position so that the code units consumed by _extFromU() can be counted below */
  1.3865 +        c=_extFromU(cnv, cnv->sharedData,
  1.3866 +                    c, &source, sourceLimit,
  1.3867 +                    &target, (const uint8_t *)(pArgs->targetLimit), /* the real target limit: the local targetCapacity may have been clamped to the source length */
  1.3868 +                    &offsets, sourceIndex,
  1.3869 +                    pArgs->flush,
  1.3870 +                    pErrorCode);
  1.3871 +        sourceIndex+=length+(int32_t)(source-lastSource); /* skip the code units of c itself plus whatever _extFromU() consumed */
  1.3872 +        lastSource=source;
  1.3873 +
  1.3874 +        if(U_FAILURE(*pErrorCode)) {
  1.3875 +            /* not mappable or buffer overflow */
  1.3876 +            break;
  1.3877 +        } else {
  1.3878 +            /* a mapping was written to the target, continue */
  1.3879 +
  1.3880 +            /* recalculate the targetCapacity after an extension mapping */
  1.3881 +            targetCapacity=(int32_t)(pArgs->targetLimit-(char *)target);
  1.3882 +            length=(int32_t)(sourceLimit-source); /* re-apply the clamp to the remaining source length */
  1.3883 +            if(length<targetCapacity) {
  1.3884 +                targetCapacity=length;
  1.3885 +            }
  1.3886 +        }
  1.3887 +
  1.3888 +#if MBCS_UNROLL_SINGLE_FROM_BMP
  1.3889 +        /* unrolling makes it slower on Pentium III/Windows 2000?! */
  1.3890 +        goto unrolled;
  1.3891 +#endif
  1.3892 +    }
  1.3893 +
  1.3894 +    if(U_SUCCESS(*pErrorCode) && source<sourceLimit && target>=(uint8_t *)pArgs->targetLimit) { /* checked against the real limit because the loop counter may be the clamped value */
  1.3895 +        /* target is full */
  1.3896 +        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
  1.3897 +    }
  1.3898 +
  1.3899 +    /* set offsets since the start or the last callback */
  1.3900 +    if(offsets!=NULL) {
  1.3901 +        size_t count=source-lastSource; /* source units consumed since offsets were last written */
  1.3902 +        if (count > 0 && *pErrorCode == U_TRUNCATED_CHAR_FOUND) {
  1.3903 +            /*
  1.3904 +            Caller gave us a partial supplementary character,
  1.3905 +            which this function couldn't convert in any case.
  1.3906 +            The callback will handle the offset.
  1.3907 +            */
  1.3908 +            count--;
  1.3909 +        }
  1.3910 +        while(count>0) {
  1.3911 +            *offsets++=sourceIndex++;
  1.3912 +            --count;
  1.3913 +        }
  1.3914 +    }
  1.3915 +
  1.3916 +    /* set the converter state back into UConverter */
  1.3917 +    cnv->fromUChar32=c;
  1.3918 +
  1.3919 +    /* write back the updated pointers */
  1.3920 +    pArgs->source=source;
  1.3921 +    pArgs->target=(char *)target;
  1.3922 +    pArgs->offsets=offsets;
  1.3923 +}
  1.3924 +
  1.3925 +U_CFUNC void
  1.3926 +ucnv_MBCSFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
  1.3927 +                            UErrorCode *pErrorCode) {
  1.3928 +    UConverter *cnv;
  1.3929 +    const UChar *source, *sourceLimit;
  1.3930 +    uint8_t *target;
  1.3931 +    int32_t targetCapacity;
  1.3932 +    int32_t *offsets;
  1.3933 +
  1.3934 +    const uint16_t *table;
  1.3935 +    const uint16_t *mbcsIndex;
  1.3936 +    const uint8_t *p, *bytes;
  1.3937 +    uint8_t outputType;
  1.3938 +
  1.3939 +    UChar32 c;
  1.3940 +
  1.3941 +    int32_t prevSourceIndex, sourceIndex, nextSourceIndex;
  1.3942 +
  1.3943 +    uint32_t stage2Entry;
  1.3944 +    uint32_t asciiRoundtrips;
  1.3945 +    uint32_t value;
  1.3946 +    /* Shift-In and Shift-Out byte sequences differ by encoding scheme. */
  1.3947 +    uint8_t siBytes[2] = {0, 0};
  1.3948 +    uint8_t soBytes[2] = {0, 0};
  1.3949 +    uint8_t siLength, soLength;
  1.3950 +    int32_t length = 0, prevLength;
  1.3951 +    uint8_t unicodeMask;
  1.3952 +
  1.3953 +    cnv=pArgs->converter;
  1.3954 +
  1.3955 +    if(cnv->preFromUFirstCP>=0) {
  1.3956 +        /*
  1.3957 +         * pass sourceIndex=-1 because we continue from an earlier buffer
  1.3958 +         * in the future, this may change with continuous offsets
  1.3959 +         */
  1.3960 +        ucnv_extContinueMatchFromU(cnv, pArgs, -1, pErrorCode);
  1.3961 +
  1.3962 +        if(U_FAILURE(*pErrorCode) || cnv->preFromULength<0) {
  1.3963 +            return;
  1.3964 +        }
  1.3965 +    }
  1.3966 +
  1.3967 +    /* use optimized function if possible */
  1.3968 +    outputType=cnv->sharedData->mbcs.outputType;
  1.3969 +    unicodeMask=cnv->sharedData->mbcs.unicodeMask;
  1.3970 +    if(outputType==MBCS_OUTPUT_1 && !(unicodeMask&UCNV_HAS_SURROGATES)) {
  1.3971 +        if(!(unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
  1.3972 +            ucnv_MBCSSingleFromBMPWithOffsets(pArgs, pErrorCode);
  1.3973 +        } else {
  1.3974 +            ucnv_MBCSSingleFromUnicodeWithOffsets(pArgs, pErrorCode);
  1.3975 +        }
  1.3976 +        return;
  1.3977 +    } else if(outputType==MBCS_OUTPUT_2 && cnv->sharedData->mbcs.utf8Friendly) {
  1.3978 +        ucnv_MBCSDoubleFromUnicodeWithOffsets(pArgs, pErrorCode);
  1.3979 +        return;
  1.3980 +    }
  1.3981 +
  1.3982 +    /* set up the local pointers */
  1.3983 +    source=pArgs->source;
  1.3984 +    sourceLimit=pArgs->sourceLimit;
  1.3985 +    target=(uint8_t *)pArgs->target;
  1.3986 +    targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
  1.3987 +    offsets=pArgs->offsets;
  1.3988 +
  1.3989 +    table=cnv->sharedData->mbcs.fromUnicodeTable;
  1.3990 +    if(cnv->sharedData->mbcs.utf8Friendly) {
  1.3991 +        mbcsIndex=cnv->sharedData->mbcs.mbcsIndex;
  1.3992 +    } else {
  1.3993 +        mbcsIndex=NULL;
  1.3994 +    }
  1.3995 +    if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
  1.3996 +        bytes=cnv->sharedData->mbcs.swapLFNLFromUnicodeBytes;
  1.3997 +    } else {
  1.3998 +        bytes=cnv->sharedData->mbcs.fromUnicodeBytes;
  1.3999 +    }
  1.4000 +    asciiRoundtrips=cnv->sharedData->mbcs.asciiRoundtrips;
  1.4001 +
  1.4002 +    /* get the converter state from UConverter */
  1.4003 +    c=cnv->fromUChar32;
  1.4004 +
  1.4005 +    if(outputType==MBCS_OUTPUT_2_SISO) {
  1.4006 +        prevLength=cnv->fromUnicodeStatus;
  1.4007 +        if(prevLength==0) {
  1.4008 +            /* set the real value */
  1.4009 +            prevLength=1;
  1.4010 +        }
  1.4011 +    } else {
  1.4012 +        /* prevent fromUnicodeStatus from being set to something non-0 */
  1.4013 +        prevLength=0;
  1.4014 +    }
  1.4015 +
  1.4016 +    /* sourceIndex=-1 if the current character began in the previous buffer */
  1.4017 +    prevSourceIndex=-1;
  1.4018 +    sourceIndex= c==0 ? 0 : -1;
  1.4019 +    nextSourceIndex=0;
  1.4020 +
  1.4021 +    /* Get the SI/SO character for the converter */
  1.4022 +    siLength = getSISOBytes(SI, cnv->options, siBytes);
  1.4023 +    soLength = getSISOBytes(SO, cnv->options, soBytes);
  1.4024 +
  1.4025 +    /* conversion loop */
  1.4026 +    /*
  1.4027 +     * This is another piece of ugly code:
  1.4028 +     * A goto into the loop if the converter state contains a first surrogate
  1.4029 +     * from the previous function call.
  1.4030 +     * It saves me to check in each loop iteration a check of if(c==0)
  1.4031 +     * and duplicating the trail-surrogate-handling code in the else
  1.4032 +     * branch of that check.
  1.4033 +     * I could not find any other way to get around this other than
  1.4034 +     * using a function call for the conversion and callback, which would
  1.4035 +     * be even more inefficient.
  1.4036 +     *
  1.4037 +     * Markus Scherer 2000-jul-19
  1.4038 +     */
  1.4039 +    if(c!=0 && targetCapacity>0) {
  1.4040 +        goto getTrail;
  1.4041 +    }
  1.4042 +
  1.4043 +    while(source<sourceLimit) {
  1.4044 +        /*
  1.4045 +         * This following test is to see if available input would overflow the output.
  1.4046 +         * It does not catch output of more than one byte that
  1.4047 +         * overflows as a result of a multi-byte character or callback output
  1.4048 +         * from the last source character.
  1.4049 +         * Therefore, those situations also test for overflows and will
  1.4050 +         * then break the loop, too.
  1.4051 +         */
  1.4052 +        if(targetCapacity>0) {
  1.4053 +            /*
  1.4054 +             * Get a correct Unicode code point:
  1.4055 +             * a single UChar for a BMP code point or
  1.4056 +             * a matched surrogate pair for a "supplementary code point".
  1.4057 +             */
  1.4058 +            c=*source++;
  1.4059 +            ++nextSourceIndex;
  1.4060 +            if(c<=0x7f && IS_ASCII_ROUNDTRIP(c, asciiRoundtrips)) {
  1.4061 +                *target++=(uint8_t)c;
  1.4062 +                if(offsets!=NULL) {
  1.4063 +                    *offsets++=sourceIndex;
  1.4064 +                    prevSourceIndex=sourceIndex;
  1.4065 +                    sourceIndex=nextSourceIndex;
  1.4066 +                }
  1.4067 +                --targetCapacity;
  1.4068 +                c=0;
  1.4069 +                continue;
  1.4070 +            }
  1.4071 +            /*
  1.4072 +             * utf8Friendly table: Test for <=0xd7ff rather than <=MBCS_FAST_MAX
  1.4073 +             * to avoid dealing with surrogates.
  1.4074 +             * MBCS_FAST_MAX must be >=0xd7ff.
  1.4075 +             */
  1.4076 +            if(c<=0xd7ff && mbcsIndex!=NULL) {
  1.4077 +                value=mbcsIndex[c>>6];
  1.4078 +
  1.4079 +                /* get the bytes and the length for the output (copied from below and adapted for utf8Friendly data) */
  1.4080 +                /* There are only roundtrips (!=0) and no-mapping (==0) entries. */
  1.4081 +                switch(outputType) {
  1.4082 +                case MBCS_OUTPUT_2:
  1.4083 +                    value=((const uint16_t *)bytes)[value +(c&0x3f)];
  1.4084 +                    if(value<=0xff) {
  1.4085 +                        if(value==0) {
  1.4086 +                            goto unassigned;
  1.4087 +                        } else {
  1.4088 +                            length=1;
  1.4089 +                        }
  1.4090 +                    } else {
  1.4091 +                        length=2;
  1.4092 +                    }
  1.4093 +                    break;
  1.4094 +                case MBCS_OUTPUT_2_SISO:
  1.4095 +                    /* 1/2-byte stateful with Shift-In/Shift-Out */
  1.4096 +                    /*
  1.4097 +                     * Save the old state in the converter object
  1.4098 +                     * right here, then change the local prevLength state variable if necessary.
  1.4099 +                     * Then, if this character turns out to be unassigned or a fallback that
  1.4100 +                     * is not taken, the callback code must not save the new state in the converter
  1.4101 +                     * because the new state is for a character that is not output.
  1.4102 +                     * However, the callback must still restore the state from the converter
  1.4103 +                     * in case the callback function changed it for its output.
  1.4104 +                     */
  1.4105 +                    cnv->fromUnicodeStatus=prevLength; /* save the old state */
  1.4106 +                    value=((const uint16_t *)bytes)[value +(c&0x3f)];
  1.4107 +                    if(value<=0xff) {
  1.4108 +                        if(value==0) {
  1.4109 +                            goto unassigned;
  1.4110 +                        } else if(prevLength<=1) {
  1.4111 +                            length=1;
  1.4112 +                        } else {
  1.4113 +                            /* change from double-byte mode to single-byte */
  1.4114 +                            if (siLength == 1) {
  1.4115 +                                value|=(uint32_t)siBytes[0]<<8;
  1.4116 +                                length = 2;
  1.4117 +                            } else if (siLength == 2) {
  1.4118 +                                value|=(uint32_t)siBytes[1]<<8;
  1.4119 +                                value|=(uint32_t)siBytes[0]<<16;
  1.4120 +                                length = 3;
  1.4121 +                            }
  1.4122 +                            prevLength=1;
  1.4123 +                        }
  1.4124 +                    } else {
  1.4125 +                        if(prevLength==2) {
  1.4126 +                            length=2;
  1.4127 +                        } else {
  1.4128 +                            /* change from single-byte mode to double-byte */
  1.4129 +                            if (soLength == 1) {
  1.4130 +                                value|=(uint32_t)soBytes[0]<<16;
  1.4131 +                                length = 3;
  1.4132 +                            } else if (soLength == 2) {
  1.4133 +                                value|=(uint32_t)soBytes[1]<<16;
  1.4134 +                                value|=(uint32_t)soBytes[0]<<24;
  1.4135 +                                length = 4;
  1.4136 +                            }
  1.4137 +                            prevLength=2;
  1.4138 +                        }
  1.4139 +                    }
  1.4140 +                    break;
  1.4141 +                case MBCS_OUTPUT_DBCS_ONLY:
  1.4142 +                    /* table with single-byte results, but only DBCS mappings used */
  1.4143 +                    value=((const uint16_t *)bytes)[value +(c&0x3f)];
  1.4144 +                    if(value<=0xff) {
  1.4145 +                        /* no mapping or SBCS result, not taken for DBCS-only */
  1.4146 +                        goto unassigned;
  1.4147 +                    } else {
  1.4148 +                        length=2;
  1.4149 +                    }
  1.4150 +                    break;
  1.4151 +                case MBCS_OUTPUT_3:
  1.4152 +                    p=bytes+(value+(c&0x3f))*3;
  1.4153 +                    value=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2];
  1.4154 +                    if(value<=0xff) {
  1.4155 +                        if(value==0) {
  1.4156 +                            goto unassigned;
  1.4157 +                        } else {
  1.4158 +                            length=1;
  1.4159 +                        }
  1.4160 +                    } else if(value<=0xffff) {
  1.4161 +                        length=2;
  1.4162 +                    } else {
  1.4163 +                        length=3;
  1.4164 +                    }
  1.4165 +                    break;
  1.4166 +                case MBCS_OUTPUT_4:
  1.4167 +                    value=((const uint32_t *)bytes)[value +(c&0x3f)];
  1.4168 +                    if(value<=0xff) {
  1.4169 +                        if(value==0) {
  1.4170 +                            goto unassigned;
  1.4171 +                        } else {
  1.4172 +                            length=1;
  1.4173 +                        }
  1.4174 +                    } else if(value<=0xffff) {
  1.4175 +                        length=2;
  1.4176 +                    } else if(value<=0xffffff) {
  1.4177 +                        length=3;
  1.4178 +                    } else {
  1.4179 +                        length=4;
  1.4180 +                    }
  1.4181 +                    break;
  1.4182 +                case MBCS_OUTPUT_3_EUC:
  1.4183 +                    value=((const uint16_t *)bytes)[value +(c&0x3f)];
  1.4184 +                    /* EUC 16-bit fixed-length representation */
  1.4185 +                    if(value<=0xff) {
  1.4186 +                        if(value==0) {
  1.4187 +                            goto unassigned;
  1.4188 +                        } else {
  1.4189 +                            length=1;
  1.4190 +                        }
  1.4191 +                    } else if((value&0x8000)==0) {
  1.4192 +                        value|=0x8e8000;
  1.4193 +                        length=3;
  1.4194 +                    } else if((value&0x80)==0) {
  1.4195 +                        value|=0x8f0080;
  1.4196 +                        length=3;
  1.4197 +                    } else {
  1.4198 +                        length=2;
  1.4199 +                    }
  1.4200 +                    break;
  1.4201 +                case MBCS_OUTPUT_4_EUC:
  1.4202 +                    p=bytes+(value+(c&0x3f))*3;
  1.4203 +                    value=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2];
  1.4204 +                    /* EUC 16-bit fixed-length representation applied to the first two bytes */
  1.4205 +                    if(value<=0xff) {
  1.4206 +                        if(value==0) {
  1.4207 +                            goto unassigned;
  1.4208 +                        } else {
  1.4209 +                            length=1;
  1.4210 +                        }
  1.4211 +                    } else if(value<=0xffff) {
  1.4212 +                        length=2;
  1.4213 +                    } else if((value&0x800000)==0) {
  1.4214 +                        value|=0x8e800000;
  1.4215 +                        length=4;
  1.4216 +                    } else if((value&0x8000)==0) {
  1.4217 +                        value|=0x8f008000;
  1.4218 +                        length=4;
  1.4219 +                    } else {
  1.4220 +                        length=3;
  1.4221 +                    }
  1.4222 +                    break;
  1.4223 +                default:
  1.4224 +                    /* must not occur */
  1.4225 +                    /*
  1.4226 +                     * To avoid compiler warnings that value & length may be
  1.4227 +                     * used without having been initialized, we set them here.
  1.4228 +                     * In reality, this is unreachable code.
  1.4229 +                     * Not having a default branch also causes warnings with
  1.4230 +                     * some compilers.
  1.4231 +                     */
  1.4232 +                    value=0;
  1.4233 +                    length=0;
  1.4234 +                    break;
  1.4235 +                }
  1.4236 +                /* output the value */
  1.4237 +            } else {
  1.4238 +                /*
  1.4239 +                 * This also tests if the codepage maps single surrogates.
  1.4240 +                 * If it does, then surrogates are not paired but mapped separately.
  1.4241 +                 * Note that in this case unmatched surrogates are not detected.
  1.4242 +                 */
  1.4243 +                if(U16_IS_SURROGATE(c) && !(unicodeMask&UCNV_HAS_SURROGATES)) {
  1.4244 +                    if(U16_IS_SURROGATE_LEAD(c)) {
  1.4245 +getTrail:
  1.4246 +                        if(source<sourceLimit) {
  1.4247 +                            /* test the following code unit */
  1.4248 +                            UChar trail=*source;
  1.4249 +                            if(U16_IS_TRAIL(trail)) {
  1.4250 +                                ++source;
  1.4251 +                                ++nextSourceIndex;
  1.4252 +                                c=U16_GET_SUPPLEMENTARY(c, trail);
  1.4253 +                                if(!(unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
  1.4254 +                                    /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
  1.4255 +                                    cnv->fromUnicodeStatus=prevLength; /* save the old state */
  1.4256 +                                    /* callback(unassigned) */
  1.4257 +                                    goto unassigned;
  1.4258 +                                }
  1.4259 +                                /* convert this supplementary code point */
  1.4260 +                                /* exit this condition tree */
  1.4261 +                            } else {
  1.4262 +                                /* this is an unmatched lead code unit (1st surrogate) */
  1.4263 +                                /* callback(illegal) */
  1.4264 +                                *pErrorCode=U_ILLEGAL_CHAR_FOUND;
  1.4265 +                                break;
  1.4266 +                            }
  1.4267 +                        } else {
  1.4268 +                            /* no more input */
  1.4269 +                            break;
  1.4270 +                        }
  1.4271 +                    } else {
  1.4272 +                        /* this is an unmatched trail code unit (2nd surrogate) */
  1.4273 +                        /* callback(illegal) */
  1.4274 +                        *pErrorCode=U_ILLEGAL_CHAR_FOUND;
  1.4275 +                        break;
  1.4276 +                    }
  1.4277 +                }
  1.4278 +
  1.4279 +                /* convert the Unicode code point in c into codepage bytes */
  1.4280 +
  1.4281 +                /*
  1.4282 +                 * The basic lookup is a triple-stage compact array (trie) lookup.
  1.4283 +                 * For details see the beginning of this file.
  1.4284 +                 *
  1.4285 +                 * Single-byte codepages are handled with a different data structure
  1.4286 +                 * by _MBCSSingle... functions.
  1.4287 +                 *
  1.4288 +                 * The result consists of a 32-bit value from stage 2 and
  1.4289 +                 * a pointer to as many bytes as are stored per character.
  1.4290 +                 * The pointer points to the character's bytes in stage 3.
  1.4291 +                 * Bits 15..0 of the stage 2 entry contain the stage 3 index
  1.4292 +                 * for that pointer, while bits 31..16 are flags for which of
  1.4293 +                 * the 16 characters in the block are roundtrip-assigned.
  1.4294 +                 *
  1.4295 +                 * For 2-byte and 4-byte codepages, the bytes are stored as uint16_t
  1.4296 +                 * respectively as uint32_t, in the platform encoding.
  1.4297 +                 * For 3-byte codepages, the bytes are always stored in big-endian order.
  1.4298 +                 *
  1.4299 +                 * For EUC encodings that use only either 0x8e or 0x8f as the first
  1.4300 +                 * byte of their longest byte sequences, the first two bytes in
  1.4301 +                 * this third stage indicate with their 7th bits whether these bytes
  1.4302 +                 * are to be written directly or actually need to be preceded by
  1.4303 +                 * one of the two Single-Shift codes. With this, the third stage
  1.4304 +                 * stores one byte fewer per character than the actual maximum length of
  1.4305 +                 * EUC byte sequences.
  1.4306 +                 *
  1.4307 +                 * Other than that, leading zero bytes are removed and the other
  1.4308 +                 * bytes output. A single zero byte may be output if the "assigned"
  1.4309 +                 * bit in stage 2 was on.
  1.4310 +                 * The data structure does not support zero byte output as a fallback,
  1.4311 +                 * and also does not allow output of leading zeros.
  1.4312 +                 */
  1.4313 +                stage2Entry=MBCS_STAGE_2_FROM_U(table, c);
  1.4314 +
  1.4315 +                /* get the bytes and the length for the output */
  1.4316 +                switch(outputType) {
  1.4317 +                case MBCS_OUTPUT_2:
  1.4318 +                    value=MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c);
  1.4319 +                    if(value<=0xff) {
  1.4320 +                        length=1;
  1.4321 +                    } else {
  1.4322 +                        length=2;
  1.4323 +                    }
  1.4324 +                    break;
  1.4325 +                case MBCS_OUTPUT_2_SISO:
  1.4326 +                    /* 1/2-byte stateful with Shift-In/Shift-Out */
  1.4327 +                    /*
  1.4328 +                     * Save the old state in the converter object
  1.4329 +                     * right here, then change the local prevLength state variable if necessary.
  1.4330 +                     * Then, if this character turns out to be unassigned or a fallback that
  1.4331 +                     * is not taken, the callback code must not save the new state in the converter
  1.4332 +                     * because the new state is for a character that is not output.
  1.4333 +                     * However, the callback must still restore the state from the converter
  1.4334 +                     * in case the callback function changed it for its output.
  1.4335 +                     */
  1.4336 +                    cnv->fromUnicodeStatus=prevLength; /* save the old state */
  1.4337 +                    value=MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c);
  1.4338 +                    if(value<=0xff) {
  1.4339 +                        if(value==0 && MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c)==0) {
  1.4340 +                            /* no mapping, leave value==0 */
  1.4341 +                            length=0;
  1.4342 +                        } else if(prevLength<=1) {
  1.4343 +                            length=1;
  1.4344 +                        } else {
  1.4345 +                            /* change from double-byte mode to single-byte */
  1.4346 +                            if (siLength == 1) {
  1.4347 +                                value|=(uint32_t)siBytes[0]<<8;
  1.4348 +                                length = 2;
  1.4349 +                            } else if (siLength == 2) {
  1.4350 +                                value|=(uint32_t)siBytes[1]<<8;
  1.4351 +                                value|=(uint32_t)siBytes[0]<<16;
  1.4352 +                                length = 3;
  1.4353 +                            }
  1.4354 +                            prevLength=1;
  1.4355 +                        }
  1.4356 +                    } else {
  1.4357 +                        if(prevLength==2) {
  1.4358 +                            length=2;
  1.4359 +                        } else {
  1.4360 +                            /* change from single-byte mode to double-byte */
  1.4361 +                            if (soLength == 1) {
  1.4362 +                                value|=(uint32_t)soBytes[0]<<16;
  1.4363 +                                length = 3;
  1.4364 +                            } else if (soLength == 2) {
  1.4365 +                                value|=(uint32_t)soBytes[1]<<16;
  1.4366 +                                value|=(uint32_t)soBytes[0]<<24;
  1.4367 +                                length = 4;
  1.4368 +                            }
  1.4369 +                            prevLength=2;
  1.4370 +                        }
  1.4371 +                    }
  1.4372 +                    break;
  1.4373 +                case MBCS_OUTPUT_DBCS_ONLY:
  1.4374 +                    /* table with single-byte results, but only DBCS mappings used */
  1.4375 +                    value=MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c);
  1.4376 +                    if(value<=0xff) {
  1.4377 +                        /* no mapping or SBCS result, not taken for DBCS-only */
  1.4378 +                        value=stage2Entry=0; /* stage2Entry=0 to reset roundtrip flags */
  1.4379 +                        length=0;
  1.4380 +                    } else {
  1.4381 +                        length=2;
  1.4382 +                    }
  1.4383 +                    break;
  1.4384 +                case MBCS_OUTPUT_3:
  1.4385 +                    p=MBCS_POINTER_3_FROM_STAGE_2(bytes, stage2Entry, c);
  1.4386 +                    value=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2];
  1.4387 +                    if(value<=0xff) {
  1.4388 +                        length=1;
  1.4389 +                    } else if(value<=0xffff) {
  1.4390 +                        length=2;
  1.4391 +                    } else {
  1.4392 +                        length=3;
  1.4393 +                    }
  1.4394 +                    break;
  1.4395 +                case MBCS_OUTPUT_4:
  1.4396 +                    value=MBCS_VALUE_4_FROM_STAGE_2(bytes, stage2Entry, c);
  1.4397 +                    if(value<=0xff) {
  1.4398 +                        length=1;
  1.4399 +                    } else if(value<=0xffff) {
  1.4400 +                        length=2;
  1.4401 +                    } else if(value<=0xffffff) {
  1.4402 +                        length=3;
  1.4403 +                    } else {
  1.4404 +                        length=4;
  1.4405 +                    }
  1.4406 +                    break;
  1.4407 +                case MBCS_OUTPUT_3_EUC:
  1.4408 +                    value=MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c);
  1.4409 +                    /* EUC 16-bit fixed-length representation */
  1.4410 +                    if(value<=0xff) {
  1.4411 +                        length=1;
  1.4412 +                    } else if((value&0x8000)==0) {
  1.4413 +                        value|=0x8e8000;
  1.4414 +                        length=3;
  1.4415 +                    } else if((value&0x80)==0) {
  1.4416 +                        value|=0x8f0080;
  1.4417 +                        length=3;
  1.4418 +                    } else {
  1.4419 +                        length=2;
  1.4420 +                    }
  1.4421 +                    break;
  1.4422 +                case MBCS_OUTPUT_4_EUC:
  1.4423 +                    p=MBCS_POINTER_3_FROM_STAGE_2(bytes, stage2Entry, c);
  1.4424 +                    value=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2];
  1.4425 +                    /* EUC 16-bit fixed-length representation applied to the first two bytes */
  1.4426 +                    if(value<=0xff) {
  1.4427 +                        length=1;
  1.4428 +                    } else if(value<=0xffff) {
  1.4429 +                        length=2;
  1.4430 +                    } else if((value&0x800000)==0) {
  1.4431 +                        value|=0x8e800000;
  1.4432 +                        length=4;
  1.4433 +                    } else if((value&0x8000)==0) {
  1.4434 +                        value|=0x8f008000;
  1.4435 +                        length=4;
  1.4436 +                    } else {
  1.4437 +                        length=3;
  1.4438 +                    }
  1.4439 +                    break;
  1.4440 +                default:
  1.4441 +                    /* must not occur */
  1.4442 +                    /*
  1.4443 +                     * To avoid compiler warnings that value & length may be
  1.4444 +                     * used without having been initialized, we set them here.
  1.4445 +                     * In reality, this is unreachable code.
  1.4446 +                     * Not having a default branch also causes warnings with
  1.4447 +                     * some compilers.
  1.4448 +                     */
  1.4449 +                    value=stage2Entry=0; /* stage2Entry=0 to reset roundtrip flags */
  1.4450 +                    length=0;
  1.4451 +                    break;
  1.4452 +                }
  1.4453 +
  1.4454 +                /* is this code point assigned, or do we use fallbacks? */
  1.4455 +                if(!(MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c)!=0 ||
  1.4456 +                     (UCNV_FROM_U_USE_FALLBACK(cnv, c) && value!=0))
  1.4457 +                ) {
  1.4458 +                    /*
  1.4459 +                     * We allow a 0 byte output if the "assigned" bit is set for this entry.
  1.4460 +                     * There is no way with this data structure for fallback output
  1.4461 +                     * to be a zero byte.
  1.4462 +                     */
  1.4463 +
  1.4464 +unassigned:
  1.4465 +                    /* try an extension mapping */
  1.4466 +                    pArgs->source=source;
  1.4467 +                    c=_extFromU(cnv, cnv->sharedData,
  1.4468 +                                c, &source, sourceLimit,
  1.4469 +                                &target, target+targetCapacity,
  1.4470 +                                &offsets, sourceIndex,
  1.4471 +                                pArgs->flush,
  1.4472 +                                pErrorCode);
  1.4473 +                    nextSourceIndex+=(int32_t)(source-pArgs->source);
  1.4474 +                    prevLength=cnv->fromUnicodeStatus; /* restore SISO state */
  1.4475 +
  1.4476 +                    if(U_FAILURE(*pErrorCode)) {
  1.4477 +                        /* not mappable or buffer overflow */
  1.4478 +                        break;
  1.4479 +                    } else {
  1.4480 +                        /* a mapping was written to the target, continue */
  1.4481 +
  1.4482 +                        /* recalculate the targetCapacity after an extension mapping */
  1.4483 +                        targetCapacity=(int32_t)(pArgs->targetLimit-(char *)target);
  1.4484 +
  1.4485 +                        /* normal end of conversion: prepare for a new character */
  1.4486 +                        if(offsets!=NULL) {
  1.4487 +                            prevSourceIndex=sourceIndex;
  1.4488 +                            sourceIndex=nextSourceIndex;
  1.4489 +                        }
  1.4490 +                        continue;
  1.4491 +                    }
  1.4492 +                }
  1.4493 +            }
  1.4494 +
  1.4495 +            /* write the output character bytes from value and length */
  1.4496 +            /* from the first if in the loop we know that targetCapacity>0 */
  1.4497 +            if(length<=targetCapacity) {
  1.4498 +                if(offsets==NULL) {
  1.4499 +                    switch(length) {
  1.4500 +                        /* each branch falls through to the next one */
  1.4501 +                    case 4:
  1.4502 +                        *target++=(uint8_t)(value>>24);
  1.4503 +                    case 3: /*fall through*/
  1.4504 +                        *target++=(uint8_t)(value>>16);
  1.4505 +                    case 2: /*fall through*/
  1.4506 +                        *target++=(uint8_t)(value>>8);
  1.4507 +                    case 1: /*fall through*/
  1.4508 +                        *target++=(uint8_t)value;
  1.4509 +                    default:
  1.4510 +                        /* will never occur */
  1.4511 +                        break;
  1.4512 +                    }
  1.4513 +                } else {
  1.4514 +                    switch(length) {
  1.4515 +                        /* each branch falls through to the next one */
  1.4516 +                    case 4:
  1.4517 +                        *target++=(uint8_t)(value>>24);
  1.4518 +                        *offsets++=sourceIndex;
  1.4519 +                    case 3: /*fall through*/
  1.4520 +                        *target++=(uint8_t)(value>>16);
  1.4521 +                        *offsets++=sourceIndex;
  1.4522 +                    case 2: /*fall through*/
  1.4523 +                        *target++=(uint8_t)(value>>8);
  1.4524 +                        *offsets++=sourceIndex;
  1.4525 +                    case 1: /*fall through*/
  1.4526 +                        *target++=(uint8_t)value;
  1.4527 +                        *offsets++=sourceIndex;
  1.4528 +                    default:
  1.4529 +                        /* will never occur */
  1.4530 +                        break;
  1.4531 +                    }
  1.4532 +                }
  1.4533 +                targetCapacity-=length;
  1.4534 +            } else {
  1.4535 +                uint8_t *charErrorBuffer;
  1.4536 +
  1.4537 +                /*
  1.4538 +                 * We actually do this backwards here:
  1.4539 +                 * In order to save an intermediate variable, we output
  1.4540 +                 * first to the overflow buffer what does not fit into the
  1.4541 +                 * regular target.
  1.4542 +                 */
  1.4543 +                /* we know that 1<=targetCapacity<length<=4 */
  1.4544 +                length-=targetCapacity;
  1.4545 +                charErrorBuffer=(uint8_t *)cnv->charErrorBuffer;
  1.4546 +                switch(length) {
  1.4547 +                    /* each branch falls through to the next one */
  1.4548 +                case 3:
  1.4549 +                    *charErrorBuffer++=(uint8_t)(value>>16);
  1.4550 +                case 2: /*fall through*/
  1.4551 +                    *charErrorBuffer++=(uint8_t)(value>>8);
  1.4552 +                case 1: /*fall through*/
  1.4553 +                    *charErrorBuffer=(uint8_t)value;
  1.4554 +                default:
  1.4555 +                    /* will never occur */
  1.4556 +                    break;
  1.4557 +                }
  1.4558 +                cnv->charErrorBufferLength=(int8_t)length;
  1.4559 +
  1.4560 +                /* now output what fits into the regular target */
  1.4561 +                value>>=8*length; /* length was reduced by targetCapacity */
  1.4562 +                switch(targetCapacity) {
  1.4563 +                    /* each branch falls through to the next one */
  1.4564 +                case 3:
  1.4565 +                    *target++=(uint8_t)(value>>16);
  1.4566 +                    if(offsets!=NULL) {
  1.4567 +                        *offsets++=sourceIndex;
  1.4568 +                    }
  1.4569 +                case 2: /*fall through*/
  1.4570 +                    *target++=(uint8_t)(value>>8);
  1.4571 +                    if(offsets!=NULL) {
  1.4572 +                        *offsets++=sourceIndex;
  1.4573 +                    }
  1.4574 +                case 1: /*fall through*/
  1.4575 +                    *target++=(uint8_t)value;
  1.4576 +                    if(offsets!=NULL) {
  1.4577 +                        *offsets++=sourceIndex;
  1.4578 +                    }
  1.4579 +                default:
  1.4580 +                    /* will never occur */
  1.4581 +                    break;
  1.4582 +                }
  1.4583 +
  1.4584 +                /* target overflow */
  1.4585 +                targetCapacity=0;
  1.4586 +                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
  1.4587 +                c=0;
  1.4588 +                break;
  1.4589 +            }
  1.4590 +
  1.4591 +            /* normal end of conversion: prepare for a new character */
  1.4592 +            c=0;
  1.4593 +            if(offsets!=NULL) {
  1.4594 +                prevSourceIndex=sourceIndex;
  1.4595 +                sourceIndex=nextSourceIndex;
  1.4596 +            }
  1.4597 +            continue;
  1.4598 +        } else {
  1.4599 +            /* target is full */
  1.4600 +            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
  1.4601 +            break;
  1.4602 +        }
  1.4603 +    }
  1.4604 +
  1.4605 +    /*
  1.4606 +     * the end of the input stream and detection of truncated input
  1.4607 +     * are handled by the framework, but for EBCDIC_STATEFUL conversion
  1.4608 +     * we need to emit an SI at the very end
  1.4609 +     *
  1.4610 +     * conditions:
  1.4611 +     *   successful
  1.4612 +     *   EBCDIC_STATEFUL in DBCS mode
  1.4613 +     *   end of input and no truncated input
  1.4614 +     */
  1.4615 +    if( U_SUCCESS(*pErrorCode) &&
  1.4616 +        outputType==MBCS_OUTPUT_2_SISO && prevLength==2 &&
  1.4617 +        pArgs->flush && source>=sourceLimit && c==0
  1.4618 +    ) {
  1.4619 +        /* EBCDIC_STATEFUL ending with DBCS: emit an SI to return the output stream to SBCS */
  1.4620 +        if(targetCapacity>0) {
  1.4621 +            *target++=(uint8_t)siBytes[0];
  1.4622 +            if (siLength == 2) {
  1.4623 +                if (targetCapacity<2) {
  1.4624 +                    cnv->charErrorBuffer[0]=(uint8_t)siBytes[1];
  1.4625 +                    cnv->charErrorBufferLength=1;
  1.4626 +                    *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
  1.4627 +                } else {
  1.4628 +                    *target++=(uint8_t)siBytes[1];
  1.4629 +                }
  1.4630 +            }
  1.4631 +            if(offsets!=NULL) {
  1.4632 +                /* set the last source character's index (sourceIndex points at sourceLimit now) */
  1.4633 +                *offsets++=prevSourceIndex;
  1.4634 +            }
  1.4635 +        } else {
  1.4636 +            /* target is full */
  1.4637 +            cnv->charErrorBuffer[0]=(uint8_t)siBytes[0];
  1.4638 +            if (siLength == 2) {
  1.4639 +                cnv->charErrorBuffer[1]=(uint8_t)siBytes[1];
  1.4640 +            }
  1.4641 +            cnv->charErrorBufferLength=siLength;
  1.4642 +            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
  1.4643 +        }
  1.4644 +        prevLength=1; /* we switched into SBCS */
  1.4645 +    }
  1.4646 +
  1.4647 +    /* set the converter state back into UConverter */
  1.4648 +    cnv->fromUChar32=c;
  1.4649 +    cnv->fromUnicodeStatus=prevLength;
  1.4650 +
  1.4651 +    /* write back the updated pointers */
  1.4652 +    pArgs->source=source;
  1.4653 +    pArgs->target=(char *)target;
  1.4654 +    pArgs->offsets=offsets;
  1.4655 +}
  1.4656 +
  1.4657 +/*
  1.4658 + * This is another simple conversion function for internal use by other
  1.4659 + * conversion implementations.
  1.4660 + * It does not use the converter state nor call callbacks.
  1.4661 + * It does not handle the EBCDIC swaplfnl option (set in UConverter).
  1.4662 + * It handles conversion extensions but not GB 18030.
  1.4663 + *
  1.4664 + * It converts one single Unicode code point into codepage bytes, encoded
  1.4665 + * as one 32-bit value. The function returns the number of bytes in *pValue:
  1.4666 + * 1..4 the number of bytes in *pValue
  1.4667 + * 0    unassigned (*pValue undefined)
  1.4668 + * -1   illegal (*pValue undefined); in this code it is only returned by the
         + *      switch default, that is for an output type other than MBCS_OUTPUT_1
         + *      and MBCS_OUTPUT_2 (the other cases are compiled out with #if 0)
  1.4669 + *
  1.4670 + * *pValue will contain the resulting bytes with the last byte in bits 7..0,
  1.4671 + * the second to last byte in bits 15..8, etc.
  1.4672 + * Currently, the function assumes but does not check that 0<=c<=0x10ffff.
         + *
         + * Parameters:
         + * sharedData  supplies mbcs.unicodeMask, mbcs.outputType, mbcs.fromUnicodeTable,
         + *             mbcs.fromUnicodeBytes and mbcs.extIndexes; nothing is written to it
         + * c           the code point to convert
         + * pValue      output; written only on the paths that return a byte count
         + *             (the extension lookup receives the pointer and may write it)
         + * useFallback selects whether fallback mappings are accepted in addition to
         + *             roundtrip mappings
         + *
         + * Lookup order: the base table first; if it yields no usable mapping and
         + * extension data is present, ucnv_extSimpleMatchFromU() is consulted and
         + * the absolute value of its result is returned.
  1.4673 + */
  1.4674 +U_CFUNC int32_t
  1.4675 +ucnv_MBCSFromUChar32(UConverterSharedData *sharedData,
  1.4676 +                 UChar32 c, uint32_t *pValue,
  1.4677 +                 UBool useFallback) {
  1.4678 +    const int32_t *cx; /* sharedData->mbcs.extIndexes, tested against NULL before use */
  1.4679 +    const uint16_t *table; /* sharedData->mbcs.fromUnicodeTable */
  1.4680 +#if 0
  1.4681 +/* #if 0 because this is not currently used in ICU - reduce code, increase code coverage */
  1.4682 +    const uint8_t *p;
  1.4683 +#endif
  1.4684 +    uint32_t stage2Entry; /* result of MBCS_STAGE_2_FROM_U for c */
  1.4685 +    uint32_t value; /* codepage bytes found in the base table */
  1.4686 +    int32_t length; /* number of bytes in value */
  1.4687 +
  1.4688 +    /* BMP-only codepages are stored without stage 1 entries for supplementary code points;
         +     * for them the base table is skipped when c>0xffff and only the
         +     * extension lookup at the end of the function is tried */
  1.4689 +    if(c<=0xffff || (sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
  1.4690 +        table=sharedData->mbcs.fromUnicodeTable;
  1.4691 +
  1.4692 +        /* convert the Unicode code point in c into codepage bytes (same as in _MBCSFromUnicodeWithOffsets) */
  1.4693 +        if(sharedData->mbcs.outputType==MBCS_OUTPUT_1) {
  1.4694 +            value=MBCS_SINGLE_RESULT_FROM_U(table, (uint16_t *)sharedData->mbcs.fromUnicodeBytes, c);
  1.4695 +            /* is this code point assigned, or do we use fallbacks?
         +             * These are the same thresholds that ucnv_SBCSFromUTF8() uses as minValue:
         +             * 0x800 accepts all roundtrip and fallback results,
         +             * 0xc00 accepts only roundtrips and fallbacks from private-use characters.
         +             * The output byte is the low 8 bits of the result. */
  1.4696 +            if(useFallback ? value>=0x800 : value>=0xc00) {
  1.4697 +                *pValue=value&0xff;
  1.4698 +                return 1;
  1.4699 +            }
  1.4700 +        } else /* outputType!=MBCS_OUTPUT_1 */ {
  1.4701 +            stage2Entry=MBCS_STAGE_2_FROM_U(table, c);
  1.4702 +
  1.4703 +            /* get the bytes and the length for the output;
         +             * the length is derived from the magnitude of value */
  1.4704 +            switch(sharedData->mbcs.outputType) {
  1.4705 +            case MBCS_OUTPUT_2:
  1.4706 +                value=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
  1.4707 +                if(value<=0xff) {
  1.4708 +                    length=1;
  1.4709 +                } else {
  1.4710 +                    length=2;
  1.4711 +                }
  1.4712 +                break;
  1.4713 +#if 0
  1.4714 +/* #if 0 because this is not currently used in ICU - reduce code, increase code coverage */
  1.4715 +            case MBCS_OUTPUT_DBCS_ONLY:
  1.4716 +                /* table with single-byte results, but only DBCS mappings used */
  1.4717 +                value=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
  1.4718 +                if(value<=0xff) {
  1.4719 +                    /* no mapping or SBCS result, not taken for DBCS-only */
  1.4720 +                    value=stage2Entry=0; /* stage2Entry=0 to reset roundtrip flags */
  1.4721 +                    length=0;
  1.4722 +                } else {
  1.4723 +                    length=2;
  1.4724 +                }
  1.4725 +                break;
  1.4726 +            case MBCS_OUTPUT_3:
  1.4727 +                p=MBCS_POINTER_3_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
  1.4728 +                value=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2];
  1.4729 +                if(value<=0xff) {
  1.4730 +                    length=1;
  1.4731 +                } else if(value<=0xffff) {
  1.4732 +                    length=2;
  1.4733 +                } else {
  1.4734 +                    length=3;
  1.4735 +                }
  1.4736 +                break;
  1.4737 +            case MBCS_OUTPUT_4:
  1.4738 +                value=MBCS_VALUE_4_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
  1.4739 +                if(value<=0xff) {
  1.4740 +                    length=1;
  1.4741 +                } else if(value<=0xffff) {
  1.4742 +                    length=2;
  1.4743 +                } else if(value<=0xffffff) {
  1.4744 +                    length=3;
  1.4745 +                } else {
  1.4746 +                    length=4;
  1.4747 +                }
  1.4748 +                break;
  1.4749 +            case MBCS_OUTPUT_3_EUC:
  1.4750 +                value=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
  1.4751 +                /* EUC 16-bit fixed-length representation */
  1.4752 +                if(value<=0xff) {
  1.4753 +                    length=1;
  1.4754 +                } else if((value&0x8000)==0) {
  1.4755 +                    value|=0x8e8000;
  1.4756 +                    length=3;
  1.4757 +                } else if((value&0x80)==0) {
  1.4758 +                    value|=0x8f0080;
  1.4759 +                    length=3;
  1.4760 +                } else {
  1.4761 +                    length=2;
  1.4762 +                }
  1.4763 +                break;
  1.4764 +            case MBCS_OUTPUT_4_EUC:
  1.4765 +                p=MBCS_POINTER_3_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
  1.4766 +                value=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2];
  1.4767 +                /* EUC 16-bit fixed-length representation applied to the first two bytes */
  1.4768 +                if(value<=0xff) {
  1.4769 +                    length=1;
  1.4770 +                } else if(value<=0xffff) {
  1.4771 +                    length=2;
  1.4772 +                } else if((value&0x800000)==0) {
  1.4773 +                    value|=0x8e800000;
  1.4774 +                    length=4;
  1.4775 +                } else if((value&0x8000)==0) {
  1.4776 +                    value|=0x8f008000;
  1.4777 +                    length=4;
  1.4778 +                } else {
  1.4779 +                    length=3;
  1.4780 +                }
  1.4781 +                break;
  1.4782 +#endif
  1.4783 +            default:
  1.4784 +                /* must not occur; with the cases above compiled out this is where
         +                 * every output type other than MBCS_OUTPUT_2 ends up, and the
         +                 * extension lookup below is not attempted */
  1.4785 +                return -1;
  1.4786 +            }
  1.4787 +
  1.4788 +            /* is this code point assigned, or do we use fallbacks?
         +             * A roundtrip entry is always accepted; a fallback is accepted only
         +             * when FROM_U_USE_FALLBACK allows it and the stored value is nonzero. */
  1.4789 +            if( MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c) ||
  1.4790 +                (FROM_U_USE_FALLBACK(useFallback, c) && value!=0)
  1.4791 +            ) {
  1.4792 +                /*
  1.4793 +                 * We allow a 0 byte output if the "assigned" bit is set for this entry.
  1.4794 +                 * There is no way with this data structure for fallback output
  1.4795 +                 * to be a zero byte.
  1.4796 +                 */
  1.4797 +                /* assigned */
  1.4798 +                *pValue=value;
  1.4799 +                return length;
  1.4800 +            }
  1.4801 +        }
  1.4802 +    }
  1.4803 +
  1.4804 +    cx=sharedData->mbcs.extIndexes; /* reached only when the base table gave no usable mapping */
  1.4805 +    if(cx!=NULL) {
  1.4806 +        length=ucnv_extSimpleMatchFromU(cx, c, pValue, useFallback);
  1.4807 +        return length>=0 ? length : -length;  /* return abs(length); the sign of the extension result is dropped */
  1.4808 +    }
  1.4809 +
  1.4810 +    /* unassigned */
  1.4811 +    return 0;
  1.4812 +}
  1.4813 +
  1.4814 +
  1.4815 +#if 0
  1.4816 +/*
  1.4817 + * This function has been moved to ucnv2022.c for inlining.
  1.4818 + * This implementation is here only for documentation purposes
         + * and is excluded from compilation by the surrounding #if 0.
  1.4819 + */
  1.4820 +
  1.4821 +/**
  1.4822 + * This version of ucnv_MBCSFromUChar32() is optimized for single-byte codepages.
  1.4823 + * It does not handle the EBCDIC swaplfnl option (set in UConverter).
  1.4824 + * It does not handle conversion extensions (_extFromU()).
  1.4825 + *
  1.4826 + * It returns the codepage byte for the code point, or -1 if it is unassigned.
         + * Note that ucnv_MBCSFromUChar32() reports unassigned with 0 instead,
         + * because there the return value is a byte count.
         + *
         + * sharedData  supplies mbcs.unicodeMask, mbcs.fromUnicodeTable and mbcs.fromUnicodeBytes
         + * c           the code point to convert
         + * useFallback selects whether fallback results are accepted as well
  1.4827 + */
  1.4828 +U_CFUNC int32_t
  1.4829 +ucnv_MBCSSingleFromUChar32(UConverterSharedData *sharedData,
  1.4830 +                       UChar32 c,
  1.4831 +                       UBool useFallback) {
  1.4832 +    const uint16_t *table;
  1.4833 +    int32_t value;
  1.4834 +
  1.4835 +    /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
  1.4836 +    if(c>=0x10000 && !(sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
  1.4837 +        return -1;
  1.4838 +    }
  1.4839 +
  1.4840 +    /* convert the Unicode code point in c into codepage bytes (same as in _MBCSFromUnicodeWithOffsets) */
  1.4841 +    table=sharedData->mbcs.fromUnicodeTable;
  1.4842 +
  1.4843 +    /* get the byte for the output */
  1.4844 +    value=MBCS_SINGLE_RESULT_FROM_U(table, (uint16_t *)sharedData->mbcs.fromUnicodeBytes, c);
  1.4845 +    /* is this code point assigned, or do we use fallbacks?
         +     * Same thresholds as minValue in ucnv_SBCSFromUTF8(): 0x800 with fallbacks,
         +     * 0xc00 without; the byte itself is the low 8 bits of the result. */
  1.4846 +    if(useFallback ? value>=0x800 : value>=0xc00) {
  1.4847 +        return value&0xff;
  1.4848 +    } else {
  1.4849 +        return -1;
  1.4850 +    }
  1.4851 +}
  1.4852 +#endif
  1.4853 +
  1.4854 +/* MBCS-from-UTF-8 conversion functions ------------------------------------- */
  1.4855 +
  1.4856 +/* minimum code point values for n-byte UTF-8 sequences, n=0..4;
         + * a decoded value below utf8_minLegal[n] is rejected by the callers below,
         + * which is how non-shortest forms are detected */
  1.4857 +static const UChar32
  1.4858 +utf8_minLegal[5]={ 0, 0, 0x80, 0x800, 0x10000 };
  1.4859 +
  1.4860 +/* offsets for n-byte UTF-8 sequences that were calculated with ((lead<<6)+trail)<<6+trail...
         + * Each entry is the sum of the fixed marker bits of the lead byte and of the
         + * trail bytes, for example 0x3080==(0xc0<<6)+0x80 and
         + * 0xE2080==(0xe0<<12)+(0x80<<6)+0x80. The decoders accumulate the raw bytes
         + * with c=(c<<6)+b and subtract the entry to obtain the code point.
         + * The array is declared with 7 elements but only 5 are initialized; the
         + * remaining ones are implicitly zero. ucnv_SBCSFromUTF8() indexes it
         + * with 2, 3 and 4 only. */
  1.4861 +static const UChar32
  1.4862 +utf8_offsets[7]={ 0, 0, 0x3080, 0xE2080, 0x3C82080 };
  1.4863 +
  1.4864 +static void
  1.4865 +ucnv_SBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
  1.4866 +                  UConverterToUnicodeArgs *pToUArgs,
  1.4867 +                  UErrorCode *pErrorCode) { /*
         +     * Converts UTF-8 bytes directly into bytes of a single-byte codepage,
         +     * without producing intermediate UTF-16.
         +     *
         +     * pToUArgs    source and sourceLimit delimit the UTF-8 input; its converter
         +     *             (the UTF-8 converter) only carries the state of a sequence that
         +     *             is split across calls: toUnicodeStatus, toULength, mode, toUBytes
         +     * pFromUArgs  target and targetLimit delimit the output; its converter
         +     *             supplies the codepage tables, options and useFallback
         +     * pErrorCode  set to
         +     *             U_BUFFER_OVERFLOW_ERROR when the target is full and input remains,
         +     *             U_ILLEGAL_CHAR_FOUND for an ill-formed UTF-8 sequence (its bytes
         +     *               are left in utf8->toUBytes with utf8->toULength set),
         +     *             U_USING_DEFAULT_WARNING when the extension code reports a partial
         +     *               match, or whatever failure _extFromU() reports
         +     *
         +     * pToUArgs->source and pFromUArgs->target are written back on every exit path.
         +     * No offsets are produced (NULL is passed to _extFromU()).
         +     */
  1.4868 +    UConverter *utf8, *cnv;
  1.4869 +    const uint8_t *source, *sourceLimit;
  1.4870 +    uint8_t *target;
  1.4871 +    int32_t targetCapacity;
  1.4872 +
  1.4873 +    const uint16_t *table, *sbcsIndex;
  1.4874 +    const uint16_t *results;
  1.4875 +
  1.4876 +    int8_t oldToULength, toULength, toULimit; /* bytes of the current sequence already held in utf8->toUBytes / bytes collected so far / bytes expected in total */
  1.4877 +
  1.4878 +    UChar32 c;
  1.4879 +    uint8_t b, t1, t2;
  1.4880 +
  1.4881 +    uint32_t asciiRoundtrips;
  1.4882 +    uint16_t value, minValue; /* table result for c / smallest result that counts as a usable mapping */
  1.4883 +    UBool hasSupplementary;
  1.4884 +
  1.4885 +    /* set up the local pointers */
  1.4886 +    utf8=pToUArgs->converter;
  1.4887 +    cnv=pFromUArgs->converter;
  1.4888 +    source=(uint8_t *)pToUArgs->source;
  1.4889 +    sourceLimit=(uint8_t *)pToUArgs->sourceLimit;
  1.4890 +    target=(uint8_t *)pFromUArgs->target;
  1.4891 +    targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target);
  1.4892 +
  1.4893 +    table=cnv->sharedData->mbcs.fromUnicodeTable;
  1.4894 +    sbcsIndex=cnv->sharedData->mbcs.sbcsIndex;
  1.4895 +    if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
  1.4896 +        results=(uint16_t *)cnv->sharedData->mbcs.swapLFNLFromUnicodeBytes;
  1.4897 +    } else {
  1.4898 +        results=(uint16_t *)cnv->sharedData->mbcs.fromUnicodeBytes;
  1.4899 +    }
  1.4900 +    asciiRoundtrips=cnv->sharedData->mbcs.asciiRoundtrips;
  1.4901 +
  1.4902 +    if(cnv->useFallback) {
  1.4903 +        /* use all roundtrip and fallback results */
  1.4904 +        minValue=0x800;
  1.4905 +    } else {
  1.4906 +        /* use only roundtrips and fallbacks from private-use characters */
  1.4907 +        minValue=0xc00;
  1.4908 +    }
  1.4909 +    hasSupplementary=(UBool)(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY);
  1.4910 +
  1.4911 +    /* get the converter state from the UTF-8 UConverter;
         +     * a nonzero toUnicodeStatus is the accumulated value of a sequence
         +     * that an earlier call had to leave unfinished */
  1.4912 +    c=(UChar32)utf8->toUnicodeStatus;
  1.4913 +    if(c!=0) {
  1.4914 +        toULength=oldToULength=utf8->toULength;
  1.4915 +        toULimit=(int8_t)utf8->mode;
  1.4916 +    } else {
  1.4917 +        toULength=oldToULength=toULimit=0;
  1.4918 +    }
  1.4919 +
  1.4920 +    /*
  1.4921 +     * Make sure that the last byte sequence before sourceLimit is complete
  1.4922 +     * or runs into a lead byte.
  1.4923 +     * Do not go back into the bytes that will be read for finishing a partial
  1.4924 +     * sequence from the previous buffer.
  1.4925 +     * In the conversion loop compare source with sourceLimit only once
  1.4926 +     * per multi-byte character.
         +     *
         +     * Only the local sourceLimit is lowered here; pToUArgs->sourceLimit keeps
         +     * the real end of the input and is consulted again further down.
  1.4927 +     */
  1.4928 +    {
  1.4929 +        int32_t i, length;
  1.4930 +
  1.4931 +        length=(int32_t)(sourceLimit-source) - (toULimit-oldToULength); /* input bytes beyond those still owed to the pending sequence */
  1.4932 +        for(i=0; i<3 && i<length;) { /* step back over at most 3 trail bytes */
  1.4933 +            b=*(sourceLimit-i-1);
  1.4934 +            if(U8_IS_TRAIL(b)) {
  1.4935 +                ++i;
  1.4936 +            } else {
  1.4937 +                if(i<U8_COUNT_TRAIL_BYTES(b)) {
  1.4938 +                    /* exit the conversion loop before the lead byte if there are not enough trail bytes for it */
  1.4939 +                    sourceLimit-=i+1;
  1.4940 +                }
  1.4941 +                break;
  1.4942 +            }
  1.4943 +        }
  1.4944 +    }
  1.4945 +
  1.4946 +    if(c!=0 && targetCapacity>0) { /* resume the sequence begun in a previous call by jumping into the loop body */
  1.4947 +        utf8->toUnicodeStatus=0;
  1.4948 +        utf8->toULength=0;
  1.4949 +        goto moreBytes;
  1.4950 +        /*
  1.4951 +         * Note: We could avoid the goto by duplicating some of the moreBytes
  1.4952 +         * code, but only up to the point of collecting a complete UTF-8
  1.4953 +         * sequence; then recurse for the toUBytes[toULength]
  1.4954 +         * and then continue with normal conversion.
  1.4955 +         *
  1.4956 +         * If so, move this code to just after initializing the minimum
  1.4957 +         * set of local variables for reading the UTF-8 input
  1.4958 +         * (utf8, source, target, limits but not cnv, table, minValue, etc.).
  1.4959 +         *
  1.4960 +         * Potential advantages:
  1.4961 +         * - avoid the goto
  1.4962 +         * - oldToULength could become a local variable in just those code blocks
  1.4963 +         *   that deal with buffer boundaries
  1.4964 +         * - possibly faster if the goto prevents some compiler optimizations
  1.4965 +         *   (this would need measuring to confirm)
  1.4966 +         * Disadvantage:
  1.4967 +         * - code duplication
  1.4968 +         */
  1.4969 +    }
  1.4970 +
  1.4971 +    /* conversion loop */
  1.4972 +    while(source<sourceLimit) {
  1.4973 +        if(targetCapacity>0) {
  1.4974 +            b=*source++;
  1.4975 +            if((int8_t)b>=0) { /* b<=0x7f */
  1.4976 +                /* convert ASCII */
  1.4977 +                if(IS_ASCII_ROUNDTRIP(b, asciiRoundtrips)) {
  1.4978 +                    *target++=(uint8_t)b;
  1.4979 +                    --targetCapacity;
  1.4980 +                    continue;
  1.4981 +                } else {
  1.4982 +                    c=b;
  1.4983 +                    value=SBCS_RESULT_FROM_UTF8(sbcsIndex, results, 0, c);
  1.4984 +                }
  1.4985 +            } else {
  1.4986 +                if(b<0xe0) {
  1.4987 +                    if( /* handle U+0080..U+07FF inline */
  1.4988 +                        b>=0xc2 && /* rejects trail bytes and the leads 0xc0 and 0xc1 */
  1.4989 +                        (t1=(uint8_t)(*source-0x80)) <= 0x3f /* next byte is in 0x80..0xbf */
  1.4990 +                    ) {
  1.4991 +                        c=b&0x1f;
  1.4992 +                        ++source;
  1.4993 +                        value=SBCS_RESULT_FROM_UTF8(sbcsIndex, results, c, t1);
  1.4994 +                        if(value>=minValue) {
  1.4995 +                            *target++=(uint8_t)value;
  1.4996 +                            --targetCapacity;
  1.4997 +                            continue;
  1.4998 +                        } else {
  1.4999 +                            c=(c<<6)|t1; /* no usable table result: build the code point for the extension lookup below */
  1.5000 +                        }
  1.5001 +                    } else {
  1.5002 +                        c=-1; /* let the general code below deal with it */
  1.5003 +                    }
  1.5004 +                } else if(b==0xe0) {
  1.5005 +                    if( /* handle U+0800..U+0FFF inline */
  1.5006 +                        (t1=(uint8_t)(source[0]-0x80)) <= 0x3f && t1 >= 0x20 && /* first trail byte in 0xa0..0xbf */
  1.5007 +                        (t2=(uint8_t)(source[1]-0x80)) <= 0x3f
  1.5008 +                    ) {
  1.5009 +                        c=t1;
  1.5010 +                        source+=2;
  1.5011 +                        value=SBCS_RESULT_FROM_UTF8(sbcsIndex, results, c, t2);
  1.5012 +                        if(value>=minValue) {
  1.5013 +                            *target++=(uint8_t)value;
  1.5014 +                            --targetCapacity;
  1.5015 +                            continue;
  1.5016 +                        } else {
  1.5017 +                            c=(c<<6)|t2; /* no usable table result: build the code point for the extension lookup below */
  1.5018 +                        }
  1.5019 +                    } else {
  1.5020 +                        c=-1;
  1.5021 +                    }
  1.5022 +                } else {
  1.5023 +                    c=-1; /* lead bytes above 0xe0 always take the general code below */
  1.5024 +                }
  1.5025 +
  1.5026 +                if(c<0) {
  1.5027 +                    /* handle "complicated" and error cases, and continuing partial characters */
  1.5028 +                    oldToULength=0;
  1.5029 +                    toULength=1;
  1.5030 +                    toULimit=U8_COUNT_TRAIL_BYTES(b)+1;
  1.5031 +                    c=b;
  1.5032 +moreBytes:
  1.5033 +                    while(toULength<toULimit) {
  1.5034 +                        /*
  1.5035 +                         * The sourceLimit may have been adjusted before the conversion loop
  1.5036 +                         * to stop before a truncated sequence.
  1.5037 +                         * Here we need to use the real limit in case we have two truncated
  1.5038 +                         * sequences at the end.
  1.5039 +                         * See ticket #7492.
  1.5040 +                         */
  1.5041 +                        if(source<(uint8_t *)pToUArgs->sourceLimit) {
  1.5042 +                            b=*source;
  1.5043 +                            if(U8_IS_TRAIL(b)) {
  1.5044 +                                ++source;
  1.5045 +                                ++toULength;
  1.5046 +                                c=(c<<6)+b; /* raw bytes are accumulated; utf8_offsets[] is subtracted once the sequence is complete */
  1.5047 +                            } else {
  1.5048 +                                break; /* sequence too short, stop with toULength<toULimit */
  1.5049 +                            }
  1.5050 +                        } else {
  1.5051 +                            /* store the partial UTF-8 character, compatible with the regular UTF-8 converter;
         +                             * source is first moved back to the first byte of this sequence
         +                             * that lies in the current buffer, then those bytes are appended
         +                             * to the ones already kept in toUBytes */
  1.5052 +                            source-=(toULength-oldToULength);
  1.5053 +                            while(oldToULength<toULength) {
  1.5054 +                                utf8->toUBytes[oldToULength++]=*source++;
  1.5055 +                            }
  1.5056 +                            utf8->toUnicodeStatus=c;
  1.5057 +                            utf8->toULength=toULength;
  1.5058 +                            utf8->mode=toULimit;
  1.5059 +                            pToUArgs->source=(char *)source;
  1.5060 +                            pFromUArgs->target=(char *)target;
  1.5061 +                            return;
  1.5062 +                        }
  1.5063 +                    }
  1.5064 +
  1.5065 +                    if( toULength==toULimit &&      /* consumed all trail bytes */
  1.5066 +                        (toULength==3 || toULength==2) &&             /* BMP */
  1.5067 +                        (c-=utf8_offsets[toULength])>=utf8_minLegal[toULength] && /* strip the marker bits, then require shortest form */
  1.5068 +                        (c<=0xd7ff || 0xe000<=c)    /* not a surrogate */
  1.5069 +                    ) {
  1.5070 +                        value=MBCS_SINGLE_RESULT_FROM_U(table, results, c);
  1.5071 +                    } else if(
  1.5072 +                        toULength==toULimit && toULength==4 &&
  1.5073 +                        (0x10000<=(c-=utf8_offsets[4]) && c<=0x10ffff)
  1.5074 +                    ) {
  1.5075 +                        /* supplementary code point */
  1.5076 +                        if(!hasSupplementary) {
  1.5077 +                            /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
  1.5078 +                            value=0; /* below minValue, so the extension lookup is tried next */
  1.5079 +                        } else {
  1.5080 +                            value=MBCS_SINGLE_RESULT_FROM_U(table, results, c);
  1.5081 +                        }
  1.5082 +                    } else {
  1.5083 +                        /* error handling: illegal UTF-8 byte sequence;
         +                         * the bytes of the sequence are handed over in utf8->toUBytes,
         +                         * using the same rewind-and-copy steps as for a partial sequence */
  1.5084 +                        source-=(toULength-oldToULength);
  1.5085 +                        while(oldToULength<toULength) {
  1.5086 +                            utf8->toUBytes[oldToULength++]=*source++;
  1.5087 +                        }
  1.5088 +                        utf8->toULength=toULength;
  1.5089 +                        pToUArgs->source=(char *)source;
  1.5090 +                        pFromUArgs->target=(char *)target;
  1.5091 +                        *pErrorCode=U_ILLEGAL_CHAR_FOUND;
  1.5092 +                        return;
  1.5093 +                    }
  1.5094 +                }
  1.5095 +            }
  1.5096 +
  1.5097 +            if(value>=minValue) {
  1.5098 +                /* output the mapping for c */
  1.5099 +                *target++=(uint8_t)value;
  1.5100 +                --targetCapacity;
  1.5101 +            } else {
  1.5102 +                /* value<minValue means c is unassigned (unmappable) */
  1.5103 +                /*
  1.5104 +                 * Try an extension mapping.
  1.5105 +                 * Pass in no source because we don't have UTF-16 input.
  1.5106 +                 * If we have a partial match on c, we will return and revert
  1.5107 +                 * to UTF-8->UTF-16->charset conversion.
  1.5108 +                 */
  1.5109 +                static const UChar nul=0;
  1.5110 +                const UChar *noSource=&nul; /* empty UTF-16 range: start and limit are the same pointer */
  1.5111 +                c=_extFromU(cnv, cnv->sharedData,
  1.5112 +                            c, &noSource, noSource,
  1.5113 +                            &target, target+targetCapacity,
  1.5114 +                            NULL, -1,
  1.5115 +                            pFromUArgs->flush,
  1.5116 +                            pErrorCode);
  1.5117 +
  1.5118 +                if(U_FAILURE(*pErrorCode)) {
  1.5119 +                    /* not mappable or buffer overflow */
  1.5120 +                    cnv->fromUChar32=c;
  1.5121 +                    break;
  1.5122 +                } else if(cnv->preFromUFirstCP>=0) {
  1.5123 +                    /*
  1.5124 +                     * Partial match, return and revert to pivoting.
  1.5125 +                     * In normal from-UTF-16 conversion, we would just continue
  1.5126 +                     * but then exit the loop because the extension match would
  1.5127 +                     * have consumed the source.
  1.5128 +                     */
  1.5129 +                    *pErrorCode=U_USING_DEFAULT_WARNING;
  1.5130 +                    break;
  1.5131 +                } else {
  1.5132 +                    /* a mapping was written to the target, continue */
  1.5133 +
  1.5134 +                    /* recalculate the targetCapacity after an extension mapping */
  1.5135 +                    targetCapacity=(int32_t)(pFromUArgs->targetLimit-(char *)target);
  1.5136 +                }
  1.5137 +            }
  1.5138 +        } else {
  1.5139 +            /* target is full */
  1.5140 +            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
  1.5141 +            break;
  1.5142 +        }
  1.5143 +    }
  1.5144 +
  1.5145 +    /*
  1.5146 +     * The sourceLimit may have been adjusted before the conversion loop
  1.5147 +     * to stop before a truncated sequence.
  1.5148 +     * If so, then collect the truncated sequence now.
         +     *
         +     * The preFromUFirstCP<0 test keeps this from running after the
         +     * partial-match exit above. All remaining bytes up to the real limit
         +     * are stored in utf8->toUBytes and accumulated in c the same way as in
         +     * the moreBytes loop, so that a later call can resume the sequence.
  1.5149 +     */
  1.5150 +    if(U_SUCCESS(*pErrorCode) &&
  1.5151 +            cnv->preFromUFirstCP<0 &&
  1.5152 +            source<(sourceLimit=(uint8_t *)pToUArgs->sourceLimit)) {
  1.5153 +        c=utf8->toUBytes[0]=b=*source++;
  1.5154 +        toULength=1;
  1.5155 +        toULimit=U8_COUNT_TRAIL_BYTES(b)+1;
  1.5156 +        while(source<sourceLimit) {
  1.5157 +            utf8->toUBytes[toULength++]=b=*source++;
  1.5158 +            c=(c<<6)+b;
  1.5159 +        }
  1.5160 +        utf8->toUnicodeStatus=c;
  1.5161 +        utf8->toULength=toULength;
  1.5162 +        utf8->mode=toULimit;
  1.5163 +    }
  1.5164 +
  1.5165 +    /* write back the updated pointers */
  1.5166 +    pToUArgs->source=(char *)source;
  1.5167 +    pFromUArgs->target=(char *)target;
  1.5168 +}
  1.5169 +
  1.5170 +static void
  1.5171 +ucnv_DBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
  1.5172 +                  UConverterToUnicodeArgs *pToUArgs,
  1.5173 +                  UErrorCode *pErrorCode) {
  1.5174 +    UConverter *utf8, *cnv;
  1.5175 +    const uint8_t *source, *sourceLimit;
  1.5176 +    uint8_t *target;
  1.5177 +    int32_t targetCapacity;
  1.5178 +
  1.5179 +    const uint16_t *table, *mbcsIndex;
  1.5180 +    const uint16_t *results;
  1.5181 +
  1.5182 +    int8_t oldToULength, toULength, toULimit;
  1.5183 +
  1.5184 +    UChar32 c;
  1.5185 +    uint8_t b, t1, t2;
  1.5186 +
  1.5187 +    uint32_t stage2Entry;
  1.5188 +    uint32_t asciiRoundtrips;
  1.5189 +    uint16_t value;
  1.5190 +    UBool hasSupplementary;
  1.5191 +
  1.5192 +    /* set up the local pointers */
  1.5193 +    utf8=pToUArgs->converter;
  1.5194 +    cnv=pFromUArgs->converter;
  1.5195 +    source=(uint8_t *)pToUArgs->source;
  1.5196 +    sourceLimit=(uint8_t *)pToUArgs->sourceLimit;
  1.5197 +    target=(uint8_t *)pFromUArgs->target;
  1.5198 +    targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target);
  1.5199 +
  1.5200 +    table=cnv->sharedData->mbcs.fromUnicodeTable;
  1.5201 +    mbcsIndex=cnv->sharedData->mbcs.mbcsIndex;
  1.5202 +    if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
  1.5203 +        results=(uint16_t *)cnv->sharedData->mbcs.swapLFNLFromUnicodeBytes;
  1.5204 +    } else {
  1.5205 +        results=(uint16_t *)cnv->sharedData->mbcs.fromUnicodeBytes;
  1.5206 +    }
  1.5207 +    asciiRoundtrips=cnv->sharedData->mbcs.asciiRoundtrips;
  1.5208 +
  1.5209 +    hasSupplementary=(UBool)(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY);
  1.5210 +
  1.5211 +    /* get the converter state from the UTF-8 UConverter */
  1.5212 +    c=(UChar32)utf8->toUnicodeStatus;
  1.5213 +    if(c!=0) {
  1.5214 +        toULength=oldToULength=utf8->toULength;
  1.5215 +        toULimit=(int8_t)utf8->mode;
  1.5216 +    } else {
  1.5217 +        toULength=oldToULength=toULimit=0;
  1.5218 +    }
  1.5219 +
  1.5220 +    /*
  1.5221 +     * Make sure that the last byte sequence before sourceLimit is complete
  1.5222 +     * or runs into a lead byte.
  1.5223 +     * Do not go back into the bytes that will be read for finishing a partial
  1.5224 +     * sequence from the previous buffer.
  1.5225 +     * In the conversion loop compare source with sourceLimit only once
  1.5226 +     * per multi-byte character.
  1.5227 +     */
  1.5228 +    {
  1.5229 +        int32_t i, length;
  1.5230 +
  1.5231 +        length=(int32_t)(sourceLimit-source) - (toULimit-oldToULength);
  1.5232 +        for(i=0; i<3 && i<length;) {
  1.5233 +            b=*(sourceLimit-i-1);
  1.5234 +            if(U8_IS_TRAIL(b)) {
  1.5235 +                ++i;
  1.5236 +            } else {
  1.5237 +                if(i<U8_COUNT_TRAIL_BYTES(b)) {
  1.5238 +                    /* exit the conversion loop before the lead byte if there are not enough trail bytes for it */
  1.5239 +                    sourceLimit-=i+1;
  1.5240 +                }
  1.5241 +                break;
  1.5242 +            }
  1.5243 +        }
  1.5244 +    }
  1.5245 +
  1.5246 +    if(c!=0 && targetCapacity>0) {
  1.5247 +        utf8->toUnicodeStatus=0;
  1.5248 +        utf8->toULength=0;
  1.5249 +        goto moreBytes;
  1.5250 +        /* See note in ucnv_SBCSFromUTF8() about this goto. */
  1.5251 +    }
  1.5252 +
  1.5253 +    /* conversion loop */
  1.5254 +    while(source<sourceLimit) {
  1.5255 +        if(targetCapacity>0) {
  1.5256 +            b=*source++;
  1.5257 +            if((int8_t)b>=0) {
  1.5258 +                /* convert ASCII */
  1.5259 +                if(IS_ASCII_ROUNDTRIP(b, asciiRoundtrips)) {
  1.5260 +                    *target++=b;
  1.5261 +                    --targetCapacity;
  1.5262 +                    continue;
  1.5263 +                } else {
  1.5264 +                    value=DBCS_RESULT_FROM_UTF8(mbcsIndex, results, 0, b);
  1.5265 +                    if(value==0) {
  1.5266 +                        c=b;
  1.5267 +                        goto unassigned;
  1.5268 +                    }
  1.5269 +                }
  1.5270 +            } else {
  1.5271 +                if(b>0xe0) {
  1.5272 +                    if( /* handle U+1000..U+D7FF inline */
  1.5273 +                        (((t1=(uint8_t)(source[0]-0x80), b<0xed) && (t1 <= 0x3f)) ||
  1.5274 +                                                        (b==0xed && (t1 <= 0x1f))) &&
  1.5275 +                        (t2=(uint8_t)(source[1]-0x80)) <= 0x3f
  1.5276 +                    ) {
  1.5277 +                        c=((b&0xf)<<6)|t1;
  1.5278 +                        source+=2;
  1.5279 +                        value=DBCS_RESULT_FROM_UTF8(mbcsIndex, results, c, t2);
  1.5280 +                        if(value==0) {
  1.5281 +                            c=(c<<6)|t2;
  1.5282 +                            goto unassigned;
  1.5283 +                        }
  1.5284 +                    } else {
  1.5285 +                        c=-1;
  1.5286 +                    }
  1.5287 +                } else if(b<0xe0) {
  1.5288 +                    if( /* handle U+0080..U+07FF inline */
  1.5289 +                        b>=0xc2 &&
  1.5290 +                        (t1=(uint8_t)(*source-0x80)) <= 0x3f
  1.5291 +                    ) {
  1.5292 +                        c=b&0x1f;
  1.5293 +                        ++source;
  1.5294 +                        value=DBCS_RESULT_FROM_UTF8(mbcsIndex, results, c, t1);
  1.5295 +                        if(value==0) {
  1.5296 +                            c=(c<<6)|t1;
  1.5297 +                            goto unassigned;
  1.5298 +                        }
  1.5299 +                    } else {
  1.5300 +                        c=-1;
  1.5301 +                    }
  1.5302 +                } else {
  1.5303 +                    c=-1;
  1.5304 +                }
  1.5305 +
  1.5306 +                if(c<0) {
  1.5307 +                    /* handle "complicated" and error cases, and continuing partial characters */
  1.5308 +                    oldToULength=0;
  1.5309 +                    toULength=1;
  1.5310 +                    toULimit=U8_COUNT_TRAIL_BYTES(b)+1;
  1.5311 +                    c=b;
  1.5312 +moreBytes:
  1.5313 +                    while(toULength<toULimit) {
  1.5314 +                        /*
  1.5315 +                         * The sourceLimit may have been adjusted before the conversion loop
  1.5316 +                         * to stop before a truncated sequence.
  1.5317 +                         * Here we need to use the real limit in case we have two truncated
  1.5318 +                         * sequences at the end.
  1.5319 +                         * See ticket #7492.
  1.5320 +                         */
  1.5321 +                        if(source<(uint8_t *)pToUArgs->sourceLimit) {
  1.5322 +                            b=*source;
  1.5323 +                            if(U8_IS_TRAIL(b)) {
  1.5324 +                                ++source;
  1.5325 +                                ++toULength;
  1.5326 +                                c=(c<<6)+b;
  1.5327 +                            } else {
  1.5328 +                                break; /* sequence too short, stop with toULength<toULimit */
  1.5329 +                            }
  1.5330 +                        } else {
  1.5331 +                            /* store the partial UTF-8 character, compatible with the regular UTF-8 converter */
  1.5332 +                            source-=(toULength-oldToULength);
  1.5333 +                            while(oldToULength<toULength) {
  1.5334 +                                utf8->toUBytes[oldToULength++]=*source++;
  1.5335 +                            }
  1.5336 +                            utf8->toUnicodeStatus=c;
  1.5337 +                            utf8->toULength=toULength;
  1.5338 +                            utf8->mode=toULimit;
  1.5339 +                            pToUArgs->source=(char *)source;
  1.5340 +                            pFromUArgs->target=(char *)target;
  1.5341 +                            return;
  1.5342 +                        }
  1.5343 +                    }
  1.5344 +
  1.5345 +                    if( toULength==toULimit &&      /* consumed all trail bytes */
  1.5346 +                        (toULength==3 || toULength==2) &&             /* BMP */
  1.5347 +                        (c-=utf8_offsets[toULength])>=utf8_minLegal[toULength] &&
  1.5348 +                        (c<=0xd7ff || 0xe000<=c)    /* not a surrogate */
  1.5349 +                    ) {
  1.5350 +                        stage2Entry=MBCS_STAGE_2_FROM_U(table, c);
  1.5351 +                    } else if(
  1.5352 +                        toULength==toULimit && toULength==4 &&
  1.5353 +                        (0x10000<=(c-=utf8_offsets[4]) && c<=0x10ffff)
  1.5354 +                    ) {
  1.5355 +                        /* supplementary code point */
  1.5356 +                        if(!hasSupplementary) {
  1.5357 +                            /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
  1.5358 +                            stage2Entry=0;
  1.5359 +                        } else {
  1.5360 +                            stage2Entry=MBCS_STAGE_2_FROM_U(table, c);
  1.5361 +                        }
  1.5362 +                    } else {
  1.5363 +                        /* error handling: illegal UTF-8 byte sequence */
  1.5364 +                        source-=(toULength-oldToULength);
  1.5365 +                        while(oldToULength<toULength) {
  1.5366 +                            utf8->toUBytes[oldToULength++]=*source++;
  1.5367 +                        }
  1.5368 +                        utf8->toULength=toULength;
  1.5369 +                        pToUArgs->source=(char *)source;
  1.5370 +                        pFromUArgs->target=(char *)target;
  1.5371 +                        *pErrorCode=U_ILLEGAL_CHAR_FOUND;
  1.5372 +                        return;
  1.5373 +                    }
  1.5374 +
  1.5375 +                    /* get the bytes and the length for the output */
  1.5376 +                    /* MBCS_OUTPUT_2 */
  1.5377 +                    value=MBCS_VALUE_2_FROM_STAGE_2(results, stage2Entry, c);
  1.5378 +
  1.5379 +                    /* is this code point assigned, or do we use fallbacks? */
  1.5380 +                    if(!(MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c) ||
  1.5381 +                         (UCNV_FROM_U_USE_FALLBACK(cnv, c) && value!=0))
  1.5382 +                    ) {
  1.5383 +                        goto unassigned;
  1.5384 +                    }
  1.5385 +                }
  1.5386 +            }
  1.5387 +
  1.5388 +            /* write the output character bytes from value and length */
  1.5389 +            /* from the first if in the loop we know that targetCapacity>0 */
  1.5390 +            if(value<=0xff) {
  1.5391 +                /* this is easy because we know that there is enough space */
  1.5392 +                *target++=(uint8_t)value;
  1.5393 +                --targetCapacity;
  1.5394 +            } else /* length==2 */ {
  1.5395 +                *target++=(uint8_t)(value>>8);
  1.5396 +                if(2<=targetCapacity) {
  1.5397 +                    *target++=(uint8_t)value;
  1.5398 +                    targetCapacity-=2;
  1.5399 +                } else {
  1.5400 +                    cnv->charErrorBuffer[0]=(char)value;
  1.5401 +                    cnv->charErrorBufferLength=1;
  1.5402 +
  1.5403 +                    /* target overflow */
  1.5404 +                    *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
  1.5405 +                    break;
  1.5406 +                }
  1.5407 +            }
  1.5408 +            continue;
  1.5409 +
  1.5410 +unassigned:
  1.5411 +            {
  1.5412 +                /*
  1.5413 +                 * Try an extension mapping.
  1.5414 +                 * Pass in no source because we don't have UTF-16 input.
  1.5415 +                 * If we have a partial match on c, we will return and revert
  1.5416 +                 * to UTF-8->UTF-16->charset conversion.
  1.5417 +                 */
  1.5418 +                static const UChar nul=0;
  1.5419 +                const UChar *noSource=&nul;
  1.5420 +                c=_extFromU(cnv, cnv->sharedData,
  1.5421 +                            c, &noSource, noSource,
  1.5422 +                            &target, target+targetCapacity,
  1.5423 +                            NULL, -1,
  1.5424 +                            pFromUArgs->flush,
  1.5425 +                            pErrorCode);
  1.5426 +
  1.5427 +                if(U_FAILURE(*pErrorCode)) {
  1.5428 +                    /* not mappable or buffer overflow */
  1.5429 +                    cnv->fromUChar32=c;
  1.5430 +                    break;
  1.5431 +                } else if(cnv->preFromUFirstCP>=0) {
  1.5432 +                    /*
  1.5433 +                     * Partial match, return and revert to pivoting.
  1.5434 +                     * In normal from-UTF-16 conversion, we would just continue
  1.5435 +                     * but then exit the loop because the extension match would
  1.5436 +                     * have consumed the source.
  1.5437 +                     */
  1.5438 +                    *pErrorCode=U_USING_DEFAULT_WARNING;
  1.5439 +                    break;
  1.5440 +                } else {
  1.5441 +                    /* a mapping was written to the target, continue */
  1.5442 +
  1.5443 +                    /* recalculate the targetCapacity after an extension mapping */
  1.5444 +                    targetCapacity=(int32_t)(pFromUArgs->targetLimit-(char *)target);
  1.5445 +                    continue;
  1.5446 +                }
  1.5447 +            }
  1.5448 +        } else {
  1.5449 +            /* target is full */
  1.5450 +            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
  1.5451 +            break;
  1.5452 +        }
  1.5453 +    }
  1.5454 +
  1.5455 +    /*
  1.5456 +     * The sourceLimit may have been adjusted before the conversion loop
  1.5457 +     * to stop before a truncated sequence.
  1.5458 +     * If so, then collect the truncated sequence now.
  1.5459 +     */
  1.5460 +    if(U_SUCCESS(*pErrorCode) &&
  1.5461 +            cnv->preFromUFirstCP<0 &&
  1.5462 +            source<(sourceLimit=(uint8_t *)pToUArgs->sourceLimit)) {
  1.5463 +        c=utf8->toUBytes[0]=b=*source++;
  1.5464 +        toULength=1;
  1.5465 +        toULimit=U8_COUNT_TRAIL_BYTES(b)+1;
  1.5466 +        while(source<sourceLimit) {
  1.5467 +            utf8->toUBytes[toULength++]=b=*source++;
  1.5468 +            c=(c<<6)+b;
  1.5469 +        }
  1.5470 +        utf8->toUnicodeStatus=c;
  1.5471 +        utf8->toULength=toULength;
  1.5472 +        utf8->mode=toULimit;
  1.5473 +    }
  1.5474 +
  1.5475 +    /* write back the updated pointers */
  1.5476 +    pToUArgs->source=(char *)source;
  1.5477 +    pFromUArgs->target=(char *)target;
  1.5478 +}
  1.5479 +
  1.5480 +/* miscellaneous ------------------------------------------------------------ */
  1.5481 +
  1.5482 +/* Marks each byte value 0..255 in starters[] as a lead byte or not; pErrorCode is neither read nor written. */
  1.5483 +static void ucnv_MBCSGetStarters(const UConverter* cnv,
  1.5484 +                                 UBool starters[256],
  1.5485 +                                 UErrorCode *pErrorCode) {
  1.5486 +    /* the state table row selected by dbcsOnlyState is used as the initial state */
  1.5487 +    const int32_t *initialRow=cnv->sharedData->mbcs.stateTable[cnv->sharedData->mbcs.dbcsOnlyState];
  1.5488 +    int byteValue=0;
  1.5489 +    while(byteValue<256) {
  1.5490 +        /* bytes whose entry in that row is a state transition are lead bytes */
  1.5491 +        starters[byteValue]=(UBool)MBCS_ENTRY_IS_TRANSITION(initialRow[byteValue]);
  1.5492 +        ++byteValue;
  1.5493 +    }
  1.5494 +}
  1.5495 +
  1.5496 +/*
  1.5497 + * Internal helper for other converter implementations: reports whether the
  1.5498 + * state-0 entry for the given byte is a transition entry, i.e. a lead byte.
  1.5499 + */
  1.5500 +U_CFUNC UBool ucnv_MBCSIsLeadByte(UConverterSharedData *sharedData, char byte) {
  1.5501 +    const uint8_t byteIndex=(uint8_t)byte; /* index with the unsigned value even where char is signed */
  1.5502 +    return (UBool)MBCS_ENTRY_IS_TRANSITION(sharedData->mbcs.stateTable[0][byteIndex]);
  1.5503 +}
  1.5504 +
  1.5505 +/* Writes the substitution bytes for an unmappable code point via ucnv_cbFromUWriteBytes(); SI/SO output gets a shift byte first when the mode changes. */
  1.5506 +static void ucnv_MBCSWriteSub(UConverterFromUnicodeArgs *pArgs,
  1.5507 +                              int32_t offsetIndex,
  1.5508 +                              UErrorCode *pErrorCode) {
  1.5509 +    UConverter *cnv=pArgs->converter;
  1.5510 +    char shifted[4]; /* holds an optional shift byte followed by one or two substitution bytes */
  1.5511 +    char *out, *sub;
  1.5512 +    int32_t subLength;
  1.5513 +    UBool wantSubChar1;
  1.5514 +
  1.5515 +    /* subChar1 is a candidate only if it is set (not 0) */
  1.5516 +    if(cnv->subChar1==0) {
  1.5517 +        wantSubChar1=FALSE;
  1.5518 +    } else if(cnv->sharedData->mbcs.extIndexes!=NULL) {
  1.5519 +        /* extension indexes are present: follow the useSubChar1 selector */
  1.5520 +        wantSubChar1=(UBool)(cnv->useSubChar1!=0);
  1.5521 +    } else {
  1.5522 +        /* otherwise select it for code points up to U+00ff (IBM MBCS behavior) */
  1.5523 +        wantSubChar1=(UBool)(cnv->invalidUCharBuffer[0]<=0xff);
  1.5524 +    }
  1.5525 +    if(wantSubChar1) {
  1.5526 +        sub=(char *)&cnv->subChar1;
  1.5527 +        subLength=1;
  1.5528 +    } else {
  1.5529 +        sub=(char *)cnv->subChars;
  1.5530 +        subLength=cnv->subCharLen;
  1.5531 +    }
  1.5532 +
  1.5533 +    /* clear the selector so that it does not carry over to the next code point */
  1.5534 +    cnv->useSubChar1=FALSE;
  1.5535 +
  1.5536 +    if(cnv->sharedData->mbcs.outputType==MBCS_OUTPUT_2_SISO) {
  1.5537 +        /* fromUnicodeStatus contains the previous output length: 2 means DBCS mode */
  1.5538 +        out=shifted;
  1.5539 +        if(subLength==1) {
  1.5540 +            if(cnv->fromUnicodeStatus==2) {
  1.5541 +                /* DBCS mode but single-byte substitution: switch to SBCS with SI */
  1.5542 +                cnv->fromUnicodeStatus=1;
  1.5543 +                *out++=UCNV_SI;
  1.5544 +            }
  1.5545 +            *out++=sub[0];
  1.5546 +        } else if(subLength==2) {
  1.5547 +            if(cnv->fromUnicodeStatus<=1) {
  1.5548 +                /* SBCS mode but double-byte substitution: switch to DBCS with SO */
  1.5549 +                cnv->fromUnicodeStatus=2;
  1.5550 +                *out++=UCNV_SO;
  1.5551 +            }
  1.5552 +            *out++=sub[0];
  1.5553 +            *out++=sub[1];
  1.5554 +        } else {
  1.5555 +            /* only 1- and 2-byte substitution sequences are handled here */
  1.5556 +            *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
  1.5557 +            return;
  1.5558 +        }
  1.5559 +        sub=shifted;
  1.5560 +        subLength=(int32_t)(out-shifted);
  1.5561 +    }
  1.5562 +    ucnv_cbFromUWriteBytes(pArgs, sub, subLength, offsetIndex, pErrorCode);
  1.5563 +}
  1.5564 +
  1.5565 +/* MBCS replaces SBCS, DBCS and EBCDIC_STATEFUL, but the legacy type is still reported where the shared data matches one of them. */
  1.5566 +U_CFUNC UConverterType ucnv_MBCSGetType(const UConverter* converter) {
  1.5567 +    const UConverterSharedData *shared=converter->sharedData;
  1.5568 +    if(shared->mbcs.countStates==1) {
  1.5569 +        return (UConverterType)UCNV_SBCS;
  1.5570 +    }
  1.5571 +    if((shared->mbcs.outputType&0xff)==MBCS_OUTPUT_2_SISO) {
  1.5572 +        return (UConverterType)UCNV_EBCDIC_STATEFUL;
  1.5573 +    }
  1.5574 +    return (shared->staticData->minBytesPerChar==2 && shared->staticData->maxBytesPerChar==2) ?
  1.5575 +        (UConverterType)UCNV_DBCS : (UConverterType)UCNV_MBCS;
  1.5576 +}
  1.5577 +
  1.5578 +static const UConverterImpl _SBCSUTF8Impl={ /* same entries as _MBCSImpl, followed by NULL and ucnv_SBCSFromUTF8 */
  1.5579 +    UCNV_MBCS,
  1.5580 +
  1.5581 +    ucnv_MBCSLoad,
  1.5582 +    ucnv_MBCSUnload,
  1.5583 +
  1.5584 +    ucnv_MBCSOpen,
  1.5585 +    NULL, /* NOTE(review): presumably optional hooks that are not provided; confirm against the UConverterImpl declaration */
  1.5586 +    NULL,
  1.5587 +
  1.5588 +    ucnv_MBCSToUnicodeWithOffsets, /* the same to-Unicode function fills two consecutive entries */
  1.5589 +    ucnv_MBCSToUnicodeWithOffsets,
  1.5590 +    ucnv_MBCSFromUnicodeWithOffsets, /* likewise for from-Unicode */
  1.5591 +    ucnv_MBCSFromUnicodeWithOffsets,
  1.5592 +    ucnv_MBCSGetNextUChar,
  1.5593 +
  1.5594 +    ucnv_MBCSGetStarters,
  1.5595 +    ucnv_MBCSGetName,
  1.5596 +    ucnv_MBCSWriteSub,
  1.5597 +    NULL,
  1.5598 +    ucnv_MBCSGetUnicodeSet,
  1.5599 +
  1.5600 +    NULL, /* NOTE(review): presumably the to-UTF-8 entry, left unset; confirm */
  1.5601 +    ucnv_SBCSFromUTF8 /* the only entry that differs from _DBCSUTF8Impl */
  1.5602 +};
  1.5603 +
  1.5604 +static const UConverterImpl _DBCSUTF8Impl={ /* same entries as _MBCSImpl, followed by NULL and ucnv_DBCSFromUTF8 */
  1.5605 +    UCNV_MBCS,
  1.5606 +
  1.5607 +    ucnv_MBCSLoad,
  1.5608 +    ucnv_MBCSUnload,
  1.5609 +
  1.5610 +    ucnv_MBCSOpen,
  1.5611 +    NULL, /* NOTE(review): presumably optional hooks that are not provided; confirm against the UConverterImpl declaration */
  1.5612 +    NULL,
  1.5613 +
  1.5614 +    ucnv_MBCSToUnicodeWithOffsets, /* the same to-Unicode function fills two consecutive entries */
  1.5615 +    ucnv_MBCSToUnicodeWithOffsets,
  1.5616 +    ucnv_MBCSFromUnicodeWithOffsets, /* likewise for from-Unicode */
  1.5617 +    ucnv_MBCSFromUnicodeWithOffsets,
  1.5618 +    ucnv_MBCSGetNextUChar,
  1.5619 +
  1.5620 +    ucnv_MBCSGetStarters,
  1.5621 +    ucnv_MBCSGetName,
  1.5622 +    ucnv_MBCSWriteSub,
  1.5623 +    NULL,
  1.5624 +    ucnv_MBCSGetUnicodeSet,
  1.5625 +
  1.5626 +    NULL, /* NOTE(review): presumably the to-UTF-8 entry, left unset; confirm */
  1.5627 +    ucnv_DBCSFromUTF8 /* the only entry that differs from _SBCSUTF8Impl */
  1.5628 +};
  1.5629 +
  1.5630 +static const UConverterImpl _MBCSImpl={ /* generic table; this is the one referenced by _MBCSData */
  1.5631 +    UCNV_MBCS,
  1.5632 +
  1.5633 +    ucnv_MBCSLoad,
  1.5634 +    ucnv_MBCSUnload,
  1.5635 +
  1.5636 +    ucnv_MBCSOpen,
  1.5637 +    NULL, /* NOTE(review): presumably optional hooks that are not provided; confirm against the UConverterImpl declaration */
  1.5638 +    NULL,
  1.5639 +
  1.5640 +    ucnv_MBCSToUnicodeWithOffsets, /* the same to-Unicode function fills two consecutive entries */
  1.5641 +    ucnv_MBCSToUnicodeWithOffsets,
  1.5642 +    ucnv_MBCSFromUnicodeWithOffsets, /* likewise for from-Unicode */
  1.5643 +    ucnv_MBCSFromUnicodeWithOffsets,
  1.5644 +    ucnv_MBCSGetNextUChar,
  1.5645 +
  1.5646 +    ucnv_MBCSGetStarters,
  1.5647 +    ucnv_MBCSGetName,
  1.5648 +    ucnv_MBCSWriteSub,
  1.5649 +    NULL,
  1.5650 +    ucnv_MBCSGetUnicodeSet /* list ends here: the two trailing entries that _SBCSUTF8Impl and _DBCSUTF8Impl set are omitted and therefore zero-initialized */
  1.5651 +};
  1.5652 +
  1.5653 +
  1.5654 +/* Static data is in tools/makeconv/ucnvstat.c for data-based
  1.5655 + * converters. Be sure to update it as well.
  1.5656 + */
  1.5657 +
  1.5658 +const UConverterSharedData _MBCSData={ /* externally visible shared-data object for the MBCS implementation */
  1.5659 +    sizeof(UConverterSharedData), 1, /* structure size first; NOTE(review): the 1 is presumably an initial reference count, confirm against the struct declaration */
  1.5660 +    NULL, NULL, NULL, FALSE, &_MBCSImpl, /* points at the generic table, not at the UTF-8 variants */
  1.5661 +    0
  1.5662 +};
  1.5663 +
  1.5664 +#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
The Tor Browser / file diff

diff: intl/icu/source/common/ucnvmbcs.c

intl/icu/source/common/ucnvmbcs.c