1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/common/ucnv_lmb.c Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,1377 @@ 1.4 +/* 1.5 +********************************************************************** 1.6 +* Copyright (C) 2000-2011, International Business Machines 1.7 +* Corporation and others. All Rights Reserved. 1.8 +********************************************************************** 1.9 +* file name: ucnv_lmb.cpp 1.10 +* encoding: US-ASCII 1.11 +* tab size: 4 (not used) 1.12 +* indentation:4 1.13 +* 1.14 +* created on: 2000feb09 1.15 +* created by: Brendan Murray 1.16 +* extensively hacked up by: Jim Snyder-Grant 1.17 +* 1.18 +* Modification History: 1.19 +* 1.20 +* Date Name Description 1.21 +* 1.22 +* 06/20/2000 helena OS/400 port changes; mostly typecast. 1.23 +* 06/27/2000 Jim Snyder-Grant Deal with partial characters and small buffers. 1.24 +* Add comments to document LMBCS format and implementation 1.25 +* restructured order & breakdown of functions 1.26 +* 06/28/2000 helena Major rewrite for the callback API changes. 1.27 +*/ 1.28 + 1.29 +#include "unicode/utypes.h" 1.30 + 1.31 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION 1.32 + 1.33 +#include "unicode/ucnv_err.h" 1.34 +#include "unicode/ucnv.h" 1.35 +#include "unicode/uset.h" 1.36 +#include "cmemory.h" 1.37 +#include "cstring.h" 1.38 +#include "uassert.h" 1.39 +#include "ucnv_imp.h" 1.40 +#include "ucnv_bld.h" 1.41 +#include "ucnv_cnv.h" 1.42 + 1.43 +#ifdef EBCDIC_RTL 1.44 + #include "ascii_a.h" 1.45 +#endif 1.46 + 1.47 +#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 1.48 + 1.49 +/* 1.50 + LMBCS 1.51 + 1.52 + (Lotus Multi-Byte Character Set) 1.53 + 1.54 + LMBCS was invented in the late 1980's and is primarily used in Lotus Notes 1.55 + databases and in Lotus 1-2-3 files. Programmers who work with the APIs 1.56 + into these products will sometimes need to deal with strings in this format. 
1.57 + 1.58 + The code in this file provides an implementation for an ICU converter of 1.59 + LMBCS to and from Unicode. 1.60 + 1.61 + Since the LMBCS character set is only sparsely documented in existing 1.62 + printed or online material, we have added extensive annotation to this 1.63 + file to serve as a guide to understanding LMBCS. 1.64 + 1.65 + LMBCS was originally designed with these four sometimes-competing design goals: 1.66 + 1.67 + -Provide encodings for the characters in 12 existing national standards 1.68 + (plus a few other characters) 1.69 + -Minimal memory footprint 1.70 + -Maximal speed of conversion into the existing national character sets 1.71 + -No need to track a changing state as you interpret a string. 1.72 + 1.73 + 1.74 + All of the national character sets LMBCS was trying to encode are 'ANSI' 1.75 + based, in that the bytes from 0x20 - 0x7F are almost exactly the 1.76 + same common Latin unaccented characters and symbols in all character sets. 1.77 + 1.78 + So, in order to help meet the speed & memory design goals, the common ANSI 1.79 + bytes from 0x20-0x7F are represented by the same single-byte values in LMBCS. 1.80 + 1.81 + The general LMBCS code unit is from 1-3 bytes. We can describe the 3 bytes as 1.82 + follows: 1.83 + 1.84 + [G] D1 [D2] 1.85 + 1.86 + That is, a sometimes-optional 'group' byte, followed by 1 and sometimes 2 1.87 + data bytes. The maximum size of a LMBCS character is 3 bytes: 1.88 +*/ 1.89 +#define ULMBCS_CHARSIZE_MAX 3 1.90 +/* 1.91 + The single-byte values from 0x20 to 0x7F are examples of single D1 bytes. 1.92 + We often have to figure out if byte values are below or above this, so we 1.93 + use the ANSI nomenclature 'C0' and 'C1' to refer to the range of control 1.94 + characters just above & below the common lower-ANSI range */ 1.95 +#define ULMBCS_C0END 0x1F 1.96 +#define ULMBCS_C1START 0x80 1.97 +/* 1.98 + Since LMBCS is always dealing in byte units,
we create a local type here for 1.99 + dealing with these units of LMBCS code units: 1.100 + 1.101 +*/ 1.102 +typedef uint8_t ulmbcs_byte_t; 1.103 + 1.104 +/* 1.105 + Most of the values less than 0x20 are reserved in LMBCS to announce 1.106 + which national character standard is being used for the 'D' bytes. 1.107 + In the comments we show the common name and the IBM character-set ID 1.108 + for these character-set announcers: 1.109 +*/ 1.110 + 1.111 +#define ULMBCS_GRP_L1 0x01 /* Latin-1 :ibm-850 */ 1.112 +#define ULMBCS_GRP_GR 0x02 /* Greek :ibm-851 */ 1.113 +#define ULMBCS_GRP_HE 0x03 /* Hebrew :ibm-1255 */ 1.114 +#define ULMBCS_GRP_AR 0x04 /* Arabic :ibm-1256 */ 1.115 +#define ULMBCS_GRP_RU 0x05 /* Cyrillic :ibm-1251 */ 1.116 +#define ULMBCS_GRP_L2 0x06 /* Latin-2 :ibm-852 */ 1.117 +#define ULMBCS_GRP_TR 0x08 /* Turkish :ibm-1254 */ 1.118 +#define ULMBCS_GRP_TH 0x0B /* Thai :ibm-874 */ 1.119 +#define ULMBCS_GRP_JA 0x10 /* Japanese :ibm-943 */ 1.120 +#define ULMBCS_GRP_KO 0x11 /* Korean :ibm-1261 */ 1.121 +#define ULMBCS_GRP_TW 0x12 /* Chinese TC :ibm-950 */ 1.122 +#define ULMBCS_GRP_CN 0x13 /* Chinese SC :ibm-1386 */ 1.123 + 1.124 +/* 1.125 + So, the beginning of understanding LMBCS is that IF the first byte of a LMBCS 1.126 + character is one of those 12 values, you can interpret the remaining bytes of 1.127 + that character as coming from one of those character sets. Since the lower 1.128 + ANSI bytes already are represented in single bytes, one of the character 1.129 + set announcers is used to announce a character that starts with a byte of 1.130 + 0x80 or greater. 1.131 + 1.132 + The character sets are arranged so that the single byte sets all appear 1.133 + before the multi-byte character sets.
When we need to tell whether a 1.134 + group byte is for a single byte char set or not we use this define: */ 1.135 + 1.136 +#define ULMBCS_DOUBLEOPTGROUP_START 0x10 1.137 + 1.138 +/* 1.139 +However, to fully understand LMBCS, you must also understand a series of 1.140 +exceptions & optimizations made in service of the design goals. 1.141 + 1.142 +First, those of you who are character set mavens may have noticed that 1.143 +the 'double-byte' character sets are actually multi-byte character sets 1.144 +that can have 1 or 2 bytes, even in the upper-ascii range. To force 1.145 +each group byte to introduce a fixed-width encoding (to make it faster to 1.146 +count characters), we use a convention of doubling up on the group byte 1.147 +to introduce any single-byte character > 0x80 in an otherwise double-byte 1.148 +character set. So, for example, the LMBCS sequence x10 x10 xAE is the 1.149 +same as '0xAE' in the Japanese code page 943. 1.150 + 1.151 +Next, you will notice that the list of group bytes has some gaps. 1.152 +These are used in various ways. 1.153 + 1.154 +We reserve a few special single byte values for common control 1.155 +characters. These are in the same place as their ANSI equivalents for speed. 1.156 +*/ 1.157 + 1.158 +#define ULMBCS_HT 0x09 /* Fixed control char - Horizontal Tab */ 1.159 +#define ULMBCS_LF 0x0A /* Fixed control char - Line Feed */ 1.160 +#define ULMBCS_CR 0x0D /* Fixed control char - Carriage Return */ 1.161 + 1.162 +/* Then, 1-2-3 reserved a special single-byte character to put at the 1.163 +beginning of internal 'system' range names: */ 1.164 + 1.165 +#define ULMBCS_123SYSTEMRANGE 0x19 1.166 + 1.167 +/* Then we needed a place to put all the other ansi control characters 1.168 +that must be moved to different values because LMBCS reserves those 1.169 +values for other purposes.
To represent the control characters, we start 1.170 +with a first byte of 0xF & add the control character value as the 1.171 +second byte */ 1.172 +#define ULMBCS_GRP_CTRL 0x0F 1.173 + 1.174 +/* For the C0 controls (less than 0x20), we add 0x20 to preserve the 1.175 +useful doctrine that any byte less than 0x20 in a LMBCS char must be 1.176 +the first byte of a character:*/ 1.177 +#define ULMBCS_CTRLOFFSET 0x20 1.178 + 1.179 +/* 1.180 +Where to put the characters that aren't part of any of the 12 national 1.181 +character sets? The first thing that was done, in the earlier years of 1.182 +LMBCS, was to use up the spaces of the form 1.183 + 1.184 + [G] D1, 1.185 + 1.186 + where 'G' was one of the single-byte character groups, and 1.187 + D1 was less than 0x80. These sequences are gathered together 1.188 + into a Lotus-invented doublebyte character set to represent a 1.189 + lot of stray values. Internally, in this implementation, we track this 1.190 + as group '0', as a place to tuck this exceptions list.*/ 1.191 + 1.192 +#define ULMBCS_GRP_EXCEPT 0x00 1.193 +/* 1.194 + Finally, as the durability and usefulness of UNICODE became clear, 1.195 + LOTUS added a new group 0x14 to hold Unicode values not otherwise 1.196 + represented in LMBCS: */ 1.197 +#define ULMBCS_GRP_UNICODE 0x14 1.198 +/* The two bytes appearing after a 0x14 are interpreted as UTF-16 BE 1.199 +(Big-Endian) characters. The exception comes when the UTF-16 1.200 +representation would have a zero as the second byte. In that case, 1.201 +'F6' is used in its place, and the bytes are swapped.
(This prevents 1.202 +LMBCS from encoding any Unicode values of the form U+F6xx, but that's OK: 1.203 +0xF6xx is in the middle of the Private Use Area.)*/ 1.204 +#define ULMBCS_UNICOMPATZERO 0xF6 1.205 + 1.206 +/* It is also useful in our code to have a constant for the size of 1.207 +a LMBCS char that holds a literal Unicode value */ 1.208 +#define ULMBCS_UNICODE_SIZE 3 1.209 + 1.210 +/* 1.211 +To squish the LMBCS representations down even further, and to make 1.212 +translations even faster, sometimes the optimization group byte can be dropped 1.213 +from a LMBCS character. This is decided on a process-by-process basis. The 1.214 +group byte that is dropped is called the 'optimization group'. 1.215 + 1.216 +For Notes, the optimization group is always 0x1.*/ 1.217 +#define ULMBCS_DEFAULTOPTGROUP 0x1 1.218 +/* For 1-2-3 files, the optimization group is stored in the header of the 1-2-3 1.219 +file. 1.220 + 1.221 + In any case, when using ICU, you pass in the 1.222 +optimization group as part of the name of the converter (LMBCS-1, LMBCS-2, 1.223 +etc.). Using plain 'LMBCS' as the name of the converter will give you 1.224 +LMBCS-1. 1.225 + 1.226 + 1.227 +*** Implementation strategy *** 1.228 + 1.229 + 1.230 +Because of the extensive use of other character sets, the LMBCS converter 1.231 +keeps a mapping between optimization groups and IBM character sets, so that 1.232 +ICU converters can be created and used as needed. */ 1.233 + 1.234 +/* As you can see, even though any byte below 0x20 could be an optimization 1.235 +byte, only those at 0x13 or below can map to an actual converter.
To limit 1.236 +some loops and searches, we define a value for that last group converter:*/ 1.237 + 1.238 +#define ULMBCS_GRP_LAST 0x13 /* last LMBCS group that has a converter */ 1.239 + 1.240 +static const char * const OptGroupByteToCPName[ULMBCS_GRP_LAST + 1] = { 1.241 + /* 0x0000 */ "lmb-excp", /* internal home for the LOTUS exceptions list */ 1.242 + /* 0x0001 */ "ibm-850", 1.243 + /* 0x0002 */ "ibm-851", 1.244 + /* 0x0003 */ "windows-1255", 1.245 + /* 0x0004 */ "windows-1256", 1.246 + /* 0x0005 */ "windows-1251", 1.247 + /* 0x0006 */ "ibm-852", 1.248 + /* 0x0007 */ NULL, /* Unused */ 1.249 + /* 0x0008 */ "windows-1254", 1.250 + /* 0x0009 */ NULL, /* Control char HT */ 1.251 + /* 0x000A */ NULL, /* Control char LF */ 1.252 + /* 0x000B */ "windows-874", 1.253 + /* 0x000C */ NULL, /* Unused */ 1.254 + /* 0x000D */ NULL, /* Control char CR */ 1.255 + /* 0x000E */ NULL, /* Unused */ 1.256 + /* 0x000F */ NULL, /* Control chars: 0x0F20 + C0/C1 character: algorithmic */ 1.257 + /* 0x0010 */ "windows-932", 1.258 + /* 0x0011 */ "windows-949", 1.259 + /* 0x0012 */ "windows-950", 1.260 + /* 0x0013 */ "windows-936" 1.261 + 1.262 + /* The table stops at ULMBCS_GRP_LAST: there are no entries for the 0x0014 Unicode compatibility region 1.263 + or 0x0019, the 1-2-3 system range control char */ 1.264 +}; 1.265 + 1.266 + 1.267 +/* That's approximately all the data that's needed for translating 1.268 + LMBCS to Unicode. 1.269 + 1.270 + 1.271 +However, to translate Unicode to LMBCS, we need some more support. 1.272 + 1.273 +That's because there is often more than one possible mapping from a Unicode 1.274 +code point back into LMBCS. The first thing we do is look up into a table 1.275 +to figure out if there is more than one possible mapping. This table, 1.276 +arranged by Unicode values (including ranges) either lists which group 1.277 +to use, or says that it could go into one or more of the SBCS sets, or 1.278 +into one or more of the DBCS sets.
(If the character exists in both DBCS & 1.279 +SBCS, the table will place it in the SBCS sets, to make the LMBCS code point 1.280 +length as small as possible.) Here are the special markers we use to indicate 1.281 +ambiguous mappings: */ 1.282 + 1.283 +#define ULMBCS_AMBIGUOUS_SBCS 0x80 /* could fit in more than one 1.284 + LMBCS sbcs native encoding 1.285 + (example: most accented latin) */ 1.286 +#define ULMBCS_AMBIGUOUS_MBCS 0x81 /* could fit in more than one 1.287 + LMBCS mbcs native encoding 1.288 + (example: Unihan) */ 1.289 +#define ULMBCS_AMBIGUOUS_ALL 0x82 /* could fit in sbcs or mbcs native encodings: matches any group */ 1.290 +/* And here's a simple way to see if a group falls in an appropriate range: true when agroup is the SBCS marker and xgroup is below ULMBCS_DOUBLEOPTGROUP_START, when agroup is the MBCS marker and xgroup is at or above it, or when agroup is the ALL marker. The whole expansion is parenthesized so that it can be combined with && or ! without regrouping. */ 1.291 +#define ULMBCS_AMBIGUOUS_MATCH(agroup, xgroup) \ 1.292 + (((((agroup) == ULMBCS_AMBIGUOUS_SBCS) && \ 1.293 + (xgroup) < ULMBCS_DOUBLEOPTGROUP_START) || \ 1.294 + (((agroup) == ULMBCS_AMBIGUOUS_MBCS) && \ 1.295 + (xgroup) >= ULMBCS_DOUBLEOPTGROUP_START)) || \ 1.296 + ((agroup) == ULMBCS_AMBIGUOUS_ALL)) 1.297 + 1.298 + 1.299 +/* The table & some code to use it: */ 1.300 + 1.301 + 1.302 +static const struct _UniLMBCSGrpMap 1.303 +{ 1.304 + const UChar uniStartRange; 1.305 + const UChar uniEndRange; 1.306 + const ulmbcs_byte_t GrpType; 1.307 +} UniLMBCSGrpMap[] 1.308 += 1.309 +{ 1.310 + 1.311 + {0x0001, 0x001F, ULMBCS_GRP_CTRL}, 1.312 + {0x0080, 0x009F, ULMBCS_GRP_CTRL}, 1.313 + {0x00A0, 0x00A6, ULMBCS_AMBIGUOUS_SBCS}, 1.314 + {0x00A7, 0x00A8, ULMBCS_AMBIGUOUS_ALL}, 1.315 + {0x00A9, 0x00AF, ULMBCS_AMBIGUOUS_SBCS}, 1.316 + {0x00B0, 0x00B1, ULMBCS_AMBIGUOUS_ALL}, 1.317 + {0x00B2, 0x00B3, ULMBCS_AMBIGUOUS_SBCS}, 1.318 + {0x00B4, 0x00B4, ULMBCS_AMBIGUOUS_ALL}, 1.319 + {0x00B5, 0x00B5, ULMBCS_AMBIGUOUS_SBCS}, 1.320 + {0x00B6, 0x00B6, ULMBCS_AMBIGUOUS_ALL}, 1.321 + {0x00B7, 0x00D6, ULMBCS_AMBIGUOUS_SBCS}, 1.322 + {0x00D7, 0x00D7, ULMBCS_AMBIGUOUS_ALL}, 1.323 + {0x00D8, 0x00F6, ULMBCS_AMBIGUOUS_SBCS}, 1.324 + {0x00F7, 0x00F7, ULMBCS_AMBIGUOUS_ALL}, 1.325 + {0x00F8, 0x01CD, ULMBCS_AMBIGUOUS_SBCS}, 1.326 + {0x01CE, 0x01CE,
ULMBCS_GRP_TW }, 1.327 + {0x01CF, 0x02B9, ULMBCS_AMBIGUOUS_SBCS}, 1.328 + {0x02BA, 0x02BA, ULMBCS_GRP_CN}, 1.329 + {0x02BC, 0x02C8, ULMBCS_AMBIGUOUS_SBCS}, 1.330 + {0x02C9, 0x02D0, ULMBCS_AMBIGUOUS_MBCS}, 1.331 + {0x02D8, 0x02DD, ULMBCS_AMBIGUOUS_SBCS}, 1.332 + {0x0384, 0x0390, ULMBCS_AMBIGUOUS_SBCS}, 1.333 + {0x0391, 0x03A9, ULMBCS_AMBIGUOUS_ALL}, 1.334 + {0x03AA, 0x03B0, ULMBCS_AMBIGUOUS_SBCS}, 1.335 + {0x03B1, 0x03C9, ULMBCS_AMBIGUOUS_ALL}, 1.336 + {0x03CA, 0x03CE, ULMBCS_AMBIGUOUS_SBCS}, 1.337 + {0x0400, 0x0400, ULMBCS_GRP_RU}, 1.338 + {0x0401, 0x0401, ULMBCS_AMBIGUOUS_ALL}, 1.339 + {0x0402, 0x040F, ULMBCS_GRP_RU}, 1.340 + {0x0410, 0x0431, ULMBCS_AMBIGUOUS_ALL}, 1.341 + {0x0432, 0x044E, ULMBCS_GRP_RU}, 1.342 + {0x044F, 0x044F, ULMBCS_AMBIGUOUS_ALL}, 1.343 + {0x0450, 0x0491, ULMBCS_GRP_RU}, 1.344 + {0x05B0, 0x05F2, ULMBCS_GRP_HE}, 1.345 + {0x060C, 0x06AF, ULMBCS_GRP_AR}, 1.346 + {0x0E01, 0x0E5B, ULMBCS_GRP_TH}, 1.347 + {0x200C, 0x200F, ULMBCS_AMBIGUOUS_SBCS}, 1.348 + {0x2010, 0x2010, ULMBCS_AMBIGUOUS_MBCS}, 1.349 + {0x2013, 0x2014, ULMBCS_AMBIGUOUS_SBCS}, 1.350 + {0x2015, 0x2015, ULMBCS_AMBIGUOUS_MBCS}, 1.351 + {0x2016, 0x2016, ULMBCS_AMBIGUOUS_MBCS}, 1.352 + {0x2017, 0x2017, ULMBCS_AMBIGUOUS_SBCS}, 1.353 + {0x2018, 0x2019, ULMBCS_AMBIGUOUS_ALL}, 1.354 + {0x201A, 0x201B, ULMBCS_AMBIGUOUS_SBCS}, 1.355 + {0x201C, 0x201D, ULMBCS_AMBIGUOUS_ALL}, 1.356 + {0x201E, 0x201F, ULMBCS_AMBIGUOUS_SBCS}, 1.357 + {0x2020, 0x2021, ULMBCS_AMBIGUOUS_ALL}, 1.358 + {0x2022, 0x2024, ULMBCS_AMBIGUOUS_SBCS}, 1.359 + {0x2025, 0x2025, ULMBCS_AMBIGUOUS_MBCS}, 1.360 + {0x2026, 0x2026, ULMBCS_AMBIGUOUS_ALL}, 1.361 + {0x2027, 0x2027, ULMBCS_GRP_TW}, 1.362 + {0x2030, 0x2030, ULMBCS_AMBIGUOUS_ALL}, 1.363 + {0x2031, 0x2031, ULMBCS_AMBIGUOUS_SBCS}, 1.364 + {0x2032, 0x2033, ULMBCS_AMBIGUOUS_MBCS}, 1.365 + {0x2035, 0x2035, ULMBCS_AMBIGUOUS_MBCS}, 1.366 + {0x2039, 0x203A, ULMBCS_AMBIGUOUS_SBCS}, 1.367 + {0x203B, 0x203B, ULMBCS_AMBIGUOUS_MBCS}, 1.368 + {0x203C, 0x203C, ULMBCS_GRP_EXCEPT}, 1.369 + 
{0x2074, 0x2074, ULMBCS_GRP_KO}, 1.370 + {0x207F, 0x207F, ULMBCS_GRP_EXCEPT}, 1.371 + {0x2081, 0x2084, ULMBCS_GRP_KO}, 1.372 + {0x20A4, 0x20AC, ULMBCS_AMBIGUOUS_SBCS}, 1.373 + {0x2103, 0x2109, ULMBCS_AMBIGUOUS_MBCS}, 1.374 + {0x2111, 0x2120, ULMBCS_AMBIGUOUS_SBCS}, 1.375 + /*zhujin: upgrade, for regressiont test, spr HKIA4YHTSU*/ 1.376 + {0x2121, 0x2121, ULMBCS_AMBIGUOUS_MBCS}, 1.377 + {0x2122, 0x2126, ULMBCS_AMBIGUOUS_SBCS}, 1.378 + {0x212B, 0x212B, ULMBCS_AMBIGUOUS_MBCS}, 1.379 + {0x2135, 0x2135, ULMBCS_AMBIGUOUS_SBCS}, 1.380 + {0x2153, 0x2154, ULMBCS_GRP_KO}, 1.381 + {0x215B, 0x215E, ULMBCS_GRP_EXCEPT}, 1.382 + {0x2160, 0x2179, ULMBCS_AMBIGUOUS_MBCS}, 1.383 + {0x2190, 0x2193, ULMBCS_AMBIGUOUS_ALL}, 1.384 + {0x2194, 0x2195, ULMBCS_GRP_EXCEPT}, 1.385 + {0x2196, 0x2199, ULMBCS_AMBIGUOUS_MBCS}, 1.386 + {0x21A8, 0x21A8, ULMBCS_GRP_EXCEPT}, 1.387 + {0x21B8, 0x21B9, ULMBCS_GRP_CN}, 1.388 + {0x21D0, 0x21D1, ULMBCS_GRP_EXCEPT}, 1.389 + {0x21D2, 0x21D2, ULMBCS_AMBIGUOUS_MBCS}, 1.390 + {0x21D3, 0x21D3, ULMBCS_GRP_EXCEPT}, 1.391 + {0x21D4, 0x21D4, ULMBCS_AMBIGUOUS_MBCS}, 1.392 + {0x21D5, 0x21D5, ULMBCS_GRP_EXCEPT}, 1.393 + {0x21E7, 0x21E7, ULMBCS_GRP_CN}, 1.394 + {0x2200, 0x2200, ULMBCS_AMBIGUOUS_MBCS}, 1.395 + {0x2201, 0x2201, ULMBCS_GRP_EXCEPT}, 1.396 + {0x2202, 0x2202, ULMBCS_AMBIGUOUS_MBCS}, 1.397 + {0x2203, 0x2203, ULMBCS_AMBIGUOUS_MBCS}, 1.398 + {0x2204, 0x2206, ULMBCS_GRP_EXCEPT}, 1.399 + {0x2207, 0x2208, ULMBCS_AMBIGUOUS_MBCS}, 1.400 + {0x2209, 0x220A, ULMBCS_GRP_EXCEPT}, 1.401 + {0x220B, 0x220B, ULMBCS_AMBIGUOUS_MBCS}, 1.402 + {0x220F, 0x2215, ULMBCS_AMBIGUOUS_MBCS}, 1.403 + {0x2219, 0x2219, ULMBCS_GRP_EXCEPT}, 1.404 + {0x221A, 0x221A, ULMBCS_AMBIGUOUS_MBCS}, 1.405 + {0x221B, 0x221C, ULMBCS_GRP_EXCEPT}, 1.406 + {0x221D, 0x221E, ULMBCS_AMBIGUOUS_MBCS}, 1.407 + {0x221F, 0x221F, ULMBCS_GRP_EXCEPT}, 1.408 + {0x2220, 0x2220, ULMBCS_AMBIGUOUS_MBCS}, 1.409 + {0x2223, 0x222A, ULMBCS_AMBIGUOUS_MBCS}, 1.410 + {0x222B, 0x223D, ULMBCS_AMBIGUOUS_MBCS}, 1.411 + {0x2245, 0x2248, 
ULMBCS_GRP_EXCEPT}, 1.412 + {0x224C, 0x224C, ULMBCS_GRP_TW}, 1.413 + {0x2252, 0x2252, ULMBCS_AMBIGUOUS_MBCS}, 1.414 + {0x2260, 0x2261, ULMBCS_AMBIGUOUS_MBCS}, 1.415 + {0x2262, 0x2265, ULMBCS_GRP_EXCEPT}, 1.416 + {0x2266, 0x226F, ULMBCS_AMBIGUOUS_MBCS}, 1.417 + {0x2282, 0x2283, ULMBCS_AMBIGUOUS_MBCS}, 1.418 + {0x2284, 0x2285, ULMBCS_GRP_EXCEPT}, 1.419 + {0x2286, 0x2287, ULMBCS_AMBIGUOUS_MBCS}, 1.420 + {0x2288, 0x2297, ULMBCS_GRP_EXCEPT}, 1.421 + {0x2299, 0x22BF, ULMBCS_AMBIGUOUS_MBCS}, 1.422 + {0x22C0, 0x22C0, ULMBCS_GRP_EXCEPT}, 1.423 + {0x2310, 0x2310, ULMBCS_GRP_EXCEPT}, 1.424 + {0x2312, 0x2312, ULMBCS_AMBIGUOUS_MBCS}, 1.425 + {0x2318, 0x2321, ULMBCS_GRP_EXCEPT}, 1.426 + {0x2318, 0x2321, ULMBCS_GRP_CN}, /* NOTE(review): same range as the row above; the search in FindLMBCSUniRange stops at the first row whose uniEndRange is not below the character, so this row is never returned - confirm which group was intended */ 1.427 + {0x2460, 0x24E9, ULMBCS_AMBIGUOUS_MBCS}, 1.428 + {0x2500, 0x2500, ULMBCS_AMBIGUOUS_SBCS}, 1.429 + {0x2501, 0x2501, ULMBCS_AMBIGUOUS_MBCS}, 1.430 + {0x2502, 0x2502, ULMBCS_AMBIGUOUS_ALL}, 1.431 + {0x2503, 0x2503, ULMBCS_AMBIGUOUS_MBCS}, 1.432 + {0x2504, 0x2505, ULMBCS_GRP_TW}, 1.433 + {0x2506, 0x2665, ULMBCS_AMBIGUOUS_ALL}, 1.434 + {0x2666, 0x2666, ULMBCS_GRP_EXCEPT}, 1.435 + {0x2667, 0x2669, ULMBCS_AMBIGUOUS_SBCS}, 1.436 + {0x266A, 0x266A, ULMBCS_AMBIGUOUS_ALL}, 1.437 + {0x266B, 0x266C, ULMBCS_AMBIGUOUS_SBCS}, 1.438 + {0x266D, 0x266D, ULMBCS_AMBIGUOUS_MBCS}, 1.439 + {0x266E, 0x266E, ULMBCS_AMBIGUOUS_SBCS}, 1.440 + {0x266F, 0x266F, ULMBCS_GRP_JA}, 1.441 + {0x2670, 0x2E7F, ULMBCS_AMBIGUOUS_SBCS}, 1.442 + {0x2E80, 0xF861, ULMBCS_AMBIGUOUS_MBCS}, 1.443 + {0xF862, 0xF8FF, ULMBCS_GRP_EXCEPT}, 1.444 + {0xF900, 0xFA2D, ULMBCS_AMBIGUOUS_MBCS}, 1.445 + {0xFB00, 0xFEFF, ULMBCS_AMBIGUOUS_SBCS}, 1.446 + {0xFF01, 0xFFEE, ULMBCS_AMBIGUOUS_MBCS}, 1.447 + {0xFFFF, 0xFFFF, ULMBCS_GRP_UNICODE} /* last row: its uniEndRange of 0xFFFF is what ends the otherwise unbounded search loop in FindLMBCSUniRange (assumes UChar is 16 bits wide - TODO confirm) */ 1.448 +}; 1.449 + /* Walks UniLMBCSGrpMap from the top and returns the GrpType of the first row whose range contains uniChar, or ULMBCS_GRP_UNICODE when uniChar falls in a gap between rows. */ 1.450 +static ulmbcs_byte_t 1.451 +FindLMBCSUniRange(UChar uniChar) 1.452 +{ 1.453 + const struct _UniLMBCSGrpMap * pTable = UniLMBCSGrpMap; 1.454 + /* skip every row that ends below uniChar; there is no explicit bound on pTable */ 1.455 + while (uniChar > pTable->uniEndRange) 1.456 + { 1.457 + pTable++; 1.458 + } 1.459 + 1.460 + if (uniChar >=
pTable->uniStartRange) 1.461 + { 1.462 + return pTable->GrpType; 1.463 + } 1.464 + return ULMBCS_GRP_UNICODE; /* uniChar lies in a gap before this row's start */ 1.465 +} 1.466 + 1.467 +/* 1.468 +We also ask the creator of a converter to send in a preferred locale 1.469 +that we can use in resolving ambiguous mappings. They send the locale 1.470 +in as a string, and we map it, if possible, to one of the 1.471 +LMBCS groups. We use this table, and the associated code, to 1.472 +do the lookup: */ 1.473 + 1.474 +/************************************************** 1.475 + This table maps locale ID's to LMBCS opt groups. 1.476 + The default return is group 0x01 (0 is returned when the locale ID is NULL or empty). Note that for 1.477 + performance reasons, the table is sorted in 1.478 + increasing alphabetic order, with the notable 1.479 + exception of zhTW. This is to force the check 1.480 + for Traditional Chinese before dropping back to 1.481 + Simplified. 1.482 + 1.483 + Note too that the Latin-1 groups have been 1.484 + commented out because it's the default, and 1.485 + this shortens the table, allowing a serial 1.486 + search to go quickly.
1.487 + *************************************************/ 1.488 + 1.489 +static const struct _LocaleLMBCSGrpMap 1.490 +{ 1.491 + const char *LocaleID; 1.492 + const ulmbcs_byte_t OptGroup; 1.493 +} LocaleLMBCSGrpMap[] = 1.494 +{ 1.495 + {"ar", ULMBCS_GRP_AR}, 1.496 + {"be", ULMBCS_GRP_RU}, 1.497 + {"bg", ULMBCS_GRP_L2}, 1.498 + /* {"ca", ULMBCS_GRP_L1}, */ 1.499 + {"cs", ULMBCS_GRP_L2}, 1.500 + /* {"da", ULMBCS_GRP_L1}, */ 1.501 + /* {"de", ULMBCS_GRP_L1}, */ 1.502 + {"el", ULMBCS_GRP_GR}, 1.503 + /* {"en", ULMBCS_GRP_L1}, */ 1.504 + /* {"es", ULMBCS_GRP_L1}, */ 1.505 + /* {"et", ULMBCS_GRP_L1}, */ 1.506 + /* {"fi", ULMBCS_GRP_L1}, */ 1.507 + /* {"fr", ULMBCS_GRP_L1}, */ 1.508 + {"he", ULMBCS_GRP_HE}, 1.509 + {"hu", ULMBCS_GRP_L2}, 1.510 + /* {"is", ULMBCS_GRP_L1}, */ 1.511 + /* {"it", ULMBCS_GRP_L1}, */ 1.512 + {"iw", ULMBCS_GRP_HE}, 1.513 + {"ja", ULMBCS_GRP_JA}, 1.514 + {"ko", ULMBCS_GRP_KO}, 1.515 + /* {"lt", ULMBCS_GRP_L1}, */ 1.516 + /* {"lv", ULMBCS_GRP_L1}, */ 1.517 + {"mk", ULMBCS_GRP_RU}, 1.518 + /* {"nl", ULMBCS_GRP_L1}, */ 1.519 + /* {"no", ULMBCS_GRP_L1}, */ 1.520 + {"pl", ULMBCS_GRP_L2}, 1.521 + /* {"pt", ULMBCS_GRP_L1}, */ 1.522 + {"ro", ULMBCS_GRP_L2}, 1.523 + {"ru", ULMBCS_GRP_RU}, 1.524 + {"sh", ULMBCS_GRP_L2}, 1.525 + {"sk", ULMBCS_GRP_L2}, 1.526 + {"sl", ULMBCS_GRP_L2}, 1.527 + {"sq", ULMBCS_GRP_L2}, 1.528 + {"sr", ULMBCS_GRP_RU}, 1.529 + /* {"sv", ULMBCS_GRP_L1}, */ 1.530 + {"th", ULMBCS_GRP_TH}, 1.531 + {"tr", ULMBCS_GRP_TR}, 1.532 + {"uk", ULMBCS_GRP_RU}, 1.533 + /* {"vi", ULMBCS_GRP_L1}, */ 1.534 + {"zhTW", ULMBCS_GRP_TW}, /* NOTE(review): the lookup is a prefix compare against this exact spelling, so an ID written "zh_TW" would not match here and would fall through to the "zh" row - confirm the locale format callers pass */ 1.535 + {"zh", ULMBCS_GRP_CN}, 1.536 + {NULL, ULMBCS_GRP_L1} /* terminator: the NULL LocaleID ends the search loop */ 1.537 +}; 1.538 + 1.539 + /* Maps a locale ID string to an LMBCS group by comparing it against the LocaleLMBCSGrpMap rows in order. */ 1.540 +static ulmbcs_byte_t 1.541 +FindLMBCSLocale(const char *LocaleID) 1.542 +{ 1.543 + const struct _LocaleLMBCSGrpMap *pTable = LocaleLMBCSGrpMap; 1.544 + 1.545 + if ((!LocaleID) || (!*LocaleID)) 1.546 + { 1.547 + return 0; /* no locale given: 0, not the ULMBCS_GRP_L1 value used for unmatched IDs */ 1.548 + } 1.549 + 1.550 + while (pTable->LocaleID) 1.551 + { 1.552 + if (*pTable->LocaleID == *LocaleID) /*
Check only first char for speed */ 1.553 + { 1.554 + /* First char matches - compare over the table entry's length only, so any ID that begins with the entry matches */ 1.555 + if (uprv_strncmp(pTable->LocaleID, LocaleID, strlen(pTable->LocaleID)) == 0) 1.556 + return pTable->OptGroup; 1.557 + } 1.558 + else 1.559 + if (*pTable->LocaleID > *LocaleID) /* Sorted alphabetically - exit */ 1.560 + break; 1.561 + pTable++; 1.562 + } 1.563 + return ULMBCS_GRP_L1; /* no table entry matched: Latin-1 default */ 1.564 +} 1.565 + 1.566 + 1.567 +/* 1.568 + Before we get to the main body of code, here's how we hook up to the rest 1.569 + of ICU. ICU converters are required to define a structure that includes 1.570 + some function pointers, and some common data, in the style of a C++ 1.571 + vtable. There is also room in there for converter-specific data. LMBCS 1.572 + uses that converter-specific data to keep track of the 12 subconverters 1.573 + we use, the optimization group, and the group (if any) that matches the 1.574 + locale. We have one structure instantiated for each of the 12 possible 1.575 + optimization groups. To avoid typos & to avoid boring the reader, we 1.576 + put the declarations of these structures and functions into macros. To see 1.577 + the definitions of these structures, see unicode\ucnv_bld.h 1.578 +*/ 1.579 + 1.580 +typedef struct 1.581 + { 1.582 + UConverterSharedData *OptGrpConverter[ULMBCS_GRP_LAST+1]; /* Converter per Opt. grp. */ 1.583 + uint8_t OptGroup; /* default Opt. grp.
for this LMBCS session */ 1.584 + uint8_t localeConverterIndex; /* LMBCS group derived from the locale (set from FindLMBCSLocale in the open worker) */ 1.585 + } 1.586 +UConverterDataLMBCS; 1.587 + /* forward declaration: _LMBCSOpenWorker calls _LMBCSClose before its definition */ 1.588 +static void _LMBCSClose(UConverter * _this); 1.589 + /* For optimization group n this defines the _LMBCSImpl##n function table, the _LMBCSStaticData##n descriptor named "LMBCS-n", and the non-static _LMBCSData##n shared data that points at both. */ 1.590 +#define DECLARE_LMBCS_DATA(n) \ 1.591 +static const UConverterImpl _LMBCSImpl##n={\ 1.592 + UCNV_LMBCS_##n,\ 1.593 + NULL,NULL,\ 1.594 + _LMBCSOpen##n,\ 1.595 + _LMBCSClose,\ 1.596 + NULL,\ 1.597 + _LMBCSToUnicodeWithOffsets,\ 1.598 + _LMBCSToUnicodeWithOffsets,\ 1.599 + _LMBCSFromUnicode,\ 1.600 + _LMBCSFromUnicode,\ 1.601 + NULL,\ 1.602 + NULL,\ 1.603 + NULL,\ 1.604 + NULL,\ 1.605 + _LMBCSSafeClone,\ 1.606 + ucnv_getCompleteUnicodeSet\ 1.607 +};\ 1.608 +static const UConverterStaticData _LMBCSStaticData##n={\ 1.609 + sizeof(UConverterStaticData),\ 1.610 + "LMBCS-" #n,\ 1.611 + 0, UCNV_IBM, UCNV_LMBCS_##n, 1, 3,\ 1.612 + { 0x3f, 0, 0, 0 },1,FALSE,FALSE,0,0,{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0} \ 1.613 +};\ 1.614 +const UConverterSharedData _LMBCSData##n={\ 1.615 + sizeof(UConverterSharedData), ~((uint32_t) 0),\ 1.616 + NULL, NULL, &_LMBCSStaticData##n, FALSE, &_LMBCSImpl##n, \ 1.617 + 0 \ 1.618 +}; 1.619 + 1.620 + /* The only function we needed to duplicate 12 times was the 'open' 1.621 +function, which will do basically the same thing except set a different 1.622 +optimization group.
So, we put the common stuff into a worker function, 1.623 +and set up another macro to stamp out the 12 open functions:*/ 1.624 +#define DEFINE_LMBCS_OPEN(n) \ 1.625 +static void \ 1.626 + _LMBCSOpen##n(UConverter* _this, UConverterLoadArgs* pArgs, UErrorCode* err) \ 1.627 +{ _LMBCSOpenWorker(_this, pArgs, err, n); } 1.628 + 1.629 + 1.630 + 1.631 +/* Here's the open worker & the common close function. The worker allocates the converter's UConverterDataLMBCS, loads the shared data of every group that has a converter name, and records the optimization group and the locale group; if the allocation fails it sets U_MEMORY_ALLOCATION_ERROR. */ 1.632 +static void 1.633 +_LMBCSOpenWorker(UConverter* _this, 1.634 + UConverterLoadArgs *pArgs, 1.635 + UErrorCode* err, 1.636 + ulmbcs_byte_t OptGroup) 1.637 +{ 1.638 + UConverterDataLMBCS * extraInfo = _this->extraInfo = 1.639 + (UConverterDataLMBCS*)uprv_malloc (sizeof (UConverterDataLMBCS)); 1.640 + if(extraInfo != NULL) 1.641 + { 1.642 + UConverterNamePieces stackPieces; 1.643 + UConverterLoadArgs stackArgs={ (int32_t)sizeof(UConverterLoadArgs) }; 1.644 + ulmbcs_byte_t i; 1.645 + /* start from an all-zero struct so that slots not filled by the loop below read as empty */ 1.646 + uprv_memset(extraInfo, 0, sizeof(UConverterDataLMBCS)); 1.647 + 1.648 + stackArgs.onlyTestIsLoadable = pArgs->onlyTestIsLoadable; 1.649 + /* load each group that has a converter name; the loop condition stops at the first error */ 1.650 + for (i=0; i <= ULMBCS_GRP_LAST && U_SUCCESS(*err); i++) 1.651 + { 1.652 + if(OptGroupByteToCPName[i] != NULL) { 1.653 + extraInfo->OptGrpConverter[i] = ucnv_loadSharedData(OptGroupByteToCPName[i], &stackPieces, &stackArgs, err); 1.654 + } 1.655 + } 1.656 + /* on error, or when the caller only asked whether the converter is loadable, release everything again */ 1.657 + if(U_FAILURE(*err) || pArgs->onlyTestIsLoadable) { 1.658 + _LMBCSClose(_this); 1.659 + return; 1.660 + } 1.661 + extraInfo->OptGroup = OptGroup; 1.662 + extraInfo->localeConverterIndex = FindLMBCSLocale(pArgs->locale); 1.663 + } 1.664 + else 1.665 + { 1.666 + *err = U_MEMORY_ALLOCATION_ERROR; 1.667 + } 1.668 +} 1.669 + /* Common close: unloads every loaded group converter, then frees extraInfo unless the converter is flagged isExtraLocal. */ 1.670 +static void 1.671 +_LMBCSClose(UConverter * _this) 1.672 +{ 1.673 + if (_this->extraInfo != NULL) 1.674 + { 1.675 + ulmbcs_byte_t Ix; 1.676 + UConverterDataLMBCS * extraInfo = (UConverterDataLMBCS *) _this->extraInfo; 1.677 + 1.678 + for (Ix=0; Ix <= ULMBCS_GRP_LAST; Ix++) 1.679 + { 1.680 + if (extraInfo->OptGrpConverter[Ix] != NULL) 1.681 +
ucnv_unloadSharedDataIfReady(extraInfo->OptGrpConverter[Ix]); 1.682 + } /* extraInfo that lives inside a clone buffer (isExtraLocal, set by _LMBCSSafeClone) was not allocated separately, so it is not freed */ 1.683 + if (!_this->isExtraLocal) { 1.684 + uprv_free (_this->extraInfo); 1.685 + _this->extraInfo = NULL; 1.686 + } 1.687 + } 1.688 +} 1.689 + /* Clone layout: the converter immediately followed by its LMBCS data, so both fit in one caller-supplied buffer */ 1.690 +typedef struct LMBCSClone { 1.691 + UConverter cnv; 1.692 + UConverterDataLMBCS lmbcs; 1.693 +} LMBCSClone; 1.694 + /* Clone hook. With *pBufferSize <= 0 it only reports the buffer size needed and returns NULL; otherwise it copies the LMBCS data into stackBuffer, takes one more reference on each shared subconverter and returns the converter inside the buffer. status is not written here. */ 1.695 +static UConverter * 1.696 +_LMBCSSafeClone(const UConverter *cnv, 1.697 + void *stackBuffer, 1.698 + int32_t *pBufferSize, 1.699 + UErrorCode *status) { 1.700 + LMBCSClone *newLMBCS; 1.701 + UConverterDataLMBCS *extraInfo; 1.702 + int32_t i; 1.703 + /* size query */ 1.704 + if(*pBufferSize<=0) { 1.705 + *pBufferSize=(int32_t)sizeof(LMBCSClone); 1.706 + return NULL; 1.707 + } 1.708 + 1.709 + extraInfo=(UConverterDataLMBCS *)cnv->extraInfo; 1.710 + newLMBCS=(LMBCSClone *)stackBuffer; 1.711 + 1.712 + /* ucnv.c/ucnv_safeClone() copied the main UConverter already */ 1.713 + 1.714 + uprv_memcpy(&newLMBCS->lmbcs, extraInfo, sizeof(UConverterDataLMBCS)); 1.715 + 1.716 + /* share the subconverters */ 1.717 + for(i = 0; i <= ULMBCS_GRP_LAST; ++i) { 1.718 + if(extraInfo->OptGrpConverter[i] != NULL) { 1.719 + ucnv_incrementRefCount(extraInfo->OptGrpConverter[i]); 1.720 + } 1.721 + } 1.722 + /* point the clone at its embedded data; isExtraLocal makes _LMBCSClose skip uprv_free for it */ 1.723 + newLMBCS->cnv.extraInfo = &newLMBCS->lmbcs; 1.724 + newLMBCS->cnv.isExtraLocal = TRUE; 1.725 + return &newLMBCS->cnv; 1.726 +} 1.727 + 1.728 +/* 1.729 + * There used to be a _LMBCSGetUnicodeSet() function here (up to svn revision 20117) 1.730 + * which added all code points except for U+F6xx 1.731 + * because those cannot be represented in the Unicode group. 1.732 + * However, it turns out that windows-950 has roundtrips for all of U+F6xx 1.733 + * which means that LMBCS can convert all Unicode code points after all. 1.734 + * We now simply use ucnv_getCompleteUnicodeSet(). 1.735 + * 1.736 + * This may need to be looked at again as Lotus uses _LMBCSGetUnicodeSet().
(091216) 1.737 + */ 1.738 + 1.739 +/* 1.740 + Here's the basic helper function that we use when converting from 1.741 + Unicode to LMBCS, and we suspect that a Unicode character will fit into 1.742 + one of the 12 groups. The return value is the number of bytes written 1.743 + starting at pStartLMBCS (if any). A return of 0 means nothing usable was produced: either the group's converter gave no bytes (groups_tried[group] is then set) or the result was a single byte below 0x20. 1.744 +*/ 1.745 + 1.746 +static size_t 1.747 +LMBCSConversionWorker ( 1.748 + UConverterDataLMBCS * extraInfo, /* subconverters, opt & locale groups */ 1.749 + ulmbcs_byte_t group, /* The group to try */ 1.750 + ulmbcs_byte_t * pStartLMBCS, /* where to put the results */ 1.751 + UChar * pUniChar, /* The input unicode character */ 1.752 + ulmbcs_byte_t * lastConverterIndex, /* output: track last successful group used */ 1.753 + UBool * groups_tried /* output: track any unsuccessful groups */ 1.754 +) 1.755 +{ 1.756 + ulmbcs_byte_t * pLMBCS = pStartLMBCS; 1.757 + UConverterSharedData * xcnv = extraInfo->OptGrpConverter[group]; 1.758 + 1.759 + int bytesConverted; 1.760 + uint32_t value; 1.761 + ulmbcs_byte_t firstByte; 1.762 + 1.763 + U_ASSERT(xcnv); 1.764 + U_ASSERT(group<ULMBCS_GRP_UNICODE); 1.765 + 1.766 + bytesConverted = ucnv_MBCSFromUChar32(xcnv, *pUniChar, &value, FALSE); 1.767 + 1.768 + /* get the first result byte: the most significant of the bytesConverted bytes packed into value */ 1.769 + if(bytesConverted > 0) { 1.770 + firstByte = (ulmbcs_byte_t)(value >> ((bytesConverted - 1) * 8)); 1.771 + } else { 1.772 + /* most common failure mode is an unassigned character */ 1.773 + groups_tried[group] = TRUE; 1.774 + return 0; 1.775 + } 1.776 + /* NOTE(review): recorded before the control-character rejection further down, so a call that ends up returning 0 there has still updated *lastConverterIndex - confirm this is intended */ 1.777 + *lastConverterIndex = group; 1.778 + 1.779 + /* All initial byte values in lower ascii range should have been caught by now, 1.780 + except with the exception group.
1.781 + */ 1.782 + U_ASSERT((firstByte <= ULMBCS_C0END) || (firstByte >= ULMBCS_C1START) || (group == ULMBCS_GRP_EXCEPT)); 1.783 + 1.784 + /* use converted data: first write 0, 1 or two group bytes (none for the exceptions group or the optimization group; the group byte is doubled for a 1-byte result in a group at or above ULMBCS_DOUBLEOPTGROUP_START) */ 1.785 + if (group != ULMBCS_GRP_EXCEPT && extraInfo->OptGroup != group) 1.786 + { 1.787 + *pLMBCS++ = group; 1.788 + if (bytesConverted == 1 && group >= ULMBCS_DOUBLEOPTGROUP_START) 1.789 + { 1.790 + *pLMBCS++ = group; 1.791 + } 1.792 + } 1.793 + 1.794 + /* don't emit control chars; the 0 return reports no bytes written even though group bytes may already have been stored above */ 1.795 + if ( bytesConverted == 1 && firstByte < 0x20 ) 1.796 + return 0; 1.797 + 1.798 + 1.799 + /* then move over the converted data, most significant byte first */ 1.800 + switch(bytesConverted) 1.801 + { 1.802 + case 4: 1.803 + *pLMBCS++ = (ulmbcs_byte_t)(value >> 24); 1.804 + case 3: /*fall through*/ 1.805 + *pLMBCS++ = (ulmbcs_byte_t)(value >> 16); 1.806 + case 2: /*fall through*/ 1.807 + *pLMBCS++ = (ulmbcs_byte_t)(value >> 8); 1.808 + case 1: /*fall through*/ 1.809 + *pLMBCS++ = (ulmbcs_byte_t)value; 1.810 + default: 1.811 + /* will never occur */ 1.812 + break; 1.813 + } 1.814 + 1.815 + return (pLMBCS - pStartLMBCS); 1.816 +} 1.817 + 1.818 + 1.819 +/* This is a much simpler version of above, when we 1.820 +know we are writing LMBCS using the Unicode group. It always writes ULMBCS_UNICODE_SIZE bytes at pLMBCS and returns that count: the group byte, then the high and low bytes of uniChar, or ULMBCS_UNICOMPATZERO followed by the high byte when the low byte is zero. 1.821 +*/ 1.822 +static size_t 1.823 +LMBCSConvertUni(ulmbcs_byte_t * pLMBCS, UChar uniChar) 1.824 +{ 1.825 + /* encode into LMBCS Unicode range */ 1.826 + uint8_t LowCh = (uint8_t)(uniChar & 0x00FF); 1.827 + uint8_t HighCh = (uint8_t)(uniChar >> 8); 1.828 + 1.829 + *pLMBCS++ = ULMBCS_GRP_UNICODE; 1.830 + /* a zero low byte is never written: ULMBCS_UNICOMPATZERO takes the first data position and the high byte follows */ 1.831 + if (LowCh == 0) 1.832 + { 1.833 + *pLMBCS++ = ULMBCS_UNICOMPATZERO; 1.834 + *pLMBCS++ = HighCh; 1.835 + } 1.836 + else 1.837 + { 1.838 + *pLMBCS++ = HighCh; 1.839 + *pLMBCS++ = LowCh; 1.840 + } 1.841 + return ULMBCS_UNICODE_SIZE; 1.842 +} 1.843 + 1.844 + 1.845 + 1.846 +/* The main Unicode to LMBCS conversion function */ 1.847 +static void 1.848 +_LMBCSFromUnicode(UConverterFromUnicodeArgs* args, 1.849 + UErrorCode* err) 1.850 +{ 1.851 +
ulmbcs_byte_t lastConverterIndex = 0; 1.852 + UChar uniChar; 1.853 + ulmbcs_byte_t LMBCS[ULMBCS_CHARSIZE_MAX]; 1.854 + ulmbcs_byte_t * pLMBCS; 1.855 + int32_t bytes_written; 1.856 + UBool groups_tried[ULMBCS_GRP_LAST+1]; 1.857 + UConverterDataLMBCS * extraInfo = (UConverterDataLMBCS *) args->converter->extraInfo; 1.858 + int sourceIndex = 0; 1.859 + 1.860 + /* Basic strategy: attempt to fill in local LMBCS 1-char buffer.(LMBCS) 1.861 + If that succeeds, see if it will all fit into the target & copy it over 1.862 + if it does. 1.863 + 1.864 + We try conversions in the following order: 1.865 + 1.866 + 1. Single-byte ascii & special fixed control chars (&null) 1.867 + 2. Look up group in table & try that (could be 1.868 + A) Unicode group 1.869 + B) control group, 1.870 + C) national encoding, 1.871 + or ambiguous SBCS or MBCS group (on to step 4...) 1.872 + 1.873 + 3. If its ambiguous, try this order: 1.874 + A) The optimization group 1.875 + B) The locale group 1.876 + C) The last group that succeeded with this string. 1.877 + D) every other group that's relevent (single or double) 1.878 + E) If its single-byte ambiguous, try the exceptions group 1.879 + 1.880 + 4. 
And as a grand fallback: Unicode 1.881 + */ 1.882 + 1.883 + /*Fix for SPR#DJOE66JFN3 (Lotus)*/ 1.884 + ulmbcs_byte_t OldConverterIndex = 0; 1.885 + 1.886 + while (args->source < args->sourceLimit && !U_FAILURE(*err)) 1.887 + { 1.888 + /*Fix for SPR#DJOE66JFN3 (Lotus)*/ 1.889 + OldConverterIndex = extraInfo->localeConverterIndex; 1.890 + 1.891 + if (args->target >= args->targetLimit) 1.892 + { 1.893 + *err = U_BUFFER_OVERFLOW_ERROR; 1.894 + break; 1.895 + } 1.896 + uniChar = *(args->source); 1.897 + bytes_written = 0; 1.898 + pLMBCS = LMBCS; 1.899 + 1.900 + /* check cases in rough order of how common they are, for speed */ 1.901 + 1.902 + /* single byte matches: strategy 1 */ 1.903 + /*Fix for SPR#DJOE66JFN3 (Lotus)*/ 1.904 + if((uniChar>=0x80) && (uniChar<=0xff) 1.905 + /*Fix for SPR#JUYA6XAERU and TSAO7GL5NK (Lotus)*/ &&(uniChar!=0xB1) &&(uniChar!=0xD7) &&(uniChar!=0xF7) 1.906 + &&(uniChar!=0xB0) &&(uniChar!=0xB4) &&(uniChar!=0xB6) &&(uniChar!=0xA7) &&(uniChar!=0xA8)) 1.907 + { 1.908 + extraInfo->localeConverterIndex = ULMBCS_GRP_L1; 1.909 + } 1.910 + if (((uniChar > ULMBCS_C0END) && (uniChar < ULMBCS_C1START)) || 1.911 + uniChar == 0 || uniChar == ULMBCS_HT || uniChar == ULMBCS_CR || 1.912 + uniChar == ULMBCS_LF || uniChar == ULMBCS_123SYSTEMRANGE 1.913 + ) 1.914 + { 1.915 + *pLMBCS++ = (ulmbcs_byte_t ) uniChar; 1.916 + bytes_written = 1; 1.917 + } 1.918 + 1.919 + 1.920 + if (!bytes_written) 1.921 + { 1.922 + /* Check by UNICODE range (Strategy 2) */ 1.923 + ulmbcs_byte_t group = FindLMBCSUniRange(uniChar); 1.924 + 1.925 + if (group == ULMBCS_GRP_UNICODE) /* (Strategy 2A) */ 1.926 + { 1.927 + pLMBCS += LMBCSConvertUni(pLMBCS,uniChar); 1.928 + 1.929 + bytes_written = (int32_t)(pLMBCS - LMBCS); 1.930 + } 1.931 + else if (group == ULMBCS_GRP_CTRL) /* (Strategy 2B) */ 1.932 + { 1.933 + /* Handle control characters here */ 1.934 + if (uniChar <= ULMBCS_C0END) 1.935 + { 1.936 + *pLMBCS++ = ULMBCS_GRP_CTRL; 1.937 + *pLMBCS++ = (ulmbcs_byte_t)(ULMBCS_CTRLOFFSET + 
uniChar); 1.938 + } 1.939 + else if (uniChar >= ULMBCS_C1START && uniChar <= ULMBCS_C1START + ULMBCS_CTRLOFFSET) 1.940 + { 1.941 + *pLMBCS++ = ULMBCS_GRP_CTRL; 1.942 + *pLMBCS++ = (ulmbcs_byte_t ) (uniChar & 0x00FF); 1.943 + } 1.944 + bytes_written = (int32_t)(pLMBCS - LMBCS); 1.945 + } 1.946 + else if (group < ULMBCS_GRP_UNICODE) /* (Strategy 2C) */ 1.947 + { 1.948 + /* a specific converter has been identified - use it */ 1.949 + bytes_written = (int32_t)LMBCSConversionWorker ( 1.950 + extraInfo, group, pLMBCS, &uniChar, 1.951 + &lastConverterIndex, groups_tried); 1.952 + } 1.953 + if (!bytes_written) /* the ambiguous group cases (Strategy 3) */ 1.954 + { 1.955 + uprv_memset(groups_tried, 0, sizeof(groups_tried)); 1.956 + 1.957 + /* check for non-default optimization group (Strategy 3A )*/ 1.958 + if ((extraInfo->OptGroup != 1) && (ULMBCS_AMBIGUOUS_MATCH(group, extraInfo->OptGroup))) 1.959 + { 1.960 + /*zhujin: upgrade, merge #39299 here (Lotus) */ 1.961 + /*To make R5 compatible translation, look for exceptional group first for non-DBCS*/ 1.962 + 1.963 + if(extraInfo->localeConverterIndex < ULMBCS_DOUBLEOPTGROUP_START) 1.964 + { 1.965 + bytes_written = LMBCSConversionWorker (extraInfo, 1.966 + ULMBCS_GRP_L1, pLMBCS, &uniChar, 1.967 + &lastConverterIndex, groups_tried); 1.968 + 1.969 + if(!bytes_written) 1.970 + { 1.971 + bytes_written = LMBCSConversionWorker (extraInfo, 1.972 + ULMBCS_GRP_EXCEPT, pLMBCS, &uniChar, 1.973 + &lastConverterIndex, groups_tried); 1.974 + } 1.975 + if(!bytes_written) 1.976 + { 1.977 + bytes_written = LMBCSConversionWorker (extraInfo, 1.978 + extraInfo->localeConverterIndex, pLMBCS, &uniChar, 1.979 + &lastConverterIndex, groups_tried); 1.980 + } 1.981 + } 1.982 + else 1.983 + { 1.984 + bytes_written = LMBCSConversionWorker (extraInfo, 1.985 + extraInfo->localeConverterIndex, pLMBCS, &uniChar, 1.986 + &lastConverterIndex, groups_tried); 1.987 + } 1.988 + } 1.989 + /* check for locale optimization group (Strategy 3B) */ 1.990 + if 
(!bytes_written && (extraInfo->localeConverterIndex) && (ULMBCS_AMBIGUOUS_MATCH(group, extraInfo->localeConverterIndex))) 1.991 + { 1.992 + bytes_written = (int32_t)LMBCSConversionWorker (extraInfo, 1.993 + extraInfo->localeConverterIndex, pLMBCS, &uniChar, &lastConverterIndex, groups_tried); 1.994 + } 1.995 + /* check for last optimization group used for this string (Strategy 3C) */ 1.996 + if (!bytes_written && (lastConverterIndex) && (ULMBCS_AMBIGUOUS_MATCH(group, lastConverterIndex))) 1.997 + { 1.998 + bytes_written = (int32_t)LMBCSConversionWorker (extraInfo, 1.999 + lastConverterIndex, pLMBCS, &uniChar, &lastConverterIndex, groups_tried); 1.1000 + } 1.1001 + if (!bytes_written) 1.1002 + { 1.1003 + /* just check every possible matching converter (Strategy 3D) */ 1.1004 + ulmbcs_byte_t grp_start; 1.1005 + ulmbcs_byte_t grp_end; 1.1006 + ulmbcs_byte_t grp_ix; 1.1007 + grp_start = (ulmbcs_byte_t)((group == ULMBCS_AMBIGUOUS_MBCS) 1.1008 + ? ULMBCS_DOUBLEOPTGROUP_START 1.1009 + : ULMBCS_GRP_L1); 1.1010 + grp_end = (ulmbcs_byte_t)((group == ULMBCS_AMBIGUOUS_MBCS) 1.1011 + ? 
ULMBCS_GRP_LAST 1.1012 + : ULMBCS_GRP_TH); 1.1013 + if(group == ULMBCS_AMBIGUOUS_ALL) 1.1014 + { 1.1015 + grp_start = ULMBCS_GRP_L1; 1.1016 + grp_end = ULMBCS_GRP_LAST; 1.1017 + } 1.1018 + for (grp_ix = grp_start; 1.1019 + grp_ix <= grp_end && !bytes_written; 1.1020 + grp_ix++) 1.1021 + { 1.1022 + if (extraInfo->OptGrpConverter [grp_ix] && !groups_tried [grp_ix]) 1.1023 + { 1.1024 + bytes_written = (int32_t)LMBCSConversionWorker (extraInfo, 1.1025 + grp_ix, pLMBCS, &uniChar, 1.1026 + &lastConverterIndex, groups_tried); 1.1027 + } 1.1028 + } 1.1029 + /* a final conversion fallback to the exceptions group if its likely 1.1030 + to be single byte (Strategy 3E) */ 1.1031 + if (!bytes_written && grp_start == ULMBCS_GRP_L1) 1.1032 + { 1.1033 + bytes_written = (int32_t)LMBCSConversionWorker (extraInfo, 1.1034 + ULMBCS_GRP_EXCEPT, pLMBCS, &uniChar, 1.1035 + &lastConverterIndex, groups_tried); 1.1036 + } 1.1037 + } 1.1038 + /* all of our other strategies failed. Fallback to Unicode. (Strategy 4)*/ 1.1039 + if (!bytes_written) 1.1040 + { 1.1041 + 1.1042 + pLMBCS += LMBCSConvertUni(pLMBCS, uniChar); 1.1043 + bytes_written = (int32_t)(pLMBCS - LMBCS); 1.1044 + } 1.1045 + } 1.1046 + } 1.1047 + 1.1048 + /* we have a translation. 
increment source and write as much as posible to target */ 1.1049 + args->source++; 1.1050 + pLMBCS = LMBCS; 1.1051 + while (args->target < args->targetLimit && bytes_written--) 1.1052 + { 1.1053 + *(args->target)++ = *pLMBCS++; 1.1054 + if (args->offsets) 1.1055 + { 1.1056 + *(args->offsets)++ = sourceIndex; 1.1057 + } 1.1058 + } 1.1059 + sourceIndex++; 1.1060 + if (bytes_written > 0) 1.1061 + { 1.1062 + /* write any bytes that didn't fit in target to the error buffer, 1.1063 + common code will move this to target if we get called back with 1.1064 + enough target room 1.1065 + */ 1.1066 + uint8_t * pErrorBuffer = args->converter->charErrorBuffer; 1.1067 + *err = U_BUFFER_OVERFLOW_ERROR; 1.1068 + args->converter->charErrorBufferLength = (int8_t)bytes_written; 1.1069 + while (bytes_written--) 1.1070 + { 1.1071 + *pErrorBuffer++ = *pLMBCS++; 1.1072 + } 1.1073 + } 1.1074 + /*Fix for SPR#DJOE66JFN3 (Lotus)*/ 1.1075 + extraInfo->localeConverterIndex = OldConverterIndex; 1.1076 + } 1.1077 +} 1.1078 + 1.1079 + 1.1080 +/* Now, the Unicode from LMBCS section */ 1.1081 + 1.1082 + 1.1083 +/* A function to call when we are looking at the Unicode group byte in LMBCS */ 1.1084 +static UChar 1.1085 +GetUniFromLMBCSUni(char const ** ppLMBCSin) /* Called with LMBCS-style Unicode byte stream */ 1.1086 +{ 1.1087 + uint8_t HighCh = *(*ppLMBCSin)++; /* Big-endian Unicode in LMBCS compatibility group*/ 1.1088 + uint8_t LowCh = *(*ppLMBCSin)++; 1.1089 + 1.1090 + if (HighCh == ULMBCS_UNICOMPATZERO ) 1.1091 + { 1.1092 + HighCh = LowCh; 1.1093 + LowCh = 0; /* zero-byte in LSB special character */ 1.1094 + } 1.1095 + return (UChar)((HighCh << 8) | LowCh); 1.1096 +} 1.1097 + 1.1098 + 1.1099 + 1.1100 +/* CHECK_SOURCE_LIMIT: Helper macro to verify that there are at least'index' 1.1101 + bytes left in source up to sourceLimit.Errors appropriately if not. 1.1102 + If we reach the limit, then update the source pointer to there to consume 1.1103 + all input as required by ICU converter semantics. 
1.1104 +*/ 1.1105 + 1.1106 +#define CHECK_SOURCE_LIMIT(index) \ 1.1107 + if (args->source+index > args->sourceLimit){\ 1.1108 + *err = U_TRUNCATED_CHAR_FOUND;\ 1.1109 + args->source = args->sourceLimit;\ 1.1110 + return 0xffff;} 1.1111 + 1.1112 +/* Return the Unicode representation for the current LMBCS character */ 1.1113 + 1.1114 +static UChar32 1.1115 +_LMBCSGetNextUCharWorker(UConverterToUnicodeArgs* args, 1.1116 + UErrorCode* err) 1.1117 +{ 1.1118 + UChar32 uniChar = 0; /* an output UNICODE char */ 1.1119 + ulmbcs_byte_t CurByte; /* A byte from the input stream */ 1.1120 + 1.1121 + /* error check */ 1.1122 + if (args->source >= args->sourceLimit) 1.1123 + { 1.1124 + *err = U_ILLEGAL_ARGUMENT_ERROR; 1.1125 + return 0xffff; 1.1126 + } 1.1127 + /* Grab first byte & save address for error recovery */ 1.1128 + CurByte = *((ulmbcs_byte_t *) (args->source++)); 1.1129 + 1.1130 + /* 1.1131 + * at entry of each if clause: 1.1132 + * 1. 'CurByte' points at the first byte of a LMBCS character 1.1133 + * 2. '*source'points to the next byte of the source stream after 'CurByte' 1.1134 + * 1.1135 + * the job of each if clause is: 1.1136 + * 1. set '*source' to point at the beginning of next char (nop if LMBCS char is only 1 byte) 1.1137 + * 2. set 'uniChar' up with the right Unicode value, or set 'err' appropriately 1.1138 + */ 1.1139 + 1.1140 + /* First lets check the simple fixed values. 
*/ 1.1141 + 1.1142 + if(((CurByte > ULMBCS_C0END) && (CurByte < ULMBCS_C1START)) /* ascii range */ 1.1143 + || (CurByte == 0) 1.1144 + || CurByte == ULMBCS_HT || CurByte == ULMBCS_CR 1.1145 + || CurByte == ULMBCS_LF || CurByte == ULMBCS_123SYSTEMRANGE) 1.1146 + { 1.1147 + uniChar = CurByte; 1.1148 + } 1.1149 + else 1.1150 + { 1.1151 + UConverterDataLMBCS * extraInfo; 1.1152 + ulmbcs_byte_t group; 1.1153 + UConverterSharedData *cnv; 1.1154 + 1.1155 + if (CurByte == ULMBCS_GRP_CTRL) /* Control character group - no opt group update */ 1.1156 + { 1.1157 + ulmbcs_byte_t C0C1byte; 1.1158 + CHECK_SOURCE_LIMIT(1); 1.1159 + C0C1byte = *(args->source)++; 1.1160 + uniChar = (C0C1byte < ULMBCS_C1START) ? C0C1byte - ULMBCS_CTRLOFFSET : C0C1byte; 1.1161 + } 1.1162 + else 1.1163 + if (CurByte == ULMBCS_GRP_UNICODE) /* Unicode compatibility group: BigEndian UTF16 */ 1.1164 + { 1.1165 + CHECK_SOURCE_LIMIT(2); 1.1166 + 1.1167 + /* don't check for error indicators fffe/ffff below */ 1.1168 + return GetUniFromLMBCSUni(&(args->source)); 1.1169 + } 1.1170 + else if (CurByte <= ULMBCS_CTRLOFFSET) 1.1171 + { 1.1172 + group = CurByte; /* group byte is in the source */ 1.1173 + extraInfo = (UConverterDataLMBCS *) args->converter->extraInfo; 1.1174 + if (group > ULMBCS_GRP_LAST || (cnv = extraInfo->OptGrpConverter[group]) == NULL) 1.1175 + { 1.1176 + /* this is not a valid group byte - no converter*/ 1.1177 + *err = U_INVALID_CHAR_FOUND; 1.1178 + } 1.1179 + else if (group >= ULMBCS_DOUBLEOPTGROUP_START) /* double byte conversion */ 1.1180 + { 1.1181 + 1.1182 + CHECK_SOURCE_LIMIT(2); 1.1183 + 1.1184 + /* check for LMBCS doubled-group-byte case */ 1.1185 + if (*args->source == group) { 1.1186 + /* single byte */ 1.1187 + ++args->source; 1.1188 + uniChar = ucnv_MBCSSimpleGetNextUChar(cnv, args->source, 1, FALSE); 1.1189 + ++args->source; 1.1190 + } else { 1.1191 + /* double byte */ 1.1192 + uniChar = ucnv_MBCSSimpleGetNextUChar(cnv, args->source, 2, FALSE); 1.1193 + args->source += 2; 1.1194 + 
} 1.1195 + } 1.1196 + else { /* single byte conversion */ 1.1197 + CHECK_SOURCE_LIMIT(1); 1.1198 + CurByte = *(args->source)++; 1.1199 + 1.1200 + if (CurByte >= ULMBCS_C1START) 1.1201 + { 1.1202 + uniChar = _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(cnv, CurByte); 1.1203 + } 1.1204 + else 1.1205 + { 1.1206 + /* The non-optimizable oddballs where there is an explicit byte 1.1207 + * AND the second byte is not in the upper ascii range 1.1208 + */ 1.1209 + char bytes[2]; 1.1210 + 1.1211 + extraInfo = (UConverterDataLMBCS *) args->converter->extraInfo; 1.1212 + cnv = extraInfo->OptGrpConverter [ULMBCS_GRP_EXCEPT]; 1.1213 + 1.1214 + /* Lookup value must include opt group */ 1.1215 + bytes[0] = group; 1.1216 + bytes[1] = CurByte; 1.1217 + uniChar = ucnv_MBCSSimpleGetNextUChar(cnv, bytes, 2, FALSE); 1.1218 + } 1.1219 + } 1.1220 + } 1.1221 + else if (CurByte >= ULMBCS_C1START) /* group byte is implicit */ 1.1222 + { 1.1223 + extraInfo = (UConverterDataLMBCS *) args->converter->extraInfo; 1.1224 + group = extraInfo->OptGroup; 1.1225 + cnv = extraInfo->OptGrpConverter[group]; 1.1226 + if (group >= ULMBCS_DOUBLEOPTGROUP_START) /* double byte conversion */ 1.1227 + { 1.1228 + if (!ucnv_MBCSIsLeadByte(cnv, CurByte)) 1.1229 + { 1.1230 + CHECK_SOURCE_LIMIT(0); 1.1231 + 1.1232 + /* let the MBCS conversion consume CurByte again */ 1.1233 + uniChar = ucnv_MBCSSimpleGetNextUChar(cnv, args->source - 1, 1, FALSE); 1.1234 + } 1.1235 + else 1.1236 + { 1.1237 + CHECK_SOURCE_LIMIT(1); 1.1238 + /* let the MBCS conversion consume CurByte again */ 1.1239 + uniChar = ucnv_MBCSSimpleGetNextUChar(cnv, args->source - 1, 2, FALSE); 1.1240 + ++args->source; 1.1241 + } 1.1242 + } 1.1243 + else /* single byte conversion */ 1.1244 + { 1.1245 + uniChar = _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(cnv, CurByte); 1.1246 + } 1.1247 + } 1.1248 + } 1.1249 + return uniChar; 1.1250 +} 1.1251 + 1.1252 + 1.1253 +/* The exported function that converts lmbcs to one or more 1.1254 + UChars - currently UTF-16 1.1255 +*/ 1.1256 +static 
void 1.1257 +_LMBCSToUnicodeWithOffsets(UConverterToUnicodeArgs* args, 1.1258 + UErrorCode* err) 1.1259 +{ 1.1260 + char LMBCS [ULMBCS_CHARSIZE_MAX]; 1.1261 + UChar uniChar; /* one output UNICODE char */ 1.1262 + const char * saveSource; /* beginning of current code point */ 1.1263 + const char * pStartLMBCS = args->source; /* beginning of whole string */ 1.1264 + const char * errSource = NULL; /* pointer to actual input in case an error occurs */ 1.1265 + int8_t savebytes = 0; 1.1266 + 1.1267 + /* Process from source to limit, or until error */ 1.1268 + while (U_SUCCESS(*err) && args->sourceLimit > args->source && args->targetLimit > args->target) 1.1269 + { 1.1270 + saveSource = args->source; /* beginning of current code point */ 1.1271 + 1.1272 + if (args->converter->toULength) /* reassemble char from previous call */ 1.1273 + { 1.1274 + const char *saveSourceLimit; 1.1275 + size_t size_old = args->converter->toULength; 1.1276 + 1.1277 + /* limit from source is either remainder of temp buffer, or user limit on source */ 1.1278 + size_t size_new_maybe_1 = sizeof(LMBCS) - size_old; 1.1279 + size_t size_new_maybe_2 = args->sourceLimit - args->source; 1.1280 + size_t size_new = (size_new_maybe_1 < size_new_maybe_2) ? 
size_new_maybe_1 : size_new_maybe_2; 1.1281 + 1.1282 + 1.1283 + uprv_memcpy(LMBCS, args->converter->toUBytes, size_old); 1.1284 + uprv_memcpy(LMBCS + size_old, args->source, size_new); 1.1285 + saveSourceLimit = args->sourceLimit; 1.1286 + args->source = errSource = LMBCS; 1.1287 + args->sourceLimit = LMBCS+size_old+size_new; 1.1288 + savebytes = (int8_t)(size_old+size_new); 1.1289 + uniChar = (UChar) _LMBCSGetNextUCharWorker(args, err); 1.1290 + args->source = saveSource + ((args->source - LMBCS) - size_old); 1.1291 + args->sourceLimit = saveSourceLimit; 1.1292 + 1.1293 + if (*err == U_TRUNCATED_CHAR_FOUND) 1.1294 + { 1.1295 + /* evil special case: source buffers so small a char spans more than 2 buffers */ 1.1296 + args->converter->toULength = savebytes; 1.1297 + uprv_memcpy(args->converter->toUBytes, LMBCS, savebytes); 1.1298 + args->source = args->sourceLimit; 1.1299 + *err = U_ZERO_ERROR; 1.1300 + return; 1.1301 + } 1.1302 + else 1.1303 + { 1.1304 + /* clear the partial-char marker */ 1.1305 + args->converter->toULength = 0; 1.1306 + } 1.1307 + } 1.1308 + else 1.1309 + { 1.1310 + errSource = saveSource; 1.1311 + uniChar = (UChar) _LMBCSGetNextUCharWorker(args, err); 1.1312 + savebytes = (int8_t)(args->source - saveSource); 1.1313 + } 1.1314 + if (U_SUCCESS(*err)) 1.1315 + { 1.1316 + if (uniChar < 0xfffe) 1.1317 + { 1.1318 + *(args->target)++ = uniChar; 1.1319 + if(args->offsets) 1.1320 + { 1.1321 + *(args->offsets)++ = (int32_t)(saveSource - pStartLMBCS); 1.1322 + } 1.1323 + } 1.1324 + else if (uniChar == 0xfffe) 1.1325 + { 1.1326 + *err = U_INVALID_CHAR_FOUND; 1.1327 + } 1.1328 + else /* if (uniChar == 0xffff) */ 1.1329 + { 1.1330 + *err = U_ILLEGAL_CHAR_FOUND; 1.1331 + } 1.1332 + } 1.1333 + } 1.1334 + /* if target ran out before source, return U_BUFFER_OVERFLOW_ERROR */ 1.1335 + if (U_SUCCESS(*err) && args->sourceLimit > args->source && args->targetLimit <= args->target) 1.1336 + { 1.1337 + *err = U_BUFFER_OVERFLOW_ERROR; 1.1338 + } 1.1339 + else if 
(U_FAILURE(*err)) 1.1340 + { 1.1341 + /* If character incomplete or unmappable/illegal, store it in toUBytes[] */ 1.1342 + args->converter->toULength = savebytes; 1.1343 + if (savebytes > 0) { 1.1344 + uprv_memcpy(args->converter->toUBytes, errSource, savebytes); 1.1345 + } 1.1346 + if (*err == U_TRUNCATED_CHAR_FOUND) { 1.1347 + *err = U_ZERO_ERROR; 1.1348 + } 1.1349 + } 1.1350 +} 1.1351 + 1.1352 +/* And now, the macroized declarations of data & functions: */ 1.1353 +DEFINE_LMBCS_OPEN(1) 1.1354 +DEFINE_LMBCS_OPEN(2) 1.1355 +DEFINE_LMBCS_OPEN(3) 1.1356 +DEFINE_LMBCS_OPEN(4) 1.1357 +DEFINE_LMBCS_OPEN(5) 1.1358 +DEFINE_LMBCS_OPEN(6) 1.1359 +DEFINE_LMBCS_OPEN(8) 1.1360 +DEFINE_LMBCS_OPEN(11) 1.1361 +DEFINE_LMBCS_OPEN(16) 1.1362 +DEFINE_LMBCS_OPEN(17) 1.1363 +DEFINE_LMBCS_OPEN(18) 1.1364 +DEFINE_LMBCS_OPEN(19) 1.1365 + 1.1366 + 1.1367 +DECLARE_LMBCS_DATA(1) 1.1368 +DECLARE_LMBCS_DATA(2) 1.1369 +DECLARE_LMBCS_DATA(3) 1.1370 +DECLARE_LMBCS_DATA(4) 1.1371 +DECLARE_LMBCS_DATA(5) 1.1372 +DECLARE_LMBCS_DATA(6) 1.1373 +DECLARE_LMBCS_DATA(8) 1.1374 +DECLARE_LMBCS_DATA(11) 1.1375 +DECLARE_LMBCS_DATA(16) 1.1376 +DECLARE_LMBCS_DATA(17) 1.1377 +DECLARE_LMBCS_DATA(18) 1.1378 +DECLARE_LMBCS_DATA(19) 1.1379 + 1.1380 +#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */