michael@0: /* michael@0: ****************************************************************************** michael@0: * michael@0: * Copyright (C) 2002-2011, International Business Machines michael@0: * Corporation and others. All Rights Reserved. michael@0: * michael@0: ****************************************************************************** michael@0: * file name: ucnvbocu.cpp michael@0: * encoding: US-ASCII michael@0: * tab size: 8 (not used) michael@0: * indentation:4 michael@0: * michael@0: * created on: 2002mar27 michael@0: * created by: Markus W. Scherer michael@0: * michael@0: * This is an implementation of the Binary Ordered Compression for Unicode, michael@0: * in its MIME-friendly form as defined in http://www.unicode.org/notes/tn6/ michael@0: */ michael@0: michael@0: #include "unicode/utypes.h" michael@0: michael@0: #if !UCONFIG_NO_CONVERSION michael@0: michael@0: #include "unicode/ucnv.h" michael@0: #include "unicode/ucnv_cb.h" michael@0: #include "unicode/utf16.h" michael@0: #include "putilimp.h" michael@0: #include "ucnv_bld.h" michael@0: #include "ucnv_cnv.h" michael@0: #include "uassert.h" michael@0: michael@0: /* BOCU-1 constants and macros ---------------------------------------------- */ michael@0: michael@0: /* michael@0: * BOCU-1 encodes the code points of a Unicode string as michael@0: * a sequence of byte-encoded differences (slope detection), michael@0: * preserving lexical order. michael@0: * michael@0: * Optimize the difference-taking for runs of Unicode text within michael@0: * small scripts: michael@0: * michael@0: * Most small scripts are allocated within aligned 128-blocks of Unicode michael@0: * code points. Lexical order is preserved if the "previous code point" state michael@0: * is always moved into the middle of such a block. michael@0: * michael@0: * Additionally, "prev" is moved from anywhere in the Unihan and Hangul michael@0: * areas into the middle of those areas. 
/*
 * C0 control codes and space are encoded with their US-ASCII bytes.
 * "prev" is reset for C0 controls but not for space.
 */

/* initial value for "prev": middle of the ASCII range */
#define BOCU1_ASCII_PREV        0x40

/* bounding byte values for differences */
#define BOCU1_MIN               0x21
#define BOCU1_MIDDLE            0x90
#define BOCU1_MAX_LEAD          0xfe
#define BOCU1_MAX_TRAIL         0xff
#define BOCU1_RESET             0xff

/* number of lead bytes */
#define BOCU1_COUNT             (BOCU1_MAX_LEAD-BOCU1_MIN+1)

/* adjust trail byte counts for the use of some C0 control byte values */
#define BOCU1_TRAIL_CONTROLS_COUNT  20
#define BOCU1_TRAIL_BYTE_OFFSET     (BOCU1_MIN-BOCU1_TRAIL_CONTROLS_COUNT)

/* number of trail bytes */
#define BOCU1_TRAIL_COUNT       ((BOCU1_MAX_TRAIL-BOCU1_MIN+1)+BOCU1_TRAIL_CONTROLS_COUNT)

/*
 * number of positive and negative single-byte codes
 * (counting 0==BOCU1_MIDDLE among the positive ones)
 */
#define BOCU1_SINGLE            64

/* number of lead bytes for positive and negative 2/3/4-byte sequences */
#define BOCU1_LEAD_2            43
#define BOCU1_LEAD_3            3
#define BOCU1_LEAD_4            1

/* The difference value range for single-byters. */
#define BOCU1_REACH_POS_1   (BOCU1_SINGLE-1)
#define BOCU1_REACH_NEG_1   (-BOCU1_SINGLE)

/* The difference value range for double-byters. */
#define BOCU1_REACH_POS_2   (BOCU1_REACH_POS_1+BOCU1_LEAD_2*BOCU1_TRAIL_COUNT)
#define BOCU1_REACH_NEG_2   (BOCU1_REACH_NEG_1-BOCU1_LEAD_2*BOCU1_TRAIL_COUNT)

/* The difference value range for 3-byters. */
#define BOCU1_REACH_POS_3   \
    (BOCU1_REACH_POS_2+BOCU1_LEAD_3*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT)

#define BOCU1_REACH_NEG_3   (BOCU1_REACH_NEG_2-BOCU1_LEAD_3*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT)

/* The lead byte start values. */
#define BOCU1_START_POS_2   (BOCU1_MIDDLE+BOCU1_REACH_POS_1+1)
#define BOCU1_START_POS_3   (BOCU1_START_POS_2+BOCU1_LEAD_2)
#define BOCU1_START_POS_4   (BOCU1_START_POS_3+BOCU1_LEAD_3)
     /* ==BOCU1_MAX_LEAD */

#define BOCU1_START_NEG_2   (BOCU1_MIDDLE+BOCU1_REACH_NEG_1)
#define BOCU1_START_NEG_3   (BOCU1_START_NEG_2-BOCU1_LEAD_2)
#define BOCU1_START_NEG_4   (BOCU1_START_NEG_3-BOCU1_LEAD_3)
     /* ==BOCU1_MIN+1 */

/*
 * The length of a byte sequence, according to the lead byte (!=BOCU1_RESET).
 * NOTE(review): the middle of this macro and the head of
 * BOCU1_LENGTH_FROM_PACKED were lost in extraction and are restored here from
 * the lead-byte start values above; confirm against upstream.
 */
#define BOCU1_LENGTH_FROM_LEAD(lead) \
    ((BOCU1_START_NEG_2<=(lead) && (lead)<BOCU1_START_POS_2) ? 1 : \
     (BOCU1_START_NEG_3<=(lead) && (lead)<BOCU1_START_POS_3) ? 2 : \
     (BOCU1_START_NEG_4<=(lead) && (lead)<BOCU1_START_POS_4) ? 3 : 4)

/*
 * The length of a byte sequence, according to its packed form
 * (see the return value layout of packDiff()): the top byte holds the
 * length 1..3, or the lead byte (>0x03) of a 4-byte sequence.
 */
#define BOCU1_LENGTH_FROM_PACKED(packed) \
    ((uint32_t)(packed)<0x04000000 ? (packed)>>24 : 4)

/*
 * 12 commonly used C0 control codes (and space) are only used to encode
 * themselves directly,
 * which makes BOCU-1 MIME-usable and reasonably safe for
 * ASCII-oriented software.
 *
 * These controls are
 *  0   NUL
 *
 *  7   BEL
 *  8   BS
 *
 *  9   TAB
 *  a   LF
 *  b   VT
 *  c   FF
 *  d   CR
 *
 *  e   SO
 *  f   SI
 *
 * 1a   SUB
 * 1b   ESC
 *
 * The other 20 C0 controls are also encoded directly (to preserve order)
 * but are also used as trail bytes in difference encoding
 * (for better compression).
 */
#define BOCU1_TRAIL_TO_BYTE(t) ((t)>=BOCU1_TRAIL_CONTROLS_COUNT ? (t)+BOCU1_TRAIL_BYTE_OFFSET : bocu1TrailToByte[t])

/*
 * Byte value map for control codes,
 * from external byte values 0x00..0x20
 * to trail byte values 0..19 (0..0x13) as used in the difference calculation.
 * External byte values that are illegal as trail bytes are mapped to -1.
 */
static const int8_t
bocu1ByteToTrail[BOCU1_MIN]={
/*  0     1     2     3     4     5     6     7    */
    -1,   0x00, 0x01, 0x02, 0x03, 0x04, 0x05, -1,

/*  8     9     a     b     c     d     e     f    */
    -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,

/*  10    11    12    13    14    15    16    17   */
    0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d,

/*  18    19    1a    1b    1c    1d    1e    1f   */
    0x0e, 0x0f, -1,   -1,   0x10, 0x11, 0x12, 0x13,

/*  20   */
    -1
};

/*
 * Byte value map for control codes,
 * from trail byte values 0..19 (0..0x13) as used in the difference calculation
 * to external byte values 0x00..0x20.
 */
static const int8_t
bocu1TrailToByte[BOCU1_TRAIL_CONTROLS_COUNT]={
/*  0     1     2     3     4     5     6     7    */
    0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x10, 0x11,

/*  8     9     a     b     c     d     e     f    */
    0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19,

/*  10    11    12    13   */
    0x1c, 0x1d, 0x1e, 0x1f
};

/**
 * Integer division and modulo with negative numerators
 * yields negative modulo results and quotients that are one more than
 * what we need here.
 * This macro adjusts the results so that the modulo-value m is always >=0.
 *
 * For positive n, the if() condition is always FALSE.
 *
 * @param n Number to be split into quotient and rest.
 *          Will be modified to contain the quotient.
 * @param d Divisor.
 * @param m Output variable for the rest (modulo result).
 */
#define NEGDIVMOD(n, d, m) { \
    (m)=(n)%(d); \
    (n)/=(d); \
    if((m)<0) { \
        --(n); \
        (m)+=(d); \
    } \
}

/* Faster versions of packDiff() for single-byte-encoded diff values. */

/** Is a diff value encodable in a single byte? */
#define DIFF_IS_SINGLE(diff) (BOCU1_REACH_NEG_1<=(diff) && (diff)<=BOCU1_REACH_POS_1)

/** Encode a diff value in a single byte. */
#define PACK_SINGLE_DIFF(diff) (BOCU1_MIDDLE+(diff))

/** Is a diff value encodable in two bytes? */
#define DIFF_IS_DOUBLE(diff) (BOCU1_REACH_NEG_2<=(diff) && (diff)<=BOCU1_REACH_POS_2)

/* BOCU-1 implementation functions ------------------------------------------ */

/* "prev" for most scripts: the middle of the aligned 128-block containing c */
#define BOCU1_SIMPLE_PREV(c) (((c)&~0x7f)+BOCU1_ASCII_PREV)

/**
 * Compute the next "previous" value for differencing
 * from the current code point.
 *
 * The lower bound of the first test and the upper bound of the Hangul test
 * are commented out because BOCU1_PREV() only calls this function for
 * 0x3040<=c<=0xd7a3.
 *
 * @param c current code point, 0x3040..0xd7a3 (rest handled by macro below)
 * @return "previous code point" state value
 */
static inline int32_t
bocu1Prev(int32_t c) {
    /* compute new prev */
    if(/* 0x3040<=c && */ c<=0x309f) {
        /* Hiragana is not 128-aligned */
        return 0x3070;
    } else if(0x4e00<=c && c<=0x9fa5) {
        /* CJK Unihan */
        return 0x4e00-BOCU1_REACH_NEG_2;
    } else if(0xac00<=c /* && c<=0xd7a3 */) {
        /* Korean Hangul */
        return (0xd7a3+0xac00)/2;
    } else {
        /* mostly small scripts */
        return BOCU1_SIMPLE_PREV(c);
    }
}

/** Fast version of bocu1Prev() for most scripts. */
#define BOCU1_PREV(c) ((c)<0x3040 || (c)>0xd7a3 ? BOCU1_SIMPLE_PREV(c) : bocu1Prev(c))

/*
 * The BOCU-1 converter uses the standard setup code in ucnv.c/ucnv_bld.c.
 * The UConverter fields are used as follows:
 *
 * fromUnicodeStatus    encoder's prev (0 will be interpreted as BOCU1_ASCII_PREV)
 *
 * toUnicodeStatus      decoder's prev (0 will be interpreted as BOCU1_ASCII_PREV)
 * mode                 decoder's incomplete (diff<<2)|count (ignored when toULength==0)
 */

/* BOCU-1-from-Unicode conversion functions --------------------------------- */

/**
 * Encode a difference -0x10ffff..0x10ffff in 1..4 bytes
 * and return a packed integer with them.
 *
 * The encoding favors small absolute differences with short encodings
 * to compress runs of same-script characters.
 *
 * Optimized version with unrolled loops and fewer division operations
 * than the standard packDiff().
 */
michael@0: * michael@0: * @param diff difference value -0x10ffff..0x10ffff michael@0: * @return michael@0: * 0x010000zz for 1-byte sequence zz michael@0: * 0x0200yyzz for 2-byte sequence yy zz michael@0: * 0x03xxyyzz for 3-byte sequence xx yy zz michael@0: * 0xwwxxyyzz for 4-byte sequence ww xx yy zz (ww>0x03) michael@0: */ michael@0: static int32_t michael@0: packDiff(int32_t diff) { michael@0: int32_t result, m; michael@0: michael@0: U_ASSERT(!DIFF_IS_SINGLE(diff)); /* assume we won't be called where diff==BOCU1_REACH_NEG_1=-64 */ michael@0: if(diff>=BOCU1_REACH_NEG_1) { michael@0: /* mostly positive differences, and single-byte negative ones */ michael@0: #if 0 /* single-byte case handled in macros, see below */ michael@0: if(diff<=BOCU1_REACH_POS_1) { michael@0: /* single byte */ michael@0: return 0x01000000|(BOCU1_MIDDLE+diff); michael@0: } else michael@0: #endif michael@0: if(diff<=BOCU1_REACH_POS_2) { michael@0: /* two bytes */ michael@0: diff-=BOCU1_REACH_POS_1+1; michael@0: result=0x02000000; michael@0: michael@0: m=diff%BOCU1_TRAIL_COUNT; michael@0: diff/=BOCU1_TRAIL_COUNT; michael@0: result|=BOCU1_TRAIL_TO_BYTE(m); michael@0: michael@0: result|=(BOCU1_START_POS_2+diff)<<8; michael@0: } else if(diff<=BOCU1_REACH_POS_3) { michael@0: /* three bytes */ michael@0: diff-=BOCU1_REACH_POS_2+1; michael@0: result=0x03000000; michael@0: michael@0: m=diff%BOCU1_TRAIL_COUNT; michael@0: diff/=BOCU1_TRAIL_COUNT; michael@0: result|=BOCU1_TRAIL_TO_BYTE(m); michael@0: michael@0: m=diff%BOCU1_TRAIL_COUNT; michael@0: diff/=BOCU1_TRAIL_COUNT; michael@0: result|=BOCU1_TRAIL_TO_BYTE(m)<<8; michael@0: michael@0: result|=(BOCU1_START_POS_3+diff)<<16; michael@0: } else { michael@0: /* four bytes */ michael@0: diff-=BOCU1_REACH_POS_3+1; michael@0: michael@0: m=diff%BOCU1_TRAIL_COUNT; michael@0: diff/=BOCU1_TRAIL_COUNT; michael@0: result=BOCU1_TRAIL_TO_BYTE(m); michael@0: michael@0: m=diff%BOCU1_TRAIL_COUNT; michael@0: diff/=BOCU1_TRAIL_COUNT; michael@0: 
result|=BOCU1_TRAIL_TO_BYTE(m)<<8; michael@0: michael@0: /* michael@0: * We know that / and % would deliver quotient 0 and rest=diff. michael@0: * Avoid division and modulo for performance. michael@0: */ michael@0: result|=BOCU1_TRAIL_TO_BYTE(diff)<<16; michael@0: michael@0: result|=((uint32_t)BOCU1_START_POS_4)<<24; michael@0: } michael@0: } else { michael@0: /* two- to four-byte negative differences */ michael@0: if(diff>=BOCU1_REACH_NEG_2) { michael@0: /* two bytes */ michael@0: diff-=BOCU1_REACH_NEG_1; michael@0: result=0x02000000; michael@0: michael@0: NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m); michael@0: result|=BOCU1_TRAIL_TO_BYTE(m); michael@0: michael@0: result|=(BOCU1_START_NEG_2+diff)<<8; michael@0: } else if(diff>=BOCU1_REACH_NEG_3) { michael@0: /* three bytes */ michael@0: diff-=BOCU1_REACH_NEG_2; michael@0: result=0x03000000; michael@0: michael@0: NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m); michael@0: result|=BOCU1_TRAIL_TO_BYTE(m); michael@0: michael@0: NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m); michael@0: result|=BOCU1_TRAIL_TO_BYTE(m)<<8; michael@0: michael@0: result|=(BOCU1_START_NEG_3+diff)<<16; michael@0: } else { michael@0: /* four bytes */ michael@0: diff-=BOCU1_REACH_NEG_3; michael@0: michael@0: NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m); michael@0: result=BOCU1_TRAIL_TO_BYTE(m); michael@0: michael@0: NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m); michael@0: result|=BOCU1_TRAIL_TO_BYTE(m)<<8; michael@0: michael@0: /* michael@0: * We know that NEGDIVMOD would deliver michael@0: * quotient -1 and rest=diff+BOCU1_TRAIL_COUNT. michael@0: * Avoid division and modulo for performance. 
michael@0: */ michael@0: m=diff+BOCU1_TRAIL_COUNT; michael@0: result|=BOCU1_TRAIL_TO_BYTE(m)<<16; michael@0: michael@0: result|=BOCU1_MIN<<24; michael@0: } michael@0: } michael@0: return result; michael@0: } michael@0: michael@0: michael@0: static void michael@0: _Bocu1FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs, michael@0: UErrorCode *pErrorCode) { michael@0: UConverter *cnv; michael@0: const UChar *source, *sourceLimit; michael@0: uint8_t *target; michael@0: int32_t targetCapacity; michael@0: int32_t *offsets; michael@0: michael@0: int32_t prev, c, diff; michael@0: michael@0: int32_t sourceIndex, nextSourceIndex; michael@0: michael@0: U_ALIGN_CODE(16) michael@0: michael@0: /* set up the local pointers */ michael@0: cnv=pArgs->converter; michael@0: source=pArgs->source; michael@0: sourceLimit=pArgs->sourceLimit; michael@0: target=(uint8_t *)pArgs->target; michael@0: targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target); michael@0: offsets=pArgs->offsets; michael@0: michael@0: /* get the converter state from UConverter */ michael@0: c=cnv->fromUChar32; michael@0: prev=(int32_t)cnv->fromUnicodeStatus; michael@0: if(prev==0) { michael@0: prev=BOCU1_ASCII_PREV; michael@0: } michael@0: michael@0: /* sourceIndex=-1 if the current character began in the previous buffer */ michael@0: sourceIndex= c==0 ? 
0 : -1; michael@0: nextSourceIndex=0; michael@0: michael@0: /* conversion loop */ michael@0: if(c!=0 && targetCapacity>0) { michael@0: goto getTrail; michael@0: } michael@0: michael@0: fastSingle: michael@0: /* fast loop for single-byte differences */ michael@0: /* use only one loop counter variable, targetCapacity, not also source */ michael@0: diff=(int32_t)(sourceLimit-source); michael@0: if(targetCapacity>diff) { michael@0: targetCapacity=diff; michael@0: } michael@0: while(targetCapacity>0 && (c=*source)<0x3000) { michael@0: if(c<=0x20) { michael@0: if(c!=0x20) { michael@0: prev=BOCU1_ASCII_PREV; michael@0: } michael@0: *target++=(uint8_t)c; michael@0: *offsets++=nextSourceIndex++; michael@0: ++source; michael@0: --targetCapacity; michael@0: } else { michael@0: diff=c-prev; michael@0: if(DIFF_IS_SINGLE(diff)) { michael@0: prev=BOCU1_SIMPLE_PREV(c); michael@0: *target++=(uint8_t)PACK_SINGLE_DIFF(diff); michael@0: *offsets++=nextSourceIndex++; michael@0: ++source; michael@0: --targetCapacity; michael@0: } else { michael@0: break; michael@0: } michael@0: } michael@0: } michael@0: /* restore real values */ michael@0: targetCapacity=(int32_t)((const uint8_t *)pArgs->targetLimit-target); michael@0: sourceIndex=nextSourceIndex; /* wrong if offsets==NULL but does not matter */ michael@0: michael@0: /* regular loop for all cases */ michael@0: while(source0) { michael@0: c=*source++; michael@0: ++nextSourceIndex; michael@0: michael@0: if(c<=0x20) { michael@0: /* michael@0: * ISO C0 control & space: michael@0: * Encode directly for MIME compatibility, michael@0: * and reset state except for space, to not disrupt compression. 
michael@0: */ michael@0: if(c!=0x20) { michael@0: prev=BOCU1_ASCII_PREV; michael@0: } michael@0: *target++=(uint8_t)c; michael@0: *offsets++=sourceIndex; michael@0: --targetCapacity; michael@0: michael@0: sourceIndex=nextSourceIndex; michael@0: continue; michael@0: } michael@0: michael@0: if(U16_IS_LEAD(c)) { michael@0: getTrail: michael@0: if(source=0) { michael@0: diff-=BOCU1_REACH_POS_1+1; michael@0: m=diff%BOCU1_TRAIL_COUNT; michael@0: diff/=BOCU1_TRAIL_COUNT; michael@0: diff+=BOCU1_START_POS_2; michael@0: } else { michael@0: diff-=BOCU1_REACH_NEG_1; michael@0: NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m); michael@0: diff+=BOCU1_START_NEG_2; michael@0: } michael@0: *target++=(uint8_t)diff; michael@0: *target++=(uint8_t)BOCU1_TRAIL_TO_BYTE(m); michael@0: *offsets++=sourceIndex; michael@0: *offsets++=sourceIndex; michael@0: targetCapacity-=2; michael@0: sourceIndex=nextSourceIndex; michael@0: } else { michael@0: int32_t length; /* will be 2..4 */ michael@0: michael@0: diff=packDiff(diff); michael@0: length=BOCU1_LENGTH_FROM_PACKED(diff); michael@0: michael@0: /* write the output character bytes from diff and length */ michael@0: /* from the first if in the loop we know that targetCapacity>0 */ michael@0: if(length<=targetCapacity) { michael@0: switch(length) { michael@0: /* each branch falls through to the next one */ michael@0: case 4: michael@0: *target++=(uint8_t)(diff>>24); michael@0: *offsets++=sourceIndex; michael@0: case 3: /*fall through*/ michael@0: *target++=(uint8_t)(diff>>16); michael@0: *offsets++=sourceIndex; michael@0: case 2: /*fall through*/ michael@0: *target++=(uint8_t)(diff>>8); michael@0: *offsets++=sourceIndex; michael@0: /* case 1: handled above */ michael@0: *target++=(uint8_t)diff; michael@0: *offsets++=sourceIndex; michael@0: default: michael@0: /* will never occur */ michael@0: break; michael@0: } michael@0: targetCapacity-=length; michael@0: sourceIndex=nextSourceIndex; michael@0: } else { michael@0: uint8_t *charErrorBuffer; michael@0: 
michael@0: /* michael@0: * We actually do this backwards here: michael@0: * In order to save an intermediate variable, we output michael@0: * first to the overflow buffer what does not fit into the michael@0: * regular target. michael@0: */ michael@0: /* we know that 1<=targetCapacitycharErrorBuffer; michael@0: switch(length) { michael@0: /* each branch falls through to the next one */ michael@0: case 3: michael@0: *charErrorBuffer++=(uint8_t)(diff>>16); michael@0: case 2: /*fall through*/ michael@0: *charErrorBuffer++=(uint8_t)(diff>>8); michael@0: case 1: /*fall through*/ michael@0: *charErrorBuffer=(uint8_t)diff; michael@0: default: michael@0: /* will never occur */ michael@0: break; michael@0: } michael@0: cnv->charErrorBufferLength=(int8_t)length; michael@0: michael@0: /* now output what fits into the regular target */ michael@0: diff>>=8*length; /* length was reduced by targetCapacity */ michael@0: switch(targetCapacity) { michael@0: /* each branch falls through to the next one */ michael@0: case 3: michael@0: *target++=(uint8_t)(diff>>16); michael@0: *offsets++=sourceIndex; michael@0: case 2: /*fall through*/ michael@0: *target++=(uint8_t)(diff>>8); michael@0: *offsets++=sourceIndex; michael@0: case 1: /*fall through*/ michael@0: *target++=(uint8_t)diff; michael@0: *offsets++=sourceIndex; michael@0: default: michael@0: /* will never occur */ michael@0: break; michael@0: } michael@0: michael@0: /* target overflow */ michael@0: targetCapacity=0; michael@0: *pErrorCode=U_BUFFER_OVERFLOW_ERROR; michael@0: break; michael@0: } michael@0: } michael@0: } else { michael@0: /* target is full */ michael@0: *pErrorCode=U_BUFFER_OVERFLOW_ERROR; michael@0: break; michael@0: } michael@0: } michael@0: michael@0: /* set the converter state back into UConverter */ michael@0: cnv->fromUChar32= c<0 ? 
-c : 0; michael@0: cnv->fromUnicodeStatus=(uint32_t)prev; michael@0: michael@0: /* write back the updated pointers */ michael@0: pArgs->source=source; michael@0: pArgs->target=(char *)target; michael@0: pArgs->offsets=offsets; michael@0: } michael@0: michael@0: /* michael@0: * Identical to _Bocu1FromUnicodeWithOffsets but without offset handling. michael@0: * If a change is made in the original function, then either michael@0: * change this function the same way or michael@0: * re-copy the original function and remove the variables michael@0: * offsets, sourceIndex, and nextSourceIndex. michael@0: */ michael@0: static void michael@0: _Bocu1FromUnicode(UConverterFromUnicodeArgs *pArgs, michael@0: UErrorCode *pErrorCode) { michael@0: UConverter *cnv; michael@0: const UChar *source, *sourceLimit; michael@0: uint8_t *target; michael@0: int32_t targetCapacity; michael@0: michael@0: int32_t prev, c, diff; michael@0: michael@0: /* set up the local pointers */ michael@0: cnv=pArgs->converter; michael@0: source=pArgs->source; michael@0: sourceLimit=pArgs->sourceLimit; michael@0: target=(uint8_t *)pArgs->target; michael@0: targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target); michael@0: michael@0: /* get the converter state from UConverter */ michael@0: c=cnv->fromUChar32; michael@0: prev=(int32_t)cnv->fromUnicodeStatus; michael@0: if(prev==0) { michael@0: prev=BOCU1_ASCII_PREV; michael@0: } michael@0: michael@0: /* conversion loop */ michael@0: if(c!=0 && targetCapacity>0) { michael@0: goto getTrail; michael@0: } michael@0: michael@0: fastSingle: michael@0: /* fast loop for single-byte differences */ michael@0: /* use only one loop counter variable, targetCapacity, not also source */ michael@0: diff=(int32_t)(sourceLimit-source); michael@0: if(targetCapacity>diff) { michael@0: targetCapacity=diff; michael@0: } michael@0: while(targetCapacity>0 && (c=*source)<0x3000) { michael@0: if(c<=0x20) { michael@0: if(c!=0x20) { michael@0: prev=BOCU1_ASCII_PREV; michael@0: } 
michael@0: *target++=(uint8_t)c; michael@0: } else { michael@0: diff=c-prev; michael@0: if(DIFF_IS_SINGLE(diff)) { michael@0: prev=BOCU1_SIMPLE_PREV(c); michael@0: *target++=(uint8_t)PACK_SINGLE_DIFF(diff); michael@0: } else { michael@0: break; michael@0: } michael@0: } michael@0: ++source; michael@0: --targetCapacity; michael@0: } michael@0: /* restore real values */ michael@0: targetCapacity=(int32_t)((const uint8_t *)pArgs->targetLimit-target); michael@0: michael@0: /* regular loop for all cases */ michael@0: while(source0) { michael@0: c=*source++; michael@0: michael@0: if(c<=0x20) { michael@0: /* michael@0: * ISO C0 control & space: michael@0: * Encode directly for MIME compatibility, michael@0: * and reset state except for space, to not disrupt compression. michael@0: */ michael@0: if(c!=0x20) { michael@0: prev=BOCU1_ASCII_PREV; michael@0: } michael@0: *target++=(uint8_t)c; michael@0: --targetCapacity; michael@0: continue; michael@0: } michael@0: michael@0: if(U16_IS_LEAD(c)) { michael@0: getTrail: michael@0: if(source=0) { michael@0: diff-=BOCU1_REACH_POS_1+1; michael@0: m=diff%BOCU1_TRAIL_COUNT; michael@0: diff/=BOCU1_TRAIL_COUNT; michael@0: diff+=BOCU1_START_POS_2; michael@0: } else { michael@0: diff-=BOCU1_REACH_NEG_1; michael@0: NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m); michael@0: diff+=BOCU1_START_NEG_2; michael@0: } michael@0: *target++=(uint8_t)diff; michael@0: *target++=(uint8_t)BOCU1_TRAIL_TO_BYTE(m); michael@0: targetCapacity-=2; michael@0: } else { michael@0: int32_t length; /* will be 2..4 */ michael@0: michael@0: diff=packDiff(diff); michael@0: length=BOCU1_LENGTH_FROM_PACKED(diff); michael@0: michael@0: /* write the output character bytes from diff and length */ michael@0: /* from the first if in the loop we know that targetCapacity>0 */ michael@0: if(length<=targetCapacity) { michael@0: switch(length) { michael@0: /* each branch falls through to the next one */ michael@0: case 4: michael@0: *target++=(uint8_t)(diff>>24); michael@0: case 3: /*fall 
through*/ michael@0: *target++=(uint8_t)(diff>>16); michael@0: /* case 2: handled above */ michael@0: *target++=(uint8_t)(diff>>8); michael@0: /* case 1: handled above */ michael@0: *target++=(uint8_t)diff; michael@0: default: michael@0: /* will never occur */ michael@0: break; michael@0: } michael@0: targetCapacity-=length; michael@0: } else { michael@0: uint8_t *charErrorBuffer; michael@0: michael@0: /* michael@0: * We actually do this backwards here: michael@0: * In order to save an intermediate variable, we output michael@0: * first to the overflow buffer what does not fit into the michael@0: * regular target. michael@0: */ michael@0: /* we know that 1<=targetCapacitycharErrorBuffer; michael@0: switch(length) { michael@0: /* each branch falls through to the next one */ michael@0: case 3: michael@0: *charErrorBuffer++=(uint8_t)(diff>>16); michael@0: case 2: /*fall through*/ michael@0: *charErrorBuffer++=(uint8_t)(diff>>8); michael@0: case 1: /*fall through*/ michael@0: *charErrorBuffer=(uint8_t)diff; michael@0: default: michael@0: /* will never occur */ michael@0: break; michael@0: } michael@0: cnv->charErrorBufferLength=(int8_t)length; michael@0: michael@0: /* now output what fits into the regular target */ michael@0: diff>>=8*length; /* length was reduced by targetCapacity */ michael@0: switch(targetCapacity) { michael@0: /* each branch falls through to the next one */ michael@0: case 3: michael@0: *target++=(uint8_t)(diff>>16); michael@0: case 2: /*fall through*/ michael@0: *target++=(uint8_t)(diff>>8); michael@0: case 1: /*fall through*/ michael@0: *target++=(uint8_t)diff; michael@0: default: michael@0: /* will never occur */ michael@0: break; michael@0: } michael@0: michael@0: /* target overflow */ michael@0: targetCapacity=0; michael@0: *pErrorCode=U_BUFFER_OVERFLOW_ERROR; michael@0: break; michael@0: } michael@0: } michael@0: } else { michael@0: /* target is full */ michael@0: *pErrorCode=U_BUFFER_OVERFLOW_ERROR; michael@0: break; michael@0: } michael@0: 
} michael@0: michael@0: /* set the converter state back into UConverter */ michael@0: cnv->fromUChar32= c<0 ? -c : 0; michael@0: cnv->fromUnicodeStatus=(uint32_t)prev; michael@0: michael@0: /* write back the updated pointers */ michael@0: pArgs->source=source; michael@0: pArgs->target=(char *)target; michael@0: } michael@0: michael@0: /* BOCU-1-to-Unicode conversion functions ----------------------------------- */ michael@0: michael@0: /** michael@0: * Function for BOCU-1 decoder; handles multi-byte lead bytes. michael@0: * michael@0: * @param b lead byte; michael@0: * BOCU1_MIN<=b=BOCU1_START_NEG_2) { michael@0: /* positive difference */ michael@0: if(b=BOCU1_START_NEG_3) { michael@0: /* two bytes */ michael@0: diff=((int32_t)b-BOCU1_START_NEG_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_1; michael@0: count=1; michael@0: } else if(b>BOCU1_MIN) { michael@0: /* three bytes */ michael@0: diff=((int32_t)b-BOCU1_START_NEG_3)*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_2; michael@0: count=2; michael@0: } else { michael@0: /* four bytes */ michael@0: diff=-BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_3; michael@0: count=3; michael@0: } michael@0: } michael@0: michael@0: /* return the state for decoding the trail byte(s) */ michael@0: return (diff<<2)|count; michael@0: } michael@0: michael@0: /** michael@0: * Function for BOCU-1 decoder; handles multi-byte trail bytes. 
michael@0: * michael@0: * @param count number of remaining trail bytes including this one michael@0: * @param b trail byte michael@0: * @return new delta for diff including b - <0 indicates an error michael@0: * michael@0: * @see decodeBocu1 michael@0: */ michael@0: static inline int32_t michael@0: decodeBocu1TrailByte(int32_t count, int32_t b) { michael@0: if(b<=0x20) { michael@0: /* skip some C0 controls and make the trail byte range contiguous */ michael@0: b=bocu1ByteToTrail[b]; michael@0: /* b<0 for an illegal trail byte value will result in return<0 below */ michael@0: #if BOCU1_MAX_TRAIL<0xff michael@0: } else if(b>BOCU1_MAX_TRAIL) { michael@0: return -99; michael@0: #endif michael@0: } else { michael@0: b-=BOCU1_TRAIL_BYTE_OFFSET; michael@0: } michael@0: michael@0: /* add trail byte into difference and decrement count */ michael@0: if(count==1) { michael@0: return b; michael@0: } else if(count==2) { michael@0: return b*BOCU1_TRAIL_COUNT; michael@0: } else /* count==3 */ { michael@0: return b*(BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT); michael@0: } michael@0: } michael@0: michael@0: static void michael@0: _Bocu1ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, michael@0: UErrorCode *pErrorCode) { michael@0: UConverter *cnv; michael@0: const uint8_t *source, *sourceLimit; michael@0: UChar *target; michael@0: const UChar *targetLimit; michael@0: int32_t *offsets; michael@0: michael@0: int32_t prev, count, diff, c; michael@0: michael@0: int8_t byteIndex; michael@0: uint8_t *bytes; michael@0: michael@0: int32_t sourceIndex, nextSourceIndex; michael@0: michael@0: /* set up the local pointers */ michael@0: cnv=pArgs->converter; michael@0: source=(const uint8_t *)pArgs->source; michael@0: sourceLimit=(const uint8_t *)pArgs->sourceLimit; michael@0: target=pArgs->target; michael@0: targetLimit=pArgs->targetLimit; michael@0: offsets=pArgs->offsets; michael@0: michael@0: /* get the converter state from UConverter */ michael@0: prev=(int32_t)cnv->toUnicodeStatus; 
michael@0: if(prev==0) { michael@0: prev=BOCU1_ASCII_PREV; michael@0: } michael@0: diff=cnv->mode; /* mode may be set to UCNV_SI by ucnv_bld.c but then toULength==0 */ michael@0: count=diff&3; michael@0: diff>>=2; michael@0: michael@0: byteIndex=cnv->toULength; michael@0: bytes=cnv->toUBytes; michael@0: michael@0: /* sourceIndex=-1 if the current character began in the previous buffer */ michael@0: sourceIndex=byteIndex==0 ? 0 : -1; michael@0: nextSourceIndex=0; michael@0: michael@0: /* conversion "loop" similar to _SCSUToUnicodeWithOffsets() */ michael@0: if(count>0 && byteIndex>0 && targettargetLimit-target); michael@0: if(count>diff) { michael@0: count=diff; michael@0: } michael@0: while(count>0) { michael@0: if(BOCU1_START_NEG_2<=(c=*source) && c=targetLimit) { michael@0: /* target is full */ michael@0: *pErrorCode=U_BUFFER_OVERFLOW_ERROR; michael@0: break; michael@0: } michael@0: michael@0: ++nextSourceIndex; michael@0: c=*source++; michael@0: if(BOCU1_START_NEG_2<=c && c=BOCU1_MIDDLE) { michael@0: diff=((int32_t)c-BOCU1_START_POS_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_POS_1+1; michael@0: } else { michael@0: diff=((int32_t)c-BOCU1_START_NEG_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_1; michael@0: } michael@0: michael@0: /* trail byte */ michael@0: ++nextSourceIndex; michael@0: c=decodeBocu1TrailByte(1, *source++); michael@0: if(c<0 || (uint32_t)(c=prev+diff+c)>0x10ffff) { michael@0: bytes[0]=source[-2]; michael@0: bytes[1]=source[-1]; michael@0: byteIndex=2; michael@0: *pErrorCode=U_ILLEGAL_CHAR_FOUND; michael@0: break; michael@0: } michael@0: } else if(c==BOCU1_RESET) { michael@0: /* only reset the state, no code point */ michael@0: prev=BOCU1_ASCII_PREV; michael@0: sourceIndex=nextSourceIndex; michael@0: continue; michael@0: } else { michael@0: /* michael@0: * For multi-byte difference lead bytes, set the decoder state michael@0: * with the partial difference value from the lead byte and michael@0: * with the number of trail bytes. 
michael@0: */ michael@0: bytes[0]=(uint8_t)c; michael@0: byteIndex=1; michael@0: michael@0: diff=decodeBocu1LeadByte(c); michael@0: count=diff&3; michael@0: diff>>=2; michael@0: getTrail: michael@0: for(;;) { michael@0: if(source>=sourceLimit) { michael@0: goto endloop; michael@0: } michael@0: ++nextSourceIndex; michael@0: c=bytes[byteIndex++]=*source++; michael@0: michael@0: /* trail byte in any position */ michael@0: c=decodeBocu1TrailByte(count, c); michael@0: if(c<0) { michael@0: *pErrorCode=U_ILLEGAL_CHAR_FOUND; michael@0: goto endloop; michael@0: } michael@0: michael@0: diff+=c; michael@0: if(--count==0) { michael@0: /* final trail byte, deliver a code point */ michael@0: byteIndex=0; michael@0: c=prev+diff; michael@0: if((uint32_t)c>0x10ffff) { michael@0: *pErrorCode=U_ILLEGAL_CHAR_FOUND; michael@0: goto endloop; michael@0: } michael@0: break; michael@0: } michael@0: } michael@0: } michael@0: michael@0: /* calculate the next prev and output c */ michael@0: prev=BOCU1_PREV(c); michael@0: if(c<=0xffff) { michael@0: *target++=(UChar)c; michael@0: *offsets++=sourceIndex; michael@0: } else { michael@0: /* output surrogate pair */ michael@0: *target++=U16_LEAD(c); michael@0: if(targetUCharErrorBuffer[0]=U16_TRAIL(c); michael@0: cnv->UCharErrorBufferLength=1; michael@0: *pErrorCode=U_BUFFER_OVERFLOW_ERROR; michael@0: break; michael@0: } michael@0: } michael@0: sourceIndex=nextSourceIndex; michael@0: } michael@0: endloop: michael@0: michael@0: if(*pErrorCode==U_ILLEGAL_CHAR_FOUND) { michael@0: /* set the converter state in UConverter to deal with the next character */ michael@0: cnv->toUnicodeStatus=BOCU1_ASCII_PREV; michael@0: cnv->mode=0; michael@0: } else { michael@0: /* set the converter state back into UConverter */ michael@0: cnv->toUnicodeStatus=(uint32_t)prev; michael@0: cnv->mode=(diff<<2)|count; michael@0: } michael@0: cnv->toULength=byteIndex; michael@0: michael@0: /* write back the updated pointers */ michael@0: pArgs->source=(const char *)source; 
michael@0: pArgs->target=target; michael@0: pArgs->offsets=offsets; michael@0: return; michael@0: } michael@0: michael@0: /* michael@0: * Identical to _Bocu1ToUnicodeWithOffsets but without offset handling. michael@0: * If a change is made in the original function, then either michael@0: * change this function the same way or michael@0: * re-copy the original function and remove the variables michael@0: * offsets, sourceIndex, and nextSourceIndex. michael@0: */ michael@0: static void michael@0: _Bocu1ToUnicode(UConverterToUnicodeArgs *pArgs, michael@0: UErrorCode *pErrorCode) { michael@0: UConverter *cnv; michael@0: const uint8_t *source, *sourceLimit; michael@0: UChar *target; michael@0: const UChar *targetLimit; michael@0: michael@0: int32_t prev, count, diff, c; michael@0: michael@0: int8_t byteIndex; michael@0: uint8_t *bytes; michael@0: michael@0: U_ALIGN_CODE(16) michael@0: michael@0: /* set up the local pointers */ michael@0: cnv=pArgs->converter; michael@0: source=(const uint8_t *)pArgs->source; michael@0: sourceLimit=(const uint8_t *)pArgs->sourceLimit; michael@0: target=pArgs->target; michael@0: targetLimit=pArgs->targetLimit; michael@0: michael@0: /* get the converter state from UConverter */ michael@0: prev=(int32_t)cnv->toUnicodeStatus; michael@0: if(prev==0) { michael@0: prev=BOCU1_ASCII_PREV; michael@0: } michael@0: diff=cnv->mode; /* mode may be set to UCNV_SI by ucnv_bld.c but then toULength==0 */ michael@0: count=diff&3; michael@0: diff>>=2; michael@0: michael@0: byteIndex=cnv->toULength; michael@0: bytes=cnv->toUBytes; michael@0: michael@0: /* conversion "loop" similar to _SCSUToUnicodeWithOffsets() */ michael@0: if(count>0 && byteIndex>0 && targettargetLimit-target); michael@0: if(count>diff) { michael@0: count=diff; michael@0: } michael@0: while(count>0) { michael@0: if(BOCU1_START_NEG_2<=(c=*source) && c=targetLimit) { michael@0: /* target is full */ michael@0: *pErrorCode=U_BUFFER_OVERFLOW_ERROR; michael@0: break; michael@0: } michael@0: 
michael@0: c=*source++; michael@0: if(BOCU1_START_NEG_2<=c && c=BOCU1_MIDDLE) { michael@0: diff=((int32_t)c-BOCU1_START_POS_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_POS_1+1; michael@0: } else { michael@0: diff=((int32_t)c-BOCU1_START_NEG_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_1; michael@0: } michael@0: michael@0: /* trail byte */ michael@0: c=decodeBocu1TrailByte(1, *source++); michael@0: if(c<0 || (uint32_t)(c=prev+diff+c)>0x10ffff) { michael@0: bytes[0]=source[-2]; michael@0: bytes[1]=source[-1]; michael@0: byteIndex=2; michael@0: *pErrorCode=U_ILLEGAL_CHAR_FOUND; michael@0: break; michael@0: } michael@0: } else if(c==BOCU1_RESET) { michael@0: /* only reset the state, no code point */ michael@0: prev=BOCU1_ASCII_PREV; michael@0: continue; michael@0: } else { michael@0: /* michael@0: * For multi-byte difference lead bytes, set the decoder state michael@0: * with the partial difference value from the lead byte and michael@0: * with the number of trail bytes. michael@0: */ michael@0: bytes[0]=(uint8_t)c; michael@0: byteIndex=1; michael@0: michael@0: diff=decodeBocu1LeadByte(c); michael@0: count=diff&3; michael@0: diff>>=2; michael@0: getTrail: michael@0: for(;;) { michael@0: if(source>=sourceLimit) { michael@0: goto endloop; michael@0: } michael@0: c=bytes[byteIndex++]=*source++; michael@0: michael@0: /* trail byte in any position */ michael@0: c=decodeBocu1TrailByte(count, c); michael@0: if(c<0) { michael@0: *pErrorCode=U_ILLEGAL_CHAR_FOUND; michael@0: goto endloop; michael@0: } michael@0: michael@0: diff+=c; michael@0: if(--count==0) { michael@0: /* final trail byte, deliver a code point */ michael@0: byteIndex=0; michael@0: c=prev+diff; michael@0: if((uint32_t)c>0x10ffff) { michael@0: *pErrorCode=U_ILLEGAL_CHAR_FOUND; michael@0: goto endloop; michael@0: } michael@0: break; michael@0: } michael@0: } michael@0: } michael@0: michael@0: /* calculate the next prev and output c */ michael@0: prev=BOCU1_PREV(c); michael@0: if(c<=0xffff) { michael@0: *target++=(UChar)c; michael@0: 
} else { michael@0: /* output surrogate pair */ michael@0: *target++=U16_LEAD(c); michael@0: if(targetUCharErrorBuffer[0]=U16_TRAIL(c); michael@0: cnv->UCharErrorBufferLength=1; michael@0: *pErrorCode=U_BUFFER_OVERFLOW_ERROR; michael@0: break; michael@0: } michael@0: } michael@0: } michael@0: endloop: michael@0: michael@0: if(*pErrorCode==U_ILLEGAL_CHAR_FOUND) { michael@0: /* set the converter state in UConverter to deal with the next character */ michael@0: cnv->toUnicodeStatus=BOCU1_ASCII_PREV; michael@0: cnv->mode=0; michael@0: } else { michael@0: /* set the converter state back into UConverter */ michael@0: cnv->toUnicodeStatus=(uint32_t)prev; michael@0: cnv->mode=(diff<<2)|count; michael@0: } michael@0: cnv->toULength=byteIndex; michael@0: michael@0: /* write back the updated pointers */ michael@0: pArgs->source=(const char *)source; michael@0: pArgs->target=target; michael@0: return; michael@0: } michael@0: michael@0: /* miscellaneous ------------------------------------------------------------ */ michael@0: michael@0: static const UConverterImpl _Bocu1Impl={ michael@0: UCNV_BOCU1, michael@0: michael@0: NULL, michael@0: NULL, michael@0: michael@0: NULL, michael@0: NULL, michael@0: NULL, michael@0: michael@0: _Bocu1ToUnicode, michael@0: _Bocu1ToUnicodeWithOffsets, michael@0: _Bocu1FromUnicode, michael@0: _Bocu1FromUnicodeWithOffsets, michael@0: NULL, michael@0: michael@0: NULL, michael@0: NULL, michael@0: NULL, michael@0: NULL, michael@0: ucnv_getCompleteUnicodeSet, michael@0: michael@0: NULL, michael@0: NULL michael@0: }; michael@0: michael@0: static const UConverterStaticData _Bocu1StaticData={ michael@0: sizeof(UConverterStaticData), michael@0: "BOCU-1", michael@0: 1214, /* CCSID for BOCU-1 */ michael@0: UCNV_IBM, UCNV_BOCU1, michael@0: 1, 4, /* one UChar generates at least 1 byte and at most 4 bytes */ michael@0: { 0x1a, 0, 0, 0 }, 1, /* BOCU-1 never needs to write a subchar */ michael@0: FALSE, FALSE, michael@0: 0, michael@0: 0, michael@0: { 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ michael@0: }; michael@0: michael@0: const UConverterSharedData _Bocu1Data={ michael@0: sizeof(UConverterSharedData), ~((uint32_t)0), michael@0: NULL, NULL, &_Bocu1StaticData, FALSE, &_Bocu1Impl, michael@0: 0, michael@0: UCNV_MBCS_TABLE_INITIALIZER michael@0: }; michael@0: michael@0: #endif