1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/common/ucnv_ct.c Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,645 @@ 1.4 +/* 1.5 +********************************************************************** 1.6 +* Copyright (C) 2010-2012, International Business Machines 1.7 +* Corporation and others. All Rights Reserved. 1.8 +********************************************************************** 1.9 +* file name: ucnv_ct.c 1.10 +* encoding: US-ASCII 1.11 +* tab size: 8 (not used) 1.12 +* indentation:4 1.13 +* 1.14 +* created on: 2010Dec09 1.15 +* created by: Michael Ow 1.16 +*/ 1.17 + 1.18 +#include "unicode/utypes.h" 1.19 + 1.20 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION 1.21 + 1.22 +#include "unicode/ucnv.h" 1.23 +#include "unicode/uset.h" 1.24 +#include "unicode/ucnv_err.h" 1.25 +#include "unicode/ucnv_cb.h" 1.26 +#include "unicode/utf16.h" 1.27 +#include "ucnv_imp.h" 1.28 +#include "ucnv_bld.h" 1.29 +#include "ucnv_cnv.h" 1.30 +#include "ucnvmbcs.h" 1.31 +#include "cstring.h" 1.32 +#include "cmemory.h" 1.33 + 1.34 +#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 1.35 + 1.36 +typedef enum { 1.37 + INVALID = -2, 1.38 + DO_SEARCH = -1, 1.39 + 1.40 + COMPOUND_TEXT_SINGLE_0 = 0, 1.41 + COMPOUND_TEXT_SINGLE_1 = 1, 1.42 + COMPOUND_TEXT_SINGLE_2 = 2, 1.43 + COMPOUND_TEXT_SINGLE_3 = 3, 1.44 + 1.45 + COMPOUND_TEXT_DOUBLE_1 = 4, 1.46 + COMPOUND_TEXT_DOUBLE_2 = 5, 1.47 + COMPOUND_TEXT_DOUBLE_3 = 6, 1.48 + COMPOUND_TEXT_DOUBLE_4 = 7, 1.49 + COMPOUND_TEXT_DOUBLE_5 = 8, 1.50 + COMPOUND_TEXT_DOUBLE_6 = 9, 1.51 + COMPOUND_TEXT_DOUBLE_7 = 10, 1.52 + 1.53 + COMPOUND_TEXT_TRIPLE_DOUBLE = 11, 1.54 + 1.55 + IBM_915 = 12, 1.56 + IBM_916 = 13, 1.57 + IBM_914 = 14, 1.58 + IBM_874 = 15, 1.59 + IBM_912 = 16, 1.60 + IBM_913 = 17, 1.61 + ISO_8859_14 = 18, 1.62 + IBM_923 = 19, 1.63 + NUM_OF_CONVERTERS = 20 1.64 +} COMPOUND_TEXT_CONVERTERS; 1.65 + 1.66 +#define SEARCH_LENGTH 12 1.67 + 1.68 +static const uint8_t escSeqCompoundText[NUM_OF_CONVERTERS][5] = { 1.69 + /* Single */ 1.70 + { 0x1B, 0x2D, 0x41, 0, 0 }, 1.71 + { 0x1B, 0x2D, 0x4D, 0, 0 }, 1.72 + { 0x1B, 0x2D, 0x46, 0, 0 }, 1.73 + { 0x1B, 0x2D, 0x47, 0, 0 }, 1.74 + 1.75 + /* Double */ 1.76 + { 0x1B, 0x24, 0x29, 0x41, 0 }, 1.77 + { 0x1B, 0x24, 0x29, 0x42, 0 }, 1.78 + { 0x1B, 0x24, 0x29, 0x43, 0 }, 1.79 + { 0x1B, 0x24, 0x29, 0x44, 0 }, 1.80 + { 0x1B, 0x24, 0x29, 0x47, 0 }, 1.81 + { 0x1B, 0x24, 0x29, 0x48, 0 }, 1.82 + { 0x1B, 0x24, 0x29, 0x49, 0 }, 1.83 + 1.84 + /* Triple/Double */ 1.85 + { 0x1B, 0x25, 0x47, 0, 0 }, 1.86 + 1.87 + /*IBM-915*/ 1.88 + { 0x1B, 0x2D, 0x4C, 0, 0 }, 1.89 + /*IBM-916*/ 1.90 + { 0x1B, 0x2D, 0x48, 0, 0 }, 1.91 + /*IBM-914*/ 1.92 + { 0x1B, 0x2D, 0x44, 0, 0 }, 1.93 + /*IBM-874*/ 1.94 + { 0x1B, 0x2D, 0x54, 0, 0 }, 1.95 + /*IBM-912*/ 1.96 + { 0x1B, 0x2D, 0x42, 0, 0 }, 1.97 + /* IBM-913 */ 1.98 + { 0x1B, 0x2D, 0x43, 0, 0 }, 1.99 + /* ISO-8859_14 */ 1.100 + { 0x1B, 0x2D, 0x5F, 0, 0 }, 1.101 + /* IBM-923 */ 1.102 + { 0x1B, 0x2D, 0x62, 0, 0 }, 1.103 +}; 1.104 + 1.105 +#define ESC_START 0x1B 1.106 + 1.107 +#define isASCIIRange(codepoint) \ 1.108 + ((codepoint == 0x0000) || (codepoint == 0x0009) || (codepoint == 0x000A) || \ 1.109 + (codepoint >= 0x0020 && codepoint <= 0x007f) || (codepoint >= 0x00A0 && codepoint <= 0x00FF)) 1.110 + 1.111 +#define isIBM915(codepoint) \ 1.112 + ((codepoint >= 0x0401 && codepoint <= 0x045F) || (codepoint == 0x2116)) 1.113 + 1.114 +#define isIBM916(codepoint) \ 1.115 + ((codepoint >= 0x05D0 && codepoint <= 0x05EA) || (codepoint == 0x2017) || (codepoint == 0x203E)) 1.116 + 1.117 +#define isCompoundS3(codepoint) \ 1.118 + ((codepoint == 0x060C) || (codepoint == 0x061B) || (codepoint == 0x061F) || (codepoint >= 0x0621 && codepoint <= 0x063A) || \ 1.119 + (codepoint >= 0x0640 && codepoint <= 0x0652) || (codepoint >= 0x0660 && codepoint <= 0x066D) || (codepoint == 0x200B) || \ 1.120 + (codepoint >= 0x0FE70 && codepoint <= 0x0FE72) || (codepoint == 0x0FE74) || (codepoint >= 0x0FE76 && codepoint <= 0x0FEBE)) 1.121 + 1.122 +#define isCompoundS2(codepoint) \ 1.123 + ((codepoint == 0x02BC) || (codepoint == 0x02BD) || (codepoint >= 0x0384 && codepoint <= 0x03CE) || (codepoint == 0x2015)) 1.124 + 1.125 +#define isIBM914(codepoint) \ 1.126 + ((codepoint == 0x0100) || (codepoint == 0x0101) || (codepoint == 0x0112) || (codepoint == 0x0113) || (codepoint == 0x0116) || (codepoint == 0x0117) || \ 1.127 + (codepoint == 0x0122) || (codepoint == 0x0123) || (codepoint >= 0x0128 && codepoint <= 0x012B) || (codepoint == 0x012E) || (codepoint == 0x012F) || \ 1.128 + (codepoint >= 0x0136 && codepoint <= 0x0138) || (codepoint == 0x013B) || (codepoint == 0x013C) || (codepoint == 0x0145) || (codepoint == 0x0146) || \ 1.129 + (codepoint >= 0x014A && codepoint <= 0x014D) || (codepoint == 0x0156) || (codepoint == 0x0157) || (codepoint >= 0x0166 && codepoint <= 0x016B) || \ 1.130 + (codepoint == 0x0172) || (codepoint == 0x0173)) 1.131 + 1.132 +#define isIBM874(codepoint) \ 1.133 + ((codepoint >= 0x0E01 && codepoint <= 0x0E3A) || (codepoint >= 0x0E3F && codepoint <= 0x0E5B)) 1.134 + 1.135 +#define isIBM912(codepoint) \ 1.136 + ((codepoint >= 0x0102 && codepoint <= 0x0107) || (codepoint >= 0x010C && codepoint <= 0x0111) || (codepoint >= 0x0118 && codepoint <= 0x011B) || \ 1.137 + (codepoint == 0x0139) || (codepoint == 0x013A) || (codepoint == 0x013D) || (codepoint == 0x013E) || (codepoint >= 0x0141 && codepoint <= 0x0144) || \ 1.138 + (codepoint == 0x0147) || (codepoint == 0x0147) || (codepoint == 0x0150) || (codepoint == 0x0151) || (codepoint == 0x0154) || (codepoint == 0x0155) || \ 1.139 + (codepoint >= 0x0158 && codepoint <= 0x015B) || (codepoint == 0x015E) || (codepoint == 0x015F) || (codepoint >= 0x0160 && codepoint <= 0x0165) || \ 1.140 + (codepoint == 0x016E) || (codepoint == 0x016F) || (codepoint == 0x0170) || (codepoint == 0x0171) || (codepoint >= 0x0179 && codepoint <= 0x017E) || \ 1.141 + (codepoint == 0x02C7) || (codepoint == 0x02D8) || (codepoint == 0x02D9) || (codepoint == 0x02DB) || (codepoint == 0x02DD)) 1.142 + 1.143 +#define isIBM913(codepoint) \ 1.144 + ((codepoint >= 0x0108 && codepoint <= 0x010B) || (codepoint == 0x011C) || \ 1.145 + (codepoint == 0x011D) || (codepoint == 0x0120) || (codepoint == 0x0121) || \ 1.146 + (codepoint >= 0x0124 && codepoint <= 0x0127) || (codepoint == 0x0134) || (codepoint == 0x0135) || \ 1.147 + (codepoint == 0x015C) || (codepoint == 0x015D) || (codepoint == 0x016C) || (codepoint == 0x016D)) 1.148 + 1.149 +#define isCompoundS1(codepoint) \ 1.150 + ((codepoint == 0x011E) || (codepoint == 0x011F) || (codepoint == 0x0130) || \ 1.151 + (codepoint == 0x0131) || (codepoint >= 0x0218 && codepoint <= 0x021B)) 1.152 + 1.153 +#define isISO8859_14(codepoint) \ 1.154 + ((codepoint >= 0x0174 && codepoint <= 0x0177) || (codepoint == 0x1E0A) || \ 1.155 + (codepoint == 0x1E0B) || (codepoint == 0x1E1E) || (codepoint == 0x1E1F) || \ 1.156 + (codepoint == 0x1E40) || (codepoint == 0x1E41) || (codepoint == 0x1E56) || \ 1.157 + (codepoint == 0x1E57) || (codepoint == 0x1E60) || (codepoint == 0x1E61) || \ 1.158 + (codepoint == 0x1E6A) || (codepoint == 0x1E6B) || (codepoint == 0x1EF2) || \ 1.159 + (codepoint == 0x1EF3) || (codepoint >= 0x1E80 && codepoint <= 0x1E85)) 1.160 + 1.161 +#define isIBM923(codepoint) \ 1.162 + ((codepoint == 0x0152) || (codepoint == 0x0153) || (codepoint == 0x0178) || (codepoint == 0x20AC)) 1.163 + 1.164 + 1.165 +typedef struct{ 1.166 + UConverterSharedData *myConverterArray[NUM_OF_CONVERTERS]; 1.167 + COMPOUND_TEXT_CONVERTERS state; 1.168 +} UConverterDataCompoundText; 1.169 + 1.170 +/*********** Compound Text Converter Protos ***********/ 1.171 +static void 1.172 +_CompoundTextOpen(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode); 1.173 + 1.174 +static void 1.175 + _CompoundTextClose(UConverter *converter); 1.176 + 1.177 +static void 1.178 +_CompoundTextReset(UConverter *converter, UConverterResetChoice choice); 1.179 + 1.180 +static const char* 1.181 +_CompoundTextgetName(const UConverter* cnv); 1.182 + 1.183 + 1.184 +static int32_t findNextEsc(const char *source, const char *sourceLimit) { 1.185 + int32_t length = sourceLimit - source; 1.186 + int32_t i; 1.187 + for (i = 1; i < length; i++) { 1.188 + if (*(source + i) == 0x1B) { 1.189 + return i; 1.190 + } 1.191 + } 1.192 + 1.193 + return length; 1.194 +} 1.195 + 1.196 +static COMPOUND_TEXT_CONVERTERS getState(int codepoint) { 1.197 + COMPOUND_TEXT_CONVERTERS state = DO_SEARCH; 1.198 + 1.199 + if (isASCIIRange(codepoint)) { 1.200 + state = COMPOUND_TEXT_SINGLE_0; 1.201 + } else if (isIBM912(codepoint)) { 1.202 + state = IBM_912; 1.203 + }else if (isIBM913(codepoint)) { 1.204 + state = IBM_913; 1.205 + } else if (isISO8859_14(codepoint)) { 1.206 + state = ISO_8859_14; 1.207 + } else if (isIBM923(codepoint)) { 1.208 + state = IBM_923; 1.209 + } else if (isIBM874(codepoint)) { 1.210 + state = IBM_874; 1.211 + } else if (isIBM914(codepoint)) { 1.212 + state = IBM_914; 1.213 + } else if (isCompoundS2(codepoint)) { 1.214 + state = COMPOUND_TEXT_SINGLE_2; 1.215 + } else if (isCompoundS3(codepoint)) { 1.216 + state = COMPOUND_TEXT_SINGLE_3; 1.217 + } else if (isIBM916(codepoint)) { 1.218 + state = IBM_916; 1.219 + } else if (isIBM915(codepoint)) { 1.220 + state = IBM_915; 1.221 + } else if (isCompoundS1(codepoint)) { 1.222 + state = COMPOUND_TEXT_SINGLE_1; 1.223 + } 1.224 + 1.225 + return state; 1.226 +} 1.227 + 1.228 +static COMPOUND_TEXT_CONVERTERS findStateFromEscSeq(const char* source, const char* sourceLimit, const uint8_t* toUBytesBuffer, int32_t toUBytesBufferLength, UErrorCode *err) { 1.229 + COMPOUND_TEXT_CONVERTERS state = INVALID; 1.230 + UBool matchFound = FALSE; 1.231 + int32_t i, n, offset = toUBytesBufferLength; 1.232 + 1.233 + for (i = 0; i < NUM_OF_CONVERTERS; i++) { 1.234 + matchFound = TRUE; 1.235 + for (n = 0; escSeqCompoundText[i][n] != 0; n++) { 1.236 + if (n < toUBytesBufferLength) { 1.237 + if (toUBytesBuffer[n] != escSeqCompoundText[i][n]) { 1.238 + matchFound = FALSE; 1.239 + break; 1.240 + } 1.241 + } else if ((source + (n - offset)) >= sourceLimit) { 1.242 + *err = U_TRUNCATED_CHAR_FOUND; 1.243 + matchFound = FALSE; 1.244 + break; 1.245 + } else if (*(source + (n - offset)) != escSeqCompoundText[i][n]) { 1.246 + matchFound = FALSE; 1.247 + break; 1.248 + } 1.249 + } 1.250 + 1.251 + if (matchFound) { 1.252 + break; 1.253 + } 1.254 + } 1.255 + 1.256 + if (matchFound) { 1.257 + state = (COMPOUND_TEXT_CONVERTERS)i; 1.258 + } 1.259 + 1.260 + return state; 1.261 +} 1.262 + 1.263 +static void 1.264 +_CompoundTextOpen(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){ 1.265 + cnv->extraInfo = uprv_malloc (sizeof (UConverterDataCompoundText)); 1.266 + if (cnv->extraInfo != NULL) { 1.267 + UConverterDataCompoundText *myConverterData = (UConverterDataCompoundText *) cnv->extraInfo; 1.268 + 1.269 + UConverterNamePieces stackPieces; 1.270 + UConverterLoadArgs stackArgs={ (int32_t)sizeof(UConverterLoadArgs) }; 1.271 + 1.272 + myConverterData->myConverterArray[COMPOUND_TEXT_SINGLE_0] = NULL; 1.273 + myConverterData->myConverterArray[COMPOUND_TEXT_SINGLE_1] = ucnv_loadSharedData("icu-internal-compound-s1", &stackPieces, &stackArgs, errorCode); 1.274 + myConverterData->myConverterArray[COMPOUND_TEXT_SINGLE_2] = ucnv_loadSharedData("icu-internal-compound-s2", &stackPieces, &stackArgs, errorCode); 1.275 + myConverterData->myConverterArray[COMPOUND_TEXT_SINGLE_3] = ucnv_loadSharedData("icu-internal-compound-s3", &stackPieces, &stackArgs, errorCode); 1.276 + myConverterData->myConverterArray[COMPOUND_TEXT_DOUBLE_1] = ucnv_loadSharedData("icu-internal-compound-d1", &stackPieces, &stackArgs, errorCode); 1.277 + myConverterData->myConverterArray[COMPOUND_TEXT_DOUBLE_2] = ucnv_loadSharedData("icu-internal-compound-d2", &stackPieces, &stackArgs, errorCode); 1.278 + myConverterData->myConverterArray[COMPOUND_TEXT_DOUBLE_3] = ucnv_loadSharedData("icu-internal-compound-d3", &stackPieces, &stackArgs, errorCode); 1.279 + myConverterData->myConverterArray[COMPOUND_TEXT_DOUBLE_4] = ucnv_loadSharedData("icu-internal-compound-d4", &stackPieces, &stackArgs, errorCode); 1.280 + myConverterData->myConverterArray[COMPOUND_TEXT_DOUBLE_5] = ucnv_loadSharedData("icu-internal-compound-d5", &stackPieces, &stackArgs, errorCode); 1.281 + myConverterData->myConverterArray[COMPOUND_TEXT_DOUBLE_6] = ucnv_loadSharedData("icu-internal-compound-d6", &stackPieces, &stackArgs, errorCode); 1.282 + myConverterData->myConverterArray[COMPOUND_TEXT_DOUBLE_7] = ucnv_loadSharedData("icu-internal-compound-d7", &stackPieces, &stackArgs, errorCode); 1.283 + myConverterData->myConverterArray[COMPOUND_TEXT_TRIPLE_DOUBLE] = ucnv_loadSharedData("icu-internal-compound-t", &stackPieces, &stackArgs, errorCode); 1.284 + 1.285 + myConverterData->myConverterArray[IBM_915] = ucnv_loadSharedData("ibm-915_P100-1995", &stackPieces, &stackArgs, errorCode); 1.286 + myConverterData->myConverterArray[IBM_916] = ucnv_loadSharedData("ibm-916_P100-1995", &stackPieces, &stackArgs, errorCode); 1.287 + myConverterData->myConverterArray[IBM_914] = ucnv_loadSharedData("ibm-914_P100-1995", &stackPieces, &stackArgs, errorCode); 1.288 + myConverterData->myConverterArray[IBM_874] = ucnv_loadSharedData("ibm-874_P100-1995", &stackPieces, &stackArgs, errorCode); 1.289 + myConverterData->myConverterArray[IBM_912] = ucnv_loadSharedData("ibm-912_P100-1995", &stackPieces, &stackArgs, errorCode); 1.290 + myConverterData->myConverterArray[IBM_913] = ucnv_loadSharedData("ibm-913_P100-2000", &stackPieces, &stackArgs, errorCode); 1.291 + myConverterData->myConverterArray[ISO_8859_14] = ucnv_loadSharedData("iso-8859_14-1998", &stackPieces, &stackArgs, errorCode); 1.292 + myConverterData->myConverterArray[IBM_923] = ucnv_loadSharedData("ibm-923_P100-1998", &stackPieces, &stackArgs, errorCode); 1.293 + 1.294 + if (U_FAILURE(*errorCode) || pArgs->onlyTestIsLoadable) { 1.295 + _CompoundTextClose(cnv); 1.296 + return; 1.297 + } 1.298 + 1.299 + myConverterData->state = (COMPOUND_TEXT_CONVERTERS)0; 1.300 + } else { 1.301 + *errorCode = U_MEMORY_ALLOCATION_ERROR; 1.302 + } 1.303 +} 1.304 + 1.305 + 1.306 +static void 1.307 +_CompoundTextClose(UConverter *converter) { 1.308 + UConverterDataCompoundText* myConverterData = (UConverterDataCompoundText*)(converter->extraInfo); 1.309 + int32_t i; 1.310 + 1.311 + if (converter->extraInfo != NULL) { 1.312 + /*close the array of converter pointers and free the memory*/ 1.313 + for (i = 0; i < NUM_OF_CONVERTERS; i++) { 1.314 + if (myConverterData->myConverterArray[i] != NULL) { 1.315 + ucnv_unloadSharedDataIfReady(myConverterData->myConverterArray[i]); 1.316 + } 1.317 + } 1.318 + 1.319 + uprv_free(converter->extraInfo); 1.320 + } 1.321 +} 1.322 + 1.323 +static void 1.324 +_CompoundTextReset(UConverter *converter, UConverterResetChoice choice) { 1.325 +} 1.326 + 1.327 +static const char* 1.328 +_CompoundTextgetName(const UConverter* cnv){ 1.329 + return "x11-compound-text"; 1.330 +} 1.331 + 1.332 +static void 1.333 +UConverter_fromUnicode_CompoundText_OFFSETS(UConverterFromUnicodeArgs* args, UErrorCode* err){ 1.334 + UConverter *cnv = args->converter; 1.335 + uint8_t *target = (uint8_t *) args->target; 1.336 + const uint8_t *targetLimit = (const uint8_t *) args->targetLimit; 1.337 + const UChar* source = args->source; 1.338 + const UChar* sourceLimit = args->sourceLimit; 1.339 + /* int32_t* offsets = args->offsets; */ 1.340 + UChar32 sourceChar; 1.341 + UBool useFallback = cnv->useFallback; 1.342 + uint8_t tmpTargetBuffer[7]; 1.343 + int32_t tmpTargetBufferLength = 0; 1.344 + COMPOUND_TEXT_CONVERTERS currentState, tmpState; 1.345 + uint32_t pValue; 1.346 + int32_t pValueLength = 0; 1.347 + int32_t i, n, j; 1.348 + 1.349 + UConverterDataCompoundText *myConverterData = (UConverterDataCompoundText *) cnv->extraInfo; 1.350 + 1.351 + currentState = myConverterData->state; 1.352 + 1.353 + /* check if the last codepoint of previous buffer was a lead surrogate*/ 1.354 + if((sourceChar = cnv->fromUChar32)!=0 && target< targetLimit) { 1.355 + goto getTrail; 1.356 + } 1.357 + 1.358 + while( source < sourceLimit){ 1.359 + if(target < targetLimit){ 1.360 + 1.361 + sourceChar = *(source++); 1.362 + /*check if the char is a First surrogate*/ 1.363 + if(U16_IS_SURROGATE(sourceChar)) { 1.364 + if(U16_IS_SURROGATE_LEAD(sourceChar)) { 1.365 +getTrail: 1.366 + /*look ahead to find the trail surrogate*/ 1.367 + if(source < sourceLimit) { 1.368 + /* test the following code unit */ 1.369 + UChar trail=(UChar) *source; 1.370 + if(U16_IS_TRAIL(trail)) { 1.371 + source++; 1.372 + sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail); 1.373 + cnv->fromUChar32=0x00; 1.374 + /* convert this supplementary code point */ 1.375 + /* exit this condition tree */ 1.376 + } else { 1.377 + /* this is an unmatched lead code unit (1st surrogate) */ 1.378 + /* callback(illegal) */ 1.379 + *err=U_ILLEGAL_CHAR_FOUND; 1.380 + cnv->fromUChar32=sourceChar; 1.381 + break; 1.382 + } 1.383 + } else { 1.384 + /* no more input */ 1.385 + cnv->fromUChar32=sourceChar; 1.386 + break; 1.387 + } 1.388 + } else { 1.389 + /* this is an unmatched trail code unit (2nd surrogate) */ 1.390 + /* callback(illegal) */ 1.391 + *err=U_ILLEGAL_CHAR_FOUND; 1.392 + cnv->fromUChar32=sourceChar; 1.393 + break; 1.394 + } 1.395 + } 1.396 + 1.397 + tmpTargetBufferLength = 0; 1.398 + tmpState = getState(sourceChar); 1.399 + 1.400 + if (tmpState != DO_SEARCH && currentState != tmpState) { 1.401 + /* Get escape sequence if necessary */ 1.402 + currentState = tmpState; 1.403 + for (i = 0; escSeqCompoundText[currentState][i] != 0; i++) { 1.404 + tmpTargetBuffer[tmpTargetBufferLength++] = escSeqCompoundText[currentState][i]; 1.405 + } 1.406 + } 1.407 + 1.408 + if (tmpState == DO_SEARCH) { 1.409 + /* Test all available converters */ 1.410 + for (i = 1; i < SEARCH_LENGTH; i++) { 1.411 + pValueLength = ucnv_MBCSFromUChar32(myConverterData->myConverterArray[i], sourceChar, &pValue, useFallback); 1.412 + if (pValueLength > 0) { 1.413 + tmpState = (COMPOUND_TEXT_CONVERTERS)i; 1.414 + if (currentState != tmpState) { 1.415 + currentState = tmpState; 1.416 + for (j = 0; escSeqCompoundText[currentState][j] != 0; j++) { 1.417 + tmpTargetBuffer[tmpTargetBufferLength++] = escSeqCompoundText[currentState][j]; 1.418 + } 1.419 + } 1.420 + for (n = (pValueLength - 1); n >= 0; n--) { 1.421 + tmpTargetBuffer[tmpTargetBufferLength++] = (uint8_t)(pValue >> (n * 8)); 1.422 + } 1.423 + break; 1.424 + } 1.425 + } 1.426 + } else if (tmpState == COMPOUND_TEXT_SINGLE_0) { 1.427 + tmpTargetBuffer[tmpTargetBufferLength++] = (uint8_t)sourceChar; 1.428 + } else { 1.429 + pValueLength = ucnv_MBCSFromUChar32(myConverterData->myConverterArray[currentState], sourceChar, &pValue, useFallback); 1.430 + if (pValueLength > 0) { 1.431 + for (n = (pValueLength - 1); n >= 0; n--) { 1.432 + tmpTargetBuffer[tmpTargetBufferLength++] = (uint8_t)(pValue >> (n * 8)); 1.433 + } 1.434 + } 1.435 + } 1.436 + 1.437 + for (i = 0; i < tmpTargetBufferLength; i++) { 1.438 + if (target < targetLimit) { 1.439 + *target++ = tmpTargetBuffer[i]; 1.440 + } else { 1.441 + *err = U_BUFFER_OVERFLOW_ERROR; 1.442 + break; 1.443 + } 1.444 + } 1.445 + 1.446 + if (*err == U_BUFFER_OVERFLOW_ERROR) { 1.447 + for (; i < tmpTargetBufferLength; i++) { 1.448 + args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = tmpTargetBuffer[i]; 1.449 + } 1.450 + } 1.451 + } else { 1.452 + *err = U_BUFFER_OVERFLOW_ERROR; 1.453 + break; 1.454 + } 1.455 + } 1.456 + 1.457 + /*save the state and return */ 1.458 + myConverterData->state = currentState; 1.459 + args->source = source; 1.460 + args->target = (char*)target; 1.461 +} 1.462 + 1.463 + 1.464 +static void 1.465 +UConverter_toUnicode_CompoundText_OFFSETS(UConverterToUnicodeArgs *args, 1.466 + UErrorCode* err){ 1.467 + const char *mySource = (char *) args->source; 1.468 + UChar *myTarget = args->target; 1.469 + const char *mySourceLimit = args->sourceLimit; 1.470 + const char *tmpSourceLimit = mySourceLimit; 1.471 + uint32_t mySourceChar = 0x0000; 1.472 + COMPOUND_TEXT_CONVERTERS currentState, tmpState; 1.473 + int32_t sourceOffset = 0; 1.474 + UConverterDataCompoundText *myConverterData = (UConverterDataCompoundText *) args->converter->extraInfo; 1.475 + UConverterSharedData* savedSharedData = NULL; 1.476 + 1.477 + UConverterToUnicodeArgs subArgs; 1.478 + int32_t minArgsSize; 1.479 + 1.480 + /* set up the subconverter arguments */ 1.481 + if(args->size<sizeof(UConverterToUnicodeArgs)) { 1.482 + minArgsSize = args->size; 1.483 + } else { 1.484 + minArgsSize = (int32_t)sizeof(UConverterToUnicodeArgs); 1.485 + } 1.486 + 1.487 + uprv_memcpy(&subArgs, args, minArgsSize); 1.488 + subArgs.size = (uint16_t)minArgsSize; 1.489 + 1.490 + currentState = tmpState = myConverterData->state; 1.491 + 1.492 + while(mySource < mySourceLimit){ 1.493 + if(myTarget < args->targetLimit){ 1.494 + if (args->converter->toULength > 0) { 1.495 + mySourceChar = args->converter->toUBytes[0]; 1.496 + } else { 1.497 + mySourceChar = (uint8_t)*mySource; 1.498 + } 1.499 + 1.500 + if (mySourceChar == ESC_START) { 1.501 + tmpState = findStateFromEscSeq(mySource, mySourceLimit, args->converter->toUBytes, args->converter->toULength, err); 1.502 + 1.503 + if (*err == U_TRUNCATED_CHAR_FOUND) { 1.504 + for (; mySource < mySourceLimit;) { 1.505 + args->converter->toUBytes[args->converter->toULength++] = *mySource++; 1.506 + } 1.507 + *err = U_ZERO_ERROR; 1.508 + break; 1.509 + } else if (tmpState == INVALID) { 1.510 + if (args->converter->toULength == 0) { 1.511 + mySource++; /* skip over the 0x1b byte */ 1.512 + } 1.513 + *err = U_ILLEGAL_CHAR_FOUND; 1.514 + break; 1.515 + } 1.516 + 1.517 + if (tmpState != currentState) { 1.518 + currentState = tmpState; 1.519 + } 1.520 + 1.521 + sourceOffset = uprv_strlen((char*)escSeqCompoundText[currentState]) - args->converter->toULength; 1.522 + 1.523 + mySource += sourceOffset; 1.524 + 1.525 + args->converter->toULength = 0; 1.526 + } 1.527 + 1.528 + if (currentState == COMPOUND_TEXT_SINGLE_0) { 1.529 + while (mySource < mySourceLimit) { 1.530 + if (*mySource == ESC_START) { 1.531 + break; 1.532 + } 1.533 + if (myTarget < args->targetLimit) { 1.534 + *myTarget++ = 0x00ff&(*mySource++); 1.535 + } else { 1.536 + *err = U_BUFFER_OVERFLOW_ERROR; 1.537 + break; 1.538 + } 1.539 + } 1.540 + } else if (mySource < mySourceLimit){ 1.541 + sourceOffset = findNextEsc(mySource, mySourceLimit); 1.542 + 1.543 + tmpSourceLimit = mySource + sourceOffset; 1.544 + 1.545 + subArgs.source = mySource; 1.546 + subArgs.sourceLimit = tmpSourceLimit; 1.547 + subArgs.target = myTarget; 1.548 + savedSharedData = subArgs.converter->sharedData; 1.549 + subArgs.converter->sharedData = myConverterData->myConverterArray[currentState]; 1.550 + 1.551 + ucnv_MBCSToUnicodeWithOffsets(&subArgs, err); 1.552 + 1.553 + subArgs.converter->sharedData = savedSharedData; 1.554 + 1.555 + mySource = subArgs.source; 1.556 + myTarget = subArgs.target; 1.557 + 1.558 + if (U_FAILURE(*err)) { 1.559 + if(*err == U_BUFFER_OVERFLOW_ERROR) { 1.560 + if(subArgs.converter->UCharErrorBufferLength > 0) { 1.561 + uprv_memcpy(args->converter->UCharErrorBuffer, subArgs.converter->UCharErrorBuffer, 1.562 + subArgs.converter->UCharErrorBufferLength); 1.563 + } 1.564 + args->converter->UCharErrorBufferLength=subArgs.converter->UCharErrorBufferLength; 1.565 + subArgs.converter->UCharErrorBufferLength = 0; 1.566 + } 1.567 + break; 1.568 + } 1.569 + } 1.570 + } else { 1.571 + *err = U_BUFFER_OVERFLOW_ERROR; 1.572 + break; 1.573 + } 1.574 + } 1.575 + myConverterData->state = currentState; 1.576 + args->target = myTarget; 1.577 + args->source = mySource; 1.578 +} 1.579 + 1.580 +static void 1.581 +_CompoundText_GetUnicodeSet(const UConverter *cnv, 1.582 + const USetAdder *sa, 1.583 + UConverterUnicodeSet which, 1.584 + UErrorCode *pErrorCode) { 1.585 + UConverterDataCompoundText *myConverterData = (UConverterDataCompoundText *)cnv->extraInfo; 1.586 + int32_t i; 1.587 + 1.588 + for (i = 1; i < NUM_OF_CONVERTERS; i++) { 1.589 + ucnv_MBCSGetUnicodeSetForUnicode(myConverterData->myConverterArray[i], sa, which, pErrorCode); 1.590 + } 1.591 + sa->add(sa->set, 0x0000); 1.592 + sa->add(sa->set, 0x0009); 1.593 + sa->add(sa->set, 0x000A); 1.594 + sa->addRange(sa->set, 0x0020, 0x007F); 1.595 + sa->addRange(sa->set, 0x00A0, 0x00FF); 1.596 +} 1.597 + 1.598 +static const UConverterImpl _CompoundTextImpl = { 1.599 + 1.600 + UCNV_COMPOUND_TEXT, 1.601 + 1.602 + NULL, 1.603 + NULL, 1.604 + 1.605 + _CompoundTextOpen, 1.606 + _CompoundTextClose, 1.607 + _CompoundTextReset, 1.608 + 1.609 + UConverter_toUnicode_CompoundText_OFFSETS, 1.610 + UConverter_toUnicode_CompoundText_OFFSETS, 1.611 + UConverter_fromUnicode_CompoundText_OFFSETS, 1.612 + UConverter_fromUnicode_CompoundText_OFFSETS, 1.613 + NULL, 1.614 + 1.615 + NULL, 1.616 + _CompoundTextgetName, 1.617 + NULL, 1.618 + NULL, 1.619 + _CompoundText_GetUnicodeSet 1.620 +}; 1.621 +static const UConverterStaticData _CompoundTextStaticData = { 1.622 + sizeof(UConverterStaticData), 1.623 + "COMPOUND_TEXT", 1.624 + 0, 1.625 + UCNV_IBM, 1.626 + UCNV_COMPOUND_TEXT, 1.627 + 1, 1.628 + 6, 1.629 + { 0xef, 0, 0, 0 }, 1.630 + 1, 1.631 + FALSE, 1.632 + FALSE, 1.633 + 0, 1.634 + 0, 1.635 + { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ 1.636 +}; 1.637 +const UConverterSharedData _CompoundTextData = { 1.638 + sizeof(UConverterSharedData), 1.639 + ~((uint32_t) 0), 1.640 + NULL, 1.641 + NULL, 1.642 + &_CompoundTextStaticData, 1.643 + FALSE, 1.644 + &_CompoundTextImpl, 1.645 + 0 1.646 +}; 1.647 + 1.648 +#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */