1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/common/ucnv_err.c Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,406 @@ 1.4 +/* 1.5 + ***************************************************************************** 1.6 + * 1.7 + * Copyright (C) 1998-2007, International Business Machines 1.8 + * Corporation and others. All Rights Reserved. 1.9 + * 1.10 + ***************************************************************************** 1.11 + * 1.12 + * ucnv_err.c 1.13 + * Implements error behaviour functions called by T_UConverter_{from,to}Unicode 1.14 + * 1.15 + * 1.16 +* Change history: 1.17 +* 1.18 +* 06/29/2000 helena Major rewrite of the callback APIs. 1.19 +*/ 1.20 + 1.21 +#include "unicode/utypes.h" 1.22 + 1.23 +#if !UCONFIG_NO_CONVERSION 1.24 + 1.25 +#include "unicode/ucnv_err.h" 1.26 +#include "unicode/ucnv_cb.h" 1.27 +#include "ucnv_cnv.h" 1.28 +#include "cmemory.h" 1.29 +#include "unicode/ucnv.h" 1.30 +#include "ustrfmt.h" 1.31 + 1.32 +#define VALUE_STRING_LENGTH 32 1.33 +/*Magic # 32 = 4(number of char in value string) * 8(max number of bytes per char for any converter) */ 1.34 +#define UNICODE_PERCENT_SIGN_CODEPOINT 0x0025 1.35 +#define UNICODE_U_CODEPOINT 0x0055 1.36 +#define UNICODE_X_CODEPOINT 0x0058 1.37 +#define UNICODE_RS_CODEPOINT 0x005C 1.38 +#define UNICODE_U_LOW_CODEPOINT 0x0075 1.39 +#define UNICODE_X_LOW_CODEPOINT 0x0078 1.40 +#define UNICODE_AMP_CODEPOINT 0x0026 1.41 +#define UNICODE_HASH_CODEPOINT 0x0023 1.42 +#define UNICODE_SEMICOLON_CODEPOINT 0x003B 1.43 +#define UNICODE_PLUS_CODEPOINT 0x002B 1.44 +#define UNICODE_LEFT_CURLY_CODEPOINT 0x007B 1.45 +#define UNICODE_RIGHT_CURLY_CODEPOINT 0x007D 1.46 +#define UNICODE_SPACE_CODEPOINT 0x0020 1.47 +#define UCNV_PRV_ESCAPE_ICU 0 1.48 +#define UCNV_PRV_ESCAPE_C 'C' 1.49 +#define UCNV_PRV_ESCAPE_XML_DEC 'D' 1.50 +#define UCNV_PRV_ESCAPE_XML_HEX 'X' 1.51 +#define UCNV_PRV_ESCAPE_JAVA 'J' 1.52 +#define UCNV_PRV_ESCAPE_UNICODE 'U' 1.53 +#define UCNV_PRV_ESCAPE_CSS2 'S' 1.54 +#define UCNV_PRV_STOP_ON_ILLEGAL 'i' 1.55 + 1.56 +/*Function Pointer STOPS at the ILLEGAL_SEQUENCE */ 1.57 +U_CAPI void U_EXPORT2 1.58 +UCNV_FROM_U_CALLBACK_STOP ( 1.59 + const void *context, 1.60 + UConverterFromUnicodeArgs *fromUArgs, 1.61 + const UChar* codeUnits, 1.62 + int32_t length, 1.63 + UChar32 codePoint, 1.64 + UConverterCallbackReason reason, 1.65 + UErrorCode * err) 1.66 +{ 1.67 + /* the caller must have set the error code accordingly */ 1.68 + return; 1.69 +} 1.70 + 1.71 + 1.72 +/*Function Pointer STOPS at the ILLEGAL_SEQUENCE */ 1.73 +U_CAPI void U_EXPORT2 1.74 +UCNV_TO_U_CALLBACK_STOP ( 1.75 + const void *context, 1.76 + UConverterToUnicodeArgs *toUArgs, 1.77 + const char* codePoints, 1.78 + int32_t length, 1.79 + UConverterCallbackReason reason, 1.80 + UErrorCode * err) 1.81 +{ 1.82 + /* the caller must have set the error code accordingly */ 1.83 + return; 1.84 +} 1.85 + 1.86 +U_CAPI void U_EXPORT2 1.87 +UCNV_FROM_U_CALLBACK_SKIP ( 1.88 + const void *context, 1.89 + UConverterFromUnicodeArgs *fromUArgs, 1.90 + const UChar* codeUnits, 1.91 + int32_t length, 1.92 + UChar32 codePoint, 1.93 + UConverterCallbackReason reason, 1.94 + UErrorCode * err) 1.95 +{ 1.96 + if (reason <= UCNV_IRREGULAR) 1.97 + { 1.98 + if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED)) 1.99 + { 1.100 + *err = U_ZERO_ERROR; 1.101 + } 1.102 + /* else the caller must have set the error code accordingly. */ 1.103 + } 1.104 + /* else ignore the reset, close and clone calls. */ 1.105 +} 1.106 + 1.107 +U_CAPI void U_EXPORT2 1.108 +UCNV_FROM_U_CALLBACK_SUBSTITUTE ( 1.109 + const void *context, 1.110 + UConverterFromUnicodeArgs *fromArgs, 1.111 + const UChar* codeUnits, 1.112 + int32_t length, 1.113 + UChar32 codePoint, 1.114 + UConverterCallbackReason reason, 1.115 + UErrorCode * err) 1.116 +{ 1.117 + if (reason <= UCNV_IRREGULAR) 1.118 + { 1.119 + if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED)) 1.120 + { 1.121 + *err = U_ZERO_ERROR; 1.122 + ucnv_cbFromUWriteSub(fromArgs, 0, err); 1.123 + } 1.124 + /* else the caller must have set the error code accordingly. */ 1.125 + } 1.126 + /* else ignore the reset, close and clone calls. */ 1.127 +} 1.128 + 1.129 +/*uses uprv_itou to get a unicode escape sequence of the offensive sequence, 1.130 + *uses a clean copy (resetted) of the converter, to convert that unicode 1.131 + *escape sequence to the target codepage (if conversion failure happens then 1.132 + *we revert to substituting with subchar) 1.133 + */ 1.134 +U_CAPI void U_EXPORT2 1.135 +UCNV_FROM_U_CALLBACK_ESCAPE ( 1.136 + const void *context, 1.137 + UConverterFromUnicodeArgs *fromArgs, 1.138 + const UChar *codeUnits, 1.139 + int32_t length, 1.140 + UChar32 codePoint, 1.141 + UConverterCallbackReason reason, 1.142 + UErrorCode * err) 1.143 +{ 1.144 + 1.145 + UChar valueString[VALUE_STRING_LENGTH]; 1.146 + int32_t valueStringLength = 0; 1.147 + int32_t i = 0; 1.148 + 1.149 + const UChar *myValueSource = NULL; 1.150 + UErrorCode err2 = U_ZERO_ERROR; 1.151 + UConverterFromUCallback original = NULL; 1.152 + const void *originalContext; 1.153 + 1.154 + UConverterFromUCallback ignoredCallback = NULL; 1.155 + const void *ignoredContext; 1.156 + 1.157 + if (reason > UCNV_IRREGULAR) 1.158 + { 1.159 + return; 1.160 + } 1.161 + 1.162 + ucnv_setFromUCallBack (fromArgs->converter, 1.163 + (UConverterFromUCallback) UCNV_FROM_U_CALLBACK_SUBSTITUTE, 1.164 + NULL, 1.165 + &original, 1.166 + &originalContext, 1.167 + &err2); 1.168 + 1.169 + if (U_FAILURE (err2)) 1.170 + { 1.171 + *err = err2; 1.172 + return; 1.173 + } 1.174 + if(context==NULL) 1.175 + { 1.176 + while (i < length) 1.177 + { 1.178 + valueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */ 1.179 + valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */ 1.180 + valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4); 1.181 + } 1.182 + } 1.183 + else 1.184 + { 1.185 + switch(*((char*)context)) 1.186 + { 1.187 + case UCNV_PRV_ESCAPE_JAVA: 1.188 + while (i < length) 1.189 + { 1.190 + valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */ 1.191 + valueString[valueStringLength++] = (UChar) UNICODE_U_LOW_CODEPOINT; /* adding u */ 1.192 + valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4); 1.193 + } 1.194 + break; 1.195 + 1.196 + case UCNV_PRV_ESCAPE_C: 1.197 + valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */ 1.198 + 1.199 + if(length==2){ 1.200 + valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */ 1.201 + valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 8); 1.202 + 1.203 + } 1.204 + else{ 1.205 + valueString[valueStringLength++] = (UChar) UNICODE_U_LOW_CODEPOINT; /* adding u */ 1.206 + valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 4); 1.207 + } 1.208 + break; 1.209 + 1.210 + case UCNV_PRV_ESCAPE_XML_DEC: 1.211 + 1.212 + valueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT; /* adding & */ 1.213 + valueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT; /* adding # */ 1.214 + if(length==2){ 1.215 + valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 10, 0); 1.216 + } 1.217 + else{ 1.218 + valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 10, 0); 1.219 + } 1.220 + valueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */ 1.221 + break; 1.222 + 1.223 + case UCNV_PRV_ESCAPE_XML_HEX: 1.224 + 1.225 + valueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT; /* adding & */ 1.226 + valueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT; /* adding # */ 1.227 + valueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */ 1.228 + if(length==2){ 1.229 + valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 0); 1.230 + } 1.231 + else{ 1.232 + valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 0); 1.233 + } 1.234 + valueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */ 1.235 + break; 1.236 + 1.237 + case UCNV_PRV_ESCAPE_UNICODE: 1.238 + valueString[valueStringLength++] = (UChar) UNICODE_LEFT_CURLY_CODEPOINT; /* adding { */ 1.239 + valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */ 1.240 + valueString[valueStringLength++] = (UChar) UNICODE_PLUS_CODEPOINT; /* adding + */ 1.241 + if (length == 2) { 1.242 + valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 4); 1.243 + } else { 1.244 + valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 4); 1.245 + } 1.246 + valueString[valueStringLength++] = (UChar) UNICODE_RIGHT_CURLY_CODEPOINT; /* adding } */ 1.247 + break; 1.248 + 1.249 + case UCNV_PRV_ESCAPE_CSS2: 1.250 + valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */ 1.251 + valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 0); 1.252 + /* Always add space character, becase the next character might be whitespace, 1.253 + which would erroneously be considered the termination of the escape sequence. */ 1.254 + valueString[valueStringLength++] = (UChar) UNICODE_SPACE_CODEPOINT; 1.255 + break; 1.256 + 1.257 + default: 1.258 + while (i < length) 1.259 + { 1.260 + valueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */ 1.261 + valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */ 1.262 + valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4); 1.263 + } 1.264 + } 1.265 + } 1.266 + myValueSource = valueString; 1.267 + 1.268 + /* reset the error */ 1.269 + *err = U_ZERO_ERROR; 1.270 + 1.271 + ucnv_cbFromUWriteUChars(fromArgs, &myValueSource, myValueSource+valueStringLength, 0, err); 1.272 + 1.273 + ucnv_setFromUCallBack (fromArgs->converter, 1.274 + original, 1.275 + originalContext, 1.276 + &ignoredCallback, 1.277 + &ignoredContext, 1.278 + &err2); 1.279 + if (U_FAILURE (err2)) 1.280 + { 1.281 + *err = err2; 1.282 + return; 1.283 + } 1.284 + 1.285 + return; 1.286 +} 1.287 + 1.288 + 1.289 + 1.290 +U_CAPI void U_EXPORT2 1.291 +UCNV_TO_U_CALLBACK_SKIP ( 1.292 + const void *context, 1.293 + UConverterToUnicodeArgs *toArgs, 1.294 + const char* codeUnits, 1.295 + int32_t length, 1.296 + UConverterCallbackReason reason, 1.297 + UErrorCode * err) 1.298 +{ 1.299 + if (reason <= UCNV_IRREGULAR) 1.300 + { 1.301 + if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED)) 1.302 + { 1.303 + *err = U_ZERO_ERROR; 1.304 + } 1.305 + /* else the caller must have set the error code accordingly. */ 1.306 + } 1.307 + /* else ignore the reset, close and clone calls. */ 1.308 +} 1.309 + 1.310 +U_CAPI void U_EXPORT2 1.311 +UCNV_TO_U_CALLBACK_SUBSTITUTE ( 1.312 + const void *context, 1.313 + UConverterToUnicodeArgs *toArgs, 1.314 + const char* codeUnits, 1.315 + int32_t length, 1.316 + UConverterCallbackReason reason, 1.317 + UErrorCode * err) 1.318 +{ 1.319 + if (reason <= UCNV_IRREGULAR) 1.320 + { 1.321 + if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED)) 1.322 + { 1.323 + *err = U_ZERO_ERROR; 1.324 + ucnv_cbToUWriteSub(toArgs,0,err); 1.325 + } 1.326 + /* else the caller must have set the error code accordingly. */ 1.327 + } 1.328 + /* else ignore the reset, close and clone calls. */ 1.329 +} 1.330 + 1.331 +/*uses uprv_itou to get a unicode escape sequence of the offensive sequence, 1.332 + *and uses that as the substitution sequence 1.333 + */ 1.334 +U_CAPI void U_EXPORT2 1.335 +UCNV_TO_U_CALLBACK_ESCAPE ( 1.336 + const void *context, 1.337 + UConverterToUnicodeArgs *toArgs, 1.338 + const char* codeUnits, 1.339 + int32_t length, 1.340 + UConverterCallbackReason reason, 1.341 + UErrorCode * err) 1.342 +{ 1.343 + UChar uniValueString[VALUE_STRING_LENGTH]; 1.344 + int32_t valueStringLength = 0; 1.345 + int32_t i = 0; 1.346 + 1.347 + if (reason > UCNV_IRREGULAR) 1.348 + { 1.349 + return; 1.350 + } 1.351 + 1.352 + if(context==NULL) 1.353 + { 1.354 + while (i < length) 1.355 + { 1.356 + uniValueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */ 1.357 + uniValueString[valueStringLength++] = (UChar) UNICODE_X_CODEPOINT; /* adding X */ 1.358 + valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t) codeUnits[i++], 16, 2); 1.359 + } 1.360 + } 1.361 + else 1.362 + { 1.363 + switch(*((char*)context)) 1.364 + { 1.365 + case UCNV_PRV_ESCAPE_XML_DEC: 1.366 + while (i < length) 1.367 + { 1.368 + uniValueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT; /* adding & */ 1.369 + uniValueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT; /* adding # */ 1.370 + valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 10, 0); 1.371 + uniValueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */ 1.372 + } 1.373 + break; 1.374 + 1.375 + case UCNV_PRV_ESCAPE_XML_HEX: 1.376 + while (i < length) 1.377 + { 1.378 + uniValueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT; /* adding & */ 1.379 + uniValueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT; /* adding # */ 1.380 + uniValueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */ 1.381 + valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 16, 0); 1.382 + uniValueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */ 1.383 + } 1.384 + break; 1.385 + case UCNV_PRV_ESCAPE_C: 1.386 + while (i < length) 1.387 + { 1.388 + uniValueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */ 1.389 + uniValueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */ 1.390 + valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 16, 2); 1.391 + } 1.392 + break; 1.393 + default: 1.394 + while (i < length) 1.395 + { 1.396 + uniValueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */ 1.397 + uniValueString[valueStringLength++] = (UChar) UNICODE_X_CODEPOINT; /* adding X */ 1.398 + uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t) codeUnits[i++], 16, 2); 1.399 + valueStringLength += 2; 1.400 + } 1.401 + } 1.402 + } 1.403 + /* reset the error */ 1.404 + *err = U_ZERO_ERROR; 1.405 + 1.406 + ucnv_cbToUWriteUChars(toArgs, uniValueString, valueStringLength, 0, err); 1.407 +} 1.408 + 1.409 +#endif