intl/icu/source/common/ucnv_err.c

Wed, 31 Dec 2014 07:22:50 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 07:22:50 +0100
branch
TOR_BUG_3246
changeset 4
fc2d59ddac77
permissions
-rw-r--r--

Correct previous dual key logic pending first delivery installment.

michael@0 1 /*
michael@0 2 *****************************************************************************
michael@0 3 *
michael@0 4 * Copyright (C) 1998-2007, International Business Machines
michael@0 5 * Corporation and others. All Rights Reserved.
michael@0 6 *
michael@0 7 *****************************************************************************
michael@0 8 *
michael@0 9 * ucnv_err.c
michael@0 10 * Implements error behaviour functions called by T_UConverter_{from,to}Unicode
michael@0 11 *
michael@0 12 *
michael@0 13 * Change history:
michael@0 14 *
michael@0 15 * 06/29/2000 helena Major rewrite of the callback APIs.
michael@0 16 */
michael@0 17
michael@0 18 #include "unicode/utypes.h"
michael@0 19
michael@0 20 #if !UCONFIG_NO_CONVERSION
michael@0 21
michael@0 22 #include "unicode/ucnv_err.h"
michael@0 23 #include "unicode/ucnv_cb.h"
michael@0 24 #include "ucnv_cnv.h"
michael@0 25 #include "cmemory.h"
michael@0 26 #include "unicode/ucnv.h"
michael@0 27 #include "ustrfmt.h"
michael@0 28
/*
 * Capacity, in UChars, of the scratch buffers used to assemble one escape
 * sequence.  The to-Unicode escape callback emits one escape per input byte,
 * and its widest per-byte forms are the XML ones ("&#255;" and "&#xFF;"),
 * which take 6 UChars each.  Hence:
 *   48 = 6 (max number of UChars per escaped byte)
 *      * 8 (max number of bytes per char for any converter)
 * The previous value of 32 only covered the 4-UChar forms ("%XFF", "\xFF").
 */
#define VALUE_STRING_LENGTH 48

/* Code points used to build the escape sequences. */
#define UNICODE_PERCENT_SIGN_CODEPOINT 0x0025
#define UNICODE_U_CODEPOINT 0x0055
#define UNICODE_X_CODEPOINT 0x0058
#define UNICODE_RS_CODEPOINT 0x005C
#define UNICODE_U_LOW_CODEPOINT 0x0075
#define UNICODE_X_LOW_CODEPOINT 0x0078
#define UNICODE_AMP_CODEPOINT 0x0026
#define UNICODE_HASH_CODEPOINT 0x0023
#define UNICODE_SEMICOLON_CODEPOINT 0x003B
#define UNICODE_PLUS_CODEPOINT 0x002B
#define UNICODE_LEFT_CURLY_CODEPOINT 0x007B
#define UNICODE_RIGHT_CURLY_CODEPOINT 0x007D
#define UNICODE_SPACE_CODEPOINT 0x0020

/* Values compared against the first byte of a callback's context argument. */
#define UCNV_PRV_ESCAPE_ICU 0
#define UCNV_PRV_ESCAPE_C 'C'
#define UCNV_PRV_ESCAPE_XML_DEC 'D'
#define UCNV_PRV_ESCAPE_XML_HEX 'X'
#define UCNV_PRV_ESCAPE_JAVA 'J'
#define UCNV_PRV_ESCAPE_UNICODE 'U'
#define UCNV_PRV_ESCAPE_CSS2 'S'
#define UCNV_PRV_STOP_ON_ILLEGAL 'i'

michael@0 52
michael@0 53 /*Function Pointer STOPS at the ILLEGAL_SEQUENCE */
michael@0 54 U_CAPI void U_EXPORT2
michael@0 55 UCNV_FROM_U_CALLBACK_STOP (
michael@0 56 const void *context,
michael@0 57 UConverterFromUnicodeArgs *fromUArgs,
michael@0 58 const UChar* codeUnits,
michael@0 59 int32_t length,
michael@0 60 UChar32 codePoint,
michael@0 61 UConverterCallbackReason reason,
michael@0 62 UErrorCode * err)
michael@0 63 {
michael@0 64 /* the caller must have set the error code accordingly */
michael@0 65 return;
michael@0 66 }
michael@0 67
michael@0 68
michael@0 69 /*Function Pointer STOPS at the ILLEGAL_SEQUENCE */
michael@0 70 U_CAPI void U_EXPORT2
michael@0 71 UCNV_TO_U_CALLBACK_STOP (
michael@0 72 const void *context,
michael@0 73 UConverterToUnicodeArgs *toUArgs,
michael@0 74 const char* codePoints,
michael@0 75 int32_t length,
michael@0 76 UConverterCallbackReason reason,
michael@0 77 UErrorCode * err)
michael@0 78 {
michael@0 79 /* the caller must have set the error code accordingly */
michael@0 80 return;
michael@0 81 }
michael@0 82
michael@0 83 U_CAPI void U_EXPORT2
michael@0 84 UCNV_FROM_U_CALLBACK_SKIP (
michael@0 85 const void *context,
michael@0 86 UConverterFromUnicodeArgs *fromUArgs,
michael@0 87 const UChar* codeUnits,
michael@0 88 int32_t length,
michael@0 89 UChar32 codePoint,
michael@0 90 UConverterCallbackReason reason,
michael@0 91 UErrorCode * err)
michael@0 92 {
michael@0 93 if (reason <= UCNV_IRREGULAR)
michael@0 94 {
michael@0 95 if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED))
michael@0 96 {
michael@0 97 *err = U_ZERO_ERROR;
michael@0 98 }
michael@0 99 /* else the caller must have set the error code accordingly. */
michael@0 100 }
michael@0 101 /* else ignore the reset, close and clone calls. */
michael@0 102 }
michael@0 103
michael@0 104 U_CAPI void U_EXPORT2
michael@0 105 UCNV_FROM_U_CALLBACK_SUBSTITUTE (
michael@0 106 const void *context,
michael@0 107 UConverterFromUnicodeArgs *fromArgs,
michael@0 108 const UChar* codeUnits,
michael@0 109 int32_t length,
michael@0 110 UChar32 codePoint,
michael@0 111 UConverterCallbackReason reason,
michael@0 112 UErrorCode * err)
michael@0 113 {
michael@0 114 if (reason <= UCNV_IRREGULAR)
michael@0 115 {
michael@0 116 if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED))
michael@0 117 {
michael@0 118 *err = U_ZERO_ERROR;
michael@0 119 ucnv_cbFromUWriteSub(fromArgs, 0, err);
michael@0 120 }
michael@0 121 /* else the caller must have set the error code accordingly. */
michael@0 122 }
michael@0 123 /* else ignore the reset, close and clone calls. */
michael@0 124 }
michael@0 125
michael@0 126 /*uses uprv_itou to get a unicode escape sequence of the offensive sequence,
michael@0 127 *uses a clean copy (resetted) of the converter, to convert that unicode
michael@0 128 *escape sequence to the target codepage (if conversion failure happens then
michael@0 129 *we revert to substituting with subchar)
michael@0 130 */
michael@0 131 U_CAPI void U_EXPORT2
michael@0 132 UCNV_FROM_U_CALLBACK_ESCAPE (
michael@0 133 const void *context,
michael@0 134 UConverterFromUnicodeArgs *fromArgs,
michael@0 135 const UChar *codeUnits,
michael@0 136 int32_t length,
michael@0 137 UChar32 codePoint,
michael@0 138 UConverterCallbackReason reason,
michael@0 139 UErrorCode * err)
michael@0 140 {
michael@0 141
michael@0 142 UChar valueString[VALUE_STRING_LENGTH];
michael@0 143 int32_t valueStringLength = 0;
michael@0 144 int32_t i = 0;
michael@0 145
michael@0 146 const UChar *myValueSource = NULL;
michael@0 147 UErrorCode err2 = U_ZERO_ERROR;
michael@0 148 UConverterFromUCallback original = NULL;
michael@0 149 const void *originalContext;
michael@0 150
michael@0 151 UConverterFromUCallback ignoredCallback = NULL;
michael@0 152 const void *ignoredContext;
michael@0 153
michael@0 154 if (reason > UCNV_IRREGULAR)
michael@0 155 {
michael@0 156 return;
michael@0 157 }
michael@0 158
michael@0 159 ucnv_setFromUCallBack (fromArgs->converter,
michael@0 160 (UConverterFromUCallback) UCNV_FROM_U_CALLBACK_SUBSTITUTE,
michael@0 161 NULL,
michael@0 162 &original,
michael@0 163 &originalContext,
michael@0 164 &err2);
michael@0 165
michael@0 166 if (U_FAILURE (err2))
michael@0 167 {
michael@0 168 *err = err2;
michael@0 169 return;
michael@0 170 }
michael@0 171 if(context==NULL)
michael@0 172 {
michael@0 173 while (i < length)
michael@0 174 {
michael@0 175 valueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
michael@0 176 valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */
michael@0 177 valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4);
michael@0 178 }
michael@0 179 }
michael@0 180 else
michael@0 181 {
michael@0 182 switch(*((char*)context))
michael@0 183 {
michael@0 184 case UCNV_PRV_ESCAPE_JAVA:
michael@0 185 while (i < length)
michael@0 186 {
michael@0 187 valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */
michael@0 188 valueString[valueStringLength++] = (UChar) UNICODE_U_LOW_CODEPOINT; /* adding u */
michael@0 189 valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4);
michael@0 190 }
michael@0 191 break;
michael@0 192
michael@0 193 case UCNV_PRV_ESCAPE_C:
michael@0 194 valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */
michael@0 195
michael@0 196 if(length==2){
michael@0 197 valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */
michael@0 198 valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 8);
michael@0 199
michael@0 200 }
michael@0 201 else{
michael@0 202 valueString[valueStringLength++] = (UChar) UNICODE_U_LOW_CODEPOINT; /* adding u */
michael@0 203 valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 4);
michael@0 204 }
michael@0 205 break;
michael@0 206
michael@0 207 case UCNV_PRV_ESCAPE_XML_DEC:
michael@0 208
michael@0 209 valueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT; /* adding & */
michael@0 210 valueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT; /* adding # */
michael@0 211 if(length==2){
michael@0 212 valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 10, 0);
michael@0 213 }
michael@0 214 else{
michael@0 215 valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 10, 0);
michael@0 216 }
michael@0 217 valueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
michael@0 218 break;
michael@0 219
michael@0 220 case UCNV_PRV_ESCAPE_XML_HEX:
michael@0 221
michael@0 222 valueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT; /* adding & */
michael@0 223 valueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT; /* adding # */
michael@0 224 valueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */
michael@0 225 if(length==2){
michael@0 226 valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 0);
michael@0 227 }
michael@0 228 else{
michael@0 229 valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 0);
michael@0 230 }
michael@0 231 valueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
michael@0 232 break;
michael@0 233
michael@0 234 case UCNV_PRV_ESCAPE_UNICODE:
michael@0 235 valueString[valueStringLength++] = (UChar) UNICODE_LEFT_CURLY_CODEPOINT; /* adding { */
michael@0 236 valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */
michael@0 237 valueString[valueStringLength++] = (UChar) UNICODE_PLUS_CODEPOINT; /* adding + */
michael@0 238 if (length == 2) {
michael@0 239 valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 4);
michael@0 240 } else {
michael@0 241 valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 4);
michael@0 242 }
michael@0 243 valueString[valueStringLength++] = (UChar) UNICODE_RIGHT_CURLY_CODEPOINT; /* adding } */
michael@0 244 break;
michael@0 245
michael@0 246 case UCNV_PRV_ESCAPE_CSS2:
michael@0 247 valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */
michael@0 248 valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 0);
michael@0 249 /* Always add space character, becase the next character might be whitespace,
michael@0 250 which would erroneously be considered the termination of the escape sequence. */
michael@0 251 valueString[valueStringLength++] = (UChar) UNICODE_SPACE_CODEPOINT;
michael@0 252 break;
michael@0 253
michael@0 254 default:
michael@0 255 while (i < length)
michael@0 256 {
michael@0 257 valueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
michael@0 258 valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */
michael@0 259 valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4);
michael@0 260 }
michael@0 261 }
michael@0 262 }
michael@0 263 myValueSource = valueString;
michael@0 264
michael@0 265 /* reset the error */
michael@0 266 *err = U_ZERO_ERROR;
michael@0 267
michael@0 268 ucnv_cbFromUWriteUChars(fromArgs, &myValueSource, myValueSource+valueStringLength, 0, err);
michael@0 269
michael@0 270 ucnv_setFromUCallBack (fromArgs->converter,
michael@0 271 original,
michael@0 272 originalContext,
michael@0 273 &ignoredCallback,
michael@0 274 &ignoredContext,
michael@0 275 &err2);
michael@0 276 if (U_FAILURE (err2))
michael@0 277 {
michael@0 278 *err = err2;
michael@0 279 return;
michael@0 280 }
michael@0 281
michael@0 282 return;
michael@0 283 }
michael@0 284
michael@0 285
michael@0 286
michael@0 287 U_CAPI void U_EXPORT2
michael@0 288 UCNV_TO_U_CALLBACK_SKIP (
michael@0 289 const void *context,
michael@0 290 UConverterToUnicodeArgs *toArgs,
michael@0 291 const char* codeUnits,
michael@0 292 int32_t length,
michael@0 293 UConverterCallbackReason reason,
michael@0 294 UErrorCode * err)
michael@0 295 {
michael@0 296 if (reason <= UCNV_IRREGULAR)
michael@0 297 {
michael@0 298 if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED))
michael@0 299 {
michael@0 300 *err = U_ZERO_ERROR;
michael@0 301 }
michael@0 302 /* else the caller must have set the error code accordingly. */
michael@0 303 }
michael@0 304 /* else ignore the reset, close and clone calls. */
michael@0 305 }
michael@0 306
michael@0 307 U_CAPI void U_EXPORT2
michael@0 308 UCNV_TO_U_CALLBACK_SUBSTITUTE (
michael@0 309 const void *context,
michael@0 310 UConverterToUnicodeArgs *toArgs,
michael@0 311 const char* codeUnits,
michael@0 312 int32_t length,
michael@0 313 UConverterCallbackReason reason,
michael@0 314 UErrorCode * err)
michael@0 315 {
michael@0 316 if (reason <= UCNV_IRREGULAR)
michael@0 317 {
michael@0 318 if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED))
michael@0 319 {
michael@0 320 *err = U_ZERO_ERROR;
michael@0 321 ucnv_cbToUWriteSub(toArgs,0,err);
michael@0 322 }
michael@0 323 /* else the caller must have set the error code accordingly. */
michael@0 324 }
michael@0 325 /* else ignore the reset, close and clone calls. */
michael@0 326 }
michael@0 327
michael@0 328 /*uses uprv_itou to get a unicode escape sequence of the offensive sequence,
michael@0 329 *and uses that as the substitution sequence
michael@0 330 */
michael@0 331 U_CAPI void U_EXPORT2
michael@0 332 UCNV_TO_U_CALLBACK_ESCAPE (
michael@0 333 const void *context,
michael@0 334 UConverterToUnicodeArgs *toArgs,
michael@0 335 const char* codeUnits,
michael@0 336 int32_t length,
michael@0 337 UConverterCallbackReason reason,
michael@0 338 UErrorCode * err)
michael@0 339 {
michael@0 340 UChar uniValueString[VALUE_STRING_LENGTH];
michael@0 341 int32_t valueStringLength = 0;
michael@0 342 int32_t i = 0;
michael@0 343
michael@0 344 if (reason > UCNV_IRREGULAR)
michael@0 345 {
michael@0 346 return;
michael@0 347 }
michael@0 348
michael@0 349 if(context==NULL)
michael@0 350 {
michael@0 351 while (i < length)
michael@0 352 {
michael@0 353 uniValueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
michael@0 354 uniValueString[valueStringLength++] = (UChar) UNICODE_X_CODEPOINT; /* adding X */
michael@0 355 valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t) codeUnits[i++], 16, 2);
michael@0 356 }
michael@0 357 }
michael@0 358 else
michael@0 359 {
michael@0 360 switch(*((char*)context))
michael@0 361 {
michael@0 362 case UCNV_PRV_ESCAPE_XML_DEC:
michael@0 363 while (i < length)
michael@0 364 {
michael@0 365 uniValueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT; /* adding & */
michael@0 366 uniValueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT; /* adding # */
michael@0 367 valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 10, 0);
michael@0 368 uniValueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
michael@0 369 }
michael@0 370 break;
michael@0 371
michael@0 372 case UCNV_PRV_ESCAPE_XML_HEX:
michael@0 373 while (i < length)
michael@0 374 {
michael@0 375 uniValueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT; /* adding & */
michael@0 376 uniValueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT; /* adding # */
michael@0 377 uniValueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */
michael@0 378 valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 16, 0);
michael@0 379 uniValueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
michael@0 380 }
michael@0 381 break;
michael@0 382 case UCNV_PRV_ESCAPE_C:
michael@0 383 while (i < length)
michael@0 384 {
michael@0 385 uniValueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */
michael@0 386 uniValueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */
michael@0 387 valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 16, 2);
michael@0 388 }
michael@0 389 break;
michael@0 390 default:
michael@0 391 while (i < length)
michael@0 392 {
michael@0 393 uniValueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
michael@0 394 uniValueString[valueStringLength++] = (UChar) UNICODE_X_CODEPOINT; /* adding X */
michael@0 395 uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t) codeUnits[i++], 16, 2);
michael@0 396 valueStringLength += 2;
michael@0 397 }
michael@0 398 }
michael@0 399 }
michael@0 400 /* reset the error */
michael@0 401 *err = U_ZERO_ERROR;
michael@0 402
michael@0 403 ucnv_cbToUWriteUChars(toArgs, uniValueString, valueStringLength, 0, err);
michael@0 404 }
michael@0 405
michael@0 406 #endif

mercurial