michael@0: /* michael@0: ***************************************************************************** michael@0: * michael@0: * Copyright (C) 1998-2007, International Business Machines michael@0: * Corporation and others. All Rights Reserved. michael@0: * michael@0: ***************************************************************************** michael@0: * michael@0: * ucnv_err.c michael@0: * Implements error behaviour functions called by T_UConverter_{from,to}Unicode michael@0: * michael@0: * michael@0: * Change history: michael@0: * michael@0: * 06/29/2000 helena Major rewrite of the callback APIs. michael@0: */ michael@0: michael@0: #include "unicode/utypes.h" michael@0: michael@0: #if !UCONFIG_NO_CONVERSION michael@0: michael@0: #include "unicode/ucnv_err.h" michael@0: #include "unicode/ucnv_cb.h" michael@0: #include "ucnv_cnv.h" michael@0: #include "cmemory.h" michael@0: #include "unicode/ucnv.h" michael@0: #include "ustrfmt.h" michael@0: michael@0: #define VALUE_STRING_LENGTH 32 michael@0: /*Magic # 32 = 4(number of char in value string) * 8(max number of bytes per char for any converter) */ michael@0: #define UNICODE_PERCENT_SIGN_CODEPOINT 0x0025 michael@0: #define UNICODE_U_CODEPOINT 0x0055 michael@0: #define UNICODE_X_CODEPOINT 0x0058 michael@0: #define UNICODE_RS_CODEPOINT 0x005C michael@0: #define UNICODE_U_LOW_CODEPOINT 0x0075 michael@0: #define UNICODE_X_LOW_CODEPOINT 0x0078 michael@0: #define UNICODE_AMP_CODEPOINT 0x0026 michael@0: #define UNICODE_HASH_CODEPOINT 0x0023 michael@0: #define UNICODE_SEMICOLON_CODEPOINT 0x003B michael@0: #define UNICODE_PLUS_CODEPOINT 0x002B michael@0: #define UNICODE_LEFT_CURLY_CODEPOINT 0x007B michael@0: #define UNICODE_RIGHT_CURLY_CODEPOINT 0x007D michael@0: #define UNICODE_SPACE_CODEPOINT 0x0020 michael@0: #define UCNV_PRV_ESCAPE_ICU 0 michael@0: #define UCNV_PRV_ESCAPE_C 'C' michael@0: #define UCNV_PRV_ESCAPE_XML_DEC 'D' michael@0: #define UCNV_PRV_ESCAPE_XML_HEX 'X' michael@0: #define UCNV_PRV_ESCAPE_JAVA 'J' michael@0: #define UCNV_PRV_ESCAPE_UNICODE 'U' michael@0: #define UCNV_PRV_ESCAPE_CSS2 'S' michael@0: #define UCNV_PRV_STOP_ON_ILLEGAL 'i' michael@0: michael@0: /*Function Pointer STOPS at the ILLEGAL_SEQUENCE */ michael@0: U_CAPI void U_EXPORT2 michael@0: UCNV_FROM_U_CALLBACK_STOP ( michael@0: const void *context, michael@0: UConverterFromUnicodeArgs *fromUArgs, michael@0: const UChar* codeUnits, michael@0: int32_t length, michael@0: UChar32 codePoint, michael@0: UConverterCallbackReason reason, michael@0: UErrorCode * err) michael@0: { michael@0: /* the caller must have set the error code accordingly */ michael@0: return; michael@0: } michael@0: michael@0: michael@0: /*Function Pointer STOPS at the ILLEGAL_SEQUENCE */ michael@0: U_CAPI void U_EXPORT2 michael@0: UCNV_TO_U_CALLBACK_STOP ( michael@0: const void *context, michael@0: UConverterToUnicodeArgs *toUArgs, michael@0: const char* codePoints, michael@0: int32_t length, michael@0: UConverterCallbackReason reason, michael@0: UErrorCode * err) michael@0: { michael@0: /* the caller must have set the error code accordingly */ michael@0: return; michael@0: } michael@0: michael@0: U_CAPI void U_EXPORT2 michael@0: UCNV_FROM_U_CALLBACK_SKIP ( michael@0: const void *context, michael@0: UConverterFromUnicodeArgs *fromUArgs, michael@0: const UChar* codeUnits, michael@0: int32_t length, michael@0: UChar32 codePoint, michael@0: UConverterCallbackReason reason, michael@0: UErrorCode * err) michael@0: { michael@0: if (reason <= UCNV_IRREGULAR) michael@0: { michael@0: if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED)) michael@0: { michael@0: *err = U_ZERO_ERROR; michael@0: } michael@0: /* else the caller must have set the error code accordingly. */ michael@0: } michael@0: /* else ignore the reset, close and clone calls. */ michael@0: } michael@0: michael@0: U_CAPI void U_EXPORT2 michael@0: UCNV_FROM_U_CALLBACK_SUBSTITUTE ( michael@0: const void *context, michael@0: UConverterFromUnicodeArgs *fromArgs, michael@0: const UChar* codeUnits, michael@0: int32_t length, michael@0: UChar32 codePoint, michael@0: UConverterCallbackReason reason, michael@0: UErrorCode * err) michael@0: { michael@0: if (reason <= UCNV_IRREGULAR) michael@0: { michael@0: if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED)) michael@0: { michael@0: *err = U_ZERO_ERROR; michael@0: ucnv_cbFromUWriteSub(fromArgs, 0, err); michael@0: } michael@0: /* else the caller must have set the error code accordingly. */ michael@0: } michael@0: /* else ignore the reset, close and clone calls. */ michael@0: } michael@0: michael@0: /*uses uprv_itou to get a unicode escape sequence of the offensive sequence, michael@0: *uses a clean copy (resetted) of the converter, to convert that unicode michael@0: *escape sequence to the target codepage (if conversion failure happens then michael@0: *we revert to substituting with subchar) michael@0: */ michael@0: U_CAPI void U_EXPORT2 michael@0: UCNV_FROM_U_CALLBACK_ESCAPE ( michael@0: const void *context, michael@0: UConverterFromUnicodeArgs *fromArgs, michael@0: const UChar *codeUnits, michael@0: int32_t length, michael@0: UChar32 codePoint, michael@0: UConverterCallbackReason reason, michael@0: UErrorCode * err) michael@0: { michael@0: michael@0: UChar valueString[VALUE_STRING_LENGTH]; michael@0: int32_t valueStringLength = 0; michael@0: int32_t i = 0; michael@0: michael@0: const UChar *myValueSource = NULL; michael@0: UErrorCode err2 = U_ZERO_ERROR; michael@0: UConverterFromUCallback original = NULL; michael@0: const void *originalContext; michael@0: michael@0: UConverterFromUCallback ignoredCallback = NULL; michael@0: const void *ignoredContext; michael@0: michael@0: if (reason > UCNV_IRREGULAR) michael@0: { michael@0: return; michael@0: } michael@0: michael@0: ucnv_setFromUCallBack (fromArgs->converter, michael@0: (UConverterFromUCallback) UCNV_FROM_U_CALLBACK_SUBSTITUTE, michael@0: NULL, michael@0: &original, michael@0: &originalContext, michael@0: &err2); michael@0: michael@0: if (U_FAILURE (err2)) michael@0: { michael@0: *err = err2; michael@0: return; michael@0: } michael@0: if(context==NULL) michael@0: { michael@0: while (i < length) michael@0: { michael@0: valueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */ michael@0: valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */ michael@0: valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4); michael@0: } michael@0: } michael@0: else michael@0: { michael@0: switch(*((char*)context)) michael@0: { michael@0: case UCNV_PRV_ESCAPE_JAVA: michael@0: while (i < length) michael@0: { michael@0: valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */ michael@0: valueString[valueStringLength++] = (UChar) UNICODE_U_LOW_CODEPOINT; /* adding u */ michael@0: valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4); michael@0: } michael@0: break; michael@0: michael@0: case UCNV_PRV_ESCAPE_C: michael@0: valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */ michael@0: michael@0: if(length==2){ michael@0: valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */ michael@0: valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 8); michael@0: michael@0: } michael@0: else{ michael@0: valueString[valueStringLength++] = (UChar) UNICODE_U_LOW_CODEPOINT; /* adding u */ michael@0: valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 4); michael@0: } michael@0: break; michael@0: michael@0: case UCNV_PRV_ESCAPE_XML_DEC: michael@0: michael@0: valueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT; /* adding & */ michael@0: valueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT; /* adding # */ michael@0: if(length==2){ michael@0: valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 10, 0); michael@0: } michael@0: else{ michael@0: valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 10, 0); michael@0: } michael@0: valueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */ michael@0: break; michael@0: michael@0: case UCNV_PRV_ESCAPE_XML_HEX: michael@0: michael@0: valueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT; /* adding & */ michael@0: valueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT; /* adding # */ michael@0: valueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */ michael@0: if(length==2){ michael@0: valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 0); michael@0: } michael@0: else{ michael@0: valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 0); michael@0: } michael@0: valueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */ michael@0: break; michael@0: michael@0: case UCNV_PRV_ESCAPE_UNICODE: michael@0: valueString[valueStringLength++] = (UChar) UNICODE_LEFT_CURLY_CODEPOINT; /* adding { */ michael@0: valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */ michael@0: valueString[valueStringLength++] = (UChar) UNICODE_PLUS_CODEPOINT; /* adding + */ michael@0: if (length == 2) { michael@0: valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 4); michael@0: } else { michael@0: valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 4); michael@0: } michael@0: valueString[valueStringLength++] = (UChar) UNICODE_RIGHT_CURLY_CODEPOINT; /* adding } */ michael@0: break; michael@0: michael@0: case UCNV_PRV_ESCAPE_CSS2: michael@0: valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */ michael@0: valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 0); michael@0: /* Always add space character, becase the next character might be whitespace, michael@0: which would erroneously be considered the termination of the escape sequence. */ michael@0: valueString[valueStringLength++] = (UChar) UNICODE_SPACE_CODEPOINT; michael@0: break; michael@0: michael@0: default: michael@0: while (i < length) michael@0: { michael@0: valueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */ michael@0: valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */ michael@0: valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4); michael@0: } michael@0: } michael@0: } michael@0: myValueSource = valueString; michael@0: michael@0: /* reset the error */ michael@0: *err = U_ZERO_ERROR; michael@0: michael@0: ucnv_cbFromUWriteUChars(fromArgs, &myValueSource, myValueSource+valueStringLength, 0, err); michael@0: michael@0: ucnv_setFromUCallBack (fromArgs->converter, michael@0: original, michael@0: originalContext, michael@0: &ignoredCallback, michael@0: &ignoredContext, michael@0: &err2); michael@0: if (U_FAILURE (err2)) michael@0: { michael@0: *err = err2; michael@0: return; michael@0: } michael@0: michael@0: return; michael@0: } michael@0: michael@0: michael@0: michael@0: U_CAPI void U_EXPORT2 michael@0: UCNV_TO_U_CALLBACK_SKIP ( michael@0: const void *context, michael@0: UConverterToUnicodeArgs *toArgs, michael@0: const char* codeUnits, michael@0: int32_t length, michael@0: UConverterCallbackReason reason, michael@0: UErrorCode * err) michael@0: { michael@0: if (reason <= UCNV_IRREGULAR) michael@0: { michael@0: if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED)) michael@0: { michael@0: *err = U_ZERO_ERROR; michael@0: } michael@0: /* else the caller must have set the error code accordingly. */ michael@0: } michael@0: /* else ignore the reset, close and clone calls. */ michael@0: } michael@0: michael@0: U_CAPI void U_EXPORT2 michael@0: UCNV_TO_U_CALLBACK_SUBSTITUTE ( michael@0: const void *context, michael@0: UConverterToUnicodeArgs *toArgs, michael@0: const char* codeUnits, michael@0: int32_t length, michael@0: UConverterCallbackReason reason, michael@0: UErrorCode * err) michael@0: { michael@0: if (reason <= UCNV_IRREGULAR) michael@0: { michael@0: if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED)) michael@0: { michael@0: *err = U_ZERO_ERROR; michael@0: ucnv_cbToUWriteSub(toArgs,0,err); michael@0: } michael@0: /* else the caller must have set the error code accordingly. */ michael@0: } michael@0: /* else ignore the reset, close and clone calls. */ michael@0: } michael@0: michael@0: /*uses uprv_itou to get a unicode escape sequence of the offensive sequence, michael@0: *and uses that as the substitution sequence michael@0: */ michael@0: U_CAPI void U_EXPORT2 michael@0: UCNV_TO_U_CALLBACK_ESCAPE ( michael@0: const void *context, michael@0: UConverterToUnicodeArgs *toArgs, michael@0: const char* codeUnits, michael@0: int32_t length, michael@0: UConverterCallbackReason reason, michael@0: UErrorCode * err) michael@0: { michael@0: UChar uniValueString[VALUE_STRING_LENGTH]; michael@0: int32_t valueStringLength = 0; michael@0: int32_t i = 0; michael@0: michael@0: if (reason > UCNV_IRREGULAR) michael@0: { michael@0: return; michael@0: } michael@0: michael@0: if(context==NULL) michael@0: { michael@0: while (i < length) michael@0: { michael@0: uniValueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */ michael@0: uniValueString[valueStringLength++] = (UChar) UNICODE_X_CODEPOINT; /* adding X */ michael@0: valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t) codeUnits[i++], 16, 2); michael@0: } michael@0: } michael@0: else michael@0: { michael@0: switch(*((char*)context)) michael@0: { michael@0: case UCNV_PRV_ESCAPE_XML_DEC: michael@0: while (i < length) michael@0: { michael@0: uniValueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT; /* adding & */ michael@0: uniValueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT; /* adding # */ michael@0: valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 10, 0); michael@0: uniValueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */ michael@0: } michael@0: break; michael@0: michael@0: case UCNV_PRV_ESCAPE_XML_HEX: michael@0: while (i < length) michael@0: { michael@0: uniValueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT; /* adding & */ michael@0: uniValueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT; /* adding # */ michael@0: uniValueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */ michael@0: valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 16, 0); michael@0: uniValueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */ michael@0: } michael@0: break; michael@0: case UCNV_PRV_ESCAPE_C: michael@0: while (i < length) michael@0: { michael@0: uniValueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */ michael@0: uniValueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */ michael@0: valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 16, 2); michael@0: } michael@0: break; michael@0: default: michael@0: while (i < length) michael@0: { michael@0: uniValueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */ michael@0: uniValueString[valueStringLength++] = (UChar) UNICODE_X_CODEPOINT; /* adding X */ michael@0: uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t) codeUnits[i++], 16, 2); michael@0: valueStringLength += 2; michael@0: } michael@0: } michael@0: } michael@0: /* reset the error */ michael@0: *err = U_ZERO_ERROR; michael@0: michael@0: ucnv_cbToUWriteUChars(toArgs, uniValueString, valueStringLength, 0, err); michael@0: } michael@0: michael@0: #endif