michael@0: /* michael@0: ********************************************************************** michael@0: * Copyright (C) 1999-2009, International Business Machines michael@0: * Corporation and others. All Rights Reserved. michael@0: ********************************************************************** michael@0: * michael@0: * michael@0: * ucnv_err.h: michael@0: */ michael@0: michael@0: /** michael@0: * \file michael@0: * \brief C UConverter predefined error callbacks michael@0: * michael@0: *

Error Behaviour Functions

michael@0: * Defines some error behaviour functions called by ucnv_{from,to}Unicode michael@0: * These are provided as part of ICU and many are stable, but they michael@0: * can also be considered only as an example of what can be done with michael@0: * callbacks. You may of course write your own. michael@0: * michael@0: * If you want to write your own, you may also find the functions from michael@0: * ucnv_cb.h useful when writing your own callbacks. michael@0: * michael@0: * These functions, although public, should NEVER be called directly. michael@0: * They should be used as parameters to the ucnv_setFromUCallback michael@0: * and ucnv_setToUCallback functions, to set the behaviour of a converter michael@0: * when it encounters ILLEGAL/UNMAPPED/INVALID sequences. michael@0: * michael@0: * usage example: 'STOP' doesn't need any context, but newContext michael@0: * could be set to something other than 'NULL' if needed. The available michael@0: * contexts in this header can modify the default behavior of the callback. michael@0: * michael@0: * \code michael@0: * UErrorCode err = U_ZERO_ERROR; michael@0: * UConverter *myConverter = ucnv_open("ibm-949", &err); michael@0: * const void *oldContext; michael@0: * UConverterFromUCallback oldAction; michael@0: * michael@0: * michael@0: * if (U_SUCCESS(err)) michael@0: * { michael@0: * ucnv_setFromUCallBack(myConverter, michael@0: * UCNV_FROM_U_CALLBACK_STOP, michael@0: * NULL, michael@0: * &oldAction, michael@0: * &oldContext, michael@0: * &status); michael@0: * } michael@0: * \endcode michael@0: * michael@0: * The code above tells "myConverter" to stop when it encounters an michael@0: * ILLEGAL/TRUNCATED/INVALID sequences when it is used to convert from michael@0: * Unicode -> Codepage. The behavior from Codepage to Unicode is not changed, michael@0: * and ucnv_setToUCallBack would need to be called in order to change michael@0: * that behavior too. michael@0: * michael@0: * Here is an example with a context: michael@0: * michael@0: * \code michael@0: * UErrorCode err = U_ZERO_ERROR; michael@0: * UConverter *myConverter = ucnv_open("ibm-949", &err); michael@0: * const void *oldContext; michael@0: * UConverterFromUCallback oldAction; michael@0: * michael@0: * michael@0: * if (U_SUCCESS(err)) michael@0: * { michael@0: * ucnv_setToUCallBack(myConverter, michael@0: * UCNV_TO_U_CALLBACK_SUBSTITUTE, michael@0: * UCNV_SUB_STOP_ON_ILLEGAL, michael@0: * &oldAction, michael@0: * &oldContext, michael@0: * &status); michael@0: * } michael@0: * \endcode michael@0: * michael@0: * The code above tells "myConverter" to stop when it encounters an michael@0: * ILLEGAL/TRUNCATED/INVALID sequences when it is used to convert from michael@0: * Codepage -> Unicode. Any unmapped and legal characters will be michael@0: * substituted to be the default substitution character. michael@0: */ michael@0: michael@0: #ifndef UCNV_ERR_H michael@0: #define UCNV_ERR_H michael@0: michael@0: #include "unicode/utypes.h" michael@0: michael@0: #if !UCONFIG_NO_CONVERSION michael@0: michael@0: /** Forward declaring the UConverter structure. @stable ICU 2.0 */ michael@0: struct UConverter; michael@0: michael@0: /** @stable ICU 2.0 */ michael@0: typedef struct UConverter UConverter; michael@0: michael@0: /** michael@0: * FROM_U, TO_U context options for sub callback michael@0: * @stable ICU 2.0 michael@0: */ michael@0: #define UCNV_SUB_STOP_ON_ILLEGAL "i" michael@0: michael@0: /** michael@0: * FROM_U, TO_U context options for skip callback michael@0: * @stable ICU 2.0 michael@0: */ michael@0: #define UCNV_SKIP_STOP_ON_ILLEGAL "i" michael@0: michael@0: /** michael@0: * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to ICU (%UXXXX) michael@0: * @stable ICU 2.0 michael@0: */ michael@0: #define UCNV_ESCAPE_ICU NULL michael@0: /** michael@0: * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to JAVA (\\uXXXX) michael@0: * @stable ICU 2.0 michael@0: */ michael@0: #define UCNV_ESCAPE_JAVA "J" michael@0: /** michael@0: * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to C (\\uXXXX \\UXXXXXXXX) michael@0: * TO_U_CALLBACK_ESCAPE option to escape the character value accoding to C (\\xXXXX) michael@0: * @stable ICU 2.0 michael@0: */ michael@0: #define UCNV_ESCAPE_C "C" michael@0: /** michael@0: * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Decimal escape \htmlonly(&#DDDD;)\endhtmlonly michael@0: * TO_U_CALLBACK_ESCAPE context option to escape the character value accoding to XML Decimal escape \htmlonly(&#DDDD;)\endhtmlonly michael@0: * @stable ICU 2.0 michael@0: */ michael@0: #define UCNV_ESCAPE_XML_DEC "D" michael@0: /** michael@0: * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Hex escape \htmlonly(&#xXXXX;)\endhtmlonly michael@0: * TO_U_CALLBACK_ESCAPE context option to escape the character value accoding to XML Hex escape \htmlonly(&#xXXXX;)\endhtmlonly michael@0: * @stable ICU 2.0 michael@0: */ michael@0: #define UCNV_ESCAPE_XML_HEX "X" michael@0: /** michael@0: * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to Unicode (U+XXXXX) michael@0: * @stable ICU 2.0 michael@0: */ michael@0: #define UCNV_ESCAPE_UNICODE "U" michael@0: michael@0: /** michael@0: * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to CSS2 conventions (\\HH..H, that is, michael@0: * a backslash, 1..6 hex digits, and a space) michael@0: * @stable ICU 4.0 michael@0: */ michael@0: #define UCNV_ESCAPE_CSS2 "S" michael@0: michael@0: /** michael@0: * The process condition code to be used with the callbacks. michael@0: * Codes which are greater than UCNV_IRREGULAR should be michael@0: * passed on to any chained callbacks. michael@0: * @stable ICU 2.0 michael@0: */ michael@0: typedef enum { michael@0: UCNV_UNASSIGNED = 0, /**< The code point is unassigned. michael@0: The error code U_INVALID_CHAR_FOUND will be set. */ michael@0: UCNV_ILLEGAL = 1, /**< The code point is illegal. For example, michael@0: \\x81\\x2E is illegal in SJIS because \\x2E michael@0: is not a valid trail byte for the \\x81 michael@0: lead byte. michael@0: Also, starting with Unicode 3.0.1, non-shortest byte sequences michael@0: in UTF-8 (like \\xC1\\xA1 instead of \\x61 for U+0061) michael@0: are also illegal, not just irregular. michael@0: The error code U_ILLEGAL_CHAR_FOUND will be set. */ michael@0: UCNV_IRREGULAR = 2, /**< The codepoint is not a regular sequence in michael@0: the encoding. For example, \\xED\\xA0\\x80..\\xED\\xBF\\xBF michael@0: are irregular UTF-8 byte sequences for single surrogate michael@0: code points. michael@0: The error code U_INVALID_CHAR_FOUND will be set. */ michael@0: UCNV_RESET = 3, /**< The callback is called with this reason when a michael@0: 'reset' has occured. Callback should reset all michael@0: state. */ michael@0: UCNV_CLOSE = 4, /**< Called when the converter is closed. The michael@0: callback should release any allocated memory.*/ michael@0: UCNV_CLONE = 5 /**< Called when ucnv_safeClone() is called on the michael@0: converter. the pointer available as the michael@0: 'context' is an alias to the original converters' michael@0: context pointer. If the context must be owned michael@0: by the new converter, the callback must clone michael@0: the data and call ucnv_setFromUCallback michael@0: (or setToUCallback) with the correct pointer. michael@0: @stable ICU 2.2 michael@0: */ michael@0: } UConverterCallbackReason; michael@0: michael@0: michael@0: /** michael@0: * The structure for the fromUnicode callback function parameter. michael@0: * @stable ICU 2.0 michael@0: */ michael@0: typedef struct { michael@0: uint16_t size; /**< The size of this struct. @stable ICU 2.0 */ michael@0: UBool flush; /**< The internal state of converter will be reset and data flushed if set to TRUE. @stable ICU 2.0 */ michael@0: UConverter *converter; /**< Pointer to the converter that is opened and to which this struct is passed as an argument. @stable ICU 2.0 */ michael@0: const UChar *source; /**< Pointer to the source source buffer. @stable ICU 2.0 */ michael@0: const UChar *sourceLimit; /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0 */ michael@0: char *target; /**< Pointer to the target buffer. @stable ICU 2.0 */ michael@0: const char *targetLimit; /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0 */ michael@0: int32_t *offsets; /**< Pointer to the buffer that recieves the offsets. *offset = blah ; offset++;. @stable ICU 2.0 */ michael@0: } UConverterFromUnicodeArgs; michael@0: michael@0: michael@0: /** michael@0: * The structure for the toUnicode callback function parameter. michael@0: * @stable ICU 2.0 michael@0: */ michael@0: typedef struct { michael@0: uint16_t size; /**< The size of this struct @stable ICU 2.0 */ michael@0: UBool flush; /**< The internal state of converter will be reset and data flushed if set to TRUE. @stable ICU 2.0 */ michael@0: UConverter *converter; /**< Pointer to the converter that is opened and to which this struct is passed as an argument. @stable ICU 2.0 */ michael@0: const char *source; /**< Pointer to the source source buffer. @stable ICU 2.0 */ michael@0: const char *sourceLimit; /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0 */ michael@0: UChar *target; /**< Pointer to the target buffer. @stable ICU 2.0 */ michael@0: const UChar *targetLimit; /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0 */ michael@0: int32_t *offsets; /**< Pointer to the buffer that recieves the offsets. *offset = blah ; offset++;. @stable ICU 2.0 */ michael@0: } UConverterToUnicodeArgs; michael@0: michael@0: michael@0: /** michael@0: * DO NOT CALL THIS FUNCTION DIRECTLY! michael@0: * This From Unicode callback STOPS at the ILLEGAL_SEQUENCE, michael@0: * returning the error code back to the caller immediately. michael@0: * michael@0: * @param context Pointer to the callback's private data michael@0: * @param fromUArgs Information about the conversion in progress michael@0: * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence michael@0: * @param length Size (in bytes) of the concerned codepage sequence michael@0: * @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint. michael@0: * @param reason Defines the reason the callback was invoked michael@0: * @param err This should always be set to a failure status prior to calling. michael@0: * @stable ICU 2.0 michael@0: */ michael@0: U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_STOP ( michael@0: const void *context, michael@0: UConverterFromUnicodeArgs *fromUArgs, michael@0: const UChar* codeUnits, michael@0: int32_t length, michael@0: UChar32 codePoint, michael@0: UConverterCallbackReason reason, michael@0: UErrorCode * err); michael@0: michael@0: michael@0: michael@0: /** michael@0: * DO NOT CALL THIS FUNCTION DIRECTLY! michael@0: * This To Unicode callback STOPS at the ILLEGAL_SEQUENCE, michael@0: * returning the error code back to the caller immediately. michael@0: * michael@0: * @param context Pointer to the callback's private data michael@0: * @param toUArgs Information about the conversion in progress michael@0: * @param codeUnits Points to 'length' bytes of the concerned codepage sequence michael@0: * @param length Size (in bytes) of the concerned codepage sequence michael@0: * @param reason Defines the reason the callback was invoked michael@0: * @param err This should always be set to a failure status prior to calling. michael@0: * @stable ICU 2.0 michael@0: */ michael@0: U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_STOP ( michael@0: const void *context, michael@0: UConverterToUnicodeArgs *toUArgs, michael@0: const char* codeUnits, michael@0: int32_t length, michael@0: UConverterCallbackReason reason, michael@0: UErrorCode * err); michael@0: michael@0: /** michael@0: * DO NOT CALL THIS FUNCTION DIRECTLY! michael@0: * This From Unicode callback skips any ILLEGAL_SEQUENCE, or michael@0: * skips only UNASSINGED_SEQUENCE depending on the context parameter michael@0: * simply ignoring those characters. michael@0: * michael@0: * @param context The function currently recognizes the callback options: michael@0: * UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE, michael@0: * returning the error code back to the caller immediately. michael@0: * NULL: Skips any ILLEGAL_SEQUENCE michael@0: * @param fromUArgs Information about the conversion in progress michael@0: * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence michael@0: * @param length Size (in bytes) of the concerned codepage sequence michael@0: * @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint. michael@0: * @param reason Defines the reason the callback was invoked michael@0: * @param err Return value will be set to success if the callback was handled, michael@0: * otherwise this value will be set to a failure status. michael@0: * @stable ICU 2.0 michael@0: */ michael@0: U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_SKIP ( michael@0: const void *context, michael@0: UConverterFromUnicodeArgs *fromUArgs, michael@0: const UChar* codeUnits, michael@0: int32_t length, michael@0: UChar32 codePoint, michael@0: UConverterCallbackReason reason, michael@0: UErrorCode * err); michael@0: michael@0: /** michael@0: * DO NOT CALL THIS FUNCTION DIRECTLY! michael@0: * This From Unicode callback will Substitute the ILLEGAL SEQUENCE, or michael@0: * UNASSIGNED_SEQUENCE depending on context parameter, with the michael@0: * current substitution string for the converter. This is the default michael@0: * callback. michael@0: * michael@0: * @param context The function currently recognizes the callback options: michael@0: * UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE, michael@0: * returning the error code back to the caller immediately. michael@0: * NULL: Substitutes any ILLEGAL_SEQUENCE michael@0: * @param fromUArgs Information about the conversion in progress michael@0: * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence michael@0: * @param length Size (in bytes) of the concerned codepage sequence michael@0: * @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint. michael@0: * @param reason Defines the reason the callback was invoked michael@0: * @param err Return value will be set to success if the callback was handled, michael@0: * otherwise this value will be set to a failure status. michael@0: * @see ucnv_setSubstChars michael@0: * @stable ICU 2.0 michael@0: */ michael@0: U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_SUBSTITUTE ( michael@0: const void *context, michael@0: UConverterFromUnicodeArgs *fromUArgs, michael@0: const UChar* codeUnits, michael@0: int32_t length, michael@0: UChar32 codePoint, michael@0: UConverterCallbackReason reason, michael@0: UErrorCode * err); michael@0: michael@0: /** michael@0: * DO NOT CALL THIS FUNCTION DIRECTLY! michael@0: * This From Unicode callback will Substitute the ILLEGAL SEQUENCE with the michael@0: * hexadecimal representation of the illegal codepoints michael@0: * michael@0: * @param context The function currently recognizes the callback options: michael@0: *

UCNV_ESCAPE_ICU: Substitues the ILLEGAL SEQUENCE with the hexadecimal michael@0: * representation in the format %UXXXX, e.g. "%uFFFE%u00AC%uC8FE"). michael@0: * In the Event the converter doesn't support the characters {%,U}[A-F][0-9], michael@0: * it will substitute the illegal sequence with the substitution characters. michael@0: * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as michael@0: * %UD84D%UDC56
UCNV_ESCAPE_JAVA: Substitues the ILLEGAL SEQUENCE with the hexadecimal michael@0: * representation in the format \\uXXXX, e.g. "\\uFFFE\\u00AC\\uC8FE"). michael@0: * In the Event the converter doesn't support the characters {\,u}[A-F][0-9], michael@0: * it will substitute the illegal sequence with the substitution characters. michael@0: * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as michael@0: * \\uD84D\\uDC56
UCNV_ESCAPE_C: Substitues the ILLEGAL SEQUENCE with the hexadecimal michael@0: * representation in the format \\uXXXX, e.g. "\\uFFFE\\u00AC\\uC8FE"). michael@0: * In the Event the converter doesn't support the characters {\,u,U}[A-F][0-9], michael@0: * it will substitute the illegal sequence with the substitution characters. michael@0: * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as michael@0: * \\U00023456
UCNV_ESCAPE_XML_DEC: Substitues the ILLEGAL SEQUENCE with the decimal michael@0: * representation in the format \htmlonly&#DDDDDDDD;, e.g. "¬죾")\endhtmlonly. michael@0: * In the Event the converter doesn't support the characters {&,#}[0-9], michael@0: * it will substitute the illegal sequence with the substitution characters. michael@0: * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as michael@0: * 𣑖 and Zero padding is ignored.
UCNV_ESCAPE_XML_HEX:Substitues the ILLEGAL SEQUENCE with the decimal michael@0: * representation in the format \htmlonly&#xXXXX; e.g. "¬죾")\endhtmlonly. michael@0: * In the Event the converter doesn't support the characters {&,#,x}[0-9], michael@0: * it will substitute the illegal sequence with the substitution characters. michael@0: * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as michael@0: * \htmlonly𣑖\endhtmlonly

michael@0: * @param fromUArgs Information about the conversion in progress michael@0: * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence michael@0: * @param length Size (in bytes) of the concerned codepage sequence michael@0: * @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint. michael@0: * @param reason Defines the reason the callback was invoked michael@0: * @param err Return value will be set to success if the callback was handled, michael@0: * otherwise this value will be set to a failure status. michael@0: * @stable ICU 2.0 michael@0: */ michael@0: U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_ESCAPE ( michael@0: const void *context, michael@0: UConverterFromUnicodeArgs *fromUArgs, michael@0: const UChar* codeUnits, michael@0: int32_t length, michael@0: UChar32 codePoint, michael@0: UConverterCallbackReason reason, michael@0: UErrorCode * err); michael@0: michael@0: michael@0: /** michael@0: * DO NOT CALL THIS FUNCTION DIRECTLY! michael@0: * This To Unicode callback skips any ILLEGAL_SEQUENCE, or michael@0: * skips only UNASSINGED_SEQUENCE depending on the context parameter michael@0: * simply ignoring those characters. michael@0: * michael@0: * @param context The function currently recognizes the callback options: michael@0: * UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE, michael@0: * returning the error code back to the caller immediately. michael@0: * NULL: Skips any ILLEGAL_SEQUENCE michael@0: * @param toUArgs Information about the conversion in progress michael@0: * @param codeUnits Points to 'length' bytes of the concerned codepage sequence michael@0: * @param length Size (in bytes) of the concerned codepage sequence michael@0: * @param reason Defines the reason the callback was invoked michael@0: * @param err Return value will be set to success if the callback was handled, michael@0: * otherwise this value will be set to a failure status. michael@0: * @stable ICU 2.0 michael@0: */ michael@0: U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_SKIP ( michael@0: const void *context, michael@0: UConverterToUnicodeArgs *toUArgs, michael@0: const char* codeUnits, michael@0: int32_t length, michael@0: UConverterCallbackReason reason, michael@0: UErrorCode * err); michael@0: michael@0: /** michael@0: * DO NOT CALL THIS FUNCTION DIRECTLY! michael@0: * This To Unicode callback will Substitute the ILLEGAL SEQUENCE,or michael@0: * UNASSIGNED_SEQUENCE depending on context parameter, with the michael@0: * Unicode substitution character, U+FFFD. michael@0: * michael@0: * @param context The function currently recognizes the callback options: michael@0: * UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE, michael@0: * returning the error code back to the caller immediately. michael@0: * NULL: Substitutes any ILLEGAL_SEQUENCE michael@0: * @param toUArgs Information about the conversion in progress michael@0: * @param codeUnits Points to 'length' bytes of the concerned codepage sequence michael@0: * @param length Size (in bytes) of the concerned codepage sequence michael@0: * @param reason Defines the reason the callback was invoked michael@0: * @param err Return value will be set to success if the callback was handled, michael@0: * otherwise this value will be set to a failure status. michael@0: * @stable ICU 2.0 michael@0: */ michael@0: U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_SUBSTITUTE ( michael@0: const void *context, michael@0: UConverterToUnicodeArgs *toUArgs, michael@0: const char* codeUnits, michael@0: int32_t length, michael@0: UConverterCallbackReason reason, michael@0: UErrorCode * err); michael@0: michael@0: /** michael@0: * DO NOT CALL THIS FUNCTION DIRECTLY! michael@0: * This To Unicode callback will Substitute the ILLEGAL SEQUENCE with the michael@0: * hexadecimal representation of the illegal bytes michael@0: * (in the format %XNN, e.g. "%XFF%X0A%XC8%X03"). michael@0: * michael@0: * @param context This function currently recognizes the callback options: michael@0: * UCNV_ESCAPE_ICU, UCNV_ESCAPE_JAVA, UCNV_ESCAPE_C, UCNV_ESCAPE_XML_DEC, michael@0: * UCNV_ESCAPE_XML_HEX and UCNV_ESCAPE_UNICODE. michael@0: * @param toUArgs Information about the conversion in progress michael@0: * @param codeUnits Points to 'length' bytes of the concerned codepage sequence michael@0: * @param length Size (in bytes) of the concerned codepage sequence michael@0: * @param reason Defines the reason the callback was invoked michael@0: * @param err Return value will be set to success if the callback was handled, michael@0: * otherwise this value will be set to a failure status. michael@0: * @stable ICU 2.0 michael@0: */ michael@0: michael@0: U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_ESCAPE ( michael@0: const void *context, michael@0: UConverterToUnicodeArgs *toUArgs, michael@0: const char* codeUnits, michael@0: int32_t length, michael@0: UConverterCallbackReason reason, michael@0: UErrorCode * err); michael@0: michael@0: #endif michael@0: michael@0: #endif michael@0: michael@0: /*UCNV_ERR_H*/