1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/common/ucnv_cnv.h Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,321 @@ 1.4 +/* 1.5 +********************************************************************** 1.6 +* Copyright (C) 1999-2011, International Business Machines 1.7 +* Corporation and others. All Rights Reserved. 1.8 +********************************************************************** 1.9 +* 1.10 +* ucnv_cnv.h: 1.11 +* Definitions for converter implementations. 1.12 +* 1.13 +* Modification History: 1.14 +* 1.15 +* Date Name Description 1.16 +* 05/09/00 helena Added implementation to handle fallback mappings. 1.17 +* 06/29/2000 helena Major rewrite of the callback APIs. 1.18 +*/ 1.19 + 1.20 +#ifndef UCNV_CNV_H 1.21 +#define UCNV_CNV_H 1.22 + 1.23 +#include "unicode/utypes.h" 1.24 + 1.25 +#if !UCONFIG_NO_CONVERSION 1.26 + 1.27 +#include "unicode/ucnv.h" 1.28 +#include "unicode/ucnv_err.h" 1.29 +#include "unicode/uset.h" 1.30 +#include "uset_imp.h" 1.31 + 1.32 +U_CDECL_BEGIN 1.33 + 1.34 +/* this is used in fromUnicode DBCS tables as an "unassigned" marker */ 1.35 +#define missingCharMarker 0xFFFF 1.36 + 1.37 +/* 1.38 + * #define missingUCharMarker 0xfffe 1.39 + * 1.40 + * commented out because there are actually two values used in toUnicode tables: 1.41 + * U+fffe "unassigned" 1.42 + * U+ffff "illegal" 1.43 + */ 1.44 + 1.45 +/** Forward declaration, see ucnv_bld.h */ 1.46 +struct UConverterSharedData; 1.47 +typedef struct UConverterSharedData UConverterSharedData; 1.48 + 1.49 +/* function types for UConverterImpl ---------------------------------------- */ 1.50 + 1.51 +/* struct with arguments for UConverterLoad and ucnv_load() */ 1.52 +typedef struct { 1.53 + int32_t size; /* sizeof(UConverterLoadArgs) */ 1.54 + int32_t nestedLoads; /* count nested ucnv_load() calls */ 1.55 + UBool onlyTestIsLoadable; /* input: don't actually load */ 1.56 + UBool reserved0; /* reserved - for good alignment of the pointers */ 1.57 + int16_t reserved; /* reserved - for good alignment of the pointers */ 1.58 + uint32_t options; 1.59 + const char *pkg, *name, *locale; 1.60 +} UConverterLoadArgs; 1.61 + 1.62 +#define UCNV_LOAD_ARGS_INITIALIZER \ 1.63 + { (int32_t)sizeof(UConverterLoadArgs), 0, FALSE, FALSE, 0, 0, NULL, NULL, NULL } 1.64 + 1.65 +typedef void (*UConverterLoad) (UConverterSharedData *sharedData, 1.66 + UConverterLoadArgs *pArgs, 1.67 + const uint8_t *raw, UErrorCode *pErrorCode); 1.68 +typedef void (*UConverterUnload) (UConverterSharedData *sharedData); 1.69 + 1.70 +typedef void (*UConverterOpen) (UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *pErrorCode); 1.71 +typedef void (*UConverterClose) (UConverter *cnv); 1.72 + 1.73 +typedef enum UConverterResetChoice { 1.74 + UCNV_RESET_BOTH, 1.75 + UCNV_RESET_TO_UNICODE, 1.76 + UCNV_RESET_FROM_UNICODE 1.77 +} UConverterResetChoice; 1.78 + 1.79 +typedef void (*UConverterReset) (UConverter *cnv, UConverterResetChoice choice); 1.80 + 1.81 +/* 1.82 + * Converter implementation function(s) for ucnv_toUnicode(). 1.83 + * If the toUnicodeWithOffsets function pointer is NULL, 1.84 + * then the toUnicode function will be used and the offsets will be set to -1. 1.85 + * 1.86 + * Must maintain state across buffers. Use toUBytes[toULength] for partial input 1.87 + * sequences; it will be checked in ucnv.c at the end of the input stream 1.88 + * to detect truncated input. 1.89 + * Some converters may need additional detection and may then set U_TRUNCATED_CHAR_FOUND. 1.90 + * 1.91 + * The toUnicodeWithOffsets must write exactly as many offset values as target 1.92 + * units. Write offset values of -1 for when the source index corresponding to 1.93 + * the output unit is not known (e.g., the character started in an earlier buffer). 1.94 + * The pArgs->offsets pointer need not be moved forward. 1.95 + * 1.96 + * At function return, either one of the following conditions must be true: 1.97 + * - U_BUFFER_OVERFLOW_ERROR and the target is full: target==targetLimit 1.98 + * - another error code with toUBytes[toULength] set to the offending input 1.99 + * - no error, and the source is consumed: source==sourceLimit 1.100 + * 1.101 + * The ucnv.c code will handle the end of the input (reset) 1.102 + * (reset, and truncation detection) and callbacks. 1.103 + */ 1.104 +typedef void (*UConverterToUnicode) (UConverterToUnicodeArgs *, UErrorCode *); 1.105 + 1.106 +/* 1.107 + * Same rules as for UConverterToUnicode. 1.108 + * A lead surrogate is kept in fromUChar32 across buffers, and if an error 1.109 + * occurs, then the offending input code point must be put into fromUChar32 1.110 + * as well. 1.111 + */ 1.112 +typedef void (*UConverterFromUnicode) (UConverterFromUnicodeArgs *, UErrorCode *); 1.113 + 1.114 +/* 1.115 + * Converter implementation function for ucnv_convertEx(), for direct conversion 1.116 + * between two charsets without pivoting through UTF-16. 1.117 + * The rules are the same as for UConverterToUnicode and UConverterFromUnicode. 1.118 + * In addition, 1.119 + * - The toUnicode side must behave and keep state exactly like the 1.120 + * UConverterToUnicode implementation for the same source charset. 1.121 + * - A U_USING_DEFAULT_WARNING can be set to request to temporarily fall back 1.122 + * to pivoting. When this function is called, the conversion framework makes 1.123 + * sure that this warning is not set on input. 1.124 + * - Continuing a partial match and flushing the toUnicode replay buffer 1.125 + * are handled by pivoting, using the toUnicode and fromUnicode functions. 1.126 + */ 1.127 +typedef void (*UConverterConvert) (UConverterFromUnicodeArgs *pFromUArgs, 1.128 + UConverterToUnicodeArgs *pToUArgs, 1.129 + UErrorCode *pErrorCode); 1.130 + 1.131 +/* 1.132 + * Converter implementation function for ucnv_getNextUChar(). 1.133 + * If the function pointer is NULL, then the toUnicode function will be used. 1.134 + * 1.135 + * Will be called at a character boundary (toULength==0). 1.136 + * May return with 1.137 + * - U_INDEX_OUTOFBOUNDS_ERROR if there was no output for the input 1.138 + * (the return value will be ignored) 1.139 + * - U_TRUNCATED_CHAR_FOUND or another error code (never U_BUFFER_OVERFLOW_ERROR!) 1.140 + * with toUBytes[toULength] set to the offending input 1.141 + * (the return value will be ignored) 1.142 + * - return UCNV_GET_NEXT_UCHAR_USE_TO_U, without moving the source pointer, 1.143 + * to indicate that the ucnv.c code shall call the toUnicode function instead 1.144 + * - return a real code point result 1.145 + * 1.146 + * Unless UCNV_GET_NEXT_UCHAR_USE_TO_U is returned, the source bytes must be consumed. 1.147 + * 1.148 + * The ucnv.c code will handle the end of the input (reset) 1.149 + * (except for truncation detection!) and callbacks. 1.150 + */ 1.151 +typedef UChar32 (*UConverterGetNextUChar) (UConverterToUnicodeArgs *, UErrorCode *); 1.152 + 1.153 +typedef void (*UConverterGetStarters)(const UConverter* converter, 1.154 + UBool starters[256], 1.155 + UErrorCode *pErrorCode); 1.156 + 1.157 +/* If this function pointer is null or if the function returns null 1.158 + * the name field in static data struct should be returned by 1.159 + * ucnv_getName() API function 1.160 + */ 1.161 +typedef const char * (*UConverterGetName) (const UConverter *cnv); 1.162 + 1.163 +/** 1.164 + * Write the codepage substitution character. 1.165 + * If this function is not set, then ucnv_cbFromUWriteSub() writes 1.166 + * the substitution character from UConverter. 1.167 + * For stateful converters, it is typically necessary to handle this 1.168 + * specificially for the converter in order to properly maintain the state. 1.169 + */ 1.170 +typedef void (*UConverterWriteSub) (UConverterFromUnicodeArgs *pArgs, int32_t offsetIndex, UErrorCode *pErrorCode); 1.171 + 1.172 +/** 1.173 + * For converter-specific safeClone processing 1.174 + * If this function is not set, then ucnv_safeClone assumes that the converter has no private data that changes 1.175 + * after the converter is done opening. 1.176 + * If this function is set, then it is called just after a memcpy() of 1.177 + * converter data to the new, empty converter, and is expected to set up 1.178 + * the initial state of the converter. It is not expected to increment the 1.179 + * reference counts of the standard data types such as the shared data. 1.180 + */ 1.181 +typedef UConverter * (*UConverterSafeClone) (const UConverter *cnv, 1.182 + void *stackBuffer, 1.183 + int32_t *pBufferSize, 1.184 + UErrorCode *status); 1.185 + 1.186 +/** 1.187 + * Filters for some ucnv_getUnicodeSet() implementation code. 1.188 + */ 1.189 +typedef enum UConverterSetFilter { 1.190 + UCNV_SET_FILTER_NONE, 1.191 + UCNV_SET_FILTER_DBCS_ONLY, 1.192 + UCNV_SET_FILTER_2022_CN, 1.193 + UCNV_SET_FILTER_SJIS, 1.194 + UCNV_SET_FILTER_GR94DBCS, 1.195 + UCNV_SET_FILTER_HZ, 1.196 + UCNV_SET_FILTER_COUNT 1.197 +} UConverterSetFilter; 1.198 + 1.199 +/** 1.200 + * Fills the set of Unicode code points that can be converted by an ICU converter. 1.201 + * The API function ucnv_getUnicodeSet() clears the USet before calling 1.202 + * the converter's getUnicodeSet() implementation; the converter should only 1.203 + * add the appropriate code points to allow recursive use. 1.204 + * For example, the ISO-2022-JP converter will call each subconverter's 1.205 + * getUnicodeSet() implementation to consecutively add code points to 1.206 + * the same USet, which will result in a union of the sets of all subconverters. 1.207 + * 1.208 + * For more documentation, see ucnv_getUnicodeSet() in ucnv.h. 1.209 + */ 1.210 +typedef void (*UConverterGetUnicodeSet) (const UConverter *cnv, 1.211 + const USetAdder *sa, 1.212 + UConverterUnicodeSet which, 1.213 + UErrorCode *pErrorCode); 1.214 + 1.215 +UBool CONVERSION_U_SUCCESS (UErrorCode err); 1.216 + 1.217 +/** 1.218 + * UConverterImpl contains all the data and functions for a converter type. 1.219 + * Its function pointers work much like a C++ vtable. 1.220 + * Many converter types need to define only a subset of the functions; 1.221 + * when a function pointer is NULL, then a default action will be performed. 1.222 + * 1.223 + * Every converter type must implement toUnicode, fromUnicode, and getNextUChar, 1.224 + * otherwise the converter may crash. 1.225 + * Every converter type that has variable-length codepage sequences should 1.226 + * also implement toUnicodeWithOffsets and fromUnicodeWithOffsets for 1.227 + * correct offset handling. 1.228 + * All other functions may or may not be implemented - it depends only on 1.229 + * whether the converter type needs them. 1.230 + * 1.231 + * When open() fails, then close() will be called, if present. 1.232 + */ 1.233 +struct UConverterImpl { 1.234 + UConverterType type; 1.235 + 1.236 + UConverterLoad load; 1.237 + UConverterUnload unload; 1.238 + 1.239 + UConverterOpen open; 1.240 + UConverterClose close; 1.241 + UConverterReset reset; 1.242 + 1.243 + UConverterToUnicode toUnicode; 1.244 + UConverterToUnicode toUnicodeWithOffsets; 1.245 + UConverterFromUnicode fromUnicode; 1.246 + UConverterFromUnicode fromUnicodeWithOffsets; 1.247 + UConverterGetNextUChar getNextUChar; 1.248 + 1.249 + UConverterGetStarters getStarters; 1.250 + UConverterGetName getName; 1.251 + UConverterWriteSub writeSub; 1.252 + UConverterSafeClone safeClone; 1.253 + UConverterGetUnicodeSet getUnicodeSet; 1.254 + 1.255 + UConverterConvert toUTF8; 1.256 + UConverterConvert fromUTF8; 1.257 +}; 1.258 + 1.259 +extern const UConverterSharedData 1.260 + _MBCSData, _Latin1Data, 1.261 + _UTF8Data, _UTF16BEData, _UTF16LEData, _UTF32BEData, _UTF32LEData, 1.262 + _ISO2022Data, 1.263 + _LMBCSData1,_LMBCSData2, _LMBCSData3, _LMBCSData4, _LMBCSData5, _LMBCSData6, 1.264 + _LMBCSData8,_LMBCSData11,_LMBCSData16,_LMBCSData17,_LMBCSData18,_LMBCSData19, 1.265 + _HZData,_ISCIIData, _SCSUData, _ASCIIData, 1.266 + _UTF7Data, _Bocu1Data, _UTF16Data, _UTF32Data, _CESU8Data, _IMAPData, _CompoundTextData; 1.267 + 1.268 +U_CDECL_END 1.269 + 1.270 +/** Always use fallbacks from codepage to Unicode */ 1.271 +#define TO_U_USE_FALLBACK(useFallback) TRUE 1.272 +#define UCNV_TO_U_USE_FALLBACK(cnv) TRUE 1.273 + 1.274 +/** Use fallbacks from Unicode to codepage when cnv->useFallback or for private-use code points */ 1.275 +#define IS_PRIVATE_USE(c) ((uint32_t)((c)-0xe000)<0x1900 || (uint32_t)((c)-0xf0000)<0x20000) 1.276 +#define FROM_U_USE_FALLBACK(useFallback, c) ((useFallback) || IS_PRIVATE_USE(c)) 1.277 +#define UCNV_FROM_U_USE_FALLBACK(cnv, c) FROM_U_USE_FALLBACK((cnv)->useFallback, c) 1.278 + 1.279 +/** 1.280 + * Magic number for ucnv_getNextUChar(), returned by a 1.281 + * getNextUChar() implementation to indicate to use the converter's toUnicode() 1.282 + * instead of the native function. 1.283 + * @internal 1.284 + */ 1.285 +#define UCNV_GET_NEXT_UCHAR_USE_TO_U -9 1.286 + 1.287 +U_CFUNC void 1.288 +ucnv_getCompleteUnicodeSet(const UConverter *cnv, 1.289 + const USetAdder *sa, 1.290 + UConverterUnicodeSet which, 1.291 + UErrorCode *pErrorCode); 1.292 + 1.293 +U_CFUNC void 1.294 +ucnv_getNonSurrogateUnicodeSet(const UConverter *cnv, 1.295 + const USetAdder *sa, 1.296 + UConverterUnicodeSet which, 1.297 + UErrorCode *pErrorCode); 1.298 + 1.299 +U_CFUNC void 1.300 +ucnv_fromUWriteBytes(UConverter *cnv, 1.301 + const char *bytes, int32_t length, 1.302 + char **target, const char *targetLimit, 1.303 + int32_t **offsets, 1.304 + int32_t sourceIndex, 1.305 + UErrorCode *pErrorCode); 1.306 +U_CFUNC void 1.307 +ucnv_toUWriteUChars(UConverter *cnv, 1.308 + const UChar *uchars, int32_t length, 1.309 + UChar **target, const UChar *targetLimit, 1.310 + int32_t **offsets, 1.311 + int32_t sourceIndex, 1.312 + UErrorCode *pErrorCode); 1.313 + 1.314 +U_CFUNC void 1.315 +ucnv_toUWriteCodePoint(UConverter *cnv, 1.316 + UChar32 c, 1.317 + UChar **target, const UChar *targetLimit, 1.318 + int32_t **offsets, 1.319 + int32_t sourceIndex, 1.320 + UErrorCode *pErrorCode); 1.321 + 1.322 +#endif 1.323 + 1.324 +#endif /* UCNV_CNV */