Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.
michael@0 | 1 | /* |
michael@0 | 2 | ********************************************************************** |
michael@0 | 3 | * Copyright (C) 1999-2011 International Business Machines |
michael@0 | 4 | * Corporation and others. All Rights Reserved. |
michael@0 | 5 | ********************************************************************** |
michael@0 | 6 | * |
michael@0 | 7 | * |
michael@0 | 8 | * ucnv_bld.h: |
michael@0 | 9 | * Contains internal data structure definitions |
michael@0 | 10 | * Created by Bertrand A. Damiba |
michael@0 | 11 | * |
michael@0 | 12 | * Change history: |
michael@0 | 13 | * |
michael@0 | 14 | * 06/29/2000 helena Major rewrite of the callback APIs. |
michael@0 | 15 | */ |
michael@0 | 16 | |
michael@0 | 17 | #ifndef UCNV_BLD_H |
michael@0 | 18 | #define UCNV_BLD_H |
michael@0 | 19 | |
michael@0 | 20 | #include "unicode/utypes.h" |
michael@0 | 21 | |
michael@0 | 22 | #if !UCONFIG_NO_CONVERSION |
michael@0 | 23 | |
michael@0 | 24 | #include "unicode/ucnv.h" |
michael@0 | 25 | #include "unicode/ucnv_err.h" |
michael@0 | 26 | #include "unicode/utf16.h" |
michael@0 | 27 | #include "ucnv_cnv.h" |
michael@0 | 28 | #include "ucnvmbcs.h" |
michael@0 | 29 | #include "ucnv_ext.h" |
michael@0 | 30 | #include "udataswp.h" |
michael@0 | 31 | |
michael@0 | 32 | /* size of the overflow buffers in UConverter (charErrorBuffer[] and UCharErrorBuffer[]), enough for escaping callbacks */ |
michael@0 | 33 | #define UCNV_ERROR_BUFFER_LENGTH 32 |
michael@0 | 34 | |
michael@0 | 35 | /* at most 4 bytes per substitution character (part of .cnv file format! see UConverterStaticData) */ |
michael@0 | 36 | #define UCNV_MAX_SUBCHAR_LEN 4 |
michael@0 | 37 | |
michael@0 | 38 | /* at most 8 bytes per character: sizes invalidCharBuffer[]; toUBytes[] is declared with UCNV_MAX_CHAR_LEN-1 bytes (UTF-8 uses up to 6) */ |
michael@0 | 39 | #define UCNV_MAX_CHAR_LEN 8 |
michael@0 | 40 | |
michael@0 | 41 | /* converter options bits, tested against the options field of a UConverter */ |
michael@0 | 42 | #define UCNV_OPTION_VERSION 0xf /* mask for the low 4 bits */ |
michael@0 | 43 | #define UCNV_OPTION_SWAP_LFNL 0x10 |
michael@0 | 44 | |
michael@0 | 45 | #define UCNV_GET_VERSION(cnv) ((cnv)->options&UCNV_OPTION_VERSION) /* cnv->options masked with UCNV_OPTION_VERSION */ |
michael@0 | 46 | |
michael@0 | 47 | U_CDECL_BEGIN /* We must declare the following as 'extern "C"' so that if ucnv |
michael@0 | 48 | itself is compiled under C++, the linkage of the funcptrs will |
michael@0 | 49 | work. |
michael@0 | 50 | */ |
michael@0 | 51 | |
michael@0 | 52 | union UConverterTable { /* conversion-table union; its only member is the MBCS table */ |
michael@0 | 53 | UConverterMBCSTable mbcs; |
michael@0 | 54 | }; |
michael@0 | 55 | |
michael@0 | 56 | typedef union UConverterTable UConverterTable; /* UConverterSharedData.table used to point to one of these; that field is now unused */ |
michael@0 | 57 | |
michael@0 | 58 | struct UConverterImpl; /* forward declaration only; this header just stores a pointer to it (UConverterSharedData.impl) */ |
michael@0 | 59 | typedef struct UConverterImpl UConverterImpl; |
michael@0 | 60 | |
michael@0 | 61 | /** bit values for UConverterStaticData.unicodeMask */ |
michael@0 | 62 | #define UCNV_HAS_SUPPLEMENTARY 1 |
michael@0 | 63 | #define UCNV_HAS_SURROGATES 2 |
michael@0 | 64 | |
michael@0 | 65 | typedef struct UConverterStaticData { /* +offset: size -- the subChar size is part of the .cnv file format (see UCNV_MAX_SUBCHAR_LEN), so keep field order and sizes */ |
michael@0 | 66 | uint32_t structSize; /* +0: 4 Size of this structure */ |
michael@0 | 67 | |
michael@0 | 68 | char name |
michael@0 | 69 | [UCNV_MAX_CONVERTER_NAME_LENGTH]; /* +4: 60 internal name of the converter- invariant chars */ |
michael@0 | 70 | |
michael@0 | 71 | int32_t codepage; /* +64: 4 codepage # (now IBM-$codepage) */ |
michael@0 | 72 | |
michael@0 | 73 | int8_t platform; /* +68: 1 platform of the converter (only IBM now) */ |
michael@0 | 74 | int8_t conversionType; /* +69: 1 conversion type */ |
michael@0 | 75 | |
michael@0 | 76 | int8_t minBytesPerChar; /* +70: 1 Minimum # bytes per char in this codepage */ |
michael@0 | 77 | int8_t maxBytesPerChar; /* +71: 1 Maximum # bytes output per UChar in this codepage */ |
michael@0 | 78 | |
michael@0 | 79 | uint8_t subChar[UCNV_MAX_SUBCHAR_LEN]; /* +72: 4 [note: 4 and 8 byte boundary] */ |
michael@0 | 80 | int8_t subCharLen; /* +76: 1 */ |
michael@0 | 81 | |
michael@0 | 82 | uint8_t hasToUnicodeFallback; /* +77: 1 boolean flag, declared uint8_t rather than UBool (NOTE(review): presumably so the size is consistent across platforms -- confirm) */ |
michael@0 | 83 | uint8_t hasFromUnicodeFallback; /* +78: 1 */ |
michael@0 | 84 | uint8_t unicodeMask; /* +79: 1 bit 0: has supplementary (UCNV_HAS_SUPPLEMENTARY) bit 1: has single surrogates (UCNV_HAS_SURROGATES) */ |
michael@0 | 85 | uint8_t subChar1; /* +80: 1 single-byte substitution character for IBM MBCS (0 if none) */ |
michael@0 | 86 | uint8_t reserved[19]; /* +81: 19 to round out the structure */ |
michael@0 | 87 | /* total size: 100 */ |
michael@0 | 88 | } UConverterStaticData; |
michael@0 | 89 | |
michael@0 | 90 | /* |
michael@0 | 91 | * Defines the UConverterSharedData struct, |
michael@0 | 92 | * the immutable, shared part of UConverter. |
michael@0 | 93 | */ |
michael@0 | 94 | struct UConverterSharedData { |
michael@0 | 95 | uint32_t structSize; /* Size of this structure */ |
michael@0 | 96 | uint32_t referenceCounter; /* used to count number of clients, 0xffffffff for static SharedData (ucnv_unload() requires a value != ~0) */ |
michael@0 | 97 | |
michael@0 | 98 | const void *dataMemory; /* from udata_openChoice() - for cleanup */ |
michael@0 | 99 | void *table; /* Unused. This used to be a UConverterTable - Pointer to conversion data - see mbcs below */ |
michael@0 | 100 | |
michael@0 | 101 | const UConverterStaticData *staticData; /* pointer to the static (non changing) data. */ |
michael@0 | 102 | |
michael@0 | 103 | UBool sharedDataCached; /* TRUE: shared data is in cache, don't destroy on ucnv_close() if 0 ref. FALSE: shared data isn't in the cache, do attempt to clean it up if the ref is 0 */ |
michael@0 | 104 | /*UBool staticDataOwned; TRUE if static data owned by shared data & should be freed with it, NEVER true for udata() loaded statics. This ignored variable was removed to make space for sharedDataCached. */ |
michael@0 | 105 | |
michael@0 | 106 | const UConverterImpl *impl; /* vtable-style struct of mostly function pointers */ |
michael@0 | 107 | |
michael@0 | 108 | /* initial values of some members of the mutable part of the object (struct UConverter) */ |
michael@0 | 109 | uint32_t toUnicodeStatus; |
michael@0 | 110 | |
michael@0 | 111 | /* |
michael@0 | 112 | * Shared data structures currently come in two flavors: |
michael@0 | 113 | * - readonly for built-in algorithmic converters |
michael@0 | 114 | * - allocated for MBCS, with a pointer to an allocated UConverterTable |
michael@0 | 115 | * which always has a UConverterMBCSTable |
michael@0 | 116 | * |
michael@0 | 117 | * To eliminate one allocation, I am making the UConverterMBCSTable |
michael@0 | 118 | * a member of the shared data. It is the last member so that static |
michael@0 | 119 | * definitions of UConverterSharedData work as before. |
michael@0 | 120 | * The table field above also remains to avoid updating all static |
michael@0 | 121 | * definitions, but is now unused. |
michael@0 | 122 | * |
michael@0 | 123 | * markus 2003-nov-07 |
michael@0 | 124 | */ |
michael@0 | 125 | UConverterMBCSTable mbcs; |
michael@0 | 126 | }; |
michael@0 | 127 | |
michael@0 | 128 | /* Defines a UConverter, the lightweight mutable part the user sees */ |
michael@0 | 129 | |
michael@0 | 130 | struct UConverter { |
michael@0 | 131 | /* |
michael@0 | 132 | * Error function pointer called when conversion issues |
michael@0 | 133 | * occur during a ucnv_fromUnicode call |
michael@0 | 134 | */ |
michael@0 | 135 | void (U_EXPORT2 *fromUCharErrorBehaviour) (const void *context, |
michael@0 | 136 | UConverterFromUnicodeArgs *args, |
michael@0 | 137 | const UChar *codeUnits, |
michael@0 | 138 | int32_t length, |
michael@0 | 139 | UChar32 codePoint, |
michael@0 | 140 | UConverterCallbackReason reason, |
michael@0 | 141 | UErrorCode *); |
michael@0 | 142 | /* |
michael@0 | 143 | * Error function pointer called when conversion issues |
michael@0 | 144 | * occur during a ucnv_toUnicode call (takes UConverterToUnicodeArgs, despite the "fromChar" name) |
michael@0 | 145 | */ |
michael@0 | 146 | void (U_EXPORT2 *fromCharErrorBehaviour) (const void *context, |
michael@0 | 147 | UConverterToUnicodeArgs *args, |
michael@0 | 148 | const char *codeUnits, |
michael@0 | 149 | int32_t length, |
michael@0 | 150 | UConverterCallbackReason reason, |
michael@0 | 151 | UErrorCode *); |
michael@0 | 152 | |
michael@0 | 153 | /* |
michael@0 | 154 | * Pointer to additional data that depends on the converter type. |
michael@0 | 155 | * Used by ISO 2022, SCSU, GB 18030 converters, possibly more. |
michael@0 | 156 | */ |
michael@0 | 157 | void *extraInfo; |
michael@0 | 158 | |
michael@0 | 159 | const void *fromUContext; /* NOTE(review): presumably the context argument for (*fromUCharErrorBehaviour) -- confirm */ |
michael@0 | 160 | const void *toUContext; /* NOTE(review): presumably the context argument for (*fromCharErrorBehaviour) -- confirm */ |
michael@0 | 161 | |
michael@0 | 162 | /* |
michael@0 | 163 | * Pointer to charset bytes for substitution string if subCharLen>0, |
michael@0 | 164 | * or pointer to Unicode string (UChar *) if subCharLen<0. |
michael@0 | 165 | * subCharLen==0 is equivalent to using a skip callback. |
michael@0 | 166 | * If the pointer is !=subUChars then it is allocated with |
michael@0 | 167 | * UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR bytes. |
michael@0 | 168 | * The subUChars field is declared as UChar[] not uint8_t[] to |
michael@0 | 169 | * guarantee alignment for UChars. |
michael@0 | 170 | */ |
michael@0 | 171 | uint8_t *subChars; |
michael@0 | 172 | |
michael@0 | 173 | UConverterSharedData *sharedData; /* Pointer to the shared immutable part of the converter object */ |
michael@0 | 174 | |
michael@0 | 175 | uint32_t options; /* options flags from UConverterOpen, may contain additional bits; see UCNV_OPTION_VERSION, UCNV_OPTION_SWAP_LFNL */ |
michael@0 | 176 | |
michael@0 | 177 | UBool sharedDataIsCached; /* TRUE: shared data is in cache, don't destroy on ucnv_close() if 0 ref. FALSE: shared data isn't in the cache, do attempt to clean it up if the ref is 0 */ |
michael@0 | 178 | UBool isCopyLocal; /* TRUE if UConverter is not owned and not released in ucnv_close() (stack-allocated, safeClone(), etc.) */ |
michael@0 | 179 | UBool isExtraLocal; /* TRUE if extraInfo is not owned and not released in ucnv_close() (stack-allocated, safeClone(), etc.) */ |
michael@0 | 180 | |
michael@0 | 181 | UBool useFallback; |
michael@0 | 182 | int8_t toULength; /* number of bytes in toUBytes */ |
michael@0 | 183 | uint8_t toUBytes[UCNV_MAX_CHAR_LEN-1];/* more "toU status"; keeps the bytes of the current character */ |
michael@0 | 184 | uint32_t toUnicodeStatus; /* Used to internalize stream status information */ |
michael@0 | 185 | int32_t mode; |
michael@0 | 186 | uint32_t fromUnicodeStatus; |
michael@0 | 187 | |
michael@0 | 188 | /* |
michael@0 | 189 | * More fromUnicode() status. Serves 3 purposes: |
michael@0 | 190 | * - keeps a lead surrogate between buffers (similar to toUBytes[]) |
michael@0 | 191 | * - keeps a lead surrogate at the end of the stream, |
michael@0 | 192 | * which the framework handles as truncated input |
michael@0 | 193 | * - if the fromUnicode() implementation returns to the framework |
michael@0 | 194 | * (ucnv.c ucnv_fromUnicode()), then the framework calls the callback |
michael@0 | 195 | * for this code point |
michael@0 | 196 | */ |
michael@0 | 197 | UChar32 fromUChar32; |
michael@0 | 198 | |
michael@0 | 199 | /* |
michael@0 | 200 | * value for ucnv_getMaxCharSize() |
michael@0 | 201 | * |
michael@0 | 202 | * usually simply copied from the static data, but ucnvmbcs.c modifies |
michael@0 | 203 | * the value depending on the converter type and options |
michael@0 | 204 | */ |
michael@0 | 205 | int8_t maxBytesPerUChar; |
michael@0 | 206 | |
michael@0 | 207 | int8_t subCharLen; /* length of the substitution string at subChars: >0 charset bytes, <0 UChars, 0 behaves like a skip callback (see subChars) */ |
michael@0 | 208 | int8_t invalidCharLength; |
michael@0 | 209 | int8_t charErrorBufferLength; /* number of valid bytes in charErrorBuffer */ |
michael@0 | 210 | |
michael@0 | 211 | int8_t invalidUCharLength; |
michael@0 | 212 | int8_t UCharErrorBufferLength; /* number of valid UChars in UCharErrorBuffer */ |
michael@0 | 213 | |
michael@0 | 214 | uint8_t subChar1; /* single-byte substitution character if different from subChar */ |
michael@0 | 215 | UBool useSubChar1; |
michael@0 | 216 | char invalidCharBuffer[UCNV_MAX_CHAR_LEN]; /* bytes from last error/callback situation */ |
michael@0 | 217 | uint8_t charErrorBuffer[UCNV_ERROR_BUFFER_LENGTH]; /* codepage output from Error functions */ |
michael@0 | 218 | UChar subUChars[UCNV_MAX_SUBCHAR_LEN/U_SIZEOF_UCHAR]; /* see subChars documentation */ |
michael@0 | 219 | |
michael@0 | 220 | UChar invalidUCharBuffer[U16_MAX_LENGTH]; /* UChars from last error/callback situation */ |
michael@0 | 221 | UChar UCharErrorBuffer[UCNV_ERROR_BUFFER_LENGTH]; /* unicode output from Error functions */ |
michael@0 | 222 | |
michael@0 | 223 | /* fields for conversion extension */ |
michael@0 | 224 | |
michael@0 | 225 | /* store previous UChars/chars to continue partial matches */ |
michael@0 | 226 | UChar32 preFromUFirstCP; /* >=0: partial match */ |
michael@0 | 227 | UChar preFromU[UCNV_EXT_MAX_UCHARS]; |
michael@0 | 228 | char preToU[UCNV_EXT_MAX_BYTES]; |
michael@0 | 229 | int8_t preFromULength, preToULength; /* negative: replay */ |
michael@0 | 230 | int8_t preToUFirstLength; /* length of first character */ |
michael@0 | 231 | |
michael@0 | 232 | /* new fields for ICU 4.0 */ |
michael@0 | 233 | UConverterCallbackReason toUCallbackReason; /* (*fromCharErrorBehaviour) reason, set when error is detected */ |
michael@0 | 234 | }; |
michael@0 | 235 | |
michael@0 | 236 | U_CDECL_END /* end of UConverter */ |
michael@0 | 237 | |
michael@0 | 238 | #define CONVERTER_FILE_EXTENSION ".cnv" |
michael@0 | 239 | |
michael@0 | 240 | |
michael@0 | 241 | /** |
michael@0 | 242 | * Return the number of all converter names (companion to ucnv_bld_getAvailableConverter(), which takes an index n). |
michael@0 | 243 | * @param pErrorCode The error code |
michael@0 | 244 | * @return the number of all converter names |
michael@0 | 245 | */ |
michael@0 | 246 | U_CFUNC uint16_t |
michael@0 | 247 | ucnv_bld_countAvailableConverters(UErrorCode *pErrorCode); |
michael@0 | 248 | |
michael@0 | 249 | /** |
michael@0 | 250 | * Return the (n)th converter name in mixed case, or NULL |
michael@0 | 251 | * if there is none (typically, if the data cannot be loaded). |
michael@0 | 252 | * 0<=n<ucnv_bld_countAvailableConverters() -- NOTE(review): this comment used to name ucnv_io_countAvailableConverters(); confirm it is the same count. |
michael@0 | 253 | * @param n The number specifies which converter name to get |
michael@0 | 254 | * @param pErrorCode The error code |
michael@0 | 255 | * @return the (n)th converter name in mixed case, or NULL if there is none. |
michael@0 | 256 | */ |
michael@0 | 257 | U_CFUNC const char * |
michael@0 | 258 | ucnv_bld_getAvailableConverter(uint16_t n, UErrorCode *pErrorCode); |
michael@0 | 259 | |
michael@0 | 260 | /** |
michael@0 | 261 | * Load a non-algorithmic converter and return a pointer to its UConverterSharedData. |
michael@0 | 262 | * If pkg==NULL, then this function must be called inside umtx_lock(&cnvCacheMutex). NOTE(review): pkg is not a parameter here; presumably a member of *pArgs -- confirm. |
michael@0 | 263 | */ |
michael@0 | 264 | U_CAPI UConverterSharedData * |
michael@0 | 265 | ucnv_load(UConverterLoadArgs *pArgs, UErrorCode *err); |
michael@0 | 266 | |
michael@0 | 267 | /** |
michael@0 | 268 | * Unload a non-algorithmic converter. |
michael@0 | 269 | * Precondition: sharedData->referenceCounter != ~0 (0xffffffff marks static SharedData, see UConverterSharedData), |
michael@0 | 270 | * and this function must be called inside umtx_lock(&cnvCacheMutex). |
michael@0 | 271 | */ |
michael@0 | 272 | U_CAPI void |
michael@0 | 273 | ucnv_unload(UConverterSharedData *sharedData); |
michael@0 | 274 | |
michael@0 | 275 | /** |
michael@0 | 276 | * Swap ICU .cnv conversion tables. Parameter and return-value semantics are not described here; see udataswp.h. |
michael@0 | 277 | * @internal |
michael@0 | 278 | */ |
michael@0 | 279 | U_CAPI int32_t U_EXPORT2 |
michael@0 | 280 | ucnv_swap(const UDataSwapper *ds, |
michael@0 | 281 | const void *inData, int32_t length, void *outData, |
michael@0 | 282 | UErrorCode *pErrorCode); |
michael@0 | 283 | |
michael@0 | 284 | #endif /* !UCONFIG_NO_CONVERSION */ |
michael@0 | 285 | |
michael@0 | 286 | #endif /* UCNV_BLD_H */ |