intl/icu/source/common/ucnv_bld.h

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 /*
michael@0 2 **********************************************************************
michael@0 3 * Copyright (C) 1999-2011 International Business Machines
michael@0 4 * Corporation and others. All Rights Reserved.
michael@0 5 **********************************************************************
michael@0 6 *
michael@0 7 *
michael@0 8 * ucnv_bld.h:
michael@0 9 * Contains internal data structure definitions
michael@0 10 * Created by Bertrand A. Damiba
michael@0 11 *
michael@0 12 * Change history:
michael@0 13 *
michael@0 14 * 06/29/2000 helena Major rewrite of the callback APIs.
michael@0 15 */
michael@0 16
michael@0 17 #ifndef UCNV_BLD_H
michael@0 18 #define UCNV_BLD_H
michael@0 19
michael@0 20 #include "unicode/utypes.h"
michael@0 21
michael@0 22 #if !UCONFIG_NO_CONVERSION
michael@0 23
michael@0 24 #include "unicode/ucnv.h"
michael@0 25 #include "unicode/ucnv_err.h"
michael@0 26 #include "unicode/utf16.h"
michael@0 27 #include "ucnv_cnv.h"
michael@0 28 #include "ucnvmbcs.h"
michael@0 29 #include "ucnv_ext.h"
michael@0 30 #include "udataswp.h"
michael@0 31
michael@0 32 /* number of elements in the charErrorBuffer[] and UCharErrorBuffer[] overflow buffers in UConverter, enough for escaping callbacks */
michael@0 33 #define UCNV_ERROR_BUFFER_LENGTH 32
michael@0 34
michael@0 35 /* at most 4 bytes per substitution character (part of the .cnv file format! see UConverterStaticData.subChar) */
michael@0 36 #define UCNV_MAX_SUBCHAR_LEN 4
michael@0 37
michael@0 38 /* at most 8 bytes per character (UTF-8 uses up to 6); sizes invalidCharBuffer[], while toUBytes[] is declared with UCNV_MAX_CHAR_LEN-1 elements */
michael@0 39 #define UCNV_MAX_CHAR_LEN 8
michael@0 40
michael@0 41 /* converter options bits: UCNV_OPTION_VERSION masks the low 4 bits, UCNV_OPTION_SWAP_LFNL is the next bit up */
michael@0 42 #define UCNV_OPTION_VERSION 0xf
michael@0 43 #define UCNV_OPTION_SWAP_LFNL 0x10
michael@0 44 /* extract the version bits from the options field of the object that cnv points to */
michael@0 45 #define UCNV_GET_VERSION(cnv) ((cnv)->options&UCNV_OPTION_VERSION)
michael@0 46
michael@0 47 U_CDECL_BEGIN /* We must declare the following as 'extern "C"' so that if ucnv
michael@0 48 itself is compiled under C++, the linkage of the funcptrs will
michael@0 49 work.
michael@0 50 */
michael@0 51
michael@0 52 union UConverterTable { /* conversion table storage; its only member is the MBCS table */
michael@0 53 UConverterMBCSTable mbcs;
michael@0 54 };
michael@0 55
michael@0 56 typedef union UConverterTable UConverterTable;
michael@0 57
michael@0 58 struct UConverterImpl; /* forward declaration: this header only stores a pointer to it (UConverterSharedData.impl) */
michael@0 59 typedef struct UConverterImpl UConverterImpl;
michael@0 60
michael@0 61 /** bit values for UConverterStaticData.unicodeMask */
michael@0 62 #define UCNV_HAS_SUPPLEMENTARY 1 /* bit 0: has supplementary */
michael@0 63 #define UCNV_HAS_SURROGATES 2 /* bit 1: has single surrogates */
michael@0 64
michael@0 65 typedef struct UConverterStaticData { /* fixed layout (part of the .cnv file format, see UCNV_MAX_SUBCHAR_LEN); member comments give +offset: size */
michael@0 66 uint32_t structSize; /* +0: 4 Size of this structure */
michael@0 67
michael@0 68 char name
michael@0 69 [UCNV_MAX_CONVERTER_NAME_LENGTH]; /* +4: 60 internal name of the converter - invariant chars */
michael@0 70
michael@0 71 int32_t codepage; /* +64: 4 codepage # (now IBM-$codepage) */
michael@0 72
michael@0 73 int8_t platform; /* +68: 1 platform of the converter (only IBM now) */
michael@0 74 int8_t conversionType; /* +69: 1 conversion type */
michael@0 75
michael@0 76 int8_t minBytesPerChar; /* +70: 1 Minimum # bytes per char in this codepage */
michael@0 77 int8_t maxBytesPerChar; /* +71: 1 Maximum # bytes output per UChar in this codepage */
michael@0 78
michael@0 79 uint8_t subChar[UCNV_MAX_SUBCHAR_LEN]; /* +72: 4 substitution character bytes [note: 4 and 8 byte boundary] */
michael@0 80 int8_t subCharLen; /* +76: 1 NOTE(review): presumably the number of bytes used in subChar[] - confirm */
michael@0 81
michael@0 82 uint8_t hasToUnicodeFallback; /* +77: 1 boolean flag; NOTE(review): presumably uint8_t rather than UBool so that the size is consistent across platforms - confirm */
michael@0 83 uint8_t hasFromUnicodeFallback; /* +78: 1 boolean flag, declared like hasToUnicodeFallback */
michael@0 84 uint8_t unicodeMask; /* +79: 1 bit 0: has supplementary (UCNV_HAS_SUPPLEMENTARY); bit 1: has single surrogates (UCNV_HAS_SURROGATES) */
michael@0 85 uint8_t subChar1; /* +80: 1 single-byte substitution character for IBM MBCS (0 if none) */
michael@0 86 uint8_t reserved[19]; /* +81: 19 padding to round out the structure to 100 bytes */
michael@0 87 /* total size: 100 */
michael@0 88 } UConverterStaticData;
michael@0 89
michael@0 90 /*
michael@0 91 * Defines the UConverterSharedData struct,
michael@0 92 * the immutable part of a converter that UConverter objects point to via UConverter.sharedData.
michael@0 93 */
michael@0 94 struct UConverterSharedData {
michael@0 95 uint32_t structSize; /* Size of this structure */
michael@0 96 uint32_t referenceCounter; /* used to count number of clients, 0xffffffff for static SharedData */
michael@0 97
michael@0 98 const void *dataMemory; /* from udata_openChoice() - for cleanup */
michael@0 99 void *table; /* Unused. This used to be a UConverterTable pointer to the conversion data - see the mbcs member below */
michael@0 100
michael@0 101 const UConverterStaticData *staticData; /* pointer to the static (non changing) data. */
michael@0 102
michael@0 103 UBool sharedDataCached; /* TRUE: shared data is in the cache, don't destroy it on ucnv_close() when the ref count is 0. FALSE: shared data isn't in the cache, do attempt to clean it up when the ref count is 0 */
michael@0 104 /*UBool staticDataOwned; TRUE if static data owned by shared data & should be freed with it, NEVER true for udata() loaded statics. This ignored variable was removed to make space for sharedDataCached. */
michael@0 105
michael@0 106 const UConverterImpl *impl; /* vtable-style struct of mostly function pointers */
michael@0 107
michael@0 108 /* initial values of some members of the mutable part of the object (see UConverter.toUnicodeStatus) */
michael@0 109 uint32_t toUnicodeStatus;
michael@0 110
michael@0 111 /*
michael@0 112 * Shared data structures currently come in two flavors:
michael@0 113 * - readonly for built-in algorithmic converters
michael@0 114 * - allocated for MBCS, with a pointer to an allocated UConverterTable
michael@0 115 * which always has a UConverterMBCSTable
michael@0 116 *
michael@0 117 * To eliminate one allocation, the UConverterMBCSTable was made
michael@0 118 * a member of the shared data. It is the last member so that static
michael@0 119 * definitions of UConverterSharedData work as before.
michael@0 120 * The table field above also remains to avoid updating all static
michael@0 121 * definitions, but is now unused.
michael@0 122 *
michael@0 123 * markus 2003-nov-07
michael@0 124 */
michael@0 125 UConverterMBCSTable mbcs;
michael@0 126 };
michael@0 127
michael@0 128 /* Defines a UConverter, the lightweight mutable part the user sees; the immutable part is reached through its sharedData pointer */
michael@0 129
michael@0 130 struct UConverter {
michael@0 131 /*
michael@0 132 * Error callback (function pointer) called when conversion issues
michael@0 133 * occur during a ucnv_fromUnicode call
michael@0 134 */
michael@0 135 void (U_EXPORT2 *fromUCharErrorBehaviour) (const void *context,
michael@0 136 UConverterFromUnicodeArgs *args,
michael@0 137 const UChar *codeUnits,
michael@0 138 int32_t length,
michael@0 139 UChar32 codePoint,
michael@0 140 UConverterCallbackReason reason,
michael@0 141 UErrorCode *);
michael@0 142 /*
michael@0 143 * Error callback (function pointer) called when conversion issues
michael@0 144 * occur during a ucnv_toUnicode call (despite the "fromChar" in its name)
michael@0 145 */
michael@0 146 void (U_EXPORT2 *fromCharErrorBehaviour) (const void *context,
michael@0 147 UConverterToUnicodeArgs *args,
michael@0 148 const char *codeUnits,
michael@0 149 int32_t length,
michael@0 150 UConverterCallbackReason reason,
michael@0 151 UErrorCode *);
michael@0 152
michael@0 153 /*
michael@0 154 * Pointer to additional data that depends on the converter type.
michael@0 155 * Used by ISO 2022, SCSU, GB 18030 converters, possibly more.
michael@0 156 */
michael@0 157 void *extraInfo;
michael@0 158
michael@0 159 const void *fromUContext; /* NOTE(review): presumably the context passed to fromUCharErrorBehaviour - confirm */
michael@0 160 const void *toUContext; /* NOTE(review): presumably the context passed to fromCharErrorBehaviour - confirm */
michael@0 161
michael@0 162 /*
michael@0 163 * Pointer to charset bytes for the substitution string if subCharLen>0,
michael@0 164 * or pointer to a Unicode string (UChar *) if subCharLen<0.
michael@0 165 * subCharLen==0 is equivalent to using a skip callback.
michael@0 166 * If the pointer is !=subUChars then it is allocated with
michael@0 167 * UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR bytes.
michael@0 168 * The subUChars field is declared as UChar[] not uint8_t[] to
michael@0 169 * guarantee alignment for UChars.
michael@0 170 */
michael@0 171 uint8_t *subChars;
michael@0 172
michael@0 173 UConverterSharedData *sharedData; /* Pointer to the shared immutable part of the converter object */
michael@0 174
michael@0 175 uint32_t options; /* options flags from UConverterOpen, may contain additional bits; see UCNV_OPTION_VERSION, UCNV_OPTION_SWAP_LFNL and UCNV_GET_VERSION() */
michael@0 176
michael@0 177 UBool sharedDataIsCached; /* TRUE: shared data is in the cache, don't destroy it on ucnv_close() when the ref count is 0. FALSE: shared data isn't in the cache, do attempt to clean it up when the ref count is 0 */
michael@0 178 UBool isCopyLocal; /* TRUE if UConverter is not owned and not released in ucnv_close() (stack-allocated, safeClone(), etc.) */
michael@0 179 UBool isExtraLocal; /* TRUE if extraInfo is not owned and not released in ucnv_close() (stack-allocated, safeClone(), etc.) */
michael@0 180
michael@0 181 UBool useFallback;
michael@0 182 int8_t toULength; /* number of bytes in toUBytes */
michael@0 183 uint8_t toUBytes[UCNV_MAX_CHAR_LEN-1];/* more "toU status"; keeps the bytes of the current character (note: one element fewer than UCNV_MAX_CHAR_LEN) */
michael@0 184 uint32_t toUnicodeStatus; /* Used to internalize stream status information */
michael@0 185 int32_t mode;
michael@0 186 uint32_t fromUnicodeStatus;
michael@0 187
michael@0 188 /*
michael@0 189 * More fromUnicode() status. Serves 3 purposes:
michael@0 190 * - keeps a lead surrogate between buffers (similar to toUBytes[])
michael@0 191 * - keeps a lead surrogate at the end of the stream,
michael@0 192 * which the framework handles as truncated input
michael@0 193 * - if the fromUnicode() implementation returns to the framework
michael@0 194 * (ucnv.c ucnv_fromUnicode()), then the framework calls the callback
michael@0 195 * for this code point
michael@0 196 */
michael@0 197 UChar32 fromUChar32;
michael@0 198
michael@0 199 /*
michael@0 200 * value for ucnv_getMaxCharSize()
michael@0 201 *
michael@0 202 * usually simply copied from the static data, but ucnvmbcs.c modifies
michael@0 203 * the value depending on the converter type and options
michael@0 204 */
michael@0 205 int8_t maxBytesPerUChar;
michael@0 206
michael@0 207 int8_t subCharLen; /* length of the codepage specific character sequence; its sign selects how subChars is interpreted (see subChars) */
michael@0 208 int8_t invalidCharLength;
michael@0 209 int8_t charErrorBufferLength; /* number of valid bytes in charErrorBuffer */
michael@0 210
michael@0 211 int8_t invalidUCharLength;
michael@0 212 int8_t UCharErrorBufferLength; /* number of valid UChars in UCharErrorBuffer */
michael@0 213
michael@0 214 uint8_t subChar1; /* single-byte substitution character if different from subChar */
michael@0 215 UBool useSubChar1;
michael@0 216 char invalidCharBuffer[UCNV_MAX_CHAR_LEN]; /* bytes from last error/callback situation */
michael@0 217 uint8_t charErrorBuffer[UCNV_ERROR_BUFFER_LENGTH]; /* codepage output from Error functions */
michael@0 218 UChar subUChars[UCNV_MAX_SUBCHAR_LEN/U_SIZEOF_UCHAR]; /* built-in, UChar-aligned storage that subChars may point to; see subChars documentation */
michael@0 219
michael@0 220 UChar invalidUCharBuffer[U16_MAX_LENGTH]; /* UChars from last error/callback situation */
michael@0 221 UChar UCharErrorBuffer[UCNV_ERROR_BUFFER_LENGTH]; /* unicode output from Error functions */
michael@0 222
michael@0 223 /* fields for conversion extension */
michael@0 224
michael@0 225 /* store previous UChars/chars to continue partial matches */
michael@0 226 UChar32 preFromUFirstCP; /* >=0: partial match */
michael@0 227 UChar preFromU[UCNV_EXT_MAX_UCHARS];
michael@0 228 char preToU[UCNV_EXT_MAX_BYTES];
michael@0 229 int8_t preFromULength, preToULength; /* lengths of preFromU[] and preToU[] contents; negative: replay */
michael@0 230 int8_t preToUFirstLength; /* length of first character */
michael@0 231
michael@0 232 /* new fields for ICU 4.0 */
michael@0 233 UConverterCallbackReason toUCallbackReason; /* (*fromCharErrorBehaviour) reason, set when error is detected */
michael@0 234 };
michael@0 235
michael@0 236 U_CDECL_END /* closes the 'extern "C"' section opened by U_CDECL_BEGIN above */
michael@0 237
michael@0 238 #define CONVERTER_FILE_EXTENSION ".cnv" /* NOTE(review): presumably the file name suffix of .cnv converter data files - confirm */
michael@0 239
michael@0 240
michael@0 241 /**
michael@0 242 * Return the number of all available converter names.
michael@0 243 * @param pErrorCode The ICU error code
michael@0 244 * @return the number of all available converter names, as a uint16_t
michael@0 245 */
michael@0 246 U_CFUNC uint16_t
michael@0 247 ucnv_bld_countAvailableConverters(UErrorCode *pErrorCode);
michael@0 248
michael@0 249 /**
michael@0 250 * Return the (n)th converter name in mixed case, or NULL
michael@0 251 * if there is none (typically, if the data cannot be loaded).
michael@0 252 * 0<=n<ucnv_bld_countAvailableConverters().
michael@0 253 * @param n The zero-based index that specifies which converter name to get
michael@0 254 * @param pErrorCode The ICU error code
michael@0 255 * @return the (n)th converter name in mixed case, or NULL if there is none.
michael@0 256 */
michael@0 257 U_CFUNC const char *
michael@0 258 ucnv_bld_getAvailableConverter(uint16_t n, UErrorCode *pErrorCode);
michael@0 259
michael@0 260 /**
michael@0 261 * Load a non-algorithmic converter described by pArgs and return a pointer to its UConverterSharedData.
michael@0 262 * If pkg==NULL (NOTE(review): presumably a field of *pArgs - confirm), then this function must be called inside umtx_lock(&cnvCacheMutex).
michael@0 263 */
michael@0 264 U_CAPI UConverterSharedData *
michael@0 265 ucnv_load(UConverterLoadArgs *pArgs, UErrorCode *err);
michael@0 266
michael@0 267 /**
michael@0 268 * Unload a non-algorithmic converter.
michael@0 269 * Preconditions: sharedData->referenceCounter != ~0 (the value used for static SharedData),
michael@0 270 * and this function must be called inside umtx_lock(&cnvCacheMutex).
michael@0 271 */
michael@0 272 U_CAPI void
michael@0 273 ucnv_unload(UConverterSharedData *sharedData);
michael@0 274
michael@0 275 /**
michael@0 276 * Swap ICU .cnv conversion tables; the parameters and result follow the swapper conventions in udataswp.h.
michael@0 277 * @internal
michael@0 278 */
michael@0 279 U_CAPI int32_t U_EXPORT2
michael@0 280 ucnv_swap(const UDataSwapper *ds,
michael@0 281 const void *inData, int32_t length, void *outData,
michael@0 282 UErrorCode *pErrorCode);
michael@0 283
michael@0 284 #endif /* !UCONFIG_NO_CONVERSION */
michael@0 285
michael@0 286 #endif /* UCNV_BLD_H */

mercurial