1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/common/ucnv_bld.h Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,286 @@ 1.4 +/* 1.5 +********************************************************************** 1.6 +* Copyright (C) 1999-2011 International Business Machines 1.7 +* Corporation and others. All Rights Reserved. 1.8 +********************************************************************** 1.9 +* 1.10 +* 1.11 +* ucnv_bld.h: 1.12 +* Contains internal data structure definitions 1.13 +* Created by Bertrand A. Damiba 1.14 +* 1.15 +* Change history: 1.16 +* 1.17 +* 06/29/2000 helena Major rewrite of the callback APIs. 1.18 +*/ 1.19 + 1.20 +#ifndef UCNV_BLD_H 1.21 +#define UCNV_BLD_H 1.22 + 1.23 +#include "unicode/utypes.h" 1.24 + 1.25 +#if !UCONFIG_NO_CONVERSION 1.26 + 1.27 +#include "unicode/ucnv.h" 1.28 +#include "unicode/ucnv_err.h" 1.29 +#include "unicode/utf16.h" 1.30 +#include "ucnv_cnv.h" 1.31 +#include "ucnvmbcs.h" 1.32 +#include "ucnv_ext.h" 1.33 +#include "udataswp.h" 1.34 + 1.35 +/* size of the overflow buffers in UConverter, enough for escaping callbacks */ 1.36 +#define UCNV_ERROR_BUFFER_LENGTH 32 1.37 + 1.38 +/* at most 4 bytes per substitution character (part of .cnv file format! see UConverterStaticData) */ 1.39 +#define UCNV_MAX_SUBCHAR_LEN 4 1.40 + 1.41 +/* at most 8 bytes per character in toUBytes[] (UTF-8 uses up to 6); note: UConverter.toUBytes[] itself is declared with UCNV_MAX_CHAR_LEN-1 elements, only invalidCharBuffer[] uses the full UCNV_MAX_CHAR_LEN */ 1.42 +#define UCNV_MAX_CHAR_LEN 8 1.43 + 1.44 +/* converter options bits (masks applied to UConverter.options) */ 1.45 +#define UCNV_OPTION_VERSION 0xf 1.46 +#define UCNV_OPTION_SWAP_LFNL 0x10 1.47 + /* extracts the version bits (UCNV_OPTION_VERSION mask) from the options field of the converter pointed to by cnv */ 1.48 +#define UCNV_GET_VERSION(cnv) ((cnv)->options&UCNV_OPTION_VERSION) 1.49 + 1.50 +U_CDECL_BEGIN /* We must declare the following as 'extern "C"' so that if ucnv 1.51 + itself is compiled under C++, the linkage of the funcptrs will 1.52 + work. 
1.53 + */ 1.54 + /* Union wrapper whose only member is the MBCS conversion table. */ 1.55 +union UConverterTable { 1.56 + UConverterMBCSTable mbcs; 1.57 +}; 1.58 + 1.59 +typedef union UConverterTable UConverterTable; 1.60 + /* UConverterImpl is only forward-declared here; its definition is not in this header. */ 1.61 +struct UConverterImpl; 1.62 +typedef struct UConverterImpl UConverterImpl; 1.63 + 1.64 +/** values for the unicodeMask (bit flags in UConverterStaticData.unicodeMask) */ 1.65 +#define UCNV_HAS_SUPPLEMENTARY 1 1.66 +#define UCNV_HAS_SURROGATES 2 1.67 + /* Fixed-layout static description of a converter. Each field comment gives "+offset: size" in bytes, adding up to the stated total of 100. The UCNV_MAX_SUBCHAR_LEN comment marks the subChar size as part of the .cnv file format, so this layout appears to be a binary format and should not be resized or reordered casually. */ 1.68 +typedef struct UConverterStaticData { /* +offset: size */ 1.69 + uint32_t structSize; /* +0: 4 Size of this structure */ 1.70 + 1.71 + char name 1.72 + [UCNV_MAX_CONVERTER_NAME_LENGTH]; /* +4: 60 internal name of the converter - invariant chars */ 1.73 + 1.74 + int32_t codepage; /* +64: 4 codepage # (now IBM-$codepage) */ 1.75 + 1.76 + int8_t platform; /* +68: 1 platform of the converter (only IBM now) */ 1.77 + int8_t conversionType; /* +69: 1 conversion type */ 1.78 + 1.79 + int8_t minBytesPerChar; /* +70: 1 Minimum # bytes per char in this codepage */ 1.80 + int8_t maxBytesPerChar; /* +71: 1 Maximum # bytes output per UChar in this codepage */ 1.81 + 1.82 + uint8_t subChar[UCNV_MAX_SUBCHAR_LEN]; /* +72: 4 [note: 4 and 8 byte boundary] */ 1.83 + int8_t subCharLen; /* +76: 1 */ 1.84 + 1.85 + uint8_t hasToUnicodeFallback; /* +77: 1 UBool-like flag; declared uint8_t rather than UBool, presumably so the field size is consistent across platforms */ 1.86 + uint8_t hasFromUnicodeFallback; /* +78: 1 */ 1.87 + uint8_t unicodeMask; /* +79: 1 bit 0 (UCNV_HAS_SUPPLEMENTARY): has supplementary; bit 1 (UCNV_HAS_SURROGATES): has single surrogates */ 1.88 + uint8_t subChar1; /* +80: 1 single-byte substitution character for IBM MBCS (0 if none) */ 1.89 + uint8_t reserved[19]; /* +81: 19 to round out the structure */ 1.90 + /* total size: 100 */ 1.91 +} UConverterStaticData; 1.92 + 1.93 +/* 1.94 + * Defines the UConverterSharedData struct, 1.95 + * the immutable, shared part of UConverter. 
1.96 + */ 1.97 +struct UConverterSharedData { 1.98 + uint32_t structSize; /* Size of this structure */ 1.99 + uint32_t referenceCounter; /* used to count number of clients, 0xffffffff for static SharedData */ 1.100 + 1.101 + const void *dataMemory; /* from udata_openChoice() - for cleanup */ 1.102 + void *table; /* Unused. This used to be a UConverterTable - Pointer to conversion data - see mbcs below */ 1.103 + 1.104 + const UConverterStaticData *staticData; /* pointer to the static (non changing) data. */ 1.105 + 1.106 + UBool sharedDataCached; /* TRUE: shared data is in cache, don't destroy on ucnv_close() if 0 ref. FALSE: shared data isn't in the cache, do attempt to clean it up if the ref is 0 */ 1.107 + /*UBool staticDataOwned; TRUE if static data owned by shared data & should be freed with it, NEVER true for udata() loaded statics. This ignored variable was removed to make space for sharedDataCached. */ 1.108 + 1.109 + const UConverterImpl *impl; /* vtable-style struct of mostly function pointers */ 1.110 + 1.111 + /* initial values of some members of the mutable part of object (UConverter has a toUnicodeStatus field of the same name and type) */ 1.112 + uint32_t toUnicodeStatus; 1.113 + 1.114 + /* 1.115 + * Shared data structures currently come in two flavors: 1.116 + * - readonly for built-in algorithmic converters 1.117 + * - allocated for MBCS, with a pointer to an allocated UConverterTable 1.118 + * which always has a UConverterMBCSTable 1.119 + * 1.120 + * To eliminate one allocation, I am making the UConverterMBCSTable 1.121 + * a member of the shared data. It is the last member so that static 1.122 + * definitions of UConverterSharedData work as before. 1.123 + * The table field above also remains to avoid updating all static 1.124 + * definitions, but is now unused. 
1.125 + * 1.126 + * markus 2003-nov-07 1.127 + */ 1.128 + UConverterMBCSTable mbcs; 1.129 +}; 1.130 + 1.131 +/* Defines a UConverter, the lightweight mutable part the user sees */ 1.132 + 1.133 +struct UConverter { 1.134 + /* 1.135 + * Error function pointer called when conversion issues 1.136 + * occur during a ucnv_fromUnicode call 1.137 + */ 1.138 + void (U_EXPORT2 *fromUCharErrorBehaviour) (const void *context, 1.139 + UConverterFromUnicodeArgs *args, 1.140 + const UChar *codeUnits, 1.141 + int32_t length, 1.142 + UChar32 codePoint, 1.143 + UConverterCallbackReason reason, 1.144 + UErrorCode *); 1.145 + /* 1.146 + * Error function pointer called when conversion issues 1.147 + * occur during a ucnv_toUnicode call 1.148 + */ 1.149 + void (U_EXPORT2 *fromCharErrorBehaviour) (const void *context, 1.150 + UConverterToUnicodeArgs *args, 1.151 + const char *codeUnits, 1.152 + int32_t length, 1.153 + UConverterCallbackReason reason, 1.154 + UErrorCode *); 1.155 + 1.156 + /* 1.157 + * Pointer to additional data that depends on the converter type. 1.158 + * Used by ISO 2022, SCSU, GB 18030 converters, possibly more. 1.159 + */ 1.160 + void *extraInfo; 1.161 + /* NOTE(review): presumably the values passed as the context argument of the two error callbacks above - confirm against the callers */ 1.162 + const void *fromUContext; 1.163 + const void *toUContext; 1.164 + 1.165 + /* 1.166 + * Pointer to charset bytes for substitution string if subCharLen>0, 1.167 + * or pointer to Unicode string (UChar *) if subCharLen<0. 1.168 + * subCharLen==0 is equivalent to using a skip callback. 1.169 + * If the pointer is !=subUChars then it is allocated with 1.170 + * UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR bytes. 1.171 + * The subUChars field is declared as UChar[] not uint8_t[] to 1.172 + * guarantee alignment for UChars. 
1.173 + */ 1.174 + uint8_t *subChars; 1.175 + 1.176 + UConverterSharedData *sharedData; /* Pointer to the shared immutable part of the converter object */ 1.177 + 1.178 + uint32_t options; /* options flags from UConverterOpen, may contain additional bits; see UCNV_OPTION_VERSION, UCNV_OPTION_SWAP_LFNL and UCNV_GET_VERSION() */ 1.179 + 1.180 + UBool sharedDataIsCached; /* TRUE: shared data is in cache, don't destroy on ucnv_close() if 0 ref. FALSE: shared data isn't in the cache, do attempt to clean it up if the ref is 0 */ 1.181 + UBool isCopyLocal; /* TRUE if UConverter is not owned and not released in ucnv_close() (stack-allocated, safeClone(), etc.) */ 1.182 + UBool isExtraLocal; /* TRUE if extraInfo is not owned and not released in ucnv_close() (stack-allocated, safeClone(), etc.) */ 1.183 + 1.184 + UBool useFallback; 1.185 + int8_t toULength; /* number of bytes in toUBytes */ 1.186 + uint8_t toUBytes[UCNV_MAX_CHAR_LEN-1];/* more "toU status"; keeps the bytes of the current character (array holds UCNV_MAX_CHAR_LEN-1 bytes) */ 1.187 + uint32_t toUnicodeStatus; /* Used to internalize stream status information */ 1.188 + int32_t mode; 1.189 + uint32_t fromUnicodeStatus; 1.190 + 1.191 + /* 1.192 + * More fromUnicode() status. 
Serves 3 purposes: 1.193 + * - keeps a lead surrogate between buffers (similar to toUBytes[]) 1.194 + * - keeps a lead surrogate at the end of the stream, 1.195 + * which the framework handles as truncated input 1.196 + * - if the fromUnicode() implementation returns to the framework 1.197 + * (ucnv.c ucnv_fromUnicode()), then the framework calls the callback 1.198 + * for this code point 1.199 + */ 1.200 + UChar32 fromUChar32; 1.201 + 1.202 + /* 1.203 + * value for ucnv_getMaxCharSize() 1.204 + * 1.205 + * usually simply copied from the static data, but ucnvmbcs.c modifies 1.206 + * the value depending on the converter type and options 1.207 + */ 1.208 + int8_t maxBytesPerUChar; 1.209 + 1.210 + int8_t subCharLen; /* length of the codepage specific character sequence; its sign selects how subChars is interpreted (see the subChars documentation) */ 1.211 + int8_t invalidCharLength; 1.212 + int8_t charErrorBufferLength; /* number of valid bytes in charErrorBuffer */ 1.213 + 1.214 + int8_t invalidUCharLength; 1.215 + int8_t UCharErrorBufferLength; /* number of valid UChars in UCharErrorBuffer */ 1.216 + 1.217 + uint8_t subChar1; /* single-byte substitution character if different from subChar */ 1.218 + UBool useSubChar1; 1.219 + char invalidCharBuffer[UCNV_MAX_CHAR_LEN]; /* bytes from last error/callback situation */ 1.220 + uint8_t charErrorBuffer[UCNV_ERROR_BUFFER_LENGTH]; /* codepage output from Error functions */ 1.221 + UChar subUChars[UCNV_MAX_SUBCHAR_LEN/U_SIZEOF_UCHAR]; /* see subChars documentation */ 1.222 + 1.223 + UChar invalidUCharBuffer[U16_MAX_LENGTH]; /* UChars from last error/callback situation */ 1.224 + UChar UCharErrorBuffer[UCNV_ERROR_BUFFER_LENGTH]; /* unicode output from Error functions */ 1.225 + 1.226 + /* fields for conversion extension */ 1.227 + 1.228 + /* store previous UChars/chars to continue partial matches */ 1.229 + UChar32 preFromUFirstCP; /* >=0: partial match */ 1.230 + UChar preFromU[UCNV_EXT_MAX_UCHARS]; 1.231 + char preToU[UCNV_EXT_MAX_BYTES]; 1.232 + int8_t preFromULength, preToULength; /* negative: replay 
*/ 1.233 + int8_t preToUFirstLength; /* length of first character */ 1.234 + 1.235 + /* new fields for ICU 4.0 */ 1.236 + UConverterCallbackReason toUCallbackReason; /* (*fromCharErrorBehaviour) reason, set when error is detected */ 1.237 +}; 1.238 + 1.239 +U_CDECL_END /* end of UConverter; closes the U_CDECL_BEGIN block opened above */ 1.240 + 1.241 +#define CONVERTER_FILE_EXTENSION ".cnv" 1.242 + 1.243 + 1.244 +/** 1.245 + * Return the number of all converter names. 1.246 + * @param pErrorCode The error code 1.247 + * @return the number of all converter names 1.248 + */ 1.249 +U_CFUNC uint16_t 1.250 +ucnv_bld_countAvailableConverters(UErrorCode *pErrorCode); 1.251 + 1.252 +/** 1.253 + * Return the (n)th converter name in mixed case, or NULL 1.254 + * if there is none (typically, if the data cannot be loaded). 1.255 + * 0<=n<ucnv_bld_countAvailableConverters(). 1.256 + * @param n The number specifies which converter name to get 1.257 + * @param pErrorCode The error code 1.258 + * @return the (n)th converter name in mixed case, or NULL if there is none. 1.259 + */ 1.260 +U_CFUNC const char * 1.261 +ucnv_bld_getAvailableConverter(uint16_t n, UErrorCode *pErrorCode); 1.262 + 1.263 +/** 1.264 + * Load a non-algorithmic converter. 1.265 + * If pkg==NULL, then this function must be called inside umtx_lock(&cnvCacheMutex). 1.266 + * NOTE(review): pkg is not a parameter of this function; presumably it is a field of pArgs - confirm against UConverterLoadArgs. */ 1.267 +U_CAPI UConverterSharedData * 1.268 +ucnv_load(UConverterLoadArgs *pArgs, UErrorCode *err); 1.269 + 1.270 +/** 1.271 + * Unload a non-algorithmic converter. 1.272 + * Precondition: sharedData->referenceCounter != ~0 (0xffffffff is the value documented on referenceCounter for static SharedData), 1.273 + * and this function must be called inside umtx_lock(&cnvCacheMutex). 1.274 + */ 1.275 +U_CAPI void 1.276 +ucnv_unload(UConverterSharedData *sharedData); 1.277 + 1.278 +/** 1.279 + * Swap ICU .cnv conversion tables. See udataswp.h. 
1.280 + * @internal 1.281 + */ 1.282 +U_CAPI int32_t U_EXPORT2 1.283 +ucnv_swap(const UDataSwapper *ds, 1.284 + const void *inData, int32_t length, void *outData, 1.285 + UErrorCode *pErrorCode); 1.286 + 1.287 +#endif /* !UCONFIG_NO_CONVERSION */ 1.288 + 1.289 +#endif /* UCNV_BLD_H */