Sat, 03 Jan 2015 20:18:00 +0100
Conditionally enable double key logic according to:
private browsing mode or privacy.thirdparty.isolate preference and
implement in GetCookieStringCommon and FindCookie where it counts...
With some reservations about how to convince FindCookie users to test the
condition and pass a nullptr when disabling double key logic.
michael@0 | 1 | /* |
michael@0 | 2 | ****************************************************************************** |
michael@0 | 3 | * |
michael@0 | 4 | * Copyright (C) 2003-2013, International Business Machines |
michael@0 | 5 | * Corporation and others. All Rights Reserved. |
michael@0 | 6 | * |
michael@0 | 7 | ****************************************************************************** |
michael@0 | 8 | * file name: ucnv_ext.h |
michael@0 | 9 | * encoding: US-ASCII |
michael@0 | 10 | * tab size: 8 (not used) |
michael@0 | 11 | * indentation:4 |
michael@0 | 12 | * |
michael@0 | 13 | * created on: 2003jun13 |
michael@0 | 14 | * created by: Markus W. Scherer |
michael@0 | 15 | * |
michael@0 | 16 | * Conversion extensions |
michael@0 | 17 | */ |
michael@0 | 18 | |
michael@0 | 19 | #ifndef __UCNV_EXT_H__ |
michael@0 | 20 | #define __UCNV_EXT_H__ |
michael@0 | 21 | |
michael@0 | 22 | #include "unicode/utypes.h" |
michael@0 | 23 | |
michael@0 | 24 | #if !UCONFIG_NO_CONVERSION |
michael@0 | 25 | |
michael@0 | 26 | #include "unicode/ucnv.h" |
michael@0 | 27 | #include "ucnv_cnv.h" |
michael@0 | 28 | |
michael@0 | 29 | /* |
michael@0 | 30 | * See icuhtml/design/conversion/conversion_extensions.html |
michael@0 | 31 | * |
michael@0 | 32 | * Conversion extensions serve three purposes: |
michael@0 | 33 | * 1. They support m:n mappings. |
michael@0 | 34 | * 2. They support extension-only conversion files that are used together |
michael@0 | 35 | * with the regular conversion data in base files. |
michael@0 | 36 | * 3. They support mappings with more complicated meta data, |
michael@0 | 37 | * for example "good one-way" mappings (|4). |
michael@0 | 38 | * |
michael@0 | 39 | * A base file may contain an extension table (explicitly requested or |
michael@0 | 40 | * implicitly generated for m:n mappings), but its extension table is not |
michael@0 | 41 | * used when an extension-only file is used. |
michael@0 | 42 | * |
michael@0 | 43 | * It is an error if a base file contains any regular (not extension) mapping |
michael@0 | 44 | * from the same sequence as a mapping in the extension file |
michael@0 | 45 | * because the base mapping would hide the extension mapping. |
michael@0 | 46 | * |
michael@0 | 47 | * |
michael@0 | 48 | * Data for conversion extensions: |
michael@0 | 49 | * |
michael@0 | 50 | * One set of data structures per conversion direction (to/from Unicode). |
michael@0 | 51 | * The data structures are sorted by input units to allow for binary search. |
michael@0 | 52 | * Input sequences of more than one unit are handled like contraction tables |
michael@0 | 53 | * in collation: |
michael@0 | 54 | * The lookup value of a unit points to another table that is to be searched |
michael@0 | 55 | * for the next unit, recursively. |
michael@0 | 56 | * |
michael@0 | 57 | * For conversion from Unicode, the initial code point is looked up in |
michael@0 | 58 | * a 3-stage trie for speed, |
michael@0 | 59 | * with an additional table of unique results to save space. |
michael@0 | 60 | * |
michael@0 | 61 | * Long output strings are stored in separate arrays, with length and index |
michael@0 | 62 | * in the lookup tables. |
michael@0 | 63 | * Output results also include a flag distinguishing roundtrip from |
michael@0 | 64 | * (reverse) fallback mappings. |
michael@0 | 65 | * |
michael@0 | 66 | * Input Unicode strings must not begin or end with unpaired surrogates |
michael@0 | 67 | * to avoid problems with matches on parts of surrogate pairs. |
michael@0 | 68 | * |
michael@0 | 69 | * Mappings from multiple characters (code points or codepage state |
michael@0 | 70 | * table sequences) must be searched preferring the longest match. |
michael@0 | 71 | * For this to work and be efficient, the variable-width table must contain |
michael@0 | 72 | * all mappings that contain prefixes of the multiple characters. |
michael@0 | 73 | * If an extension table is built on top of a base table in another file |
michael@0 | 74 | * and a base table entry is a prefix of a multi-character mapping, then |
michael@0 | 75 | * this is an error. |
michael@0 | 76 | * |
michael@0 | 77 | * |
michael@0 | 78 | * Implementation note: |
michael@0 | 79 | * |
michael@0 | 80 | * Currently, the parser and several checks in the code limit the number |
michael@0 | 81 | * of UChars or bytes in a mapping to |
michael@0 | 82 | * UCNV_EXT_MAX_UCHARS and UCNV_EXT_MAX_BYTES, respectively, |
michael@0 | 83 | * which are output value limits in the data structure. |
michael@0 | 84 | * |
michael@0 | 85 | * For input, this is not strictly necessary - it is a hard limit only for the |
michael@0 | 86 | * buffers in UConverter that are used to store partial matches. |
michael@0 | 87 | * |
michael@0 | 88 | * Input sequences could otherwise be arbitrarily long if partial matches |
michael@0 | 89 | * need not be stored (i.e., if a sequence does not span several buffers with too |
michael@0 | 90 | * many units before the last buffer), although then results would differ |
michael@0 | 91 | * depending on whether partial matches exceed the limits or not, |
michael@0 | 92 | * which depends on the pattern of buffer sizes. |
michael@0 | 93 | * |
michael@0 | 94 | * |
michael@0 | 95 | * Data structure: |
michael@0 | 96 | * |
michael@0 | 97 | * int32_t indexes[>=32]; |
michael@0 | 98 | * |
michael@0 | 99 | * Array of indexes and lengths etc. The length of the array is at least 32. |
michael@0 | 100 | * The actual length is stored in indexes[0] to be forward compatible. |
michael@0 | 101 | * |
michael@0 | 102 | * Each index to another array is the number of bytes from indexes[]. |
michael@0 | 103 | * Each length of an array is the number of array base units in that array. |
michael@0 | 104 | * |
michael@0 | 105 | * Some of the structures may not be present, in which case their indexes |
michael@0 | 106 | * and lengths are 0. |
michael@0 | 107 | * |
michael@0 | 108 | * Usage of indexes[i]: |
michael@0 | 109 | * [0] length of indexes[] |
michael@0 | 110 | * |
michael@0 | 111 | * // to Unicode table |
michael@0 | 112 | * [1] index of toUTable[] (array of uint32_t) |
michael@0 | 113 | * [2] length of toUTable[] |
michael@0 | 114 | * [3] index of toUUChars[] (array of UChar) |
michael@0 | 115 | * [4] length of toUUChars[] |
michael@0 | 116 | * |
michael@0 | 117 | * // from Unicode table, not for the initial code point |
michael@0 | 118 | * [5] index of fromUTableUChars[] (array of UChar) |
michael@0 | 119 | * [6] index of fromUTableValues[] (array of uint32_t) |
michael@0 | 120 | * [7] length of fromUTableUChars[] and fromUTableValues[] |
michael@0 | 121 | * [8] index of fromUBytes[] (array of char) |
michael@0 | 122 | * [9] length of fromUBytes[] |
michael@0 | 123 | * |
michael@0 | 124 | * // from Unicode trie for initial code point lookup |
michael@0 | 125 | * [10] index of fromUStage12[] (combined array of uint16_t for stages 1 & 2) |
michael@0 | 126 | * [11] length of stage 1 portion of fromUStage12[] |
michael@0 | 127 | * [12] length of fromUStage12[] |
michael@0 | 128 | * [13] index of fromUStage3[] (array of uint16_t indexes into fromUStage3b[]) |
michael@0 | 129 | * [14] length of fromUStage3[] |
michael@0 | 130 | * [15] index of fromUStage3b[] (array of uint32_t like fromUTableValues[]) |
michael@0 | 131 | * [16] length of fromUStage3b[] |
michael@0 | 132 | * |
michael@0 | 133 | * [17] Bit field containing numbers of bytes: |
michael@0 | 134 | * 31..24 reserved, 0 |
michael@0 | 135 | * 23..16 maximum input bytes |
michael@0 | 136 | * 15.. 8 maximum output bytes |
michael@0 | 137 | * 7.. 0 maximum bytes per UChar |
michael@0 | 138 | * |
michael@0 | 139 | * [18] Bit field containing numbers of UChars: |
michael@0 | 140 | * 31..24 reserved, 0 |
michael@0 | 141 | * 23..16 maximum input UChars |
michael@0 | 142 | * 15.. 8 maximum output UChars |
michael@0 | 143 | * 7.. 0 maximum UChars per byte |
michael@0 | 144 | * |
michael@0 | 145 | * [19] Bit field containing flags: |
michael@0 | 146 | * (extension table unicodeMask) |
michael@0 | 147 | * 1 UCNV_HAS_SURROGATES flag for the extension table |
michael@0 | 148 | * 0 UCNV_HAS_SUPPLEMENTARY flag for the extension table |
michael@0 | 149 | * |
michael@0 | 150 | * [20]..[30] reserved, 0 |
michael@0 | 151 | * [31] number of bytes for the entire extension structure |
michael@0 | 152 | * [>31] reserved; there are indexes[0] indexes |
michael@0 | 153 | * |
michael@0 | 154 | * |
michael@0 | 155 | * uint32_t toUTable[]; |
michael@0 | 156 | * |
michael@0 | 157 | * Array of byte/value pairs for lookups for toUnicode conversion. |
michael@0 | 158 | * The array is partitioned into sections like collation contraction tables. |
michael@0 | 159 | * Each section contains one word with the number of following words and |
michael@0 | 160 | * a default value for when the lookup in this section yields no match. |
michael@0 | 161 | * |
michael@0 | 162 | * A section is sorted in ascending order of input bytes, |
michael@0 | 163 | * allowing for fast linear or binary searches. |
michael@0 | 164 | * The builder may store entries for a contiguous range of byte values |
michael@0 | 165 | * (compare difference between the first and last one with count), |
michael@0 | 166 | * which then allows for direct array access. |
michael@0 | 167 | * The builder should always do this for the initial table section. |
michael@0 | 168 | * |
michael@0 | 169 | * Entries may have 0 values, see below. |
michael@0 | 170 | * No two entries in a section have the same byte values. |
michael@0 | 171 | * |
michael@0 | 172 | * Each uint32_t contains an input byte value in bits 31..24 and the |
michael@0 | 173 | * corresponding lookup value in bits 23..0. |
michael@0 | 174 | * Interpret the value as follows: |
michael@0 | 175 | * if(value==0) { |
michael@0 | 176 | * no match, see below |
michael@0 | 177 | * } else if(value<0x1f0000) { |
michael@0 | 178 | * partial match - use value as index to the next toUTable section |
michael@0 | 179 | * and match the next unit; (value indexes toUTable[value]) |
michael@0 | 180 | * } else { |
michael@0 | 181 | * if(bit 23 set) { |
michael@0 | 182 | * roundtrip; |
michael@0 | 183 | * } else { |
michael@0 | 184 | * fallback; |
michael@0 | 185 | * } |
michael@0 | 186 | * unset value bit 23; |
michael@0 | 187 | * if(value<=0x2fffff) { |
michael@0 | 188 | * (value-0x1f0000) is a code point; (BMP: value<=0x1fffff) |
michael@0 | 189 | * } else { |
michael@0 | 190 | * bits 17..0 (value&0x3ffff) is an index to |
michael@0 | 191 | * the result UChars in toUUChars[]; (0 indexes toUUChars[0]) |
michael@0 | 192 | * length of the result=((value>>18)-12); (length=0..19) |
michael@0 | 193 | * } |
michael@0 | 194 | * } |
michael@0 | 195 | * |
michael@0 | 196 | * The first word in a section contains the number of following words in the |
michael@0 | 197 | * input byte position (bits 31..24, number=1..0xff). |
michael@0 | 198 | * The value of the initial word is used when the current byte is not found |
michael@0 | 199 | * in this section. |
michael@0 | 200 | * If the value is not 0, then it represents a result as above. |
michael@0 | 201 | * If the value is 0, then the search has to return a shorter match with an |
michael@0 | 202 | * earlier default value as the result, or result in "unmappable" even for the |
michael@0 | 203 | * initial bytes. |
michael@0 | 204 | * If the value is 0 for the initial toUTable entry, then the initial byte |
michael@0 | 205 | * does not start any mapping input. |
michael@0 | 206 | * |
michael@0 | 207 | * |
michael@0 | 208 | * UChar toUUChars[]; |
michael@0 | 209 | * |
michael@0 | 210 | * Contains toUnicode mapping results, stored as sequences of UChars. |
michael@0 | 211 | * Indexes and lengths stored in the toUTable[]. |
michael@0 | 212 | * |
michael@0 | 213 | * |
michael@0 | 214 | * UChar fromUTableUChars[]; |
michael@0 | 215 | * uint32_t fromUTableValues[]; |
michael@0 | 216 | * |
michael@0 | 217 | * The fromUTable is split into two arrays, but works otherwise much like |
michael@0 | 218 | * the toUTable. The array is partitioned into sections like collation |
michael@0 | 219 | * contraction tables and toUTable. |
michael@0 | 220 | * A row in the table consists of same-index entries in fromUTableUChars[] |
michael@0 | 221 | * and fromUTableValues[]. |
michael@0 | 222 | * |
michael@0 | 223 | * Interpret a value as follows: |
michael@0 | 224 | * if(value==0) { |
michael@0 | 225 | * no match, see below |
michael@0 | 226 | * } else if(value<=0xffffff) { (bits 31..24 are 0) |
michael@0 | 227 | * partial match - use value as index to the next fromUTable section |
michael@0 | 228 | * and match the next unit; (value indexes fromUTable[value]) |
michael@0 | 229 | * } else { |
michael@0 | 230 | * if(value==0x80000001) { |
michael@0 | 231 | * return no mapping, but request for <subchar1>; |
michael@0 | 232 | * } |
michael@0 | 233 | * if(bit 31 set) { |
michael@0 | 234 | * roundtrip (|0); |
michael@0 | 235 | * } else if(bit 30 set) { |
michael@0 | 236 | * "good one-way" mapping (|4); -- new in ICU4C 51, _MBCSHeader.version 5.4/4.4 |
michael@0 | 237 | * } else { |
michael@0 | 238 | * normal fallback (|1); |
michael@0 | 239 | * } |
michael@0 | 240 | * // bit 29 reserved, 0 |
michael@0 | 241 | * length=(value>>24)&0x1f; (bits 28..24) |
michael@0 | 242 | * if(length==1..3) { |
michael@0 | 243 | * bits 23..0 contain 1..3 bytes, padded with 00s on the left; |
michael@0 | 244 | * } else { |
michael@0 | 245 | * bits 23..0 (value&0xffffff) is an index to |
michael@0 | 246 | * the result bytes in fromUBytes[]; (0 indexes fromUBytes[0]) |
michael@0 | 247 | * } |
michael@0 | 248 | * } |
michael@0 | 249 | * |
michael@0 | 250 | * The first pair in a section contains the number of following pairs in the |
michael@0 | 251 | * UChar position (16 bits, number=1..0xffff). |
michael@0 | 252 | * The value of the initial pair is used when the current UChar is not found |
michael@0 | 253 | * in this section. |
michael@0 | 254 | * If the value is not 0, then it represents a result as above. |
michael@0 | 255 | * If the value is 0, then the search has to return a shorter match with an |
michael@0 | 256 | * earlier default value as the result, or result in "unmappable" even for the |
michael@0 | 257 | * initial UChars. |
michael@0 | 258 | * |
michael@0 | 259 | * If the from Unicode trie is present, then the from Unicode search tables |
michael@0 | 260 | * are not used for initial code points. |
michael@0 | 261 | * In this case, the first entries (index 0) in the tables are not used |
michael@0 | 262 | * (reserved, set to 0) because a value of 0 is used in trie results |
michael@0 | 263 | * to indicate no mapping. |
michael@0 | 264 | * |
michael@0 | 265 | * |
michael@0 | 266 | * uint16_t fromUStage12[]; |
michael@0 | 267 | * |
michael@0 | 268 | * Stages 1 & 2 of a trie that maps an initial code point. |
michael@0 | 269 | * Indexes in stage 1 are all offset by the length of stage 1 so that the |
michael@0 | 270 | * same array pointer can be used for both stages. |
michael@0 | 271 | * If (c>>10)>=(length of stage 1) then c does not start any mapping. |
michael@0 | 272 | * Same bit distribution as for regular conversion tries. |
michael@0 | 273 | * |
michael@0 | 274 | * |
michael@0 | 275 | * uint16_t fromUStage3[]; |
michael@0 | 276 | * uint32_t fromUStage3b[]; |
michael@0 | 277 | * |
michael@0 | 278 | * Stage 3 of the trie. The first array simply contains indexes to the second, |
michael@0 | 279 | * which contains words in the same format as fromUTableValues[]. |
michael@0 | 280 | * Use a stage 3 granularity of 4, which allows for 256k stage 3 entries, |
michael@0 | 281 | * and 16-bit entries in stage 3 allow for 64k stage 3b entries. |
michael@0 | 282 | * The stage 3 granularity means that the stage 2 entry needs to be left-shifted. |
michael@0 | 283 | * |
michael@0 | 284 | * Two arrays are used because it is expected that more than half of the stage 3 |
michael@0 | 285 | * entries will be zero. The 16-bit index stage 3 array saves space even |
michael@0 | 286 | * considering storing a total of 6 bytes per non-zero entry in both arrays |
michael@0 | 287 | * together. |
michael@0 | 288 | * Using a stage 3 granularity of >1 diminishes the compactability in that stage |
michael@0 | 289 | * but provides a larger effective addressing space in stage 2. |
michael@0 | 290 | * All but the final result stage use 16-bit entries to save space. |
michael@0 | 291 | * |
michael@0 | 292 | * fromUStage3b[] contains a zero for "no mapping" at its index 0, |
michael@0 | 293 | * and may contain UCNV_EXT_FROM_U_SUBCHAR1 at index 1 for "<subchar1> SUB mapping" |
michael@0 | 294 | * (i.e., "no mapping" with preference for <subchar1> rather than <subchar>), |
michael@0 | 295 | * and all other items are unique non-zero results. |
michael@0 | 296 | * |
michael@0 | 297 | * The default value of a fromUTableValues[] section that is referenced |
michael@0 | 298 | * _directly_ from a fromUStage3b[] item may also be UCNV_EXT_FROM_U_SUBCHAR1, |
michael@0 | 299 | * but this value must not occur anywhere else in fromUTableValues[] |
michael@0 | 300 | * because "no mapping" is always a property of a single code point, |
michael@0 | 301 | * never of multiple. |
michael@0 | 302 | * |
michael@0 | 303 | * |
michael@0 | 304 | * char fromUBytes[]; |
michael@0 | 305 | * |
michael@0 | 306 | * Contains fromUnicode mapping results, stored as sequences of chars. |
michael@0 | 307 | * Indexes and lengths stored in the fromUTableValues[]. |
michael@0 | 308 | */ |
/*
 * Slot numbers for the int32_t indexes[] array that starts the extension
 * data. Every value is written out so that a slot can be checked directly
 * against the "Usage of indexes[i]" table in the format description.
 */
enum {
    UCNV_EXT_INDEXES_LENGTH=0,              /* length of indexes[] itself */

    /* to Unicode table */
    UCNV_EXT_TO_U_INDEX=1,
    UCNV_EXT_TO_U_LENGTH=2,
    UCNV_EXT_TO_U_UCHARS_INDEX=3,
    UCNV_EXT_TO_U_UCHARS_LENGTH=4,

    /* from Unicode table, not for the initial code point */
    UCNV_EXT_FROM_U_UCHARS_INDEX=5,
    UCNV_EXT_FROM_U_VALUES_INDEX=6,
    UCNV_EXT_FROM_U_LENGTH=7,
    UCNV_EXT_FROM_U_BYTES_INDEX=8,
    UCNV_EXT_FROM_U_BYTES_LENGTH=9,

    /* from Unicode trie for the initial code point */
    UCNV_EXT_FROM_U_STAGE_12_INDEX=10,
    UCNV_EXT_FROM_U_STAGE_1_LENGTH=11,
    UCNV_EXT_FROM_U_STAGE_12_LENGTH=12,
    UCNV_EXT_FROM_U_STAGE_3_INDEX=13,
    UCNV_EXT_FROM_U_STAGE_3_LENGTH=14,
    UCNV_EXT_FROM_U_STAGE_3B_INDEX=15,
    UCNV_EXT_FROM_U_STAGE_3B_LENGTH=16,

    /* bit fields: byte counts, UChar counts, flags */
    UCNV_EXT_COUNT_BYTES=17,
    UCNV_EXT_COUNT_UCHARS=18,
    UCNV_EXT_FLAGS=19,

    UCNV_EXT_RESERVED_INDEX=20,             /* moves with additional indexes */

    UCNV_EXT_SIZE=31,                       /* number of bytes for the entire extension structure */
    UCNV_EXT_INDEXES_MIN_LENGTH=32
};
michael@0 | 340 | |
/*
 * Turn indexes[index] -- a byte offset counted from the start of indexes[] --
 * into a read-only pointer of type (const itemType *) to that extension array.
 */
#define UCNV_EXT_ARRAY(indexes, index, itemType) \
    ((const itemType *)(((const char *)(indexes))+(indexes)[(index)]))

/* bits 7..0 of indexes[UCNV_EXT_COUNT_BYTES]: maximum bytes per UChar */
#define UCNV_GET_MAX_BYTES_PER_UCHAR(indexes) \
    (0xff&(indexes)[UCNV_EXT_COUNT_BYTES])
michael@0 | 347 | |
michael@0 | 348 | /* internal API ------------------------------------------------------------- */ |
michael@0 | 349 | |
michael@0 | 350 | U_CFUNC UBool |
michael@0 | 351 | ucnv_extInitialMatchToU(UConverter *cnv, const int32_t *cx, |
michael@0 | 352 | int32_t firstLength, |
michael@0 | 353 | const char **src, const char *srcLimit, |
michael@0 | 354 | UChar **target, const UChar *targetLimit, |
michael@0 | 355 | int32_t **offsets, int32_t srcIndex, |
michael@0 | 356 | UBool flush, |
michael@0 | 357 | UErrorCode *pErrorCode); |
michael@0 | 358 | |
michael@0 | 359 | U_CFUNC UChar32 |
michael@0 | 360 | ucnv_extSimpleMatchToU(const int32_t *cx, |
michael@0 | 361 | const char *source, int32_t length, |
michael@0 | 362 | UBool useFallback); |
michael@0 | 363 | |
michael@0 | 364 | U_CFUNC void |
michael@0 | 365 | ucnv_extContinueMatchToU(UConverter *cnv, |
michael@0 | 366 | UConverterToUnicodeArgs *pArgs, int32_t srcIndex, |
michael@0 | 367 | UErrorCode *pErrorCode); |
michael@0 | 368 | |
michael@0 | 369 | |
michael@0 | 370 | U_CFUNC UBool |
michael@0 | 371 | ucnv_extInitialMatchFromU(UConverter *cnv, const int32_t *cx, |
michael@0 | 372 | UChar32 cp, |
michael@0 | 373 | const UChar **src, const UChar *srcLimit, |
michael@0 | 374 | char **target, const char *targetLimit, |
michael@0 | 375 | int32_t **offsets, int32_t srcIndex, |
michael@0 | 376 | UBool flush, |
michael@0 | 377 | UErrorCode *pErrorCode); |
michael@0 | 378 | |
michael@0 | 379 | U_CFUNC int32_t |
michael@0 | 380 | ucnv_extSimpleMatchFromU(const int32_t *cx, |
michael@0 | 381 | UChar32 cp, uint32_t *pValue, |
michael@0 | 382 | UBool useFallback); |
michael@0 | 383 | |
michael@0 | 384 | U_CFUNC void |
michael@0 | 385 | ucnv_extContinueMatchFromU(UConverter *cnv, |
michael@0 | 386 | UConverterFromUnicodeArgs *pArgs, int32_t srcIndex, |
michael@0 | 387 | UErrorCode *pErrorCode); |
michael@0 | 388 | |
michael@0 | 389 | /* |
michael@0 | 390 | * Add code points and strings to the set according to the extension mappings. |
michael@0 | 391 | * Limitation on the UConverterSetFilter: |
michael@0 | 392 | * The filters currently assume that they are used with 1:1 mappings. |
michael@0 | 393 | * They only apply to single input code points, and then they pass through |
michael@0 | 394 | * only mappings with single-charset-code results. |
michael@0 | 395 | * For example, the Shift-JIS filter only works for 2-byte results and tests |
michael@0 | 396 | * that those 2 bytes are in the JIS X 0208 range of Shift-JIS. |
michael@0 | 397 | */ |
michael@0 | 398 | U_CFUNC void |
michael@0 | 399 | ucnv_extGetUnicodeSet(const UConverterSharedData *sharedData, |
michael@0 | 400 | const USetAdder *sa, |
michael@0 | 401 | UConverterUnicodeSet which, |
michael@0 | 402 | UConverterSetFilter filter, |
michael@0 | 403 | UErrorCode *pErrorCode); |
michael@0 | 404 | |
/* toUnicode helpers -------------------------------------------------------- */

/* a toUTable[] word: input byte in bits 31..24, lookup value in bits 23..0 */
#define UCNV_EXT_TO_U_BYTE_SHIFT 24
#define UCNV_EXT_TO_U_VALUE_MASK 0xffffff

/*
 * Lookup values below MIN_CODE_POINT are partial matches.
 * With the roundtrip flag cleared, values up to MAX_CODE_POINT
 * hold a code point offset by MIN_CODE_POINT.
 */
#define UCNV_EXT_TO_U_MIN_CODE_POINT 0x1f0000
#define UCNV_EXT_TO_U_MAX_CODE_POINT 0x2fffff

/* bit 23 of the lookup value: set for roundtrip, clear for fallback */
#define UCNV_EXT_TO_U_ROUNDTRIP_FLAG ((uint32_t)1<<23)

/* string results: toUUChars[] index in bits 17..0, (length+12) in the bits above */
#define UCNV_EXT_TO_U_INDEX_MASK 0x3ffff
#define UCNV_EXT_TO_U_LENGTH_SHIFT 18
#define UCNV_EXT_TO_U_LENGTH_OFFSET 12

/* maximum number of indexed UChars */
#define UCNV_EXT_MAX_UCHARS 19

/* build a toUTable[] word from an input byte and its lookup value */
#define UCNV_EXT_TO_U_MAKE_WORD(byte, value) \
    (((uint32_t)(byte)<<UCNV_EXT_TO_U_BYTE_SHIFT)|(value))

/* take a toUTable[] word apart again */
#define UCNV_EXT_TO_U_GET_BYTE(word) \
    ((word)>>UCNV_EXT_TO_U_BYTE_SHIFT)
#define UCNV_EXT_TO_U_GET_VALUE(word) \
    ((word)&UCNV_EXT_TO_U_VALUE_MASK)

/* partial match: the value is the index of the next toUTable[] section */
#define UCNV_EXT_TO_U_IS_PARTIAL(value) \
    ((value)<UCNV_EXT_TO_U_MIN_CODE_POINT)
#define UCNV_EXT_TO_U_GET_PARTIAL_INDEX(value) \
    (value)

/* test for, and strip, the roundtrip flag */
#define UCNV_EXT_TO_U_IS_ROUNDTRIP(value) \
    (((value)&UCNV_EXT_TO_U_ROUNDTRIP_FLAG)!=0)
#define UCNV_EXT_TO_U_MASK_ROUNDTRIP(value) \
    ((value)&~UCNV_EXT_TO_U_ROUNDTRIP_FLAG)

/* use after masking off the roundtrip flag */
#define UCNV_EXT_TO_U_IS_CODE_POINT(value) \
    ((value)<=UCNV_EXT_TO_U_MAX_CODE_POINT)
#define UCNV_EXT_TO_U_GET_CODE_POINT(value) \
    ((value)-UCNV_EXT_TO_U_MIN_CODE_POINT)

/* string result: index into toUUChars[] and number of UChars (0..19) */
#define UCNV_EXT_TO_U_GET_INDEX(value) \
    ((value)&UCNV_EXT_TO_U_INDEX_MASK)
#define UCNV_EXT_TO_U_GET_LENGTH(value) \
    (((value)>>UCNV_EXT_TO_U_LENGTH_SHIFT)-UCNV_EXT_TO_U_LENGTH_OFFSET)
michael@0 | 436 | |
/* fromUnicode helpers ------------------------------------------------------ */

/* most trie constants are shared with ucnvmbcs.h */

/*
 * Stage 3 has a granularity of 4 entries, so a stage 2 entry is shifted
 * left by 2 to become a stage 3 index.
 * see similar utrie.h UTRIE_INDEX_SHIFT and UTRIE_DATA_GRANULARITY
 */
#define UCNV_EXT_STAGE_2_LEFT_SHIFT 2
#define UCNV_EXT_STAGE_3_GRANULARITY 4

/*
 * Trie access; s1Index=c>>10.
 * Yields the stage 3 value, which is an index into stage 3b:
 * the stage 1 entry plus bits 9..4 of c selects the stage 2 entry, and the
 * shifted stage 2 entry plus bits 3..0 of c selects the stage 3 entry.
 */
#define UCNV_EXT_FROM_U(stage12, stage3, s1Index, c) \
    (stage3)[ \
        ((int32_t)(stage12)[ (stage12)[s1Index] +(((c)>>4)&0x3f) ]<<UCNV_EXT_STAGE_2_LEFT_SHIFT) \
        +((c)&0xf) \
    ]
michael@0 | 448 | |
/*
 * Layout of a fromUnicode result value (fromUTableValues[] / fromUStage3b[]):
 *   bit  31      roundtrip flag
 *   bit  30      "good one-way" flag
 *   bit  29      reserved, 0
 *   bits 28..24  length of the result in bytes
 *   bits 23..0   the result bytes themselves (length 1..3) or an index
 *                into fromUBytes[]
 * A value whose bits 31..24 are all 0 is a partial match instead.
 */
#define UCNV_EXT_FROM_U_LENGTH_SHIFT 24
#define UCNV_EXT_FROM_U_ROUNDTRIP_FLAG ((uint32_t)1<<31)
#define UCNV_EXT_FROM_U_GOOD_ONE_WAY_FLAG 0x40000000
#define UCNV_EXT_FROM_U_STATUS_MASK 0xc0000000
#define UCNV_EXT_FROM_U_RESERVED_MASK 0x20000000
#define UCNV_EXT_FROM_U_DATA_MASK 0xffffff

/* special value for "no mapping" to <subchar1> (impossible roundtrip to 0 bytes, value 01) */
#define UCNV_EXT_FROM_U_SUBCHAR1 0x80000001

/* at most 3 bytes in the lower part of the value */
#define UCNV_EXT_FROM_U_MAX_DIRECT_LENGTH 3

/* maximum number of indexed bytes */
#define UCNV_EXT_MAX_BYTES 0x1f

/* partial match: the value is the index of the next fromUTable section */
#define UCNV_EXT_FROM_U_IS_PARTIAL(value) (((value)>>UCNV_EXT_FROM_U_LENGTH_SHIFT)==0)
#define UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value) (value)

/* test for, and strip, the roundtrip flag */
#define UCNV_EXT_FROM_U_IS_ROUNDTRIP(value) (((value)&UCNV_EXT_FROM_U_ROUNDTRIP_FLAG)!=0)
#define UCNV_EXT_FROM_U_MASK_ROUNDTRIP(value) ((value)&~UCNV_EXT_FROM_U_ROUNDTRIP_FLAG)

/*
 * get length; masks away all other bits
 * The whole expansion is parenthesized so that the cast stays attached to
 * the expression wherever the macro is used (for example as a sizeof operand).
 */
#define UCNV_EXT_FROM_U_GET_LENGTH(value) ((int32_t)(((value)>>UCNV_EXT_FROM_U_LENGTH_SHIFT)&UCNV_EXT_MAX_BYTES))

/* get bytes or bytes index */
#define UCNV_EXT_FROM_U_GET_DATA(value) ((value)&UCNV_EXT_FROM_U_DATA_MASK)
michael@0 | 476 | |
michael@0 | 477 | #endif |
michael@0 | 478 | |
michael@0 | 479 | #endif |