Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purposes.
michael@0 | 1 | /* |
michael@0 | 2 | ******************************************************************************* |
michael@0 | 3 | * |
michael@0 | 4 | * Copyright (C) 2000-2008, International Business Machines |
michael@0 | 5 | * Corporation and others. All Rights Reserved. |
michael@0 | 6 | * |
michael@0 | 7 | ******************************************************************************* |
michael@0 | 8 | * file name: genmbcs.h |
michael@0 | 9 | * encoding: US-ASCII |
michael@0 | 10 | * tab size: 8 (not used) |
michael@0 | 11 | * indentation:4 |
michael@0 | 12 | * |
michael@0 | 13 | * created on: 2000jul10 |
michael@0 | 14 | * created by: Markus W. Scherer |
michael@0 | 15 | */ |
michael@0 | 16 | |
michael@0 | 17 | #ifndef __GENMBCS_H__ |
michael@0 | 18 | #define __GENMBCS_H__ |
michael@0 | 19 | |
michael@0 | 20 | #include "makeconv.h" |
michael@0 | 21 | |
/*
 * Compile-time constants for the MBCS/SBCS fromUnicode table layout
 * used by makeconv.
 * Constants that are defined by a formula are written as that formula so
 * that they cannot drift from the values they depend on; the resulting
 * numeric value is given in the trailing comment.
 */
enum {
    /*
     * TODO: Consider using ucnvmbcs.h constants.
     * However, not all values need to be exactly the same, for example
     * the xxx_UTF8_MAX values may be different. (Especially SBCS_UTF8_MAX
     * may be higher in makeconv than in the runtime code because that
     * affects only a small number of .cnv files [if any] but all
     * runtime UConverterSharedData objects.)
     */
    MBCS_STAGE_2_SHIFT=4,
    MBCS_STAGE_2_BLOCK_SIZE=0x40,       /* =64=1<<6 for 6 bits in stage 2 */
    MBCS_STAGE_2_BLOCK_SIZE_SHIFT=6,    /* log2(MBCS_STAGE_2_BLOCK_SIZE) */
    MBCS_STAGE_2_BLOCK_MASK=0x3f,       /* for after shifting by MBCS_STAGE_2_SHIFT */
    MBCS_STAGE_1_SHIFT=10,
    MBCS_STAGE_1_BMP_SIZE=0x10000>>MBCS_STAGE_1_SHIFT,  /* =0x40=64: one entry per 1k code points on the BMP */
    MBCS_STAGE_1_SIZE=0x110000>>MBCS_STAGE_1_SHIFT,     /* =0x440=17*64: one entry per 1k code points */
    MBCS_STAGE_2_SIZE=0x10000-MBCS_STAGE_1_SIZE,        /* =0xfbc0: stages 1 & 2 share a 16-bit-indexed array */
    MBCS_MAX_STAGE_2_TOP=MBCS_STAGE_2_SIZE,
    MBCS_STAGE_2_MAX_BLOCKS=MBCS_STAGE_2_SIZE>>MBCS_STAGE_2_BLOCK_SIZE_SHIFT,

    MBCS_STAGE_2_ALL_UNASSIGNED_INDEX=0, /* stage 1 entry for the all-unassigned stage 2 block */
    MBCS_STAGE_2_FIRST_ASSIGNED=MBCS_STAGE_2_BLOCK_SIZE, /* start of the first stage 2 block after the all-unassigned one */

    MBCS_STAGE_3_BLOCK_SIZE=16,         /* =16=1<<4 for 4 bits in stage 3 */
    MBCS_STAGE_3_BLOCK_MASK=0xf,
    MBCS_STAGE_3_FIRST_ASSIGNED=MBCS_STAGE_3_BLOCK_SIZE, /* start of the first stage 3 block after the all-unassigned one */

    MBCS_STAGE_3_GRANULARITY=16,        /* =1<<4: MBCS stage 2 indexes are shifted left 4 */
    MBCS_STAGE_3_SBCS_SIZE=0x10000,     /* max 64k mappings for SBCS */
    MBCS_STAGE_3_MBCS_SIZE=0x10000*MBCS_STAGE_3_GRANULARITY, /* max mappings for MBCS */

    /*
     * SBCS_UTF8_MAX: Maximum code point with UTF-8-friendly SBCS data structures.
     * Possible values are 0x01ff..0xffff, in steps of 0x100.
     *
     * Unlike for MBCS, this constant only affects the stage 3 block allocation size;
     * there is no additional stage 1/2 table stored in the .cnv file.
     * The max value should be at least 0x7ff to cover 2-byte UTF-8.
     * 0xfff also covers a number of other small scripts which have legacy charsets
     * (like Thai).
     * Higher values up to 0x1fff are harmless and potentially useful because
     * that covers small-script blocks which usually have either dense mappings
     * or no mappings at all.
     * Starting at U+2000, there are mostly symbols and format characters
     * with a low density of SBCS mappings, which would result in more wasted
     * stage 3 entries with the larger block size.
     */
    SBCS_UTF8_MAX=0x1fff,

    /*
     * MBCS_UTF8_MAX: Maximum code point with UTF-8-friendly MBCS data structures.
     * Possible values are 0x01ff..0xffff, in steps of 0x100.
     *
     * Note that with 0xffff, MBCSAddFromUnicode() may overflow the additional UTF-8 stage table
     * with extreme input data. The function checks for this overflow.
     *
     * 0xd7ff is chosen for the majority of common characters including Unihan and Hangul.
     * At U+d800 there are mostly surrogates, private use codes, compatibility characters, etc.
     * Larger values cause slightly larger MBCS .cnv files.
     */
    MBCS_UTF8_MAX=0xd7ff,
    MBCS_UTF8_LIMIT=MBCS_UTF8_MAX+1,    /* =0xd800 */

    MBCS_UTF8_STAGE_SHIFT=6,
    MBCS_UTF8_STAGE_3_BLOCK_SIZE=0x40,  /* =64=1<<6 for 6 bits from last trail byte */
    MBCS_UTF8_STAGE_3_BLOCK_MASK=0x3f,

    /* size of the single-stage table for up to U+d7ff (used instead of stage1/2) */
    MBCS_UTF8_STAGE_SIZE=MBCS_UTF8_LIMIT>>MBCS_UTF8_STAGE_SHIFT, /* =0x360 */

    /*
     * MBCS_FROM_U_EXT_FLAG: UCMapping.f bit for base table mappings that
     * fit into the base toU table but need to go into the extension
     * fromU table.
     */
    MBCS_FROM_U_EXT_FLAG=0x10,
    MBCS_FROM_U_EXT_MASK=0x0f,          /* =MBCS_FROM_U_EXT_FLAG-1: the four bits below the flag bit */

    /* =4 number of regular stage 3 blocks for final UTF-8 trail byte */
    MBCS_UTF8_STAGE_3_BLOCKS=MBCS_UTF8_STAGE_3_BLOCK_SIZE/MBCS_STAGE_3_BLOCK_SIZE,

    MBCS_MAX_FALLBACK_COUNT=8192
};
michael@0 | 100 | |
michael@0 | 101 | U_CFUNC NewConverter * |
michael@0 | 102 | MBCSOpen(UCMFile *ucm); |
michael@0 | 103 | |
/*
 * Opaque handle type: struct MBCSData is only declared (not defined) in this
 * header, so users of the header can hold pointers to it but not look inside.
 */
typedef struct MBCSData MBCSData;
michael@0 | 106 | |
michael@0 | 107 | /* |
michael@0 | 108 | * Get a dummy MBCSData for use with MBCSOkForBaseFromUnicode() |
michael@0 | 109 | * for creating an extension-only file. |
michael@0 | 110 | * Assume maxCharLength>1. |
michael@0 | 111 | */ |
michael@0 | 112 | U_CFUNC const MBCSData * |
michael@0 | 113 | MBCSGetDummy(void); |
michael@0 | 114 | |
michael@0 | 115 | /* Test if a 1:1 mapping fits into the MBCS base table's fromUnicode structure. */ |
michael@0 | 116 | U_CFUNC UBool |
michael@0 | 117 | MBCSOkForBaseFromUnicode(const MBCSData *mbcsData, |
michael@0 | 118 | const uint8_t *bytes, int32_t length, |
michael@0 | 119 | UChar32 c, int8_t flag); |
michael@0 | 120 | |
michael@0 | 121 | U_CFUNC NewConverter * |
michael@0 | 122 | CnvExtOpen(UCMFile *ucm); |
michael@0 | 123 | |
michael@0 | 124 | #endif /* __GENMBCS_H__ */ |