intl/icu/source/tools/makeconv/genmbcs.h

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 /*
michael@0 2 *******************************************************************************
michael@0 3 *
michael@0 4 * Copyright (C) 2000-2008, International Business Machines
michael@0 5 * Corporation and others. All Rights Reserved.
michael@0 6 *
michael@0 7 *******************************************************************************
michael@0 8 * file name: genmbcs.h
michael@0 9 * encoding: US-ASCII
michael@0 10 * tab size: 8 (not used)
michael@0 11 * indentation:4
michael@0 12 *
michael@0 13 * created on: 2000jul10
michael@0 14 * created by: Markus W. Scherer
michael@0 15 */
michael@0 16
michael@0 17 #ifndef __GENMBCS_H__
michael@0 18 #define __GENMBCS_H__
michael@0 19
michael@0 20 #include "makeconv.h"
michael@0 21
michael@0 22 enum {
michael@0 23 /*
michael@0 24 * TODO: Consider using ucnvmbcs.h constants.
michael@0 25 * However, not all values need to be exactly the same, for example
michael@0 26 * the xxx_UTF8_MAX values may be different. (Especially SBCS_UTF8_MAX
michael@0 27 * may be higher in makeconv than in the runtime code because that
michael@0 28 * affects only a small number of .cnv files [if any] but all
michael@0 29 * runtime UConverterSharedData objects.)
michael@0 30 */
michael@0 31 MBCS_STAGE_2_SHIFT=4,
michael@0 32 MBCS_STAGE_2_BLOCK_SIZE=0x40, /* =64=1<<6 for 6 bits in stage 2 */
michael@0 33 MBCS_STAGE_2_BLOCK_SIZE_SHIFT=6, /* log2(MBCS_STAGE_2_BLOCK_SIZE) */
michael@0 34 MBCS_STAGE_2_BLOCK_MASK=0x3f, /* for after shifting by MBCS_STAGE_2_SHIFT */
michael@0 35 MBCS_STAGE_1_SHIFT=10,
michael@0 36 MBCS_STAGE_1_BMP_SIZE=0x40, /* 0x10000>>MBCS_STAGE_1_SHIFT, or 64 for one entry per 1k code points on the BMP */
michael@0 37 MBCS_STAGE_1_SIZE=0x440, /* 0x110000>>MBCS_STAGE_1_SHIFT, or 17*64 for one entry per 1k code points */
michael@0 38 MBCS_STAGE_2_SIZE=0xfbc0, /* 0x10000-MBCS_STAGE_1_SIZE: stages 1 & 2 share a 16-bit-indexed array */
michael@0 39 MBCS_MAX_STAGE_2_TOP=MBCS_STAGE_2_SIZE,
michael@0 40 MBCS_STAGE_2_MAX_BLOCKS=MBCS_STAGE_2_SIZE>>MBCS_STAGE_2_BLOCK_SIZE_SHIFT,
michael@0 41
michael@0 42 MBCS_STAGE_2_ALL_UNASSIGNED_INDEX=0, /* stage 1 entry for the all-unassigned stage 2 block */
michael@0 43 MBCS_STAGE_2_FIRST_ASSIGNED=MBCS_STAGE_2_BLOCK_SIZE, /* start of the first stage 2 block after the all-unassigned one */
michael@0 44
michael@0 45 MBCS_STAGE_3_BLOCK_SIZE=16, /* =16=1<<4 for 4 bits in stage 3 */
michael@0 46 MBCS_STAGE_3_BLOCK_MASK=0xf,
michael@0 47 MBCS_STAGE_3_FIRST_ASSIGNED=MBCS_STAGE_3_BLOCK_SIZE, /* start of the first stage 3 block after the all-unassigned one */
michael@0 48
michael@0 49 MBCS_STAGE_3_GRANULARITY=16, /* =1<<4: MBCS stage 2 indexes are shifted left 4 */
michael@0 50 MBCS_STAGE_3_SBCS_SIZE=0x10000, /* max 64k mappings for SBCS */
michael@0 51 MBCS_STAGE_3_MBCS_SIZE=0x10000*MBCS_STAGE_3_GRANULARITY, /* max mappings for MBCS */
michael@0 52
michael@0 53 /*
michael@0 54 * SBCS_UTF8_MAX: Maximum code point with UTF-8-friendly SBCS data structures.
michael@0 55 * Possible values are 0x01ff..0xffff, in steps of 0x100.
michael@0 56 *
michael@0 57 * Unlike for MBCS, this constant only affects the stage 3 block allocation size;
michael@0 58 * there is no additional stage 1/2 table stored in the .cnv file.
michael@0 59 * The max value should be at least 0x7ff to cover 2-byte UTF-8.
michael@0 60 * 0xfff also covers a number of other small scripts which have legacy charsets
michael@0 61 * (like Thai).
michael@0 62 * Higher values up to 0x1fff are harmless and potentially useful because
michael@0 63 * that covers small-script blocks which usually have either dense mappings
michael@0 64 * or no mappings at all.
michael@0 65 * Starting at U+2000, there are mostly symbols and format characters
michael@0 66 * with a low density of SBCS mappings, which would result in more wasted
michael@0 67 * stage 3 entries with the larger block size.
michael@0 68 */
michael@0 69 SBCS_UTF8_MAX=0x1fff,
michael@0 70
michael@0 71 /*
michael@0 72 * MBCS_UTF8_MAX: Maximum code point with UTF-8-friendly MBCS data structures.
michael@0 73 * Possible values are 0x01ff..0xffff, in steps of 0x100.
michael@0 74 *
michael@0 75 * Note that with 0xffff, MBCSAddFromUnicode() may overflow the additional UTF-8 stage table
michael@0 76 * with extreme input data. The function checks for this overflow.
michael@0 77 *
michael@0 78 * 0xd7ff is chosen for the majority of common characters including Unihan and Hangul.
michael@0 79 * At U+d800 there are mostly surrogates, private use codes, compatibility characters, etc.
michael@0 80 * Larger values cause slightly larger MBCS .cnv files.
michael@0 81 */
michael@0 82 MBCS_UTF8_MAX=0xd7ff,
michael@0 83 MBCS_UTF8_LIMIT=MBCS_UTF8_MAX+1, /* =0xd800 */
michael@0 84
michael@0 85 MBCS_UTF8_STAGE_SHIFT=6,
michael@0 86 MBCS_UTF8_STAGE_3_BLOCK_SIZE=0x40, /* =64=1<<6 for 6 bits from last trail byte */
michael@0 87 MBCS_UTF8_STAGE_3_BLOCK_MASK=0x3f,
michael@0 88
michael@0 89 /* size of the single-stage table for up to U+d7ff (used instead of stage1/2) */
michael@0 90 MBCS_UTF8_STAGE_SIZE=MBCS_UTF8_LIMIT>>MBCS_UTF8_STAGE_SHIFT, /* =0x360 */
michael@0 91
michael@0 92 MBCS_FROM_U_EXT_FLAG=0x10, /* UCMapping.f bit for base table mappings that fit into the base toU table */
michael@0 93 MBCS_FROM_U_EXT_MASK=0x0f, /* but need to go into the extension fromU table */
michael@0 94
michael@0 95 /* =4 number of regular stage 3 blocks for final UTF-8 trail byte */
michael@0 96 MBCS_UTF8_STAGE_3_BLOCKS=MBCS_UTF8_STAGE_3_BLOCK_SIZE/MBCS_STAGE_3_BLOCK_SIZE,
michael@0 97
michael@0 98 MBCS_MAX_FALLBACK_COUNT=8192
michael@0 99 };
michael@0 100
michael@0 101 U_CFUNC NewConverter *
michael@0 102 MBCSOpen(UCMFile *ucm);
michael@0 103
michael@0 104 struct MBCSData;
michael@0 105 typedef struct MBCSData MBCSData;
michael@0 106
michael@0 107 /*
michael@0 108 * Get a dummy MBCSData for use with MBCSOkForBaseFromUnicode()
michael@0 109 * for creating an extension-only file.
michael@0 110 * Assume maxCharLength>1.
michael@0 111 */
michael@0 112 U_CFUNC const MBCSData *
michael@0 113 MBCSGetDummy(void);
michael@0 114
michael@0 115 /* Test if a 1:1 mapping fits into the MBCS base table's fromUnicode structure. */
michael@0 116 U_CFUNC UBool
michael@0 117 MBCSOkForBaseFromUnicode(const MBCSData *mbcsData,
michael@0 118 const uint8_t *bytes, int32_t length,
michael@0 119 UChar32 c, int8_t flag);
michael@0 120
michael@0 121 U_CFUNC NewConverter *
michael@0 122 CnvExtOpen(UCMFile *ucm);
michael@0 123
michael@0 124 #endif /* __GENMBCS_H__ */

mercurial