intl/icu/source/tools/makeconv/genmbcs.h

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

     1 /*
     2 *******************************************************************************
     3 *
     4 *   Copyright (C) 2000-2008, International Business Machines
     5 *   Corporation and others.  All Rights Reserved.
     6 *
     7 *******************************************************************************
     8 *   file name:  genmbcs.h
     9 *   encoding:   US-ASCII
    10 *   tab size:   8 (not used)
    11 *   indentation:4
    12 *
    13 *   created on: 2000jul10
    14 *   created by: Markus W. Scherer
    15 */
    17 #ifndef __GENMBCS_H__
    18 #define __GENMBCS_H__
    20 #include "makeconv.h"
    22 enum {
    23     /* Layout and sizing constants for the MBCS/SBCS stage tables as used by makeconv.
    24      * TODO: Consider using ucnvmbcs.h constants.
    25      * However, not all values need to be exactly the same, for example
    26      * the xxx_UTF8_MAX values may be different. (Especially SBCS_UTF8_MAX
    27      * may be higher in makeconv than in the runtime code because that
    28      * affects only a small number of .cnv files [if any] but all
    29      * runtime UConverterSharedData objects.)
    30      */
    31     MBCS_STAGE_2_SHIFT=4,
    32     MBCS_STAGE_2_BLOCK_SIZE=0x40,       /* =64=1<<6 for 6 bits in stage 2 */
    33     MBCS_STAGE_2_BLOCK_SIZE_SHIFT=6,    /* log2(MBCS_STAGE_2_BLOCK_SIZE) */
    34     MBCS_STAGE_2_BLOCK_MASK=0x3f,       /* for after shifting by MBCS_STAGE_2_SHIFT */
    35     MBCS_STAGE_1_SHIFT=10,
    36     MBCS_STAGE_1_BMP_SIZE=0x40, /* 0x10000>>MBCS_STAGE_1_SHIFT, or 64 for one entry per 1k code points on the BMP */
    37     MBCS_STAGE_1_SIZE=0x440,    /* 0x110000>>MBCS_STAGE_1_SHIFT, or 17*64 for one entry per 1k code points */
    38     MBCS_STAGE_2_SIZE=0xfbc0,   /* 0x10000-MBCS_STAGE_1_SIZE: stages 1 & 2 share a 16-bit-indexed array */
    39     MBCS_MAX_STAGE_2_TOP=MBCS_STAGE_2_SIZE,
    40     MBCS_STAGE_2_MAX_BLOCKS=MBCS_STAGE_2_SIZE>>MBCS_STAGE_2_BLOCK_SIZE_SHIFT,
    42     MBCS_STAGE_2_ALL_UNASSIGNED_INDEX=0, /* stage 1 entry for the all-unassigned stage 2 block */
    43     MBCS_STAGE_2_FIRST_ASSIGNED=MBCS_STAGE_2_BLOCK_SIZE, /* start of the first stage 2 block after the all-unassigned one */
    45     MBCS_STAGE_3_BLOCK_SIZE=16,         /* =16=1<<4 for 4 bits in stage 3 */
    46     MBCS_STAGE_3_BLOCK_MASK=0xf,
    47     MBCS_STAGE_3_FIRST_ASSIGNED=MBCS_STAGE_3_BLOCK_SIZE, /* start of the first stage 3 block after the all-unassigned one */
    49     MBCS_STAGE_3_GRANULARITY=16,        /* =1<<4: MBCS stage 2 indexes are shifted left 4 */
    50     MBCS_STAGE_3_SBCS_SIZE=0x10000,     /* max 64k mappings for SBCS */
    51     MBCS_STAGE_3_MBCS_SIZE=0x10000*MBCS_STAGE_3_GRANULARITY, /* max mappings for MBCS */
    53     /*
    54      * SBCS_UTF8_MAX: Maximum code point with UTF-8-friendly SBCS data structures.
    55      * Possible values are 0x01ff..0xffff, in steps of 0x100.
    56      *
    57      * Unlike for MBCS, this constant only affects the stage 3 block allocation size;
    58      * there is no additional stage 1/2 table stored in the .cnv file.
    59      * The max value should be at least 0x7ff to cover 2-byte UTF-8.
    60      * 0xfff also covers a number of other small scripts which have legacy charsets
    61      * (like Thai).
    62      * Higher values up to 0x1fff are harmless and potentially useful because
    63      * that covers small-script blocks which usually have either dense mappings
    64      * or no mappings at all.
    65      * Starting at U+2000, there are mostly symbols and format characters
    66      * with a low density of SBCS mappings, which would result in more wasted
    67      * stage 3 entries with the larger block size.
    68      */
    69     SBCS_UTF8_MAX=0x1fff,
    71     /*
    72      * MBCS_UTF8_MAX: Maximum code point with UTF-8-friendly MBCS data structures.
    73      * Possible values are 0x01ff..0xffff, in steps of 0x100.
    74      *
    75      * Note that with 0xffff, MBCSAddFromUnicode() may overflow the additional UTF-8 stage table
    76      * with extreme input data. The function checks for this overflow.
    77      *
    78      * 0xd7ff is chosen for the majority of common characters including Unihan and Hangul.
    79      * At U+d800 there are mostly surrogates, private use codes, compatibility characters, etc.
    80      * Larger values cause slightly larger MBCS .cnv files.
    81      */
    82     MBCS_UTF8_MAX=0xd7ff,
    83     MBCS_UTF8_LIMIT=MBCS_UTF8_MAX+1,    /* =0xd800 */
    85     MBCS_UTF8_STAGE_SHIFT=6,
    86     MBCS_UTF8_STAGE_3_BLOCK_SIZE=0x40,  /* =64=1<<6 for 6 bits from last trail byte */
    87     MBCS_UTF8_STAGE_3_BLOCK_MASK=0x3f,
    89     /* size of the single-stage table for up to U+d7ff (used instead of stage1/2) */
    90     MBCS_UTF8_STAGE_SIZE=MBCS_UTF8_LIMIT>>MBCS_UTF8_STAGE_SHIFT, /* =0x360 */
    92     MBCS_FROM_U_EXT_FLAG=0x10,          /* UCMapping.f bit for base table mappings that fit into the base toU table but need to go into the extension fromU table */
    93     MBCS_FROM_U_EXT_MASK=0x0f,          /* the low 4 bits, below MBCS_FROM_U_EXT_FLAG; NOTE(review): presumably extracts the plain UCMapping.f value - confirm */
    95     /* =4 number of regular stage 3 blocks for final UTF-8 trail byte */
    96     MBCS_UTF8_STAGE_3_BLOCKS=MBCS_UTF8_STAGE_3_BLOCK_SIZE/MBCS_STAGE_3_BLOCK_SIZE,
    98     MBCS_MAX_FALLBACK_COUNT=8192
    99 };
   101 U_CFUNC NewConverter *   /* NOTE(review): presumably creates the MBCS NewConverter for the given UCMFile; the implementation is not in this header - confirm */
   102 MBCSOpen(UCMFile *ucm);
   104 struct MBCSData;   /* forward declaration only: the struct's members are not defined in this header */
   105 typedef struct MBCSData MBCSData;   /* lets the declarations below use MBCSData without the struct keyword */
   107 /*
   108  * Get a dummy MBCSData for use with MBCSOkForBaseFromUnicode()
   109  * for creating an extension-only file.
   110  * Assume maxCharLength>1. Takes no arguments; the result is a pointer to const MBCSData.
   111  */
   112 U_CFUNC const MBCSData *
   113 MBCSGetDummy(void);
   115 /* Test if a 1:1 mapping fits into the MBCS base table's fromUnicode structure. Returns a UBool; mbcsData is not modified through this pointer (const). */
   116 U_CFUNC UBool
   117 MBCSOkForBaseFromUnicode(const MBCSData *mbcsData,
   118                          const uint8_t *bytes, int32_t length,   /* NOTE(review): presumably the codepage byte sequence of the mapping and its length - confirm */
   119                          UChar32 c, int8_t flag);   /* NOTE(review): presumably the mapping's Unicode code point and its UCMapping.f value - confirm */
   121 U_CFUNC NewConverter *   /* NOTE(review): presumably creates the NewConverter for extension-table data from the given UCMFile; the implementation is not in this header - confirm */
   122 CnvExtOpen(UCMFile *ucm);
   124 #endif /* __GENMBCS_H__ */

mercurial