intl/icu/source/tools/makeconv/genmbcs.h

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/tools/makeconv/genmbcs.h	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,124 @@
     1.4 +/*
     1.5 +*******************************************************************************
     1.6 +*
     1.7 +*   Copyright (C) 2000-2008, International Business Machines
     1.8 +*   Corporation and others.  All Rights Reserved.
     1.9 +*
    1.10 +*******************************************************************************
    1.11 +*   file name:  genmbcs.h
    1.12 +*   encoding:   US-ASCII
    1.13 +*   tab size:   8 (not used)
    1.14 +*   indentation:4
    1.15 +*
    1.16 +*   created on: 2000jul10
    1.17 +*   created by: Markus W. Scherer
    1.18 +*/
    1.19 +
    1.20 +#ifndef __GENMBCS_H__
    1.21 +#define __GENMBCS_H__
    1.22 +
    1.23 +#include "makeconv.h"
    1.24 +
    1.25 +enum {
    1.26 +    /*
    1.27 +     * TODO: Consider using ucnvmbcs.h constants.
    1.28 +     * However, not all values need to be exactly the same, for example
    1.29 +     * the xxx_UTF8_MAX values may be different. (Especially SBCS_UTF8_MAX
    1.30 +     * may be higher in makeconv than in the runtime code because that
    1.31 +     * affects only a small number of .cnv files [if any] but all
    1.32 +     * runtime UConverterSharedData objects.)
    1.33 +     */
    1.34 +    MBCS_STAGE_2_SHIFT=4,
    1.35 +    MBCS_STAGE_2_BLOCK_SIZE=0x40,       /* =64=1<<6 for 6 bits in stage 2 */
    1.36 +    MBCS_STAGE_2_BLOCK_SIZE_SHIFT=6,    /* log2(MBCS_STAGE_2_BLOCK_SIZE) */
    1.37 +    MBCS_STAGE_2_BLOCK_MASK=0x3f,       /* for after shifting by MBCS_STAGE_2_SHIFT */
    1.38 +    MBCS_STAGE_1_SHIFT=10,
    1.39 +    MBCS_STAGE_1_BMP_SIZE=0x40, /* 0x10000>>MBCS_STAGE_1_SHIFT, or 64 for one entry per 1k code points on the BMP */
    1.40 +    MBCS_STAGE_1_SIZE=0x440,    /* 0x110000>>MBCS_STAGE_1_SHIFT, or 17*64 for one entry per 1k code points */
    1.41 +    MBCS_STAGE_2_SIZE=0xfbc0,   /* 0x10000-MBCS_STAGE_1_SIZE: stages 1 & 2 share a 16-bit-indexed array */
    1.42 +    MBCS_MAX_STAGE_2_TOP=MBCS_STAGE_2_SIZE,
    1.43 +    MBCS_STAGE_2_MAX_BLOCKS=MBCS_STAGE_2_SIZE>>MBCS_STAGE_2_BLOCK_SIZE_SHIFT,
    1.44 +
    1.45 +    MBCS_STAGE_2_ALL_UNASSIGNED_INDEX=0, /* stage 1 entry for the all-unassigned stage 2 block */
    1.46 +    MBCS_STAGE_2_FIRST_ASSIGNED=MBCS_STAGE_2_BLOCK_SIZE, /* start of the first stage 2 block after the all-unassigned one */
    1.47 +
    1.48 +    MBCS_STAGE_3_BLOCK_SIZE=16,         /* =16=1<<4 for 4 bits in stage 3 */
    1.49 +    MBCS_STAGE_3_BLOCK_MASK=0xf,
    1.50 +    MBCS_STAGE_3_FIRST_ASSIGNED=MBCS_STAGE_3_BLOCK_SIZE, /* start of the first stage 3 block after the all-unassigned one */
    1.51 +
    1.52 +    MBCS_STAGE_3_GRANULARITY=16,        /* =1<<4: MBCS stage 2 indexes are shifted left 4 */
    1.53 +    MBCS_STAGE_3_SBCS_SIZE=0x10000,     /* max 64k mappings for SBCS */
    1.54 +    MBCS_STAGE_3_MBCS_SIZE=0x10000*MBCS_STAGE_3_GRANULARITY, /* max mappings for MBCS */
    1.55 +
    1.56 +    /*
    1.57 +     * SBCS_UTF8_MAX: Maximum code point with UTF-8-friendly SBCS data structures.
    1.58 +     * Possible values are 0x01ff..0xffff, in steps of 0x100.
    1.59 +     *
    1.60 +     * Unlike for MBCS, this constant only affects the stage 3 block allocation size;
    1.61 +     * there is no additional stage 1/2 table stored in the .cnv file.
    1.62 +     * The max value should be at least 0x7ff to cover 2-byte UTF-8.
    1.63 +     * 0xfff also covers a number of other small scripts which have legacy charsets
    1.64 +     * (like Thai).
    1.65 +     * Higher values up to 0x1fff are harmless and potentially useful because
    1.66 +     * that covers small-script blocks which usually have either dense mappings
    1.67 +     * or no mappings at all.
    1.68 +     * Starting at U+2000, there are mostly symbols and format characters
    1.69 +     * with a low density of SBCS mappings, which would result in more wasted
    1.70 +     * stage 3 entries with the larger block size.
    1.71 +     */
    1.72 +    SBCS_UTF8_MAX=0x1fff,
    1.73 +
    1.74 +    /*
    1.75 +     * MBCS_UTF8_MAX: Maximum code point with UTF-8-friendly MBCS data structures.
    1.76 +     * Possible values are 0x01ff..0xffff, in steps of 0x100.
    1.77 +     *
    1.78 +     * Note that with 0xffff, MBCSAddFromUnicode() may overflow the additional UTF-8 stage table
    1.79 +     * with extreme input data. The function checks for this overflow.
    1.80 +     *
    1.81 +     * 0xd7ff is chosen for the majority of common characters including Unihan and Hangul.
    1.82 +     * At U+d800 there are mostly surrogates, private use codes, compatibility characters, etc.
    1.83 +     * Larger values cause slightly larger MBCS .cnv files.
    1.84 +     */
    1.85 +    MBCS_UTF8_MAX=0xd7ff,
    1.86 +    MBCS_UTF8_LIMIT=MBCS_UTF8_MAX+1,    /* =0xd800 */
    1.87 +
    1.88 +    MBCS_UTF8_STAGE_SHIFT=6,
    1.89 +    MBCS_UTF8_STAGE_3_BLOCK_SIZE=0x40,  /* =64=1<<6 for 6 bits from last trail byte */
    1.90 +    MBCS_UTF8_STAGE_3_BLOCK_MASK=0x3f,
    1.91 +
    1.92 +    /* size of the single-stage table for up to U+d7ff (used instead of stage1/2) */
    1.93 +    MBCS_UTF8_STAGE_SIZE=MBCS_UTF8_LIMIT>>MBCS_UTF8_STAGE_SHIFT, /* =0x360 */
    1.94 +
    1.95 +    MBCS_FROM_U_EXT_FLAG=0x10,          /* UCMapping.f bit for base table mappings that fit into the base toU table */
    1.96 +    MBCS_FROM_U_EXT_MASK=0x0f,          /* but need to go into the extension fromU table */
    1.97 +
    1.98 +    /* =4 number of regular stage 3 blocks for final UTF-8 trail byte */
    1.99 +    MBCS_UTF8_STAGE_3_BLOCKS=MBCS_UTF8_STAGE_3_BLOCK_SIZE/MBCS_STAGE_3_BLOCK_SIZE,
   1.100 +
   1.101 +    MBCS_MAX_FALLBACK_COUNT=8192
   1.102 +};
   1.103 +
   1.104 +U_CFUNC NewConverter *
   1.105 +MBCSOpen(UCMFile *ucm);
   1.106 +
   1.107 +struct MBCSData;
   1.108 +typedef struct MBCSData MBCSData;
   1.109 +
   1.110 +/*
   1.111 + * Get a dummy MBCSData for use with MBCSOkForBaseFromUnicode()
   1.112 + * for creating an extension-only file.
   1.113 + * Assume maxCharLength>1.
   1.114 + */
   1.115 +U_CFUNC const MBCSData *
   1.116 +MBCSGetDummy(void);
   1.117 +
   1.118 +/* Test if a 1:1 mapping fits into the MBCS base table's fromUnicode structure. */
   1.119 +U_CFUNC UBool
   1.120 +MBCSOkForBaseFromUnicode(const MBCSData *mbcsData,
   1.121 +                         const uint8_t *bytes, int32_t length,
   1.122 +                         UChar32 c, int8_t flag);
   1.123 +
   1.124 +U_CFUNC NewConverter *
   1.125 +CnvExtOpen(UCMFile *ucm);
   1.126 +
   1.127 +#endif /* __GENMBCS_H__ */

mercurial