intl/icu/source/common/utrie2_impl.h

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/common/utrie2_impl.h	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,172 @@
     1.4 +/*
     1.5 +******************************************************************************
     1.6 +*
     1.7 +*   Copyright (C) 2001-2008, International Business Machines
     1.8 +*   Corporation and others.  All Rights Reserved.
     1.9 +*
    1.10 +******************************************************************************
    1.11 +*   file name:  utrie2_impl.h
    1.12 +*   encoding:   US-ASCII
    1.13 +*   tab size:   8 (not used)
    1.14 +*   indentation:4
    1.15 +*
    1.16 +*   created on: 2008sep26 (split off from utrie2.c)
    1.17 +*   created by: Markus W. Scherer
    1.18 +*
    1.19 +*   Definitions needed for both runtime and builder code for UTrie2,
    1.20 +*   used by utrie2.c and utrie2_builder.c.
    1.21 +*/
    1.22 +
    1.23 +#ifndef __UTRIE2_IMPL_H__
    1.24 +#define __UTRIE2_IMPL_H__
    1.25 +
    1.26 +#include "utrie2.h"
    1.27 +
    1.28 +/* Public UTrie2 API implementation ----------------------------------------- */
    1.29 +
    1.30 +/*
    1.31 + * These definitions are mostly needed by utrie2.c,
    1.32 + * but also by utrie2_serialize() and utrie2_swap().
    1.33 + */
    1.34 +
    1.35 +/*
    1.36 + * UTrie and UTrie2 signature values,
    1.37 + * in platform endianness and opposite endianness.
    1.38 + */
    1.39 +#define UTRIE_SIG       0x54726965
    1.40 +#define UTRIE_OE_SIG    0x65697254
    1.41 +
    1.42 +#define UTRIE2_SIG      0x54726932
    1.43 +#define UTRIE2_OE_SIG   0x32697254
    1.44 +
    1.45 +/**
    1.46 + * Trie data structure in serialized form:
    1.47 + *
    1.48 + * UTrie2Header header;
    1.49 + * uint16_t index[header.index2Length];
    1.50 + * uint16_t data[header.shiftedDataLength<<2];  -- or uint32_t data[...]
    1.51 + * @internal
    1.52 + */
    1.53 +typedef struct UTrie2Header {
    1.54 +    /** "Tri2" in big-endian US-ASCII (0x54726932) */
    1.55 +    uint32_t signature;
    1.56 +
    1.57 +    /**
    1.58 +     * options bit field:
    1.59 +     * 15.. 4   reserved (0)
    1.60 +     *  3.. 0   UTrie2ValueBits valueBits
    1.61 +     */
    1.62 +    uint16_t options;
    1.63 +
    1.64 +    /** UTRIE2_INDEX_1_OFFSET..UTRIE2_MAX_INDEX_LENGTH */
    1.65 +    uint16_t indexLength;
    1.66 +
    1.67 +    /** (UTRIE2_DATA_START_OFFSET..UTRIE2_MAX_DATA_LENGTH)>>UTRIE2_INDEX_SHIFT */
    1.68 +    uint16_t shiftedDataLength;
    1.69 +
    1.70 +    /** Null index and data blocks, not shifted. */
    1.71 +    uint16_t index2NullOffset, dataNullOffset;
    1.72 +
    1.73 +    /**
    1.74 +     * First code point of the single-value range ending with U+10ffff,
    1.75 +     * rounded up and then shifted right by UTRIE2_SHIFT_1.
    1.76 +     */
    1.77 +    uint16_t shiftedHighStart;
    1.78 +} UTrie2Header;
    1.79 +
    1.80 +/**
    1.81 + * Constants for use with UTrie2Header.options.
    1.82 + * @internal
    1.83 + */
    1.84 +enum {
    1.85 +    /** Mask to get the UTrie2ValueBits valueBits from options. */
    1.86 +    UTRIE2_OPTIONS_VALUE_BITS_MASK=0xf
    1.87 +};
    1.88 +
    1.89 +/* Building a trie ---------------------------------------------------------- */
    1.90 +
    1.91 +/*
    1.92 + * These definitions are mostly needed by utrie2_builder.c, but also by
    1.93 + * utrie2_get32() and utrie2_enum().
    1.94 + */
    1.95 +
    1.96 +enum {
    1.97 +    /**
    1.98 +     * At build time, leave a gap in the index-2 table,
    1.99 +     * at least as long as the maximum lengths of the 2-byte UTF-8 index-2 table
   1.100 +     * and the supplementary index-1 table.
   1.101 +     * Round up to UTRIE2_INDEX_2_BLOCK_LENGTH for proper compacting.
   1.102 +     */
   1.103 +    UNEWTRIE2_INDEX_GAP_OFFSET=UTRIE2_INDEX_2_BMP_LENGTH,
   1.104 +    UNEWTRIE2_INDEX_GAP_LENGTH=
   1.105 +        ((UTRIE2_UTF8_2B_INDEX_2_LENGTH+UTRIE2_MAX_INDEX_1_LENGTH)+UTRIE2_INDEX_2_MASK)&
   1.106 +        ~UTRIE2_INDEX_2_MASK,
   1.107 +
   1.108 +    /**
   1.109 +     * Maximum length of the build-time index-2 array.
   1.110 +     * Maximum number of Unicode code points (0x110000) shifted right by UTRIE2_SHIFT_2,
   1.111 +     * plus the part of the index-2 table for lead surrogate code points,
   1.112 +     * plus the build-time index gap,
   1.113 +     * plus the null index-2 block.
   1.114 +     */
   1.115 +    UNEWTRIE2_MAX_INDEX_2_LENGTH=
   1.116 +        (0x110000>>UTRIE2_SHIFT_2)+
   1.117 +        UTRIE2_LSCP_INDEX_2_LENGTH+
   1.118 +        UNEWTRIE2_INDEX_GAP_LENGTH+
   1.119 +        UTRIE2_INDEX_2_BLOCK_LENGTH,
   1.120 +
   1.121 +    UNEWTRIE2_INDEX_1_LENGTH=0x110000>>UTRIE2_SHIFT_1
   1.122 +};
   1.123 +
   1.124 +/**
   1.125 + * Maximum length of the build-time data array.
   1.126 + * One entry per 0x110000 code points, plus the illegal-UTF-8 block and the null block,
   1.127 + * plus values for the 0x400 surrogate code units.
   1.128 + */
   1.129 +#define UNEWTRIE2_MAX_DATA_LENGTH (0x110000+0x40+0x40+0x400)
   1.130 +
   1.131 +/*
   1.132 + * Build-time trie structure.
   1.133 + *
   1.134 + * Just using a boolean flag for "repeat use" could lead to data array overflow
   1.135 + * because we would not be able to detect when a data block becomes unused.
   1.136 + * It also leads to orphan data blocks that are kept through serialization.
   1.137 + *
   1.138 + * Need to use reference counting for data blocks,
   1.139 + * and allocDataBlock() needs to look for a free block before increasing dataLength.
   1.140 + *
   1.141 + * This scheme seems like overkill for index-2 blocks since the whole index array is
   1.142 + * preallocated anyway (unlike the growable data array).
   1.143 + * Just allocating multiple index-2 blocks as needed.
   1.144 + */
   1.145 +struct UNewTrie2 {
   1.146 +    int32_t index1[UNEWTRIE2_INDEX_1_LENGTH];
   1.147 +    int32_t index2[UNEWTRIE2_MAX_INDEX_2_LENGTH];
   1.148 +    uint32_t *data;
   1.149 +
   1.150 +    uint32_t initialValue, errorValue;
   1.151 +    int32_t index2Length, dataCapacity, dataLength;
   1.152 +    int32_t firstFreeBlock;
   1.153 +    int32_t index2NullOffset, dataNullOffset;
   1.154 +    UChar32 highStart;
   1.155 +    UBool isCompacted;
   1.156 +
   1.157 +    /**
   1.158 +     * Multi-purpose per-data-block table.
   1.159 +     *
   1.160 +     * Before compacting:
   1.161 +     *
   1.162 +     * Per-data-block reference counters/free-block list.
   1.163 +     *  0: unused
   1.164 +     * >0: reference counter (number of index-2 entries pointing here)
   1.165 +     * <0: next free data block in free-block list
   1.166 +     *
   1.167 +     * While compacting:
   1.168 +     *
   1.169 +     * Map of adjusted indexes, used in compactData() and compactIndex2().
   1.170 +     * Maps from original indexes to new ones.
   1.171 +     */
   1.172 +    int32_t map[UNEWTRIE2_MAX_DATA_LENGTH>>UTRIE2_SHIFT_2];
   1.173 +};
   1.174 +
   1.175 +#endif

mercurial