|
/*
*******************************************************************************
*
*   Copyright (C) 2000-2011, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  ucol_elm.h
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created 02/22/2001
*   created by: Vladimir Weinstein
*
*   This program reads the Fractional UCA table and generates
*   internal format for UCA table as well as inverse UCA table.
*   It then writes binary files containing the data: ucadata.dat
*   & invuca.dat
*/
|
21 #ifndef UCOL_UCAELEMS_H |
|
22 #define UCOL_UCAELEMS_H |
|
23 |
|
24 #include "unicode/utypes.h" |
|
25 #include "unicode/uniset.h" |
|
26 #include "ucol_tok.h" |
|
27 |
|
28 #if !UCONFIG_NO_COLLATION |
|
29 |
|
30 #include "ucol_imp.h" |
|
31 |
|
32 #ifdef UCOL_DEBUG |
|
33 #include "cmemory.h" |
|
34 #include <stdio.h> |
|
35 #endif |
|
36 |
|
37 U_CDECL_BEGIN |
|
38 |
|
/* This is the maximum trie capacity for the mapping trie.
 * Due to current limitations in genuca and the design of UTrie,
 * this number can't be more than 256K (0x40000).
 * As of Unicode 5, it currently could safely go to 128K without
 * a problem. Normally, fewer than 32K are tailored.
 */
#define UCOL_ELM_TRIE_CAPACITY 0x40000
|
46 |
|
/* This is the maximum capacity for the temporary combining class
 * table. The table will be compacted after scanning all the
 * Unicode codepoints.
 */
#define UCOL_MAX_CM_TAB 0x10000
|
52 |
|
53 |
|
/* Buffer of 32-bit collation elements (CEs) together with two int32_t
 * bookkeeping counters.
 * NOTE(review): the header does not show how the counters are used;
 * presumably `position` is the next free slot and `size` the allocated
 * capacity of `CEs` - confirm against the implementation.
 */
typedef struct {
    uint32_t *CEs;      /* array of collation elements */
    int32_t  position;  /* presumably number of entries in use - TODO confirm */
    int32_t  size;      /* presumably allocated capacity, in entries - TODO confirm */
} ExpansionTable;
|
59 |
|
60 typedef struct { |
|
61 UChar prefixChars[128]; |
|
62 UChar *prefix; |
|
63 uint32_t prefixSize; |
|
64 UChar uchars[128]; |
|
65 UChar *cPoints; |
|
66 uint32_t cSize; /* Number of characters in sequence - for contraction */ |
|
67 uint32_t noOfCEs; /* Number of collation elements */ |
|
68 uint32_t CEs[128]; /* These are collation elements - there could be more than one - in case of expansion */ |
|
69 uint32_t mapCE; /* This is the value element maps in original table */ |
|
70 uint32_t sizePrim[128]; |
|
71 uint32_t sizeSec[128]; |
|
72 uint32_t sizeTer[128]; |
|
73 UBool caseBit; |
|
74 UBool isThai; |
|
75 } UCAElements; |
|
76 |
|
77 typedef struct { |
|
78 uint32_t *endExpansionCE; |
|
79 UBool *isV; |
|
80 int32_t position; |
|
81 int32_t size; |
|
82 uint8_t maxLSize; |
|
83 uint8_t maxVSize; |
|
84 uint8_t maxTSize; |
|
85 } MaxJamoExpansionTable; |
|
86 |
|
/* Two arrays - 32-bit CEs and 8-bit sizes - with bookkeeping counters.
 * NOTE(review): presumably entry i records that expansions ending in
 * endExpansionCE[i] have a maximum length of expansionCESize[i]; the
 * header alone does not establish this - confirm against the
 * implementation.
 */
typedef struct {
    uint32_t *endExpansionCE;  /* array of CEs */
    uint8_t  *expansionCESize; /* array of sizes; presumably parallel to endExpansionCE - TODO confirm */
    int32_t  position;         /* presumably number of entries in use - TODO confirm */
    int32_t  size;             /* presumably allocated capacity, in entries - TODO confirm */
} MaxExpansionTable;
|
93 |
|
94 typedef struct { |
|
95 uint16_t index[256]; /* index of cPoints by combining class 0-255. */ |
|
96 UChar *cPoints; /* code point array of all combining marks */ |
|
97 uint32_t size; /* total number of combining marks */ |
|
98 } CombinClassTable; |
|
99 |
|
100 typedef struct { |
|
101 /*CompactEIntArray *mapping; */ |
|
102 UNewTrie *mapping; |
|
103 ExpansionTable *expansions; |
|
104 struct CntTable *contractions; |
|
105 UCATableHeader *image; |
|
106 UColOptionSet *options; |
|
107 MaxExpansionTable *maxExpansions; |
|
108 MaxJamoExpansionTable *maxJamoExpansions; |
|
109 uint8_t *unsafeCP; |
|
110 uint8_t *contrEndCP; |
|
111 const UCollator *UCA; |
|
112 UHashtable *prefixLookup; |
|
113 CombinClassTable *cmLookup; /* combining class lookup for tailoring. */ |
|
114 } tempUCATable; |
|
115 |
|
116 typedef struct { |
|
117 UChar cp; |
|
118 uint16_t cClass; // combining class |
|
119 }CompData; |
|
120 |
|
121 typedef struct { |
|
122 CompData *precomp; |
|
123 int32_t precompLen; |
|
124 UChar *decomp; |
|
125 int32_t decompLen; |
|
126 UChar *comp; |
|
127 int32_t compLen; |
|
128 uint16_t curClass; |
|
129 uint16_t tailoringCM; |
|
130 int32_t cmPos; |
|
131 }tempTailorContext; |
|
132 |
|
133 U_CAPI tempUCATable * U_EXPORT2 uprv_uca_initTempTable(UCATableHeader *image, UColOptionSet *opts, const UCollator *UCA, UColCETags initTag, UColCETags supplementaryInitTag, UErrorCode *status); |
|
134 U_CAPI void U_EXPORT2 uprv_uca_closeTempTable(tempUCATable *t); |
|
135 U_CAPI uint32_t U_EXPORT2 uprv_uca_addAnElement(tempUCATable *t, UCAElements *element, UErrorCode *status); |
|
136 U_CAPI UCATableHeader * U_EXPORT2 uprv_uca_assembleTable(tempUCATable *t, UErrorCode *status); |
|
137 |
|
138 U_CAPI int32_t U_EXPORT2 |
|
139 uprv_uca_canonicalClosure(tempUCATable *t, UColTokenParser *src, |
|
140 icu::UnicodeSet *closed, UErrorCode *status); |
|
141 |
|
142 U_CDECL_END |
|
143 |
|
144 #endif /* #if !UCONFIG_NO_COLLATION */ |
|
145 |
|
146 #endif |