|
1 /* |
|
2 ****************************************************************************** |
|
3 * |
|
4 * Copyright (C) 2008-2011, International Business Machines |
|
5 * Corporation and others. All Rights Reserved. |
|
6 * |
|
7 ****************************************************************************** |
|
8 * file name: uspoof_conf.h |
|
9 * encoding: US-ASCII |
|
10 * tab size: 8 (not used) |
|
11 * indentation:4 |
|
12 * |
|
13 * created on: 2009Jan05 |
|
14 * created by: Andy Heninger |
|
15 * |
|
16 * Internal classes for compiling confusable data into its binary (runtime) form. |
|
17 */ |
|
18 |
|
19 #ifndef __USPOOF_BUILDCONF_H__ |
|
20 #define __USPOOF_BUILDCONF_H__ |
|
21 |
|
22 #if !UCONFIG_NO_NORMALIZATION |
|
23 |
|
24 #if !UCONFIG_NO_REGULAR_EXPRESSIONS |
|
25 |
|
26 #include "uspoof_impl.h" |
|
27 |
|
28 U_NAMESPACE_BEGIN |
|
29 |
|
30 // SPUString |
|
31 // Holds a string that is the result of one of the mappings defined |
|
32 // by the confusable mapping data (confusables.txt from Unicode.org) |
|
33 // Instances of SPUString exist during the compilation process only. |
|
34 |
|
35 struct SPUString : public UMemory { |
|
36 UnicodeString *fStr; // The actual string. |
|
37 int32_t fStrTableIndex; // Index into the final runtime data for this string. |
|
38 // (or, for length 1, the single string char itself, |
|
39 // there being no string table entry for it.) |
|
40 SPUString(UnicodeString *s); |
|
41 ~SPUString(); |
|
42 }; |
|
43 |
|
44 |
|
45 // String Pool A utility class for holding the strings that are the result of |
|
46 // the spoof mappings. These strings will utimately end up in the |
|
47 // run-time String Table. |
|
48 // This is sort of like a sorted set of strings, except that ICU's anemic |
|
49 // built-in collections don't support those, so it is implemented with a |
|
50 // combination of a uhash and a UVector. |
|
51 |
|
52 |
|
53 class SPUStringPool : public UMemory { |
|
54 public: |
|
55 SPUStringPool(UErrorCode &status); |
|
56 ~SPUStringPool(); |
|
57 |
|
58 // Add a string. Return the string from the table. |
|
59 // If the input parameter string is already in the table, delete the |
|
60 // input parameter and return the existing string. |
|
61 SPUString *addString(UnicodeString *src, UErrorCode &status); |
|
62 |
|
63 |
|
64 // Get the n-th string in the collection. |
|
65 SPUString *getByIndex(int32_t i); |
|
66 |
|
67 // Sort the contents; affects the ordering of getByIndex(). |
|
68 void sort(UErrorCode &status); |
|
69 |
|
70 int32_t size(); |
|
71 |
|
72 private: |
|
73 UVector *fVec; // Elements are SPUString * |
|
74 UHashtable *fHash; // Key: UnicodeString Value: SPUString |
|
75 }; |
|
76 |
|
77 |
|
78 // class ConfusabledataBuilder |
|
79 // An instance of this class exists while the confusable data is being built from source. |
|
80 // It encapsulates the intermediate data structures that are used for building. |
|
81 // It exports one static function, to do a confusable data build. |
|
82 |
|
83 class ConfusabledataBuilder : public UMemory { |
|
84 private: |
|
85 SpoofImpl *fSpoofImpl; |
|
86 UChar *fInput; |
|
87 UHashtable *fSLTable; |
|
88 UHashtable *fSATable; |
|
89 UHashtable *fMLTable; |
|
90 UHashtable *fMATable; |
|
91 UnicodeSet *fKeySet; // A set of all keys (UChar32s) that go into the four mapping tables. |
|
92 |
|
93 // The binary data is first assembled into the following four collections, then |
|
94 // copied to its final raw-memory destination. |
|
95 UVector *fKeyVec; |
|
96 UVector *fValueVec; |
|
97 UnicodeString *fStringTable; |
|
98 UVector *fStringLengthsTable; |
|
99 |
|
100 SPUStringPool *stringPool; |
|
101 URegularExpression *fParseLine; |
|
102 URegularExpression *fParseHexNum; |
|
103 int32_t fLineNum; |
|
104 |
|
105 ConfusabledataBuilder(SpoofImpl *spImpl, UErrorCode &status); |
|
106 ~ConfusabledataBuilder(); |
|
107 void build(const char * confusables, int32_t confusablesLen, UErrorCode &status); |
|
108 |
|
109 // Add an entry to the key and value tables being built |
|
110 // input: data from SLTable, MATable, etc. |
|
111 // outut: entry added to fKeyVec and fValueVec |
|
112 void addKeyEntry(UChar32 keyChar, // The key character |
|
113 UHashtable *table, // The table, one of SATable, MATable, etc. |
|
114 int32_t tableFlag, // One of USPOOF_SA_TABLE_FLAG, etc. |
|
115 UErrorCode &status); |
|
116 |
|
117 // From an index into fKeyVec & fValueVec |
|
118 // get a UnicodeString with the corresponding mapping. |
|
119 UnicodeString getMapping(int32_t index); |
|
120 |
|
121 // Populate the final binary output data array with the compiled data. |
|
122 void outputData(UErrorCode &status); |
|
123 |
|
124 public: |
|
125 static void buildConfusableData(SpoofImpl *spImpl, const char * confusables, |
|
126 int32_t confusablesLen, int32_t *errorType, UParseError *pe, UErrorCode &status); |
|
127 }; |
|
128 U_NAMESPACE_END |
|
129 |
|
#endif  // !UCONFIG_NO_REGULAR_EXPRESSIONS
#endif  // !UCONFIG_NO_NORMALIZATION
#endif  // __USPOOF_BUILDCONF_H__