Wed, 31 Dec 2014 07:22:50 +0100
Correct previous dual key logic pending first delivery installment.
michael@0 | 1 | /* |
michael@0 | 2 | ****************************************************************************** |
michael@0 | 3 | * |
michael@0 | 4 | * Copyright (C) 2008-2011, International Business Machines |
michael@0 | 5 | * Corporation and others. All Rights Reserved. |
michael@0 | 6 | * |
michael@0 | 7 | ****************************************************************************** |
michael@0 | 8 | * file name: uspoof_conf.h |
michael@0 | 9 | * encoding: US-ASCII |
michael@0 | 10 | * tab size: 8 (not used) |
michael@0 | 11 | * indentation:4 |
michael@0 | 12 | * |
michael@0 | 13 | * created on: 2009Jan05 |
michael@0 | 14 | * created by: Andy Heninger |
michael@0 | 15 | * |
michael@0 | 16 | * Internal classes for compiling confusable data into its binary (runtime) form. |
michael@0 | 17 | */ |
michael@0 | 18 | |
michael@0 | 19 | #ifndef __USPOOF_BUILDCONF_H__ |
michael@0 | 20 | #define __USPOOF_BUILDCONF_H__ |
michael@0 | 21 | |
michael@0 | 22 | #if !UCONFIG_NO_NORMALIZATION |
michael@0 | 23 | |
michael@0 | 24 | #if !UCONFIG_NO_REGULAR_EXPRESSIONS |
michael@0 | 25 | |
michael@0 | 26 | #include "uspoof_impl.h" |
michael@0 | 27 | |
michael@0 | 28 | U_NAMESPACE_BEGIN |
michael@0 | 29 | |
michael@0 | 30 | // struct SPUString |
michael@0 | 31 | // Holds one string that is the result of one of the mappings defined |
michael@0 | 32 | // by the confusable mapping data (confusables.txt from Unicode.org). |
michael@0 | 33 | // Instances of SPUString exist only during the compilation process. |
michael@0 | 34 | |
michael@0 | 35 | struct SPUString : public UMemory { |
michael@0 | 36 | UnicodeString *fStr; // The actual string. NOTE(review): presumably owned by this object and released in ~SPUString() - confirm in the implementation. |
michael@0 | 37 | int32_t fStrTableIndex; // Index of this string in the final runtime data |
michael@0 | 38 | // (or, for a string of length 1, the single character itself, |
michael@0 | 39 | // there being no string table entry for such strings). |
michael@0 | 40 | SPUString(UnicodeString *s); |
michael@0 | 41 | ~SPUString(); |
michael@0 | 42 | }; |
michael@0 | 43 | |
michael@0 | 44 | |
michael@0 | 45 | // class SPUStringPool  A utility class for holding the strings that are the result of |
michael@0 | 46 | // the spoof mappings. These strings will ultimately end up in the |
michael@0 | 47 | // run-time String Table. |
michael@0 | 48 | // This behaves somewhat like a sorted set of strings. ICU's built-in |
michael@0 | 49 | // collections do not provide one, so it is implemented with a |
michael@0 | 50 | // combination of a uhash (fHash) and a UVector (fVec). |
michael@0 | 51 | |
michael@0 | 52 | |
michael@0 | 53 | class SPUStringPool : public UMemory { |
michael@0 | 54 | public: |
michael@0 | 55 | SPUStringPool(UErrorCode &status); |
michael@0 | 56 | ~SPUStringPool(); |
michael@0 | 57 | |
michael@0 | 58 | // Add a string to the pool and return the pool's entry for it. |
michael@0 | 59 | // If an equal string is already in the pool, the input parameter is |
michael@0 | 60 | // deleted and the existing entry is returned; do not use src afterwards in that case. |
michael@0 | 61 | SPUString *addString(UnicodeString *src, UErrorCode &status); |
michael@0 | 62 | |
michael@0 | 63 | |
michael@0 | 64 | // Get the i-th string in the collection. |
michael@0 | 65 | SPUString *getByIndex(int32_t i); |
michael@0 | 66 | |
michael@0 | 67 | // Sort the contents; this changes which string getByIndex() returns for a given index. |
michael@0 | 68 | void sort(UErrorCode &status); |
michael@0 | 69 | |
michael@0 | 70 | int32_t size(); |
michael@0 | 71 | |
michael@0 | 72 | private: |
michael@0 | 73 | UVector *fVec; // Elements are SPUString * |
michael@0 | 74 | UHashtable *fHash; // Key: UnicodeString Value: SPUString |
michael@0 | 75 | }; |
michael@0 | 76 | |
michael@0 | 77 | |
michael@0 | 78 | // class ConfusabledataBuilder |
michael@0 | 79 | // An instance of this class exists only while the confusable data is being built from source. |
michael@0 | 80 | // It encapsulates the intermediate data structures that are used for building. |
michael@0 | 81 | // Its only public member is one static function, buildConfusableData(), which does a confusable data build. |
michael@0 | 82 | |
michael@0 | 83 | class ConfusabledataBuilder : public UMemory { |
michael@0 | 84 | private: |
michael@0 | 85 | SpoofImpl *fSpoofImpl; |
michael@0 | 86 | UChar *fInput; |
michael@0 | 87 | UHashtable *fSLTable; |
michael@0 | 88 | UHashtable *fSATable; |
michael@0 | 89 | UHashtable *fMLTable; |
michael@0 | 90 | UHashtable *fMATable; |
michael@0 | 91 | UnicodeSet *fKeySet; // The set of all keys (UChar32s) that go into the four mapping tables above. |
michael@0 | 92 | |
michael@0 | 93 | // The binary data is first assembled into the following four collections, and is then |
michael@0 | 94 | // copied to its final raw-memory destination. |
michael@0 | 95 | UVector *fKeyVec; |
michael@0 | 96 | UVector *fValueVec; |
michael@0 | 97 | UnicodeString *fStringTable; |
michael@0 | 98 | UVector *fStringLengthsTable; |
michael@0 | 99 | |
michael@0 | 100 | SPUStringPool *stringPool; |
michael@0 | 101 | URegularExpression *fParseLine; |
michael@0 | 102 | URegularExpression *fParseHexNum; |
michael@0 | 103 | int32_t fLineNum; |
michael@0 | 104 | |
michael@0 | 105 | ConfusabledataBuilder(SpoofImpl *spImpl, UErrorCode &status); |
michael@0 | 106 | ~ConfusabledataBuilder(); |
michael@0 | 107 | void build(const char * confusables, int32_t confusablesLen, UErrorCode &status); |
michael@0 | 108 | |
michael@0 | 109 | // Add an entry to the key and value tables being built. |
michael@0 | 110 | // input: data from SLTable, MATable, etc. |
michael@0 | 111 | // output: entry added to fKeyVec and fValueVec |
michael@0 | 112 | void addKeyEntry(UChar32 keyChar, // The key character |
michael@0 | 113 | UHashtable *table, // The source table, one of SATable, MATable, etc. |
michael@0 | 114 | int32_t tableFlag, // The flag for that table, one of USPOOF_SA_TABLE_FLAG, etc. |
michael@0 | 115 | UErrorCode &status); |
michael@0 | 116 | |
michael@0 | 117 | // Given an index into fKeyVec & fValueVec, |
michael@0 | 118 | // get a UnicodeString with the corresponding mapping. |
michael@0 | 119 | UnicodeString getMapping(int32_t index); |
michael@0 | 120 | |
michael@0 | 121 | // Populate the final binary output data array with the compiled data. |
michael@0 | 122 | void outputData(UErrorCode &status); |
michael@0 | 123 | |
michael@0 | 124 | public: |
michael@0 | 125 | static void buildConfusableData(SpoofImpl *spImpl, const char * confusables, |
michael@0 | 126 | int32_t confusablesLen, int32_t *errorType, UParseError *pe, UErrorCode &status); |
michael@0 | 127 | }; |
michael@0 | 128 | U_NAMESPACE_END |
michael@0 | 129 | |
michael@0 | 130 | #endif // !UCONFIG_NO_REGULAR_EXPRESSIONS |
michael@0 | 131 | #endif // !UCONFIG_NO_NORMALIZATION |
michael@0 | 132 | #endif // __USPOOF_BUILDCONF_H__ |