/*
******************************************************************************
*
*   Copyright (C) 2008-2011, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
******************************************************************************
*   file name:  uspoof_conf.h
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2009Jan05
*   created by: Andy Heninger
*
*   Internal classes for compiling confusable data into its binary (runtime) form.
*/

#ifndef __USPOOF_BUILDCONF_H__
#define __USPOOF_BUILDCONF_H__

// Everything below is compiled out when either normalization or regular
// expressions are configured out of the build.
#if !UCONFIG_NO_NORMALIZATION

#if !UCONFIG_NO_REGULAR_EXPRESSIONS

#include "uspoof_impl.h"

U_NAMESPACE_BEGIN

// SPUString
//              Holds a string that is the result of one of the mappings defined
//              by the confusable mapping data (confusables.txt from Unicode.org).
//              Instances of SPUString exist during the compilation process only.

struct SPUString : public UMemory {
    UnicodeString  *fStr;             // The actual string.
    int32_t         fStrTableIndex;   // Index into the final runtime data for this string.
                                      //  (or, for length 1, the single string char itself,
                                      //       there being no string table entry for it.)

    // NOTE(review): fStr is a raw pointer and the struct has a destructor, so
    //   the SPUString presumably takes ownership of s -- confirm against the
    //   definitions in the implementation file.
    SPUString(UnicodeString *s);
    ~SPUString();
};


//  String Pool   A utility class for holding the strings that are the result of
//                the spoof mappings.  These strings will ultimately end up in the
//                run-time String Table.
//                This is sort of like a sorted set of strings, except that ICU's anemic
//                built-in collections don't support those, so it is implemented with a
//                combination of a uhash and a UVector.


class SPUStringPool : public UMemory {
  public:
    // status: in/out ICU error code.
    SPUStringPool(UErrorCode &status);
    ~SPUStringPool();

    // Add a string. Return the string from the table.
    // If the input parameter string is already in the table, delete the
    //  input parameter and return the existing string.
    // Because src may be deleted by this call, callers must not use src
    //  afterwards; use the returned SPUString instead.
    SPUString *addString(UnicodeString *src, UErrorCode &status);


    // Get the n-th string in the collection.
    SPUString *getByIndex(int32_t i);

    // Sort the contents; affects the ordering of getByIndex().
    void sort(UErrorCode &status);

    // NOTE(review): presumably the number of strings currently in the pool,
    //   i.e. the exclusive upper bound for getByIndex() -- confirm.
    int32_t size();

  private:
    UVector     *fVec;    // Elements are SPUString *
    UHashtable  *fHash;   // Key: UnicodeString  Value: SPUString
};


// class ConfusabledataBuilder
//     An instance of this class exists while the confusable data is being built from source.
//     It encapsulates the intermediate data structures that are used for building.
//     It exports one static function, to do a confusable data build.
//
//     Everything except that static function, including the constructor and
//     destructor, is private, so code outside the class cannot create or
//     destroy instances directly.

class ConfusabledataBuilder : public UMemory {
  private:
    SpoofImpl  *fSpoofImpl;
    UChar      *fInput;
    // The four mapping tables.
    // NOTE(review): presumably one table per confusable table type
    //   (SL, SA, ML, MA) in confusables.txt -- confirm against the data format.
    UHashtable *fSLTable;
    UHashtable *fSATable;
    UHashtable *fMLTable;
    UHashtable *fMATable;
    UnicodeSet *fKeySet;     // A set of all keys (UChar32s) that go into the four mapping tables.

    // The binary data is first assembled into the following four collections, then
    //   copied to its final raw-memory destination.
    UVector            *fKeyVec;
    UVector            *fValueVec;
    UnicodeString      *fStringTable;
    UVector            *fStringLengthsTable;

    SPUStringPool      *stringPool;
    URegularExpression *fParseLine;
    URegularExpression *fParseHexNum;
    int32_t             fLineNum;

    ConfusabledataBuilder(SpoofImpl *spImpl, UErrorCode &status);
    ~ConfusabledataBuilder();

    // NOTE(review): presumably confusables points at the confusable source
    //   text and confusablesLen is its length -- confirm units and encoding
    //   against the implementation.
    void build(const char * confusables, int32_t confusablesLen, UErrorCode &status);

    // Add an entry to the key and value tables being built
    //   input:  data from SLTable, MATable, etc.
    //   output: entry added to fKeyVec and fValueVec
    void addKeyEntry(UChar32     keyChar,     // The key character
                     UHashtable *table,       // The table, one of SATable, MATable, etc.
                     int32_t     tableFlag,   // One of USPOOF_SA_TABLE_FLAG, etc.
                     UErrorCode &status);

    // From an index into fKeyVec & fValueVec
    //   get a UnicodeString with the corresponding mapping.
    UnicodeString getMapping(int32_t index);

    // Populate the final binary output data array with the compiled data.
    void outputData(UErrorCode &status);

  public:
    // The single exported entry point: do a confusable data build.
    // NOTE(review): errorType and pe look like out-parameters for reporting
    //   problems in the input -- confirm their exact contract in the
    //   implementation.
    static void buildConfusableData(SpoofImpl *spImpl, const char * confusables,
        int32_t confusablesLen, int32_t *errorType, UParseError *pe, UErrorCode &status);
};
U_NAMESPACE_END

#endif  // !UCONFIG_NO_REGULAR_EXPRESSIONS
#endif  // !UCONFIG_NO_NORMALIZATION
#endif  // __USPOOF_BUILDCONF_H__