michael@0: /* michael@0: ****************************************************************************** michael@0: * michael@0: * Copyright (C) 2008-2011, International Business Machines michael@0: * Corporation and others. All Rights Reserved. michael@0: * michael@0: ****************************************************************************** michael@0: * file name: uspoof_conf.h michael@0: * encoding: US-ASCII michael@0: * tab size: 8 (not used) michael@0: * indentation:4 michael@0: * michael@0: * created on: 2009Jan05 michael@0: * created by: Andy Heninger michael@0: * michael@0: * Internal classes for compiling confusable data into its binary (runtime) form. michael@0: */ michael@0: michael@0: #ifndef __USPOOF_BUILDCONF_H__ michael@0: #define __USPOOF_BUILDCONF_H__ michael@0: michael@0: #if !UCONFIG_NO_NORMALIZATION michael@0: michael@0: #if !UCONFIG_NO_REGULAR_EXPRESSIONS michael@0: michael@0: #include "uspoof_impl.h" michael@0: michael@0: U_NAMESPACE_BEGIN michael@0: michael@0: // SPUString michael@0: // Holds a string that is the result of one of the mappings defined michael@0: // by the confusable mapping data (confusables.txt from Unicode.org) michael@0: // Instances of SPUString exist during the compilation process only. michael@0: michael@0: struct SPUString : public UMemory { michael@0: UnicodeString *fStr; // The actual string. michael@0: int32_t fStrTableIndex; // Index into the final runtime data for this string. michael@0: // (or, for length 1, the single string char itself, michael@0: // there being no string table entry for it.) michael@0: SPUString(UnicodeString *s); michael@0: ~SPUString(); michael@0: }; michael@0: michael@0: michael@0: // String Pool A utility class for holding the strings that are the result of michael@0: // the spoof mappings. These strings will utimately end up in the michael@0: // run-time String Table. michael@0: // This is sort of like a sorted set of strings, except that ICU's anemic michael@0: // built-in collections don't support those, so it is implemented with a michael@0: // combination of a uhash and a UVector. michael@0: michael@0: michael@0: class SPUStringPool : public UMemory { michael@0: public: michael@0: SPUStringPool(UErrorCode &status); michael@0: ~SPUStringPool(); michael@0: michael@0: // Add a string. Return the string from the table. michael@0: // If the input parameter string is already in the table, delete the michael@0: // input parameter and return the existing string. michael@0: SPUString *addString(UnicodeString *src, UErrorCode &status); michael@0: michael@0: michael@0: // Get the n-th string in the collection. michael@0: SPUString *getByIndex(int32_t i); michael@0: michael@0: // Sort the contents; affects the ordering of getByIndex(). michael@0: void sort(UErrorCode &status); michael@0: michael@0: int32_t size(); michael@0: michael@0: private: michael@0: UVector *fVec; // Elements are SPUString * michael@0: UHashtable *fHash; // Key: UnicodeString Value: SPUString michael@0: }; michael@0: michael@0: michael@0: // class ConfusabledataBuilder michael@0: // An instance of this class exists while the confusable data is being built from source. michael@0: // It encapsulates the intermediate data structures that are used for building. michael@0: // It exports one static function, to do a confusable data build. michael@0: michael@0: class ConfusabledataBuilder : public UMemory { michael@0: private: michael@0: SpoofImpl *fSpoofImpl; michael@0: UChar *fInput; michael@0: UHashtable *fSLTable; michael@0: UHashtable *fSATable; michael@0: UHashtable *fMLTable; michael@0: UHashtable *fMATable; michael@0: UnicodeSet *fKeySet; // A set of all keys (UChar32s) that go into the four mapping tables. michael@0: michael@0: // The binary data is first assembled into the following four collections, then michael@0: // copied to its final raw-memory destination. michael@0: UVector *fKeyVec; michael@0: UVector *fValueVec; michael@0: UnicodeString *fStringTable; michael@0: UVector *fStringLengthsTable; michael@0: michael@0: SPUStringPool *stringPool; michael@0: URegularExpression *fParseLine; michael@0: URegularExpression *fParseHexNum; michael@0: int32_t fLineNum; michael@0: michael@0: ConfusabledataBuilder(SpoofImpl *spImpl, UErrorCode &status); michael@0: ~ConfusabledataBuilder(); michael@0: void build(const char * confusables, int32_t confusablesLen, UErrorCode &status); michael@0: michael@0: // Add an entry to the key and value tables being built michael@0: // input: data from SLTable, MATable, etc. michael@0: // outut: entry added to fKeyVec and fValueVec michael@0: void addKeyEntry(UChar32 keyChar, // The key character michael@0: UHashtable *table, // The table, one of SATable, MATable, etc. michael@0: int32_t tableFlag, // One of USPOOF_SA_TABLE_FLAG, etc. michael@0: UErrorCode &status); michael@0: michael@0: // From an index into fKeyVec & fValueVec michael@0: // get a UnicodeString with the corresponding mapping. michael@0: UnicodeString getMapping(int32_t index); michael@0: michael@0: // Populate the final binary output data array with the compiled data. michael@0: void outputData(UErrorCode &status); michael@0: michael@0: public: michael@0: static void buildConfusableData(SpoofImpl *spImpl, const char * confusables, michael@0: int32_t confusablesLen, int32_t *errorType, UParseError *pe, UErrorCode &status); michael@0: }; michael@0: U_NAMESPACE_END michael@0: michael@0: #endif michael@0: #endif // !UCONFIG_NO_REGULAR_EXPRESSIONS michael@0: #endif // __USPOOF_BUILDCONF_H__