intl/icu/source/i18n/uspoof_conf.h

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 /*
michael@0 2 ******************************************************************************
michael@0 3 *
michael@0 4 * Copyright (C) 2008-2011, International Business Machines
michael@0 5 * Corporation and others. All Rights Reserved.
michael@0 6 *
michael@0 7 ******************************************************************************
michael@0 8 * file name: uspoof_conf.h
michael@0 9 * encoding: US-ASCII
michael@0 10 * tab size: 8 (not used)
michael@0 11 * indentation:4
michael@0 12 *
michael@0 13 * created on: 2009Jan05
michael@0 14 * created by: Andy Heninger
michael@0 15 *
michael@0 16 * Internal classes for compiling confusable data into its binary (runtime) form.
michael@0 17 */
michael@0 18
michael@0 19 #ifndef __USPOOF_BUILDCONF_H__
michael@0 20 #define __USPOOF_BUILDCONF_H__
michael@0 21
michael@0 22 #if !UCONFIG_NO_NORMALIZATION
michael@0 23
michael@0 24 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
michael@0 25
michael@0 26 #include "uspoof_impl.h"
michael@0 27
michael@0 28 U_NAMESPACE_BEGIN
michael@0 29
michael@0 30 // SPUString
michael@0 31 // Holds a string that is the result of one of the mappings defined
michael@0 32 // by the confusable mapping data (confusables.txt from Unicode.org)
michael@0 33 // Instances of SPUString exist during the compilation process only.
michael@0 34
michael@0 35 struct SPUString : public UMemory { // One mapping-result string; instances exist only while confusable data is being compiled.
michael@0 36 UnicodeString *fStr; // The actual string.
michael@0 37 int32_t fStrTableIndex; // Index into the final runtime data for this string.
michael@0 38 // (or, for length 1, the single string char itself,
michael@0 39 // there being no string table entry for it.)
michael@0 40 SPUString(UnicodeString *s); // NOTE(review): presumably stores s in fStr and takes ownership of it -- confirm in the implementation.
michael@0 41 ~SPUString(); // NOTE(review): presumably deletes fStr -- confirm in the implementation.
michael@0 42 };
michael@0 43
michael@0 44
michael@0 45 // String Pool: A utility class for holding the strings that are the result of
michael@0 46 // the spoof mappings. These strings will ultimately end up in the
michael@0 47 // run-time String Table.
michael@0 48 // This is sort of like a sorted set of strings, except that ICU's anemic
michael@0 49 // built-in collections don't support those, so it is implemented with a
michael@0 50 // combination of a uhash and a UVector.
michael@0 51
michael@0 52
michael@0 53 class SPUStringPool : public UMemory { // Pool of SPUString objects, backed by a UVector plus a UHashtable (see private members).
michael@0 54 public:
michael@0 55 SPUStringPool(UErrorCode &status); // NOTE(review): presumably creates fVec and fHash and reports failures via status -- confirm.
michael@0 56 ~SPUStringPool();
michael@0 57
michael@0 58 // Add a string. Return the string from the table.
michael@0 59 // If the input parameter string is already in the table, delete the
michael@0 60 // input parameter and return the existing string.
michael@0 61 SPUString *addString(UnicodeString *src, UErrorCode &status); // Callers must not use src after this call: it may have been deleted (see above).
michael@0 62
michael@0 63
michael@0 64 // Get the n-th string in the collection.
michael@0 65 SPUString *getByIndex(int32_t i); // NOTE(review): presumably zero-based and not bounds-checked here -- confirm.
michael@0 66
michael@0 67 // Sort the contents; affects the ordering of getByIndex().
michael@0 68 void sort(UErrorCode &status);
michael@0 69
michael@0 70 int32_t size(); // NOTE(review): presumably the number of strings currently in the pool -- confirm.
michael@0 71
michael@0 72 private:
michael@0 73 UVector *fVec; // Elements are SPUString *; presumably the container indexed by getByIndex() and reordered by sort() -- confirm.
michael@0 74 UHashtable *fHash; // Key: UnicodeString Value: SPUString; presumably used by addString() to detect duplicates -- confirm.
michael@0 75 };
michael@0 76
michael@0 77
michael@0 78 // class ConfusabledataBuilder
michael@0 79 // An instance of this class exists while the confusable data is being built from source.
michael@0 80 // It encapsulates the intermediate data structures that are used for building.
michael@0 81 // It exports one static function, to do a confusable data build.
michael@0 82
michael@0 83 class ConfusabledataBuilder : public UMemory { // Holds the intermediate state of one confusable-data build.
michael@0 84 private:
michael@0 85 SpoofImpl *fSpoofImpl; // NOTE(review): presumably the spImpl passed to the constructor, i.e. the object receiving the built data -- confirm.
michael@0 86 UChar *fInput; // NOTE(review): presumably the confusables source text as UChars -- confirm.
michael@0 87 UHashtable *fSLTable; // fSLTable, fSATable, fMLTable, fMATable: the four mapping tables whose keys are collected in fKeySet.
michael@0 88 UHashtable *fSATable;
michael@0 89 UHashtable *fMLTable;
michael@0 90 UHashtable *fMATable;
michael@0 91 UnicodeSet *fKeySet; // A set of all keys (UChar32s) that go into the four mapping tables.
michael@0 92
michael@0 93 // The binary data is first assembled into the following four collections, then
michael@0 94 // copied to its final raw-memory destination.
michael@0 95 UVector *fKeyVec;
michael@0 96 UVector *fValueVec;
michael@0 97 UnicodeString *fStringTable;
michael@0 98 UVector *fStringLengthsTable;
michael@0 99
michael@0 100 SPUStringPool *stringPool; // Pool of mapping-result strings (note: no 'f' prefix, unlike the other members).
michael@0 101 URegularExpression *fParseLine; // NOTE(review): presumably matches one line of the confusables input -- confirm.
michael@0 102 URegularExpression *fParseHexNum; // NOTE(review): presumably matches a hex number within a line -- confirm.
michael@0 103 int32_t fLineNum; // NOTE(review): presumably the input line currently being parsed, for error reporting -- confirm.
michael@0 104
michael@0 105 ConfusabledataBuilder(SpoofImpl *spImpl, UErrorCode &status); // Private: the static buildConfusableData() below is the only public member.
michael@0 106 ~ConfusabledataBuilder();
michael@0 107 void build(const char * confusables, int32_t confusablesLen, UErrorCode &status); // NOTE(review): confusablesLen is presumably a length in bytes (char units) -- confirm.
michael@0 108
michael@0 109 // Add an entry to the key and value tables being built
michael@0 110 // input: data from SLTable, MATable, etc.
michael@0 111 // output: entry added to fKeyVec and fValueVec
michael@0 112 void addKeyEntry(UChar32 keyChar, // The key character
michael@0 113 UHashtable *table, // The table, one of SATable, MATable, etc.
michael@0 114 int32_t tableFlag, // One of USPOOF_SA_TABLE_FLAG, etc.
michael@0 115 UErrorCode &status);
michael@0 116
michael@0 117 // From an index into fKeyVec & fValueVec
michael@0 118 // get a UnicodeString with the corresponding mapping.
michael@0 119 UnicodeString getMapping(int32_t index); // Returns the mapping by value.
michael@0 120
michael@0 121 // Populate the final binary output data array with the compiled data.
michael@0 122 void outputData(UErrorCode &status);
michael@0 123
michael@0 124 public:
michael@0 125 static void buildConfusableData(SpoofImpl *spImpl, const char * confusables, // The one exported function: performs a confusable data build.
michael@0 126 int32_t confusablesLen, int32_t *errorType, UParseError *pe, UErrorCode &status); // NOTE(review): errorType and pe are presumably out-parameters describing a parse failure -- confirm.
michael@0 127 };
michael@0 128 U_NAMESPACE_END
michael@0 129
michael@0 130 #endif // !UCONFIG_NO_REGULAR_EXPRESSIONS
michael@0 131 #endif // !UCONFIG_NO_NORMALIZATION
michael@0 132 #endif // __USPOOF_BUILDCONF_H__

mercurial