intl/icu/source/i18n/uspoof_conf.h

Wed, 31 Dec 2014 07:22:50 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 07:22:50 +0100
branch
TOR_BUG_3246
changeset 4
fc2d59ddac77
permissions
-rw-r--r--

Correct previous dual key logic pending first delivery installment.

     1 /*
     2 ******************************************************************************
     3 *
     4 *   Copyright (C) 2008-2011, International Business Machines
     5 *   Corporation and others.  All Rights Reserved.
     6 *
     7 ******************************************************************************
     8 *   file name:  uspoof_conf.h
     9 *   encoding:   US-ASCII
    10 *   tab size:   8 (not used)
    11 *   indentation:4
    12 *
    13 *   created on: 2009Jan05
    14 *   created by: Andy Heninger
    15 *
    16 *   Internal classes for compiling confusable data into its binary (runtime) form.
    17 */
    19 #ifndef __USPOOF_BUILDCONF_H__
    20 #define __USPOOF_BUILDCONF_H__
    22 #if !UCONFIG_NO_NORMALIZATION
    24 #if !UCONFIG_NO_REGULAR_EXPRESSIONS 
    26 #include "uspoof_impl.h"
    28 U_NAMESPACE_BEGIN
    30 // SPUString
    31 //              Holds a string that is the result of one of the mappings defined
    32 //              by the confusable mapping data (confusables.txt from Unicode.org).
    33 //              Instances of SPUString exist during the compilation process only.
    35 struct SPUString : public UMemory {
    36     UnicodeString  *fStr;             // The actual string.
    37     int32_t         fStrTableIndex;   // Index into the final runtime data for this string
    38                                       //  (or, for a string of length 1, the single string char
    39                                       //   itself, there being no string table entry for it).
    40     SPUString(UnicodeString *s);      // NOTE(review): presumably stores s in fStr and adopts it - confirm in uspoof_conf.cpp.
    41     ~SPUString();                     // NOTE(review): presumably deletes fStr - confirm in uspoof_conf.cpp.
    42 };
    45 //  String Pool   A utility class for holding the strings that are the result of
    46 //                the spoof mappings.  These strings will ultimately end up in the
    47 //                run-time String Table.
    48 //                This is sort of like a sorted set of strings, except that ICU's
    49 //                built-in collections don't support those, so it is implemented with a
    50 //                combination of a uhash (fHash) and a UVector (fVec).
    53 class SPUStringPool : public UMemory {
    54   public:
    55     SPUStringPool(UErrorCode &status);   // NOTE(review): failures are presumably reported through status - confirm.
    56     ~SPUStringPool();
    58     // Add a string. Return the string from the table.
    59     // If the input parameter string is already in the table, delete the
    60     //  input parameter and return the existing string.
    //     Either way the caller must not use src after the call; use the returned SPUString.
    61     SPUString *addString(UnicodeString *src, UErrorCode &status);
    64     // Get the n-th string in the collection.
    //     The ordering is affected by sort().
    65     SPUString *getByIndex(int32_t i);
    67     // Sort the contents; affects the ordering of getByIndex().
    68     void sort(UErrorCode &status);
    //     NOTE(review): presumably the number of strings held in the pool - confirm in uspoof_conf.cpp.
    70     int32_t size();
    72   private:
    73     UVector     *fVec;    // Elements are SPUString *
    74     UHashtable  *fHash;   // Key: UnicodeString  Value: SPUString
    75 };
    78 // class ConfusabledataBuilder
    79 //     An instance of this class exists while the confusable data is being built from source.
    80 //     It encapsulates the intermediate data structures that are used for building.
    81 //     It exports one static function, buildConfusableData(), to do a confusable data build.
    //     Everything else, including the constructor and destructor, is private.
    83 class ConfusabledataBuilder : public UMemory {
    84   private:
    85     SpoofImpl  *fSpoofImpl;
    86     UChar      *fInput;
    //     The four mapping tables.
    //     NOTE(review): SL/SA/ML/MA presumably correspond to the four confusables tables
    //     (single-/mixed-script, lowercase/any-case) - confirm against uspoof_impl.h.
    87     UHashtable *fSLTable;
    88     UHashtable *fSATable; 
    89     UHashtable *fMLTable; 
    90     UHashtable *fMATable;
    91     UnicodeSet *fKeySet;     // A set of all keys (UChar32s) that go into the four mapping tables.
    93     // The binary data is first assembled into the following four collections, then
    94     //   copied to its final raw-memory destination.
    95     UVector            *fKeyVec;
    96     UVector            *fValueVec;
    97     UnicodeString      *fStringTable;
    98     UVector            *fStringLengthsTable;
   100     SPUStringPool      *stringPool;
   101     URegularExpression *fParseLine;     // NOTE(review): presumably matches one line of the confusables input - confirm.
   102     URegularExpression *fParseHexNum;   // NOTE(review): presumably matches a hex code point within a line - confirm.
   103     int32_t             fLineNum;       // NOTE(review): presumably the input line currently being processed - confirm.
   105     ConfusabledataBuilder(SpoofImpl *spImpl, UErrorCode &status);
   106     ~ConfusabledataBuilder();
    //     confusables / confusablesLen: the source data as a char buffer and its length.
   107     void build(const char * confusables, int32_t confusablesLen, UErrorCode &status);
   109     // Add an entry to the key and value tables being built
   110     //   input:  data from SLTable, MATable, etc.
   111     //   output: entry added to fKeyVec and fValueVec
   112     void addKeyEntry(UChar32     keyChar,     // The key character
   113                      UHashtable *table,       // The table, one of SATable, MATable, etc.
   114                      int32_t     tableFlag,   // One of USPOOF_SA_TABLE_FLAG, etc.
   115                      UErrorCode &status);
   117     // From an index into fKeyVec & fValueVec
   118     //   get a UnicodeString with the corresponding mapping.
   119     UnicodeString getMapping(int32_t index);
   121     // Populate the final binary output data array with the compiled data.
   122     void outputData(UErrorCode &status);
   124   public:
    //     The single public entry point of this class.
    //     NOTE(review): errorType and pe look like out-parameters describing a failure in the
    //     source data - confirm in uspoof_conf.cpp.
   125     static void buildConfusableData(SpoofImpl *spImpl, const char * confusables,
   126         int32_t confusablesLen, int32_t *errorType, UParseError *pe, UErrorCode &status);
   127 };
   128 U_NAMESPACE_END
   130 #endif  // !UCONFIG_NO_REGULAR_EXPRESSIONS
   131 #endif  // !UCONFIG_NO_NORMALIZATION
   132 #endif  // __USPOOF_BUILDCONF_H__

mercurial