intl/icu/source/i18n/uspoof_conf.h

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/i18n/uspoof_conf.h	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,132 @@
     1.4 +/*
     1.5 +******************************************************************************
     1.6 +*
     1.7 +*   Copyright (C) 2008-2011, International Business Machines
     1.8 +*   Corporation and others.  All Rights Reserved.
     1.9 +*
    1.10 +******************************************************************************
    1.11 +*   file name:  uspoof_conf.h
    1.12 +*   encoding:   US-ASCII
    1.13 +*   tab size:   8 (not used)
    1.14 +*   indentation:4
    1.15 +*
    1.16 +*   created on: 2009Jan05
    1.17 +*   created by: Andy Heninger
    1.18 +*
    1.19 +*   Internal classes for compiling confusable data into its binary (runtime) form.
    1.20 +*/
    1.21 +
    1.22 +#ifndef __USPOOF_BUILDCONF_H__
    1.23 +#define __USPOOF_BUILDCONF_H__
    1.24 +
    1.25 +#if !UCONFIG_NO_NORMALIZATION
    1.26 +
    1.27 +#if !UCONFIG_NO_REGULAR_EXPRESSIONS 
    1.28 +
    1.29 +#include "uspoof_impl.h"
    1.30 +
    1.31 +U_NAMESPACE_BEGIN
    1.32 +
    1.33 +// SPUString
    1.34 +//              Holds a string that is the result of one of the mappings defined
    1.35 +//              by the confusable mapping data (confusables.txt from Unicode.org)
    1.36 +//              Instances of SPUString exist during the compilation process only.
    1.37 +
    1.38 +struct SPUString : public UMemory {
    1.39 +    UnicodeString  *fStr;             // The actual string.
    1.40 +    int32_t         fStrTableIndex;   // Index into the final runtime data for this string.
    1.41 +                                      //  (or, for length 1, the single string char itself,
    1.42 +                                      //   there being no string table entry for it.)
    1.43 +    SPUString(UnicodeString *s);
    1.44 +    ~SPUString();
    1.45 +};
    1.46 +
    1.47 +
    1.48 +//  String Pool   A utility class for holding the strings that are the result of
    1.49 +//                the spoof mappings.  These strings will utimately end up in the
    1.50 +//                run-time String Table.
    1.51 +//                This is sort of like a sorted set of strings, except that ICU's anemic
    1.52 +//                built-in collections don't support those, so it is implemented with a
    1.53 +//                combination of a uhash and a UVector.
    1.54 +
    1.55 +
    1.56 +class SPUStringPool : public UMemory {
    1.57 +  public:
    1.58 +    SPUStringPool(UErrorCode &status);
    1.59 +    ~SPUStringPool();
    1.60 +    
    1.61 +    // Add a string. Return the string from the table.
    1.62 +    // If the input parameter string is already in the table, delete the
    1.63 +    //  input parameter and return the existing string.
    1.64 +    SPUString *addString(UnicodeString *src, UErrorCode &status);
    1.65 +
    1.66 +
    1.67 +    // Get the n-th string in the collection.
    1.68 +    SPUString *getByIndex(int32_t i);
    1.69 +
    1.70 +    // Sort the contents; affects the ordering of getByIndex().
    1.71 +    void sort(UErrorCode &status);
    1.72 +
    1.73 +    int32_t size();
    1.74 +
    1.75 +  private:
    1.76 +    UVector     *fVec;    // Elements are SPUString *
    1.77 +    UHashtable  *fHash;   // Key: UnicodeString  Value: SPUString
    1.78 +};
    1.79 +
    1.80 +
    1.81 +// class ConfusabledataBuilder
    1.82 +//     An instance of this class exists while the confusable data is being built from source.
    1.83 +//     It encapsulates the intermediate data structures that are used for building.
    1.84 +//     It exports one static function, to do a confusable data build.
    1.85 +
    1.86 +class ConfusabledataBuilder : public UMemory {
    1.87 +  private:
    1.88 +    SpoofImpl  *fSpoofImpl;
    1.89 +    UChar      *fInput;
    1.90 +    UHashtable *fSLTable;
    1.91 +    UHashtable *fSATable; 
    1.92 +    UHashtable *fMLTable; 
    1.93 +    UHashtable *fMATable;
    1.94 +    UnicodeSet *fKeySet;     // A set of all keys (UChar32s) that go into the four mapping tables.
    1.95 +
    1.96 +    // The binary data is first assembled into the following four collections, then
    1.97 +    //   copied to its final raw-memory destination.
    1.98 +    UVector            *fKeyVec;
    1.99 +    UVector            *fValueVec;
   1.100 +    UnicodeString      *fStringTable;
   1.101 +    UVector            *fStringLengthsTable;
   1.102 +    
   1.103 +    SPUStringPool      *stringPool;
   1.104 +    URegularExpression *fParseLine;
   1.105 +    URegularExpression *fParseHexNum;
   1.106 +    int32_t             fLineNum;
   1.107 +
   1.108 +    ConfusabledataBuilder(SpoofImpl *spImpl, UErrorCode &status);
   1.109 +    ~ConfusabledataBuilder();
   1.110 +    void build(const char * confusables, int32_t confusablesLen, UErrorCode &status);
   1.111 +
   1.112 +    // Add an entry to the key and value tables being built
   1.113 +    //   input:  data from SLTable, MATable, etc.
   1.114 +    //   outut:  entry added to fKeyVec and fValueVec
   1.115 +    void addKeyEntry(UChar32     keyChar,     // The key character
   1.116 +                     UHashtable *table,       // The table, one of SATable, MATable, etc.
   1.117 +                     int32_t     tableFlag,   // One of USPOOF_SA_TABLE_FLAG, etc.
   1.118 +                     UErrorCode &status);
   1.119 +
   1.120 +    // From an index into fKeyVec & fValueVec
   1.121 +    //   get a UnicodeString with the corresponding mapping.
   1.122 +    UnicodeString getMapping(int32_t index);
   1.123 +
   1.124 +    // Populate the final binary output data array with the compiled data.
   1.125 +    void outputData(UErrorCode &status);
   1.126 +
   1.127 +  public:
   1.128 +    static void buildConfusableData(SpoofImpl *spImpl, const char * confusables,
   1.129 +        int32_t confusablesLen, int32_t *errorType, UParseError *pe, UErrorCode &status);
   1.130 +};
   1.131 +U_NAMESPACE_END
   1.132 +
   1.133 +#endif
   1.134 +#endif  // !UCONFIG_NO_REGULAR_EXPRESSIONS 
   1.135 +#endif  // __USPOOF_BUILDCONF_H__

mercurial