intl/icu/source/i18n/identifier_info.h

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

     1 /*
     2 **********************************************************************
     3 *   Copyright (C) 2013, International Business Machines
     4 *   Corporation and others.  All Rights Reserved.
     5 **********************************************************************
     6 *
     7 * identifier_info.h
     8 * 
     9 * created on: 2013 Jan 7
    10 * created by: Andy Heninger
    11 */
    13 #ifndef __IDENTIFIER_INFO_H__
    14 #define __IDENTIFIER_INFO_H__
    16 #include "unicode/utypes.h"
    18 #include "unicode/uniset.h"
    19 #include "unicode/uspoof.h"
    20 #include "uhash.h"
    22 U_NAMESPACE_BEGIN
    24 class ScriptSet;
    26 // TODO(andy): review consistency of reference vs pointer arguments to the functions.
    28 /**
    29  * This class analyzes a possible identifier for script and identifier status. Use it by calling setIdentifierProfile
    30  * then setIdentifier. Available methods include:
    31  * <ol>
    32  * <li>call getScripts for the specific scripts in the identifier. The identifier contains at least one character in
    33  * each of these.
    34  * <li>call getAlternates to get cases where a character is not limited to a single script. For example, it could be
    35  * either Katakana or Hiragana.
    36  * <li>call getCommonAmongAlternates to find out if any scripts are common to all the alternates.
    37  * <li>call getNumerics to get a representative character (with value zero) for each of the decimal number systems in
    38  * the identifier.
    39  * <li>call getRestrictionLevel to see what the UTS36 restriction level is.
    40  * </ol>
    41  * 
    42  * This is a port from ICU4J of class com.ibm.icu.text.IdentifierInfo
    43  */
    44 class U_I18N_API IdentifierInfo : public UMemory {
    46   public:
    47     /**
    48      * Create an identifier info object. Subsequently, call setIdentifier(), etc.
    49      * @internal
    50      */
    51     IdentifierInfo(UErrorCode &status);
    53     /**
    54       * Destructor
    55       */
    56     virtual ~IdentifierInfo();
    58   private:
    59     /* Disallow copying and assignment for now (declared, never defined). Can be added if there's a need. */
    60     IdentifierInfo(const IdentifierInfo &other);
    61     IdentifierInfo &operator=(const IdentifierInfo &other);
    62   public:
    64     /**
    65      * Set the identifier profile: the characters that are to be allowed in the identifier.
    66      *
    67      * @param identifierProfile the characters that are to be allowed in the identifier
    68      * @return this
    69      * @internal
    70      */
    71     IdentifierInfo &setIdentifierProfile(const UnicodeSet &identifierProfile);
    73     /**
    74      * Get the identifier profile: the characters that are to be allowed in the identifier.
    75      *
    76      * @return The characters that are to be allowed in the identifier.
    77      * @internal
    78      */
    79     const UnicodeSet &getIdentifierProfile() const;
    82     /**
    83      * Set an identifier to analyze. Afterwards, call methods like getScripts()
    84      *
    85      * @param identifier the identifier to analyze
    86      * @param status Errorcode, set if errors occur.
    87      * @return this
    88      * @internal
    89      */
    90     IdentifierInfo &setIdentifier(const UnicodeString &identifier, UErrorCode &status);
    93     /**
    94      * Get the identifier that was analyzed. The returned string is owned by the ICU library,
    95      * and must not be deleted by the caller.
    96      *
    97      * @return the identifier that was analyzed.
    98      * @internal
    99      */
   100     const UnicodeString *getIdentifier() const;
   103     /**
   104      * Get the scripts found in the identifiers.
   105      *
   106      * @return the set of explicit scripts.
   107      * @internal
   108      */
   109     const ScriptSet *getScripts() const;
   111     /**
   112      * Get the set of alternate scripts found in the identifiers. That is, when a character can be in two scripts, then
   113      * the set consisting of those scripts will be returned.
   114      *
   115      * @return a uhash, with each key being of type (ScriptSet *).
   116      *         This is a set, not a map, so the value stored in the uhash is not relevant.
   117      *         (It is, in fact, 1).
   118      *         Ownership of the uhash and its contents remains with the IdentifierInfo object,
   119      *         and remains valid until a new identifier is set or until the object is deleted.
   120      * @internal
   121      */
   122     const UHashtable *getAlternates() const;
   124     /**
   125      * Get the representative characters (zeros) for the numerics found in the identifier.
   126      *
   127      * @return the set of representative zero characters, one for each decimal number system found.
   128      * @internal
   129      */
   130     const UnicodeSet *getNumerics() const;
   132     /**
   133      * Find out which scripts are in common among the alternates.
   134      *
   135      * @return the set of scripts that are in common among the alternates.
   136      * @internal
   137      */
   138     const ScriptSet *getCommonAmongAlternates() const;
   140     /**
   141       * Get the number of scripts appearing in the identifier.
   142       *   Note: Common and Inherited scripts are omitted from the count.
   143       *   Note: Result may be high when the identifier contains characters
   144       *         with alternate scripts. The distinction between
   145       *         0, 1 and > 1 will remain valid, however.
   146       * @return the number of scripts.
   147       */
   148     int32_t getScriptCount() const;
   150 #if !UCONFIG_NO_NORMALIZATION
   152     /**
   153      * Find the "tightest" restriction level that the identifier satisfies.
   154      * @param status ICU error code; expected to be set if errors occur.
   155      * @return the restriction level.
   156      * @internal
   157      */
   158     URestrictionLevel getRestrictionLevel(UErrorCode &status) const;
   160 #endif /*!UCONFIG_NO_NORMALIZATION */
   161     /* NOTE(review): undocumented; presumably a readable form of the analysis results -- confirm in the implementation. */
   162     UnicodeString toString() const;
   164     /**
   165      * Produce a readable string of alternates.
   166      *
   167      * @param alternates a UHashtable of ScriptSets.
   168      *        Keys only, no meaningful values in the UHash.
   169      * @return display form
   170      * @internal
   171      */
   172     static UnicodeString &displayAlternates(UnicodeString &dest, const UHashtable *alternates, UErrorCode &status);
   174     /**
   175      * Static memory cleanup function.
   176      * @internal
   177      */
   178     static UBool      cleanup();
   179   private:
   180     /* Internal helpers; their behavior is defined in the implementation file, not in this header. */
   181     IdentifierInfo  & clear();
   182     UBool             containsWithAlternates(const ScriptSet &container, const ScriptSet &containee) const;
   183     /* Instance state, held through raw pointers; this is why copying and assignment are disabled above. */
   184     UnicodeString     *fIdentifier;
   185     ScriptSet         *fRequiredScripts;
   186     UHashtable        *fScriptSetSet;
   187     ScriptSet         *fCommonAmongAlternates;
   188     UnicodeSet        *fNumerics;
   189     UnicodeSet        *fIdentifierProfile;
   190     /* Shared static sets; they are only declared here, their definitions and initialization live elsewhere. */
   191     static UnicodeSet *ASCII;
   192     static ScriptSet  *JAPANESE;
   193     static ScriptSet  *CHINESE;
   194     static ScriptSet  *KOREAN;
   195     static ScriptSet  *CONFUSABLE_WITH_LATIN;
   199 };
   201 U_NAMESPACE_END
   203 #endif // __IDENTIFIER_INFO_H__

mercurial