intl/icu/source/i18n/tridpars.h

Wed, 31 Dec 2014 07:22:50 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 07:22:50 +0100
branch
TOR_BUG_3246
changeset 4
fc2d59ddac77
permissions
-rw-r--r--

Correct previous dual key logic pending first delivery installment.

     1 /*
     2  **************************************************************************
     3  *   Copyright (c) 2002-2010, International Business Machines Corporation *
     4  *   and others.  All Rights Reserved.                                    *
     5  **************************************************************************
     6  *   Date        Name        Description                                  *
     7  *   01/28/2002  aliu        Creation.                                    *
     8  **************************************************************************
     9  */
    10 #ifndef TRIDPARS_H
    11 #define TRIDPARS_H
    13 #include "unicode/utypes.h"
    15 #if !UCONFIG_NO_TRANSLITERATION
    17 #include "unicode/uobject.h"
    18 #include "unicode/unistr.h"
    20 U_NAMESPACE_BEGIN
    22 class Transliterator;
    23 class UnicodeSet;
    24 class UVector;
    26 /**
    27  * Parsing component for transliterator IDs.  This class contains only
    28  * static members; it cannot be instantiated.  Methods in this class
    29  * parse various ID formats, including the following:
    30  *
    31  * A basic ID, which contains source, target, and variant, but no
    32  * filter and no explicit inverse.  Examples include
    33  * "Latin-Greek/UNGEGN" and "Null".
    34  *
    35  * A single ID, which is a basic ID plus optional filter and optional
    36  * explicit inverse.  Examples include "[a-zA-Z] Latin-Greek" and
    37  * "Lower (Upper)".
    38  *
    39  * A compound ID, which is a sequence of one or more single IDs,
    40  * separated by semicolons, with optional forward and reverse global
    41  * filters.  The global filters are UnicodeSet patterns prepended or
    42  * appended to the IDs, separated by semicolons.  An appended filter
    43  * must be enclosed in parentheses and applies in the reverse
    44  * direction.
    45  *
    46  * @author Alan Liu
    47  */
    48 class TransliteratorIDParser /* not : public UObject because all methods are static */ {
    50  public:
    52     /**
    53      * A structure containing the parsed data of a filtered ID, that
    54      * is, a basic ID optionally with a filter.
    55      *
    56      * 'source' and 'target' are always set.  The 'variant' is set
    57      * only if a non-empty variant was parsed.
    58      *
    59      * 'sawSource' is true if there was an explicit source in the
    60      * parsed id.  If there was no explicit source, then an implied
    61      * source of ANY is returned and 'sawSource' is set to false.
    62      *
    63      * 'filter' is the parsed filter pattern, or unset if there was no
    64      * filter.
            *
            * NOTE(review): all four strings are UnicodeString values, not
            * pointers, so "unset" cannot mean a null pointer here.  It
            * presumably means an empty string -- confirm against the
            * implementation.
            *
            * The copy constructor and assignment operator are declared
            * private to forbid copying.
    65      */
    66     class Specs : public UMemory {
    67     public:
    68         UnicodeString source; // always set
    69         UnicodeString target; // always set
    70         UnicodeString variant; // may be unset (presumably empty; see class comment)
    71         UnicodeString filter; // may be unset (presumably empty; see class comment)
    72         UBool sawSource; // true if the parsed id had an explicit source
               // NOTE(review): s, t, v, sawS and f presumably initialize source,
               // target, variant, sawSource and filter respectively -- confirm.
    73         Specs(const UnicodeString& s, const UnicodeString& t,
    74               const UnicodeString& v, UBool sawS,
    75               const UnicodeString& f);
    77     private:
    79         Specs(const Specs &other); // forbid copying of this class
    80         Specs &operator=(const Specs &other); // forbid copying of this class
    81     };
    83     /**
    84      * A structure containing the canonicalized data of a filtered ID,
    85      * that is, a basic ID optionally with a filter.
    86      *
    87      * 'canonID' is always set.  It may be the empty string "".
    88      * It is the id that should be assigned to the created
    89      * transliterator.  It _cannot_ be instantiated directly.
    90      *
    91      * 'basicID' is always set and non-empty.  It is always of
    92      * the form S-T or S-T/V.  It is designed to be fed to low-level
    93      * instantiation code that only understands these two formats.
    94      *
    95      * 'filter' is either unset, if there is none, or set and
    96      * non-empty.
            *
            * NOTE(review): the members are UnicodeString values, not
            * pointers, so "unset" cannot mean a null pointer here.  It
            * presumably means an empty string -- confirm against the
            * implementation.
            *
            * The copy constructor and assignment operator are declared
            * private to forbid copying.
    97      */
    98     class SingleID : public UMemory {
    99     public:
   100         UnicodeString canonID; // id to assign to the created transliterator; may be ""
   101         UnicodeString basicID; // S-T or S-T/V form for low-level instantiation
   102         UnicodeString filter; // filter pattern, if any
               // NOTE(review): c, b and f presumably initialize canonID, basicID
               // and filter; the two-argument form takes no filter -- confirm.
   103         SingleID(const UnicodeString& c, const UnicodeString& b,
   104                  const UnicodeString& f);
   105         SingleID(const UnicodeString& c, const UnicodeString& b);
               // NOTE(review): implementation not visible here; confirm what is
               // returned on failure and who owns the returned Transliterator.
   106         Transliterator* createInstance();
   108     private:
   110         SingleID(const SingleID &other); // forbid copying of this class
   111         SingleID &operator=(const SingleID &other); // forbid copying of this class
   112     };
   114     /**
   115      * Parse a filter ID, that is, an ID of the general form
   116      * "[f1] s1-t1/v1", with the filters optional, and the variants optional.
            * This is the public overload returning a SingleID; a private
            * overload with an extra 'allowFilter' argument returns a Specs
            * object instead.
   117      * @param id the id to be parsed
   118      * @param pos INPUT-OUTPUT parameter.  On input, the position of
   119      * the first character to parse.  On output, the position after
   120      * the last character parsed.
   121      * @return a SingleID object or null if the parse fails
   122      */
   123     static SingleID* parseFilterID(const UnicodeString& id, int32_t& pos);
   125     /**
   126      * Parse a single ID, that is, an ID of the general form
   127      * "[f1] s1-t1/v1 ([f2] s2-t3/v2)", with the parenthesized element
   128      * optional, the filters optional, and the variants optional.
   129      * @param id the id to be parsed
   130      * @param pos INPUT-OUTPUT parameter.  On input, the position of
   131      * the first character to parse.  On output, the position after
   132      * the last character parsed.
   133      * @param dir the direction.  If the direction is REVERSE then the
   134      * SingleID is constructed for the reverse direction.
            * @param status Output param to receive a success or an error code.
   135      * @return a SingleID object or null
   136      */
   137     static SingleID* parseSingleID(const UnicodeString& id, int32_t& pos,
   138                                   int32_t dir, UErrorCode& status);
   140     /**
   141      * Parse a global filter of the form "[f]" or "([f])", depending
   142      * on 'withParens'.
   143      * @param id the pattern to parse
   144      * @param pos INPUT-OUTPUT parameter.  On input, the position of
   145      * the first character to parse.  On output, the position after
   146      * the last character parsed.
   147      * @param dir the direction.
   148      * @param withParens INPUT-OUTPUT parameter.  On entry, if
   149      * withParens is 0, then parens are disallowed.  If it is 1,
   150      * then parens are required.  If it is -1, then parens are
   151      * optional, and on return withParens will be set to 0 or 1.
   152      * @param canonID OUTPUT parameter.  The pattern for the filter
   153      * added to the canonID, either at the end, if dir is FORWARD, or
   154      * at the start, if dir is REVERSE.  The pattern will be enclosed
   155      * in parentheses if appropriate, and will be suffixed with an
   156      * ID_DELIM character.  May be null.
   157      * @return a UnicodeSet object or null.  A non-null result
   158      * indicates a successful parse, regardless of whether the filter
   159      * applies to the given direction.  The caller should discard it
   160      * if withParens != (dir == REVERSE).
   161      */
   162     static UnicodeSet* parseGlobalFilter(const UnicodeString& id, int32_t& pos,
   163                                          int32_t dir,
   164                                          int32_t& withParens,
   165                                          UnicodeString* canonID);
   167     /**
   168      * Parse a compound ID, consisting of an optional forward global
   169      * filter, a separator, one or more single IDs delimited by
   170      * separators, and an optional reverse global filter.  The
   171      * separator is a semicolon.  The global filters are UnicodeSet
   172      * patterns.  The reverse global filter must be enclosed in
   173      * parentheses.
   174      * @param id the pattern to parse
   175      * @param dir the direction.
   176      * @param canonID OUTPUT parameter that receives the canonical ID,
   177      * consisting of canonical IDs for all elements, as returned by
   178      * parseSingleID(), separated by semicolons.  Previous contents
   179      * are discarded.
   180      * @param list OUTPUT parameter that receives a list of SingleID
   181      * objects representing the parsed IDs.  Previous contents are
   182      * discarded.
   183      * @param globalFilter OUTPUT parameter that receives a pointer to
   184      * a newly created global filter for this ID in this direction, or
   185      * null if there is none.
   186      * @return true if the parse succeeds, that is, if the entire
   187      * id is consumed without syntax error.
   188      */
   189     static UBool parseCompoundID(const UnicodeString& id, int32_t dir,
   190                                  UnicodeString& canonID,
   191                                  UVector& list,
   192                                  UnicodeSet*& globalFilter);
   194     /**
   195      * Convert the elements of the 'list' vector, which are SingleID
   196      * objects, into actual Transliterator objects.  In the course of
   197      * this, some (or all) entries may be removed.  If all entries
   198      * are removed, the Null transliterator will be added.
   199      *
   200      * Delete entries with empty basicIDs; these are generated by
   201      * elements like "(A)" in the forward direction, or "A()" in
   202      * the reverse.  THIS MAY RESULT IN AN EMPTY VECTOR.  Convert
   203      * SingleID entries to actual transliterators.
   204      *
   205      * @param list vector of SingleID objects.  On exit, vector
   206      * of one or more Transliterators.
   207      * @param ec Output param to receive a success or an error code.
   208      *
   209      * This function returns void.  This signature has no insertIndex
   210      * parameter or return value; results are reported through 'list'
   211      * and 'ec' only.
   211      */
   212     static void instantiateList(UVector& list,
   213                                 UErrorCode& ec);
   215     /**
   216      * Parse an ID into pieces.  Take IDs of the form T, T/V, S-T,
   217      * S-T/V, or S/V-T.  If the source is missing, return a source of
   218      * ANY.
   219      * @param id the id string, in any of several forms
   220      * @param source          OUTPUT parameter that receives the source.
   221      * @param target          OUTPUT parameter that receives the target.
   222      * @param variant         OUTPUT parameter that receives the variant.
   223      * @param isSourcePresent OUTPUT parameter.  TRUE if the source is present.
   224      *                        If the source is not present, ANY will be
   225      *                        given as the source, and isSourcePresent will be FALSE.
   226      *
   227      * This function returns void; the results are delivered through the
   228      * reference parameters above.  The target may be empty if the
   229      * id is not well-formed.  The variant may be empty.
   231      */
   232     static void IDtoSTV(const UnicodeString& id,
   233                         UnicodeString& source,
   234                         UnicodeString& target,
   235                         UnicodeString& variant,
   236                         UBool& isSourcePresent);
   238     /**
   239      * Given source, target, and variant strings, concatenate them into a
   240      * full ID.  If the source is empty, then "Any" will be used for the
   241      * source, so the ID will always be of the form s-t/v or s-t.
            * @param source  the source; if empty, "Any" is used instead
            * @param target  the target
            * @param variant the variant (NOTE(review): presumably the "/v"
            *                part is omitted when this is empty -- confirm)
            * @param id      OUTPUT parameter that receives the full ID
   242      */
   243     static void STVtoID(const UnicodeString& source,
   244                         const UnicodeString& target,
   245                         const UnicodeString& variant,
   246                         UnicodeString& id);
   248     /**
   249      * Register two targets as being inverses of one another.  For
   250      * example, calling registerSpecialInverse("NFC", "NFD", true, status)
   251      * causes Transliterator to form the following inverse relationships:
   252      *
   253      * <pre>NFC => NFD
   254      * Any-NFC => Any-NFD
   255      * NFD => NFC
   256      * Any-NFD => Any-NFC</pre>
   257      *
   258      * (Without the special inverse registration, the inverse of NFC
   259      * would be NFC-Any.)  Note that NFD is shorthand for Any-NFD, but
   260      * that the presence or absence of "Any-" is preserved.
   261      *
   262      * <p>The relationship is symmetrical; registering (a, b) is
   263      * equivalent to registering (b, a).
   264      *
   265      * <p>The relevant IDs must still be registered separately as
   266      * factories or classes.
   267      *
   268      * <p>Only the targets are specified.  Special inverses always
   269      * have the form Any-Target1 <=> Any-Target2.  The target should
   270      * have canonical casing (the casing desired to be produced when
   271      * an inverse is formed) and should contain no whitespace or other
   272      * extraneous characters.
   273      *
   274      * @param target the target against which to register the inverse
   275      * @param inverseTarget the inverse of target, that is
   276      * Any-target.getInverse() => Any-inverseTarget
   277      * @param bidirectional if true, register the reverse relation
   278      * as well, that is, Any-inverseTarget.getInverse() => Any-target
            * @param status Output param to receive a success or an error code.
   279      */
   280     static void registerSpecialInverse(const UnicodeString& target,
   281                                        const UnicodeString& inverseTarget,
   282                                        UBool bidirectional,
   283                                        UErrorCode &status);
   285     /**
   286      * Free static memory.
            * NOTE(review): presumably releases what the private init()
            * sets up -- confirm in the implementation.
   287      */
   288     static void cleanup();
   290  private:
   291     //----------------------------------------------------------------
   292     // Private implementation
   293     //----------------------------------------------------------------
   295     // forbid instantiation: the constructor is declared private
   296     TransliteratorIDParser();
   298     /**
   299      * Parse an ID into component pieces.  Take IDs of the form T,
   300      * T/V, S-T, S-T/V, or S/V-T.  If the source is missing, return a
   301      * source of ANY.
   302      * @param id the id string, in any of several forms
   303      * @param pos INPUT-OUTPUT parameter.  On input, pos is the
   304      * offset of the first character to parse in id.  On output,
   305      * pos is the offset after the last parsed character.  If the
   306      * parse failed, pos will be unchanged.
   307      * @param allowFilter if true, a UnicodeSet pattern is allowed
   308      * at any location between specs or delimiters, and is returned
   309      * in the 'filter' member of the Specs object.
   310      * @return a Specs object, or null if the parse failed.  If
   311      * neither source nor target was seen in the parsed id, then the
   312      * parse fails.  If allowFilter is true, then the parsed filter
   313      * pattern is returned in the Specs object, otherwise the returned
   314      * filter is unset (NOTE(review): 'filter' is a UnicodeString value,
   315      * so presumably empty -- confirm).  If the parse fails for any
   316      * reason null is returned.
   316      */
   317     static Specs* parseFilterID(const UnicodeString& id, int32_t& pos,
   318                                 UBool allowFilter);
   320     /**
   321      * Given a Specs object, convert it to a SingleID object.  The
   322      * Specs object is a more unprocessed parse result.  The SingleID
   323      * object contains information about canonical and basic IDs.
   324      * @param specs the given Specs object.
   325      * @param dir   either FORWARD or REVERSE.
   326      * @return a SingleID; never returns null.  Returned object always
   327      * has 'filter' field of null.
   328      */
   329     static SingleID* specsToID(const Specs* specs, int32_t dir);
   331     /**
   332      * Given a Specs object, return a SingleID representing the
   333      * special inverse of that ID.  If there is no special inverse
   334      * then return null.
   335      * @param specs the given Specs.
            * @param status Output param to receive a success or an error code.
   336      * @return a SingleID or null.  Returned object always has
   337      * 'filter' field of null.
   338      */
   339     static SingleID* specsToSpecialInverse(const Specs& specs, UErrorCode &status);
   341     /**
   342      * Glue method to get around access problems in C++.
   343      * @param id the id string for the transliterator, in any of several forms
   344      * @param canonID the given canonical ID
            * @return a Transliterator pointer.  NOTE(review): the
            * implementation is not visible here; confirm the failure value
            * and who owns the returned object.
   345      */
   346     static Transliterator* createBasicInstance(const UnicodeString& id,
   347                                                const UnicodeString* canonID);
   349     /**
   350      * Initialize static memory.
            * @param status Output param to receive a success or an error code.
   351      */
   352     static void init(UErrorCode &status);
   354     friend class SingleID; // grants the nested SingleID class access to the private members of this class
   355 };
   357 U_NAMESPACE_END
   359 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
   361 #endif

mercurial