The Tor Browser: diff intl/icu/source/common/unicode/caniter.h

     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/common/unicode/caniter.h	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,208 @@
     1.4 +/*
     1.5 + *******************************************************************************
     1.6 + * Copyright (C) 1996-2011, International Business Machines Corporation and
     1.7 + * others. All Rights Reserved.
     1.8 + *******************************************************************************
     1.9 + */
    1.10 +
    1.11 +#ifndef CANITER_H
    1.12 +#define CANITER_H
    1.13 +
    1.14 +#include "unicode/utypes.h"
    1.15 +
    1.16 +#if !UCONFIG_NO_NORMALIZATION
    1.17 +
    1.18 +#include "unicode/uobject.h"
    1.19 +#include "unicode/unistr.h"
    1.20 +
    1.21 +/**
    1.22 + * \file
    1.23 + * \brief C++ API: Canonical Iterator
    1.24 + */
    1.25 + 
    1.26 +/** Should permutation skip characters with combining class zero?
    1.27 + *  Must be either TRUE or FALSE. This is a compile-time option; it defaults to TRUE when not already defined.
    1.28 + *  @stable ICU 2.4
    1.29 + */
    1.30 +#ifndef CANITER_SKIP_ZEROES
    1.31 +#define CANITER_SKIP_ZEROES TRUE
    1.32 +#endif
    1.33 +
    1.34 +U_NAMESPACE_BEGIN
    1.35 +
    1.36 +class Hashtable;
    1.37 +class Normalizer2;
    1.38 +class Normalizer2Impl;
    1.39 +
    1.40 +/**
    1.41 + * This class allows one to iterate through all the strings that are canonically equivalent to a given
    1.42 + * string. For example, here are some sample results:
    1.43 +Results for: {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
    1.44 +1: \\u0041\\u030A\\u0064\\u0307\\u0327
    1.45 + = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
    1.46 +2: \\u0041\\u030A\\u0064\\u0327\\u0307
    1.47 + = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE}
    1.48 +3: \\u0041\\u030A\\u1E0B\\u0327
    1.49 + = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA}
    1.50 +4: \\u0041\\u030A\\u1E11\\u0307
    1.51 + = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE}
    1.52 +5: \\u00C5\\u0064\\u0307\\u0327
    1.53 + = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
    1.54 +6: \\u00C5\\u0064\\u0327\\u0307
    1.55 + = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE}
    1.56 +7: \\u00C5\\u1E0B\\u0327
    1.57 + = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA}
    1.58 +8: \\u00C5\\u1E11\\u0307
    1.59 + = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE}
    1.60 +9: \\u212B\\u0064\\u0307\\u0327
    1.61 + = {ANGSTROM SIGN}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
    1.62 +10: \\u212B\\u0064\\u0327\\u0307
    1.63 + = {ANGSTROM SIGN}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE}
    1.64 +11: \\u212B\\u1E0B\\u0327
    1.65 + = {ANGSTROM SIGN}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA}
    1.66 +12: \\u212B\\u1E11\\u0307
    1.67 + = {ANGSTROM SIGN}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE}
    1.68 + *<br>Note: the code is intended for use with small strings and is not suitable for larger ones,
    1.69 + * since it has not been optimized for that situation.
    1.70 + *<br>Note: CanonicalIterator is not intended to be subclassed.
    1.71 + * @author M. Davis
    1.72 + * @author C++ port by V. Weinstein
    1.73 + * @stable ICU 2.4
    1.74 + */
    1.75 +class U_COMMON_API CanonicalIterator : public UObject {
    1.76 +public:
    1.77 +    /**
    1.78 +     * Construct a CanonicalIterator object.
    1.79 +     * @param source    string to get results for
    1.80 +     * @param status    Fill-in parameter which receives the status of this operation.
    1.81 +     * @stable ICU 2.4
    1.82 +     */
    1.83 +    CanonicalIterator(const UnicodeString &source, UErrorCode &status);
    1.84 +
    1.85 +    /** Destructor.
    1.86 +     *  Cleans pieces.
    1.87 +     * @stable ICU 2.4
    1.88 +     */
    1.89 +    virtual ~CanonicalIterator();
    1.90 +
    1.91 +    /**
    1.92 +     * Gets the NFD form of the current source we are iterating over.
    1.93 +     * @return the source. NOTE: it is the NFD form of the source, returned by value.
    1.94 +     * @stable ICU 2.4
    1.95 +     */
    1.96 +    UnicodeString getSource();
    1.97 +
    1.98 +    /**
    1.99 +     * Resets the iterator so that one can start again from the beginning.
   1.100 +     * @stable ICU 2.4
   1.101 +     */
   1.102 +    void reset();
   1.103 +
   1.104 +    /**
   1.105 +     * Get the next canonically equivalent string.
   1.106 +     * <br><b>Warning: The strings are not guaranteed to be in any particular order.</b>
   1.107 +     * @return the next string that is canonically equivalent. A bogus string is returned when
   1.108 +     * the iteration is done.
   1.109 +     * @stable ICU 2.4
   1.110 +     */
   1.111 +    UnicodeString next();
   1.112 +
   1.113 +    /**
   1.114 +     * Set a new source for this iterator. Allows object reuse.
   1.115 +     * @param newSource     the source string to iterate against. This allows the same iterator to be used
   1.116 +     *                     while changing the source string, saving object creation.
   1.117 +     * @param status        Fill-in parameter which receives the status of this operation.
   1.118 +     * @stable ICU 2.4
   1.119 +     */
   1.120 +    void setSource(const UnicodeString &newSource, UErrorCode &status);
   1.121 +
   1.122 +#ifndef U_HIDE_INTERNAL_API
   1.123 +    /**
   1.124 +     * Simple, unoptimized recursive implementation of permutation.
   1.125 +     * TODO: optimize
   1.126 +     * @param source     the string to find permutations for
   1.127 +     * @param skipZeros  whether to skip zeros (presumably characters with combining class zero; see CANITER_SKIP_ZEROES)
   1.128 +     * @param result     the results in a set.
   1.129 +     * @param status     Fill-in parameter which receives the status of this operation.
   1.130 +     * @internal
   1.131 +     */
   1.132 +    static void U_EXPORT2 permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status);
   1.133 +#endif  /* U_HIDE_INTERNAL_API */
   1.134 +
   1.135 +    /**
   1.136 +     * ICU "poor man's RTTI", returns a UClassID for this class.
   1.137 +     *
   1.138 +     * @stable ICU 2.2
   1.139 +     */
   1.140 +    static UClassID U_EXPORT2 getStaticClassID();
   1.141 +
   1.142 +    /**
   1.143 +     * ICU "poor man's RTTI", returns a UClassID for the actual class.
   1.144 +     *
   1.145 +     * @stable ICU 2.2
   1.146 +     */
   1.147 +    virtual UClassID getDynamicClassID() const;
   1.148 +
   1.149 +private:
   1.150 +    // ===================== PRIVATES ==============================
   1.151 +    // Default constructor, declared private so that callers cannot use it.
   1.152 +    CanonicalIterator();
   1.153 +
   1.154 +
   1.155 +    /**
   1.156 +     * Copy constructor. Private for now, so instances cannot be copied by callers.
   1.157 +     * @internal
   1.158 +     */
   1.159 +    CanonicalIterator(const CanonicalIterator& other);
   1.160 +
   1.161 +    /**
   1.162 +     * Assignment operator. Private for now, so instances cannot be assigned by callers.
   1.163 +     * @internal
   1.164 +     */
   1.165 +    CanonicalIterator& operator=(const CanonicalIterator& other);
   1.166 +
   1.167 +    // fields
   1.168 +    UnicodeString source;
   1.169 +    UBool done;
   1.170 +
   1.171 +    // 2-dimensional array that holds the pieces of the string with
   1.172 +    // their different canonically equivalent representations
   1.173 +    UnicodeString **pieces;
   1.174 +    int32_t pieces_length;
   1.175 +    int32_t *pieces_lengths;
   1.176 +
   1.177 +    // current is used in iterating to combine pieces
   1.178 +    int32_t *current;
   1.179 +    int32_t current_length;
   1.180 +
   1.181 +    // transient fields
   1.182 +    UnicodeString buffer;
   1.183 +
   1.184 +    const Normalizer2 &nfd;
   1.185 +    const Normalizer2Impl &nfcImpl;
   1.186 +
   1.187 +    // We have a segment, in NFD. Find all the strings that are canonically equivalent to it.
   1.188 +    UnicodeString *getEquivalents(const UnicodeString &segment, int32_t &result_len, UErrorCode &status); //private String[] getEquivalents(String segment)
   1.189 +
   1.190 +    //Set getEquivalents2(String segment);
   1.191 +    Hashtable *getEquivalents2(Hashtable *fillinResult, const UChar *segment, int32_t segLen, UErrorCode &status);
   1.192 +    //Hashtable *getEquivalents2(const UnicodeString &segment, int32_t segLen, UErrorCode &status);
   1.193 +
   1.194 +    /**
   1.195 +     * See if the decomposition of cp2 (presumably the comp parameter -- TODO confirm) is at segment starting at segmentPos
   1.196 +     * (with canonical rearrangement!)
   1.197 +     * If so, take the remainder, and return the equivalents
   1.198 +     */
   1.199 +    //Set extract(int comp, String segment, int segmentPos, StringBuffer buffer);
   1.200 +    Hashtable *extract(Hashtable *fillinResult, UChar32 comp, const UChar *segment, int32_t segLen, int32_t segmentPos, UErrorCode &status);
   1.201 +    //Hashtable *extract(UChar32 comp, const UnicodeString &segment, int32_t segLen, int32_t segmentPos, UErrorCode &status);
   1.202 +
   1.203 +    void cleanPieces();
   1.204 +
   1.205 +};
   1.206 +
   1.207 +U_NAMESPACE_END
   1.208 +
   1.209 +#endif /* #if !UCONFIG_NO_NORMALIZATION */
   1.210 +
   1.211 +#endif
The Tor Browser / file diff

diff: intl/icu/source/common/unicode/caniter.h

intl/icu/source/common/unicode/caniter.h