intl/icu/source/i18n/usrchimp.h

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

     1 /*
     2 **********************************************************************
     3 *   Copyright (C) 2001-2011 IBM and others. All rights reserved.
     4 **********************************************************************
     5 *   Date        Name        Description
     6 *  08/13/2001   synwee      Creation.
     7 **********************************************************************
     8 */
     9 #ifndef USRCHIMP_H
    10 #define USRCHIMP_H
    12 #include "unicode/utypes.h"
    14 #if !UCONFIG_NO_COLLATION
    16 #include "unicode/normalizer2.h"
    17 #include "unicode/ucol.h"
    18 #include "unicode/ucoleitr.h"
    19 #include "unicode/ubrk.h"
    21 #define INITIAL_ARRAY_SIZE_       256   // element count of the fixed buffers in UPattern and UStringSearch
    22 #define MAX_TABLE_SIZE_           257   // element count of UPattern::shift and UPattern::backShift
    24 struct USearch {   // per-search state: the target text, option flags and the last match
    25     // the text is stored here since the collation element iterator does not have a getText API
    26     const UChar              *text;
    27           int32_t             textLength; // exact length
    28           UBool               isOverlap;   // NOTE(review): presumably the "overlapping matches" option - confirm
    29           UBool               isCanonicalMatch;   // NOTE(review): presumably selects canonical vs. exact matching - confirm
    30           int16_t             elementComparisonType;   // NOTE(review): presumably the element-comparison option value - confirm
    31           UBreakIterator     *internalBreakIter;  // internal character break iterator
    32           UBreakIterator     *breakIter;   // NOTE(review): presumably the caller-supplied break iterator - confirm
    33     // USEARCH_DONE is the default value of matchedIndex.
    34     // If matchedIndex is USEARCH_DONE and, depending on the iteration direction,
    35     // we are not at the start of the text or the end of the text,
    36     // it means that we can't find any more matches in that particular direction.
    37           int32_t             matchedIndex; 
    38           int32_t             matchedLength;   // NOTE(review): presumably the length of the match at matchedIndex - confirm
    39           UBool               isForwardSearching;   // NOTE(review): presumably the current iteration direction - confirm
    40           UBool               reset;   // NOTE(review): semantics not visible in this header - see the implementation
    41 };
    43 struct UPattern {   // the search pattern plus data precomputed from it
    44     const UChar              *text;
    45           int32_t             textLength; // exact length
    46           // length required for backwards ce comparison
    47           int32_t             CELength; 
    48           int32_t            *CE;   // NOTE(review): presumably points at CEBuffer unless more room was needed - confirm
    49           int32_t             CEBuffer[INITIAL_ARRAY_SIZE_];   // fixed-size inline storage for 32-bit values
    50           int32_t             PCELength;   // NOTE(review): presumably the number of entries reachable through PCE - confirm
    51           int64_t            *PCE;   // NOTE(review): presumably points at PCEBuffer unless more room was needed - confirm
    52           int64_t             PCEBuffer[INITIAL_ARRAY_SIZE_];   // fixed-size inline storage for 64-bit values
    53           UBool               hasPrefixAccents;   // NOTE(review): presumably TRUE if the pattern starts with accents - confirm
    54           UBool               hasSuffixAccents;   // NOTE(review): presumably TRUE if the pattern ends with accents - confirm
    55           int16_t             defaultShiftSize;   // NOTE(review): presumably the fallback shift when no table entry applies - confirm
    56           int16_t             shift[MAX_TABLE_SIZE_];   // NOTE(review): presumably the forward-search shift table - confirm
    57           int16_t             backShift[MAX_TABLE_SIZE_];   // NOTE(review): presumably the backward-search shift table - confirm
    58 };
    60 struct UStringSearch {   // top-level string search object handed to the usearch_handle* functions
    61     struct USearch            *search;   // held by pointer, unlike pattern below
    62     struct UPattern            pattern;   // embedded by value
    63     const  UCollator          *collator;
    64     const  icu::Normalizer2   *nfd;   // NOTE(review): presumably the NFD normalizer instance - confirm
    65     // positions within the collation element iterator are used to determine
    66     // if we are at the start of the text.
    67            UCollationElements *textIter;
    68     // utility collation element iterator, used throughout the program for
    69     // temporary iteration.
    70            UCollationElements *utilIter;
    71            UBool               ownCollator;   // NOTE(review): presumably TRUE if this object must close collator - confirm
    72            UCollationStrength  strength;
    73            uint32_t            ceMask;   // NOTE(review): presumably the mask applied to collation elements for strength - confirm
    74            uint32_t            variableTop;
    75            UBool               toShift;   // NOTE(review): presumably reflects shifted alternate handling - confirm
    76            UChar               canonicalPrefixAccents[INITIAL_ARRAY_SIZE_];
    77            UChar               canonicalSuffixAccents[INITIAL_ARRAY_SIZE_];
    78 };
    80 /**
    81 * Finds the next exact match, without checking the ends for extra accents.
    82 * The match after the position within the collation element iterator is to be
    83 * found.
    84 * After a match is found the offset in the collation element iterator will be
    85 * shifted to the start of the match.
    86 * Implementation note:
    87 * For tertiary we can't use the collator->tertiaryMask; that is a
    88 * preprocessed mask that takes into account case options. Since we are only
    89 * concerned with exact matches, we don't need that.
    90 * Alternate handling - since only the 16 most significant digits are used,
    91 * we can safely do a compare without masking. If the ce is a variable, we mask
    92 * and get only the primary values; no shifting to quaternary is required since
    93 * all primary values less than variabletop will need to be masked off anyway.
    94 * If the end character is composite and the pattern ce does not match the text
    95 * ce, we skip it until we find a match in the end composite character or when
    96 * it has passed the character. This is so that we can match pattern "a" with
    97 * the text "\u00e6".
    98 * @param strsrch string search data
    99 * @param status error status if any
   100 * @return TRUE if an exact match is found, FALSE otherwise
   101 */
   102 U_CFUNC
   103 UBool usearch_handleNextExact(UStringSearch *strsrch, UErrorCode *status);
   105 /**
   106 * Finds the next canonical match.
   107 * According to the definition, matches found here will include the whole span
   108 * of beginning and ending accents if the match overlaps that region.
   109 * @param strsrch string search data
   110 * @param status error status if any
   111 * @return TRUE if a canonical match is found, FALSE otherwise
   112 */
   113 U_CFUNC
   114 UBool usearch_handleNextCanonical(UStringSearch *strsrch, UErrorCode *status);
   116 /**
   117 * Gets the previous exact match.
   118 * The comments on usearch_handleNextExact apply here as well.
   119 * @param strsrch string search data
   120 * @param status error status if any
   121 * @return TRUE if an exact match is found, FALSE otherwise.
   122 */
   123 U_CFUNC
   124 UBool usearch_handlePreviousExact(UStringSearch *strsrch, UErrorCode *status);
   126 /**
   127 * Finds the previous canonical match.
   128 * According to the definition, matches found here will include the whole span
   129 * of beginning and ending accents if the match overlaps that region.
   130 * @param strsrch string search data
   131 * @param status error status if any
   132 * @return TRUE if a canonical match is found, FALSE otherwise
   133 */
   134 U_CFUNC
   135 UBool usearch_handlePreviousCanonical(UStringSearch *strsrch, 
   136                                       UErrorCode    *status);
   138 #endif /* #if !UCONFIG_NO_COLLATION */
   140 #endif

mercurial