intl/icu/source/i18n/usrchimp.h

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/i18n/usrchimp.h	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,140 @@
     1.4 +/*
     1.5 +**********************************************************************
     1.6 +*   Copyright (C) 2001-2011 IBM and others. All rights reserved.
     1.7 +**********************************************************************
     1.8 +*   Date        Name        Description
     1.9 +*  08/13/2001   synwee      Creation.
    1.10 +**********************************************************************
    1.11 +*/
    1.12 +#ifndef USRCHIMP_H
    1.13 +#define USRCHIMP_H
    1.14 +
    1.15 +#include "unicode/utypes.h"
    1.16 +
    1.17 +#if !UCONFIG_NO_COLLATION
    1.18 +
    1.19 +#include "unicode/normalizer2.h"
    1.20 +#include "unicode/ucol.h"
    1.21 +#include "unicode/ucoleitr.h"
    1.22 +#include "unicode/ubrk.h"
    1.23 +
    1.24 +#define INITIAL_ARRAY_SIZE_       256 /* element count of the inline CE/PCE and accent buffers below */
    1.25 +#define MAX_TABLE_SIZE_           257 /* element count of the shift and backShift tables below */
    1.26 +
    1.27 +struct USearch { // target text, search options and current match state
    1.28 +    // text is kept here because the collation element iterator has no getText API
    1.29 +    const UChar              *text;
    1.30 +          int32_t             textLength; // exact length of text
    1.31 +          UBool               isOverlap; // presumably: overlapping matches allowed -- confirm
    1.32 +          UBool               isCanonicalMatch; // presumably: canonical (not exact) matching -- confirm
    1.33 +          int16_t             elementComparisonType;
    1.34 +          UBreakIterator     *internalBreakIter;  // internal character break iterator
    1.35 +          UBreakIterator     *breakIter;
    1.36 +    // matchedIndex defaults to USEARCH_DONE.
    1.37 +    // If matchedIndex is USEARCH_DONE while we are not at the start or the end
    1.38 +    // of the text (whichever applies to the iteration direction), then no
    1.39 +    // more matches can be found in that particular direction.
    1.40 +          int32_t             matchedIndex; 
    1.41 +          int32_t             matchedLength;
    1.42 +          UBool               isForwardSearching;
    1.43 +          UBool               reset;
    1.44 +};
    1.45 +
    1.46 +struct UPattern { // pattern text plus its collation elements and shift tables
    1.47 +    const UChar              *text;
    1.48 +          int32_t             textLength; // exact length of text
    1.49 +          // number of collation elements; needed for backwards CE comparison
    1.50 +          int32_t             CELength; 
    1.51 +          int32_t            *CE; // presumably points at CEBuffer unless more room is needed -- confirm
    1.52 +          int32_t             CEBuffer[INITIAL_ARRAY_SIZE_]; // inline storage for 32-bit CEs
    1.53 +          int32_t             PCELength;
    1.54 +          int64_t            *PCE; // presumably points at PCEBuffer unless more room is needed -- confirm
    1.55 +          int64_t             PCEBuffer[INITIAL_ARRAY_SIZE_]; // inline storage for 64-bit CEs
    1.56 +          UBool               hasPrefixAccents;
    1.57 +          UBool               hasSuffixAccents;
    1.58 +          int16_t             defaultShiftSize;
    1.59 +          int16_t             shift[MAX_TABLE_SIZE_];
    1.60 +          int16_t             backShift[MAX_TABLE_SIZE_];
    1.61 +};
    1.62 +
    1.63 +struct UStringSearch { // top-level search object: text state, pattern, collator and iterators
    1.64 +    struct USearch            *search;
    1.65 +    struct UPattern            pattern;
    1.66 +    const  UCollator          *collator;
    1.67 +    const  icu::Normalizer2   *nfd;
    1.68 +    // positions within this collation element iterator are used to determine
    1.69 +    // whether we are at the start of the text.
    1.70 +           UCollationElements *textIter;
    1.71 +    // utility collation element iterator, used throughout the program for
    1.72 +    // temporary iteration.
    1.73 +           UCollationElements *utilIter;
    1.74 +           UBool               ownCollator; // presumably: TRUE if this object must close collator -- confirm
    1.75 +           UCollationStrength  strength;
    1.76 +           uint32_t            ceMask;
    1.77 +           uint32_t            variableTop;
    1.78 +           UBool               toShift;
    1.79 +           UChar               canonicalPrefixAccents[INITIAL_ARRAY_SIZE_];
    1.80 +           UChar               canonicalSuffixAccents[INITIAL_ARRAY_SIZE_];
    1.81 +};
    1.82 +
    1.83 +/**
    1.84 +* Exact matches, without checking the ends for extra accents.
    1.85 +* The match after the position within the collation element iterator is to be
    1.86 +* found.
    1.87 +* After a match is found the offset in the collation element iterator will be
    1.88 +* shifted to the start of the match.
    1.89 +* Implementation note:
    1.90 +* For tertiary we can't use the collator->tertiaryMask; that is a
    1.91 +* preprocessed mask that takes into account case options. Since we are only
    1.92 +* concerned with exact matches, we don't need that.
    1.93 +* Alternate handling - since only the 16 most significant digits are used,
    1.94 +* we can safely do a compare without masking; if the ce is a variable, we mask
    1.95 +* and get only the primary values. No shifting to quaternary is required since
    1.96 +* all primary values less than variabletop will need to be masked off anyway.
    1.97 +* If the end character is composite and the pattern ce does not match the text
    1.98 +* ce, we skip it until we find a match in the end composite character or until
    1.99 +* it has passed the character. This is so that we can match pattern "a" with
   1.100 +* the text "\u00e6".
   1.101 +* @param strsrch string search data
   1.102 +* @param status error status if any
   1.103 +* @return TRUE if an exact match is found, FALSE otherwise
   1.104 +*/
   1.105 +U_CFUNC
   1.106 +UBool usearch_handleNextExact(UStringSearch *strsrch, UErrorCode *status);
   1.107 +
   1.108 +/**
   1.109 +* Gets the next canonical match.
   1.110 +* According to the definition, matches found here will include the whole span
   1.111 +* of beginning and ending accents if the match overlaps that region.
   1.112 +* @param strsrch string search data
   1.113 +* @param status error status if any
   1.114 +* @return TRUE if a canonical match is found, FALSE otherwise
   1.115 +*/
   1.116 +U_CFUNC
   1.117 +UBool usearch_handleNextCanonical(UStringSearch *strsrch, UErrorCode *status);
   1.118 +
   1.119 +/**
   1.120 +* Gets the previous exact match.
   1.121 +* The comments on usearch_handleNextExact apply here as well.
   1.122 +* @param strsrch string search data
   1.123 +* @param status error status if any
   1.124 +* @return TRUE if an exact match is found, FALSE otherwise
   1.125 +*/
   1.126 +U_CFUNC
   1.127 +UBool usearch_handlePreviousExact(UStringSearch *strsrch, UErrorCode *status);
   1.128 +
   1.129 +/**
   1.130 +* Gets the previous canonical match.
   1.131 +* According to the definition, matches found here will include the whole span
   1.132 +* of beginning and ending accents if the match overlaps that region.
   1.133 +* @param strsrch string search data
   1.134 +* @param status error status if any
   1.135 +* @return TRUE if a canonical match is found, FALSE otherwise
   1.136 +*/
   1.137 +U_CFUNC
   1.138 +UBool usearch_handlePreviousCanonical(UStringSearch *strsrch, 
   1.139 +                                      UErrorCode    *status);
   1.140 +
   1.141 +#endif /* #if !UCONFIG_NO_COLLATION */
   1.142 +
   1.143 +#endif

mercurial