1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/i18n/usrchimp.h Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,140 @@ 1.4 +/* 1.5 +********************************************************************** 1.6 +* Copyright (C) 2001-2011 IBM and others. All rights reserved. 1.7 +********************************************************************** 1.8 +* Date Name Description 1.9 +* 08/13/2001 synwee Creation. 1.10 +********************************************************************** 1.11 +*/ 1.12 +#ifndef USRCHIMP_H 1.13 +#define USRCHIMP_H 1.14 + 1.15 +#include "unicode/utypes.h" 1.16 + 1.17 +#if !UCONFIG_NO_COLLATION 1.18 + 1.19 +#include "unicode/normalizer2.h" 1.20 +#include "unicode/ucol.h" 1.21 +#include "unicode/ucoleitr.h" 1.22 +#include "unicode/ubrk.h" 1.23 + 1.24 +#define INITIAL_ARRAY_SIZE_ 256 1.25 +#define MAX_TABLE_SIZE_ 257 1.26 + 1.27 +struct USearch { 1.28 + // required since collation element iterator does not have a getText API 1.29 + const UChar *text; 1.30 + int32_t textLength; // exact length 1.31 + UBool isOverlap; 1.32 + UBool isCanonicalMatch; 1.33 + int16_t elementComparisonType; 1.34 + UBreakIterator *internalBreakIter; //internal character breakiterator 1.35 + UBreakIterator *breakIter; 1.36 + // value USEARCH_DONE is the default value 1.37 + // if we are not at the start of the text or the end of the text, 1.38 + // depending on the iteration direction and matchedIndex is USEARCH_DONE 1.39 + // it means that we can't find any more matches in that particular direction 1.40 + int32_t matchedIndex; 1.41 + int32_t matchedLength; 1.42 + UBool isForwardSearching; 1.43 + UBool reset; 1.44 +}; 1.45 + 1.46 +struct UPattern { 1.47 + const UChar *text; 1.48 + int32_t textLength; // exact length 1.49 + // length required for backwards ce comparison 1.50 + int32_t CELength; 1.51 + int32_t *CE; 1.52 + int32_t CEBuffer[INITIAL_ARRAY_SIZE_]; 1.53 + int32_t PCELength; 1.54 + int64_t *PCE; 1.55 + int64_t PCEBuffer[INITIAL_ARRAY_SIZE_]; 1.56 + UBool hasPrefixAccents; 1.57 + UBool hasSuffixAccents; 1.58 + int16_t defaultShiftSize; 1.59 + int16_t shift[MAX_TABLE_SIZE_]; 1.60 + int16_t backShift[MAX_TABLE_SIZE_]; 1.61 +}; 1.62 + 1.63 +struct UStringSearch { 1.64 + struct USearch *search; 1.65 + struct UPattern pattern; 1.66 + const UCollator *collator; 1.67 + const icu::Normalizer2 *nfd; 1.68 + // positions within the collation element iterator is used to determine 1.69 + // if we are at the start of the text. 1.70 + UCollationElements *textIter; 1.71 + // utility collation element, used throughout program for temporary 1.72 + // iteration. 1.73 + UCollationElements *utilIter; 1.74 + UBool ownCollator; 1.75 + UCollationStrength strength; 1.76 + uint32_t ceMask; 1.77 + uint32_t variableTop; 1.78 + UBool toShift; 1.79 + UChar canonicalPrefixAccents[INITIAL_ARRAY_SIZE_]; 1.80 + UChar canonicalSuffixAccents[INITIAL_ARRAY_SIZE_]; 1.81 +}; 1.82 + 1.83 +/** 1.84 +* Exact matches without checking for the ends for extra accents. 1.85 +* The match after the position within the collation element iterator is to be 1.86 +* found. 1.87 +* After a match is found the offset in the collation element iterator will be 1.88 +* shifted to the start of the match. 1.89 +* Implementation note: 1.90 +* For tertiary we can't use the collator->tertiaryMask, that is a 1.91 +* preprocessed mask that takes into account case options. since we are only 1.92 +* concerned with exact matches, we don't need that. 1.93 +* Alternate handling - since only the 16 most significant digits is only used, 1.94 +* we can safely do a compare without masking if the ce is a variable, we mask 1.95 +* and get only the primary values no shifting to quartenary is required since 1.96 +* all primary values less than variabletop will need to be masked off anyway. 1.97 +* If the end character is composite and the pattern ce does not match the text 1.98 +* ce, we skip it until we find a match in the end composite character or when 1.99 +* it has passed the character. This is so that we can match pattern "a" with 1.100 +* the text "\u00e6" 1.101 +* @param strsrch string search data 1.102 +* @param status error status if any 1.103 +* @return TRUE if an exact match is found, FALSE otherwise 1.104 +*/ 1.105 +U_CFUNC 1.106 +UBool usearch_handleNextExact(UStringSearch *strsrch, UErrorCode *status); 1.107 + 1.108 +/** 1.109 +* Canonical matches. 1.110 +* According to the definition, matches found here will include the whole span 1.111 +* of beginning and ending accents if it overlaps that region. 1.112 +* @param strsrch string search data 1.113 +* @param status error status if any 1.114 +* @return TRUE if a canonical match is found, FALSE otherwise 1.115 +*/ 1.116 +U_CFUNC 1.117 +UBool usearch_handleNextCanonical(UStringSearch *strsrch, UErrorCode *status); 1.118 + 1.119 +/** 1.120 +* Gets the previous match. 1.121 +* Comments follows from handleNextExact 1.122 +* @param strsrch string search data 1.123 +* @param status error status if any 1.124 +* @return True if a exact math is found, FALSE otherwise. 1.125 +*/ 1.126 +U_CFUNC 1.127 +UBool usearch_handlePreviousExact(UStringSearch *strsrch, UErrorCode *status); 1.128 + 1.129 +/** 1.130 +* Canonical matches. 1.131 +* According to the definition, matches found here will include the whole span 1.132 +* of beginning and ending accents if it overlaps that region. 1.133 +* @param strsrch string search data 1.134 +* @param status error status if any 1.135 +* @return TRUE if a canonical match is found, FALSE otherwise 1.136 +*/ 1.137 +U_CFUNC 1.138 +UBool usearch_handlePreviousCanonical(UStringSearch *strsrch, 1.139 + UErrorCode *status); 1.140 + 1.141 +#endif /* #if !UCONFIG_NO_COLLATION */ 1.142 + 1.143 +#endif