The Tor Browser: intl/icu/source/i18n/strmatch.cpp@b8a032363ba2 (annotated)

intl/icu/source/i18n/strmatch.cpp@b8a032363ba2 (annotated)

intl/icu/source/i18n/strmatch.cpp

Thu, 22 Jan 2015 13:21:57 +0100

author: Michael Schloh von Bennewitz <michael@schloh.com>
date: Thu, 22 Jan 2015 13:21:57 +0100
branch: TOR_BUG_9701
changeset 15: b8a032363ba2
permissions: -rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

 /*
 **********************************************************************
 *   Copyright (c) 2001-2012, International Business Machines Corporation
 *   and others.  All Rights Reserved.
 **********************************************************************
 *   Date        Name        Description
 *   07/23/01    aliu        Creation.
 **********************************************************************
 */
 #include "unicode/utypes.h"
 #if !UCONFIG_NO_TRANSLITERATION
 #include "strmatch.h"
 #include "rbt_data.h"
 #include "util.h"
 #include "unicode/uniset.h"
 #include "unicode/utf16.h"
 U_NAMESPACE_BEGIN
 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringMatcher)
 /**
  * Construct a matcher for a slice of a rule string.
  *
  * The pattern is filled by calling theString.extractBetween(start, limit,
  * pattern).  The matcher keeps the ADDRESS of theData (it does not copy
  * it), records segmentNum as its segment number, and starts with
  * matchStart and matchLimit both at -1 -- the same value resetMatch()
  * stores, and which replace() treats as "no match recorded".
  *
  * @param theString  string the pattern text is extracted from
  * @param start      first index handed to extractBetween
  * @param limit      second index handed to extractBetween
  * @param segmentNum segment number; toPattern() emits parentheses only
  *                   when this is greater than zero
  * @param theData    rule data later used to look up sub-matchers
  */
 StringMatcher::StringMatcher(const UnicodeString& theString,
                              int32_t start,
                              int32_t limit,
                              int32_t segmentNum,
                              const TransliterationRuleData& theData) :
     data(&theData),
     segmentNumber(segmentNum),
     matchStart(-1),
     matchLimit(-1)
 {
     // pattern is default-constructed above and receives the slice here.
     theString.extractBetween(start, limit, pattern);
 }
 /**
  * Copy constructor.
  *
  * Copy-constructs the UnicodeFunctor, UnicodeMatcher and UnicodeReplacer
  * base sub-objects from o, then copies every member.  Note that the data
  * pointer itself is copied, so the new object refers to the same
  * TransliterationRuleData as o; the recorded match position
  * (matchStart/matchLimit) is carried over as well.
  */
 StringMatcher::StringMatcher(const StringMatcher& o) :
     UnicodeFunctor(o),
     UnicodeMatcher(o),
     UnicodeReplacer(o),
     pattern(o.pattern),
     data(o.data),
     segmentNumber(o.segmentNumber),
     matchStart(o.matchStart),
     matchLimit(o.matchLimit)
 {
 }
 /**
  * Destructor.
  *
  * The body is intentionally empty: nothing is released explicitly here.
  * In particular the object pointed to by data is not deleted by this
  * class.
  */
 StringMatcher::~StringMatcher() {
 }
 /**
  * UnicodeFunctor API.  Create a copy of this matcher with operator new,
  * using the copy constructor, and hand it back as a UnicodeFunctor*.
  */
 UnicodeFunctor* StringMatcher::clone() const {
     StringMatcher* duplicate = new StringMatcher(*this);
     return duplicate;
 }
 /**
  * UnicodeFunctor API.  View this object through its UnicodeMatcher base.
  * The method is const, so constness is stripped from 'this' first and
  * the result is then upcast to UnicodeMatcher*.
  */
 UnicodeMatcher* StringMatcher::toMatcher() const {
     return static_cast<UnicodeMatcher *>(const_cast<StringMatcher *>(this));
 }
 /**
  * UnicodeFunctor API.  View this object through its UnicodeReplacer base.
  * The method is const, so constness is stripped from 'this' first and
  * the result is then upcast to UnicodeReplacer*.
  */
 UnicodeReplacer* StringMatcher::toReplacer() const {
     return static_cast<UnicodeReplacer *>(const_cast<StringMatcher *>(this));
 }
 /**
  * Implement UnicodeMatcher.
  *
  * Walk the pattern one code unit at a time.  A unit for which
  * data->lookupMatcher() returns a matcher is delegated to that matcher;
  * any other unit must equal the text unit at the current position.
  * When limit < offset the pattern is walked from its last unit to its
  * first and the position moves downward; otherwise the walk is forward.
  *
  * @param text        text being matched
  * @param offset      starting position; overwritten with the final
  *                    position only when U_MATCH is returned
  * @param limit       boundary the position may not cross
  * @param incremental in the forward direction, if TRUE and the position
  *                    reaches limit before the pattern is exhausted,
  *                    U_PARTIAL_MATCH is returned
  * @return U_MATCH on success, U_MISMATCH on a failed literal, or
  *         whatever non-U_MATCH value a sub-matcher reports
  */
 UMatchDegree StringMatcher::matches(const Replaceable& text,
                                     int32_t& offset,
                                     int32_t limit,
                                     UBool incremental) {
     // Scan with a private position so offset stays untouched on failure.
     int32_t pos = offset;

     if (limit < pos) {
         // Reverse direction: last pattern unit first.
         for (int32_t idx = pattern.length() - 1; idx >= 0; --idx) {
             const UChar unit = pattern.charAt(idx);
             UnicodeMatcher* const nested = data->lookupMatcher(unit);
             if (nested != 0) {
                 // Stand-in unit: let the sub-matcher consume text.
                 const UMatchDegree degree =
                     nested->matches(text, pos, limit, incremental);
                 if (degree != U_MATCH) {
                     return degree;
                 }
                 continue;
             }
             // Literal unit: must still be above limit and must be equal.
             if (pos <= limit || unit != text.charAt(pos)) {
                 return U_MISMATCH;
             }
             --pos;
         }
         // Store the span in forward (start, limit) form.  An existing
         // record is kept -- we want the rightmost match.
         if (matchStart < 0) {
             matchStart = pos + 1;
             matchLimit = offset + 1;
         }
     } else {
         // Forward direction: first pattern unit first.
         for (int32_t idx = 0; idx < pattern.length(); ++idx) {
             if (incremental && pos == limit) {
                 // Ran out of context with pattern left over and no
                 // mismatch so far.
                 return U_PARTIAL_MATCH;
             }
             const UChar unit = pattern.charAt(idx);
             UnicodeMatcher* const nested = data->lookupMatcher(unit);
             if (nested != 0) {
                 const UMatchDegree degree =
                     nested->matches(text, pos, limit, incremental);
                 if (degree != U_MATCH) {
                     return degree;
                 }
                 continue;
             }
             // The bound check is redundant when incremental is TRUE
             // (handled above) but required otherwise.
             if (pos >= limit || unit != text.charAt(pos)) {
                 return U_MISMATCH;
             }
             ++pos;
         }
         // Forward matches always overwrite the recorded span.
         matchStart = offset;
         matchLimit = pos;
     }

     offset = pos;
     return U_MATCH;
 }
 /**
  * Implement UnicodeMatcher.
  *
  * Rebuild the rule-source form of this matcher into result, which is
  * emptied first.  Each pattern unit is appended through
  * ICU_Utility::appendToRule: units with a registered sub-matcher
  * contribute that matcher's own toPattern() text, all others are
  * appended as themselves.  When segmentNumber is greater than zero the
  * output is wrapped in '(' and ')'.
  *
  * @param result            receives the pattern text
  * @param escapeUnprintable forwarded to appendToRule and to sub-matchers
  * @return result
  */
 UnicodeString& StringMatcher::toPattern(UnicodeString& result,
                                         UBool escapeUnprintable) const
 {
     UnicodeString subPattern;   // scratch buffer for sub-matcher output
     UnicodeString quoteBuf;     // pending quoted text shared across calls

     result.truncate(0);

     if (segmentNumber > 0) {
         result.append((UChar)0x0028); /*(*/
     }

     const int32_t count = pattern.length();
     for (int32_t idx = 0; idx != count; ++idx) {
         UChar unit = pattern.charAt(idx);
         const UnicodeMatcher* nested = data->lookupMatcher(unit);
         if (nested != 0) {
             UnicodeString& nestedText =
                 nested->toPattern(subPattern, escapeUnprintable);
             ICU_Utility::appendToRule(result, nestedText,
                                       TRUE, escapeUnprintable, quoteBuf);
         } else {
             ICU_Utility::appendToRule(result, unit,
                                       FALSE, escapeUnprintable, quoteBuf);
         }
     }

     if (segmentNumber > 0) {
         result.append((UChar)0x0029); /*)*/
     }

     // Push whatever is still sitting in quoteBuf out to result.
     ICU_Utility::appendToRule(result, -1,
                               TRUE, escapeUnprintable, quoteBuf);
     return result;
 }
 /**
  * Implement UnicodeMatcher.
  *
  * An empty pattern answers TRUE for every index value.  Otherwise only
  * the first code point of the pattern is examined: if it has a
  * registered sub-matcher the question is passed on to it, else the
  * answer is whether the code point's low byte equals v.
  */
 UBool StringMatcher::matchesIndexValue(uint8_t v) const {
     if (pattern.length() == 0) {
         return TRUE;
     }

     const UChar32 first = pattern.char32At(0);
     const UnicodeMatcher *nested = data->lookupMatcher(first);
     if (nested != 0) {
         return nested->matchesIndexValue(v);
     }
     return (first & 0xFF) == v;
 }
 /**
  * Implement UnicodeMatcher.
  *
  * Visit the pattern code point by code point.  A code point with a
  * registered sub-matcher asks that matcher to add its own match set;
  * any other code point is added to toUnionTo directly.
  *
  * @param toUnionTo set that receives the additions
  */
 void StringMatcher::addMatchSetTo(UnicodeSet& toUnionTo) const {
     int32_t idx = 0;
     while (idx < pattern.length()) {
         const UChar32 cp = pattern.char32At(idx);
         const UnicodeMatcher* nested = data->lookupMatcher(cp);
         if (nested != NULL) {
             nested->addMatchSetTo(toUnionTo);
         } else {
             toUnionTo.add(cp);
         }
         // Step over one or two UTF-16 units depending on the code point.
         idx += U16_LENGTH(cp);
     }
 }
 /**
  * UnicodeReplacer API.
  *
  * Replace text[start, limit) with the text most recently recorded by
  * matches().  The recorded span is duplicated with Replaceable::copy()
  * (copy segment with out-of-band data) to position limit, after which
  * the original range [start, limit) is deleted.
  *
  * @param text  text to modify
  * @param start start of the range being replaced
  * @param limit end of the range being replaced; also the copy target
  * @return length of the copied span, or 0 if nothing was copied
  */
 int32_t StringMatcher::replace(Replaceable& text,
                                int32_t start,
                                int32_t limit,
                                int32_t& /*cursor*/) {
     int32_t copied = 0;

     // matchStart < 0 means no match was recorded: a quantifier matched
     // zero-length, e.g. x (a)* y matched "xy".  An empty recorded span
     // likewise needs no copy.
     if (matchStart >= 0 && matchStart != matchLimit) {
         text.copy(matchStart, matchLimit, limit);
         copied = matchLimit - matchStart;
     }

     // Remove the original text.
     text.handleReplaceBetween(start, limit, UnicodeString());
     return copied;
 }
 /**
  * UnicodeReplacer API.
  *
  * Write the replacer form of this segment into rule: a '$' followed by
  * segmentNumber as rendered by ICU_Utility::appendNumber.  rule is
  * emptied first.
  *
  * @param rule receives the text
  * @return rule
  */
 UnicodeString& StringMatcher::toReplacerPattern(UnicodeString& rule,
                                                 UBool /*escapeUnprintable*/) const {
     // assert(segmentNumber > 0);
     const UChar dollarSign = 0x0024; /*$*/
     // NOTE(review): 10 and 1 are presumably the radix and the minimum
     // digit count -- confirm against appendNumber's declaration.
     const int32_t numberArg1 = 10;
     const int32_t numberArg2 = 1;

     rule.truncate(0);
     rule.append(dollarSign);
     ICU_Utility::appendNumber(rule, segmentNumber, numberArg1, numberArg2);
     return rule;
 }
 /**
  * Forget any recorded match by setting both matchStart and matchLimit
  * to -1.  This must be called before performing a set of matches with
  * this segment.
  */
 void StringMatcher::resetMatch() {
     matchLimit = -1;
     matchStart = -1;
 }
 /**
  * Union the set of all characters that may be output by this object
  * into the given set.  This implementation deliberately adds nothing;
  * see the comment in the body for the reasoning.
  * @param toUnionTo the set into which to union the output characters
  *                  (unused here, hence the unnamed parameter)
  */
 void StringMatcher::addReplacementSetTo(UnicodeSet& /*toUnionTo*/) const {
     // The output of this replacer varies; it is the source text between
     // matchStart and matchLimit.  Since this varies depending on the
     // input text, we can't compute it here.  We can either do nothing
     // or we can add ALL characters to the set.  It's probably more useful
     // to do nothing.
 }
 /**
  * Implement UnicodeFunctor
  */
 void StringMatcher::setData(const TransliterationRuleData* d) {
     data = d;
     int32_t i = 0;
     while (i<pattern.length()) {
         UChar32 c = pattern.char32At(i);
         UnicodeFunctor* f = data->lookup(c);
         if (f != NULL) {
             f->setData(data);
         }
         i += U16_LENGTH(c);
     }
 }
 U_NAMESPACE_END
 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
 //eof

The Tor Browser / annotate

intl/icu/source/i18n/strmatch.cpp@b8a032363ba2 (annotated)

intl/icu/source/i18n/strmatch.cpp