The Tor Browser: comparison intl/icu/source/i18n/strmatch.cpp

--1:000000000000
+:edd5c8937fd4
+/*
+**********************************************************************
+*   Copyright (c) 2001-2012, International Business Machines Corporation
+*   and others.  All Rights Reserved.
+**********************************************************************
+*   Date        Name        Description
+*   07/23/01    aliu        Creation.
+**********************************************************************
+*/
+#include "unicode/utypes.h"
+#if !UCONFIG_NO_TRANSLITERATION
+#include "strmatch.h"
+#include "rbt_data.h"
+#include "util.h"
+#include "unicode/uniset.h"
+#include "unicode/utf16.h"
+U_NAMESPACE_BEGIN
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringMatcher)
+/**
+ * Construct a matcher whose pattern is a slice of the given string.
+ *
+ * @param theString  text from which the pattern is extracted
+ * @param start      start index handed to extractBetween()
+ * @param limit      limit index handed to extractBetween()
+ * @param segmentNum value stored in segmentNumber
+ * @param theData    rule data; only its address is stored, and it is later
+ *                   used to look up nested matchers for pattern characters
+ */
+StringMatcher::StringMatcher(const UnicodeString& theString,
+                             int32_t start,
+                             int32_t limit,
+                             int32_t segmentNum,
+                             const TransliterationRuleData& theData)
+    : data(&theData),
+      segmentNumber(segmentNum),
+      matchStart(-1),
+      matchLimit(-1)
+{
+    // pattern is not in the initializer list; it is filled here from the
+    // requested range of theString.
+    theString.extractBetween(start, limit, pattern);
+}
+/**
+ * Copy constructor.  Copies the three base sub-objects and every member of
+ * the source, including its recorded match range.  The data pointer itself
+ * is copied, so both objects refer to the same rule data.
+ */
+StringMatcher::StringMatcher(const StringMatcher& o)
+    : UnicodeFunctor(o),
+      UnicodeMatcher(o),
+      UnicodeReplacer(o),
+      pattern(o.pattern),
+      data(o.data),
+      segmentNumber(o.segmentNumber),
+      matchStart(o.matchStart),
+      matchLimit(o.matchLimit)
+{
+}
+/**
+ * Destructor.  The body is empty; nothing is released explicitly here.
+ */
+StringMatcher::~StringMatcher() {}
+/**
+ * Implement UnicodeFunctor.
+ *
+ * @return a newly allocated copy of this object, made with the copy
+ *         constructor
+ */
+UnicodeFunctor* StringMatcher::clone() const {
+    StringMatcher* duplicate = new StringMatcher(*this);
+    return duplicate;
+}
+/**
+ * UnicodeFunctor API.  View this object through its UnicodeMatcher base.
+ *
+ * @return 'this', with constness removed, as a UnicodeMatcher pointer
+ */
+UnicodeMatcher* StringMatcher::toMatcher() const {
+    // Strip const first, then convert to the UnicodeMatcher base.
+    return static_cast<UnicodeMatcher *>(const_cast<StringMatcher *>(this));
+}
+/**
+ * UnicodeFunctor API.  View this object through its UnicodeReplacer base.
+ *
+ * @return 'this', with constness removed, as a UnicodeReplacer pointer
+ */
+UnicodeReplacer* StringMatcher::toReplacer() const {
+    // Strip const first, then convert to the UnicodeReplacer base.
+    return static_cast<UnicodeReplacer *>(const_cast<StringMatcher *>(this));
+}
+/**
+ * Implement UnicodeMatcher.
+ *
+ * Walks the pattern one UChar at a time.  A pattern character for which
+ * data->lookupMatcher() returns a matcher is delegated to that matcher;
+ * any other character is compared literally against the text.
+ *
+ * @param text        the text being matched
+ * @param offset      in/out position; read as the starting cursor and, only
+ *                    when U_MATCH is returned, overwritten with the final
+ *                    cursor
+ * @param limit       context boundary; limit < offset selects a reverse
+ *                    match, otherwise the match runs forward
+ * @param incremental when true, a forward match that reaches limit before
+ *                    the pattern is exhausted returns U_PARTIAL_MATCH
+ * @return U_MATCH on success, U_MISMATCH when a literal fails,
+ *         U_PARTIAL_MATCH as described above, or whatever non-U_MATCH
+ *         value a nested matcher returned
+ */
+UMatchDegree StringMatcher::matches(const Replaceable& text,
+                                    int32_t& offset,
+                                    int32_t limit,
+                                    UBool incremental) {
+    int32_t pos = offset;
+
+    if (limit < pos) {
+        // Reverse direction: consume the pattern from its last UChar to its
+        // first while pos moves down toward limit.
+        for (int32_t idx = pattern.length() - 1; idx >= 0; --idx) {
+            UChar patChar = pattern.charAt(idx);
+            UnicodeMatcher* nested = data->lookupMatcher(patChar);
+            if (nested != NULL) {
+                UMatchDegree degree =
+                    nested->matches(text, pos, limit, incremental);
+                if (degree != U_MATCH) {
+                    return degree;
+                }
+                continue;
+            }
+            // Literal: fail if we ran out of context or the text differs.
+            if (pos <= limit || patChar != text.charAt(pos)) {
+                return U_MISMATCH;
+            }
+            --pos;
+        }
+        // Record the match position, converted to a normal forward
+        // start/limit pair, but only when no earlier match is recorded --
+        // we want the rightmost match.
+        if (matchStart < 0) {
+            matchStart = pos + 1;
+            matchLimit = offset + 1;
+        }
+    } else {
+        // Forward direction.
+        for (int32_t idx = 0; idx < pattern.length(); ++idx) {
+            if (incremental && pos == limit) {
+                // Reached the context limit without a mismatch and without
+                // completing the match.
+                return U_PARTIAL_MATCH;
+            }
+            UChar patChar = pattern.charAt(idx);
+            UnicodeMatcher* nested = data->lookupMatcher(patChar);
+            if (nested != NULL) {
+                UMatchDegree degree =
+                    nested->matches(text, pos, limit, incremental);
+                if (degree != U_MATCH) {
+                    return degree;
+                }
+                continue;
+            }
+            // The pos < limit test is redundant when incremental is true
+            // (checked above) but is required otherwise.
+            if (pos >= limit || patChar != text.charAt(pos)) {
+                return U_MISMATCH;
+            }
+            ++pos;
+        }
+        // Record the match position (always overwritten going forward).
+        matchStart = offset;
+        matchLimit = pos;
+    }
+
+    offset = pos;
+    return U_MATCH;
+}
+/**
+ * Implement UnicodeMatcher.
+ *
+ * Rebuilds a rule-syntax pattern for this matcher into result, which is
+ * emptied first.  When segmentNumber > 0 the output is wrapped in '(' and
+ * ')'.  Pattern characters that map to a nested matcher contribute that
+ * matcher's own toPattern() output; all others are appended as literals.
+ *
+ * @param result            receives the pattern; previous content is dropped
+ * @param escapeUnprintable forwarded to nested toPattern() calls and to
+ *                          ICU_Utility::appendToRule()
+ * @return result
+ */
+UnicodeString& StringMatcher::toPattern(UnicodeString& result,
+                                        UBool escapeUnprintable) const
+{
+    UnicodeString nestedPattern;
+    UnicodeString quoteBuf;
+
+    result.truncate(0);
+    if (segmentNumber > 0) {
+        result.append(static_cast<UChar>(40)); /*(*/
+    }
+
+    for (int32_t idx = 0; idx < pattern.length(); ++idx) {
+        UChar patChar = pattern.charAt(idx);
+        const UnicodeMatcher* nested = data->lookupMatcher(patChar);
+        if (nested != NULL) {
+            ICU_Utility::appendToRule(result,
+                                      nested->toPattern(nestedPattern, escapeUnprintable),
+                                      TRUE, escapeUnprintable, quoteBuf);
+        } else {
+            ICU_Utility::appendToRule(result, patChar, FALSE,
+                                      escapeUnprintable, quoteBuf);
+        }
+    }
+
+    if (segmentNumber > 0) {
+        result.append(static_cast<UChar>(41)); /*)*/
+    }
+
+    // Flush quoteBuf out to result
+    ICU_Utility::appendToRule(result, -1, TRUE, escapeUnprintable, quoteBuf);
+    return result;
+}
+/**
+ * Implement UnicodeMatcher.
+ *
+ * An empty pattern reports TRUE for every index value.  Otherwise only the
+ * first code point of the pattern is consulted: if it maps to a nested
+ * matcher the question is delegated to it, else the low byte of the code
+ * point is compared with v.
+ *
+ * @param v the index value to test
+ * @return TRUE or FALSE as described above
+ */
+UBool StringMatcher::matchesIndexValue(uint8_t v) const {
+    if (pattern.isEmpty()) {
+        return TRUE;
+    }
+    UChar32 first = pattern.char32At(0);
+    const UnicodeMatcher* nested = data->lookupMatcher(first);
+    if (nested != NULL) {
+        return nested->matchesIndexValue(v);
+    }
+    return (first & 0xFF) == v;
+}
+/**
+ * Implement UnicodeMatcher.
+ *
+ * Visits the pattern code point by code point.  A code point that maps to a
+ * nested matcher lets that matcher add its own set; any other code point is
+ * added to the set directly.
+ *
+ * @param toUnionTo the set that receives the additions
+ */
+void StringMatcher::addMatchSetTo(UnicodeSet& toUnionTo) const {
+    int32_t idx = 0;
+    while (idx < pattern.length()) {
+        UChar32 cp = pattern.char32At(idx);
+        const UnicodeMatcher* nested = data->lookupMatcher(cp);
+        if (nested != NULL) {
+            nested->addMatchSetTo(toUnionTo);
+        } else {
+            toUnionTo.add(cp);
+        }
+        // Step over one or two UChars depending on the code point.
+        idx += U16_LENGTH(cp);
+    }
+}
+/**
+ * UnicodeReplacer API.
+ *
+ * If a non-empty match range is recorded, that range of the text is copied
+ * (with out-of-band data) to position limit.  The range start..limit is
+ * then replaced with an empty string in every case.
+ *
+ * @param text  the text to modify
+ * @param start start of the range to delete
+ * @param limit end of the range to delete, and destination of the copy
+ * @return the length of the copied match range, or 0 if nothing was copied
+ */
+int32_t StringMatcher::replace(Replaceable& text,
+                               int32_t start,
+                               int32_t limit,
+                               int32_t& /*cursor*/) {
+    int32_t copiedLength = 0;
+
+    // matchStart < 0 means there was no match: a quantifier matched
+    // zero-length.  E.g., x (a)* y matched "xy".  An empty recorded range
+    // likewise needs no copy.
+    if (matchStart >= 0 && matchStart != matchLimit) {
+        text.copy(matchStart, matchLimit, limit);
+        copiedLength = matchLimit - matchStart;
+    }
+
+    // delete original text
+    text.handleReplaceBetween(start, limit, UnicodeString());
+    return copiedLength;
+}
+/**
+ * UnicodeReplacer API.
+ *
+ * Writes '$' followed by segmentNumber in radix 10 into rule, discarding
+ * whatever rule held before.
+ *
+ * @param rule receives the replacer pattern
+ * @return rule
+ */
+UnicodeString& StringMatcher::toReplacerPattern(UnicodeString& rule,
+                                                UBool /*escapeUnprintable*/) const {
+    // assert(segmentNumber > 0);
+    rule.truncate(0);
+    rule.append(static_cast<UChar>(0x0024) /*$*/);
+    ICU_Utility::appendNumber(rule, segmentNumber, 10, 1);
+    return rule;
+}
+/**
+ * Remove any match info by setting both matchStart and matchLimit to -1.
+ * This must be called before performing a set of matches with this segment.
+ */
+void StringMatcher::resetMatch() {
+    matchLimit = -1;
+    matchStart = -1;
+}
+/**
+ * Union the set of all characters that may be output by this object
+ * into the given set.  This implementation intentionally adds nothing.
+ * @param toUnionTo the set into which to union the output characters
+ */
+void StringMatcher::addReplacementSetTo(UnicodeSet& /*toUnionTo*/) const {
+    // What this replacer outputs is the source text between matchStart and
+    // matchLimit, which depends on the input text and so cannot be computed
+    // here.  The choices are to do nothing or to add ALL characters to the
+    // set; doing nothing is probably the more useful of the two.
+}
+/**
+ * Implement UnicodeFunctor.
+ *
+ * Stores the new rule data pointer, then walks the pattern code point by
+ * code point and passes the same pointer on to every functor that
+ * data->lookup() returns for a pattern code point.
+ *
+ * @param d the rule data to use from now on
+ */
+void StringMatcher::setData(const TransliterationRuleData* d) {
+    data = d;
+    for (int32_t idx = 0; idx < pattern.length(); ) {
+        UChar32 cp = pattern.char32At(idx);
+        UnicodeFunctor* nested = data->lookup(cp);
+        if (nested != NULL) {
+            nested->setData(data);
+        }
+        // Step over one or two UChars depending on the code point.
+        idx += U16_LENGTH(cp);
+    }
+}
+U_NAMESPACE_END
+#endif /* #if !UCONFIG_NO_TRANSLITERATION */
+//eof

The Tor Browser / file comparison

comparison: intl/icu/source/i18n/strmatch.cpp

intl/icu/source/i18n/strmatch.cpp