michael@0: /* michael@0: ********************************************************************** michael@0: * Copyright (c) 2001-2012, International Business Machines michael@0: * Corporation and others. All Rights Reserved. michael@0: ********************************************************************** michael@0: * Date Name Description michael@0: * 07/18/01 aliu Creation. michael@0: ********************************************************************** michael@0: */ michael@0: michael@0: #include "unicode/unifilt.h" michael@0: #include "unicode/rep.h" michael@0: #include "unicode/utf16.h" michael@0: michael@0: U_NAMESPACE_BEGIN michael@0: UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(UnicodeFilter) michael@0: michael@0: michael@0: /* Define this here due to the lack of another file. michael@0: It can't be defined in the header */ michael@0: UnicodeMatcher::~UnicodeMatcher() {} michael@0: michael@0: UnicodeFilter::~UnicodeFilter() {} michael@0: michael@0: /** michael@0: * UnicodeFunctor API. michael@0: * Note that UnicodeMatcher is a base class of UnicodeFilter. michael@0: */ michael@0: UnicodeMatcher* UnicodeFilter::toMatcher() const { michael@0: return const_cast(this); michael@0: } michael@0: michael@0: void UnicodeFilter::setData(const TransliterationRuleData*) {} michael@0: michael@0: /** michael@0: * Default implementation of UnicodeMatcher::matches() for Unicode michael@0: * filters. Matches a single code point at offset (either one or michael@0: * two 16-bit code units). michael@0: */ michael@0: UMatchDegree UnicodeFilter::matches(const Replaceable& text, michael@0: int32_t& offset, michael@0: int32_t limit, michael@0: UBool incremental) { michael@0: UChar32 c; michael@0: if (offset < limit && michael@0: contains(c = text.char32At(offset))) { michael@0: offset += U16_LENGTH(c); michael@0: return U_MATCH; michael@0: } michael@0: if (offset > limit && michael@0: contains(c = text.char32At(offset))) { michael@0: // Backup offset by 1, unless the preceding character is a michael@0: // surrogate pair -- then backup by 2 (keep offset pointing at michael@0: // the lead surrogate). michael@0: --offset; michael@0: if (offset >= 0) { michael@0: offset -= U16_LENGTH(text.char32At(offset)) - 1; michael@0: } michael@0: return U_MATCH; michael@0: } michael@0: if (incremental && offset == limit) { michael@0: return U_PARTIAL_MATCH; michael@0: } michael@0: return U_MISMATCH; michael@0: } michael@0: michael@0: U_NAMESPACE_END michael@0: michael@0: //eof