The Tor Browser: intl/icu/source/common/normlzr.cpp@6474c204b198 (annotated)

intl/icu/source/common/normlzr.cpp@6474c204b198 (annotated)

intl/icu/source/common/normlzr.cpp

Wed, 31 Dec 2014 06:09:35 +0100

author: Michael Schloh von Bennewitz <michael@schloh.com>
date: Wed, 31 Dec 2014 06:09:35 +0100
changeset 0: 6474c204b198
permissions: -rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purposes.

 /*
  *************************************************************************
  * COPYRIGHT:
  * Copyright (c) 1996-2012, International Business Machines Corporation and
  * others. All Rights Reserved.
  *************************************************************************
  */
 #include "unicode/utypes.h"
 #if !UCONFIG_NO_NORMALIZATION
 #include "unicode/uniset.h"
 #include "unicode/unistr.h"
 #include "unicode/chariter.h"
 #include "unicode/schriter.h"
 #include "unicode/uchriter.h"
 #include "unicode/normlzr.h"
 #include "unicode/utf16.h"
 #include "cmemory.h"
 #include "normalizer2impl.h"
 #include "uprops.h"  // for uniset_getUnicode32Instance()
 U_NAMESPACE_BEGIN
 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(Normalizer)
 //-------------------------------------------------------------------------
 // Constructors and other boilerplate
 //-------------------------------------------------------------------------
 // Builds a Normalizer over a UnicodeString: the input is wrapped in a new
 // StringCharacterIterator, all positions start at 0 and no options are set.
 Normalizer::Normalizer(const UnicodeString& str, UNormalizationMode mode)
     : UObject(),
       fFilteredNorm2(NULL),
       fNorm2(NULL),
       fUMode(mode),
       fOptions(0),
       text(new StringCharacterIterator(str)),
       currentIndex(0),
       nextIndex(0),
       buffer(),
       bufferPos(0)
 {
     // Selects the normalizer instance that matches fUMode/fOptions.
     init();
 }
 // Builds a Normalizer over a UChar array: the input is wrapped in a new
 // UCharCharacterIterator, all positions start at 0 and no options are set.
 Normalizer::Normalizer(const UChar *str, int32_t length, UNormalizationMode mode)
     : UObject(),
       fFilteredNorm2(NULL),
       fNorm2(NULL),
       fUMode(mode),
       fOptions(0),
       text(new UCharCharacterIterator(str, length)),
       currentIndex(0),
       nextIndex(0),
       buffer(),
       bufferPos(0)
 {
     // Selects the normalizer instance that matches fUMode/fOptions.
     init();
 }
 // Builds a Normalizer over a clone of the given CharacterIterator; all
 // positions start at 0 and no options are set.
 Normalizer::Normalizer(const CharacterIterator& iter, UNormalizationMode mode)
     : UObject(),
       fFilteredNorm2(NULL),
       fNorm2(NULL),
       fUMode(mode),
       fOptions(0),
       text(iter.clone()),
       currentIndex(0),
       nextIndex(0),
       buffer(),
       bufferPos(0)
 {
     // Selects the normalizer instance that matches fUMode/fOptions.
     init();
 }
 // Copy constructor: duplicates mode, options, positions and the normalized
 // buffer, and clones the source's text iterator.  The normalizer pointers
 // are not copied; init() re-derives them from the copied mode and options.
 Normalizer::Normalizer(const Normalizer &copy)
     : UObject(copy),
       fFilteredNorm2(NULL),
       fNorm2(NULL),
       fUMode(copy.fUMode),
       fOptions(copy.fOptions),
       text(copy.text->clone()),
       currentIndex(copy.currentIndex),
       nextIndex(copy.nextIndex),
       buffer(copy.buffer),
       bufferPos(copy.bufferPos)
 {
     init();
 }
 void
 Normalizer::init() {
     UErrorCode errorCode=U_ZERO_ERROR;
     fNorm2=Normalizer2Factory::getInstance(fUMode, errorCode);
     if(fOptions&UNORM_UNICODE_3_2) {
         delete fFilteredNorm2;
         fNorm2=fFilteredNorm2=
             new FilteredNormalizer2(*fNorm2, *uniset_getUnicode32Instance(errorCode));
     }
     if(U_FAILURE(errorCode)) {
         errorCode=U_ZERO_ERROR;
         fNorm2=Normalizer2Factory::getNoopInstance(errorCode);
     }
 }
 // Destructor: frees the text iterator and the optional filtered normalizer.
 // fNorm2 is not deleted here.
 Normalizer::~Normalizer()
 {
     delete text;
     delete fFilteredNorm2;
 }
 // Returns a newly allocated copy of this object, made with the copy constructor.
 Normalizer*
 Normalizer::clone() const
 {
     Normalizer *duplicate=new Normalizer(*this);
     return duplicate;
 }
 /**
  * Generates a hash code for this iterator.
  *
  * The hash combines the text iterator's hash, the mode, the options, the
  * normalized buffer's hash and the three position fields.
  * The sum is accumulated in unsigned arithmetic so that large component
  * values wrap around instead of triggering signed-overflow undefined behavior.
  *
  * @return the combined hash value
  */
 int32_t Normalizer::hashCode() const
 {
     uint32_t hash=(uint32_t)text->hashCode();
     hash+=(uint32_t)fUMode;
     hash+=(uint32_t)fOptions;
     hash+=(uint32_t)buffer.hashCode();
     hash+=(uint32_t)bufferPos;
     hash+=(uint32_t)currentIndex;
     hash+=(uint32_t)nextIndex;
     return (int32_t)hash;
 }
 // Equality: an object equals itself; otherwise two Normalizers are equal when
 // mode, options, text iterator, buffer, buffer position and nextIndex match.
 UBool Normalizer::operator==(const Normalizer& that) const
 {
     if(this==&that) {
         return TRUE;
     }
     if(fUMode!=that.fUMode || fOptions!=that.fOptions) {
         return FALSE;
     }
     if(!(*text==*that.text)) {
         return FALSE;
     }
     if(!(buffer==that.buffer)) {
         return FALSE;
     }
     return bufferPos==that.bufferPos && nextIndex==that.nextIndex;
 }
 //-------------------------------------------------------------------------
 // Static utility methods
 //-------------------------------------------------------------------------
 // Normalizes source into result using the given mode.
 // - A bogus source or an incoming failure status makes result bogus; if the
 //   status was still a success it becomes U_ILLEGAL_ARGUMENT_ERROR.
 // - With UNORM_UNICODE_3_2 in options, the work goes through a
 //   FilteredNormalizer2 built over the Unicode 3.2 set.
 // - source and result may be the same object; a temporary is used then and
 //   copied into result only on success.
 void U_EXPORT2
 Normalizer::normalize(const UnicodeString& source,
                       UNormalizationMode mode, int32_t options,
                       UnicodeString& result,
                       UErrorCode &status) {
     if(U_FAILURE(status) || source.isBogus()) {
         result.setToBogus();
         if(U_SUCCESS(status)) {
             status=U_ILLEGAL_ARGUMENT_ERROR;
         }
         return;
     }
     UnicodeString scratch;
     // the source and result strings may be the same object; write to a temporary then
     const UBool aliased=(&source==&result);
     UnicodeString &target=aliased ? scratch : result;
     const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, status);
     if(U_SUCCESS(status)) {
         if((options&UNORM_UNICODE_3_2)==0) {
             n2->normalize(source, target, status);
         } else {
             FilteredNormalizer2 filtered(*n2, *uniset_getUnicode32Instance(status));
             filtered.normalize(source, target, status);
         }
     }
     if(aliased && U_SUCCESS(status)) {
         result=scratch;
     }
 }
 // Composes source into result: UNORM_NFKC when compat is set, UNORM_NFC otherwise.
 void U_EXPORT2
 Normalizer::compose(const UnicodeString& source,
                     UBool compat, int32_t options,
                     UnicodeString& result,
                     UErrorCode &status) {
     const UNormalizationMode composeMode=compat ? UNORM_NFKC : UNORM_NFC;
     normalize(source, composeMode, options, result, status);
 }
 // Decomposes source into result: UNORM_NFKD when compat is set, UNORM_NFD otherwise.
 void U_EXPORT2
 Normalizer::decompose(const UnicodeString& source,
                       UBool compat, int32_t options,
                       UnicodeString& result,
                       UErrorCode &status) {
     const UNormalizationMode decomposeMode=compat ? UNORM_NFKD : UNORM_NFD;
     normalize(source, decomposeMode, options, result, status);
 }
 // Runs the quick check for the given mode on source.
 // Returns UNORM_MAYBE if the normalizer instance could not be obtained.
 // With UNORM_UNICODE_3_2 in options, the check goes through a
 // FilteredNormalizer2 built over the Unicode 3.2 set.
 UNormalizationCheckResult
 Normalizer::quickCheck(const UnicodeString& source,
                        UNormalizationMode mode, int32_t options,
                        UErrorCode &status) {
     const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, status);
     if(U_FAILURE(status)) {
         return UNORM_MAYBE;
     }
     if(options&UNORM_UNICODE_3_2) {
         FilteredNormalizer2 filtered(*n2, *uniset_getUnicode32Instance(status));
         return filtered.quickCheck(source, status);
     }
     return n2->quickCheck(source, status);
 }
 // Tests whether source is normalized according to mode.
 // Returns FALSE if the normalizer instance could not be obtained.
 // With UNORM_UNICODE_3_2 in options, the test goes through a
 // FilteredNormalizer2 built over the Unicode 3.2 set.
 UBool
 Normalizer::isNormalized(const UnicodeString& source,
                          UNormalizationMode mode, int32_t options,
                          UErrorCode &status) {
     const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, status);
     if(U_FAILURE(status)) {
         return FALSE;
     }
     if(options&UNORM_UNICODE_3_2) {
         FilteredNormalizer2 filtered(*n2, *uniset_getUnicode32Instance(status));
         return filtered.isNormalized(source, status);
     }
     return n2->isNormalized(source, status);
 }
 // Copies left into the destination and appends right to it through the
 // normalizer for the given mode, then returns result.
 // - A bogus input or an incoming failure makes result bogus; if the error
 //   code was still a success it becomes U_ILLEGAL_ARGUMENT_ERROR.
 // - right and result may be the same object; a temporary is used then and
 //   copied into result only on success.
 UnicodeString & U_EXPORT2
 Normalizer::concatenate(const UnicodeString &left, const UnicodeString &right,
                         UnicodeString &result,
                         UNormalizationMode mode, int32_t options,
                         UErrorCode &errorCode) {
     if(U_FAILURE(errorCode) || left.isBogus() || right.isBogus()) {
         result.setToBogus();
         if(U_SUCCESS(errorCode)) {
             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
         }
         return result;
     }
     UnicodeString scratch;
     // the right and result strings may be the same object; write to a temporary then
     const UBool aliased=(&right==&result);
     UnicodeString &target=aliased ? scratch : result;
     target=left;
     const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, errorCode);
     if(U_SUCCESS(errorCode)) {
         if((options&UNORM_UNICODE_3_2)==0) {
             n2->append(target, right, errorCode);
         } else {
             FilteredNormalizer2 filtered(*n2, *uniset_getUnicode32Instance(errorCode));
             filtered.append(target, right, errorCode);
         }
     }
     if(aliased && U_SUCCESS(errorCode)) {
         result=scratch;
     }
     return result;
 }
 //-------------------------------------------------------------------------
 // Iteration API
 //-------------------------------------------------------------------------
 /**
  * Return the current character in the normalized text without advancing.
  * If the buffer is exhausted, the next segment is normalized first;
  * DONE is returned when that yields nothing.
  */
 UChar32 Normalizer::current() {
     if(bufferPos>=buffer.length() && !nextNormalize()) {
         return DONE;
     }
     return buffer.char32At(bufferPos);
 }
 /**
  * Return the next character in the normalized text and advance
  * the iteration position past it (by one or two code units in the buffer).
  * If the end of the text has already been reached, {@link #DONE} is returned.
  */
 UChar32 Normalizer::next() {
     if(bufferPos>=buffer.length() && !nextNormalize()) {
         return DONE;
     }
     const UChar32 codePoint=buffer.char32At(bufferPos);
     bufferPos+=U16_LENGTH(codePoint);
     return codePoint;
 }
 /**
  * Return the previous character in the normalized text and move
  * the iteration position back over it (by one or two code units in the buffer).
  * If the beginning of the text has already been reached, {@link #DONE} is returned.
  */
 UChar32 Normalizer::previous() {
     if(bufferPos<=0 && !previousNormalize()) {
         return DONE;
     }
     const UChar32 codePoint=buffer.char32At(bufferPos-1);
     bufferPos-=U16_LENGTH(codePoint);
     return codePoint;
 }
 void Normalizer::reset() {
     currentIndex=nextIndex=text->setToStart();
     clearBuffer();
 }
 // Moves the input position to index without normalizing anything.
 // Both input indices take the position the text iterator reports after the
 // move, and the normalized buffer is emptied.
 void
 Normalizer::setIndexOnly(int32_t index) {
     text->setIndex(index);  // pins index
     const int32_t pinned=text->getIndex();
     nextIndex=pinned;
     currentIndex=pinned;
     clearBuffer();
 }
 /**
  * Return the first character in the normalized text.  This resets
  * the <tt>Normalizer's</tt> position to the beginning of the text
  * and then behaves like next().
  */
 UChar32 Normalizer::first() {
     reset();
     const UChar32 firstChar=next();
     return firstChar;
 }
 /**
  * Return the last character in the normalized text.  This resets
  * the <tt>Normalizer's</tt> position to be just before the
  * input text corresponding to that normalized character.
  */
 UChar32 Normalizer::last() {
     const int32_t end=text->setToEnd();
     nextIndex=end;
     currentIndex=end;
     clearBuffer();
     const UChar32 lastChar=previous();
     return lastChar;
 }
 /**
  * Retrieve the current iteration position in the input text that is
  * being normalized.  This method is useful in applications such as
  * searching, where you need to be able to determine the position in
  * the input text that corresponds to a given normalized output character.
  * <p>
  * <b>Note:</b> This method returns a position in the <em>input</em>, while
  * {@link #next} and {@link #previous} iterate through characters in the
  * <em>output</em>.  This means that there is not necessarily a one-to-one
  * correspondence between characters returned by <tt>next</tt> and
  * <tt>previous</tt> and the indices passed to and returned from
  * <tt>setIndex</tt> and {@link #getIndex}.
  * <p>
  * While unread characters remain in the buffer the result is currentIndex;
  * once the buffer is used up it is nextIndex.
  */
 int32_t Normalizer::getIndex() const {
     return bufferPos<buffer.length() ? currentIndex : nextIndex;
 }
 /**
  * Retrieve the index of the start of the input text.  This is the begin index
  * of the <tt>CharacterIterator</tt> or the start (i.e. 0) of the <tt>String</tt>
  * over which this <tt>Normalizer</tt> is iterating.
  * The value is taken from the underlying text iterator.
  */
 int32_t Normalizer::startIndex() const {
     const int32_t begin=text->startIndex();
     return begin;
 }
 /**
  * Retrieve the index of the end of the input text.  This is the end index
  * of the <tt>CharacterIterator</tt> or the length of the <tt>String</tt>
  * over which this <tt>Normalizer</tt> is iterating.
  * The value is taken from the underlying text iterator.
  */
 int32_t Normalizer::endIndex() const {
     const int32_t end=text->endIndex();
     return end;
 }
 //-------------------------------------------------------------------------
 // Property access methods
 //-------------------------------------------------------------------------
 // Stores the new normalization mode and re-selects the normalizer for it.
 // The iteration position and the normalized buffer are left untouched.
 void
 Normalizer::setMode(UNormalizationMode newMode) {
     fUMode=newMode;
     init();
 }
 // Reports the normalization mode currently stored in this object.
 UNormalizationMode
 Normalizer::getUMode() const {
     return fUMode;
 }
 // Sets (value true) or clears (value false) the given option bit(s) in
 // fOptions, then re-selects the normalizer for the new option set.
 void
 Normalizer::setOption(int32_t option,
                       UBool value)
 {
     fOptions = value ? (fOptions | option) : (fOptions & (~option));
     init();
 }
 // Tells whether any of the given option bit(s) is currently set.
 UBool
 Normalizer::getOption(int32_t option) const
 {
     const int32_t selected = fOptions & option;
     return selected != 0;
 }
 /**
  * Set the input text over which this <tt>Normalizer</tt> will iterate.
  * The iteration position is set to the beginning of the input text.
  */
 void
 Normalizer::setText(const UnicodeString& newText,
                     UErrorCode &status)
 {
     if (U_FAILURE(status)) {
         return;
     }
     CharacterIterator *newIter = new StringCharacterIterator(newText);
     if (newIter == NULL) {
         status = U_MEMORY_ALLOCATION_ERROR;
         return;
     }
     delete text;
     text = newIter;
     reset();
 }
 /**
  * Set the input text over which this <tt>Normalizer</tt> will iterate.
  * The iteration position is set to the beginning of the string.
  */
 void
 Normalizer::setText(const CharacterIterator& newText,
                     UErrorCode &status)
 {
     if (U_FAILURE(status)) {
         return;
     }
     CharacterIterator *newIter = newText.clone();
     if (newIter == NULL) {
         status = U_MEMORY_ALLOCATION_ERROR;
         return;
     }
     delete text;
     text = newIter;
     reset();
 }
 void
 Normalizer::setText(const UChar* newText,
                     int32_t length,
                     UErrorCode &status)
 {
     if (U_FAILURE(status)) {
         return;
     }
     CharacterIterator *newIter = new UCharCharacterIterator(newText, length);
     if (newIter == NULL) {
         status = U_MEMORY_ALLOCATION_ERROR;
         return;
     }
     delete text;
     text = newIter;
     reset();
 }
 /**
  * Copies the text under iteration into the UnicodeString referred to by "result".
  * The work is delegated to the underlying text iterator.
  * @param result Receives a copy of the text under iteration.
  */
 void
 Normalizer::getText(UnicodeString& result) {
     text->getText(result);
 }
 //-------------------------------------------------------------------------
 // Private utility methods
 //-------------------------------------------------------------------------
 // Empties the normalized-text buffer and rewinds the position within it.
 void Normalizer::clearBuffer() {
     bufferPos=0;
     buffer.remove();
 }
 // Normalizes the input segment that starts at nextIndex into the buffer.
 // The segment is the first code point plus every following code point up to
 // (not including) the next one the normalizer reports a boundary before.
 // Afterwards currentIndex is the segment start and nextIndex its end.
 // Returns FALSE at the end of the input, on a normalization error, or when
 // the normalized segment is empty.
 UBool
 Normalizer::nextNormalize() {
     clearBuffer();
     currentIndex=nextIndex;
     text->setIndex(nextIndex);
     if(!text->hasNext()) {
         return FALSE;
     }
     // Skip at least one character so we make progress.
     UnicodeString segment(text->next32PostInc());
     for(;;) {
         if(!text->hasNext()) {
             break;
         }
         const UChar32 codePoint=text->next32PostInc();
         if(fNorm2->hasBoundaryBefore(codePoint)) {
             // This code point starts the following segment; step back over it.
             text->move32(-1, CharacterIterator::kCurrent);
             break;
         }
         segment.append(codePoint);
     }
     nextIndex=text->getIndex();
     UErrorCode errorCode=U_ZERO_ERROR;
     fNorm2->normalize(segment, buffer, errorCode);
     if(U_FAILURE(errorCode)) {
         return FALSE;
     }
     return !buffer.isEmpty();
 }
 // Normalizes the input segment that ends at currentIndex into the buffer.
 // Code points are collected backwards until one is reached that the
 // normalizer reports a boundary before (that one is included) or the start
 // of the input.  Afterwards nextIndex is the segment end, currentIndex its
 // start, and bufferPos sits at the end of the buffer.
 // Returns FALSE at the start of the input, on a normalization error, or when
 // the normalized segment is empty.
 UBool
 Normalizer::previousNormalize() {
     clearBuffer();
     nextIndex=currentIndex;
     text->setIndex(currentIndex);
     if(!text->hasPrevious()) {
         return FALSE;
     }
     UnicodeString segment;
     for(UBool atBoundary=FALSE; !atBoundary && text->hasPrevious();) {
         const UChar32 codePoint=text->previous32();
         segment.insert(0, codePoint);
         atBoundary=fNorm2->hasBoundaryBefore(codePoint);
     }
     currentIndex=text->getIndex();
     UErrorCode errorCode=U_ZERO_ERROR;
     fNorm2->normalize(segment, buffer, errorCode);
     bufferPos=buffer.length();
     if(U_FAILURE(errorCode)) {
         return FALSE;
     }
     return !buffer.isEmpty();
 }
 U_NAMESPACE_END
 #endif /* #if !UCONFIG_NO_NORMALIZATION */

The Tor Browser / annotate

intl/icu/source/common/normlzr.cpp@6474c204b198 (annotated)

intl/icu/source/common/normlzr.cpp