The Tor Browser: intl/icu/source/i18n/brktrans.cpp@fc2d59ddac77 (annotated)

intl/icu/source/i18n/brktrans.cpp@fc2d59ddac77 (annotated)

intl/icu/source/i18n/brktrans.cpp

Wed, 31 Dec 2014 07:22:50 +0100

author: Michael Schloh von Bennewitz <michael@schloh.com>
date: Wed, 31 Dec 2014 07:22:50 +0100
branch: TOR_BUG_3246
changeset 4: fc2d59ddac77
permissions: -rw-r--r--

Correct previous dual key logic pending first delivery installment.

 /*
 **********************************************************************
 *   Copyright (C) 2008-2010, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 **********************************************************************
 *   Date        Name        Description
 *   05/11/2008  Andy Heninger  Port from Java
 **********************************************************************
 */
 #include "unicode/utypes.h"
 #if  !UCONFIG_NO_TRANSLITERATION && !UCONFIG_NO_BREAK_ITERATION
 #include "unicode/unifilt.h"
 #include "unicode/uchar.h"
 #include "unicode/uniset.h"
 #include "unicode/brkiter.h"
 #include "brktrans.h"
 #include "unicode/uchar.h"
 #include "cmemory.h"
 #include "uprops.h"
 #include "uinvchar.h"
 #include "util.h"
 #include "uvectr32.h"
 U_NAMESPACE_BEGIN
 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(BreakTransliterator)
 static const UChar SPACE = 0x0020;  // U+0020 SPACE; the constructor uses it as the initial insertion string
 /**
  * Constructs a break transliterator with the ID "Any-BreakInternal" whose
  * insertion string is initially a single space.
  *
  * @param adoptedFilter  filter forwarded to the Transliterator base class
  */
 BreakTransliterator::BreakTransliterator(UnicodeFilter* adoptedFilter) :
     Transliterator(UNICODE_STRING("Any-BreakInternal", 17), adoptedFilter),
     fInsertion(SPACE) {
     // Scratch vector reused by handleTransliterate() to collect boundaries.
     // The error code reported by the vector's constructor is not examined here.
     UErrorCode vectorStatus = U_ZERO_ERROR;
     boundaries = new UVector32(vectorStatus);

     // The break iterator is created on first use; see getBreakIterator().
     bi = NULL;
 }
 /**
  * Destructor.  Releases the boundary vector and the break iterator owned
  * by this object.
  */
 BreakTransliterator::~BreakTransliterator() {
     // The two members are independent, so the order of release is irrelevant.
     delete boundaries;
     boundaries = NULL;

     // Deleting NULL is a no-op, so an iterator that was never created is fine.
     delete bi;
     bi = NULL;
 }
 /**
  * Copy constructor.  The new object gets its own clone of the source's
  * break iterator (when the source has one), a copy of the insertion
  * string, and a fresh, empty boundary vector.
  */
 BreakTransliterator::BreakTransliterator(const BreakTransliterator& o) :
     Transliterator(o),
     fInsertion(o.fInsertion) {
     // Clone rather than share the iterator; stay lazy if the source never made one.
     bi = (o.bi != NULL) ? o.bi->clone() : NULL;

     // The boundary vector is scratch space, so its contents are not copied.
     UErrorCode vectorStatus = U_ZERO_ERROR;
     boundaries = new UVector32(vectorStatus);
 }
 /**
  * Transliterator API.  Returns a newly allocated copy of this object,
  * made with the copy constructor.
  */
 Transliterator* BreakTransliterator::clone(void) const {
     BreakTransliterator *copy = new BreakTransliterator(*this);
     return copy;
 }
 /**
  * Implements {@link Transliterator#handleTransliterate}.
  *
  * Runs a word break iterator over the text and inserts the insertion
  * string at every boundary below offsets.limit that has a letter or mark
  * (general categories L* or M*) on both sides.
  *
  * @param text           the text to modify in place
  * @param offsets        on output, limit and contextLimit have grown by
  *                       the total length inserted; start is set as
  *                       described for isIncremental
  * @param isIncremental  if true, start becomes the position just after the
  *                       last insertion (0 when nothing was inserted);
  *                       otherwise start becomes the new limit
  */
 void BreakTransliterator::handleTransliterate(Replaceable& text, UTransPosition& offsets,
                                                     UBool isIncremental ) const {
     UErrorCode status = U_ZERO_ERROR;
     int delta = 0;
     int lastBoundary = 0;

     // The iterator is created lazily, which requires mutating this object
     // from a const method.
     BreakTransliterator *nonConstThis = const_cast<BreakTransliterator *>(this);
     BreakIterator *wordBreaks = nonConstThis->getBreakIterator();

     // Nothing in this file checks that the iterator or the boundary vector
     // was created successfully, so guard against NULL here rather than
     // dereferencing it.  If either is missing the text is left untouched
     // and the offsets are fixed up exactly as for a run with no insertions.
     if (wordBreaks != NULL && boundaries != NULL) {
         boundaries->removeAllElements();

         UnicodeString sText = replaceableAsString(text);
         wordBreaks->setText(sText);
         wordBreaks->preceding(offsets.start);

         // To make things much easier, we will stack the boundaries, and then insert at the end.
         // generally, we won't need too many, since we will be filtered.
         int32_t boundary;
         for (boundary = wordBreaks->next();
              boundary != UBRK_DONE && boundary < offsets.limit;
              boundary = wordBreaks->next()) {
             if (boundary == 0) continue;

             // HACK: Check to see that preceding item was a letter (or mark)
             UChar32 cp = sText.char32At(boundary-1);
             int type = u_charType(cp);
             if ((U_MASK(type) & (U_GC_L_MASK | U_GC_M_MASK)) == 0) continue;

             // ... and that the following item is a letter (or mark) as well.
             cp = sText.char32At(boundary);
             type = u_charType(cp);
             if ((U_MASK(type) & (U_GC_L_MASK | U_GC_M_MASK)) == 0) continue;

             boundaries->addElement(boundary, status);
         }

         if (boundaries->size() != 0) { // if we found something, adjust
             delta = boundaries->size() * fInsertion.length();
             lastBoundary = boundaries->lastElementi();

             // we do this from the end backwards, so that we don't have to keep updating.
             while (boundaries->size() > 0) {
                 boundary = boundaries->popi();
                 text.handleReplaceBetween(boundary, boundary, fInsertion);
             }
         }
     }

     // Now fix up the return values
     offsets.contextLimit += delta;
     offsets.limit += delta;
     // NOTE(review): in incremental mode with no insertions this sets start
     // to 0, as the original code did -- confirm that is what callers expect.
     offsets.start = isIncremental ? lastBoundary + delta : offsets.limit;

     // TODO:  do something with U_FAILURE(status);
     //        (need to look at transliterators overall, not just here.)
 }
 //
 //  getInsertion()    Returns a reference to the string that
 //                    handleTransliterate() inserts at each
 //                    selected boundary.
 //
 const UnicodeString &BreakTransliterator::getInsertion() const {
     return this->fInsertion;
 }
 //
 //  setInsertion()
 //
 void BreakTransliterator::setInsertion(const UnicodeString &insertion) {
     this->fInsertion = insertion;
 }
 //
 //  getBreakIterator     Returns the word break iterator held by this
 //                       object, creating it on the first call.
 //                       Copied from Java, probably better to just
 //                       create it in the constructor.
 //
 BreakIterator *BreakTransliterator::getBreakIterator() {
     if (bi != NULL) {
         return bi;   // already created by an earlier call
     }

     // Note:  Thai breaking behavior is universal, it is not
     //        tied to the Thai locale.
     // The error code from creation is not examined; callers receive
     // whatever pointer createWordInstance() produced.
     UErrorCode status = U_ZERO_ERROR;
     bi = BreakIterator::createWordInstance(Locale::getEnglish(), status);
     return bi;
 }
 //
 //   replaceableAsString   Returns the full contents of a Replaceable as
 //                         a UnicodeString so that a break iterator can
 //                         be run over it.  When the Replaceable is itself
 //                         a UnicodeString it is copied directly; any
 //                         other implementation is read through
 //                         extractBetween().
 //
 UnicodeString BreakTransliterator::replaceableAsString(Replaceable &r) {
     const UnicodeString *asString = dynamic_cast<UnicodeString *>(&r);
     if (asString != NULL) {
         return *asString;
     }

     // Generic path: pull every code unit out of the Replaceable.
     UnicodeString extracted;
     r.extractBetween(0, r.length(), extracted);
     return extracted;
 }
 U_NAMESPACE_END
 #endif /* #if !UCONFIG_NO_TRANSLITERATION && !UCONFIG_NO_BREAK_ITERATION */

The Tor Browser / annotate

intl/icu/source/i18n/brktrans.cpp@fc2d59ddac77 (annotated)

intl/icu/source/i18n/brktrans.cpp