intl/icu/source/i18n/brktrans.cpp

Wed, 31 Dec 2014 07:22:50 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 07:22:50 +0100
branch
TOR_BUG_3246
changeset 4
fc2d59ddac77
permissions
-rw-r--r--

Correct previous dual key logic pending first delivery installment.

     1 /*
     2 **********************************************************************
     3 *   Copyright (C) 2008-2010, International Business Machines
     4 *   Corporation and others.  All Rights Reserved.
     5 **********************************************************************
     6 *   Date        Name        Description
     7 *   05/11/2008  Andy Heninger  Port from Java
     8 **********************************************************************
     9 */
    11 #include "unicode/utypes.h"
    13 #if  !UCONFIG_NO_TRANSLITERATION && !UCONFIG_NO_BREAK_ITERATION
    15 #include "unicode/unifilt.h"
    16 #include "unicode/uchar.h"
    17 #include "unicode/uniset.h"
    18 #include "unicode/brkiter.h"
    19 #include "brktrans.h"
    20 #include "unicode/uchar.h"
    21 #include "cmemory.h"
    22 #include "uprops.h"
    23 #include "uinvchar.h"
    24 #include "util.h"
    25 #include "uvectr32.h"
    27 U_NAMESPACE_BEGIN
    29 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(BreakTransliterator)
    31 static const UChar SPACE       = 32;  // ' '
    34 /**
    35  * Constructs a transliterator with the default delimiters '{' and
    36  * '}'.
    37  */
    38 BreakTransliterator::BreakTransliterator(UnicodeFilter* adoptedFilter) :
    39     Transliterator(UNICODE_STRING("Any-BreakInternal", 17), adoptedFilter),
    40     fInsertion(SPACE) {
    41         bi = NULL;
    42         UErrorCode status = U_ZERO_ERROR;
    43         boundaries = new UVector32(status);
    44     }
    47 /**
    48  * Destructor.
    49  */
    50 BreakTransliterator::~BreakTransliterator() {
    51     delete bi;
    52     bi = NULL;
    53     delete boundaries;
    54     boundaries = NULL;
    55 }
    57 /**
    58  * Copy constructor.
    59  */
    60 BreakTransliterator::BreakTransliterator(const BreakTransliterator& o) :
    61     Transliterator(o) {
    62         bi = NULL;
    63         if (o.bi != NULL) {
    64             bi = o.bi->clone();
    65         }
    66         fInsertion = o.fInsertion;
    67         UErrorCode status = U_ZERO_ERROR;
    68         boundaries = new UVector32(status);
    69     }
    72 /**
    73  * Transliterator API.
    74  */
    75 Transliterator* BreakTransliterator::clone(void) const {
    76     return new BreakTransliterator(*this);
    77 }
    79 /**
    80  * Implements {@link Transliterator#handleTransliterate}.
    81  */
    82 void BreakTransliterator::handleTransliterate(Replaceable& text, UTransPosition& offsets,
    83                                                     UBool isIncremental ) const {
    85         UErrorCode status = U_ZERO_ERROR;
    86         boundaries->removeAllElements();
    87         BreakTransliterator *nonConstThis = (BreakTransliterator *)this;
    88         nonConstThis->getBreakIterator(); // Lazy-create it if necessary
    89         UnicodeString sText = replaceableAsString(text);
    90         bi->setText(sText);
    91         bi->preceding(offsets.start);
    93         // To make things much easier, we will stack the boundaries, and then insert at the end.
    94         // generally, we won't need too many, since we will be filtered.
    96         int32_t boundary;
    97         for(boundary = bi->next(); boundary != UBRK_DONE && boundary < offsets.limit; boundary = bi->next()) {
    98             if (boundary == 0) continue;
    99             // HACK: Check to see that preceeding item was a letter
   101             UChar32 cp = sText.char32At(boundary-1);
   102             int type = u_charType(cp);
   103             //System.out.println(Integer.toString(cp,16) + " (before): " + type);
   104             if ((U_MASK(type) & (U_GC_L_MASK | U_GC_M_MASK)) == 0) continue;
   106             cp = sText.char32At(boundary);
   107             type = u_charType(cp);
   108             //System.out.println(Integer.toString(cp,16) + " (after): " + type);
   109             if ((U_MASK(type) & (U_GC_L_MASK | U_GC_M_MASK)) == 0) continue;
   111             boundaries->addElement(boundary, status);
   112             // printf("Boundary at %d\n", boundary);
   113         }
   115         int delta = 0;
   116         int lastBoundary = 0;
   118         if (boundaries->size() != 0) { // if we found something, adjust
   119             delta = boundaries->size() * fInsertion.length();
   120             lastBoundary = boundaries->lastElementi();
   122             // we do this from the end backwards, so that we don't have to keep updating.
   124             while (boundaries->size() > 0) {
   125                 boundary = boundaries->popi();
   126                 text.handleReplaceBetween(boundary, boundary, fInsertion);
   127             }
   128         }
   130         // Now fix up the return values
   131         offsets.contextLimit += delta;
   132         offsets.limit += delta;
   133         offsets.start = isIncremental ? lastBoundary + delta : offsets.limit;
   135         // TODO:  do something with U_FAILURE(status);
   136         //        (need to look at transliterators overall, not just here.)
   137 }
   139 //
   140 //  getInsertion()
   141 //
   142 const UnicodeString &BreakTransliterator::getInsertion() const {
   143     return fInsertion;
   144 }
   146 //
   147 //  setInsertion()
   148 //
   149 void BreakTransliterator::setInsertion(const UnicodeString &insertion) {
   150     this->fInsertion = insertion;
   151 }
   153 //
   154 //  getBreakIterator     Lazily create the break iterator if it does
   155 //                       not already exist.  Copied from Java, probably
   156 //                       better to just create it in the constructor.
   157 //
   158 BreakIterator *BreakTransliterator::getBreakIterator() {
   159     UErrorCode status = U_ZERO_ERROR;
   160     if (bi == NULL) {
   161         // Note:  Thai breaking behavior is universal, it is not
   162         //        tied to the Thai locale.
   163         bi = BreakIterator::createWordInstance(Locale::getEnglish(), status);
   164     }
   165     return bi;
   166 }
   168 //
   169 //   replaceableAsString   Hack to let break iterators work
   170 //                         on the replaceable text from transliterators.
   171 //                         In practice, the only real Replaceable type that we
   172 //                         will be seeing is UnicodeString, so this function
   173 //                         will normally be efficient.
   174 //
   175 UnicodeString BreakTransliterator::replaceableAsString(Replaceable &r) {
   176     UnicodeString s;
   177     UnicodeString *rs = dynamic_cast<UnicodeString *>(&r);
   178     if (rs != NULL) {
   179         s = *rs;
   180     } else {
   181         r.extractBetween(0, r.length(), s);
   182     }
   183     return s;
   184 }
   186 U_NAMESPACE_END
   188 #endif /* #if !UCONFIG_NO_TRANSLITERATION */

mercurial