intl/icu/source/i18n/brktrans.cpp

Wed, 31 Dec 2014 07:22:50 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 07:22:50 +0100
branch
TOR_BUG_3246
changeset 4
fc2d59ddac77
permissions
-rw-r--r--

Correct previous dual key logic pending first delivery installment.

michael@0 1 /*
michael@0 2 **********************************************************************
michael@0 3 * Copyright (C) 2008-2010, International Business Machines
michael@0 4 * Corporation and others. All Rights Reserved.
michael@0 5 **********************************************************************
michael@0 6 * Date Name Description
michael@0 7 * 05/11/2008 Andy Heninger Port from Java
michael@0 8 **********************************************************************
michael@0 9 */
michael@0 10
michael@0 11 #include "unicode/utypes.h"
michael@0 12
michael@0 13 #if !UCONFIG_NO_TRANSLITERATION && !UCONFIG_NO_BREAK_ITERATION
michael@0 14
michael@0 15 #include "unicode/unifilt.h"
michael@0 16 #include "unicode/uchar.h"
michael@0 17 #include "unicode/uniset.h"
michael@0 18 #include "unicode/brkiter.h"
michael@0 19 #include "brktrans.h"
michael@0 20 #include "unicode/uchar.h"
michael@0 21 #include "cmemory.h"
michael@0 22 #include "uprops.h"
michael@0 23 #include "uinvchar.h"
michael@0 24 #include "util.h"
michael@0 25 #include "uvectr32.h"
michael@0 26
michael@0 27 U_NAMESPACE_BEGIN
michael@0 28
michael@0 29 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(BreakTransliterator)
michael@0 30
michael@0 31 static const UChar SPACE = 32; // ' '
michael@0 32
michael@0 33
michael@0 34 /**
michael@0 35 * Constructs a transliterator with the default delimiters '{' and
michael@0 36 * '}'.
michael@0 37 */
michael@0 38 BreakTransliterator::BreakTransliterator(UnicodeFilter* adoptedFilter) :
michael@0 39 Transliterator(UNICODE_STRING("Any-BreakInternal", 17), adoptedFilter),
michael@0 40 fInsertion(SPACE) {
michael@0 41 bi = NULL;
michael@0 42 UErrorCode status = U_ZERO_ERROR;
michael@0 43 boundaries = new UVector32(status);
michael@0 44 }
michael@0 45
michael@0 46
michael@0 47 /**
michael@0 48 * Destructor.
michael@0 49 */
michael@0 50 BreakTransliterator::~BreakTransliterator() {
michael@0 51 delete bi;
michael@0 52 bi = NULL;
michael@0 53 delete boundaries;
michael@0 54 boundaries = NULL;
michael@0 55 }
michael@0 56
michael@0 57 /**
michael@0 58 * Copy constructor.
michael@0 59 */
michael@0 60 BreakTransliterator::BreakTransliterator(const BreakTransliterator& o) :
michael@0 61 Transliterator(o) {
michael@0 62 bi = NULL;
michael@0 63 if (o.bi != NULL) {
michael@0 64 bi = o.bi->clone();
michael@0 65 }
michael@0 66 fInsertion = o.fInsertion;
michael@0 67 UErrorCode status = U_ZERO_ERROR;
michael@0 68 boundaries = new UVector32(status);
michael@0 69 }
michael@0 70
michael@0 71
michael@0 72 /**
michael@0 73 * Transliterator API.
michael@0 74 */
michael@0 75 Transliterator* BreakTransliterator::clone(void) const {
michael@0 76 return new BreakTransliterator(*this);
michael@0 77 }
michael@0 78
michael@0 79 /**
michael@0 80 * Implements {@link Transliterator#handleTransliterate}.
michael@0 81 */
michael@0 82 void BreakTransliterator::handleTransliterate(Replaceable& text, UTransPosition& offsets,
michael@0 83 UBool isIncremental ) const {
michael@0 84
michael@0 85 UErrorCode status = U_ZERO_ERROR;
michael@0 86 boundaries->removeAllElements();
michael@0 87 BreakTransliterator *nonConstThis = (BreakTransliterator *)this;
michael@0 88 nonConstThis->getBreakIterator(); // Lazy-create it if necessary
michael@0 89 UnicodeString sText = replaceableAsString(text);
michael@0 90 bi->setText(sText);
michael@0 91 bi->preceding(offsets.start);
michael@0 92
michael@0 93 // To make things much easier, we will stack the boundaries, and then insert at the end.
michael@0 94 // generally, we won't need too many, since we will be filtered.
michael@0 95
michael@0 96 int32_t boundary;
michael@0 97 for(boundary = bi->next(); boundary != UBRK_DONE && boundary < offsets.limit; boundary = bi->next()) {
michael@0 98 if (boundary == 0) continue;
michael@0 99 // HACK: Check to see that preceeding item was a letter
michael@0 100
michael@0 101 UChar32 cp = sText.char32At(boundary-1);
michael@0 102 int type = u_charType(cp);
michael@0 103 //System.out.println(Integer.toString(cp,16) + " (before): " + type);
michael@0 104 if ((U_MASK(type) & (U_GC_L_MASK | U_GC_M_MASK)) == 0) continue;
michael@0 105
michael@0 106 cp = sText.char32At(boundary);
michael@0 107 type = u_charType(cp);
michael@0 108 //System.out.println(Integer.toString(cp,16) + " (after): " + type);
michael@0 109 if ((U_MASK(type) & (U_GC_L_MASK | U_GC_M_MASK)) == 0) continue;
michael@0 110
michael@0 111 boundaries->addElement(boundary, status);
michael@0 112 // printf("Boundary at %d\n", boundary);
michael@0 113 }
michael@0 114
michael@0 115 int delta = 0;
michael@0 116 int lastBoundary = 0;
michael@0 117
michael@0 118 if (boundaries->size() != 0) { // if we found something, adjust
michael@0 119 delta = boundaries->size() * fInsertion.length();
michael@0 120 lastBoundary = boundaries->lastElementi();
michael@0 121
michael@0 122 // we do this from the end backwards, so that we don't have to keep updating.
michael@0 123
michael@0 124 while (boundaries->size() > 0) {
michael@0 125 boundary = boundaries->popi();
michael@0 126 text.handleReplaceBetween(boundary, boundary, fInsertion);
michael@0 127 }
michael@0 128 }
michael@0 129
michael@0 130 // Now fix up the return values
michael@0 131 offsets.contextLimit += delta;
michael@0 132 offsets.limit += delta;
michael@0 133 offsets.start = isIncremental ? lastBoundary + delta : offsets.limit;
michael@0 134
michael@0 135 // TODO: do something with U_FAILURE(status);
michael@0 136 // (need to look at transliterators overall, not just here.)
michael@0 137 }
michael@0 138
michael@0 139 //
michael@0 140 // getInsertion()
michael@0 141 //
michael@0 142 const UnicodeString &BreakTransliterator::getInsertion() const {
michael@0 143 return fInsertion;
michael@0 144 }
michael@0 145
michael@0 146 //
michael@0 147 // setInsertion()
michael@0 148 //
michael@0 149 void BreakTransliterator::setInsertion(const UnicodeString &insertion) {
michael@0 150 this->fInsertion = insertion;
michael@0 151 }
michael@0 152
michael@0 153 //
michael@0 154 // getBreakIterator Lazily create the break iterator if it does
michael@0 155 // not already exist. Copied from Java, probably
michael@0 156 // better to just create it in the constructor.
michael@0 157 //
michael@0 158 BreakIterator *BreakTransliterator::getBreakIterator() {
michael@0 159 UErrorCode status = U_ZERO_ERROR;
michael@0 160 if (bi == NULL) {
michael@0 161 // Note: Thai breaking behavior is universal, it is not
michael@0 162 // tied to the Thai locale.
michael@0 163 bi = BreakIterator::createWordInstance(Locale::getEnglish(), status);
michael@0 164 }
michael@0 165 return bi;
michael@0 166 }
michael@0 167
michael@0 168 //
michael@0 169 // replaceableAsString Hack to let break iterators work
michael@0 170 // on the replaceable text from transliterators.
michael@0 171 // In practice, the only real Replaceable type that we
michael@0 172 // will be seeing is UnicodeString, so this function
michael@0 173 // will normally be efficient.
michael@0 174 //
michael@0 175 UnicodeString BreakTransliterator::replaceableAsString(Replaceable &r) {
michael@0 176 UnicodeString s;
michael@0 177 UnicodeString *rs = dynamic_cast<UnicodeString *>(&r);
michael@0 178 if (rs != NULL) {
michael@0 179 s = *rs;
michael@0 180 } else {
michael@0 181 r.extractBetween(0, r.length(), s);
michael@0 182 }
michael@0 183 return s;
michael@0 184 }
michael@0 185
michael@0 186 U_NAMESPACE_END
michael@0 187
michael@0 188 #endif /* #if !UCONFIG_NO_TRANSLITERATION */

mercurial