The Tor Browser: intl/icu/source/i18n/strrepl.cpp@6474c204b198

Cloned from the upstream tor-browser origin at tor-browser-31.3.0esr-4.5-1-build1
(revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f) for hacking purposes.

     1 /*

     2 **********************************************************************

     3 *   Copyright (c) 2002-2012, International Business Machines Corporation

     4 *   and others.  All Rights Reserved.

     5 **********************************************************************

     6 *   Date        Name        Description

     7 *   01/21/2002  aliu        Creation.

     8 **********************************************************************

     9 */

    11 #include "unicode/utypes.h"

    13 #if !UCONFIG_NO_TRANSLITERATION

    15 #include "unicode/uniset.h"

    16 #include "unicode/utf16.h"

    17 #include "strrepl.h"

    18 #include "rbt_data.h"

    19 #include "util.h"

    21 U_NAMESPACE_BEGIN

// Out-of-line definition of the UnicodeReplacer destructor; the body is empty.
UnicodeReplacer::~UnicodeReplacer() {}

// Expands the UObject RTTI implementation macro for StringReplacer.
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringReplacer)

    26 /**

    27  * Construct a StringReplacer that sets the emits the given output

    28  * text and sets the cursor to the given position.

    29  * @param theOutput text that will replace input text when the

    30  * replace() method is called.  May contain stand-in characters

    31  * that represent nested replacers.

    32  * @param theCursorPos cursor position that will be returned by

    33  * the replace() method

    34  * @param theData transliterator context object that translates

    35  * stand-in characters to UnicodeReplacer objects

    36  */

    37 StringReplacer::StringReplacer(const UnicodeString& theOutput,

    38                                int32_t theCursorPos,

    39                                const TransliterationRuleData* theData) {

    40     output = theOutput;

    41     cursorPos = theCursorPos;

    42     hasCursor = TRUE;

    43     data = theData;

    44     isComplex = TRUE;

    45 }

    47 /**

    48  * Construct a StringReplacer that sets the emits the given output

    49  * text and does not modify the cursor.

    50  * @param theOutput text that will replace input text when the

    51  * replace() method is called.  May contain stand-in characters

    52  * that represent nested replacers.

    53  * @param theData transliterator context object that translates

    54  * stand-in characters to UnicodeReplacer objects

    55  */

    56 StringReplacer::StringReplacer(const UnicodeString& theOutput,

    57                                const TransliterationRuleData* theData) {

    58     output = theOutput;

    59     cursorPos = 0;

    60     hasCursor = FALSE;

    61     data = theData;

    62     isComplex = TRUE;

    63 }

    65 /**

    66  * Copy constructor.

    67  */

    68 StringReplacer::StringReplacer(const StringReplacer& other) :

    69     UnicodeFunctor(other),

    70     UnicodeReplacer(other)

    71 {

    72     output = other.output;

    73     cursorPos = other.cursorPos;

    74     hasCursor = other.hasCursor;

    75     data = other.data;

    76     isComplex = other.isComplex;

    77 }

/**
 * Destructor.  The body is empty: no explicit cleanup is performed
 * here (the data pointer is not deleted).
 */
StringReplacer::~StringReplacer() {
}

    85 /**

    86  * Implement UnicodeFunctor

    87  */

    88 UnicodeFunctor* StringReplacer::clone() const {

    89     return new StringReplacer(*this);

    90 }

    92 /**

    93  * Implement UnicodeFunctor

    94  */

    95 UnicodeReplacer* StringReplacer::toReplacer() const {

    96   return const_cast<StringReplacer *>(this);

    97 }

    99 /**

   100  * UnicodeReplacer API

   101  */

   102 int32_t StringReplacer::replace(Replaceable& text,

   103                                 int32_t start,

   104                                 int32_t limit,

   105                                 int32_t& cursor) {

   106     int32_t outLen;

   107     int32_t newStart = 0;

   109     // NOTE: It should be possible to _always_ run the complex

   110     // processing code; just slower.  If not, then there is a bug

   111     // in the complex processing code.

   113     // Simple (no nested replacers) Processing Code :

   114     if (!isComplex) {

   115         text.handleReplaceBetween(start, limit, output);

   116         outLen = output.length();

   118         // Setup default cursor position (for cursorPos within output)

   119         newStart = cursorPos;

   120     }

   122     // Complex (nested replacers) Processing Code :

   123     else {

   124         /* When there are segments to be copied, use the Replaceable.copy()

   125          * API in order to retain out-of-band data.  Copy everything to the

   126          * end of the string, then copy them back over the key.  This preserves

   127          * the integrity of indices into the key and surrounding context while

   128          * generating the output text.

   129          */

   130         UnicodeString buf;

   131         int32_t oOutput; // offset into 'output'

   132         isComplex = FALSE;

   134         // The temporary buffer starts at tempStart, and extends

   135         // to destLimit.  The start of the buffer has a single

   136         // character from before the key.  This provides style

   137         // data when addition characters are filled into the

   138         // temporary buffer.  If there is nothing to the left, use

   139         // the non-character U+FFFF, which Replaceable subclasses

   140         // should treat specially as a "no-style character."

   141         // destStart points to the point after the style context

   142         // character, so it is tempStart+1 or tempStart+2.

   143         int32_t tempStart = text.length(); // start of temp buffer

   144         int32_t destStart = tempStart; // copy new text to here

   145         if (start > 0) {

   146             int32_t len = U16_LENGTH(text.char32At(start-1));

   147             text.copy(start-len, start, tempStart);

   148             destStart += len;

   149         } else {

   150             UnicodeString str((UChar) 0xFFFF);

   151             text.handleReplaceBetween(tempStart, tempStart, str);

   152             destStart++;

   153         }

   154         int32_t destLimit = destStart;

   156         for (oOutput=0; oOutput<output.length(); ) {

   157             if (oOutput == cursorPos) {

   158                 // Record the position of the cursor

   159                 newStart = destLimit - destStart; // relative to start

   160             }

   161             UChar32 c = output.char32At(oOutput);

   162             UnicodeReplacer* r = data->lookupReplacer(c);

   163             if (r == NULL) {

   164                 // Accumulate straight (non-segment) text.

   165                 buf.append(c);

   166             } else {

   167                 isComplex = TRUE;

   169                 // Insert any accumulated straight text.

   170                 if (buf.length() > 0) {

   171                     text.handleReplaceBetween(destLimit, destLimit, buf);

   172                     destLimit += buf.length();

   173                     buf.truncate(0);

   174                 }

   176                 // Delegate output generation to replacer object

   177                 int32_t len = r->replace(text, destLimit, destLimit, cursor);

   178                 destLimit += len;

   179             }

   180             oOutput += U16_LENGTH(c);

   181         }

   182         // Insert any accumulated straight text.

   183         if (buf.length() > 0) {

   184             text.handleReplaceBetween(destLimit, destLimit, buf);

   185             destLimit += buf.length();

   186         }

   187         if (oOutput == cursorPos) {

   188             // Record the position of the cursor

   189             newStart = destLimit - destStart; // relative to start

   190         }

   192         outLen = destLimit - destStart;

   194         // Copy new text to start, and delete it

   195         text.copy(destStart, destLimit, start);

   196         text.handleReplaceBetween(tempStart + outLen, destLimit + outLen, UnicodeString());

   198         // Delete the old text (the key)

   199         text.handleReplaceBetween(start + outLen, limit + outLen, UnicodeString());

   200     }

   202     if (hasCursor) {

   203         // Adjust the cursor for positions outside the key.  These

   204         // refer to code points rather than code units.  If cursorPos

   205         // is within the output string, then use newStart, which has

   206         // already been set above.

   207         if (cursorPos < 0) {

   208             newStart = start;

   209             int32_t n = cursorPos;

   210             // Outside the output string, cursorPos counts code points

   211             while (n < 0 && newStart > 0) {

   212                 newStart -= U16_LENGTH(text.char32At(newStart-1));

   213                 ++n;

   214             }

   215             newStart += n;

   216         } else if (cursorPos > output.length()) {

   217             newStart = start + outLen;

   218             int32_t n = cursorPos - output.length();

   219             // Outside the output string, cursorPos counts code points

   220             while (n > 0 && newStart < text.length()) {

   221                 newStart += U16_LENGTH(text.char32At(newStart));

   222                 --n;

   223             }

   224             newStart += n;

   225         } else {

   226             // Cursor is within output string.  It has been set up above

   227             // to be relative to start.

   228             newStart += start;

   229         }

   231         cursor = newStart;

   232     }

   234     return outLen;

   235 }

   237 /**

   238  * UnicodeReplacer API

   239  */

   240 UnicodeString& StringReplacer::toReplacerPattern(UnicodeString& rule,

   241                                                  UBool escapeUnprintable) const {

   242     rule.truncate(0);

   243     UnicodeString quoteBuf;

   245     int32_t cursor = cursorPos;

   247     // Handle a cursor preceding the output

   248     if (hasCursor && cursor < 0) {

   249         while (cursor++ < 0) {

   250             ICU_Utility::appendToRule(rule, (UChar)0x0040 /*@*/, TRUE, escapeUnprintable, quoteBuf);

   251         }

   252         // Fall through and append '|' below

   253     }

   255     for (int32_t i=0; i<output.length(); ++i) {

   256         if (hasCursor && i == cursor) {

   257             ICU_Utility::appendToRule(rule, (UChar)0x007C /*|*/, TRUE, escapeUnprintable, quoteBuf);

   258         }

   259         UChar c = output.charAt(i); // Ok to use 16-bits here

   261         UnicodeReplacer* r = data->lookupReplacer(c);

   262         if (r == NULL) {

   263             ICU_Utility::appendToRule(rule, c, FALSE, escapeUnprintable, quoteBuf);

   264         } else {

   265             UnicodeString buf;

   266             r->toReplacerPattern(buf, escapeUnprintable);

   267             buf.insert(0, (UChar)0x20);

   268             buf.append((UChar)0x20);

   269             ICU_Utility::appendToRule(rule, buf,

   270                                       TRUE, escapeUnprintable, quoteBuf);

   271         }

   272     }

   274     // Handle a cursor after the output.  Use > rather than >= because

   275     // if cursor == output.length() it is at the end of the output,

   276     // which is the default position, so we need not emit it.

   277     if (hasCursor && cursor > output.length()) {

   278         cursor -= output.length();

   279         while (cursor-- > 0) {

   280             ICU_Utility::appendToRule(rule, (UChar)0x0040 /*@*/, TRUE, escapeUnprintable, quoteBuf);

   281         }

   282         ICU_Utility::appendToRule(rule, (UChar)0x007C /*|*/, TRUE, escapeUnprintable, quoteBuf);

   283     }

   284     // Flush quoteBuf out to result

   285     ICU_Utility::appendToRule(rule, -1,

   286                               TRUE, escapeUnprintable, quoteBuf);

   288     return rule;

   289 }

   291 /**

   292  * Implement UnicodeReplacer

   293  */

   294 void StringReplacer::addReplacementSetTo(UnicodeSet& toUnionTo) const {

   295     UChar32 ch;

   296     for (int32_t i=0; i<output.length(); i+=U16_LENGTH(ch)) {

   297     ch = output.char32At(i);

   298     UnicodeReplacer* r = data->lookupReplacer(ch);

   299     if (r == NULL) {

   300         toUnionTo.add(ch);

   301     } else {

   302         r->addReplacementSetTo(toUnionTo);

   303     }

   304     }

   305 }

   307 /**

   308  * UnicodeFunctor API

   309  */

   310 void StringReplacer::setData(const TransliterationRuleData* d) {

   311     data = d;

   312     int32_t i = 0;

   313     while (i<output.length()) {

   314         UChar32 c = output.char32At(i);

   315         UnicodeFunctor* f = data->lookup(c);

   316         if (f != NULL) {

   317             f->setData(data);

   318         }

   319         i += U16_LENGTH(c);

   320     }

   321 }

   323 U_NAMESPACE_END

   325 #endif /* #if !UCONFIG_NO_TRANSLITERATION */

   327 //eof

The Tor Browser / file revision

intl/icu/source/i18n/strrepl.cpp@6474c204b198

intl/icu/source/i18n/strrepl.cpp