intl/icu/source/i18n/nortrans.cpp

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

     1 /*
     2 **********************************************************************
     3 *   Copyright (C) 2001-2011, International Business Machines
     4 *   Corporation and others.  All Rights Reserved.
     5 **********************************************************************
     6 *   Date        Name        Description
     7 *   07/03/01    aliu        Creation.
     8 **********************************************************************
     9 */
    11 #include "unicode/utypes.h"
    13 #if !UCONFIG_NO_TRANSLITERATION
    15 #include "unicode/normalizer2.h"
    16 #include "unicode/utf16.h"
    17 #include "cstring.h"
    18 #include "nortrans.h"
    20 U_NAMESPACE_BEGIN
       // Expands ICU's RTTI boilerplate for NormalizationTransliterator.
       // NOTE(review): presumably this defines the class-ID accessors declared
       // in nortrans.h -- confirm against the macro definition in unicode/uobject.h.
    22 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(NormalizationTransliterator)
    24 static inline Transliterator::Token cstrToken(const char *s) {
    25     return Transliterator::pointerToken((void *)s);
    26 }
    28 /**
    29  * System registration hook.
    30  */
    31 void NormalizationTransliterator::registerIDs() {
    32     // In the Token, the byte after the NUL is the UNormalization2Mode.
    33     Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-NFC"),
    34                                      _create, cstrToken("nfc\0\0"));
    35     Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-NFKC"),
    36                                      _create, cstrToken("nfkc\0\0"));
    37     Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-NFD"),
    38                                      _create, cstrToken("nfc\0\1"));
    39     Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-NFKD"),
    40                                      _create, cstrToken("nfkc\0\1"));
    41     Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-FCD"),
    42                                      _create, cstrToken("nfc\0\2"));
    43     Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-FCC"),
    44                                      _create, cstrToken("nfc\0\3"));
    45     Transliterator::_registerSpecialInverse(UNICODE_STRING_SIMPLE("NFC"),
    46                                             UNICODE_STRING_SIMPLE("NFD"), TRUE);
    47     Transliterator::_registerSpecialInverse(UNICODE_STRING_SIMPLE("NFKC"),
    48                                             UNICODE_STRING_SIMPLE("NFKD"), TRUE);
    49     Transliterator::_registerSpecialInverse(UNICODE_STRING_SIMPLE("FCC"),
    50                                             UNICODE_STRING_SIMPLE("NFD"), FALSE);
    51     Transliterator::_registerSpecialInverse(UNICODE_STRING_SIMPLE("FCD"),
    52                                             UNICODE_STRING_SIMPLE("FCD"), FALSE);
    53 }
    55 /**
    56  * Factory methods
    57  */
    58 Transliterator* NormalizationTransliterator::_create(const UnicodeString& ID,
    59                                                      Token context) {
    60     const char *name = (const char *)context.pointer;
    61     UNormalization2Mode mode = (UNormalization2Mode)uprv_strchr(name, 0)[1];
    62     UErrorCode errorCode = U_ZERO_ERROR;
    63     const Normalizer2 *norm2 = Normalizer2::getInstance(NULL, name, mode, errorCode);
    64     if(U_SUCCESS(errorCode)) {
    65         return new NormalizationTransliterator(ID, *norm2);
    66     } else {
    67         return NULL;
    68     }
    69 }
    71 /**
    72  * Constructs a transliterator.
    73  */
    74 NormalizationTransliterator::NormalizationTransliterator(const UnicodeString& id,
    75                                                          const Normalizer2 &norm2) :
    76     Transliterator(id, 0), fNorm2(norm2) {}
    78 /**
    79  * Destructor.
    80  */
    81 NormalizationTransliterator::~NormalizationTransliterator() {
    82 }
    84 /**
    85  * Copy constructor.
    86  */
    87 NormalizationTransliterator::NormalizationTransliterator(const NormalizationTransliterator& o) :
    88     Transliterator(o), fNorm2(o.fNorm2) {}
    90 /**
    91  * Transliterator API.
    92  */
    93 Transliterator* NormalizationTransliterator::clone(void) const {
    94     return new NormalizationTransliterator(*this);
    95 }
    97 /**
    98  * Implements {@link Transliterator#handleTransliterate}.
    99  */
   100 void NormalizationTransliterator::handleTransliterate(Replaceable& text, UTransPosition& offsets,
   101                                                       UBool isIncremental) const {
   102     // start and limit of the input range
   103     int32_t start = offsets.start;
   104     int32_t limit = offsets.limit;
   105     if(start >= limit) {
   106         return;
   107     }
   109     /*
   110      * Normalize as short chunks at a time as possible even in
   111      * bulk mode, so that styled text is minimally disrupted.
   112      * In incremental mode, a chunk that ends with offsets.limit
   113      * must not be normalized.
   114      *
   115      * If it was known that the input text is not styled, then
   116      * a bulk mode normalization could look like this:
   118     UnicodeString input, normalized;
   119     int32_t length = limit - start;
   120     _Replaceable_extractBetween(text, start, limit, input.getBuffer(length));
   121     input.releaseBuffer(length);
   123     UErrorCode status = U_ZERO_ERROR;
   124     fNorm2.normalize(input, normalized, status);
   126     text.handleReplaceBetween(start, limit, normalized);
   128     int32_t delta = normalized.length() - length;
   129     offsets.contextLimit += delta;
   130     offsets.limit += delta;
   131     offsets.start = limit + delta;
   133      */
   134     UErrorCode errorCode = U_ZERO_ERROR;
   135     UnicodeString segment;
   136     UnicodeString normalized;
   137     UChar32 c = text.char32At(start);
   138     do {
   139         int32_t prev = start;
   140         // Skip at least one character so we make progress.
   141         // c holds the character at start.
   142         segment.remove();
   143         do {
   144             segment.append(c);
   145             start += U16_LENGTH(c);
   146         } while(start < limit && !fNorm2.hasBoundaryBefore(c = text.char32At(start)));
   147         if(start == limit && isIncremental && !fNorm2.hasBoundaryAfter(c)) {
   148             // stop in incremental mode when we reach the input limit
   149             // in case there are additional characters that could change the
   150             // normalization result
   151             start=prev;
   152             break;
   153         }
   154         fNorm2.normalize(segment, normalized, errorCode);
   155         if(U_FAILURE(errorCode)) {
   156             break;
   157         }
   158         if(segment != normalized) {
   159             // replace the input chunk with its normalized form
   160             text.handleReplaceBetween(prev, start, normalized);
   162             // update all necessary indexes accordingly
   163             int32_t delta = normalized.length() - (start - prev);
   164             start += delta;
   165             limit += delta;
   166         }
   167     } while(start < limit);
   169     offsets.start = start;
   170     offsets.contextLimit += limit - offsets.limit;
   171     offsets.limit = limit;
   172 }
   174 U_NAMESPACE_END
   176 #endif /* #if !UCONFIG_NO_TRANSLITERATION */

mercurial