intl/icu/source/i18n/esctrn.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/i18n/esctrn.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,179 @@
     1.4 +/*
     1.5 +**********************************************************************
     1.6 +*   Copyright (c) 2001-2011, International Business Machines
     1.7 +*   Corporation and others.  All Rights Reserved.
     1.8 +**********************************************************************
     1.9 +*   Date        Name        Description
    1.10 +*   11/19/2001  aliu        Creation.
    1.11 +**********************************************************************
    1.12 +*/
    1.13 +
    1.14 +#include "unicode/utypes.h"
    1.15 +
    1.16 +#if !UCONFIG_NO_TRANSLITERATION
    1.17 +
    1.18 +#include "unicode/utf16.h"
    1.19 +#include "esctrn.h"
    1.20 +#include "util.h"
    1.21 +
    1.22 +U_NAMESPACE_BEGIN
    1.23 +
    1.24 +static const UChar UNIPRE[] = {85,43,0}; // "U+"
    1.25 +static const UChar BS_u[] = {92,117,0}; // "\\u"
    1.26 +static const UChar BS_U[] = {92,85,0}; // "\\U"
    1.27 +static const UChar XMLPRE[] = {38,35,120,0}; // "&#x"
    1.28 +static const UChar XML10PRE[] = {38,35,0}; // "&#"
    1.29 +static const UChar PERLPRE[] = {92,120,123,0}; // "\\x{"
    1.30 +static const UChar SEMI[] = {59,0}; // ";"
    1.31 +static const UChar RBRACE[] = {125,0}; // "}"
    1.32 +
    1.33 +UOBJECT_DEFINE_RTTI_IMPLEMENTATION(EscapeTransliterator)
    1.34 +
    1.35 +/**
    1.36 + * Factory methods
    1.37 + */
    1.38 +static Transliterator* _createEscUnicode(const UnicodeString& ID, Transliterator::Token /*context*/) {
    1.39 +    // Unicode: "U+10FFFF" hex, min=4, max=6
    1.40 +    return new EscapeTransliterator(ID, UnicodeString(TRUE, UNIPRE, 2), UnicodeString(), 16, 4, TRUE, NULL);
    1.41 +}
    1.42 +static Transliterator* _createEscJava(const UnicodeString& ID, Transliterator::Token /*context*/) {
    1.43 +    // Java: "\\uFFFF" hex, min=4, max=4
    1.44 +    return new EscapeTransliterator(ID, UnicodeString(TRUE, BS_u, 2), UnicodeString(), 16, 4, FALSE, NULL);
    1.45 +}
    1.46 +static Transliterator* _createEscC(const UnicodeString& ID, Transliterator::Token /*context*/) {
    1.47 +    // C: "\\uFFFF" hex, min=4, max=4; \\U0010FFFF hex, min=8, max=8
    1.48 +    return new EscapeTransliterator(ID, UnicodeString(TRUE, BS_u, 2), UnicodeString(), 16, 4, TRUE,
    1.49 +             new EscapeTransliterator(UnicodeString(), UnicodeString(TRUE, BS_U, 2), UnicodeString(), 16, 8, TRUE, NULL));
    1.50 +}
    1.51 +static Transliterator* _createEscXML(const UnicodeString& ID, Transliterator::Token /*context*/) {
    1.52 +    // XML: "" hex, min=1, max=6
    1.53 +    return new EscapeTransliterator(ID, UnicodeString(TRUE, XMLPRE, 3), UnicodeString(SEMI[0]), 16, 1, TRUE, NULL);
    1.54 +}
    1.55 +static Transliterator* _createEscXML10(const UnicodeString& ID, Transliterator::Token /*context*/) {
    1.56 +    // XML10: "&1114111;" dec, min=1, max=7 (not really "Any-Hex")
    1.57 +    return new EscapeTransliterator(ID, UnicodeString(TRUE, XML10PRE, 2), UnicodeString(SEMI[0]), 10, 1, TRUE, NULL);
    1.58 +}
    1.59 +static Transliterator* _createEscPerl(const UnicodeString& ID, Transliterator::Token /*context*/) {
    1.60 +    // Perl: "\\x{263A}" hex, min=1, max=6
    1.61 +    return new EscapeTransliterator(ID, UnicodeString(TRUE, PERLPRE, 3), UnicodeString(RBRACE[0]), 16, 1, TRUE, NULL);
    1.62 +}
    1.63 +
    1.64 +/**
    1.65 + * Registers standard variants with the system.  Called by
    1.66 + * Transliterator during initialization.
    1.67 + */
    1.68 +void EscapeTransliterator::registerIDs() {
    1.69 +    Token t = integerToken(0);
    1.70 +
    1.71 +    Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Unicode"), _createEscUnicode, t);
    1.72 +
    1.73 +    Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Java"), _createEscJava, t);
    1.74 +
    1.75 +    Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/C"), _createEscC, t);
    1.76 +
    1.77 +    Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/XML"), _createEscXML, t);
    1.78 +
    1.79 +    Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/XML10"), _createEscXML10, t);
    1.80 +
    1.81 +    Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Perl"), _createEscPerl, t);
    1.82 +
    1.83 +    Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex"), _createEscJava, t);
    1.84 +}
    1.85 +
    1.86 +/**
    1.87 + * Constructs an escape transliterator with the given ID and
    1.88 + * parameters.  See the class member documentation for details.
    1.89 + */
    1.90 +EscapeTransliterator::EscapeTransliterator(const UnicodeString& newID,
    1.91 +                         const UnicodeString& _prefix, const UnicodeString& _suffix,
    1.92 +                         int32_t _radix, int32_t _minDigits,
    1.93 +                         UBool _grokSupplementals,
    1.94 +                         EscapeTransliterator* adoptedSupplementalHandler) :
    1.95 +    Transliterator(newID, NULL)
    1.96 +{
    1.97 +    this->prefix = _prefix;
    1.98 +    this->suffix = _suffix;
    1.99 +    this->radix = _radix;
   1.100 +    this->minDigits = _minDigits;
   1.101 +    this->grokSupplementals = _grokSupplementals;
   1.102 +    this->supplementalHandler = adoptedSupplementalHandler;
   1.103 +}
   1.104 +
   1.105 +/**
   1.106 + * Copy constructor.
   1.107 + */
   1.108 +EscapeTransliterator::EscapeTransliterator(const EscapeTransliterator& o) :
   1.109 +    Transliterator(o),
   1.110 +    prefix(o.prefix),
   1.111 +    suffix(o.suffix),
   1.112 +    radix(o.radix),
   1.113 +    minDigits(o.minDigits),
   1.114 +    grokSupplementals(o.grokSupplementals) {
   1.115 +    supplementalHandler = (o.supplementalHandler != 0) ?
   1.116 +        new EscapeTransliterator(*o.supplementalHandler) : NULL;
   1.117 +}
   1.118 +
   1.119 +EscapeTransliterator::~EscapeTransliterator() {
   1.120 +    delete supplementalHandler;
   1.121 +}
   1.122 +
   1.123 +/**
   1.124 + * Transliterator API.
   1.125 + */
   1.126 +Transliterator* EscapeTransliterator::clone() const {
   1.127 +    return new EscapeTransliterator(*this);
   1.128 +}
   1.129 +
   1.130 +/**
   1.131 + * Implements {@link Transliterator#handleTransliterate}.
   1.132 + */
   1.133 +void EscapeTransliterator::handleTransliterate(Replaceable& text,
   1.134 +                                               UTransPosition& pos,
   1.135 +                                               UBool /*isIncremental*/) const
   1.136 +{
   1.137 +    /* TODO: Verify that isIncremental can be ignored */
   1.138 +    int32_t start = pos.start;
   1.139 +    int32_t limit = pos.limit;
   1.140 +
   1.141 +    UnicodeString buf(prefix);
   1.142 +    int32_t prefixLen = prefix.length();
   1.143 +    UBool redoPrefix = FALSE;
   1.144 +
   1.145 +    while (start < limit) {
   1.146 +        int32_t c = grokSupplementals ? text.char32At(start) : text.charAt(start);
   1.147 +        int32_t charLen = grokSupplementals ? U16_LENGTH(c) : 1;
   1.148 +
   1.149 +        if ((c & 0xFFFF0000) != 0 && supplementalHandler != NULL) {
   1.150 +            buf.truncate(0);
   1.151 +            buf.append(supplementalHandler->prefix);
   1.152 +            ICU_Utility::appendNumber(buf, c, supplementalHandler->radix,
   1.153 +                                  supplementalHandler->minDigits);
   1.154 +            buf.append(supplementalHandler->suffix);
   1.155 +            redoPrefix = TRUE;
   1.156 +        } else {
   1.157 +            if (redoPrefix) {
   1.158 +                buf.truncate(0);
   1.159 +                buf.append(prefix);
   1.160 +                redoPrefix = FALSE;
   1.161 +            } else {
   1.162 +                buf.truncate(prefixLen);
   1.163 +            }
   1.164 +            ICU_Utility::appendNumber(buf, c, radix, minDigits);
   1.165 +            buf.append(suffix);
   1.166 +        }
   1.167 +
   1.168 +        text.handleReplaceBetween(start, start + charLen, buf);
   1.169 +        start += buf.length();
   1.170 +        limit += buf.length() - charLen;
   1.171 +    }
   1.172 +
   1.173 +    pos.contextLimit += limit - pos.limit;
   1.174 +    pos.limit = limit;
   1.175 +    pos.start = start;
   1.176 +}
   1.177 +
   1.178 +U_NAMESPACE_END
   1.179 +
   1.180 +#endif /* #if !UCONFIG_NO_TRANSLITERATION */
   1.181 +
   1.182 +//eof

mercurial