1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/i18n/esctrn.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,179 @@ 1.4 +/* 1.5 +********************************************************************** 1.6 +* Copyright (c) 2001-2011, International Business Machines 1.7 +* Corporation and others. All Rights Reserved. 1.8 +********************************************************************** 1.9 +* Date Name Description 1.10 +* 11/19/2001 aliu Creation. 1.11 +********************************************************************** 1.12 +*/ 1.13 + 1.14 +#include "unicode/utypes.h" 1.15 + 1.16 +#if !UCONFIG_NO_TRANSLITERATION 1.17 + 1.18 +#include "unicode/utf16.h" 1.19 +#include "esctrn.h" 1.20 +#include "util.h" 1.21 + 1.22 +U_NAMESPACE_BEGIN 1.23 + 1.24 +static const UChar UNIPRE[] = {85,43,0}; // "U+" 1.25 +static const UChar BS_u[] = {92,117,0}; // "\\u" 1.26 +static const UChar BS_U[] = {92,85,0}; // "\\U" 1.27 +static const UChar XMLPRE[] = {38,35,120,0}; // "&#x" 1.28 +static const UChar XML10PRE[] = {38,35,0}; // "&#" 1.29 +static const UChar PERLPRE[] = {92,120,123,0}; // "\\x{" 1.30 +static const UChar SEMI[] = {59,0}; // ";" 1.31 +static const UChar RBRACE[] = {125,0}; // "}" 1.32 + 1.33 +UOBJECT_DEFINE_RTTI_IMPLEMENTATION(EscapeTransliterator) 1.34 + 1.35 +/** 1.36 + * Factory methods 1.37 + */ 1.38 +static Transliterator* _createEscUnicode(const UnicodeString& ID, Transliterator::Token /*context*/) { 1.39 + // Unicode: "U+10FFFF" hex, min=4, max=6 1.40 + return new EscapeTransliterator(ID, UnicodeString(TRUE, UNIPRE, 2), UnicodeString(), 16, 4, TRUE, NULL); 1.41 +} 1.42 +static Transliterator* _createEscJava(const UnicodeString& ID, Transliterator::Token /*context*/) { 1.43 + // Java: "\\uFFFF" hex, min=4, max=4 1.44 + return new EscapeTransliterator(ID, UnicodeString(TRUE, BS_u, 2), UnicodeString(), 16, 4, FALSE, NULL); 1.45 +} 1.46 +static Transliterator* _createEscC(const UnicodeString& ID, Transliterator::Token /*context*/) { 1.47 + // C: "\\uFFFF" hex, min=4, max=4; \\U0010FFFF hex, min=8, max=8 1.48 + return new EscapeTransliterator(ID, UnicodeString(TRUE, BS_u, 2), UnicodeString(), 16, 4, TRUE, 1.49 + new EscapeTransliterator(UnicodeString(), UnicodeString(TRUE, BS_U, 2), UnicodeString(), 16, 8, TRUE, NULL)); 1.50 +} 1.51 +static Transliterator* _createEscXML(const UnicodeString& ID, Transliterator::Token /*context*/) { 1.52 + // XML: "" hex, min=1, max=6 1.53 + return new EscapeTransliterator(ID, UnicodeString(TRUE, XMLPRE, 3), UnicodeString(SEMI[0]), 16, 1, TRUE, NULL); 1.54 +} 1.55 +static Transliterator* _createEscXML10(const UnicodeString& ID, Transliterator::Token /*context*/) { 1.56 + // XML10: "&1114111;" dec, min=1, max=7 (not really "Any-Hex") 1.57 + return new EscapeTransliterator(ID, UnicodeString(TRUE, XML10PRE, 2), UnicodeString(SEMI[0]), 10, 1, TRUE, NULL); 1.58 +} 1.59 +static Transliterator* _createEscPerl(const UnicodeString& ID, Transliterator::Token /*context*/) { 1.60 + // Perl: "\\x{263A}" hex, min=1, max=6 1.61 + return new EscapeTransliterator(ID, UnicodeString(TRUE, PERLPRE, 3), UnicodeString(RBRACE[0]), 16, 1, TRUE, NULL); 1.62 +} 1.63 + 1.64 +/** 1.65 + * Registers standard variants with the system. Called by 1.66 + * Transliterator during initialization. 1.67 + */ 1.68 +void EscapeTransliterator::registerIDs() { 1.69 + Token t = integerToken(0); 1.70 + 1.71 + Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Unicode"), _createEscUnicode, t); 1.72 + 1.73 + Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Java"), _createEscJava, t); 1.74 + 1.75 + Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/C"), _createEscC, t); 1.76 + 1.77 + Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/XML"), _createEscXML, t); 1.78 + 1.79 + Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/XML10"), _createEscXML10, t); 1.80 + 1.81 + Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Perl"), _createEscPerl, t); 1.82 + 1.83 + Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex"), _createEscJava, t); 1.84 +} 1.85 + 1.86 +/** 1.87 + * Constructs an escape transliterator with the given ID and 1.88 + * parameters. See the class member documentation for details. 1.89 + */ 1.90 +EscapeTransliterator::EscapeTransliterator(const UnicodeString& newID, 1.91 + const UnicodeString& _prefix, const UnicodeString& _suffix, 1.92 + int32_t _radix, int32_t _minDigits, 1.93 + UBool _grokSupplementals, 1.94 + EscapeTransliterator* adoptedSupplementalHandler) : 1.95 + Transliterator(newID, NULL) 1.96 +{ 1.97 + this->prefix = _prefix; 1.98 + this->suffix = _suffix; 1.99 + this->radix = _radix; 1.100 + this->minDigits = _minDigits; 1.101 + this->grokSupplementals = _grokSupplementals; 1.102 + this->supplementalHandler = adoptedSupplementalHandler; 1.103 +} 1.104 + 1.105 +/** 1.106 + * Copy constructor. 1.107 + */ 1.108 +EscapeTransliterator::EscapeTransliterator(const EscapeTransliterator& o) : 1.109 + Transliterator(o), 1.110 + prefix(o.prefix), 1.111 + suffix(o.suffix), 1.112 + radix(o.radix), 1.113 + minDigits(o.minDigits), 1.114 + grokSupplementals(o.grokSupplementals) { 1.115 + supplementalHandler = (o.supplementalHandler != 0) ? 1.116 + new EscapeTransliterator(*o.supplementalHandler) : NULL; 1.117 +} 1.118 + 1.119 +EscapeTransliterator::~EscapeTransliterator() { 1.120 + delete supplementalHandler; 1.121 +} 1.122 + 1.123 +/** 1.124 + * Transliterator API. 1.125 + */ 1.126 +Transliterator* EscapeTransliterator::clone() const { 1.127 + return new EscapeTransliterator(*this); 1.128 +} 1.129 + 1.130 +/** 1.131 + * Implements {@link Transliterator#handleTransliterate}. 1.132 + */ 1.133 +void EscapeTransliterator::handleTransliterate(Replaceable& text, 1.134 + UTransPosition& pos, 1.135 + UBool /*isIncremental*/) const 1.136 +{ 1.137 + /* TODO: Verify that isIncremental can be ignored */ 1.138 + int32_t start = pos.start; 1.139 + int32_t limit = pos.limit; 1.140 + 1.141 + UnicodeString buf(prefix); 1.142 + int32_t prefixLen = prefix.length(); 1.143 + UBool redoPrefix = FALSE; 1.144 + 1.145 + while (start < limit) { 1.146 + int32_t c = grokSupplementals ? text.char32At(start) : text.charAt(start); 1.147 + int32_t charLen = grokSupplementals ? U16_LENGTH(c) : 1; 1.148 + 1.149 + if ((c & 0xFFFF0000) != 0 && supplementalHandler != NULL) { 1.150 + buf.truncate(0); 1.151 + buf.append(supplementalHandler->prefix); 1.152 + ICU_Utility::appendNumber(buf, c, supplementalHandler->radix, 1.153 + supplementalHandler->minDigits); 1.154 + buf.append(supplementalHandler->suffix); 1.155 + redoPrefix = TRUE; 1.156 + } else { 1.157 + if (redoPrefix) { 1.158 + buf.truncate(0); 1.159 + buf.append(prefix); 1.160 + redoPrefix = FALSE; 1.161 + } else { 1.162 + buf.truncate(prefixLen); 1.163 + } 1.164 + ICU_Utility::appendNumber(buf, c, radix, minDigits); 1.165 + buf.append(suffix); 1.166 + } 1.167 + 1.168 + text.handleReplaceBetween(start, start + charLen, buf); 1.169 + start += buf.length(); 1.170 + limit += buf.length() - charLen; 1.171 + } 1.172 + 1.173 + pos.contextLimit += limit - pos.limit; 1.174 + pos.limit = limit; 1.175 + pos.start = start; 1.176 +} 1.177 + 1.178 +U_NAMESPACE_END 1.179 + 1.180 +#endif /* #if !UCONFIG_NO_TRANSLITERATION */ 1.181 + 1.182 +//eof