michael@0: /* michael@0: ********************************************************************** michael@0: * Copyright (c) 2001-2011, International Business Machines michael@0: * Corporation and others. All Rights Reserved. michael@0: ********************************************************************** michael@0: * Date Name Description michael@0: * 11/19/2001 aliu Creation. michael@0: ********************************************************************** michael@0: */ michael@0: michael@0: #include "unicode/utypes.h" michael@0: michael@0: #if !UCONFIG_NO_TRANSLITERATION michael@0: michael@0: #include "unicode/utf16.h" michael@0: #include "esctrn.h" michael@0: #include "util.h" michael@0: michael@0: U_NAMESPACE_BEGIN michael@0: michael@0: static const UChar UNIPRE[] = {85,43,0}; // "U+" michael@0: static const UChar BS_u[] = {92,117,0}; // "\\u" michael@0: static const UChar BS_U[] = {92,85,0}; // "\\U" michael@0: static const UChar XMLPRE[] = {38,35,120,0}; // "&#x" michael@0: static const UChar XML10PRE[] = {38,35,0}; // "&#" michael@0: static const UChar PERLPRE[] = {92,120,123,0}; // "\\x{" michael@0: static const UChar SEMI[] = {59,0}; // ";" michael@0: static const UChar RBRACE[] = {125,0}; // "}" michael@0: michael@0: UOBJECT_DEFINE_RTTI_IMPLEMENTATION(EscapeTransliterator) michael@0: michael@0: /** michael@0: * Factory methods michael@0: */ michael@0: static Transliterator* _createEscUnicode(const UnicodeString& ID, Transliterator::Token /*context*/) { michael@0: // Unicode: "U+10FFFF" hex, min=4, max=6 michael@0: return new EscapeTransliterator(ID, UnicodeString(TRUE, UNIPRE, 2), UnicodeString(), 16, 4, TRUE, NULL); michael@0: } michael@0: static Transliterator* _createEscJava(const UnicodeString& ID, Transliterator::Token /*context*/) { michael@0: // Java: "\\uFFFF" hex, min=4, max=4 michael@0: return new EscapeTransliterator(ID, UnicodeString(TRUE, BS_u, 2), UnicodeString(), 16, 4, FALSE, NULL); michael@0: } michael@0: static Transliterator* _createEscC(const UnicodeString& ID, Transliterator::Token /*context*/) { michael@0: // C: "\\uFFFF" hex, min=4, max=4; \\U0010FFFF hex, min=8, max=8 michael@0: return new EscapeTransliterator(ID, UnicodeString(TRUE, BS_u, 2), UnicodeString(), 16, 4, TRUE, michael@0: new EscapeTransliterator(UnicodeString(), UnicodeString(TRUE, BS_U, 2), UnicodeString(), 16, 8, TRUE, NULL)); michael@0: } michael@0: static Transliterator* _createEscXML(const UnicodeString& ID, Transliterator::Token /*context*/) { michael@0: // XML: "􏿿" hex, min=1, max=6 michael@0: return new EscapeTransliterator(ID, UnicodeString(TRUE, XMLPRE, 3), UnicodeString(SEMI[0]), 16, 1, TRUE, NULL); michael@0: } michael@0: static Transliterator* _createEscXML10(const UnicodeString& ID, Transliterator::Token /*context*/) { michael@0: // XML10: "&1114111;" dec, min=1, max=7 (not really "Any-Hex") michael@0: return new EscapeTransliterator(ID, UnicodeString(TRUE, XML10PRE, 2), UnicodeString(SEMI[0]), 10, 1, TRUE, NULL); michael@0: } michael@0: static Transliterator* _createEscPerl(const UnicodeString& ID, Transliterator::Token /*context*/) { michael@0: // Perl: "\\x{263A}" hex, min=1, max=6 michael@0: return new EscapeTransliterator(ID, UnicodeString(TRUE, PERLPRE, 3), UnicodeString(RBRACE[0]), 16, 1, TRUE, NULL); michael@0: } michael@0: michael@0: /** michael@0: * Registers standard variants with the system. Called by michael@0: * Transliterator during initialization. michael@0: */ michael@0: void EscapeTransliterator::registerIDs() { michael@0: Token t = integerToken(0); michael@0: michael@0: Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Unicode"), _createEscUnicode, t); michael@0: michael@0: Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Java"), _createEscJava, t); michael@0: michael@0: Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/C"), _createEscC, t); michael@0: michael@0: Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/XML"), _createEscXML, t); michael@0: michael@0: Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/XML10"), _createEscXML10, t); michael@0: michael@0: Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Perl"), _createEscPerl, t); michael@0: michael@0: Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex"), _createEscJava, t); michael@0: } michael@0: michael@0: /** michael@0: * Constructs an escape transliterator with the given ID and michael@0: * parameters. See the class member documentation for details. michael@0: */ michael@0: EscapeTransliterator::EscapeTransliterator(const UnicodeString& newID, michael@0: const UnicodeString& _prefix, const UnicodeString& _suffix, michael@0: int32_t _radix, int32_t _minDigits, michael@0: UBool _grokSupplementals, michael@0: EscapeTransliterator* adoptedSupplementalHandler) : michael@0: Transliterator(newID, NULL) michael@0: { michael@0: this->prefix = _prefix; michael@0: this->suffix = _suffix; michael@0: this->radix = _radix; michael@0: this->minDigits = _minDigits; michael@0: this->grokSupplementals = _grokSupplementals; michael@0: this->supplementalHandler = adoptedSupplementalHandler; michael@0: } michael@0: michael@0: /** michael@0: * Copy constructor. michael@0: */ michael@0: EscapeTransliterator::EscapeTransliterator(const EscapeTransliterator& o) : michael@0: Transliterator(o), michael@0: prefix(o.prefix), michael@0: suffix(o.suffix), michael@0: radix(o.radix), michael@0: minDigits(o.minDigits), michael@0: grokSupplementals(o.grokSupplementals) { michael@0: supplementalHandler = (o.supplementalHandler != 0) ? michael@0: new EscapeTransliterator(*o.supplementalHandler) : NULL; michael@0: } michael@0: michael@0: EscapeTransliterator::~EscapeTransliterator() { michael@0: delete supplementalHandler; michael@0: } michael@0: michael@0: /** michael@0: * Transliterator API. michael@0: */ michael@0: Transliterator* EscapeTransliterator::clone() const { michael@0: return new EscapeTransliterator(*this); michael@0: } michael@0: michael@0: /** michael@0: * Implements {@link Transliterator#handleTransliterate}. michael@0: */ michael@0: void EscapeTransliterator::handleTransliterate(Replaceable& text, michael@0: UTransPosition& pos, michael@0: UBool /*isIncremental*/) const michael@0: { michael@0: /* TODO: Verify that isIncremental can be ignored */ michael@0: int32_t start = pos.start; michael@0: int32_t limit = pos.limit; michael@0: michael@0: UnicodeString buf(prefix); michael@0: int32_t prefixLen = prefix.length(); michael@0: UBool redoPrefix = FALSE; michael@0: michael@0: while (start < limit) { michael@0: int32_t c = grokSupplementals ? text.char32At(start) : text.charAt(start); michael@0: int32_t charLen = grokSupplementals ? U16_LENGTH(c) : 1; michael@0: michael@0: if ((c & 0xFFFF0000) != 0 && supplementalHandler != NULL) { michael@0: buf.truncate(0); michael@0: buf.append(supplementalHandler->prefix); michael@0: ICU_Utility::appendNumber(buf, c, supplementalHandler->radix, michael@0: supplementalHandler->minDigits); michael@0: buf.append(supplementalHandler->suffix); michael@0: redoPrefix = TRUE; michael@0: } else { michael@0: if (redoPrefix) { michael@0: buf.truncate(0); michael@0: buf.append(prefix); michael@0: redoPrefix = FALSE; michael@0: } else { michael@0: buf.truncate(prefixLen); michael@0: } michael@0: ICU_Utility::appendNumber(buf, c, radix, minDigits); michael@0: buf.append(suffix); michael@0: } michael@0: michael@0: text.handleReplaceBetween(start, start + charLen, buf); michael@0: start += buf.length(); michael@0: limit += buf.length() - charLen; michael@0: } michael@0: michael@0: pos.contextLimit += limit - pos.limit; michael@0: pos.limit = limit; michael@0: pos.start = start; michael@0: } michael@0: michael@0: U_NAMESPACE_END michael@0: michael@0: #endif /* #if !UCONFIG_NO_TRANSLITERATION */ michael@0: michael@0: //eof