|
1 /* |
|
2 ********************************************************************** |
|
3 * Copyright (c) 2001-2011, International Business Machines |
|
4 * Corporation and others. All Rights Reserved. |
|
5 ********************************************************************** |
|
6 * Date Name Description |
|
7 * 11/19/2001 aliu Creation. |
|
8 ********************************************************************** |
|
9 */ |
|
10 |
|
11 #include "unicode/utypes.h" |
|
12 |
|
13 #if !UCONFIG_NO_TRANSLITERATION |
|
14 |
|
15 #include "unicode/utf16.h" |
|
16 #include "esctrn.h" |
|
17 #include "util.h" |
|
18 |
|
19 U_NAMESPACE_BEGIN |
|
20 |
|
21 static const UChar UNIPRE[] = {85,43,0}; // "U+" |
|
22 static const UChar BS_u[] = {92,117,0}; // "\\u" |
|
23 static const UChar BS_U[] = {92,85,0}; // "\\U" |
|
24 static const UChar XMLPRE[] = {38,35,120,0}; // "&#x" |
|
25 static const UChar XML10PRE[] = {38,35,0}; // "&#" |
|
26 static const UChar PERLPRE[] = {92,120,123,0}; // "\\x{" |
|
27 static const UChar SEMI[] = {59,0}; // ";" |
|
28 static const UChar RBRACE[] = {125,0}; // "}" |
|
29 |
|
30 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(EscapeTransliterator) |
|
31 |
|
32 /** |
|
33 * Factory methods |
|
34 */ |
|
35 static Transliterator* _createEscUnicode(const UnicodeString& ID, Transliterator::Token /*context*/) { |
|
36 // Unicode: "U+10FFFF" hex, min=4, max=6 |
|
37 return new EscapeTransliterator(ID, UnicodeString(TRUE, UNIPRE, 2), UnicodeString(), 16, 4, TRUE, NULL); |
|
38 } |
|
39 static Transliterator* _createEscJava(const UnicodeString& ID, Transliterator::Token /*context*/) { |
|
40 // Java: "\\uFFFF" hex, min=4, max=4 |
|
41 return new EscapeTransliterator(ID, UnicodeString(TRUE, BS_u, 2), UnicodeString(), 16, 4, FALSE, NULL); |
|
42 } |
|
43 static Transliterator* _createEscC(const UnicodeString& ID, Transliterator::Token /*context*/) { |
|
44 // C: "\\uFFFF" hex, min=4, max=4; \\U0010FFFF hex, min=8, max=8 |
|
45 return new EscapeTransliterator(ID, UnicodeString(TRUE, BS_u, 2), UnicodeString(), 16, 4, TRUE, |
|
46 new EscapeTransliterator(UnicodeString(), UnicodeString(TRUE, BS_U, 2), UnicodeString(), 16, 8, TRUE, NULL)); |
|
47 } |
|
48 static Transliterator* _createEscXML(const UnicodeString& ID, Transliterator::Token /*context*/) { |
|
49 // XML: "" hex, min=1, max=6 |
|
50 return new EscapeTransliterator(ID, UnicodeString(TRUE, XMLPRE, 3), UnicodeString(SEMI[0]), 16, 1, TRUE, NULL); |
|
51 } |
|
52 static Transliterator* _createEscXML10(const UnicodeString& ID, Transliterator::Token /*context*/) { |
|
53 // XML10: "&1114111;" dec, min=1, max=7 (not really "Any-Hex") |
|
54 return new EscapeTransliterator(ID, UnicodeString(TRUE, XML10PRE, 2), UnicodeString(SEMI[0]), 10, 1, TRUE, NULL); |
|
55 } |
|
56 static Transliterator* _createEscPerl(const UnicodeString& ID, Transliterator::Token /*context*/) { |
|
57 // Perl: "\\x{263A}" hex, min=1, max=6 |
|
58 return new EscapeTransliterator(ID, UnicodeString(TRUE, PERLPRE, 3), UnicodeString(RBRACE[0]), 16, 1, TRUE, NULL); |
|
59 } |
|
60 |
|
61 /** |
|
62 * Registers standard variants with the system. Called by |
|
63 * Transliterator during initialization. |
|
64 */ |
|
65 void EscapeTransliterator::registerIDs() { |
|
66 Token t = integerToken(0); |
|
67 |
|
68 Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Unicode"), _createEscUnicode, t); |
|
69 |
|
70 Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Java"), _createEscJava, t); |
|
71 |
|
72 Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/C"), _createEscC, t); |
|
73 |
|
74 Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/XML"), _createEscXML, t); |
|
75 |
|
76 Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/XML10"), _createEscXML10, t); |
|
77 |
|
78 Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Perl"), _createEscPerl, t); |
|
79 |
|
80 Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex"), _createEscJava, t); |
|
81 } |
|
82 |
|
83 /** |
|
84 * Constructs an escape transliterator with the given ID and |
|
85 * parameters. See the class member documentation for details. |
|
86 */ |
|
87 EscapeTransliterator::EscapeTransliterator(const UnicodeString& newID, |
|
88 const UnicodeString& _prefix, const UnicodeString& _suffix, |
|
89 int32_t _radix, int32_t _minDigits, |
|
90 UBool _grokSupplementals, |
|
91 EscapeTransliterator* adoptedSupplementalHandler) : |
|
92 Transliterator(newID, NULL) |
|
93 { |
|
94 this->prefix = _prefix; |
|
95 this->suffix = _suffix; |
|
96 this->radix = _radix; |
|
97 this->minDigits = _minDigits; |
|
98 this->grokSupplementals = _grokSupplementals; |
|
99 this->supplementalHandler = adoptedSupplementalHandler; |
|
100 } |
|
101 |
|
102 /** |
|
103 * Copy constructor. |
|
104 */ |
|
105 EscapeTransliterator::EscapeTransliterator(const EscapeTransliterator& o) : |
|
106 Transliterator(o), |
|
107 prefix(o.prefix), |
|
108 suffix(o.suffix), |
|
109 radix(o.radix), |
|
110 minDigits(o.minDigits), |
|
111 grokSupplementals(o.grokSupplementals) { |
|
112 supplementalHandler = (o.supplementalHandler != 0) ? |
|
113 new EscapeTransliterator(*o.supplementalHandler) : NULL; |
|
114 } |
|
115 |
|
/**
 * Destructor.  Deletes the supplemental handler held by this object;
 * the delete is a no-op when the pointer is NULL.
 */
EscapeTransliterator::~EscapeTransliterator() {
    delete supplementalHandler;
}
|
119 |
|
120 /** |
|
121 * Transliterator API. |
|
122 */ |
|
123 Transliterator* EscapeTransliterator::clone() const { |
|
124 return new EscapeTransliterator(*this); |
|
125 } |
|
126 |
|
127 /** |
|
128 * Implements {@link Transliterator#handleTransliterate}. |
|
129 */ |
|
130 void EscapeTransliterator::handleTransliterate(Replaceable& text, |
|
131 UTransPosition& pos, |
|
132 UBool /*isIncremental*/) const |
|
133 { |
|
134 /* TODO: Verify that isIncremental can be ignored */ |
|
135 int32_t start = pos.start; |
|
136 int32_t limit = pos.limit; |
|
137 |
|
138 UnicodeString buf(prefix); |
|
139 int32_t prefixLen = prefix.length(); |
|
140 UBool redoPrefix = FALSE; |
|
141 |
|
142 while (start < limit) { |
|
143 int32_t c = grokSupplementals ? text.char32At(start) : text.charAt(start); |
|
144 int32_t charLen = grokSupplementals ? U16_LENGTH(c) : 1; |
|
145 |
|
146 if ((c & 0xFFFF0000) != 0 && supplementalHandler != NULL) { |
|
147 buf.truncate(0); |
|
148 buf.append(supplementalHandler->prefix); |
|
149 ICU_Utility::appendNumber(buf, c, supplementalHandler->radix, |
|
150 supplementalHandler->minDigits); |
|
151 buf.append(supplementalHandler->suffix); |
|
152 redoPrefix = TRUE; |
|
153 } else { |
|
154 if (redoPrefix) { |
|
155 buf.truncate(0); |
|
156 buf.append(prefix); |
|
157 redoPrefix = FALSE; |
|
158 } else { |
|
159 buf.truncate(prefixLen); |
|
160 } |
|
161 ICU_Utility::appendNumber(buf, c, radix, minDigits); |
|
162 buf.append(suffix); |
|
163 } |
|
164 |
|
165 text.handleReplaceBetween(start, start + charLen, buf); |
|
166 start += buf.length(); |
|
167 limit += buf.length() - charLen; |
|
168 } |
|
169 |
|
170 pos.contextLimit += limit - pos.limit; |
|
171 pos.limit = limit; |
|
172 pos.start = start; |
|
173 } |
|
174 |
|
175 U_NAMESPACE_END |
|
176 |
|
177 #endif /* #if !UCONFIG_NO_TRANSLITERATION */ |
|
178 |
|
179 //eof |