|
1 /* |
|
2 ******************************************************************************* |
|
3 * |
|
4 * Copyright (C) 2001-2011, International Business Machines |
|
5 * Corporation and others. All Rights Reserved. |
|
6 * |
|
7 ******************************************************************************* |
|
8 * file name: casetrn.cpp |
|
9 * encoding: US-ASCII |
|
10 * tab size: 8 (not used) |
|
11 * indentation:4 |
|
12 * |
|
13 * created on: 2004sep03 |
|
14 * created by: Markus W. Scherer |
|
15 * |
|
16 * Implementation class for lower-/upper-/title-casing transliterators. |
|
17 */ |
|
18 |
|
19 #include "unicode/utypes.h" |
|
20 |
|
21 #if !UCONFIG_NO_TRANSLITERATION |
|
22 |
|
23 #include "unicode/uchar.h" |
|
24 #include "unicode/ustring.h" |
|
25 #include "unicode/utf.h" |
|
26 #include "unicode/utf16.h" |
|
27 #include "tolowtrn.h" |
|
28 #include "ucase.h" |
|
29 #include "cpputils.h" |
|
30 |
|
31 /* case context iterator using a Replaceable */ |
|
32 U_CFUNC UChar32 U_CALLCONV |
|
33 utrans_rep_caseContextIterator(void *context, int8_t dir) |
|
34 { |
|
35 U_NAMESPACE_USE |
|
36 |
|
37 UCaseContext *csc=(UCaseContext *)context; |
|
38 Replaceable *rep=(Replaceable *)csc->p; |
|
39 UChar32 c; |
|
40 |
|
41 if(dir<0) { |
|
42 /* reset for backward iteration */ |
|
43 csc->index=csc->cpStart; |
|
44 csc->dir=dir; |
|
45 } else if(dir>0) { |
|
46 /* reset for forward iteration */ |
|
47 csc->index=csc->cpLimit; |
|
48 csc->dir=dir; |
|
49 } else { |
|
50 /* continue current iteration direction */ |
|
51 dir=csc->dir; |
|
52 } |
|
53 |
|
54 // automatically adjust start and limit if the Replaceable disagrees |
|
55 // with the original values |
|
56 if(dir<0) { |
|
57 if(csc->start<csc->index) { |
|
58 c=rep->char32At(csc->index-1); |
|
59 if(c<0) { |
|
60 csc->start=csc->index; |
|
61 } else { |
|
62 csc->index-=U16_LENGTH(c); |
|
63 return c; |
|
64 } |
|
65 } |
|
66 } else { |
|
67 // detect, and store in csc->b1, if we hit the limit |
|
68 if(csc->index<csc->limit) { |
|
69 c=rep->char32At(csc->index); |
|
70 if(c<0) { |
|
71 csc->limit=csc->index; |
|
72 csc->b1=TRUE; |
|
73 } else { |
|
74 csc->index+=U16_LENGTH(c); |
|
75 return c; |
|
76 } |
|
77 } else { |
|
78 csc->b1=TRUE; |
|
79 } |
|
80 } |
|
81 return U_SENTINEL; |
|
82 } |
|
83 |
|
84 U_NAMESPACE_BEGIN |
|
85 |
|
86 UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(CaseMapTransliterator) |
|
87 |
|
88 /** |
|
89 * Constructs a transliterator. |
|
90 */ |
|
91 CaseMapTransliterator::CaseMapTransliterator(const UnicodeString &id, UCaseMapFull *map) : |
|
92 Transliterator(id, 0), |
|
93 fCsp(ucase_getSingleton()), |
|
94 fMap(map) |
|
95 { |
|
96 // TODO test incremental mode with context-sensitive text (e.g. greek sigma) |
|
97 // TODO need to call setMaximumContextLength()?! |
|
98 } |
|
99 |
|
100 /** |
|
101 * Destructor. |
|
102 */ |
|
103 CaseMapTransliterator::~CaseMapTransliterator() { |
|
104 } |
|
105 |
|
106 /** |
|
107 * Copy constructor. |
|
108 */ |
|
109 CaseMapTransliterator::CaseMapTransliterator(const CaseMapTransliterator& o) : |
|
110 Transliterator(o), |
|
111 fCsp(o.fCsp), fMap(o.fMap) |
|
112 { |
|
113 } |
|
114 |
|
115 /** |
|
116 * Assignment operator. |
|
117 */ |
|
118 /*CaseMapTransliterator& CaseMapTransliterator::operator=(const CaseMapTransliterator& o) { |
|
119 Transliterator::operator=(o); |
|
120 fCsp = o.fCsp; |
|
121 fMap = o.fMap; |
|
122 return *this; |
|
123 }*/ |
|
124 |
|
125 /** |
|
126 * Transliterator API. |
|
127 */ |
|
128 /*Transliterator* CaseMapTransliterator::clone(void) const { |
|
129 return new CaseMapTransliterator(*this); |
|
130 }*/ |
|
131 |
|
132 /** |
|
133 * Implements {@link Transliterator#handleTransliterate}. |
|
134 */ |
|
135 void CaseMapTransliterator::handleTransliterate(Replaceable& text, |
|
136 UTransPosition& offsets, |
|
137 UBool isIncremental) const |
|
138 { |
|
139 if (offsets.start >= offsets.limit) { |
|
140 return; |
|
141 } |
|
142 |
|
143 UCaseContext csc; |
|
144 uprv_memset(&csc, 0, sizeof(csc)); |
|
145 csc.p = &text; |
|
146 csc.start = offsets.contextStart; |
|
147 csc.limit = offsets.contextLimit; |
|
148 |
|
149 UnicodeString tmp; |
|
150 const UChar *s; |
|
151 UChar32 c; |
|
152 int32_t textPos, delta, result, locCache=0; |
|
153 |
|
154 for(textPos=offsets.start; textPos<offsets.limit;) { |
|
155 csc.cpStart=textPos; |
|
156 c=text.char32At(textPos); |
|
157 csc.cpLimit=textPos+=U16_LENGTH(c); |
|
158 |
|
159 result=fMap(fCsp, c, utrans_rep_caseContextIterator, &csc, &s, "", &locCache); |
|
160 |
|
161 if(csc.b1 && isIncremental) { |
|
162 // fMap() tried to look beyond the context limit |
|
163 // wait for more input |
|
164 offsets.start=csc.cpStart; |
|
165 return; |
|
166 } |
|
167 |
|
168 if(result>=0) { |
|
169 // replace the current code point with its full case mapping result |
|
170 // see UCASE_MAX_STRING_LENGTH |
|
171 if(result<=UCASE_MAX_STRING_LENGTH) { |
|
172 // string s[result] |
|
173 tmp.setTo(FALSE, s, result); |
|
174 delta=result-U16_LENGTH(c); |
|
175 } else { |
|
176 // single code point |
|
177 tmp.setTo(result); |
|
178 delta=tmp.length()-U16_LENGTH(c); |
|
179 } |
|
180 text.handleReplaceBetween(csc.cpStart, textPos, tmp); |
|
181 if(delta!=0) { |
|
182 textPos+=delta; |
|
183 csc.limit=offsets.contextLimit+=delta; |
|
184 offsets.limit+=delta; |
|
185 } |
|
186 } |
|
187 } |
|
188 offsets.start=textPos; |
|
189 } |
|
190 |
|
191 U_NAMESPACE_END |
|
192 |
|
193 #endif /* #if !UCONFIG_NO_TRANSLITERATION */ |