| |
1 /* |
| |
2 ******************************************************************************* |
| |
3 * Copyright (C) 1996-2011, International Business Machines Corporation and * |
| |
4 * others. All Rights Reserved. * |
| |
5 ******************************************************************************* |
| |
6 */ |
| |
7 |
| |
8 /* |
| |
9 * File coleitr.cpp |
| |
10 * |
| |
11 * |
| |
12 * |
| |
13 * Created by: Helena Shih |
| |
14 * |
| |
15 * Modification History: |
| |
16 * |
| |
17 * Date Name Description |
| |
18 * |
| |
19 * 6/23/97 helena Adding comments to make code more readable. |
| |
20 * 08/03/98 erm Synched with 1.2 version of CollationElementIterator.java |
| |
21 * 12/10/99 aliu Ported Thai collation support from Java. |
| |
22 * 01/25/01 swquek Modified to a C++ wrapper calling C APIs (ucoliter.h) |
| |
23 * 02/19/01 swquek Removed CollationElementsIterator() since it is |
| |
24 * private constructor and no calls are made to it |
| |
25 */ |
| |
26 |
| |
27 #include "unicode/utypes.h" |
| |
28 |
| |
29 #if !UCONFIG_NO_COLLATION |
| |
30 |
| |
31 #include "unicode/coleitr.h" |
| |
32 #include "unicode/ustring.h" |
| |
33 #include "ucol_imp.h" |
| |
34 #include "uassert.h" |
| |
35 #include "cmemory.h" |
| |
36 |
| |
37 |
| |
38 /* Constants --------------------------------------------------------------- */ |
| |
39 |
| |
40 U_NAMESPACE_BEGIN |
| |
41 |
| |
42 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationElementIterator) |
| |
43 |
| |
44 /* CollationElementIterator public constructor/destructor ------------------ */ |
| |
45 |
| |
46 CollationElementIterator::CollationElementIterator( |
| |
47 const CollationElementIterator& other) |
| |
48 : UObject(other), isDataOwned_(TRUE) |
| |
49 { |
| |
50 UErrorCode status = U_ZERO_ERROR; |
| |
51 m_data_ = ucol_openElements(other.m_data_->iteratordata_.coll, NULL, 0, |
| |
52 &status); |
| |
53 |
| |
54 *this = other; |
| |
55 } |
| |
56 |
| |
57 CollationElementIterator::~CollationElementIterator() |
| |
58 { |
| |
59 if (isDataOwned_) { |
| |
60 ucol_closeElements(m_data_); |
| |
61 } |
| |
62 } |
| |
63 |
| |
64 /* CollationElementIterator public methods --------------------------------- */ |
| |
65 |
| |
66 int32_t CollationElementIterator::getOffset() const |
| |
67 { |
| |
68 return ucol_getOffset(m_data_); |
| |
69 } |
| |
70 |
| |
71 /** |
| |
72 * Get the ordering priority of the next character in the string. |
| |
73 * @return the next character's ordering. Returns NULLORDER if an error has |
| |
74 * occured or if the end of string has been reached |
| |
75 */ |
| |
76 int32_t CollationElementIterator::next(UErrorCode& status) |
| |
77 { |
| |
78 return ucol_next(m_data_, &status); |
| |
79 } |
| |
80 |
| |
81 UBool CollationElementIterator::operator!=( |
| |
82 const CollationElementIterator& other) const |
| |
83 { |
| |
84 return !(*this == other); |
| |
85 } |
| |
86 |
| |
87 UBool CollationElementIterator::operator==( |
| |
88 const CollationElementIterator& that) const |
| |
89 { |
| |
90 if (this == &that || m_data_ == that.m_data_) { |
| |
91 return TRUE; |
| |
92 } |
| |
93 |
| |
94 // option comparison |
| |
95 if (m_data_->iteratordata_.coll != that.m_data_->iteratordata_.coll) |
| |
96 { |
| |
97 return FALSE; |
| |
98 } |
| |
99 |
| |
100 // the constructor and setText always sets a length |
| |
101 // and we only compare the string not the contents of the normalization |
| |
102 // buffer |
| |
103 int thislength = (int)(m_data_->iteratordata_.endp - m_data_->iteratordata_.string); |
| |
104 int thatlength = (int)(that.m_data_->iteratordata_.endp - that.m_data_->iteratordata_.string); |
| |
105 |
| |
106 if (thislength != thatlength) { |
| |
107 return FALSE; |
| |
108 } |
| |
109 |
| |
110 if (uprv_memcmp(m_data_->iteratordata_.string, |
| |
111 that.m_data_->iteratordata_.string, |
| |
112 thislength * U_SIZEOF_UCHAR) != 0) { |
| |
113 return FALSE; |
| |
114 } |
| |
115 if (getOffset() != that.getOffset()) { |
| |
116 return FALSE; |
| |
117 } |
| |
118 |
| |
119 // checking normalization buffer |
| |
120 if ((m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) == 0) { |
| |
121 if ((that.m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) != 0) { |
| |
122 return FALSE; |
| |
123 } |
| |
124 // both are in the normalization buffer |
| |
125 if (m_data_->iteratordata_.pos |
| |
126 - m_data_->iteratordata_.writableBuffer.getBuffer() |
| |
127 != that.m_data_->iteratordata_.pos |
| |
128 - that.m_data_->iteratordata_.writableBuffer.getBuffer()) { |
| |
129 // not in the same position in the normalization buffer |
| |
130 return FALSE; |
| |
131 } |
| |
132 } |
| |
133 else if ((that.m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) == 0) { |
| |
134 return FALSE; |
| |
135 } |
| |
136 // checking ce position |
| |
137 return (m_data_->iteratordata_.CEpos - m_data_->iteratordata_.CEs) |
| |
138 == (that.m_data_->iteratordata_.CEpos |
| |
139 - that.m_data_->iteratordata_.CEs); |
| |
140 } |
| |
141 |
| |
142 /** |
| |
143 * Get the ordering priority of the previous collation element in the string. |
| |
144 * @param status the error code status. |
| |
145 * @return the previous element's ordering. Returns NULLORDER if an error has |
| |
146 * occured or if the start of string has been reached. |
| |
147 */ |
| |
148 int32_t CollationElementIterator::previous(UErrorCode& status) |
| |
149 { |
| |
150 return ucol_previous(m_data_, &status); |
| |
151 } |
| |
152 |
| |
153 /** |
| |
154 * Resets the cursor to the beginning of the string. |
| |
155 */ |
| |
156 void CollationElementIterator::reset() |
| |
157 { |
| |
158 ucol_reset(m_data_); |
| |
159 } |
| |
160 |
| |
161 void CollationElementIterator::setOffset(int32_t newOffset, |
| |
162 UErrorCode& status) |
| |
163 { |
| |
164 ucol_setOffset(m_data_, newOffset, &status); |
| |
165 } |
| |
166 |
| |
167 /** |
| |
168 * Sets the source to the new source string. |
| |
169 */ |
| |
170 void CollationElementIterator::setText(const UnicodeString& source, |
| |
171 UErrorCode& status) |
| |
172 { |
| |
173 if (U_FAILURE(status)) { |
| |
174 return; |
| |
175 } |
| |
176 |
| |
177 int32_t length = source.length(); |
| |
178 UChar *string = NULL; |
| |
179 if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) { |
| |
180 uprv_free((UChar *)m_data_->iteratordata_.string); |
| |
181 } |
| |
182 m_data_->isWritable = TRUE; |
| |
183 if (length > 0) { |
| |
184 string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length); |
| |
185 /* test for NULL */ |
| |
186 if (string == NULL) { |
| |
187 status = U_MEMORY_ALLOCATION_ERROR; |
| |
188 return; |
| |
189 } |
| |
190 u_memcpy(string, source.getBuffer(), length); |
| |
191 } |
| |
192 else { |
| |
193 string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR); |
| |
194 /* test for NULL */ |
| |
195 if (string == NULL) { |
| |
196 status = U_MEMORY_ALLOCATION_ERROR; |
| |
197 return; |
| |
198 } |
| |
199 *string = 0; |
| |
200 } |
| |
201 /* Free offsetBuffer before initializing it. */ |
| |
202 ucol_freeOffsetBuffer(&(m_data_->iteratordata_)); |
| |
203 uprv_init_collIterate(m_data_->iteratordata_.coll, string, length, |
| |
204 &m_data_->iteratordata_, &status); |
| |
205 |
| |
206 m_data_->reset_ = TRUE; |
| |
207 } |
| |
208 |
| |
209 // Sets the source to the new character iterator. |
| |
210 void CollationElementIterator::setText(CharacterIterator& source, |
| |
211 UErrorCode& status) |
| |
212 { |
| |
213 if (U_FAILURE(status)) |
| |
214 return; |
| |
215 |
| |
216 int32_t length = source.getLength(); |
| |
217 UChar *buffer = NULL; |
| |
218 |
| |
219 if (length == 0) { |
| |
220 buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR); |
| |
221 /* test for NULL */ |
| |
222 if (buffer == NULL) { |
| |
223 status = U_MEMORY_ALLOCATION_ERROR; |
| |
224 return; |
| |
225 } |
| |
226 *buffer = 0; |
| |
227 } |
| |
228 else { |
| |
229 buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length); |
| |
230 /* test for NULL */ |
| |
231 if (buffer == NULL) { |
| |
232 status = U_MEMORY_ALLOCATION_ERROR; |
| |
233 return; |
| |
234 } |
| |
235 /* |
| |
236 Using this constructor will prevent buffer from being removed when |
| |
237 string gets removed |
| |
238 */ |
| |
239 UnicodeString string; |
| |
240 source.getText(string); |
| |
241 u_memcpy(buffer, string.getBuffer(), length); |
| |
242 } |
| |
243 |
| |
244 if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) { |
| |
245 uprv_free((UChar *)m_data_->iteratordata_.string); |
| |
246 } |
| |
247 m_data_->isWritable = TRUE; |
| |
248 /* Free offsetBuffer before initializing it. */ |
| |
249 ucol_freeOffsetBuffer(&(m_data_->iteratordata_)); |
| |
250 uprv_init_collIterate(m_data_->iteratordata_.coll, buffer, length, |
| |
251 &m_data_->iteratordata_, &status); |
| |
252 m_data_->reset_ = TRUE; |
| |
253 } |
| |
254 |
| |
255 int32_t CollationElementIterator::strengthOrder(int32_t order) const |
| |
256 { |
| |
257 UCollationStrength s = ucol_getStrength(m_data_->iteratordata_.coll); |
| |
258 // Mask off the unwanted differences. |
| |
259 if (s == UCOL_PRIMARY) { |
| |
260 order &= RuleBasedCollator::PRIMARYDIFFERENCEONLY; |
| |
261 } |
| |
262 else if (s == UCOL_SECONDARY) { |
| |
263 order &= RuleBasedCollator::SECONDARYDIFFERENCEONLY; |
| |
264 } |
| |
265 |
| |
266 return order; |
| |
267 } |
| |
268 |
| |
269 /* CollationElementIterator private constructors/destructors --------------- */ |
| |
270 |
| |
271 /** |
| |
272 * This is the "real" constructor for this class; it constructs an iterator |
| |
273 * over the source text using the specified collator |
| |
274 */ |
| |
275 CollationElementIterator::CollationElementIterator( |
| |
276 const UnicodeString& sourceText, |
| |
277 const RuleBasedCollator* order, |
| |
278 UErrorCode& status) |
| |
279 : isDataOwned_(TRUE) |
| |
280 { |
| |
281 if (U_FAILURE(status)) { |
| |
282 return; |
| |
283 } |
| |
284 |
| |
285 int32_t length = sourceText.length(); |
| |
286 UChar *string = NULL; |
| |
287 |
| |
288 if (length > 0) { |
| |
289 string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length); |
| |
290 /* test for NULL */ |
| |
291 if (string == NULL) { |
| |
292 status = U_MEMORY_ALLOCATION_ERROR; |
| |
293 return; |
| |
294 } |
| |
295 /* |
| |
296 Using this constructor will prevent buffer from being removed when |
| |
297 string gets removed |
| |
298 */ |
| |
299 u_memcpy(string, sourceText.getBuffer(), length); |
| |
300 } |
| |
301 else { |
| |
302 string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR); |
| |
303 /* test for NULL */ |
| |
304 if (string == NULL) { |
| |
305 status = U_MEMORY_ALLOCATION_ERROR; |
| |
306 return; |
| |
307 } |
| |
308 *string = 0; |
| |
309 } |
| |
310 m_data_ = ucol_openElements(order->ucollator, string, length, &status); |
| |
311 |
| |
312 /* Test for buffer overflows */ |
| |
313 if (U_FAILURE(status)) { |
| |
314 return; |
| |
315 } |
| |
316 m_data_->isWritable = TRUE; |
| |
317 } |
| |
318 |
| |
319 /** |
| |
320 * This is the "real" constructor for this class; it constructs an iterator over |
| |
321 * the source text using the specified collator |
| |
322 */ |
| |
323 CollationElementIterator::CollationElementIterator( |
| |
324 const CharacterIterator& sourceText, |
| |
325 const RuleBasedCollator* order, |
| |
326 UErrorCode& status) |
| |
327 : isDataOwned_(TRUE) |
| |
328 { |
| |
329 if (U_FAILURE(status)) |
| |
330 return; |
| |
331 |
| |
332 // **** should I just drop this test? **** |
| |
333 /* |
| |
334 if ( sourceText.endIndex() != 0 ) |
| |
335 { |
| |
336 // A CollationElementIterator is really a two-layered beast. |
| |
337 // Internally it uses a Normalizer to munge the source text into a form |
| |
338 // where all "composed" Unicode characters (such as \u00FC) are split into a |
| |
339 // normal character and a combining accent character. |
| |
340 // Afterward, CollationElementIterator does its own processing to handle |
| |
341 // expanding and contracting collation sequences, ignorables, and so on. |
| |
342 |
| |
343 Normalizer::EMode decomp = order->getStrength() == Collator::IDENTICAL |
| |
344 ? Normalizer::NO_OP : order->getDecomposition(); |
| |
345 |
| |
346 text = new Normalizer(sourceText, decomp); |
| |
347 if (text == NULL) |
| |
348 status = U_MEMORY_ALLOCATION_ERROR; |
| |
349 } |
| |
350 */ |
| |
351 int32_t length = sourceText.getLength(); |
| |
352 UChar *buffer; |
| |
353 if (length > 0) { |
| |
354 buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length); |
| |
355 /* test for NULL */ |
| |
356 if (buffer == NULL) { |
| |
357 status = U_MEMORY_ALLOCATION_ERROR; |
| |
358 return; |
| |
359 } |
| |
360 /* |
| |
361 Using this constructor will prevent buffer from being removed when |
| |
362 string gets removed |
| |
363 */ |
| |
364 UnicodeString string(buffer, length, length); |
| |
365 ((CharacterIterator &)sourceText).getText(string); |
| |
366 const UChar *temp = string.getBuffer(); |
| |
367 u_memcpy(buffer, temp, length); |
| |
368 } |
| |
369 else { |
| |
370 buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR); |
| |
371 /* test for NULL */ |
| |
372 if (buffer == NULL) { |
| |
373 status = U_MEMORY_ALLOCATION_ERROR; |
| |
374 return; |
| |
375 } |
| |
376 *buffer = 0; |
| |
377 } |
| |
378 m_data_ = ucol_openElements(order->ucollator, buffer, length, &status); |
| |
379 |
| |
380 /* Test for buffer overflows */ |
| |
381 if (U_FAILURE(status)) { |
| |
382 return; |
| |
383 } |
| |
384 m_data_->isWritable = TRUE; |
| |
385 } |
| |
386 |
| |
387 /* CollationElementIterator protected methods ----------------------------- */ |
| |
388 |
| |
389 const CollationElementIterator& CollationElementIterator::operator=( |
| |
390 const CollationElementIterator& other) |
| |
391 { |
| |
392 if (this != &other) |
| |
393 { |
| |
394 UCollationElements *ucolelem = this->m_data_; |
| |
395 UCollationElements *otherucolelem = other.m_data_; |
| |
396 collIterate *coliter = &(ucolelem->iteratordata_); |
| |
397 collIterate *othercoliter = &(otherucolelem->iteratordata_); |
| |
398 int length = 0; |
| |
399 |
| |
400 // checking only UCOL_ITER_HASLEN is not enough here as we may be in |
| |
401 // the normalization buffer |
| |
402 length = (int)(othercoliter->endp - othercoliter->string); |
| |
403 |
| |
404 ucolelem->reset_ = otherucolelem->reset_; |
| |
405 ucolelem->isWritable = TRUE; |
| |
406 |
| |
407 /* create a duplicate of string */ |
| |
408 if (length > 0) { |
| |
409 coliter->string = (UChar *)uprv_malloc(length * U_SIZEOF_UCHAR); |
| |
410 if(coliter->string != NULL) { |
| |
411 uprv_memcpy((UChar *)coliter->string, othercoliter->string, |
| |
412 length * U_SIZEOF_UCHAR); |
| |
413 } else { // Error: couldn't allocate memory. No copying should be done |
| |
414 length = 0; |
| |
415 } |
| |
416 } |
| |
417 else { |
| |
418 coliter->string = NULL; |
| |
419 } |
| |
420 |
| |
421 /* start and end of string */ |
| |
422 coliter->endp = coliter->string == NULL ? NULL : coliter->string + length; |
| |
423 |
| |
424 /* handle writable buffer here */ |
| |
425 |
| |
426 if (othercoliter->flags & UCOL_ITER_INNORMBUF) { |
| |
427 coliter->writableBuffer = othercoliter->writableBuffer; |
| |
428 coliter->writableBuffer.getTerminatedBuffer(); |
| |
429 } |
| |
430 |
| |
431 /* current position */ |
| |
432 if (othercoliter->pos >= othercoliter->string && |
| |
433 othercoliter->pos <= othercoliter->endp) |
| |
434 { |
| |
435 U_ASSERT(coliter->string != NULL); |
| |
436 coliter->pos = coliter->string + |
| |
437 (othercoliter->pos - othercoliter->string); |
| |
438 } |
| |
439 else { |
| |
440 coliter->pos = coliter->writableBuffer.getTerminatedBuffer() + |
| |
441 (othercoliter->pos - othercoliter->writableBuffer.getBuffer()); |
| |
442 } |
| |
443 |
| |
444 /* CE buffer */ |
| |
445 int32_t CEsize; |
| |
446 if (coliter->extendCEs) { |
| |
447 uprv_memcpy(coliter->CEs, othercoliter->CEs, sizeof(uint32_t) * UCOL_EXPAND_CE_BUFFER_SIZE); |
| |
448 CEsize = sizeof(othercoliter->extendCEs); |
| |
449 if (CEsize > 0) { |
| |
450 othercoliter->extendCEs = (uint32_t *)uprv_malloc(CEsize); |
| |
451 uprv_memcpy(coliter->extendCEs, othercoliter->extendCEs, CEsize); |
| |
452 } |
| |
453 coliter->toReturn = coliter->extendCEs + |
| |
454 (othercoliter->toReturn - othercoliter->extendCEs); |
| |
455 coliter->CEpos = coliter->extendCEs + CEsize; |
| |
456 } else { |
| |
457 CEsize = (int32_t)(othercoliter->CEpos - othercoliter->CEs); |
| |
458 if (CEsize > 0) { |
| |
459 uprv_memcpy(coliter->CEs, othercoliter->CEs, CEsize); |
| |
460 } |
| |
461 coliter->toReturn = coliter->CEs + |
| |
462 (othercoliter->toReturn - othercoliter->CEs); |
| |
463 coliter->CEpos = coliter->CEs + CEsize; |
| |
464 } |
| |
465 |
| |
466 if (othercoliter->fcdPosition != NULL) { |
| |
467 U_ASSERT(coliter->string != NULL); |
| |
468 coliter->fcdPosition = coliter->string + |
| |
469 (othercoliter->fcdPosition |
| |
470 - othercoliter->string); |
| |
471 } |
| |
472 else { |
| |
473 coliter->fcdPosition = NULL; |
| |
474 } |
| |
475 coliter->flags = othercoliter->flags/*| UCOL_ITER_HASLEN*/; |
| |
476 coliter->origFlags = othercoliter->origFlags; |
| |
477 coliter->coll = othercoliter->coll; |
| |
478 this->isDataOwned_ = TRUE; |
| |
479 } |
| |
480 |
| |
481 return *this; |
| |
482 } |
| |
483 |
| |
484 U_NAMESPACE_END |
| |
485 |
| |
486 #endif /* #if !UCONFIG_NO_COLLATION */ |
| |
487 |
| |
488 /* eof */ |