|
1 /* |
|
2 ****************************************************************************** |
|
3 * Copyright (C) 2001-2011, International Business Machines |
|
4 * Corporation and others. All Rights Reserved. |
|
5 ****************************************************************************** |
|
6 * |
|
7 * File ucoleitr.cpp |
|
8 * |
|
9 * Modification History: |
|
10 * |
|
11 * Date Name Description |
|
12 * 02/15/2001 synwee Modified all methods to process its own function |
|
13 * instead of calling the equivalent c++ api (coleitr.h) |
|
14 ******************************************************************************/ |
|
15 |
|
16 #include "unicode/utypes.h" |
|
17 |
|
18 #if !UCONFIG_NO_COLLATION |
|
19 |
|
20 #include "unicode/ucoleitr.h" |
|
21 #include "unicode/ustring.h" |
|
22 #include "unicode/sortkey.h" |
|
23 #include "unicode/uobject.h" |
|
24 #include "ucol_imp.h" |
|
25 #include "cmemory.h" |
|
26 |
|
27 U_NAMESPACE_USE |
|
28 |
|
29 #define BUFFER_LENGTH 100 |
|
30 |
|
31 #define DEFAULT_BUFFER_SIZE 16 |
|
32 #define BUFFER_GROW 8 |
|
33 |
|
34 #define ARRAY_SIZE(array) (sizeof array / sizeof array[0]) |
|
35 |
|
36 #define ARRAY_COPY(dst, src, count) uprv_memcpy((void *) (dst), (void *) (src), (count) * sizeof (src)[0]) |
|
37 |
|
38 #define NEW_ARRAY(type, count) (type *) uprv_malloc((count) * sizeof(type)) |
|
39 |
|
40 #define GROW_ARRAY(array, newSize) uprv_realloc((void *) (array), (newSize) * sizeof (array)[0]) |
|
41 |
|
42 #define DELETE_ARRAY(array) uprv_free((void *) (array)) |
|
43 |
|
44 typedef struct icu::collIterate collIterator; |
|
45 |
|
46 struct RCEI |
|
47 { |
|
48 uint32_t ce; |
|
49 int32_t low; |
|
50 int32_t high; |
|
51 }; |
|
52 |
|
53 U_NAMESPACE_BEGIN |
|
54 |
|
55 struct RCEBuffer |
|
56 { |
|
57 RCEI defaultBuffer[DEFAULT_BUFFER_SIZE]; |
|
58 RCEI *buffer; |
|
59 int32_t bufferIndex; |
|
60 int32_t bufferSize; |
|
61 |
|
62 RCEBuffer(); |
|
63 ~RCEBuffer(); |
|
64 |
|
65 UBool empty() const; |
|
66 void put(uint32_t ce, int32_t ixLow, int32_t ixHigh); |
|
67 const RCEI *get(); |
|
68 }; |
|
69 |
|
70 RCEBuffer::RCEBuffer() |
|
71 { |
|
72 buffer = defaultBuffer; |
|
73 bufferIndex = 0; |
|
74 bufferSize = DEFAULT_BUFFER_SIZE; |
|
75 } |
|
76 |
|
77 RCEBuffer::~RCEBuffer() |
|
78 { |
|
79 if (buffer != defaultBuffer) { |
|
80 DELETE_ARRAY(buffer); |
|
81 } |
|
82 } |
|
83 |
|
84 UBool RCEBuffer::empty() const |
|
85 { |
|
86 return bufferIndex <= 0; |
|
87 } |
|
88 |
|
89 void RCEBuffer::put(uint32_t ce, int32_t ixLow, int32_t ixHigh) |
|
90 { |
|
91 if (bufferIndex >= bufferSize) { |
|
92 RCEI *newBuffer = NEW_ARRAY(RCEI, bufferSize + BUFFER_GROW); |
|
93 |
|
94 ARRAY_COPY(newBuffer, buffer, bufferSize); |
|
95 |
|
96 if (buffer != defaultBuffer) { |
|
97 DELETE_ARRAY(buffer); |
|
98 } |
|
99 |
|
100 buffer = newBuffer; |
|
101 bufferSize += BUFFER_GROW; |
|
102 } |
|
103 |
|
104 buffer[bufferIndex].ce = ce; |
|
105 buffer[bufferIndex].low = ixLow; |
|
106 buffer[bufferIndex].high = ixHigh; |
|
107 |
|
108 bufferIndex += 1; |
|
109 } |
|
110 |
|
111 const RCEI *RCEBuffer::get() |
|
112 { |
|
113 if (bufferIndex > 0) { |
|
114 return &buffer[--bufferIndex]; |
|
115 } |
|
116 |
|
117 return NULL; |
|
118 } |
|
119 |
|
120 struct PCEI |
|
121 { |
|
122 uint64_t ce; |
|
123 int32_t low; |
|
124 int32_t high; |
|
125 }; |
|
126 |
|
127 struct PCEBuffer |
|
128 { |
|
129 PCEI defaultBuffer[DEFAULT_BUFFER_SIZE]; |
|
130 PCEI *buffer; |
|
131 int32_t bufferIndex; |
|
132 int32_t bufferSize; |
|
133 |
|
134 PCEBuffer(); |
|
135 ~PCEBuffer(); |
|
136 |
|
137 void reset(); |
|
138 UBool empty() const; |
|
139 void put(uint64_t ce, int32_t ixLow, int32_t ixHigh); |
|
140 const PCEI *get(); |
|
141 }; |
|
142 |
|
143 PCEBuffer::PCEBuffer() |
|
144 { |
|
145 buffer = defaultBuffer; |
|
146 bufferIndex = 0; |
|
147 bufferSize = DEFAULT_BUFFER_SIZE; |
|
148 } |
|
149 |
|
150 PCEBuffer::~PCEBuffer() |
|
151 { |
|
152 if (buffer != defaultBuffer) { |
|
153 DELETE_ARRAY(buffer); |
|
154 } |
|
155 } |
|
156 |
|
157 void PCEBuffer::reset() |
|
158 { |
|
159 bufferIndex = 0; |
|
160 } |
|
161 |
|
162 UBool PCEBuffer::empty() const |
|
163 { |
|
164 return bufferIndex <= 0; |
|
165 } |
|
166 |
|
167 void PCEBuffer::put(uint64_t ce, int32_t ixLow, int32_t ixHigh) |
|
168 { |
|
169 if (bufferIndex >= bufferSize) { |
|
170 PCEI *newBuffer = NEW_ARRAY(PCEI, bufferSize + BUFFER_GROW); |
|
171 |
|
172 ARRAY_COPY(newBuffer, buffer, bufferSize); |
|
173 |
|
174 if (buffer != defaultBuffer) { |
|
175 DELETE_ARRAY(buffer); |
|
176 } |
|
177 |
|
178 buffer = newBuffer; |
|
179 bufferSize += BUFFER_GROW; |
|
180 } |
|
181 |
|
182 buffer[bufferIndex].ce = ce; |
|
183 buffer[bufferIndex].low = ixLow; |
|
184 buffer[bufferIndex].high = ixHigh; |
|
185 |
|
186 bufferIndex += 1; |
|
187 } |
|
188 |
|
189 const PCEI *PCEBuffer::get() |
|
190 { |
|
191 if (bufferIndex > 0) { |
|
192 return &buffer[--bufferIndex]; |
|
193 } |
|
194 |
|
195 return NULL; |
|
196 } |
|
197 |
|
198 /* |
|
199 * This inherits from UObject so that |
|
200 * it can be allocated by new and the |
|
201 * constructor for PCEBuffer is called. |
|
202 */ |
|
203 struct UCollationPCE : public UObject |
|
204 { |
|
205 PCEBuffer pceBuffer; |
|
206 UCollationStrength strength; |
|
207 UBool toShift; |
|
208 UBool isShifted; |
|
209 uint32_t variableTop; |
|
210 |
|
211 UCollationPCE(UCollationElements *elems); |
|
212 ~UCollationPCE(); |
|
213 |
|
214 void init(const UCollator *coll); |
|
215 |
|
216 virtual UClassID getDynamicClassID() const; |
|
217 static UClassID getStaticClassID(); |
|
218 }; |
|
219 |
|
220 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UCollationPCE) |
|
221 |
|
222 UCollationPCE::UCollationPCE(UCollationElements *elems) |
|
223 { |
|
224 init(elems->iteratordata_.coll); |
|
225 } |
|
226 |
|
227 void UCollationPCE::init(const UCollator *coll) |
|
228 { |
|
229 UErrorCode status = U_ZERO_ERROR; |
|
230 |
|
231 strength = ucol_getStrength(coll); |
|
232 toShift = ucol_getAttribute(coll, UCOL_ALTERNATE_HANDLING, &status) == UCOL_SHIFTED; |
|
233 isShifted = FALSE; |
|
234 variableTop = coll->variableTopValue << 16; |
|
235 } |
|
236 |
|
237 UCollationPCE::~UCollationPCE() |
|
238 { |
|
239 // nothing to do |
|
240 } |
|
241 |
|
242 |
|
243 U_NAMESPACE_END |
|
244 |
|
245 |
|
246 inline uint64_t processCE(UCollationElements *elems, uint32_t ce) |
|
247 { |
|
248 uint64_t primary = 0, secondary = 0, tertiary = 0, quaternary = 0; |
|
249 |
|
250 // This is clean, but somewhat slow... |
|
251 // We could apply the mask to ce and then |
|
252 // just get all three orders... |
|
253 switch(elems->pce->strength) { |
|
254 default: |
|
255 tertiary = ucol_tertiaryOrder(ce); |
|
256 /* note fall-through */ |
|
257 |
|
258 case UCOL_SECONDARY: |
|
259 secondary = ucol_secondaryOrder(ce); |
|
260 /* note fall-through */ |
|
261 |
|
262 case UCOL_PRIMARY: |
|
263 primary = ucol_primaryOrder(ce); |
|
264 } |
|
265 |
|
266 // **** This should probably handle continuations too. **** |
|
267 // **** That means that we need 24 bits for the primary **** |
|
268 // **** instead of the 16 that we're currently using. **** |
|
269 // **** So we can lay out the 64 bits as: 24.12.12.16. **** |
|
270 // **** Another complication with continuations is that **** |
|
271 // **** the *second* CE is marked as a continuation, so **** |
|
272 // **** we always have to peek ahead to know how long **** |
|
273 // **** the primary is... **** |
|
274 if ((elems->pce->toShift && elems->pce->variableTop > ce && primary != 0) |
|
275 || (elems->pce->isShifted && primary == 0)) { |
|
276 |
|
277 if (primary == 0) { |
|
278 return UCOL_IGNORABLE; |
|
279 } |
|
280 |
|
281 if (elems->pce->strength >= UCOL_QUATERNARY) { |
|
282 quaternary = primary; |
|
283 } |
|
284 |
|
285 primary = secondary = tertiary = 0; |
|
286 elems->pce->isShifted = TRUE; |
|
287 } else { |
|
288 if (elems->pce->strength >= UCOL_QUATERNARY) { |
|
289 quaternary = 0xFFFF; |
|
290 } |
|
291 |
|
292 elems->pce->isShifted = FALSE; |
|
293 } |
|
294 |
|
295 return primary << 48 | secondary << 32 | tertiary << 16 | quaternary; |
|
296 } |
|
297 |
|
298 U_CAPI void U_EXPORT2 |
|
299 uprv_init_pce(const UCollationElements *elems) |
|
300 { |
|
301 if (elems->pce != NULL) { |
|
302 elems->pce->init(elems->iteratordata_.coll); |
|
303 } |
|
304 } |
|
305 |
|
306 |
|
307 |
|
308 /* public methods ---------------------------------------------------- */ |
|
309 |
|
310 U_CAPI UCollationElements* U_EXPORT2 |
|
311 ucol_openElements(const UCollator *coll, |
|
312 const UChar *text, |
|
313 int32_t textLength, |
|
314 UErrorCode *status) |
|
315 { |
|
316 if (U_FAILURE(*status)) { |
|
317 return NULL; |
|
318 } |
|
319 |
|
320 UCollationElements *result = new UCollationElements; |
|
321 if (result == NULL) { |
|
322 *status = U_MEMORY_ALLOCATION_ERROR; |
|
323 return NULL; |
|
324 } |
|
325 |
|
326 result->reset_ = TRUE; |
|
327 result->isWritable = FALSE; |
|
328 result->pce = NULL; |
|
329 |
|
330 if (text == NULL) { |
|
331 textLength = 0; |
|
332 } |
|
333 uprv_init_collIterate(coll, text, textLength, &result->iteratordata_, status); |
|
334 |
|
335 return result; |
|
336 } |
|
337 |
|
338 |
|
339 U_CAPI void U_EXPORT2 |
|
340 ucol_closeElements(UCollationElements *elems) |
|
341 { |
|
342 if (elems != NULL) { |
|
343 collIterate *ci = &elems->iteratordata_; |
|
344 |
|
345 if (ci->extendCEs) { |
|
346 uprv_free(ci->extendCEs); |
|
347 } |
|
348 |
|
349 if (ci->offsetBuffer) { |
|
350 uprv_free(ci->offsetBuffer); |
|
351 } |
|
352 |
|
353 if (elems->isWritable && elems->iteratordata_.string != NULL) |
|
354 { |
|
355 uprv_free((UChar *)elems->iteratordata_.string); |
|
356 } |
|
357 |
|
358 if (elems->pce != NULL) { |
|
359 delete elems->pce; |
|
360 } |
|
361 |
|
362 delete elems; |
|
363 } |
|
364 } |
|
365 |
|
366 U_CAPI void U_EXPORT2 |
|
367 ucol_reset(UCollationElements *elems) |
|
368 { |
|
369 collIterate *ci = &(elems->iteratordata_); |
|
370 elems->reset_ = TRUE; |
|
371 ci->pos = ci->string; |
|
372 if ((ci->flags & UCOL_ITER_HASLEN) == 0 || ci->endp == NULL) { |
|
373 ci->endp = ci->string + u_strlen(ci->string); |
|
374 } |
|
375 ci->CEpos = ci->toReturn = ci->CEs; |
|
376 ci->flags = (ci->flags & UCOL_FORCE_HAN_IMPLICIT) | UCOL_ITER_HASLEN; |
|
377 if (ci->coll->normalizationMode == UCOL_ON) { |
|
378 ci->flags |= UCOL_ITER_NORM; |
|
379 } |
|
380 |
|
381 ci->writableBuffer.remove(); |
|
382 ci->fcdPosition = NULL; |
|
383 |
|
384 //ci->offsetReturn = ci->offsetStore = NULL; |
|
385 ci->offsetRepeatCount = ci->offsetRepeatValue = 0; |
|
386 } |
|
387 |
|
388 U_CAPI void U_EXPORT2 |
|
389 ucol_forceHanImplicit(UCollationElements *elems, UErrorCode *status) |
|
390 { |
|
391 if (U_FAILURE(*status)) { |
|
392 return; |
|
393 } |
|
394 |
|
395 if (elems == NULL) { |
|
396 *status = U_ILLEGAL_ARGUMENT_ERROR; |
|
397 return; |
|
398 } |
|
399 |
|
400 elems->iteratordata_.flags |= UCOL_FORCE_HAN_IMPLICIT; |
|
401 } |
|
402 |
|
403 U_CAPI int32_t U_EXPORT2 |
|
404 ucol_next(UCollationElements *elems, |
|
405 UErrorCode *status) |
|
406 { |
|
407 int32_t result; |
|
408 if (U_FAILURE(*status)) { |
|
409 return UCOL_NULLORDER; |
|
410 } |
|
411 |
|
412 elems->reset_ = FALSE; |
|
413 |
|
414 result = (int32_t)ucol_getNextCE(elems->iteratordata_.coll, |
|
415 &elems->iteratordata_, |
|
416 status); |
|
417 |
|
418 if (result == UCOL_NO_MORE_CES) { |
|
419 result = UCOL_NULLORDER; |
|
420 } |
|
421 return result; |
|
422 } |
|
423 |
|
424 U_CAPI int64_t U_EXPORT2 |
|
425 ucol_nextProcessed(UCollationElements *elems, |
|
426 int32_t *ixLow, |
|
427 int32_t *ixHigh, |
|
428 UErrorCode *status) |
|
429 { |
|
430 const UCollator *coll = elems->iteratordata_.coll; |
|
431 int64_t result = UCOL_IGNORABLE; |
|
432 uint32_t low = 0, high = 0; |
|
433 |
|
434 if (U_FAILURE(*status)) { |
|
435 return UCOL_PROCESSED_NULLORDER; |
|
436 } |
|
437 |
|
438 if (elems->pce == NULL) { |
|
439 elems->pce = new UCollationPCE(elems); |
|
440 } else { |
|
441 elems->pce->pceBuffer.reset(); |
|
442 } |
|
443 |
|
444 elems->reset_ = FALSE; |
|
445 |
|
446 do { |
|
447 low = ucol_getOffset(elems); |
|
448 uint32_t ce = (uint32_t) ucol_getNextCE(coll, &elems->iteratordata_, status); |
|
449 high = ucol_getOffset(elems); |
|
450 |
|
451 if (ce == UCOL_NO_MORE_CES) { |
|
452 result = UCOL_PROCESSED_NULLORDER; |
|
453 break; |
|
454 } |
|
455 |
|
456 result = processCE(elems, ce); |
|
457 } while (result == UCOL_IGNORABLE); |
|
458 |
|
459 if (ixLow != NULL) { |
|
460 *ixLow = low; |
|
461 } |
|
462 |
|
463 if (ixHigh != NULL) { |
|
464 *ixHigh = high; |
|
465 } |
|
466 |
|
467 return result; |
|
468 } |
|
469 |
|
470 U_CAPI int32_t U_EXPORT2 |
|
471 ucol_previous(UCollationElements *elems, |
|
472 UErrorCode *status) |
|
473 { |
|
474 if(U_FAILURE(*status)) { |
|
475 return UCOL_NULLORDER; |
|
476 } |
|
477 else |
|
478 { |
|
479 int32_t result; |
|
480 |
|
481 if (elems->reset_ && (elems->iteratordata_.pos == elems->iteratordata_.string)) { |
|
482 if (elems->iteratordata_.endp == NULL) { |
|
483 elems->iteratordata_.endp = elems->iteratordata_.string + |
|
484 u_strlen(elems->iteratordata_.string); |
|
485 elems->iteratordata_.flags |= UCOL_ITER_HASLEN; |
|
486 } |
|
487 elems->iteratordata_.pos = elems->iteratordata_.endp; |
|
488 elems->iteratordata_.fcdPosition = elems->iteratordata_.endp; |
|
489 } |
|
490 |
|
491 elems->reset_ = FALSE; |
|
492 |
|
493 result = (int32_t)ucol_getPrevCE(elems->iteratordata_.coll, |
|
494 &(elems->iteratordata_), |
|
495 status); |
|
496 |
|
497 if (result == UCOL_NO_MORE_CES) { |
|
498 result = UCOL_NULLORDER; |
|
499 } |
|
500 |
|
501 return result; |
|
502 } |
|
503 } |
|
504 |
|
505 U_CAPI int64_t U_EXPORT2 |
|
506 ucol_previousProcessed(UCollationElements *elems, |
|
507 int32_t *ixLow, |
|
508 int32_t *ixHigh, |
|
509 UErrorCode *status) |
|
510 { |
|
511 const UCollator *coll = elems->iteratordata_.coll; |
|
512 int64_t result = UCOL_IGNORABLE; |
|
513 // int64_t primary = 0, secondary = 0, tertiary = 0, quaternary = 0; |
|
514 // UCollationStrength strength = ucol_getStrength(coll); |
|
515 // UBool toShift = ucol_getAttribute(coll, UCOL_ALTERNATE_HANDLING, status) == UCOL_SHIFTED; |
|
516 // uint32_t variableTop = coll->variableTopValue; |
|
517 int32_t low = 0, high = 0; |
|
518 |
|
519 if (U_FAILURE(*status)) { |
|
520 return UCOL_PROCESSED_NULLORDER; |
|
521 } |
|
522 |
|
523 if (elems->reset_ && |
|
524 (elems->iteratordata_.pos == elems->iteratordata_.string)) { |
|
525 if (elems->iteratordata_.endp == NULL) { |
|
526 elems->iteratordata_.endp = elems->iteratordata_.string + |
|
527 u_strlen(elems->iteratordata_.string); |
|
528 elems->iteratordata_.flags |= UCOL_ITER_HASLEN; |
|
529 } |
|
530 |
|
531 elems->iteratordata_.pos = elems->iteratordata_.endp; |
|
532 elems->iteratordata_.fcdPosition = elems->iteratordata_.endp; |
|
533 } |
|
534 |
|
535 if (elems->pce == NULL) { |
|
536 elems->pce = new UCollationPCE(elems); |
|
537 } else { |
|
538 //elems->pce->pceBuffer.reset(); |
|
539 } |
|
540 |
|
541 elems->reset_ = FALSE; |
|
542 |
|
543 while (elems->pce->pceBuffer.empty()) { |
|
544 // buffer raw CEs up to non-ignorable primary |
|
545 RCEBuffer rceb; |
|
546 uint32_t ce; |
|
547 |
|
548 // **** do we need to reset rceb, or will it always be empty at this point **** |
|
549 do { |
|
550 high = ucol_getOffset(elems); |
|
551 ce = ucol_getPrevCE(coll, &elems->iteratordata_, status); |
|
552 low = ucol_getOffset(elems); |
|
553 |
|
554 if (ce == UCOL_NO_MORE_CES) { |
|
555 if (! rceb.empty()) { |
|
556 break; |
|
557 } |
|
558 |
|
559 goto finish; |
|
560 } |
|
561 |
|
562 rceb.put(ce, low, high); |
|
563 } while ((ce & UCOL_PRIMARYMASK) == 0); |
|
564 |
|
565 // process the raw CEs |
|
566 while (! rceb.empty()) { |
|
567 const RCEI *rcei = rceb.get(); |
|
568 |
|
569 result = processCE(elems, rcei->ce); |
|
570 |
|
571 if (result != UCOL_IGNORABLE) { |
|
572 elems->pce->pceBuffer.put(result, rcei->low, rcei->high); |
|
573 } |
|
574 } |
|
575 } |
|
576 |
|
577 finish: |
|
578 if (elems->pce->pceBuffer.empty()) { |
|
579 // **** Is -1 the right value for ixLow, ixHigh? **** |
|
580 if (ixLow != NULL) { |
|
581 *ixLow = -1; |
|
582 } |
|
583 |
|
584 if (ixHigh != NULL) { |
|
585 *ixHigh = -1 |
|
586 ; |
|
587 } |
|
588 return UCOL_PROCESSED_NULLORDER; |
|
589 } |
|
590 |
|
591 const PCEI *pcei = elems->pce->pceBuffer.get(); |
|
592 |
|
593 if (ixLow != NULL) { |
|
594 *ixLow = pcei->low; |
|
595 } |
|
596 |
|
597 if (ixHigh != NULL) { |
|
598 *ixHigh = pcei->high; |
|
599 } |
|
600 |
|
601 return pcei->ce; |
|
602 } |
|
603 |
|
604 U_CAPI int32_t U_EXPORT2 |
|
605 ucol_getMaxExpansion(const UCollationElements *elems, |
|
606 int32_t order) |
|
607 { |
|
608 uint8_t result; |
|
609 |
|
610 #if 0 |
|
611 UCOL_GETMAXEXPANSION(elems->iteratordata_.coll, (uint32_t)order, result); |
|
612 #else |
|
613 const UCollator *coll = elems->iteratordata_.coll; |
|
614 const uint32_t *start; |
|
615 const uint32_t *limit; |
|
616 const uint32_t *mid; |
|
617 uint32_t strengthMask = 0; |
|
618 uint32_t mOrder = (uint32_t) order; |
|
619 |
|
620 switch (coll->strength) |
|
621 { |
|
622 default: |
|
623 strengthMask |= UCOL_TERTIARYORDERMASK; |
|
624 /* fall through */ |
|
625 |
|
626 case UCOL_SECONDARY: |
|
627 strengthMask |= UCOL_SECONDARYORDERMASK; |
|
628 /* fall through */ |
|
629 |
|
630 case UCOL_PRIMARY: |
|
631 strengthMask |= UCOL_PRIMARYORDERMASK; |
|
632 } |
|
633 |
|
634 mOrder &= strengthMask; |
|
635 start = (coll)->endExpansionCE; |
|
636 limit = (coll)->lastEndExpansionCE; |
|
637 |
|
638 while (start < limit - 1) { |
|
639 mid = start + ((limit - start) >> 1); |
|
640 if (mOrder <= (*mid & strengthMask)) { |
|
641 limit = mid; |
|
642 } else { |
|
643 start = mid; |
|
644 } |
|
645 } |
|
646 |
|
647 // FIXME: with a masked search, there might be more than one hit, |
|
648 // so we need to look forward and backward from the match to find all |
|
649 // of the hits... |
|
650 if ((*start & strengthMask) == mOrder) { |
|
651 result = *((coll)->expansionCESize + (start - (coll)->endExpansionCE)); |
|
652 } else if ((*limit & strengthMask) == mOrder) { |
|
653 result = *(coll->expansionCESize + (limit - coll->endExpansionCE)); |
|
654 } else if ((mOrder & 0xFFFF) == 0x00C0) { |
|
655 result = 2; |
|
656 } else { |
|
657 result = 1; |
|
658 } |
|
659 #endif |
|
660 |
|
661 return result; |
|
662 } |
|
663 |
|
664 U_CAPI void U_EXPORT2 |
|
665 ucol_setText( UCollationElements *elems, |
|
666 const UChar *text, |
|
667 int32_t textLength, |
|
668 UErrorCode *status) |
|
669 { |
|
670 if (U_FAILURE(*status)) { |
|
671 return; |
|
672 } |
|
673 |
|
674 if (elems->isWritable && elems->iteratordata_.string != NULL) |
|
675 { |
|
676 uprv_free((UChar *)elems->iteratordata_.string); |
|
677 } |
|
678 |
|
679 if (text == NULL) { |
|
680 textLength = 0; |
|
681 } |
|
682 |
|
683 elems->isWritable = FALSE; |
|
684 |
|
685 /* free offset buffer to avoid memory leak before initializing. */ |
|
686 ucol_freeOffsetBuffer(&(elems->iteratordata_)); |
|
687 /* Ensure that previously allocated extendCEs is freed before setting to NULL. */ |
|
688 if (elems->iteratordata_.extendCEs != NULL) { |
|
689 uprv_free(elems->iteratordata_.extendCEs); |
|
690 } |
|
691 uprv_init_collIterate(elems->iteratordata_.coll, text, textLength, |
|
692 &elems->iteratordata_, status); |
|
693 |
|
694 elems->reset_ = TRUE; |
|
695 } |
|
696 |
|
697 U_CAPI int32_t U_EXPORT2 |
|
698 ucol_getOffset(const UCollationElements *elems) |
|
699 { |
|
700 const collIterate *ci = &(elems->iteratordata_); |
|
701 |
|
702 if (ci->offsetRepeatCount > 0 && ci->offsetRepeatValue != 0) { |
|
703 return ci->offsetRepeatValue; |
|
704 } |
|
705 |
|
706 if (ci->offsetReturn != NULL) { |
|
707 return *ci->offsetReturn; |
|
708 } |
|
709 |
|
710 // while processing characters in normalization buffer getOffset will |
|
711 // return the next non-normalized character. |
|
712 // should be inline with the old implementation since the old codes uses |
|
713 // nextDecomp in normalizer which also decomposes the string till the |
|
714 // first base character is found. |
|
715 if (ci->flags & UCOL_ITER_INNORMBUF) { |
|
716 if (ci->fcdPosition == NULL) { |
|
717 return 0; |
|
718 } |
|
719 return (int32_t)(ci->fcdPosition - ci->string); |
|
720 } |
|
721 else { |
|
722 return (int32_t)(ci->pos - ci->string); |
|
723 } |
|
724 } |
|
725 |
|
726 U_CAPI void U_EXPORT2 |
|
727 ucol_setOffset(UCollationElements *elems, |
|
728 int32_t offset, |
|
729 UErrorCode *status) |
|
730 { |
|
731 if (U_FAILURE(*status)) { |
|
732 return; |
|
733 } |
|
734 |
|
735 // this methods will clean up any use of the writable buffer and points to |
|
736 // the original string |
|
737 collIterate *ci = &(elems->iteratordata_); |
|
738 ci->pos = ci->string + offset; |
|
739 ci->CEpos = ci->toReturn = ci->CEs; |
|
740 if (ci->flags & UCOL_ITER_INNORMBUF) { |
|
741 ci->flags = ci->origFlags; |
|
742 } |
|
743 if ((ci->flags & UCOL_ITER_HASLEN) == 0) { |
|
744 ci->endp = ci->string + u_strlen(ci->string); |
|
745 ci->flags |= UCOL_ITER_HASLEN; |
|
746 } |
|
747 ci->fcdPosition = NULL; |
|
748 elems->reset_ = FALSE; |
|
749 |
|
750 ci->offsetReturn = NULL; |
|
751 ci->offsetStore = ci->offsetBuffer; |
|
752 ci->offsetRepeatCount = ci->offsetRepeatValue = 0; |
|
753 } |
|
754 |
|
755 U_CAPI int32_t U_EXPORT2 |
|
756 ucol_primaryOrder (int32_t order) |
|
757 { |
|
758 order &= UCOL_PRIMARYMASK; |
|
759 return (order >> UCOL_PRIMARYORDERSHIFT); |
|
760 } |
|
761 |
|
762 U_CAPI int32_t U_EXPORT2 |
|
763 ucol_secondaryOrder (int32_t order) |
|
764 { |
|
765 order &= UCOL_SECONDARYMASK; |
|
766 return (order >> UCOL_SECONDARYORDERSHIFT); |
|
767 } |
|
768 |
|
769 U_CAPI int32_t U_EXPORT2 |
|
770 ucol_tertiaryOrder (int32_t order) |
|
771 { |
|
772 return (order & UCOL_TERTIARYMASK); |
|
773 } |
|
774 |
|
775 |
|
776 void ucol_freeOffsetBuffer(collIterate *s) { |
|
777 if (s != NULL && s->offsetBuffer != NULL) { |
|
778 uprv_free(s->offsetBuffer); |
|
779 s->offsetBuffer = NULL; |
|
780 s->offsetBufferSize = 0; |
|
781 } |
|
782 } |
|
783 |
|
784 #endif /* #if !UCONFIG_NO_COLLATION */ |