Wed, 31 Dec 2014 06:09:35 +0100
Cloned from the upstream tor-browser origin at tor-browser-31.3.0esr-4.5-1-build1,
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f, for hacking purposes.
1 /*
2 ******************************************************************************
3 * Copyright (C) 2001-2011, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 ******************************************************************************
6 *
7 * File ucoleitr.cpp
8 *
9 * Modification History:
10 *
11 * Date Name Description
12 * 02/15/2001 synwee Modified all methods to process its own function
13 * instead of calling the equivalent c++ api (coleitr.h)
14 ******************************************************************************/
16 #include "unicode/utypes.h"
18 #if !UCONFIG_NO_COLLATION
20 #include "unicode/ucoleitr.h"
21 #include "unicode/ustring.h"
22 #include "unicode/sortkey.h"
23 #include "unicode/uobject.h"
24 #include "ucol_imp.h"
25 #include "cmemory.h"
27 U_NAMESPACE_USE
/* Scratch length constant; not referenced by the code visible in this part of the file. */
#define BUFFER_LENGTH 100

/* Number of entries held by the in-object default storage of RCEBuffer and PCEBuffer. */
#define DEFAULT_BUFFER_SIZE 16
/* Number of entries added each time one of those buffers has to grow. */
#define BUFFER_GROW 8

/*
 * Array helpers layered on the uprv_* memory functions.
 *
 * Every macro argument is parenthesized so that expression arguments
 * (for example *ptr or a + b) expand with the intended precedence, and
 * NEW_ARRAY is wrapped as a whole so its result can be used inside a
 * larger expression.
 */
#define ARRAY_SIZE(array) (sizeof(array) / sizeof((array)[0]))

#define ARRAY_COPY(dst, src, count) uprv_memcpy((void *) (dst), (void *) (src), (count) * sizeof((src)[0]))

#define NEW_ARRAY(type, count) ((type *) uprv_malloc((count) * sizeof(type)))

#define GROW_ARRAY(array, newSize) uprv_realloc((void *) (array), (newSize) * sizeof((array)[0]))

#define DELETE_ARRAY(array) uprv_free((void *) (array))
44 typedef struct icu::collIterate collIterator;
/*
 * One entry of an RCEBuffer: a raw 32-bit collation element together with
 * the two text offsets that were recorded around it.
 */
struct RCEI
{
    uint32_t ce;    // raw collation element
    int32_t  low;   // offset stored as the low end of the element's range
    int32_t  high;  // offset stored as the high end of the element's range
};
53 U_NAMESPACE_BEGIN
55 struct RCEBuffer
56 {
57 RCEI defaultBuffer[DEFAULT_BUFFER_SIZE];
58 RCEI *buffer;
59 int32_t bufferIndex;
60 int32_t bufferSize;
62 RCEBuffer();
63 ~RCEBuffer();
65 UBool empty() const;
66 void put(uint32_t ce, int32_t ixLow, int32_t ixHigh);
67 const RCEI *get();
68 };
70 RCEBuffer::RCEBuffer()
71 {
72 buffer = defaultBuffer;
73 bufferIndex = 0;
74 bufferSize = DEFAULT_BUFFER_SIZE;
75 }
77 RCEBuffer::~RCEBuffer()
78 {
79 if (buffer != defaultBuffer) {
80 DELETE_ARRAY(buffer);
81 }
82 }
84 UBool RCEBuffer::empty() const
85 {
86 return bufferIndex <= 0;
87 }
89 void RCEBuffer::put(uint32_t ce, int32_t ixLow, int32_t ixHigh)
90 {
91 if (bufferIndex >= bufferSize) {
92 RCEI *newBuffer = NEW_ARRAY(RCEI, bufferSize + BUFFER_GROW);
94 ARRAY_COPY(newBuffer, buffer, bufferSize);
96 if (buffer != defaultBuffer) {
97 DELETE_ARRAY(buffer);
98 }
100 buffer = newBuffer;
101 bufferSize += BUFFER_GROW;
102 }
104 buffer[bufferIndex].ce = ce;
105 buffer[bufferIndex].low = ixLow;
106 buffer[bufferIndex].high = ixHigh;
108 bufferIndex += 1;
109 }
111 const RCEI *RCEBuffer::get()
112 {
113 if (bufferIndex > 0) {
114 return &buffer[--bufferIndex];
115 }
117 return NULL;
118 }
/*
 * One entry of a PCEBuffer: a processed 64-bit collation element together
 * with the two text offsets stored alongside it.
 */
struct PCEI
{
    uint64_t ce;    // processed collation element
    int32_t  low;   // offset stored as the low end of the element's range
    int32_t  high;  // offset stored as the high end of the element's range
};
127 struct PCEBuffer
128 {
129 PCEI defaultBuffer[DEFAULT_BUFFER_SIZE];
130 PCEI *buffer;
131 int32_t bufferIndex;
132 int32_t bufferSize;
134 PCEBuffer();
135 ~PCEBuffer();
137 void reset();
138 UBool empty() const;
139 void put(uint64_t ce, int32_t ixLow, int32_t ixHigh);
140 const PCEI *get();
141 };
143 PCEBuffer::PCEBuffer()
144 {
145 buffer = defaultBuffer;
146 bufferIndex = 0;
147 bufferSize = DEFAULT_BUFFER_SIZE;
148 }
150 PCEBuffer::~PCEBuffer()
151 {
152 if (buffer != defaultBuffer) {
153 DELETE_ARRAY(buffer);
154 }
155 }
157 void PCEBuffer::reset()
158 {
159 bufferIndex = 0;
160 }
162 UBool PCEBuffer::empty() const
163 {
164 return bufferIndex <= 0;
165 }
167 void PCEBuffer::put(uint64_t ce, int32_t ixLow, int32_t ixHigh)
168 {
169 if (bufferIndex >= bufferSize) {
170 PCEI *newBuffer = NEW_ARRAY(PCEI, bufferSize + BUFFER_GROW);
172 ARRAY_COPY(newBuffer, buffer, bufferSize);
174 if (buffer != defaultBuffer) {
175 DELETE_ARRAY(buffer);
176 }
178 buffer = newBuffer;
179 bufferSize += BUFFER_GROW;
180 }
182 buffer[bufferIndex].ce = ce;
183 buffer[bufferIndex].low = ixLow;
184 buffer[bufferIndex].high = ixHigh;
186 bufferIndex += 1;
187 }
189 const PCEI *PCEBuffer::get()
190 {
191 if (bufferIndex > 0) {
192 return &buffer[--bufferIndex];
193 }
195 return NULL;
196 }
198 /*
199 * This inherits from UObject so that
200 * it can be allocated by new and the
201 * constructor for PCEBuffer is called.
202 */
203 struct UCollationPCE : public UObject
204 {
205 PCEBuffer pceBuffer;
206 UCollationStrength strength;
207 UBool toShift;
208 UBool isShifted;
209 uint32_t variableTop;
211 UCollationPCE(UCollationElements *elems);
212 ~UCollationPCE();
214 void init(const UCollator *coll);
216 virtual UClassID getDynamicClassID() const;
217 static UClassID getStaticClassID();
218 };
220 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UCollationPCE)
222 UCollationPCE::UCollationPCE(UCollationElements *elems)
223 {
224 init(elems->iteratordata_.coll);
225 }
227 void UCollationPCE::init(const UCollator *coll)
228 {
229 UErrorCode status = U_ZERO_ERROR;
231 strength = ucol_getStrength(coll);
232 toShift = ucol_getAttribute(coll, UCOL_ALTERNATE_HANDLING, &status) == UCOL_SHIFTED;
233 isShifted = FALSE;
234 variableTop = coll->variableTopValue << 16;
235 }
237 UCollationPCE::~UCollationPCE()
238 {
239 // nothing to do
240 }
243 U_NAMESPACE_END
246 inline uint64_t processCE(UCollationElements *elems, uint32_t ce)
247 {
248 uint64_t primary = 0, secondary = 0, tertiary = 0, quaternary = 0;
250 // This is clean, but somewhat slow...
251 // We could apply the mask to ce and then
252 // just get all three orders...
253 switch(elems->pce->strength) {
254 default:
255 tertiary = ucol_tertiaryOrder(ce);
256 /* note fall-through */
258 case UCOL_SECONDARY:
259 secondary = ucol_secondaryOrder(ce);
260 /* note fall-through */
262 case UCOL_PRIMARY:
263 primary = ucol_primaryOrder(ce);
264 }
266 // **** This should probably handle continuations too. ****
267 // **** That means that we need 24 bits for the primary ****
268 // **** instead of the 16 that we're currently using. ****
269 // **** So we can lay out the 64 bits as: 24.12.12.16. ****
270 // **** Another complication with continuations is that ****
271 // **** the *second* CE is marked as a continuation, so ****
272 // **** we always have to peek ahead to know how long ****
273 // **** the primary is... ****
274 if ((elems->pce->toShift && elems->pce->variableTop > ce && primary != 0)
275 || (elems->pce->isShifted && primary == 0)) {
277 if (primary == 0) {
278 return UCOL_IGNORABLE;
279 }
281 if (elems->pce->strength >= UCOL_QUATERNARY) {
282 quaternary = primary;
283 }
285 primary = secondary = tertiary = 0;
286 elems->pce->isShifted = TRUE;
287 } else {
288 if (elems->pce->strength >= UCOL_QUATERNARY) {
289 quaternary = 0xFFFF;
290 }
292 elems->pce->isShifted = FALSE;
293 }
295 return primary << 48 | secondary << 32 | tertiary << 16 | quaternary;
296 }
298 U_CAPI void U_EXPORT2
299 uprv_init_pce(const UCollationElements *elems)
300 {
301 if (elems->pce != NULL) {
302 elems->pce->init(elems->iteratordata_.coll);
303 }
304 }
308 /* public methods ---------------------------------------------------- */
310 U_CAPI UCollationElements* U_EXPORT2
311 ucol_openElements(const UCollator *coll,
312 const UChar *text,
313 int32_t textLength,
314 UErrorCode *status)
315 {
316 if (U_FAILURE(*status)) {
317 return NULL;
318 }
320 UCollationElements *result = new UCollationElements;
321 if (result == NULL) {
322 *status = U_MEMORY_ALLOCATION_ERROR;
323 return NULL;
324 }
326 result->reset_ = TRUE;
327 result->isWritable = FALSE;
328 result->pce = NULL;
330 if (text == NULL) {
331 textLength = 0;
332 }
333 uprv_init_collIterate(coll, text, textLength, &result->iteratordata_, status);
335 return result;
336 }
339 U_CAPI void U_EXPORT2
340 ucol_closeElements(UCollationElements *elems)
341 {
342 if (elems != NULL) {
343 collIterate *ci = &elems->iteratordata_;
345 if (ci->extendCEs) {
346 uprv_free(ci->extendCEs);
347 }
349 if (ci->offsetBuffer) {
350 uprv_free(ci->offsetBuffer);
351 }
353 if (elems->isWritable && elems->iteratordata_.string != NULL)
354 {
355 uprv_free((UChar *)elems->iteratordata_.string);
356 }
358 if (elems->pce != NULL) {
359 delete elems->pce;
360 }
362 delete elems;
363 }
364 }
366 U_CAPI void U_EXPORT2
367 ucol_reset(UCollationElements *elems)
368 {
369 collIterate *ci = &(elems->iteratordata_);
370 elems->reset_ = TRUE;
371 ci->pos = ci->string;
372 if ((ci->flags & UCOL_ITER_HASLEN) == 0 || ci->endp == NULL) {
373 ci->endp = ci->string + u_strlen(ci->string);
374 }
375 ci->CEpos = ci->toReturn = ci->CEs;
376 ci->flags = (ci->flags & UCOL_FORCE_HAN_IMPLICIT) | UCOL_ITER_HASLEN;
377 if (ci->coll->normalizationMode == UCOL_ON) {
378 ci->flags |= UCOL_ITER_NORM;
379 }
381 ci->writableBuffer.remove();
382 ci->fcdPosition = NULL;
384 //ci->offsetReturn = ci->offsetStore = NULL;
385 ci->offsetRepeatCount = ci->offsetRepeatValue = 0;
386 }
388 U_CAPI void U_EXPORT2
389 ucol_forceHanImplicit(UCollationElements *elems, UErrorCode *status)
390 {
391 if (U_FAILURE(*status)) {
392 return;
393 }
395 if (elems == NULL) {
396 *status = U_ILLEGAL_ARGUMENT_ERROR;
397 return;
398 }
400 elems->iteratordata_.flags |= UCOL_FORCE_HAN_IMPLICIT;
401 }
403 U_CAPI int32_t U_EXPORT2
404 ucol_next(UCollationElements *elems,
405 UErrorCode *status)
406 {
407 int32_t result;
408 if (U_FAILURE(*status)) {
409 return UCOL_NULLORDER;
410 }
412 elems->reset_ = FALSE;
414 result = (int32_t)ucol_getNextCE(elems->iteratordata_.coll,
415 &elems->iteratordata_,
416 status);
418 if (result == UCOL_NO_MORE_CES) {
419 result = UCOL_NULLORDER;
420 }
421 return result;
422 }
424 U_CAPI int64_t U_EXPORT2
425 ucol_nextProcessed(UCollationElements *elems,
426 int32_t *ixLow,
427 int32_t *ixHigh,
428 UErrorCode *status)
429 {
430 const UCollator *coll = elems->iteratordata_.coll;
431 int64_t result = UCOL_IGNORABLE;
432 uint32_t low = 0, high = 0;
434 if (U_FAILURE(*status)) {
435 return UCOL_PROCESSED_NULLORDER;
436 }
438 if (elems->pce == NULL) {
439 elems->pce = new UCollationPCE(elems);
440 } else {
441 elems->pce->pceBuffer.reset();
442 }
444 elems->reset_ = FALSE;
446 do {
447 low = ucol_getOffset(elems);
448 uint32_t ce = (uint32_t) ucol_getNextCE(coll, &elems->iteratordata_, status);
449 high = ucol_getOffset(elems);
451 if (ce == UCOL_NO_MORE_CES) {
452 result = UCOL_PROCESSED_NULLORDER;
453 break;
454 }
456 result = processCE(elems, ce);
457 } while (result == UCOL_IGNORABLE);
459 if (ixLow != NULL) {
460 *ixLow = low;
461 }
463 if (ixHigh != NULL) {
464 *ixHigh = high;
465 }
467 return result;
468 }
470 U_CAPI int32_t U_EXPORT2
471 ucol_previous(UCollationElements *elems,
472 UErrorCode *status)
473 {
474 if(U_FAILURE(*status)) {
475 return UCOL_NULLORDER;
476 }
477 else
478 {
479 int32_t result;
481 if (elems->reset_ && (elems->iteratordata_.pos == elems->iteratordata_.string)) {
482 if (elems->iteratordata_.endp == NULL) {
483 elems->iteratordata_.endp = elems->iteratordata_.string +
484 u_strlen(elems->iteratordata_.string);
485 elems->iteratordata_.flags |= UCOL_ITER_HASLEN;
486 }
487 elems->iteratordata_.pos = elems->iteratordata_.endp;
488 elems->iteratordata_.fcdPosition = elems->iteratordata_.endp;
489 }
491 elems->reset_ = FALSE;
493 result = (int32_t)ucol_getPrevCE(elems->iteratordata_.coll,
494 &(elems->iteratordata_),
495 status);
497 if (result == UCOL_NO_MORE_CES) {
498 result = UCOL_NULLORDER;
499 }
501 return result;
502 }
503 }
505 U_CAPI int64_t U_EXPORT2
506 ucol_previousProcessed(UCollationElements *elems,
507 int32_t *ixLow,
508 int32_t *ixHigh,
509 UErrorCode *status)
510 {
511 const UCollator *coll = elems->iteratordata_.coll;
512 int64_t result = UCOL_IGNORABLE;
513 // int64_t primary = 0, secondary = 0, tertiary = 0, quaternary = 0;
514 // UCollationStrength strength = ucol_getStrength(coll);
515 // UBool toShift = ucol_getAttribute(coll, UCOL_ALTERNATE_HANDLING, status) == UCOL_SHIFTED;
516 // uint32_t variableTop = coll->variableTopValue;
517 int32_t low = 0, high = 0;
519 if (U_FAILURE(*status)) {
520 return UCOL_PROCESSED_NULLORDER;
521 }
523 if (elems->reset_ &&
524 (elems->iteratordata_.pos == elems->iteratordata_.string)) {
525 if (elems->iteratordata_.endp == NULL) {
526 elems->iteratordata_.endp = elems->iteratordata_.string +
527 u_strlen(elems->iteratordata_.string);
528 elems->iteratordata_.flags |= UCOL_ITER_HASLEN;
529 }
531 elems->iteratordata_.pos = elems->iteratordata_.endp;
532 elems->iteratordata_.fcdPosition = elems->iteratordata_.endp;
533 }
535 if (elems->pce == NULL) {
536 elems->pce = new UCollationPCE(elems);
537 } else {
538 //elems->pce->pceBuffer.reset();
539 }
541 elems->reset_ = FALSE;
543 while (elems->pce->pceBuffer.empty()) {
544 // buffer raw CEs up to non-ignorable primary
545 RCEBuffer rceb;
546 uint32_t ce;
548 // **** do we need to reset rceb, or will it always be empty at this point ****
549 do {
550 high = ucol_getOffset(elems);
551 ce = ucol_getPrevCE(coll, &elems->iteratordata_, status);
552 low = ucol_getOffset(elems);
554 if (ce == UCOL_NO_MORE_CES) {
555 if (! rceb.empty()) {
556 break;
557 }
559 goto finish;
560 }
562 rceb.put(ce, low, high);
563 } while ((ce & UCOL_PRIMARYMASK) == 0);
565 // process the raw CEs
566 while (! rceb.empty()) {
567 const RCEI *rcei = rceb.get();
569 result = processCE(elems, rcei->ce);
571 if (result != UCOL_IGNORABLE) {
572 elems->pce->pceBuffer.put(result, rcei->low, rcei->high);
573 }
574 }
575 }
577 finish:
578 if (elems->pce->pceBuffer.empty()) {
579 // **** Is -1 the right value for ixLow, ixHigh? ****
580 if (ixLow != NULL) {
581 *ixLow = -1;
582 }
584 if (ixHigh != NULL) {
585 *ixHigh = -1
586 ;
587 }
588 return UCOL_PROCESSED_NULLORDER;
589 }
591 const PCEI *pcei = elems->pce->pceBuffer.get();
593 if (ixLow != NULL) {
594 *ixLow = pcei->low;
595 }
597 if (ixHigh != NULL) {
598 *ixHigh = pcei->high;
599 }
601 return pcei->ce;
602 }
604 U_CAPI int32_t U_EXPORT2
605 ucol_getMaxExpansion(const UCollationElements *elems,
606 int32_t order)
607 {
608 uint8_t result;
610 #if 0
611 UCOL_GETMAXEXPANSION(elems->iteratordata_.coll, (uint32_t)order, result);
612 #else
613 const UCollator *coll = elems->iteratordata_.coll;
614 const uint32_t *start;
615 const uint32_t *limit;
616 const uint32_t *mid;
617 uint32_t strengthMask = 0;
618 uint32_t mOrder = (uint32_t) order;
620 switch (coll->strength)
621 {
622 default:
623 strengthMask |= UCOL_TERTIARYORDERMASK;
624 /* fall through */
626 case UCOL_SECONDARY:
627 strengthMask |= UCOL_SECONDARYORDERMASK;
628 /* fall through */
630 case UCOL_PRIMARY:
631 strengthMask |= UCOL_PRIMARYORDERMASK;
632 }
634 mOrder &= strengthMask;
635 start = (coll)->endExpansionCE;
636 limit = (coll)->lastEndExpansionCE;
638 while (start < limit - 1) {
639 mid = start + ((limit - start) >> 1);
640 if (mOrder <= (*mid & strengthMask)) {
641 limit = mid;
642 } else {
643 start = mid;
644 }
645 }
647 // FIXME: with a masked search, there might be more than one hit,
648 // so we need to look forward and backward from the match to find all
649 // of the hits...
650 if ((*start & strengthMask) == mOrder) {
651 result = *((coll)->expansionCESize + (start - (coll)->endExpansionCE));
652 } else if ((*limit & strengthMask) == mOrder) {
653 result = *(coll->expansionCESize + (limit - coll->endExpansionCE));
654 } else if ((mOrder & 0xFFFF) == 0x00C0) {
655 result = 2;
656 } else {
657 result = 1;
658 }
659 #endif
661 return result;
662 }
664 U_CAPI void U_EXPORT2
665 ucol_setText( UCollationElements *elems,
666 const UChar *text,
667 int32_t textLength,
668 UErrorCode *status)
669 {
670 if (U_FAILURE(*status)) {
671 return;
672 }
674 if (elems->isWritable && elems->iteratordata_.string != NULL)
675 {
676 uprv_free((UChar *)elems->iteratordata_.string);
677 }
679 if (text == NULL) {
680 textLength = 0;
681 }
683 elems->isWritable = FALSE;
685 /* free offset buffer to avoid memory leak before initializing. */
686 ucol_freeOffsetBuffer(&(elems->iteratordata_));
687 /* Ensure that previously allocated extendCEs is freed before setting to NULL. */
688 if (elems->iteratordata_.extendCEs != NULL) {
689 uprv_free(elems->iteratordata_.extendCEs);
690 }
691 uprv_init_collIterate(elems->iteratordata_.coll, text, textLength,
692 &elems->iteratordata_, status);
694 elems->reset_ = TRUE;
695 }
697 U_CAPI int32_t U_EXPORT2
698 ucol_getOffset(const UCollationElements *elems)
699 {
700 const collIterate *ci = &(elems->iteratordata_);
702 if (ci->offsetRepeatCount > 0 && ci->offsetRepeatValue != 0) {
703 return ci->offsetRepeatValue;
704 }
706 if (ci->offsetReturn != NULL) {
707 return *ci->offsetReturn;
708 }
710 // while processing characters in normalization buffer getOffset will
711 // return the next non-normalized character.
712 // should be inline with the old implementation since the old codes uses
713 // nextDecomp in normalizer which also decomposes the string till the
714 // first base character is found.
715 if (ci->flags & UCOL_ITER_INNORMBUF) {
716 if (ci->fcdPosition == NULL) {
717 return 0;
718 }
719 return (int32_t)(ci->fcdPosition - ci->string);
720 }
721 else {
722 return (int32_t)(ci->pos - ci->string);
723 }
724 }
726 U_CAPI void U_EXPORT2
727 ucol_setOffset(UCollationElements *elems,
728 int32_t offset,
729 UErrorCode *status)
730 {
731 if (U_FAILURE(*status)) {
732 return;
733 }
735 // this methods will clean up any use of the writable buffer and points to
736 // the original string
737 collIterate *ci = &(elems->iteratordata_);
738 ci->pos = ci->string + offset;
739 ci->CEpos = ci->toReturn = ci->CEs;
740 if (ci->flags & UCOL_ITER_INNORMBUF) {
741 ci->flags = ci->origFlags;
742 }
743 if ((ci->flags & UCOL_ITER_HASLEN) == 0) {
744 ci->endp = ci->string + u_strlen(ci->string);
745 ci->flags |= UCOL_ITER_HASLEN;
746 }
747 ci->fcdPosition = NULL;
748 elems->reset_ = FALSE;
750 ci->offsetReturn = NULL;
751 ci->offsetStore = ci->offsetBuffer;
752 ci->offsetRepeatCount = ci->offsetRepeatValue = 0;
753 }
755 U_CAPI int32_t U_EXPORT2
756 ucol_primaryOrder (int32_t order)
757 {
758 order &= UCOL_PRIMARYMASK;
759 return (order >> UCOL_PRIMARYORDERSHIFT);
760 }
762 U_CAPI int32_t U_EXPORT2
763 ucol_secondaryOrder (int32_t order)
764 {
765 order &= UCOL_SECONDARYMASK;
766 return (order >> UCOL_SECONDARYORDERSHIFT);
767 }
769 U_CAPI int32_t U_EXPORT2
770 ucol_tertiaryOrder (int32_t order)
771 {
772 return (order & UCOL_TERTIARYMASK);
773 }
776 void ucol_freeOffsetBuffer(collIterate *s) {
777 if (s != NULL && s->offsetBuffer != NULL) {
778 uprv_free(s->offsetBuffer);
779 s->offsetBuffer = NULL;
780 s->offsetBufferSize = 0;
781 }
782 }
784 #endif /* #if !UCONFIG_NO_COLLATION */