The Tor Browser: comparison intl/icu/source/i18n/alphaindex.cpp

--1:000000000000
+:490aab3cbe6f
+/*
+*******************************************************************************
+* Copyright (C) 2009-2013, International Business Machines Corporation and
+* others. All Rights Reserved.
+*******************************************************************************
+*/
+#include "unicode/utypes.h"
+#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_NORMALIZATION
+#include "unicode/alphaindex.h"
+#include "unicode/coleitr.h"
+#include "unicode/coll.h"
+#include "unicode/localpointer.h"
+#include "unicode/normalizer2.h"
+#include "unicode/tblcoll.h"
+#include "unicode/ulocdata.h"
+#include "unicode/uniset.h"
+#include "unicode/uobject.h"
+#include "unicode/usetiter.h"
+#include "unicode/utf16.h"
+#include "cmemory.h"
+#include "cstring.h"
+#include "uassert.h"
+#include "uvector.h"
+//#include <string>
+//#include <iostream>
+#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
+U_NAMESPACE_BEGIN
+namespace {
+/**
+* Prefix string for Chinese index buckets.
+* See http://unicode.org/repos/cldr/trunk/specs/ldml/tr35-collation.html#Collation_Indexes
+*/
+const UChar BASE[1] = { 0xFDD0 };
+const int32_t BASE_LENGTH = 1;
+UBool isOneLabelBetterThanOther(const Normalizer2 &nfkdNormalizer,
+const UnicodeString &one, const UnicodeString &other);
+}  // namespace
+static int32_t U_CALLCONV
+collatorComparator(const void *context, const void *left, const void *right);
+static int32_t U_CALLCONV
+recordCompareFn(const void *context, const void *left, const void *right);
+//  UVector<Record *> support function, delete a Record.
+static void U_CALLCONV
+alphaIndex_deleteRecord(void *obj) {
+delete static_cast<AlphabeticIndex::Record *>(obj);
+}
+namespace {
+UnicodeString *ownedString(const UnicodeString &s, LocalPointer<UnicodeString> &owned,
+UErrorCode &errorCode) {
+if (U_FAILURE(errorCode)) { return NULL; }
+if (owned.isValid()) {
+return owned.orphan();
+}
+UnicodeString *p = new UnicodeString(s);
+if (p == NULL) {
+errorCode = U_MEMORY_ALLOCATION_ERROR;
+}
+return p;
+}
+inline UnicodeString *getString(const UVector &list, int32_t i) {
+return static_cast<UnicodeString *>(list[i]);
+}
+inline AlphabeticIndex::Bucket *getBucket(const UVector &list, int32_t i) {
+return static_cast<AlphabeticIndex::Bucket *>(list[i]);
+}
+inline AlphabeticIndex::Record *getRecord(const UVector &list, int32_t i) {
+return static_cast<AlphabeticIndex::Record *>(list[i]);
+}
+/**
+* Like Java Collections.binarySearch(List, String, Comparator).
+*
+* @return the index>=0 where the item was found,
+*         or the index<0 for inserting the string at ~index in sorted order
+*/
+int32_t binarySearch(const UVector &list, const UnicodeString &s, const Collator &coll) {
+if (list.size() == 0) { return ~0; }
+int32_t start = 0;
+int32_t limit = list.size();
+for (;;) {
+int32_t i = (start + limit) / 2;
+const UnicodeString *si = static_cast<UnicodeString *>(list.elementAt(i));
+UErrorCode errorCode = U_ZERO_ERROR;
+UCollationResult cmp = coll.compare(s, *si, errorCode);
+if (cmp == UCOL_EQUAL) {
+return i;
+} else if (cmp < 0) {
+if (i == start) {
+return ~start;  // insert s before *si
+}
+limit = i;
+} else {
+if (i == start) {
+return ~(start + 1);  // insert s after *si
+}
+start = i;
+}
+}
+}
+}  // namespace
+// The BucketList is not in the anonymous namespace because only Clang
+// seems to support its use in other classes from there.
+// However, we also don't need U_I18N_API because it is not used from outside the i18n library.
+class BucketList : public UObject {
+public:
+BucketList(UVector *bucketList, UVector *publicBucketList)
+: bucketList_(bucketList), immutableVisibleList_(publicBucketList) {
+int32_t displayIndex = 0;
+for (int32_t i = 0; i < publicBucketList->size(); ++i) {
+getBucket(*publicBucketList, i)->displayIndex_ = displayIndex++;
+}
+}
+// The virtual destructor must not be inline.
+// See ticket #8454 for details.
+virtual ~BucketList();
+int32_t getBucketCount() const {
+return immutableVisibleList_->size();
+}
+int32_t getBucketIndex(const UnicodeString &name, const Collator &collatorPrimaryOnly,
+UErrorCode &errorCode) {
+// binary search
+int32_t start = 0;
+int32_t limit = bucketList_->size();
+while ((start + 1) < limit) {
+int32_t i = (start + limit) / 2;
+const AlphabeticIndex::Bucket *bucket = getBucket(*bucketList_, i);
+UCollationResult nameVsBucket =
+collatorPrimaryOnly.compare(name, bucket->lowerBoundary_, errorCode);
+if (nameVsBucket < 0) {
+limit = i;
+} else {
+start = i;
+}
+}
+const AlphabeticIndex::Bucket *bucket = getBucket(*bucketList_, start);
+if (bucket->displayBucket_ != NULL) {
+bucket = bucket->displayBucket_;
+}
+return bucket->displayIndex_;
+}
+/** All of the buckets, visible and invisible. */
+UVector *bucketList_;
+/** Just the visible buckets. */
+UVector *immutableVisibleList_;
+};
+BucketList::~BucketList() {
+delete bucketList_;
+if (immutableVisibleList_ != bucketList_) {
+delete immutableVisibleList_;
+}
+}
+AlphabeticIndex::ImmutableIndex::~ImmutableIndex() {
+delete buckets_;
+delete collatorPrimaryOnly_;
+}
+int32_t
+AlphabeticIndex::ImmutableIndex::getBucketCount() const {
+return buckets_->getBucketCount();
+}
+int32_t
+AlphabeticIndex::ImmutableIndex::getBucketIndex(
+const UnicodeString &name, UErrorCode &errorCode) const {
+return buckets_->getBucketIndex(name, *collatorPrimaryOnly_, errorCode);
+}
+const AlphabeticIndex::Bucket *
+AlphabeticIndex::ImmutableIndex::getBucket(int32_t index) const {
+if (0 <= index && index < buckets_->getBucketCount()) {
+return icu::getBucket(*buckets_->immutableVisibleList_, index);
+} else {
+return NULL;
+}
+}
+AlphabeticIndex::AlphabeticIndex(const Locale &locale, UErrorCode &status)
+: inputList_(NULL),
+labelsIterIndex_(-1), itemsIterIndex_(0), currentBucket_(NULL),
+maxLabelCount_(99),
+initialLabels_(NULL), firstCharsInScripts_(NULL),
+collator_(NULL), collatorPrimaryOnly_(NULL),
+buckets_(NULL) {
+init(&locale, status);
+}
+AlphabeticIndex::AlphabeticIndex(RuleBasedCollator *collator, UErrorCode &status)
+: inputList_(NULL),
+labelsIterIndex_(-1), itemsIterIndex_(0), currentBucket_(NULL),
+maxLabelCount_(99),
+initialLabels_(NULL), firstCharsInScripts_(NULL),
+collator_(collator), collatorPrimaryOnly_(NULL),
+buckets_(NULL) {
+init(NULL, status);
+}
+AlphabeticIndex::~AlphabeticIndex() {
+delete collator_;
+delete collatorPrimaryOnly_;
+delete firstCharsInScripts_;
+delete buckets_;
+delete inputList_;
+delete initialLabels_;
+}
+AlphabeticIndex &AlphabeticIndex::addLabels(const UnicodeSet &additions, UErrorCode &status) {
+if (U_FAILURE(status)) {
+return *this;
+}
+initialLabels_->addAll(additions);
+clearBuckets();
+return *this;
+}
+AlphabeticIndex &AlphabeticIndex::addLabels(const Locale &locale, UErrorCode &status) {
+addIndexExemplars(locale, status);
+clearBuckets();
+return *this;
+}
+AlphabeticIndex::ImmutableIndex *AlphabeticIndex::buildImmutableIndex(UErrorCode &errorCode) {
+if (U_FAILURE(errorCode)) { return NULL; }
+// In C++, the ImmutableIndex must own its copy of the BucketList,
+// even if it contains no records, for proper memory management.
+// We could clone the buckets_ if they are not NULL,
+// but that would be worth it only if this method is called multiple times,
+// or called after using the old-style bucket iterator API.
+LocalPointer<BucketList> immutableBucketList(createBucketList(errorCode));
+LocalPointer<RuleBasedCollator> coll(
+static_cast<RuleBasedCollator *>(collatorPrimaryOnly_->clone()));
+if (immutableBucketList.isNull() || coll.isNull()) {
+errorCode = U_MEMORY_ALLOCATION_ERROR;
+return NULL;
+}
+ImmutableIndex *immIndex = new ImmutableIndex(immutableBucketList.getAlias(), coll.getAlias());
+if (immIndex == NULL) {
+errorCode = U_MEMORY_ALLOCATION_ERROR;
+return NULL;
+}
+// The ImmutableIndex adopted its parameter objects.
+immutableBucketList.orphan();
+coll.orphan();
+return immIndex;
+}
+int32_t AlphabeticIndex::getBucketCount(UErrorCode &status) {
+initBuckets(status);
+if (U_FAILURE(status)) {
+return 0;
+}
+return buckets_->getBucketCount();
+}
+int32_t AlphabeticIndex::getRecordCount(UErrorCode &status) {
+if (U_FAILURE(status) || inputList_ == NULL) {
+return 0;
+}
+return inputList_->size();
+}
+void AlphabeticIndex::initLabels(UVector &indexCharacters, UErrorCode &errorCode) const {
+const Normalizer2 *nfkdNormalizer = Normalizer2::getNFKDInstance(errorCode);
+if (U_FAILURE(errorCode)) { return; }
+const UnicodeString &firstScriptBoundary = *getString(*firstCharsInScripts_, 0);
+const UnicodeString &overflowBoundary =
+*getString(*firstCharsInScripts_, firstCharsInScripts_->size() - 1);
+// We make a sorted array of elements.
+// Some of the input may be redundant.
+// That is, we might have c, ch, d, where "ch" sorts just like "c", "h".
+// We filter out those cases.
+UnicodeSetIterator iter(*initialLabels_);
+while (iter.next()) {
+const UnicodeString *item = &iter.getString();
+LocalPointer<UnicodeString> ownedItem;
+UBool checkDistinct;
+int32_t itemLength = item->length();
+if (!item->hasMoreChar32Than(0, itemLength, 1)) {
+checkDistinct = FALSE;
+} else if(item->charAt(itemLength - 1) == 0x2a &&  // '*'
+item->charAt(itemLength - 2) != 0x2a) {
+// Use a label if it is marked with one trailing star,
+// even if the label string sorts the same when all contractions are suppressed.
+ownedItem.adoptInstead(new UnicodeString(*item, 0, itemLength - 1));
+item = ownedItem.getAlias();
+if (item == NULL) {
+errorCode = U_MEMORY_ALLOCATION_ERROR;
+return;
+}
+checkDistinct = FALSE;
+} else {
+checkDistinct = TRUE;
+}
+if (collatorPrimaryOnly_->compare(*item, firstScriptBoundary, errorCode) < 0) {
+// Ignore a primary-ignorable or non-alphabetic index character.
+} else if (collatorPrimaryOnly_->compare(*item, overflowBoundary, errorCode) >= 0) {
+// Ignore an index characters that will land in the overflow bucket.
+} else if (checkDistinct &&
+collatorPrimaryOnly_->compare(*item, separated(*item), errorCode) == 0) {
+// Ignore a multi-code point index character that does not sort distinctly
+// from the sequence of its separate characters.
+} else {
+int32_t insertionPoint = binarySearch(indexCharacters, *item, *collatorPrimaryOnly_);
+if (insertionPoint < 0) {
+indexCharacters.insertElementAt(
+ownedString(*item, ownedItem, errorCode), ~insertionPoint, errorCode);
+} else {
+const UnicodeString &itemAlreadyIn = *getString(indexCharacters, insertionPoint);
+if (isOneLabelBetterThanOther(*nfkdNormalizer, *item, itemAlreadyIn)) {
+indexCharacters.setElementAt(
+ownedString(*item, ownedItem, errorCode), insertionPoint);
+}
+}
+}
+}
+if (U_FAILURE(errorCode)) { return; }
+// if the result is still too large, cut down to maxCount elements, by removing every nth element
+int32_t size = indexCharacters.size() - 1;
+if (size > maxLabelCount_) {
+int32_t count = 0;
+int32_t old = -1;
+for (int32_t i = 0; i < indexCharacters.size();) {
+++count;
+int32_t bump = count * maxLabelCount_ / size;
+if (bump == old) {
+indexCharacters.removeElementAt(i);
+} else {
+old = bump;
+++i;
+}
+}
+}
+}
+namespace {
+const UnicodeString &fixLabel(const UnicodeString &current, UnicodeString &temp) {
+if (!current.startsWith(BASE, BASE_LENGTH)) {
+return current;
+}
+UChar rest = current.charAt(BASE_LENGTH);
+if (0x2800 < rest && rest <= 0x28FF) { // stroke count
+int32_t count = rest-0x2800;
+temp.setTo((UChar)(0x30 + count % 10));
+if (count >= 10) {
+count /= 10;
+temp.insert(0, (UChar)(0x30 + count % 10));
+if (count >= 10) {
+count /= 10;
+temp.insert(0, (UChar)(0x30 + count));
+}
+}
+return temp.append((UChar)0x5283);
+}
+return temp.setTo(current, BASE_LENGTH);
+}
+UBool hasMultiplePrimaryWeights(
+CollationElementIterator &cei, int32_t variableTop,
+const UnicodeString &s, UErrorCode &errorCode) {
+cei.setText(s, errorCode);
+UBool seenPrimary = FALSE;
+for (;;) {
+int32_t ce32 = cei.next(errorCode);
+if (ce32 == CollationElementIterator::NULLORDER) {
+break;
+}
+int32_t p = CollationElementIterator::primaryOrder(ce32);
+if (p > variableTop && (ce32 & 0xc0) != 0xc0) {
+// not primary ignorable, and not a continuation CE
+if (seenPrimary) {
+return TRUE;
+}
+seenPrimary = TRUE;
+}
+}
+return FALSE;
+}
+}  // namespace
+BucketList *AlphabeticIndex::createBucketList(UErrorCode &errorCode) const {
+// Initialize indexCharacters.
+UVector indexCharacters(errorCode);
+indexCharacters.setDeleter(uprv_deleteUObject);
+initLabels(indexCharacters, errorCode);
+if (U_FAILURE(errorCode)) { return NULL; }
+// Variables for hasMultiplePrimaryWeights().
+LocalPointer<CollationElementIterator> cei(
+collatorPrimaryOnly_->createCollationElementIterator(emptyString_));
+if (cei.isNull()) {
+errorCode = U_MEMORY_ALLOCATION_ERROR;
+return NULL;
+}
+int32_t variableTop;
+if (collatorPrimaryOnly_->getAttribute(UCOL_ALTERNATE_HANDLING, errorCode) == UCOL_SHIFTED) {
+variableTop = CollationElementIterator::primaryOrder(
+(int32_t)collatorPrimaryOnly_->getVariableTop(errorCode));
+} else {
+variableTop = 0;
+}
+UBool hasInvisibleBuckets = FALSE;
+// Helper arrays for Chinese Pinyin collation.
+Bucket *asciiBuckets[26] = {
+NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
+};
+Bucket *pinyinBuckets[26] = {
+NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
+};
+UBool hasPinyin = FALSE;
+LocalPointer<UVector> bucketList(new UVector(errorCode));
+if (bucketList.isNull()) {
+errorCode = U_MEMORY_ALLOCATION_ERROR;
+return NULL;
+}
+bucketList->setDeleter(uprv_deleteUObject);
+// underflow bucket
+Bucket *bucket = new Bucket(getUnderflowLabel(), emptyString_, U_ALPHAINDEX_UNDERFLOW);
+if (bucket == NULL) {
+errorCode = U_MEMORY_ALLOCATION_ERROR;
+return NULL;
+}
+bucketList->addElement(bucket, errorCode);
+if (U_FAILURE(errorCode)) { return NULL; }
+UnicodeString temp;
+// fix up the list, adding underflow, additions, overflow
+// Insert inflow labels as needed.
+int32_t scriptIndex = -1;
+const UnicodeString *scriptUpperBoundary = &emptyString_;
+for (int32_t i = 0; i < indexCharacters.size(); ++i) {
+UnicodeString &current = *getString(indexCharacters, i);
+if (collatorPrimaryOnly_->compare(current, *scriptUpperBoundary, errorCode) >= 0) {
+// We crossed the script boundary into a new script.
+const UnicodeString &inflowBoundary = *scriptUpperBoundary;
+UBool skippedScript = FALSE;
+for (;;) {
+scriptUpperBoundary = getString(*firstCharsInScripts_, ++scriptIndex);
+if (collatorPrimaryOnly_->compare(current, *scriptUpperBoundary, errorCode) < 0) {
+break;
+}
+skippedScript = TRUE;
+}
+if (skippedScript && bucketList->size() > 1) {
+// We are skipping one or more scripts,
+// and we are not just getting out of the underflow label.
+bucket = new Bucket(getInflowLabel(), inflowBoundary, U_ALPHAINDEX_INFLOW);
+if (bucket == NULL) {
+errorCode = U_MEMORY_ALLOCATION_ERROR;
+return NULL;
+}
+bucketList->addElement(bucket, errorCode);
+}
+}
+// Add a bucket with the current label.
+bucket = new Bucket(fixLabel(current, temp), current, U_ALPHAINDEX_NORMAL);
+if (bucket == NULL) {
+errorCode = U_MEMORY_ALLOCATION_ERROR;
+return NULL;
+}
+bucketList->addElement(bucket, errorCode);
+// Remember ASCII and Pinyin buckets for Pinyin redirects.
+UChar c;
+if (current.length() == 1 && 0x41 <= (c = current.charAt(0)) && c <= 0x5A) {  // A-Z
+asciiBuckets[c - 0x41] = bucket;
+} else if (current.length() == BASE_LENGTH + 1 && current.startsWith(BASE, BASE_LENGTH) &&
+0x41 <= (c = current.charAt(BASE_LENGTH)) && c <= 0x5A) {
+pinyinBuckets[c - 0x41] = bucket;
+hasPinyin = TRUE;
+}
+// Check for multiple primary weights.
+if (!current.startsWith(BASE, BASE_LENGTH) &&
+hasMultiplePrimaryWeights(*cei, variableTop, current, errorCode) &&
+current.charAt(current.length() - 1) != 0xFFFF /* !current.endsWith("\uffff") */) {
+// "AE-ligature" or "Sch" etc.
+for (int32_t i = bucketList->size() - 2;; --i) {
+Bucket *singleBucket = getBucket(*bucketList, i);
+if (singleBucket->labelType_ != U_ALPHAINDEX_NORMAL) {
+// There is no single-character bucket since the last
+// underflow or inflow label.
+break;
+}
+if (singleBucket->displayBucket_ == NULL &&
+!hasMultiplePrimaryWeights(
+*cei, variableTop, singleBucket->lowerBoundary_, errorCode)) {
+// Add an invisible bucket that redirects strings greater than the expansion
+// to the previous single-character bucket.
+// For example, after ... Q R S Sch we add Sch\uFFFF->S
+// and after ... Q R S Sch Sch\uFFFF St we add St\uFFFF->S.
+bucket = new Bucket(emptyString_,
+UnicodeString(current).append((UChar)0xFFFF),
+U_ALPHAINDEX_NORMAL);
+if (bucket == NULL) {
+errorCode = U_MEMORY_ALLOCATION_ERROR;
+return NULL;
+}
+bucket->displayBucket_ = singleBucket;
+bucketList->addElement(bucket, errorCode);
+hasInvisibleBuckets = TRUE;
+break;
+}
+}
+}
+}
+if (U_FAILURE(errorCode)) { return NULL; }
+if (bucketList->size() == 1) {
+// No real labels, show only the underflow label.
+BucketList *bl = new BucketList(bucketList.getAlias(), bucketList.getAlias());
+if (bl == NULL) {
+errorCode = U_MEMORY_ALLOCATION_ERROR;
+return NULL;
+}
+bucketList.orphan();
+return bl;
+}
+// overflow bucket
+bucket = new Bucket(getOverflowLabel(), *scriptUpperBoundary, U_ALPHAINDEX_OVERFLOW);
+if (bucket == NULL) {
+errorCode = U_MEMORY_ALLOCATION_ERROR;
+return NULL;
+}
+bucketList->addElement(bucket, errorCode); // final
+if (hasPinyin) {
+// Redirect Pinyin buckets.
+Bucket *asciiBucket = NULL;
+for (int32_t i = 0; i < 26; ++i) {
+if (asciiBuckets[i] != NULL) {
+asciiBucket = asciiBuckets[i];
+}
+if (pinyinBuckets[i] != NULL && asciiBucket != NULL) {
+pinyinBuckets[i]->displayBucket_ = asciiBucket;
+hasInvisibleBuckets = TRUE;
+}
+}
+}
+if (U_FAILURE(errorCode)) { return NULL; }
+if (!hasInvisibleBuckets) {
+BucketList *bl = new BucketList(bucketList.getAlias(), bucketList.getAlias());
+if (bl == NULL) {
+errorCode = U_MEMORY_ALLOCATION_ERROR;
+return NULL;
+}
+bucketList.orphan();
+return bl;
+}
+// Merge inflow buckets that are visually adjacent.
+// Iterate backwards: Merge inflow into overflow rather than the other way around.
+int32_t i = bucketList->size() - 1;
+Bucket *nextBucket = getBucket(*bucketList, i);
+while (--i > 0) {
+bucket = getBucket(*bucketList, i);
+if (bucket->displayBucket_ != NULL) {
+continue;  // skip invisible buckets
+}
+if (bucket->labelType_ == U_ALPHAINDEX_INFLOW) {
+if (nextBucket->labelType_ != U_ALPHAINDEX_NORMAL) {
+bucket->displayBucket_ = nextBucket;
+continue;
+}
+}
+nextBucket = bucket;
+}
+LocalPointer<UVector> publicBucketList(new UVector(errorCode));
+if (bucketList.isNull()) {
+errorCode = U_MEMORY_ALLOCATION_ERROR;
+return NULL;
+}
+// Do not call publicBucketList->setDeleter():
+// This vector shares its objects with the bucketList.
+for (int32_t i = 0; i < bucketList->size(); ++i) {
+bucket = getBucket(*bucketList, i);
+if (bucket->displayBucket_ == NULL) {
+publicBucketList->addElement(bucket, errorCode);
+}
+}
+if (U_FAILURE(errorCode)) { return NULL; }
+BucketList *bl = new BucketList(bucketList.getAlias(), publicBucketList.getAlias());
+if (bl == NULL) {
+errorCode = U_MEMORY_ALLOCATION_ERROR;
+return NULL;
+}
+bucketList.orphan();
+publicBucketList.orphan();
+return bl;
+}
+/**
+* Creates an index, and buckets and sorts the list of records into the index.
+*/
+void AlphabeticIndex::initBuckets(UErrorCode &errorCode) {
+if (U_FAILURE(errorCode) || buckets_ != NULL) {
+return;
+}
+buckets_ = createBucketList(errorCode);
+if (U_FAILURE(errorCode) || inputList_ == NULL || inputList_->isEmpty()) {
+return;
+}
+// Sort the records by name.
+// Stable sort preserves input order of collation duplicates.
+inputList_->sortWithUComparator(recordCompareFn, collator_, errorCode);
+// Now, we traverse all of the input, which is now sorted.
+// If the item doesn't go in the current bucket, we find the next bucket that contains it.
+// This makes the process order n*log(n), since we just sort the list and then do a linear process.
+// However, if the user adds an item at a time and then gets the buckets, this isn't efficient, so
+// we need to improve it for that case.
+Bucket *currentBucket = getBucket(*buckets_->bucketList_, 0);
+int32_t bucketIndex = 1;
+Bucket *nextBucket;
+const UnicodeString *upperBoundary;
+if (bucketIndex < buckets_->bucketList_->size()) {
+nextBucket = getBucket(*buckets_->bucketList_, bucketIndex++);
+upperBoundary = &nextBucket->lowerBoundary_;
+} else {
+nextBucket = NULL;
+upperBoundary = NULL;
+}
+for (int32_t i = 0; i < inputList_->size(); ++i) {
+Record *r = getRecord(*inputList_, i);
+// if the current bucket isn't the right one, find the one that is
+// We have a special flag for the last bucket so that we don't look any further
+while (upperBoundary != NULL &&
+collatorPrimaryOnly_->compare(r->name_, *upperBoundary, errorCode) >= 0) {
+currentBucket = nextBucket;
+// now reset the boundary that we compare against
+if (bucketIndex < buckets_->bucketList_->size()) {
+nextBucket = getBucket(*buckets_->bucketList_, bucketIndex++);
+upperBoundary = &nextBucket->lowerBoundary_;
+} else {
+upperBoundary = NULL;
+}
+}
+// now put the record into the bucket.
+Bucket *bucket = currentBucket;
+if (bucket->displayBucket_ != NULL) {
+bucket = bucket->displayBucket_;
+}
+if (bucket->records_ == NULL) {
+bucket->records_ = new UVector(errorCode);
+if (bucket->records_ == NULL) {
+errorCode = U_MEMORY_ALLOCATION_ERROR;
+return;
+}
+}
+bucket->records_->addElement(r, errorCode);
+}
+}
+void AlphabeticIndex::clearBuckets() {
+if (buckets_ != NULL) {
+delete buckets_;
+buckets_ = NULL;
+internalResetBucketIterator();
+}
+}
+void AlphabeticIndex::internalResetBucketIterator() {
+labelsIterIndex_ = -1;
+currentBucket_ = NULL;
+}
+void AlphabeticIndex::addIndexExemplars(const Locale &locale, UErrorCode &status) {
+if (U_FAILURE(status)) { return; }
+// Chinese index characters, which are specific to each of the several Chinese tailorings,
+// take precedence over the single locale data exemplar set per language.
+const char *language = locale.getLanguage();
+if (uprv_strcmp(language, "zh") == 0 || uprv_strcmp(language, "ja") == 0 ||
+uprv_strcmp(language, "ko") == 0) {
+// TODO: This should be done regardless of the language, but it's expensive.
+// We should add a Collator function (can be @internal)
+// to enumerate just the contractions that start with a given code point or string.
+if (addChineseIndexCharacters(status) || U_FAILURE(status)) {
+return;
+}
+}
+LocalULocaleDataPointer uld(ulocdata_open(locale.getName(), &status));
+if (U_FAILURE(status)) {
+return;
+}
+UnicodeSet exemplars;
+ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_INDEX, &status);
+if (U_SUCCESS(status)) {
+initialLabels_->addAll(exemplars);
+return;
+}
+status = U_ZERO_ERROR;  // Clear out U_MISSING_RESOURCE_ERROR
+// The locale data did not include explicit Index characters.
+// Synthesize a set of them from the locale's standard exemplar characters.
+ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_STANDARD, &status);
+if (U_FAILURE(status)) {
+return;
+}
+// question: should we add auxiliary exemplars?
+if (exemplars.containsSome(0x61, 0x7A) /* a-z */ || exemplars.size() == 0) {
+exemplars.add(0x61, 0x7A);
+}
+if (exemplars.containsSome(0xAC00, 0xD7A3)) {  // Hangul syllables
+// cut down to small list
+exemplars.remove(0xAC00, 0xD7A3).
+add(0xAC00).add(0xB098).add(0xB2E4).add(0xB77C).
+add(0xB9C8).add(0xBC14).add(0xC0AC).add(0xC544).
+add(0xC790).add(0xCC28).add(0xCE74).add(0xD0C0).
+add(0xD30C).add(0xD558);
+}
+if (exemplars.containsSome(0x1200, 0x137F)) {  // Ethiopic block
+// cut down to small list
+// make use of the fact that Ethiopic is allocated in 8's, where
+// the base is 0 mod 8.
+UnicodeSet ethiopic(
+UNICODE_STRING_SIMPLE("[[:Block=Ethiopic:]&[:Script=Ethiopic:]]"), status);
+UnicodeSetIterator it(ethiopic);
+while (it.next() && !it.isString()) {
+if ((it.getCodepoint() & 0x7) != 0) {
+exemplars.remove(it.getCodepoint());
+}
+}
+}
+// Upper-case any that aren't already so.
+//   (We only do this for synthesized index characters.)
+UnicodeSetIterator it(exemplars);
+UnicodeString upperC;
+while (it.next()) {
+const UnicodeString &exemplarC = it.getString();
+upperC = exemplarC;
+upperC.toUpper(locale);
+initialLabels_->add(upperC);
+}
+}
+UBool AlphabeticIndex::addChineseIndexCharacters(UErrorCode &errorCode) {
+UnicodeSet contractions;
+ucol_getContractionsAndExpansions(collatorPrimaryOnly_->getUCollator(),
+contractions.toUSet(), NULL, FALSE, &errorCode);
+if (U_FAILURE(errorCode)) { return FALSE; }
+UnicodeString firstHanBoundary;
+UBool hasPinyin = FALSE;
+UnicodeSetIterator iter(contractions);
+while (iter.next()) {
+const UnicodeString &s = iter.getString();
+if (s.startsWith(BASE, BASE_LENGTH)) {
+initialLabels_->add(s);
+if (firstHanBoundary.isEmpty() ||
+collatorPrimaryOnly_->compare(s, firstHanBoundary, errorCode) < 0) {
+firstHanBoundary = s;
+}
+UChar c = s.charAt(s.length() - 1);
+if (0x41 <= c && c <= 0x5A) {  // A-Z
+hasPinyin = TRUE;
+}
+}
+}
+if (hasPinyin) {
+initialLabels_->add(0x41, 0x5A);  // A-Z
+}
+if (!firstHanBoundary.isEmpty()) {
+// The hardcoded list of script boundaries includes U+4E00
+// which is tailored to not be the first primary
+// in all Chinese tailorings except "unihan".
+// Replace U+4E00 with the first boundary string from the tailoring.
+// TODO: This becomes obsolete when the root collator gets
+// reliable script-first-primary mappings.
+int32_t hanIndex = binarySearch(
+*firstCharsInScripts_, UnicodeString((UChar)0x4E00), *collatorPrimaryOnly_);
+if (hanIndex >= 0) {
+UnicodeString *fh = new UnicodeString(firstHanBoundary);
+if (fh == NULL) {
+errorCode = U_MEMORY_ALLOCATION_ERROR;
+return FALSE;
+}
+firstCharsInScripts_->setElementAt(fh, hanIndex);
+}
+return TRUE;
+} else {
+return FALSE;
+}
+}
+/*
+* Return the string with interspersed CGJs. Input must have more than 2 codepoints.
+*/
+static const UChar CGJ = 0x034F;
+UnicodeString AlphabeticIndex::separated(const UnicodeString &item) {
+UnicodeString result;
+if (item.length() == 0) {
+return result;
+}
+int32_t i = 0;
+for (;;) {
+UChar32  cp = item.char32At(i);
+result.append(cp);
+i = item.moveIndex32(i, 1);
+if (i >= item.length()) {
+break;
+}
+result.append(CGJ);
+}
+return result;
+}
+UBool AlphabeticIndex::operator==(const AlphabeticIndex& /* other */) const {
+return FALSE;
+}
+UBool AlphabeticIndex::operator!=(const AlphabeticIndex& /* other */) const {
+return FALSE;
+}
+const RuleBasedCollator &AlphabeticIndex::getCollator() const {
+// There are no known non-RuleBasedCollator collators, and none ever expected.
+// But, in case that changes, better a null pointer than a wrong type.
+return *dynamic_cast<RuleBasedCollator *>(collator_);
+}
+const UnicodeString &AlphabeticIndex::getInflowLabel() const {
+return inflowLabel_;
+}
+const UnicodeString &AlphabeticIndex::getOverflowLabel() const {
+return overflowLabel_;
+}
+const UnicodeString &AlphabeticIndex::getUnderflowLabel() const {
+return underflowLabel_;
+}
+AlphabeticIndex &AlphabeticIndex::setInflowLabel(const UnicodeString &label, UErrorCode &/*status*/) {
+inflowLabel_ = label;
+clearBuckets();
+return *this;
+}
+AlphabeticIndex &AlphabeticIndex::setOverflowLabel(const UnicodeString &label, UErrorCode &/*status*/) {
+overflowLabel_ = label;
+clearBuckets();
+return *this;
+}
+AlphabeticIndex &AlphabeticIndex::setUnderflowLabel(const UnicodeString &label, UErrorCode &/*status*/) {
+underflowLabel_ = label;
+clearBuckets();
+return *this;
+}
+int32_t AlphabeticIndex::getMaxLabelCount() const {
+return maxLabelCount_;
+}
+AlphabeticIndex &AlphabeticIndex::setMaxLabelCount(int32_t maxLabelCount, UErrorCode &status) {
+if (U_FAILURE(status)) {
+return *this;
+}
+if (maxLabelCount <= 0) {
+status = U_ILLEGAL_ARGUMENT_ERROR;
+return *this;
+}
+maxLabelCount_ = maxLabelCount;
+clearBuckets();
+return *this;
+}
+//
+//  init() - Common code for constructors.
+//
+void AlphabeticIndex::init(const Locale *locale, UErrorCode &status) {
+if (U_FAILURE(status)) { return; }
+if (locale == NULL && collator_ == NULL) {
+status = U_ILLEGAL_ARGUMENT_ERROR;
+return;
+}
+initialLabels_         = new UnicodeSet();
+if (initialLabels_ == NULL) {
+status = U_MEMORY_ALLOCATION_ERROR;
+return;
+}
+inflowLabel_.setTo((UChar)0x2026);    // Ellipsis
+overflowLabel_ = inflowLabel_;
+underflowLabel_ = inflowLabel_;
+if (collator_ == NULL) {
+collator_ = static_cast<RuleBasedCollator *>(Collator::createInstance(*locale, status));
+if (U_FAILURE(status)) { return; }
+if (collator_ == NULL) {
+status = U_MEMORY_ALLOCATION_ERROR;
+return;
+}
+}
+collatorPrimaryOnly_ = static_cast<RuleBasedCollator *>(collator_->clone());
+if (collatorPrimaryOnly_ == NULL) {
+status = U_MEMORY_ALLOCATION_ERROR;
+return;
+}
+collatorPrimaryOnly_->setAttribute(UCOL_STRENGTH, UCOL_PRIMARY, status);
+firstCharsInScripts_ = firstStringsInScript(status);
+if (U_FAILURE(status)) { return; }
+firstCharsInScripts_->sortWithUComparator(collatorComparator, collatorPrimaryOnly_, status);
+UnicodeString _4E00((UChar)0x4E00);
+UnicodeString _1100((UChar)0x1100);
+UnicodeString _1112((UChar)0x1112);
+if (collatorPrimaryOnly_->compare(_4E00, _1112, status) <= 0 &&
+collatorPrimaryOnly_->compare(_1100, _4E00, status) <= 0) {
+// The standard Korean tailoring sorts Hanja (Han characters)
+// as secondary differences from Hangul syllables.
+// This makes U+4E00 not useful as a Han-script boundary.
+// TODO: This becomes obsolete when the root collator gets
+// reliable script-first-primary mappings.
+int32_t hanIndex = binarySearch(
+*firstCharsInScripts_, _4E00, *collatorPrimaryOnly_);
+if (hanIndex >= 0) {
+firstCharsInScripts_->removeElementAt(hanIndex);
+}
+}
+// Guard against a degenerate collator where
+// some script boundary strings are primary ignorable.
+for (;;) {
+if (U_FAILURE(status)) { return; }
+if (firstCharsInScripts_->isEmpty()) {
+// AlphabeticIndex requires some non-ignorable script boundary strings.
+status = U_ILLEGAL_ARGUMENT_ERROR;
+return;
+}
+if (collatorPrimaryOnly_->compare(
+*static_cast<UnicodeString *>(firstCharsInScripts_->elementAt(0)),
+emptyString_, status) == UCOL_EQUAL) {
+firstCharsInScripts_->removeElementAt(0);
+} else {
+break;
+}
+}
+if (locale != NULL) {
+addIndexExemplars(*locale, status);
+}
+}
+//
+//  Comparison function for UVector<UnicodeString *> sorting with a collator.
+//
+static int32_t U_CALLCONV
+collatorComparator(const void *context, const void *left, const void *right) {
+const UElement *leftElement = static_cast<const UElement *>(left);
+const UElement *rightElement = static_cast<const UElement *>(right);
+const UnicodeString *leftString  = static_cast<const UnicodeString *>(leftElement->pointer);
+const UnicodeString *rightString = static_cast<const UnicodeString *>(rightElement->pointer);
+if (leftString == rightString) {
+// Catches case where both are NULL
+return 0;
+}
+if (leftString == NULL) {
+return 1;
+};
+if (rightString == NULL) {
+return -1;
+}
+const Collator *col = static_cast<const Collator *>(context);
+UErrorCode errorCode = U_ZERO_ERROR;
+return col->compare(*leftString, *rightString, errorCode);
+}
+//
+//  Comparison function for UVector<Record *> sorting with a collator.
+//
+static int32_t U_CALLCONV
+recordCompareFn(const void *context, const void *left, const void *right) {
+const UElement *leftElement = static_cast<const UElement *>(left);
+const UElement *rightElement = static_cast<const UElement *>(right);
+const AlphabeticIndex::Record *leftRec  = static_cast<const AlphabeticIndex::Record *>(leftElement->pointer);
+const AlphabeticIndex::Record *rightRec = static_cast<const AlphabeticIndex::Record *>(rightElement->pointer);
+const Collator *col = static_cast<const Collator *>(context);
+UErrorCode errorCode = U_ZERO_ERROR;
+return col->compare(leftRec->name_, rightRec->name_, errorCode);
+}
+/**
+* This list contains one character per script that has the
+* lowest primary weight for that script in the root collator.
+* This list will be copied and sorted to account for script reordering.
+*
+* <p>TODO: This is fragile. If the first character of a script is tailored
+* so that it does not map to the script's lowest primary weight any more,
+* then the buckets will be off.
+* There are hacks in the code to handle the known CJK tailorings of U+4E00.
+*
+* <p>We use "A" not "a" because the en_US_POSIX tailoring sorts A primary-before a.
+*
+* Keep this in sync with HACK_FIRST_CHARS_IN_SCRIPTS in
+* ICU4J main/classes/collate/src/com/ibm/icu/text/AlphabeticIndex.java
+*/
+static const UChar HACK_FIRST_CHARS_IN_SCRIPTS[] =  {
+0x41, 0, 0x03B1, 0,
+0x2C81, 0, 0x0430, 0, 0x2C30, 0, 0x10D0, 0, 0x0561, 0, 0x05D0, 0, 0xD802, 0xDD00, 0, 0x0800, 0, 0x0621, 0, 0x0710, 0,
+0x0780, 0, 0x07CA, 0, 0x2D30, 0, 0x1200, 0, 0x0950, 0, 0x0985, 0, 0x0A74, 0, 0x0AD0, 0, 0x0B05, 0, 0x0BD0, 0,
+0x0C05, 0, 0x0C85, 0, 0x0D05, 0, 0x0D85, 0,
+0xAAF2, 0,  // Meetei Mayek
+0xA800, 0, 0xA882, 0, 0xD804, 0xDC83, 0,
+U16_LEAD(0x111C4), U16_TRAIL(0x111C4), 0,  // Sharada
+U16_LEAD(0x11680), U16_TRAIL(0x11680), 0,  // Takri
+0x1B83, 0,
+0xD802, 0xDE00, 0, 0x0E01, 0,
+0x0EDE, 0,  // Lao
+0xAA80, 0, 0x0F40, 0, 0x1C00, 0, 0xA840, 0, 0x1900, 0, 0x1700, 0, 0x1720, 0,
+0x1740, 0, 0x1760, 0, 0x1A00, 0, 0xA930, 0, 0xA90A, 0, 0x1000, 0,
+U16_LEAD(0x11103), U16_TRAIL(0x11103), 0,  // Chakma
+0x1780, 0, 0x1950, 0, 0x1980, 0, 0x1A20, 0,
+0xAA00, 0, 0x1B05, 0, 0xA984, 0, 0x1880, 0, 0x1C5A, 0, 0x13A0, 0, 0x1401, 0, 0x1681, 0, 0x16A0, 0, 0xD803, 0xDC00, 0,
+0xA500, 0, 0xA6A0, 0, 0x1100, 0, 0x3041, 0, 0x30A1, 0, 0x3105, 0, 0xA000, 0, 0xA4F8, 0,
+U16_LEAD(0x16F00), U16_TRAIL(0x16F00), 0,  // Miao
+0xD800, 0xDE80, 0,
+0xD800, 0xDEA0, 0, 0xD802, 0xDD20, 0, 0xD800, 0xDF00, 0, 0xD800, 0xDF30, 0, 0xD801, 0xDC28, 0, 0xD801, 0xDC50, 0,
+0xD801, 0xDC80, 0,
+U16_LEAD(0x110D0), U16_TRAIL(0x110D0), 0,  // Sora Sompeng
+0xD800, 0xDC00, 0, 0xD802, 0xDC00, 0, 0xD802, 0xDE60, 0, 0xD802, 0xDF00, 0, 0xD802, 0xDC40, 0,
+0xD802, 0xDF40, 0, 0xD802, 0xDF60, 0, 0xD800, 0xDF80, 0, 0xD800, 0xDFA0, 0, 0xD808, 0xDC00, 0, 0xD80C, 0xDC00, 0,
+U16_LEAD(0x109A0), U16_TRAIL(0x109A0), 0,  // Meroitic Cursive
+U16_LEAD(0x10980), U16_TRAIL(0x10980), 0,  // Meroitic Hieroglyphs
+0x4E00, 0,
+// TODO: The overflow bucket's lowerBoundary string should be the
+// first item after the last reordering group in the collator's script order.
+// This should normally be the first Unicode code point
+// that is unassigned (U+0378 in Unicode 6.3) and untailored.
+// However, at least up to ICU 51 the Hani reordering group includes
+// unassigned code points,
+// and there is no stable string for the start of the trailing-weights range.
+// The only known string that sorts "high" is U+FFFF.
+// When ICU separates Hani vs. unassigned reordering groups, we need to fix this,
+// and fix relevant test code.
+// Ideally, FractionalUCA.txt will have a "script first primary"
+// for unassigned code points.
+0xFFFF, 0
+};
+UVector *AlphabeticIndex::firstStringsInScript(UErrorCode &status) {
+if (U_FAILURE(status)) {
+return NULL;
+}
+UVector *dest = new UVector(status);
+if (dest == NULL) {
+status = U_MEMORY_ALLOCATION_ERROR;
+return NULL;
+}
+dest->setDeleter(uprv_deleteUObject);
+const UChar *src  = HACK_FIRST_CHARS_IN_SCRIPTS;
+const UChar *limit = src + LENGTHOF(HACK_FIRST_CHARS_IN_SCRIPTS);
+do {
+if (U_FAILURE(status)) {
+return dest;
+}
+UnicodeString *str = new UnicodeString(src, -1);
+if (str == NULL) {
+status = U_MEMORY_ALLOCATION_ERROR;
+return dest;
+}
+dest->addElement(str, status);
+src += str->length() + 1;
+} while (src < limit);
+return dest;
+}
+namespace {
+/**
+* Returns true if one index character string is "better" than the other.
+* Shorter NFKD is better, and otherwise NFKD-binary-less-than is
+* better, and otherwise binary-less-than is better.
+*/
+UBool isOneLabelBetterThanOther(const Normalizer2 &nfkdNormalizer,
+const UnicodeString &one, const UnicodeString &other) {
+// This is called with primary-equal strings, but never with one.equals(other).
+UErrorCode status = U_ZERO_ERROR;
+UnicodeString n1 = nfkdNormalizer.normalize(one, status);
+UnicodeString n2 = nfkdNormalizer.normalize(other, status);
+if (U_FAILURE(status)) { return FALSE; }
+int32_t result = n1.countChar32() - n2.countChar32();
+if (result != 0) {
+return result < 0;
+}
+result = n1.compareCodePointOrder(n2);
+if (result != 0) {
+return result < 0;
+}
+return one.compareCodePointOrder(other) < 0;
+}
+}  // namespace
+//
+//  Constructor & Destructor for AlphabeticIndex::Record
+//
+//     Records are internal only, instances are not directly surfaced in the public API.
+//     This class is mostly struct-like, with all public fields.
+AlphabeticIndex::Record::Record(const UnicodeString &name, const void *data)
+: name_(name), data_(data) {}
+AlphabeticIndex::Record::~Record() {
+}
+AlphabeticIndex & AlphabeticIndex::addRecord(const UnicodeString &name, const void *data, UErrorCode &status) {
+if (U_FAILURE(status)) {
+return *this;
+}
+if (inputList_ == NULL) {
+inputList_ = new UVector(status);
+if (inputList_ == NULL) {
+status = U_MEMORY_ALLOCATION_ERROR;
+return *this;
+}
+inputList_->setDeleter(alphaIndex_deleteRecord);
+}
+Record *r = new Record(name, data);
+if (r == NULL) {
+status = U_MEMORY_ALLOCATION_ERROR;
+return *this;
+}
+inputList_->addElement(r, status);
+clearBuckets();
+//std::string ss;
+//std::string ss2;
+//std::cout << "added record: name = \"" << r->name_.toUTF8String(ss) << "\"" <<
+//             "   sortingName = \"" << r->sortingName_.toUTF8String(ss2) << "\"" << std::endl;
+return *this;
+}
+AlphabeticIndex &AlphabeticIndex::clearRecords(UErrorCode &status) {
+if (U_SUCCESS(status) && inputList_ != NULL && !inputList_->isEmpty()) {
+inputList_->removeAllElements();
+clearBuckets();
+}
+return *this;
+}
+int32_t AlphabeticIndex::getBucketIndex(const UnicodeString &name, UErrorCode &status) {
+initBuckets(status);
+if (U_FAILURE(status)) {
+return 0;
+}
+return buckets_->getBucketIndex(name, *collatorPrimaryOnly_, status);
+}
+int32_t AlphabeticIndex::getBucketIndex() const {
+return labelsIterIndex_;
+}
+UBool AlphabeticIndex::nextBucket(UErrorCode &status) {
+if (U_FAILURE(status)) {
+return FALSE;
+}
+if (buckets_ == NULL && currentBucket_ != NULL) {
+status = U_ENUM_OUT_OF_SYNC_ERROR;
+return FALSE;
+}
+initBuckets(status);
+if (U_FAILURE(status)) {
+return FALSE;
+}
+++labelsIterIndex_;
+if (labelsIterIndex_ >= buckets_->getBucketCount()) {
+labelsIterIndex_ = buckets_->getBucketCount();
+return FALSE;
+}
+currentBucket_ = getBucket(*buckets_->immutableVisibleList_, labelsIterIndex_);
+resetRecordIterator();
+return TRUE;
+}
+const UnicodeString &AlphabeticIndex::getBucketLabel() const {
+if (currentBucket_ != NULL) {
+return currentBucket_->label_;
+} else {
+return emptyString_;
+}
+}
+UAlphabeticIndexLabelType AlphabeticIndex::getBucketLabelType() const {
+if (currentBucket_ != NULL) {
+return currentBucket_->labelType_;
+} else {
+return U_ALPHAINDEX_NORMAL;
+}
+}
+int32_t AlphabeticIndex::getBucketRecordCount() const {
+if (currentBucket_ != NULL && currentBucket_->records_ != NULL) {
+return currentBucket_->records_->size();
+} else {
+return 0;
+}
+}
+AlphabeticIndex &AlphabeticIndex::resetBucketIterator(UErrorCode &status) {
+if (U_FAILURE(status)) {
+return *this;
+}
+internalResetBucketIterator();
+return *this;
+}
+UBool AlphabeticIndex::nextRecord(UErrorCode &status) {
+if (U_FAILURE(status)) {
+return FALSE;
+}
+if (currentBucket_ == NULL) {
+// We are trying to iterate over the items in a bucket, but there is no
+// current bucket from the enumeration of buckets.
+status = U_INVALID_STATE_ERROR;
+return FALSE;
+}
+if (buckets_ == NULL) {
+status = U_ENUM_OUT_OF_SYNC_ERROR;
+return FALSE;
+}
+if (currentBucket_->records_ == NULL) {
+return FALSE;
+}
+++itemsIterIndex_;
+if (itemsIterIndex_ >= currentBucket_->records_->size()) {
+itemsIterIndex_  = currentBucket_->records_->size();
+return FALSE;
+}
+return TRUE;
+}
+const UnicodeString &AlphabeticIndex::getRecordName() const {
+const UnicodeString *retStr = &emptyString_;
+if (currentBucket_ != NULL && currentBucket_->records_ != NULL &&
+itemsIterIndex_ >= 0 &&
+itemsIterIndex_ < currentBucket_->records_->size()) {
+Record *item = static_cast<Record *>(currentBucket_->records_->elementAt(itemsIterIndex_));
+retStr = &item->name_;
+}
+return *retStr;
+}
+const void *AlphabeticIndex::getRecordData() const {
+const void *retPtr = NULL;
+if (currentBucket_ != NULL && currentBucket_->records_ != NULL &&
+itemsIterIndex_ >= 0 &&
+itemsIterIndex_ < currentBucket_->records_->size()) {
+Record *item = static_cast<Record *>(currentBucket_->records_->elementAt(itemsIterIndex_));
+retPtr = item->data_;
+}
+return retPtr;
+}
+AlphabeticIndex & AlphabeticIndex::resetRecordIterator() {
+itemsIterIndex_ = -1;
+return *this;
+}
+AlphabeticIndex::Bucket::Bucket(const UnicodeString &label,
+const UnicodeString &lowerBoundary,
+UAlphabeticIndexLabelType type)
+: label_(label), lowerBoundary_(lowerBoundary), labelType_(type),
+displayBucket_(NULL), displayIndex_(-1),
+records_(NULL) {
+}
+AlphabeticIndex::Bucket::~Bucket() {
+delete records_;
+}
+U_NAMESPACE_END
+#endif

The Tor Browser / file comparison

comparison: intl/icu/source/i18n/alphaindex.cpp

intl/icu/source/i18n/alphaindex.cpp