The Tor Browser: diff intl/icu/source/i18n/ucol.cpp

     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/i18n/ucol.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,8809 @@
     1.4 +/*
     1.5 +*******************************************************************************
     1.6 +*   Copyright (C) 1996-2013, International Business Machines
     1.7 +*   Corporation and others.  All Rights Reserved.
     1.8 +*******************************************************************************
     1.9 +*   file name:  ucol.cpp
    1.10 +*   encoding:   US-ASCII
    1.11 +*   tab size:   8 (not used)
    1.12 +*   indentation:4
    1.13 +*
    1.14 +* Modification history
    1.15 +* Date        Name      Comments
    1.16 +* 1996-1999   various members of ICU team maintained C API for collation framework
    1.17 +* 02/16/2001  synwee    Added internal method getPrevSpecialCE
    1.18 +* 03/01/2001  synwee    Added maxexpansion functionality.
    1.19 +* 03/16/2001  weiv      Collation framework is rewritten in C and made UCA compliant
    1.20 +*/
    1.21 +
    1.22 +#include "unicode/utypes.h"
    1.23 +
    1.24 +#if !UCONFIG_NO_COLLATION
    1.25 +
    1.26 +#include "unicode/bytestream.h"
    1.27 +#include "unicode/coleitr.h"
    1.28 +#include "unicode/unorm.h"
    1.29 +#include "unicode/udata.h"
    1.30 +#include "unicode/ustring.h"
    1.31 +#include "unicode/utf8.h"
    1.32 +
    1.33 +#include "ucol_imp.h"
    1.34 +#include "bocsu.h"
    1.35 +
    1.36 +#include "normalizer2impl.h"
    1.37 +#include "unorm_it.h"
    1.38 +#include "umutex.h"
    1.39 +#include "cmemory.h"
    1.40 +#include "ucln_in.h"
    1.41 +#include "cstring.h"
    1.42 +#include "utracimp.h"
    1.43 +#include "putilimp.h"
    1.44 +#include "uassert.h"
    1.45 +#include "unicode/coll.h"
    1.46 +
    1.47 +#ifdef UCOL_DEBUG
    1.48 +#include <stdio.h>
    1.49 +#endif
    1.50 +
    1.51 +U_NAMESPACE_USE
    1.52 +
    1.53 +#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
    1.54 +
    1.55 +#define LAST_BYTE_MASK_           0xFF
    1.56 +#define SECOND_LAST_BYTE_SHIFT_   8
    1.57 +
    1.58 +#define ZERO_CC_LIMIT_            0xC0
    1.59 +
    1.60 +// These are static pointers to the NFC/NFD implementation instance.
    1.61 +// Each of them is always the same between calls to u_cleanup
    1.62 +// and therefore writing to it is not synchronized.
    1.63 +// They are cleaned in ucol_cleanup
    1.64 +static const Normalizer2 *g_nfd = NULL;
    1.65 +static const Normalizer2Impl *g_nfcImpl = NULL;
    1.66 +
    1.67 +// These are values from UCA required for
    1.68 +// implicit generation and suppressing sort key compression
    1.69 +// they should regularly be in the UCA, but if one
    1.70 +// is running without UCA, it could be a problem
    1.71 +static const int32_t maxRegularPrimary  = 0x7A;
    1.72 +static const int32_t minImplicitPrimary = 0xE0;
    1.73 +static const int32_t maxImplicitPrimary = 0xE4;
    1.74 +
    1.75 +U_CDECL_BEGIN
    1.76 +// Cleanup hook (see the ucln_i18n_registerCleanup calls): clears the cached normalizer pointers.
    1.77 +static UBool U_CALLCONV ucol_cleanup(void)
    1.78 +{
    1.79 +    g_nfcImpl = NULL;
    1.80 +    g_nfd = NULL;
    1.81 +    return TRUE;
    1.82 +}
    1.83 +
    1.84 +// Returns the low 24 bits of `data` as a signed 32-bit value.
    1.85 +static int32_t U_CALLCONV _getFoldingOffset(uint32_t data) {
    1.86 +    return static_cast<int32_t>(data & 0x00FFFFFFu);
    1.87 +}
    1.88 +
    1.89 +U_CDECL_END
    1.90 +
    1.91 +// Lazily caches the NFD Normalizer2 instance in g_nfd; TRUE if cached or the lookup succeeded.
    1.92 +static inline UBool initializeNFD(UErrorCode *status) {
    1.93 +    if (g_nfd == NULL) {
    1.94 +        // First use: the instance is constant until the library is reloaded.
    1.95 +        // ucol_cleanup is registered so the cached pointers are reset at cleanup.
    1.96 +        g_nfd = Normalizer2Factory::getNFDInstance(*status);
    1.97 +        ucln_i18n_registerCleanup(UCLN_I18N_UCOL, ucol_cleanup);
    1.98 +        return U_SUCCESS(*status);
    1.99 +    }
   1.100 +    return TRUE;
   1.101 +}
   1.102 +
   1.103 +// Lazily caches the NFC Normalizer2Impl (used for FCD data) in g_nfcImpl.
   1.104 +// Returns TRUE if it is already cached or *status is a success code after the lookup.
   1.105 +static inline UBool initializeFCD(UErrorCode *status) {
   1.106 +    if (g_nfcImpl == NULL) {
   1.107 +        // The result is constant, until the library is reloaded.
   1.108 +        // Note: Alternatively, we could also store this pointer in each collIterate struct,
   1.109 +        // same as Normalizer2Factory::getImpl(collIterate->nfd).
   1.110 +        g_nfcImpl = Normalizer2Factory::getNFCImpl(*status);
   1.111 +        // ucol_cleanup is registered so the cached pointer is cleared at library cleanup.
   1.112 +        ucln_i18n_registerCleanup(UCLN_I18N_UCOL, ucol_cleanup);
   1.113 +        return U_SUCCESS(*status);
   1.114 +    }
   1.115 +    return TRUE;
   1.116 +}
   1.117 +
   1.118 +/**
   1.119 +* Initializes *s for iteration over sourceString (sourceLen < 0: NUL-terminated),
   1.120 +* taking the normalization and hiragana flags from collator.
   1.121 +* @param status set by the NFD lookup; on failure s->nfd and those flags stay unset
   1.122 +*/
   1.123 +static inline void
   1.124 +IInit_collIterate(const UCollator *collator, const UChar *sourceString,
   1.125 +                  int32_t sourceLen, collIterate *s, UErrorCode *status)
   1.126 +{
   1.127 +    s->string = s->pos = sourceString;
   1.128 +    s->origFlags = 0;
   1.129 +    s->flags = 0;
   1.130 +    s->endp = NULL;  /* NULL enables easier checking for end of string for fcdPosition */
   1.131 +    if (sourceLen >= 0) {
   1.132 +        s->flags |= UCOL_ITER_HASLEN;
   1.133 +        s->endp = (UChar *)sourceString + sourceLen;
   1.134 +    }
   1.135 +    s->extendCEs = NULL;
   1.136 +    s->extendCEsSize = 0;
   1.137 +    s->CEpos = s->toReturn = s->CEs;
   1.138 +    s->offsetBuffer = NULL;
   1.139 +    s->offsetBufferSize = 0;
   1.140 +    s->offsetReturn = s->offsetStore = NULL;
   1.141 +    s->offsetRepeatCount = s->offsetRepeatValue = 0;
   1.142 +    s->coll = collator;
   1.143 +    // Reset these before the fallible NFD lookup so the early return cannot leave them unset.
   1.144 +    s->fcdPosition = 0;
   1.145 +    s->iterator = NULL;
   1.146 +    if (!initializeNFD(status)) {
   1.147 +        return;
   1.148 +    }
   1.149 +    s->nfd = g_nfd;
   1.150 +    if (collator->normalizationMode == UCOL_ON) {
   1.151 +        s->flags |= UCOL_ITER_NORM;
   1.152 +    }
   1.153 +    if (collator->hiraganaQ == UCOL_ON && collator->strength >= UCOL_QUATERNARY) {
   1.154 +        s->flags |= UCOL_HIRAGANA_Q;
   1.155 +    }
   1.156 +}
   1.157 +
   1.158 +// Exported, out-of-line entry point to IInit_collIterate for callers in other files.
   1.159 +U_CAPI void U_EXPORT2
   1.160 +uprv_init_collIterate(const UCollator *collator,
   1.161 +                      const UChar *sourceString, int32_t sourceLen,
   1.162 +                      collIterate *s, UErrorCode *status) {
   1.163 +    IInit_collIterate(collator, sourceString, sourceLen, s, status);
   1.164 +}
   1.165 +
   1.166 +// Allocates a collIterate with operator new.
   1.167 +// Returns NULL if *status already holds a failure, or if the allocation
   1.168 +// yields NULL (then *status is set to U_MEMORY_ALLOCATION_ERROR).
   1.169 +U_CAPI collIterate * U_EXPORT2
   1.170 +uprv_new_collIterate(UErrorCode *status) {
   1.171 +    collIterate *created = NULL;
   1.172 +    if (U_SUCCESS(*status)) {
   1.173 +        created = new collIterate;
   1.174 +        if (created == NULL) { *status = U_MEMORY_ALLOCATION_ERROR; }
   1.175 +    }
   1.176 +    return created;
   1.177 +}
   1.178 +
   1.179 +// Releases a collIterate with operator delete.
   1.180 +U_CAPI void U_EXPORT2 uprv_delete_collIterate(collIterate *s) {
   1.181 +    delete s;
   1.182 +}
   1.183 +
   1.184 +// TRUE when s is NULL or its current position equals its end pointer.
   1.185 +U_CAPI UBool U_EXPORT2 uprv_collIterateAtEnd(collIterate *s) {
   1.186 +    return !(s != NULL && s->pos != s->endp);
   1.187 +}
   1.188 +
   1.189 +/**
   1.190 +* Saves the position and flags of a collIterate so that loadState() can restore them.
   1.191 +* @param data collIterate to back up
   1.192 +* @param backup receives the saved state
   1.193 +*/
   1.194 +static inline void
   1.195 +backupState(const collIterate *data, collIterateState *backup)
   1.196 +{
   1.197 +    backup->pos           = data->pos;
   1.198 +    backup->fcdPosition   = data->fcdPosition;
   1.199 +    backup->flags         = data->flags;
   1.200 +    backup->origFlags     = data->origFlags;
   1.201 +    backup->bufferaddress = data->writableBuffer.getBuffer();
   1.202 +    backup->buffersize    = data->writableBuffer.length();
   1.203 +    backup->iteratorMove  = 0;
   1.204 +    backup->iteratorIndex = 0;
   1.205 +    if (data->iterator == NULL) { return; }
   1.206 +    backup->iteratorIndex = data->iterator->getState(data->iterator);
   1.207 +    if (backup->iteratorIndex != UITER_NO_STATE) { return; }
   1.208 +    // The iterator (e.g. a normalizing one) reported UITER_NO_STATE here: step back
   1.209 +    // until it reports a state, counting the steps, then return to where we started.
   1.210 +    for (;;) {
   1.211 +        backup->iteratorIndex = data->iterator->getState(data->iterator);
   1.212 +        if (backup->iteratorIndex != UITER_NO_STATE) { break; }
   1.213 +        backup->iteratorMove++;
   1.214 +        data->iterator->move(data->iterator, -1, UITER_CURRENT);
   1.215 +    }
   1.216 +    data->iterator->move(data->iterator, backup->iteratorMove, UITER_CURRENT);
   1.217 +}
   1.218 +
   1.219 +/**
   1.220 +* Restores into the collIterate struct data the state saved by backupState()
   1.221 +* @param data collIterate to restore
   1.222 +* @param backup previously saved state (not modified)
   1.223 +* @param forwards boolean to indicate if forwards iteration is used,
   1.224 +*        false indicates backwards iteration
   1.225 +*/
   1.226 +static
   1.227 +inline void loadState(collIterate *data, const collIterateState *backup,
   1.228 +                      UBool        forwards)
   1.229 +{
   1.230 +    UErrorCode status = U_ZERO_ERROR; // local only; the outcome of setState() is not checked
   1.231 +    data->flags       = backup->flags;
   1.232 +    data->origFlags   = backup->origFlags;
   1.233 +    if(data->iterator != NULL) {
   1.234 +        // restore the saved iterator state, then advance by the iteratorMove steps recorded at backup time
   1.235 +        data->iterator->setState(data->iterator, backup->iteratorIndex, &status);
   1.236 +        if(backup->iteratorMove != 0) {
   1.237 +            data->iterator->move(data->iterator, backup->iteratorMove, UITER_CURRENT);
   1.238 +        }
   1.239 +    }
   1.240 +    data->pos         = backup->pos;
   1.241 +
   1.242 +    if ((data->flags & UCOL_ITER_INNORMBUF) &&
   1.243 +        data->writableBuffer.getBuffer() != backup->bufferaddress) {
   1.244 +        /*
   1.245 +        the writable buffer has been reallocated since the backup, so the
   1.246 +        saved position has to be recalculated relative to the new buffer.
   1.247 +        note the new buffer has to contain the contents of the old buffer.
   1.248 +        */
   1.249 +        if (forwards) { // keep the same offset from the start of the buffer
   1.250 +            data->pos = data->writableBuffer.getTerminatedBuffer() +
   1.251 +                                         (data->pos - backup->bufferaddress);
   1.252 +        }
   1.253 +        else {
   1.254 +            /* backwards direction: keep the same distance from the end of the buffer */
   1.255 +            int32_t temp = backup->buffersize -
   1.256 +                                  (int32_t)(data->pos - backup->bufferaddress);
   1.257 +            data->pos = data->writableBuffer.getTerminatedBuffer() + (data->writableBuffer.length() - temp);
   1.258 +        }
   1.259 +    }
   1.260 +    if ((data->flags & UCOL_ITER_INNORMBUF) == 0) {
   1.261 +        /*
   1.262 +        this is a little tricky.
   1.263 +        if we are initially not in the normalization buffer, even if we
   1.264 +        normalize in the later stage, the data in the buffer will be
   1.265 +        ignored, since we skip back up to the data string.
   1.266 +        however if we are already in the normalization buffer, any
   1.267 +        further normalization will pull data into the normalization
   1.268 +        buffer and modify the fcdPosition.
   1.269 +        since we are keeping the data in the buffer for use, the
   1.270 +        fcdPosition can not be reverted back.
   1.271 +        so fcdPosition is restored only when not in the normalization buffer.
   1.272 +        */
   1.273 +        data->fcdPosition = backup->fcdPosition;
   1.274 +    }
   1.275 +}
   1.276 +
   1.277 +// Moves the CE list into a newly allocated array of newCapacity entries, keeping
   1.278 +// the CEs stored so far (from the heap extension if present, else from the built-in
   1.279 +// array) and repositioning CEpos. Returns FALSE, changing nothing, if allocation fails.
   1.280 +static UBool
   1.281 +reallocCEs(collIterate *data, int32_t newCapacity) {
   1.282 +    uint32_t *source = (data->extendCEs != NULL) ? data->extendCEs : data->CEs;
   1.283 +    int32_t used = data->CEpos - source;
   1.284 +    uint32_t *grown = (uint32_t *)uprv_malloc(newCapacity * 4);
   1.285 +    if (grown == NULL) {
   1.286 +        return FALSE;
   1.287 +    }
   1.288 +    uprv_memcpy(grown, source, used * 4);
   1.289 +    uprv_free(data->extendCEs);
   1.290 +    data->CEpos = grown + used;
   1.291 +    data->extendCEs = grown;
   1.292 +    data->extendCEsSize = newCapacity;
   1.293 +    return TRUE;
   1.294 +}
   1.295 +
   1.296 +// Doubles the capacity available for CEs.
   1.297 +// The current capacity is that of the heap extension when one exists,
   1.298 +// otherwise that of the built-in CEs array.
   1.299 +// Returns FALSE if the reallocation fails.
   1.300 +static UBool
   1.301 +increaseCEsCapacity(collIterate *data) {
   1.302 +    int32_t current = (data->extendCEs != NULL) ? data->extendCEsSize
   1.303 +                                                 : LENGTHOF(data->CEs);
   1.304 +    return reallocCEs(data, 2 * current);
   1.305 +}
   1.306 +
   1.307 +// Makes sure at least minCapacity CEs fit.
   1.308 +// Does nothing when the current capacity suffices; otherwise grows to the
   1.309 +// larger of minCapacity and twice the current capacity.
   1.310 +// Returns FALSE only if the reallocation fails.
   1.311 +static UBool
   1.312 +ensureCEsCapacity(collIterate *data, int32_t minCapacity) {
   1.313 +    int32_t current = (data->extendCEs != NULL) ? data->extendCEsSize
   1.314 +                                                 : LENGTHOF(data->CEs);
   1.315 +    if (current >= minCapacity) {
   1.316 +        return TRUE;
   1.317 +    }
   1.318 +    int32_t doubled = 2 * current;
   1.319 +    return reallocCEs(data, doubled < minCapacity ? minCapacity : doubled);
   1.320 +}
   1.321 +
   1.322 +// Appends offset to the offset buffer, growing the buffer first when it is full.
   1.323 +// No-op on prior failure; sets U_MEMORY_ALLOCATION_ERROR if growing fails.
   1.324 +void collIterate::appendOffset(int32_t offset, UErrorCode &errorCode) {
   1.325 +    if (U_FAILURE(errorCode)) { return; }
   1.326 +    int32_t used = (offsetStore != NULL) ? (int32_t)(offsetStore - offsetBuffer) : 0;
   1.327 +    U_ASSERT(used >= offsetBufferSize || offsetStore != NULL);
   1.328 +    if (used >= offsetBufferSize) {
   1.329 +        int32_t grownSize = 2 * offsetBufferSize + UCOL_EXPAND_CE_BUFFER_SIZE;
   1.330 +        int32_t *grown = static_cast<int32_t *>(uprv_malloc(grownSize * 4));
   1.331 +        if (grown == NULL) {
   1.332 +            errorCode = U_MEMORY_ALLOCATION_ERROR;
   1.333 +            return;
   1.334 +        }
   1.335 +        if (used > 0) {
   1.336 +            uprv_memcpy(grown, offsetBuffer, used * 4);
   1.337 +        }
   1.338 +        uprv_free(offsetBuffer);
   1.339 +        offsetStore = grown + used;
   1.340 +        offsetBuffer = grown;
   1.341 +        offsetBufferSize = grownSize;
   1.342 +    }
   1.343 +    *offsetStore++ = offset;
   1.344 +}
   1.345 +
   1.346 +/*
   1.347 +* collIter_eos()
   1.348 +*     Checks for a collIterate being positioned at the end of
   1.349 +*     its source string. Returns TRUE at the end, FALSE otherwise.
   1.350 +*     The order of the checks below matters: each one relies on the cases already excluded.
   1.351 +*/
   1.352 +static
   1.353 +inline UBool collIter_eos(collIterate *s) {
   1.354 +    if(s->flags & UCOL_USE_ITERATOR) {
   1.355 +      return !(s->iterator->hasNext(s->iterator));
   1.356 +    }
   1.357 +    if ((s->flags & UCOL_ITER_HASLEN) == 0 && *s->pos != 0) {
   1.358 +        // Null terminated string, but not at null, so not at end.
   1.359 +        //   Whether in main or normalization buffer doesn't matter.
   1.360 +        return FALSE;
   1.361 +    }
   1.362 +
   1.363 +    // String with length.  Can't be in normalization buffer, which is always
   1.364 +    //  null terminated.
   1.365 +    if (s->flags & UCOL_ITER_HASLEN) {
   1.366 +        return (s->pos == s->endp);
   1.367 +    }
   1.368 +
   1.369 +    // We are at a null termination, could be either normalization buffer or main string.
   1.370 +    if ((s->flags & UCOL_ITER_INNORMBUF) == 0) {
   1.371 +        // At null at end of main string.
   1.372 +        return TRUE;
   1.373 +    }
   1.374 +
   1.375 +    // At null at end of normalization buffer.  Need to check whether there are
   1.376 +    //   any characters left in the main buffer (described by origFlags).
   1.377 +    if(s->origFlags & UCOL_USE_ITERATOR) {
   1.378 +      return !(s->iterator->hasNext(s->iterator));
   1.379 +    } else if ((s->origFlags & UCOL_ITER_HASLEN) == 0) {
   1.380 +        // Null terminated main string.  fcdPosition is the 'return' position into main buf.
   1.381 +        return (*s->fcdPosition == 0);
   1.382 +    }
   1.383 +    else {
   1.384 +        // Main string with an end pointer.
   1.385 +        return s->fcdPosition == s->endp;
   1.386 +    }
   1.387 +}
   1.388 +
   1.389 +/*
   1.390 +* collIter_bos()
   1.391 +*     Returns TRUE when the collIterate is positioned at the start of
   1.392 +*     its source string.
   1.393 +*
   1.394 +*/
   1.395 +static inline UBool
   1.396 +collIter_bos(collIterate *source) {
   1.397 +    // With a character iterator (in use now, or before entering the side buffer) ask the
   1.398 +    // iterator itself: going backwards we need to know whether it has more text.
   1.399 +    if ((source->flags | source->origFlags) & UCOL_USE_ITERATOR) {
   1.400 +        return !source->iterator->hasPrevious(source->iterator);
   1.401 +    }
   1.402 +    if (source->pos <= source->string) {
   1.403 +        return TRUE;
   1.404 +    }
   1.405 +    // In the normalization buffer: the previous character is a NUL and fcdPosition is NULL.
   1.406 +    return (source->flags & UCOL_ITER_INNORMBUF) &&
   1.407 +           *(source->pos - 1) == 0 && source->fcdPosition == NULL;
   1.408 +}
   1.409 +
   1.410 +/*static
   1.411 +inline UBool collIter_SimpleBos(collIterate *source) {
   1.412 +  // if we're going backwards, we need to know whether there is more in the
   1.413 +  // iterator, even if we are in the side buffer
   1.414 +  if(source->flags & UCOL_USE_ITERATOR || source->origFlags & UCOL_USE_ITERATOR) {
   1.415 +    return !source->iterator->hasPrevious(source->iterator);
   1.416 +  }
   1.417 +  if (source->pos == source->string) {
   1.418 +    return TRUE;
   1.419 +  }
   1.420 +  return FALSE;
   1.421 +}*/
   1.422 +    //return (data->pos == data->string) ||
   1.423 +
   1.424 +
   1.425 +/****************************************************************************/
   1.426 +/* Following are the open/close functions                                   */
   1.427 +/*                                                                          */
   1.428 +/****************************************************************************/
   1.429 +
   1.430 +/**
   1.431 +* Builds a collator from a binary image, optionally in caller-provided storage.
   1.432 +* @param bin binary image starting with a UCATableHeader; NULL is rejected
   1.433 +* @param length image size; up to padded header + option set means "options only"
   1.434 +* @param base data source for options-only images (versions must match); may be NULL
   1.435 +* @param fillIn storage handed to ucol_initCollator(); may be NULL
   1.436 +* @return the collator, or NULL with *status set to a failure code
   1.437 +*/
   1.438 +static UCollator*
   1.439 +ucol_initFromBinary(const uint8_t *bin, int32_t length, const UCollator *base,
   1.440 +                    UCollator *fillIn, UErrorCode *status)
   1.441 +{
   1.442 +    UCollator *result = fillIn;
   1.443 +    if(U_FAILURE(*status)) {
   1.444 +        return NULL;
   1.445 +    }
   1.446 +    if(bin == NULL) {
   1.447 +        // The header is read below; reject a missing image instead of dereferencing NULL.
   1.448 +        *status = U_ILLEGAL_ARGUMENT_ERROR;
   1.449 +        return NULL;
   1.450 +    }
   1.451 +    // We need these and we could be running without UCA
   1.452 +    uprv_uca_initImplicitConstants(status);
   1.453 +    const UCATableHeader *colData = (const UCATableHeader *)bin;
   1.454 +    // Compatibility check: UCA/UCD versions must equal those of base (if any), and the builder version must match.
   1.455 +    if((base && (uprv_memcmp(colData->UCAVersion, base->image->UCAVersion, sizeof(UVersionInfo)) != 0 ||
   1.456 +        uprv_memcmp(colData->UCDVersion, base->image->UCDVersion, sizeof(UVersionInfo)) != 0)) ||
   1.457 +        colData->version[0] != UCOL_BUILDER_VERSION)
   1.458 +    {
   1.459 +        *status = U_COLLATOR_VERSION_MISMATCH;
   1.460 +        return NULL;
   1.461 +    }
   1.462 +    if((uint32_t)length > (paddedsize(sizeof(UCATableHeader)) + paddedsize(sizeof(UColOptionSet)))) {
   1.463 +        // More than header + options (a negative length compares as large after the cast): real data.
   1.464 +        result = ucol_initCollator(colData, result, base, status);
   1.465 +        if(U_FAILURE(*status)){
   1.466 +            return NULL;
   1.467 +        }
   1.468 +        result->hasRealData = TRUE;
   1.469 +    } else if(base) {
   1.470 +        // Options only: take the data from base and apply the option set found in the image.
   1.471 +        result = ucol_initCollator(base->image, result, base, status);
   1.472 +        ucol_setOptionsFromHeader(result, (UColOptionSet *)(bin+colData->options), status);
   1.473 +        if(U_FAILURE(*status)){
   1.474 +            return NULL;
   1.475 +        }
   1.476 +        result->hasRealData = FALSE;
   1.477 +    } else {
   1.478 +        *status = U_USELESS_COLLATOR_ERROR;
   1.479 +        return NULL;
   1.480 +    }
   1.481 +    result->freeImageOnClose = FALSE;
   1.482 +    result->actualLocale = NULL;
   1.483 +    result->validLocale = NULL;
   1.484 +    result->requestedLocale = NULL;
   1.485 +    result->rules = NULL;
   1.486 +    result->rulesLength = 0;
   1.487 +    result->freeRulesOnClose = FALSE;
   1.488 +    result->ucaRules = NULL;
   1.489 +    return result;
   1.490 +}
   1.491 +
   1.492 +// Public entry point: opens a collator from a binary image by delegating to
   1.493 +// ucol_initFromBinary() with no caller-provided storage (fillIn == NULL).
   1.494 +U_CAPI UCollator* U_EXPORT2
   1.495 +ucol_openBinary(const uint8_t *bin, int32_t length,
   1.496 +                const UCollator *base, UErrorCode *status) {
   1.497 +    return ucol_initFromBinary(bin, length, base, NULL, status);
   1.498 +}
   1.499 +
   1.500 +/**
   1.501 +* Writes the binary image of a collator into buffer.
   1.502 +* A collator with real data has its image copied as is; otherwise a minimal
   1.503 +* image (header plus option set) is built from the collator's settings.
   1.504 +* @param buffer destination, written only if the whole image fits into capacity
   1.505 +* @param capacity size of buffer in bytes; a negative value is an error
   1.506 +* @param status in/out error code; U_BUFFER_OVERFLOW_ERROR if capacity is too small
   1.507 +* @return the image size in bytes (also on overflow), or 0 on the other errors
   1.508 +*/
   1.509 +U_CAPI int32_t U_EXPORT2
   1.510 +ucol_cloneBinary(const UCollator *coll, uint8_t *buffer, int32_t capacity,
   1.511 +                 UErrorCode *status)
   1.512 +{
   1.513 +    if(U_FAILURE(*status)) {
   1.514 +        return 0;
   1.515 +    }
   1.516 +    if(capacity < 0) {
   1.517 +        *status = U_ILLEGAL_ARGUMENT_ERROR;
   1.518 +        return 0;
   1.519 +    }
   1.520 +    if(coll->hasRealData == TRUE) {
   1.521 +        /* the collator has a complete image: copy it as is */
   1.522 +        int32_t imageSize = coll->image->size;
   1.523 +        if(imageSize > capacity) {
   1.524 +            *status = U_BUFFER_OVERFLOW_ERROR;
   1.525 +        } else {
   1.526 +            uprv_memcpy(buffer, coll->image, imageSize);
   1.527 +        }
   1.528 +        return imageSize;
   1.529 +    }
   1.530 +
   1.531 +    int32_t minimalSize = (int32_t)(paddedsize(sizeof(UCATableHeader))+paddedsize(sizeof(UColOptionSet)));
   1.532 +    if(minimalSize > capacity) {
   1.533 +        *status = U_BUFFER_OVERFLOW_ERROR;
   1.534 +        return minimalSize;
   1.535 +    }
   1.536 +
   1.537 +    /* build the UCATableHeader with minimal entries, starting from all zeros; */
   1.538 +    /* do not copy the header from the UCA file because its values are wrong! */
   1.539 +    uprv_memset(buffer, 0, minimalSize);
   1.540 +    UCATableHeader *header = (UCATableHeader *)buffer;
   1.541 +    header->size = minimalSize;
   1.542 +    header->magic = UCOL_HEADER_MAGIC;
   1.543 +    header->isBigEndian = U_IS_BIG_ENDIAN;
   1.544 +    header->charSetFamily = U_CHARSET_FAMILY;
   1.545 +    header->jamoSpecial = coll->image->jamoSpecial;
   1.546 +
   1.547 +    /* offset for the options, the only part of the data that is present after the header; */
   1.548 +    /* need to always set the expansion value for an upper bound of the options */
   1.549 +    header->options = sizeof(UCATableHeader);
   1.550 +    header->expansion = header->options + sizeof(UColOptionSet);
   1.551 +    /* copy UCA's version; genrb will override all but the builder version with tailoring data */
   1.552 +    uprv_memcpy(header->version, coll->image->version, sizeof(UVersionInfo));
   1.553 +    uprv_memcpy(header->UCAVersion, coll->image->UCAVersion, sizeof(UVersionInfo));
   1.554 +    uprv_memcpy(header->UCDVersion, coll->image->UCDVersion, sizeof(UVersionInfo));
   1.555 +    uprv_memcpy(header->formatVersion, coll->image->formatVersion, sizeof(UVersionInfo));
   1.556 +    /* copy the collator options */
   1.557 +    uprv_memcpy(buffer+paddedsize(sizeof(UCATableHeader)), coll->options, sizeof(UColOptionSet));
   1.558 +    return minimalSize;
   1.559 +}
   1.560 +
   1.561 +/**
   1.562 +* Clones a collator into newly allocated heap memory.
   1.563 +* The clone struct, a copy of owned rules, the reorder codes and the lead byte
   1.564 +* permutation table live in one allocation, which ucol_close() frees with the clone.
   1.565 +* @param coll collator to clone; NULL yields U_ILLEGAL_ARGUMENT_ERROR
   1.566 +* @param pBufferSize may be NULL; otherwise it is set to 1, and an input value
   1.567 +*        of 0 only preflights (deprecated): NULL is returned
   1.568 +* @param status in/out error code; U_SAFECLONE_ALLOCATED_WARNING once memory is allocated
   1.569 +* @return the clone, or NULL when preflighting or on failure
   1.570 +*/
   1.571 +U_CAPI UCollator* U_EXPORT2
   1.572 +ucol_safeClone(const UCollator *coll, void * /*stackBuffer*/, int32_t * pBufferSize, UErrorCode *status)
   1.573 +{
   1.574 +    int32_t bufferSizeNeeded = (int32_t)sizeof(UCollator);
   1.575 +    int32_t rulesSize = 0, rulesPadding = 0;
   1.576 +    int32_t defaultReorderCodesSize = 0, reorderCodesSize = 0;
   1.577 +
   1.578 +    if (status == NULL || U_FAILURE(*status)){
   1.579 +        return NULL;
   1.580 +    }
   1.581 +    if (coll == NULL) {
   1.582 +        *status = U_ILLEGAL_ARGUMENT_ERROR;
   1.583 +        return NULL;
   1.584 +    }
   1.585 +
   1.586 +    // Size the single allocation: the struct itself plus everything copied into it below.
   1.587 +    if (coll->rules && coll->freeRulesOnClose) {
   1.588 +        rulesSize = (int32_t)(coll->rulesLength + 1)*sizeof(UChar);
   1.589 +        rulesPadding = (int32_t)(bufferSizeNeeded % sizeof(UChar));
   1.590 +        bufferSizeNeeded += rulesSize + rulesPadding;
   1.591 +    }
   1.592 +    // no padding for alignment needed from here since the next two are 4 byte quantities
   1.593 +    if (coll->defaultReorderCodes) {
   1.594 +        defaultReorderCodesSize = coll->defaultReorderCodesLength * sizeof(int32_t);
   1.595 +        bufferSizeNeeded += defaultReorderCodesSize;
   1.596 +    }
   1.597 +    if (coll->reorderCodes) {
   1.598 +        reorderCodesSize = coll->reorderCodesLength * sizeof(int32_t);
   1.599 +        bufferSizeNeeded += reorderCodesSize;
   1.600 +    }
   1.601 +    if (coll->leadBytePermutationTable) {
   1.602 +        bufferSizeNeeded += 256 * sizeof(uint8_t);
   1.603 +    }
   1.604 +
   1.605 +    if (pBufferSize != NULL) {
   1.606 +        int32_t inputSize = *pBufferSize;
   1.607 +        *pBufferSize = 1;
   1.608 +        if (inputSize == 0) {
   1.609 +            return NULL;  // preflighting for deprecated functionality
   1.610 +        }
   1.611 +    }
   1.612 +
   1.613 +    char *stackBufferChars = (char *)uprv_malloc(bufferSizeNeeded);
   1.614 +    if (stackBufferChars == NULL) {
   1.615 +        *status = U_MEMORY_ALLOCATION_ERROR;
   1.616 +        return NULL;
   1.617 +    }
   1.618 +    *status = U_SAFECLONE_ALLOCATED_WARNING;
   1.619 +    // Carve the allocation into the struct and the trailing copies, in this order.
   1.620 +    UCollator *localCollator = (UCollator *)stackBufferChars;
   1.621 +    UChar *rules = (UChar *)(stackBufferChars + sizeof(UCollator) + rulesPadding);
   1.622 +    int32_t *defaultReorderCodes = (int32_t*)((uint8_t*)rules + rulesSize);
   1.623 +    int32_t *reorderCodes = (int32_t*)((uint8_t*)defaultReorderCodes + defaultReorderCodesSize);
   1.624 +    uint8_t *leadBytePermutationTable = (uint8_t*)reorderCodes + reorderCodesSize;
   1.625 +    // Preflight with a throw-away status: only the returned image size is wanted here.
   1.626 +    UErrorCode tempStatus = U_ZERO_ERROR;
   1.627 +    int32_t imageSize = ucol_cloneBinary(coll, NULL, 0, &tempStatus);
   1.628 +    uint8_t *image;
   1.629 +    UBool imageAllocated = FALSE;
   1.630 +    if (coll->freeImageOnClose) {
   1.631 +        image = (uint8_t *)uprv_malloc(imageSize);
   1.632 +        if (image == NULL) {
   1.633 +            uprv_free(stackBufferChars);  // do not leak the clone's storage
   1.634 +            *status = U_MEMORY_ALLOCATION_ERROR;
   1.635 +            return NULL;
   1.636 +        }
   1.637 +        ucol_cloneBinary(coll, image, imageSize, status);
   1.638 +        imageAllocated = TRUE;
   1.639 +    } else {
   1.640 +        image = (uint8_t *)coll->image;
   1.641 +    }
   1.642 +    localCollator = ucol_initFromBinary(image, imageSize, coll->UCA, localCollator, status);
   1.643 +    if (U_FAILURE(*status)) {
   1.644 +        // No clone is returned, so release what was allocated in this function.
   1.645 +        if (imageAllocated) {
   1.646 +            uprv_free(image);
   1.647 +        }
   1.648 +        uprv_free(stackBufferChars);
   1.649 +        return NULL;
   1.650 +    }
   1.651 +
   1.652 +    if (coll->rules) {
   1.653 +        if (coll->freeRulesOnClose) {
   1.654 +            localCollator->rules = u_strcpy(rules, coll->rules);
   1.655 +        } else {
   1.656 +            localCollator->rules = coll->rules;
   1.657 +        }
   1.658 +        localCollator->freeRulesOnClose = FALSE;
   1.659 +        localCollator->rulesLength = coll->rulesLength;
   1.660 +    }
   1.661 +
   1.662 +    // collator reordering
   1.663 +    if (coll->defaultReorderCodes) {
   1.664 +        localCollator->defaultReorderCodes =
   1.665 +            (int32_t*) uprv_memcpy(defaultReorderCodes, coll->defaultReorderCodes, coll->defaultReorderCodesLength * sizeof(int32_t));
   1.666 +        localCollator->defaultReorderCodesLength = coll->defaultReorderCodesLength;
   1.667 +        localCollator->freeDefaultReorderCodesOnClose = FALSE;
   1.668 +    }
   1.669 +    if (coll->reorderCodes) {
   1.670 +        localCollator->reorderCodes =
   1.671 +            (int32_t*)uprv_memcpy(reorderCodes, coll->reorderCodes, coll->reorderCodesLength * sizeof(int32_t));
   1.672 +        localCollator->reorderCodesLength = coll->reorderCodesLength;
   1.673 +        localCollator->freeReorderCodesOnClose = FALSE;
   1.674 +    }
   1.675 +    if (coll->leadBytePermutationTable) {
   1.676 +        localCollator->leadBytePermutationTable =
   1.677 +            (uint8_t*) uprv_memcpy(leadBytePermutationTable, coll->leadBytePermutationTable, 256);
   1.678 +        localCollator->freeLeadBytePermutationTableOnClose = FALSE;
   1.679 +    }
   1.680 +    for(int32_t i = 0; i < UCOL_ATTRIBUTE_COUNT; i++) {
   1.681 +        ucol_setAttribute(localCollator, (UColAttribute)i, ucol_getAttribute(coll, (UColAttribute)i, status), status);
   1.682 +    }
   1.683 +    // zero copies of pointers
   1.684 +    localCollator->actualLocale = NULL;
   1.685 +    localCollator->validLocale = NULL;
   1.686 +    localCollator->requestedLocale = NULL;
   1.687 +    localCollator->ucaRules = coll->ucaRules; // There should only be one copy here.
   1.688 +    localCollator->freeOnClose = TRUE;
   1.689 +    localCollator->freeImageOnClose = imageAllocated;
   1.690 +    return localCollator;
   1.691 +}
   1.692 +
   1.693 +/**
   1.694 +* Closes a collator. Frees the locale strings and latinOneCEs, then whichever
   1.695 +* of options, rules, image and reorder data the collator is flagged to own,
   1.696 +* deletes the delegate, and finally frees the struct itself if freeOnClose
   1.697 +* is set. A NULL collator is accepted; only the trace calls run.
   1.698 +*/
   1.699 +U_CAPI void U_EXPORT2
   1.700 +ucol_close(UCollator *coll)
   1.701 +{
   1.702 +    UTRACE_ENTRY_OC(UTRACE_UCOL_CLOSE);
   1.703 +    UTRACE_DATA1(UTRACE_INFO, "coll = %p", coll);
   1.704 +    if (coll != NULL) {
   1.705 +        // These are always owned by each UCollator struct,
   1.706 +        // so they are released unconditionally.
   1.707 +        if (coll->validLocale != NULL)     { uprv_free(coll->validLocale); }
   1.708 +        if (coll->actualLocale != NULL)    { uprv_free(coll->actualLocale); }
   1.709 +        if (coll->requestedLocale != NULL) { uprv_free(coll->requestedLocale); }
   1.710 +        if (coll->latinOneCEs != NULL)     { uprv_free(coll->latinOneCEs); }
   1.711 +
   1.712 +        // These are released only when the matching
   1.713 +        // free...OnClose ownership flag is set.
   1.714 +        if (coll->freeOptionsOnClose && coll->options != NULL) {
   1.715 +            uprv_free(coll->options);
   1.716 +        }
   1.717 +        if (coll->freeRulesOnClose && coll->rules != NULL) {
   1.718 +            uprv_free((UChar *)coll->rules);
   1.719 +        }
   1.720 +        if (coll->freeImageOnClose && coll->image != NULL) {
   1.721 +            uprv_free((UCATableHeader *)coll->image);
   1.722 +        }
   1.723 +        if (coll->freeLeadBytePermutationTableOnClose == TRUE && coll->leadBytePermutationTable != NULL) {
   1.724 +            uprv_free(coll->leadBytePermutationTable);
   1.725 +        }
   1.726 +        if (coll->freeDefaultReorderCodesOnClose == TRUE && coll->defaultReorderCodes != NULL) {
   1.727 +            uprv_free(coll->defaultReorderCodes);
   1.728 +        }
   1.729 +        if (coll->freeReorderCodesOnClose == TRUE && coll->reorderCodes != NULL) {
   1.730 +            uprv_free(coll->reorderCodes);
   1.731 +        }
   1.732 +
   1.733 +        if (coll->delegate != NULL) {
   1.734 +            delete (Collator*)coll->delegate;
   1.735 +        }
   1.736 +
   1.737 +        /* Here, it would be advisable to close: */
   1.738 +        /* - UData for UCA (unless we stuff it in the root resb */
   1.739 +        /* Again, do we need additional housekeeping... HMMM! */
   1.740 +        UTRACE_DATA1(UTRACE_INFO, "coll->freeOnClose: %d", coll->freeOnClose);
   1.741 +        if (coll->freeOnClose) {
   1.742 +            /* for safeClone: when freeOnClose is FALSE the struct itself */
   1.743 +            /* (the other instance data) must not be freed */
   1.744 +            uprv_free(coll);
   1.745 +        }
   1.746 +    }
   1.747 +    UTRACE_EXIT();
   1.748 +}
   1.749 +
   1.750 +void ucol_setOptionsFromHeader(UCollator* result, UColOptionSet * opts, UErrorCode *status) {
   1.751 +    if(U_FAILURE(*status)) {
   1.752 +        return;
   1.753 +    }
   1.754 +    result->caseFirst = (UColAttributeValue)opts->caseFirst;
   1.755 +    result->caseLevel = (UColAttributeValue)opts->caseLevel;
   1.756 +    result->frenchCollation = (UColAttributeValue)opts->frenchCollation;
   1.757 +    result->normalizationMode = (UColAttributeValue)opts->normalizationMode;
   1.758 +    if(result->normalizationMode == UCOL_ON && !initializeFCD(status)) {
   1.759 +        return;
   1.760 +    }
   1.761 +    result->strength = (UColAttributeValue)opts->strength;
   1.762 +    result->variableTopValue = opts->variableTopValue;
   1.763 +    result->alternateHandling = (UColAttributeValue)opts->alternateHandling;
   1.764 +    result->hiraganaQ = (UColAttributeValue)opts->hiraganaQ;
   1.765 +    result->numericCollation = (UColAttributeValue)opts->numericCollation;
   1.766 +    result->caseFirstisDefault = TRUE;
   1.767 +    result->caseLevelisDefault = TRUE;
   1.768 +    result->frenchCollationisDefault = TRUE;
   1.769 +    result->normalizationModeisDefault = TRUE;
   1.770 +    result->strengthisDefault = TRUE;
   1.771 +    result->variableTopValueisDefault = TRUE;
   1.772 +    result->alternateHandlingisDefault = TRUE;
   1.773 +    result->hiraganaQisDefault = TRUE;
   1.774 +    result->numericCollationisDefault = TRUE;
   1.775 +
   1.776 +    ucol_updateInternalState(result, status);
   1.777 +
   1.778 +    result->options = opts;
   1.779 +}
   1.780 +
   1.781 +
   1.782 +/**
   1.783 +* Approximate determination if a character is at a contraction end.
   1.784 +* Guaranteed to be TRUE if a character is at the end of a contraction,
   1.785 +* otherwise it is not deterministic.
   1.786 +* @param c character to be determined
   1.787 +* @param coll collator
   1.788 +*/
   1.789 +static
   1.790 +inline UBool ucol_contractionEndCP(UChar c, const UCollator *coll) {
   1.791 +    if (c < coll->minContrEndCP) {
   1.792 +        return FALSE;
   1.793 +    }
   1.794 +
   1.795 +    int32_t  hash = c;
   1.796 +    uint8_t  htbyte;
   1.797 +    if (hash >= UCOL_UNSAFECP_TABLE_SIZE*8) {
   1.798 +        if (U16_IS_TRAIL(c)) {
   1.799 +            return TRUE;
   1.800 +        }
   1.801 +        hash = (hash & UCOL_UNSAFECP_TABLE_MASK) + 256;
   1.802 +    }
   1.803 +    htbyte = coll->contrEndCP[hash>>3];
   1.804 +    return (((htbyte >> (hash & 7)) & 1) == 1);
   1.805 +}
   1.806 +
   1.807 +
   1.808 +
   1.809 +/*
   1.810 +*   i_getCombiningClass()
   1.811 +*        A fast, at least partly inline version of u_getCombiningClass()
   1.812 +*        This is a candidate for further optimization.  Used heavily
   1.813 +*        in contraction processing.
   1.814 +*/
   1.815 +static
   1.816 +inline uint8_t i_getCombiningClass(UChar32 c, const UCollator *coll) {
   1.817 +    uint8_t sCC = 0;
   1.818 +    if ((c >= 0x300 && ucol_unsafeCP(c, coll)) || c > 0xFFFF) {
   1.819 +        sCC = u_getCombiningClass(c);
   1.820 +    }
   1.821 +    return sCC;
   1.822 +}
   1.823 +
   1.824 +UCollator* ucol_initCollator(const UCATableHeader *image, UCollator *fillIn, const UCollator *UCA, UErrorCode *status) {
   1.825 +    UChar c;
   1.826 +    UCollator *result = fillIn;
   1.827 +    if(U_FAILURE(*status) || image == NULL) {
   1.828 +        return NULL;
   1.829 +    }
   1.830 +
   1.831 +    if(result == NULL) {
   1.832 +        result = (UCollator *)uprv_malloc(sizeof(UCollator));
   1.833 +        if(result == NULL) {
   1.834 +            *status = U_MEMORY_ALLOCATION_ERROR;
   1.835 +            return result;
   1.836 +        }
   1.837 +        result->freeOnClose = TRUE;
   1.838 +    } else {
   1.839 +        result->freeOnClose = FALSE;
   1.840 +    }
   1.841 +
   1.842 +    result->delegate = NULL;
   1.843 +
   1.844 +    result->image = image;
   1.845 +    result->mapping.getFoldingOffset = _getFoldingOffset;
   1.846 +    const uint8_t *mapping = (uint8_t*)result->image+result->image->mappingPosition;
   1.847 +    utrie_unserialize(&result->mapping, mapping, result->image->endExpansionCE - result->image->mappingPosition, status);
   1.848 +    if(U_FAILURE(*status)) {
   1.849 +        if(result->freeOnClose == TRUE) {
   1.850 +            uprv_free(result);
   1.851 +            result = NULL;
   1.852 +        }
   1.853 +        return result;
   1.854 +    }
   1.855 +
   1.856 +    result->latinOneMapping = UTRIE_GET32_LATIN1(&result->mapping);
   1.857 +    result->contractionCEs = (uint32_t*)((uint8_t*)result->image+result->image->contractionCEs);
   1.858 +    result->contractionIndex = (UChar*)((uint8_t*)result->image+result->image->contractionIndex);
   1.859 +    result->expansion = (uint32_t*)((uint8_t*)result->image+result->image->expansion);
   1.860 +    result->rules = NULL;
   1.861 +    result->rulesLength = 0;
   1.862 +    result->freeRulesOnClose = FALSE;
   1.863 +    result->defaultReorderCodes = NULL;
   1.864 +    result->defaultReorderCodesLength = 0;
   1.865 +    result->freeDefaultReorderCodesOnClose = FALSE;
   1.866 +    result->reorderCodes = NULL;
   1.867 +    result->reorderCodesLength = 0;
   1.868 +    result->freeReorderCodesOnClose = FALSE;
   1.869 +    result->leadBytePermutationTable = NULL;
   1.870 +    result->freeLeadBytePermutationTableOnClose = FALSE;
   1.871 +
   1.872 +    /* get the version info from UCATableHeader and populate the Collator struct*/
   1.873 +    result->dataVersion[0] = result->image->version[0]; /* UCA Builder version*/
   1.874 +    result->dataVersion[1] = result->image->version[1]; /* UCA Tailoring rules version*/
   1.875 +    result->dataVersion[2] = 0;
   1.876 +    result->dataVersion[3] = 0;
   1.877 +
   1.878 +    result->unsafeCP = (uint8_t *)result->image + result->image->unsafeCP;
   1.879 +    result->minUnsafeCP = 0;
   1.880 +    for (c=0; c<0x300; c++) {  // Find the smallest unsafe char.
   1.881 +        if (ucol_unsafeCP(c, result)) break;
   1.882 +    }
   1.883 +    result->minUnsafeCP = c;
   1.884 +
   1.885 +    result->contrEndCP = (uint8_t *)result->image + result->image->contrEndCP;
   1.886 +    result->minContrEndCP = 0;
   1.887 +    for (c=0; c<0x300; c++) {  // Find the Contraction-ending char.
   1.888 +        if (ucol_contractionEndCP(c, result)) break;
   1.889 +    }
   1.890 +    result->minContrEndCP = c;
   1.891 +
   1.892 +    /* max expansion tables */
   1.893 +    result->endExpansionCE = (uint32_t*)((uint8_t*)result->image +
   1.894 +                                         result->image->endExpansionCE);
   1.895 +    result->lastEndExpansionCE = result->endExpansionCE +
   1.896 +                                 result->image->endExpansionCECount - 1;
   1.897 +    result->expansionCESize = (uint8_t*)result->image +
   1.898 +                                               result->image->expansionCESize;
   1.899 +
   1.900 +
   1.901 +    //result->errorCode = *status;
   1.902 +
   1.903 +    result->latinOneCEs = NULL;
   1.904 +
   1.905 +    result->latinOneRegenTable = FALSE;
   1.906 +    result->latinOneFailed = FALSE;
   1.907 +    result->UCA = UCA;
   1.908 +
   1.909 +    /* Normally these will be set correctly later. This is the default if you use UCA or the default. */
   1.910 +    result->ucaRules = NULL;
   1.911 +    result->actualLocale = NULL;
   1.912 +    result->validLocale = NULL;
   1.913 +    result->requestedLocale = NULL;
   1.914 +    result->hasRealData = FALSE; // real data lives in .dat file...
   1.915 +    result->freeImageOnClose = FALSE;
   1.916 +
   1.917 +    /* set attributes */
   1.918 +    ucol_setOptionsFromHeader(
   1.919 +        result,
   1.920 +        (UColOptionSet*)((uint8_t*)result->image+result->image->options),
   1.921 +        status);
   1.922 +    result->freeOptionsOnClose = FALSE;
   1.923 +
   1.924 +    return result;
   1.925 +}
   1.926 +
   1.927 +/* new Mark's code */
   1.928 +
   1.929 +/**
   1.930 + * For generation of Implicit CEs
   1.931 + * @author Davis
   1.932 + *
   1.933 + * Cleaned up so that changes can be made more easily.
   1.934 + * Old values:
   1.935 +# First Implicit: E26A792D
   1.936 +# Last Implicit: E3DC70C0
   1.937 +# First CJK: E0030300
   1.938 +# Last CJK: E0A9DD00
   1.939 +# First CJK_A: E0A9DF00
   1.940 +# Last CJK_A: E0DE3100
   1.941 + */
   1.942 +/* Following is a port of Mark's code for new treatment of implicits.
   1.943 + * It is positioned here, since ucol_initUCA need to initialize the
   1.944 + * variables below according to the data in the fractional UCA.
   1.945 + */
   1.946 +
   1.947 +/**
   1.948 + * Function used to:
   1.949 + * a) collapse the 2 different Han ranges from UCA into one (in the right order), and
   1.950 + * b) bump any non-CJK characters by 10FFFF.
   1.951 + * The relevant blocks are:
   1.952 + * A:    4E00..9FFF; CJK Unified Ideographs
   1.953 + *       F900..FAFF; CJK Compatibility Ideographs
   1.954 + * B:    3400..4DBF; CJK Unified Ideographs Extension A
   1.955 + *       20000..XX;  CJK Unified Ideographs Extension B (and others later on)
   1.956 + * As long as
   1.957 + *   no new B characters are allocated between 4E00 and FAFF, and
   1.958 + *   no new A characters are outside of this range,
   1.959 + * (very high probability) this simple code will work.
   1.960 + * The reordered blocks are:
   1.961 + * Block1 is CJK
   1.962 + * Block2 is CJK_COMPAT_USED
   1.963 + * Block3 is CJK_A
   1.964 + * (all contiguous)
   1.965 + * Any other CJK gets its normal code point
   1.966 + * Any non-CJK gets +10FFFF
   1.967 + * When we reorder Block1, we make sure that it is at the very start,
   1.968 + * so that it will use a 3-byte form.
   1.969 + * Warning: we only pick up the compatibility characters that are
   1.970 + * NOT decomposed, so that block is smaller!
   1.971 + */
   1.972 +
   1.973 +// CONSTANTS
   1.974 +static const UChar32
   1.975 +    NON_CJK_OFFSET = 0x110000,
   1.976 +    UCOL_MAX_INPUT = 0x220001; // 2 * Unicode range + 2
   1.977 +
   1.978 +/**
   1.979 + * Precomputed by initImplicitConstants()
   1.980 + */
   1.981 +static int32_t
   1.982 +    final3Multiplier = 0,
   1.983 +    final4Multiplier = 0,
   1.984 +    final3Count = 0,
   1.985 +    final4Count = 0,
   1.986 +    medialCount = 0,
   1.987 +    min3Primary = 0,
   1.988 +    min4Primary = 0,
   1.989 +    max4Primary = 0,
   1.990 +    minTrail = 0,
   1.991 +    maxTrail = 0,
   1.992 +    max3Trail = 0,
   1.993 +    max4Trail = 0,
   1.994 +    min4Boundary = 0;
   1.995 +
   1.996 +static const UChar32
   1.997 +    // 4E00;<CJK Ideograph, First>;Lo;0;L;;;;;N;;;;;
   1.998 +    // 9FCC;<CJK Ideograph, Last>;Lo;0;L;;;;;N;;;;;  (Unicode 6.1)
   1.999 +    CJK_BASE = 0x4E00,
  1.1000 +    CJK_LIMIT = 0x9FCC+1,
  1.1001 +    // Unified CJK ideographs in the compatibility ideographs block.
  1.1002 +    CJK_COMPAT_USED_BASE = 0xFA0E,
  1.1003 +    CJK_COMPAT_USED_LIMIT = 0xFA2F+1,
  1.1004 +    // 3400;<CJK Ideograph Extension A, First>;Lo;0;L;;;;;N;;;;;
  1.1005 +    // 4DB5;<CJK Ideograph Extension A, Last>;Lo;0;L;;;;;N;;;;;
  1.1006 +    CJK_A_BASE = 0x3400,
  1.1007 +    CJK_A_LIMIT = 0x4DB5+1,
  1.1008 +    // 20000;<CJK Ideograph Extension B, First>;Lo;0;L;;;;;N;;;;;
  1.1009 +    // 2A6D6;<CJK Ideograph Extension B, Last>;Lo;0;L;;;;;N;;;;;
  1.1010 +    CJK_B_BASE = 0x20000,
  1.1011 +    CJK_B_LIMIT = 0x2A6D6+1,
  1.1012 +    // 2A700;<CJK Ideograph Extension C, First>;Lo;0;L;;;;;N;;;;;
  1.1013 +    // 2B734;<CJK Ideograph Extension C, Last>;Lo;0;L;;;;;N;;;;;
  1.1014 +    CJK_C_BASE = 0x2A700,
  1.1015 +    CJK_C_LIMIT = 0x2B734+1,
  1.1016 +    // 2B740;<CJK Ideograph Extension D, First>;Lo;0;L;;;;;N;;;;;
  1.1017 +    // 2B81D;<CJK Ideograph Extension D, Last>;Lo;0;L;;;;;N;;;;;
  1.1018 +    CJK_D_BASE = 0x2B740,
  1.1019 +    CJK_D_LIMIT = 0x2B81D+1;
  1.1020 +    // when adding to this list, look for all occurrences (in project)
  1.1021 +    // of CJK_C_BASE and CJK_C_LIMIT, etc. to check for code that needs changing!!!!
  1.1022 +
  1.1023 +static UChar32 swapCJK(UChar32 i) {
  1.1024 +    if (i < CJK_A_BASE) {
  1.1025 +        // non-CJK
  1.1026 +    } else if (i < CJK_A_LIMIT) {
  1.1027 +        // Extension A has lower code points than the original Unihan+compat
  1.1028 +        // but sorts higher.
  1.1029 +        return i - CJK_A_BASE
  1.1030 +                + (CJK_LIMIT - CJK_BASE)
  1.1031 +                + (CJK_COMPAT_USED_LIMIT - CJK_COMPAT_USED_BASE);
  1.1032 +    } else if (i < CJK_BASE) {
  1.1033 +        // non-CJK
  1.1034 +    } else if (i < CJK_LIMIT) {
  1.1035 +        return i - CJK_BASE;
  1.1036 +    } else if (i < CJK_COMPAT_USED_BASE) {
  1.1037 +        // non-CJK
  1.1038 +    } else if (i < CJK_COMPAT_USED_LIMIT) {
  1.1039 +        return i - CJK_COMPAT_USED_BASE
  1.1040 +                + (CJK_LIMIT - CJK_BASE);
  1.1041 +    } else if (i < CJK_B_BASE) {
  1.1042 +        // non-CJK
  1.1043 +    } else if (i < CJK_B_LIMIT) {
  1.1044 +        return i; // non-BMP-CJK
  1.1045 +    } else if (i < CJK_C_BASE) {
  1.1046 +        // non-CJK
  1.1047 +    } else if (i < CJK_C_LIMIT) {
  1.1048 +        return i; // non-BMP-CJK
  1.1049 +    } else if (i < CJK_D_BASE) {
  1.1050 +        // non-CJK
  1.1051 +    } else if (i < CJK_D_LIMIT) {
  1.1052 +        return i; // non-BMP-CJK
  1.1053 +    }
  1.1054 +    return i + NON_CJK_OFFSET; // non-CJK
  1.1055 +}
  1.1056 +
  1.1057 +U_CAPI UChar32 U_EXPORT2
  1.1058 +uprv_uca_getRawFromCodePoint(UChar32 i) {
  1.1059 +    return swapCJK(i)+1;
  1.1060 +}
  1.1061 +
  1.1062 +U_CAPI UChar32 U_EXPORT2
  1.1063 +uprv_uca_getCodePointFromRaw(UChar32 i) {
  1.1064 +    i--;
  1.1065 +    UChar32 result = 0;
  1.1066 +    if(i >= NON_CJK_OFFSET) {
  1.1067 +        result = i - NON_CJK_OFFSET;
  1.1068 +    } else if(i >= CJK_B_BASE) {
  1.1069 +        result = i;
  1.1070 +    } else if(i < CJK_A_LIMIT + (CJK_LIMIT - CJK_BASE) + (CJK_COMPAT_USED_LIMIT - CJK_COMPAT_USED_BASE)) { // rest of CJKs, compacted
  1.1071 +        if(i < CJK_LIMIT - CJK_BASE) {
  1.1072 +            result = i + CJK_BASE;
  1.1073 +        } else if(i < (CJK_LIMIT - CJK_BASE) + (CJK_COMPAT_USED_LIMIT - CJK_COMPAT_USED_BASE)) {
  1.1074 +            result = i + CJK_COMPAT_USED_BASE - (CJK_LIMIT - CJK_BASE);
  1.1075 +        } else {
  1.1076 +            result = i + CJK_A_BASE - (CJK_LIMIT - CJK_BASE) - (CJK_COMPAT_USED_LIMIT - CJK_COMPAT_USED_BASE);
  1.1077 +        }
  1.1078 +    } else {
  1.1079 +        result = -1;
  1.1080 +    }
  1.1081 +    return result;
  1.1082 +}
  1.1083 +
  1.1084 +// GET IMPLICIT PRIMARY WEIGHTS
  1.1085 +// Return value is left justified primary key
  1.1086 +U_CAPI uint32_t U_EXPORT2
  1.1087 +uprv_uca_getImplicitFromRaw(UChar32 cp) {
  1.1088 +    /*
  1.1089 +    if (cp < 0 || cp > UCOL_MAX_INPUT) {
  1.1090 +        throw new IllegalArgumentException("Code point out of range " + Utility.hex(cp));
  1.1091 +    }
  1.1092 +    */
  1.1093 +    int32_t last0 = cp - min4Boundary;
  1.1094 +    if (last0 < 0) {
  1.1095 +        int32_t last1 = cp / final3Count;
  1.1096 +        last0 = cp % final3Count;
  1.1097 +
  1.1098 +        int32_t last2 = last1 / medialCount;
  1.1099 +        last1 %= medialCount;
  1.1100 +
  1.1101 +        last0 = minTrail + last0*final3Multiplier; // spread out, leaving gap at start
  1.1102 +        last1 = minTrail + last1; // offset
  1.1103 +        last2 = min3Primary + last2; // offset
  1.1104 +        /*
  1.1105 +        if (last2 >= min4Primary) {
  1.1106 +            throw new IllegalArgumentException("4-byte out of range: " + Utility.hex(cp) + ", " + Utility.hex(last2));
  1.1107 +        }
  1.1108 +        */
  1.1109 +        return (last2 << 24) + (last1 << 16) + (last0 << 8);
  1.1110 +    } else {
  1.1111 +        int32_t last1 = last0 / final4Count;
  1.1112 +        last0 %= final4Count;
  1.1113 +
  1.1114 +        int32_t last2 = last1 / medialCount;
  1.1115 +        last1 %= medialCount;
  1.1116 +
  1.1117 +        int32_t last3 = last2 / medialCount;
  1.1118 +        last2 %= medialCount;
  1.1119 +
  1.1120 +        last0 = minTrail + last0*final4Multiplier; // spread out, leaving gap at start
  1.1121 +        last1 = minTrail + last1; // offset
  1.1122 +        last2 = minTrail + last2; // offset
  1.1123 +        last3 = min4Primary + last3; // offset
  1.1124 +        /*
  1.1125 +        if (last3 > max4Primary) {
  1.1126 +            throw new IllegalArgumentException("4-byte out of range: " + Utility.hex(cp) + ", " + Utility.hex(last3));
  1.1127 +        }
  1.1128 +        */
  1.1129 +        return (last3 << 24) + (last2 << 16) + (last1 << 8) + last0;
  1.1130 +    }
  1.1131 +}
  1.1132 +
  1.1133 +static uint32_t U_EXPORT2
  1.1134 +uprv_uca_getImplicitPrimary(UChar32 cp) {
  1.1135 +   //fprintf(stdout, "Incoming: %04x\n", cp);
  1.1136 +    //if (DEBUG) System.out.println("Incoming: " + Utility.hex(cp));
  1.1137 +
  1.1138 +    cp = swapCJK(cp);
  1.1139 +    cp++;
  1.1140 +    // we now have a range of numbers from 0 to 21FFFF.
  1.1141 +
  1.1142 +    //if (DEBUG) System.out.println("CJK swapped: " + Utility.hex(cp));
  1.1143 +    //fprintf(stdout, "CJK swapped: %04x\n", cp);
  1.1144 +
  1.1145 +    return uprv_uca_getImplicitFromRaw(cp);
  1.1146 +}
  1.1147 +
  1.1148 +/**
  1.1149 + * Converts implicit CE into raw integer ("code point")
  1.1150 + * @param implicit
  1.1151 + * @return -1 if illegal format
  1.1152 + */
  1.1153 +U_CAPI UChar32 U_EXPORT2
  1.1154 +uprv_uca_getRawFromImplicit(uint32_t implicit) {
  1.1155 +    UChar32 result;
  1.1156 +    UChar32 b3 = implicit & 0xFF;
  1.1157 +    UChar32 b2 = (implicit >> 8) & 0xFF;
  1.1158 +    UChar32 b1 = (implicit >> 16) & 0xFF;
  1.1159 +    UChar32 b0 = (implicit >> 24) & 0xFF;
  1.1160 +
  1.1161 +    // simple parameter checks
  1.1162 +    if (b0 < min3Primary || b0 > max4Primary
  1.1163 +        || b1 < minTrail || b1 > maxTrail)
  1.1164 +        return -1;
  1.1165 +    // normal offsets
  1.1166 +    b1 -= minTrail;
  1.1167 +
  1.1168 +    // take care of the final values, and compose
  1.1169 +    if (b0 < min4Primary) {
  1.1170 +        if (b2 < minTrail || b2 > max3Trail || b3 != 0)
  1.1171 +            return -1;
  1.1172 +        b2 -= minTrail;
  1.1173 +        UChar32 remainder = b2 % final3Multiplier;
  1.1174 +        if (remainder != 0)
  1.1175 +            return -1;
  1.1176 +        b0 -= min3Primary;
  1.1177 +        b2 /= final3Multiplier;
  1.1178 +        result = ((b0 * medialCount) + b1) * final3Count + b2;
  1.1179 +    } else {
  1.1180 +        if (b2 < minTrail || b2 > maxTrail
  1.1181 +            || b3 < minTrail || b3 > max4Trail)
  1.1182 +            return -1;
  1.1183 +        b2 -= minTrail;
  1.1184 +        b3 -= minTrail;
  1.1185 +        UChar32 remainder = b3 % final4Multiplier;
  1.1186 +        if (remainder != 0)
  1.1187 +            return -1;
  1.1188 +        b3 /= final4Multiplier;
  1.1189 +        b0 -= min4Primary;
  1.1190 +        result = (((b0 * medialCount) + b1) * medialCount + b2) * final4Count + b3 + min4Boundary;
  1.1191 +    }
  1.1192 +    // final check
  1.1193 +    if (result < 0 || result > UCOL_MAX_INPUT)
  1.1194 +        return -1;
  1.1195 +    return result;
  1.1196 +}
  1.1197 +
  1.1198 +
  1.1199 +static inline int32_t divideAndRoundUp(int a, int b) {
  1.1200 +    return 1 + (a-1)/b;
  1.1201 +}
  1.1202 +
  1.1203 +/* this function is either called from initUCA or from genUCA before
  1.1204 + * doing canonical closure for the UCA.
  1.1205 + */
  1.1206 +
  1.1207 +/**
  1.1208 + * Set up to generate implicits.
  1.1209 + * Maintenance Note:  this function may end up being called more than once, due
  1.1210 + *                    to threading races during initialization.  Make sure that
  1.1211 + *                    none of the Constants is ever transiently assigned an
  1.1212 + *                    incorrect value.
  1.1213 + * @param minPrimary
  1.1214 + * @param maxPrimary
  1.1215 + * @param minTrail final byte
  1.1216 + * @param maxTrail final byte
  1.1217 + * @param gap3 the gap we leave for tailoring for 3-byte forms
  1.1218 + * @param gap4 the gap we leave for tailoring for 4-byte forms
  1.1219 + */
  1.1220 +static void initImplicitConstants(int minPrimary, int maxPrimary,
  1.1221 +                                    int minTrailIn, int maxTrailIn,
  1.1222 +                                    int gap3, int primaries3count,
  1.1223 +                                    UErrorCode *status) {
  1.1224 +    // some simple parameter checks
  1.1225 +    if ((minPrimary < 0 || minPrimary >= maxPrimary || maxPrimary > 0xFF)
  1.1226 +        || (minTrailIn < 0 || minTrailIn >= maxTrailIn || maxTrailIn > 0xFF)
  1.1227 +        || (primaries3count < 1))
  1.1228 +    {
  1.1229 +        *status = U_ILLEGAL_ARGUMENT_ERROR;
  1.1230 +        return;
  1.1231 +    };
  1.1232 +
  1.1233 +    minTrail = minTrailIn;
  1.1234 +    maxTrail = maxTrailIn;
  1.1235 +
  1.1236 +    min3Primary = minPrimary;
  1.1237 +    max4Primary = maxPrimary;
  1.1238 +    // compute constants for use later.
  1.1239 +    // number of values we can use in trailing bytes
  1.1240 +    // leave room for empty values between AND above, e.g. if gap = 2
  1.1241 +    // range 3..7 => +3 -4 -5 -6 -7: so 1 value
  1.1242 +    // range 3..8 => +3 -4 -5 +6 -7 -8: so 2 values
  1.1243 +    // range 3..9 => +3 -4 -5 +6 -7 -8 -9: so 2 values
  1.1244 +    final3Multiplier = gap3 + 1;
  1.1245 +    final3Count = (maxTrail - minTrail + 1) / final3Multiplier;
  1.1246 +    max3Trail = minTrail + (final3Count - 1) * final3Multiplier;
  1.1247 +
  1.1248 +    // medials can use full range
  1.1249 +    medialCount = (maxTrail - minTrail + 1);
  1.1250 +    // find out how many values fit in each form
  1.1251 +    int32_t threeByteCount = medialCount * final3Count;
  1.1252 +    // now determine where the 3/4 boundary is.
  1.1253 +    // we use 3 bytes below the boundary, and 4 above
  1.1254 +    int32_t primariesAvailable = maxPrimary - minPrimary + 1;
  1.1255 +    int32_t primaries4count = primariesAvailable - primaries3count;
  1.1256 +
  1.1257 +
  1.1258 +    int32_t min3ByteCoverage = primaries3count * threeByteCount;
  1.1259 +    min4Primary = minPrimary + primaries3count;
  1.1260 +    min4Boundary = min3ByteCoverage;
  1.1261 +    // Now expand out the multiplier for the 4 bytes, and redo.
  1.1262 +
  1.1263 +    int32_t totalNeeded = UCOL_MAX_INPUT - min4Boundary;
  1.1264 +    int32_t neededPerPrimaryByte = divideAndRoundUp(totalNeeded, primaries4count);
  1.1265 +    int32_t neededPerFinalByte = divideAndRoundUp(neededPerPrimaryByte, medialCount * medialCount);
  1.1266 +    int32_t gap4 = (maxTrail - minTrail - 1) / neededPerFinalByte;
  1.1267 +    if (gap4 < 1) {
  1.1268 +        *status = U_ILLEGAL_ARGUMENT_ERROR;
  1.1269 +        return;
  1.1270 +    }
  1.1271 +    final4Multiplier = gap4 + 1;
  1.1272 +    final4Count = neededPerFinalByte;
  1.1273 +    max4Trail = minTrail + (final4Count - 1) * final4Multiplier;
  1.1274 +}
  1.1275 +
  1.1276 +    /**
  1.1277 +     * Supply parameters for generating implicit CEs
  1.1278 +     */
  1.1279 +U_CAPI void U_EXPORT2
  1.1280 +uprv_uca_initImplicitConstants(UErrorCode *status) {
  1.1281 +    // 13 is the largest 4-byte gap we can use without getting 2 four-byte forms.
  1.1282 +    //initImplicitConstants(minPrimary, maxPrimary, 0x04, 0xFE, 1, 1, status);
  1.1283 +    initImplicitConstants(minImplicitPrimary, maxImplicitPrimary, 0x04, 0xFE, 1, 1, status);
  1.1284 +}
  1.1285 +
  1.1286 +
  1.1287 +/*    collIterNormalize     Incremental Normalization happens here.                       */
  1.1288 +/*                          pick up the range of chars identifed by FCD,                  */
  1.1289 +/*                          normalize it into the collIterate's writable buffer,          */
  1.1290 +/*                          switch the collIterate's state to use the writable buffer.    */
  1.1291 +/*                                                                                        */
  1.1292 +static
  1.1293 +void collIterNormalize(collIterate *collationSource)
  1.1294 +{
  1.1295 +    UErrorCode  status = U_ZERO_ERROR;
  1.1296 +    const UChar *srcP = collationSource->pos - 1;      /*  Start of chars to normalize    */
  1.1297 +    const UChar *endP = collationSource->fcdPosition;  /* End of region to normalize+1    */
  1.1298 +
  1.1299 +    collationSource->nfd->normalize(UnicodeString(FALSE, srcP, (int32_t)(endP - srcP)),
  1.1300 +                                    collationSource->writableBuffer,
  1.1301 +                                    status);
  1.1302 +    if (U_FAILURE(status)) {
  1.1303 +#ifdef UCOL_DEBUG
  1.1304 +        fprintf(stderr, "collIterNormalize(), NFD failed, status = %s\n", u_errorName(status));
  1.1305 +#endif
  1.1306 +        return;
  1.1307 +    }
  1.1308 +
  1.1309 +    collationSource->pos        = collationSource->writableBuffer.getTerminatedBuffer();
  1.1310 +    collationSource->origFlags  = collationSource->flags;
  1.1311 +    collationSource->flags     |= UCOL_ITER_INNORMBUF;
  1.1312 +    collationSource->flags     &= ~(UCOL_ITER_NORM | UCOL_ITER_HASLEN | UCOL_USE_ITERATOR);
  1.1313 +}
  1.1314 +
  1.1315 +
  1.1316 +// This function takes the iterator and extracts normalized stuff up to the next boundary
  1.1317 +// It is similar in the end results to the collIterNormalize, but for the cases when we
  1.1318 +// use an iterator
  1.1319 +/*static
  1.1320 +inline void normalizeIterator(collIterate *collationSource) {
  1.1321 +  UErrorCode status = U_ZERO_ERROR;
  1.1322 +  UBool wasNormalized = FALSE;
  1.1323 +  //int32_t iterIndex = collationSource->iterator->getIndex(collationSource->iterator, UITER_CURRENT);
  1.1324 +  uint32_t iterIndex = collationSource->iterator->getState(collationSource->iterator);
  1.1325 +  int32_t normLen = unorm_next(collationSource->iterator, collationSource->writableBuffer,
  1.1326 +    (int32_t)collationSource->writableBufSize, UNORM_FCD, 0, TRUE, &wasNormalized, &status);
  1.1327 +  if(status == U_BUFFER_OVERFLOW_ERROR || normLen == (int32_t)collationSource->writableBufSize) {
  1.1328 +    // reallocate and terminate
  1.1329 +    if(!u_growBufferFromStatic(collationSource->stackWritableBuffer,
  1.1330 +                               &collationSource->writableBuffer,
  1.1331 +                               (int32_t *)&collationSource->writableBufSize, normLen + 1,
  1.1332 +                               0)
  1.1333 +    ) {
  1.1334 +    #ifdef UCOL_DEBUG
  1.1335 +        fprintf(stderr, "normalizeIterator(), out of memory\n");
  1.1336 +    #endif
  1.1337 +        return;
  1.1338 +    }
  1.1339 +    status = U_ZERO_ERROR;
  1.1340 +    //collationSource->iterator->move(collationSource->iterator, iterIndex, UITER_ZERO);
  1.1341 +    collationSource->iterator->setState(collationSource->iterator, iterIndex, &status);
  1.1342 +    normLen = unorm_next(collationSource->iterator, collationSource->writableBuffer,
  1.1343 +    (int32_t)collationSource->writableBufSize, UNORM_FCD, 0, TRUE, &wasNormalized, &status);
  1.1344 +  }
  1.1345 +  // Terminate the buffer - we already checked that it is big enough
  1.1346 +  collationSource->writableBuffer[normLen] = 0;
  1.1347 +  if(collationSource->writableBuffer != collationSource->stackWritableBuffer) {
  1.1348 +      collationSource->flags |= UCOL_ITER_ALLOCATED;
  1.1349 +  }
  1.1350 +  collationSource->pos        = collationSource->writableBuffer;
  1.1351 +  collationSource->origFlags  = collationSource->flags;
  1.1352 +  collationSource->flags     |= UCOL_ITER_INNORMBUF;
  1.1353 +  collationSource->flags     &= ~(UCOL_ITER_NORM | UCOL_ITER_HASLEN | UCOL_USE_ITERATOR);
  1.1354 +}*/
  1.1355 +
  1.1356 +
  1.1357 +/* Incremental FCD check and normalize                                                    */
  1.1358 +/*   Called from getNextCE when normalization state is suspect.                           */
  1.1359 +/*   When entering, the state is known to be this:                                        */
  1.1360 +/*      o   We are working in the main buffer of the collIterate, not the side            */
  1.1361 +/*          writable buffer.  When in the side buffer, normalization mode is always off,  */
  1.1362 +/*          so we won't get here.                                                         */
  1.1363 +/*      o   The leading combining class from the current character is 0 or                */
  1.1364 +/*          the trailing combining class of the previous char was zero.                   */
  1.1365 +/*          True because the previous call to this function will have always exited       */
  1.1366 +/*          that way, and we get called for every char where cc might be non-zero.        */
  1.1367 +static
  1.1368 +inline UBool collIterFCD(collIterate *collationSource) {   // TRUE if some char's leading cc was below the preceding trailing cc
  1.1369 +    const UChar *srcP, *endP;
  1.1370 +    uint8_t     leadingCC;
  1.1371 +    uint8_t     prevTrailingCC = 0;
  1.1372 +    uint16_t    fcd;
  1.1373 +    UBool       needNormalize = FALSE;
  1.1374 +
  1.1375 +    srcP = collationSource->pos-1;   // start the scan one code unit before pos (the char the caller just read)
  1.1376 +
  1.1377 +    if (collationSource->flags & UCOL_ITER_HASLEN) {
  1.1378 +        endP = collationSource->endp;
  1.1379 +    } else {
  1.1380 +        endP = NULL;   // no explicit length; presumably nextFCD16 treats a NULL limit as NUL-terminated -- TODO confirm
  1.1381 +    }
  1.1382 +
  1.1383 +    // Get the trailing combining class of the current character. If it's zero, we are OK.
  1.1384 +    fcd = g_nfcImpl->nextFCD16(srcP, endP);   // srcP is compared against endP and re-read below, so it appears to be advanced by this call
  1.1385 +    if (fcd != 0) {
  1.1386 +        prevTrailingCC = (uint8_t)(fcd & LAST_BYTE_MASK_);   // low byte of the FCD value is used as the trailing cc
  1.1387 +
  1.1388 +        if (prevTrailingCC != 0) {
  1.1389 +            // The current char has a non-zero trailing CC.  Scan forward until we find
  1.1390 +            //   a char with a leading cc of zero.
  1.1391 +            while (endP == NULL || srcP != endP)
  1.1392 +            {
  1.1393 +                const UChar *savedSrcP = srcP;   // remembered so the terminating char can be "un-read"
  1.1394 +
  1.1395 +                fcd = g_nfcImpl->nextFCD16(srcP, endP);
  1.1396 +                leadingCC = (uint8_t)(fcd >> SECOND_LAST_BYTE_SHIFT_);   // upper byte is used as the leading cc
  1.1397 +                if (leadingCC == 0) {
  1.1398 +                    srcP = savedSrcP;      // Hit char that is not part of combining sequence.
  1.1399 +                                           //   back up over it.  (Could be surrogate pair!)
  1.1400 +                    break;
  1.1401 +                }
  1.1402 +
  1.1403 +                if (leadingCC < prevTrailingCC) {
  1.1404 +                    needNormalize = TRUE;   // combining classes out of order; keep scanning to find the end of the run
  1.1405 +                }
  1.1406 +
  1.1407 +                prevTrailingCC = (uint8_t)(fcd & LAST_BYTE_MASK_);
  1.1408 +            }
  1.1409 +        }
  1.1410 +    }
  1.1411 +
  1.1412 +    collationSource->fcdPosition = (UChar *)srcP;   // record where the scan stopped, whether or not normalization is needed
  1.1413 +
  1.1414 +    return needNormalize;
  1.1415 +}
  1.1416 +
  1.1417 +/****************************************************************************/
  1.1418 +/* Following are the CE retrieval functions                                 */
  1.1419 +/*                                                                          */
  1.1420 +/****************************************************************************/
  1.1421 +
  1.1422 +static uint32_t getImplicit(UChar32 cp, collIterate *collationSource);
  1.1423 +static uint32_t getPrevImplicit(UChar32 cp, collIterate *collationSource);
  1.1424 +
  1.1425 +/* there should be a macro version of this function in the header file */
  1.1426 +/* This is the first function that tries to fetch a collation element  */
  1.1427 +/* If it's not successful or it encounters a more difficult situation  */
  1.1428 +/* some more sophisticated and slower functions are invoked            */
  1.1429 +static
  1.1430 +inline uint32_t ucol_IGetNextCE(const UCollator *coll, collIterate *collationSource, UErrorCode *status) {
  1.1431 +    uint32_t order = 0;
  1.1432 +    if (collationSource->CEpos > collationSource->toReturn) {       /* Are there any CEs from previous expansions? */
  1.1433 +        order = *(collationSource->toReturn++);                         /* if so, return them */
  1.1434 +        if(collationSource->CEpos == collationSource->toReturn) {   /* buffer drained: rewind both cursors to its start */
  1.1435 +            collationSource->CEpos = collationSource->toReturn = collationSource->extendCEs ? collationSource->extendCEs : collationSource->CEs;
  1.1436 +        }
  1.1437 +        return order;
  1.1438 +    }
  1.1439 +
  1.1440 +    UChar ch = 0;
  1.1441 +    collationSource->offsetReturn = NULL;
  1.1442 +
  1.1443 +    do {
  1.1444 +        for (;;)                           /* Loop handles case when incremental normalize switches   */
  1.1445 +        {                                  /*   to or from the side buffer / original string, and we  */
  1.1446 +            /*   need to start again to get the next character.        */
  1.1447 +
  1.1448 +            if ((collationSource->flags & (UCOL_ITER_HASLEN | UCOL_ITER_INNORMBUF | UCOL_ITER_NORM | UCOL_HIRAGANA_Q | UCOL_USE_ITERATOR)) == 0)
  1.1449 +            {
  1.1450 +                // The source string is null terminated and we're not working from the side buffer,
  1.1451 +                //   and we're not normalizing.  This is the fast path.
  1.1452 +                //   (We can be in the side buffer for Thai pre-vowel reordering even when not normalizing.)
  1.1453 +                ch = *collationSource->pos++;
  1.1454 +                if (ch != 0) {
  1.1455 +                    break;
  1.1456 +                }
  1.1457 +                else {
  1.1458 +                    return UCOL_NO_MORE_CES;   // note: unlike the slow path below, pos is not backed up over the NUL here
  1.1459 +                }
  1.1460 +            }
  1.1461 +
  1.1462 +            if (collationSource->flags & UCOL_ITER_HASLEN) {
  1.1463 +                // Normal path for strings when length is specified.
  1.1464 +                //   (We can't be in side buffer because it is always null terminated.)
  1.1465 +                if (collationSource->pos >= collationSource->endp) {
  1.1466 +                    // Ran off of the end of the main source string.  We're done.
  1.1467 +                    return UCOL_NO_MORE_CES;
  1.1468 +                }
  1.1469 +                ch = *collationSource->pos++;
  1.1470 +            }
  1.1471 +            else if(collationSource->flags & UCOL_USE_ITERATOR) {
  1.1472 +                UChar32 iterCh = collationSource->iterator->next(collationSource->iterator);
  1.1473 +                if(iterCh == U_SENTINEL) {
  1.1474 +                    return UCOL_NO_MORE_CES;
  1.1475 +                }
  1.1476 +                ch = (UChar)iterCh;
  1.1477 +            }
  1.1478 +            else
  1.1479 +            {
  1.1480 +                // Null terminated string.
  1.1481 +                ch = *collationSource->pos++;
  1.1482 +                if (ch == 0) {
  1.1483 +                    // Ran off end of buffer.
  1.1484 +                    if ((collationSource->flags & UCOL_ITER_INNORMBUF) == 0) {
  1.1485 +                        // Ran off end of main string. backing up one character.
  1.1486 +                        collationSource->pos--;
  1.1487 +                        return UCOL_NO_MORE_CES;
  1.1488 +                    }
  1.1489 +                    else
  1.1490 +                    {
  1.1491 +                        // Hit null in the normalize side buffer.
  1.1492 +                        // Usually this means the end of the normalized data,
  1.1493 +                        // except for one odd case: a null followed by combining chars,
  1.1494 +                        //   which is the case if we are at the start of the buffer.
  1.1495 +                        if (collationSource->pos == collationSource->writableBuffer.getBuffer()+1) {
  1.1496 +                            break;   // the NUL was the first unit of the side buffer: process it as a real character
  1.1497 +                        }
  1.1498 +
  1.1499 +                        //  Null marked end of side buffer.
  1.1500 +                        //   Revert to the main string and
  1.1501 +                        //   loop back to top to try again to get a character.
  1.1502 +                        collationSource->pos   = collationSource->fcdPosition;   // resume where the FCD scan stopped
  1.1503 +                        collationSource->flags = collationSource->origFlags;
  1.1504 +                        continue;
  1.1505 +                    }
  1.1506 +                }
  1.1507 +            }
  1.1508 +
  1.1509 +            if(collationSource->flags&UCOL_HIRAGANA_Q) {
  1.1510 +                /* Codepoints \u3099-\u309C are both Hiragana and Katakana. Set the flag
  1.1511 +                 * based on whether the previous codepoint was Hiragana or Katakana.
  1.1512 +                 */
  1.1513 +                if(((ch>=0x3040 && ch<=0x3096) || (ch >= 0x309d && ch <= 0x309f)) ||
  1.1514 +                        ((collationSource->flags & UCOL_WAS_HIRAGANA) && (ch >= 0x3099 && ch <= 0x309C))) {
  1.1515 +                    collationSource->flags |= UCOL_WAS_HIRAGANA;
  1.1516 +                } else {
  1.1517 +                    collationSource->flags &= ~UCOL_WAS_HIRAGANA;
  1.1518 +                }
  1.1519 +            }
  1.1520 +
  1.1521 +            // We've got a character.  See if there's any fcd and/or normalization stuff to do.
  1.1522 +            //    Note that UCOL_ITER_NORM flag is always zero when we are in the side buffer.
  1.1523 +            if ((collationSource->flags & UCOL_ITER_NORM) == 0) {
  1.1524 +                break;
  1.1525 +            }
  1.1526 +
  1.1527 +            if (collationSource->fcdPosition >= collationSource->pos) {
  1.1528 +                // An earlier FCD check has already covered the current character.
  1.1529 +                // We can go ahead and process this char.
  1.1530 +                break;
  1.1531 +            }
  1.1532 +
  1.1533 +            if (ch < ZERO_CC_LIMIT_ ) {
  1.1534 +                // Fast fcd safe path.  Trailing combining class == 0.  This char is OK.
  1.1535 +                break;
  1.1536 +            }
  1.1537 +
  1.1538 +            if (ch < NFC_ZERO_CC_BLOCK_LIMIT_) {
  1.1539 +                // We need to peek at the next character in order to tell if we are FCD
  1.1540 +                if ((collationSource->flags & UCOL_ITER_HASLEN) && collationSource->pos >= collationSource->endp) {
  1.1541 +                    // We are at the last char of source string.
  1.1542 +                    //  It is always OK for FCD check.
  1.1543 +                    break;
  1.1544 +                }
  1.1545 +
  1.1546 +                // Not at last char of source string (or we'll check against terminating null).  Do the FCD fast test
  1.1547 +                if (*collationSource->pos < NFC_ZERO_CC_BLOCK_LIMIT_) {
  1.1548 +                    break;
  1.1549 +                }
  1.1550 +            }
  1.1551 +
  1.1552 +
  1.1553 +            // Need a more complete FCD check and possible normalization.
  1.1554 +            if (collIterFCD(collationSource)) {
  1.1555 +                collIterNormalize(collationSource);
  1.1556 +            }
  1.1557 +            if ((collationSource->flags & UCOL_ITER_INNORMBUF) == 0) {
  1.1558 +                //  No normalization was needed.  Go ahead and process the char we already had.
  1.1559 +                break;
  1.1560 +            }
  1.1561 +
  1.1562 +            // Some normalization happened.  Next loop iteration will pick up a char
  1.1563 +            //   from the normalization buffer.
  1.1564 +
  1.1565 +        }   // end for (;;)
  1.1566 +
  1.1567 +
  1.1568 +        if (ch <= 0xFF) {
  1.1569 +            /*  For latin-1 characters we never need to fall back to the UCA table        */
  1.1570 +            /*    because all of the UCA data is replicated in the latinOneMapping array  */
  1.1571 +            order = coll->latinOneMapping[ch];
  1.1572 +            if (order > UCOL_NOT_FOUND) {   /* values above UCOL_NOT_FOUND are "special" CEs needing further resolution */
  1.1573 +                order = ucol_prv_getSpecialCE(coll, ch, order, collationSource, status);
  1.1574 +            }
  1.1575 +        }
  1.1576 +        else
  1.1577 +        {
  1.1578 +            // Always use UCA for Han, Hangul
  1.1579 +            // (Han extension A is before main Han block)
  1.1580 +            // **** Han compatibility chars ?? ****
  1.1581 +            if ((collationSource->flags & UCOL_FORCE_HAN_IMPLICIT) != 0 &&
  1.1582 +                (ch >= UCOL_FIRST_HAN_A && ch <= UCOL_LAST_HANGUL)) {
  1.1583 +                if (ch > UCOL_LAST_HAN && ch < UCOL_FIRST_HANGUL) {
  1.1584 +                    // between the two target ranges; do normal lookup
  1.1585 +                    // **** this range is YI, Modifier tone letters, ****
  1.1586 +                    // **** Latin-D, Syloti Nagari, Phagas-pa.       ****
  1.1587 +                    // **** Latin-D might be tailored, so we need to ****
  1.1588 +                    // **** do the normal lookup for these guys.     ****
  1.1589 +                    order = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch);
  1.1590 +                } else {
  1.1591 +                    // in one of the target ranges; use UCA
  1.1592 +                    order = UCOL_NOT_FOUND;   // skips the tailoring lookup so the UCA fallback below is taken
  1.1593 +                }
  1.1594 +            } else {
  1.1595 +                order = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch);
  1.1596 +            }
  1.1597 +
  1.1598 +            if(order > UCOL_NOT_FOUND) {                                       /* if a CE is special                */
  1.1599 +                order = ucol_prv_getSpecialCE(coll, ch, order, collationSource, status);    /* and try to get the special CE     */
  1.1600 +            }
  1.1601 +
  1.1602 +            if(order == UCOL_NOT_FOUND && coll->UCA) {   /* We couldn't find a good CE in the tailoring */
  1.1603 +                /* if we got here, the codepoint MUST be over 0xFF - so we look directly in the trie */
  1.1604 +                order = UTRIE_GET32_FROM_LEAD(&coll->UCA->mapping, ch);
  1.1605 +
  1.1606 +                if(order > UCOL_NOT_FOUND) { /* UCA also gives us a special CE */
  1.1607 +                    order = ucol_prv_getSpecialCE(coll->UCA, ch, order, collationSource, status);
  1.1608 +                }
  1.1609 +            }
  1.1610 +        }
  1.1611 +    } while ( order == UCOL_IGNORABLE && ch >= UCOL_FIRST_HANGUL && ch <= UCOL_LAST_HANGUL );   /* fetch the next char when a Hangul-range char produced an ignorable CE */
  1.1612 +
  1.1613 +    if(order == UCOL_NOT_FOUND) {   /* neither tailoring nor UCA had a mapping: fall back to getImplicit */
  1.1614 +        order = getImplicit(ch, collationSource);
  1.1615 +    }
  1.1616 +    return order; /* return the CE */
  1.1617 +}
  1.1618 +
  1.1619 +/* ucol_getNextCE, out-of-line version for use from other files. Forwards all arguments to the inline ucol_IGetNextCE and returns its result unchanged. */
  1.1620 +U_CAPI uint32_t  U_EXPORT2
  1.1621 +ucol_getNextCE(const UCollator *coll, collIterate *collationSource, UErrorCode *status) {
  1.1622 +    return ucol_IGetNextCE(coll, collationSource, status);
  1.1623 +}
  1.1624 +
  1.1625 +
  1.1626 +/**
  1.1627 +* Incremental previous normalization happens here. Pick up the range of chars
  1.1628 +* identified by FCD, normalize it into the collIterate's writable buffer,
  1.1629 +* switch the collIterate's state to use the writable buffer.
  1.1630 +* If normalization fails, the function returns early and the iterate's
  1.1631 +* position and flags are left untouched.
  1.1632 +* @param data collation iterator data
  1.1633 +*/
  1.1634 +static
  1.1635 +void collPrevIterNormalize(collIterate *data)
  1.1636 +{
  1.1637 +    UErrorCode status  = U_ZERO_ERROR;
  1.1638 +    const UChar *pEnd   = data->pos;  /* End normalize + 1 */
  1.1639 +    const UChar *pStart;
  1.1640 +
  1.1641 +    /* Start normalize */
  1.1642 +    if (data->fcdPosition == NULL) {
  1.1643 +        pStart = data->string;   /* the backward FCD scan reached the start of the string */
  1.1644 +    }
  1.1645 +    else {
  1.1646 +        pStart = data->fcdPosition + 1;   /* first unit after the position recorded by the FCD scan */
  1.1647 +    }
  1.1648 +
  1.1649 +    int32_t normLen =
  1.1650 +        data->nfd->normalize(UnicodeString(FALSE, pStart, (int32_t)((pEnd - pStart) + 1)),
  1.1651 +                             data->writableBuffer,
  1.1652 +                             status).
  1.1653 +        length();
  1.1654 +    if(U_FAILURE(status)) {
  1.1655 +        return;   /* failure is not reported to the caller; flags stay as they were */
  1.1656 +    }
  1.1657 +    /*
  1.1658 +    this puts the null termination in front of the normalized string instead
  1.1659 +    of the end
  1.1660 +    */
  1.1661 +    data->writableBuffer.insert(0, (UChar)0);
  1.1662 +
  1.1663 +    /*
  1.1664 +     * The usual case at this point is that we've got a base
  1.1665 +     * character followed by marks that were normalized. If
  1.1666 +     * fcdPosition is NULL, that means that we backed up to
  1.1667 +     * the beginning of the string and there's no base character.
  1.1668 +     *
  1.1669 +     * Forward processing will usually normalize when it sees
  1.1670 +     * the first mark, so that mark will get its natural offset
  1.1671 +     * and the rest will get the offset of the character following
  1.1672 +     * the marks. The base character will also get its natural offset.
  1.1673 +     *
  1.1674 +     * We write the offset of the base character, if there is one,
  1.1675 +     * followed by the offset of the first mark and then the offsets
  1.1676 +     * of the rest of the marks.
  1.1677 +     */
  1.1678 +    int32_t firstMarkOffset = 0;
  1.1679 +    int32_t trailOffset     = (int32_t)(data->pos - data->string + 1);
  1.1680 +    int32_t trailCount      = normLen - 1;
  1.1681 +
  1.1682 +    if (data->fcdPosition != NULL) {
  1.1683 +        int32_t baseOffset = (int32_t)(data->fcdPosition - data->string);
  1.1684 +        UChar   baseChar   = *data->fcdPosition;
  1.1685 +
  1.1686 +        firstMarkOffset = baseOffset + 1;
  1.1687 +
  1.1688 +        /*
  1.1689 +         * If the base character is the start of a contraction, forward processing
  1.1690 +         * will normalize the marks while checking for the contraction, which means
  1.1691 +         * that the offset of the first mark will be the same as the other marks.
  1.1692 +         *
  1.1693 +         * **** THIS IS PROBABLY NOT A COMPLETE TEST ****
  1.1694 +         */
  1.1695 +        if (baseChar >= 0x100) {
  1.1696 +            uint32_t baseOrder = UTRIE_GET32_FROM_LEAD(&data->coll->mapping, baseChar);
  1.1697 +
  1.1698 +            if (baseOrder == UCOL_NOT_FOUND && data->coll->UCA) {
  1.1699 +                baseOrder = UTRIE_GET32_FROM_LEAD(&data->coll->UCA->mapping, baseChar);
  1.1700 +            }
  1.1701 +
  1.1702 +            if (baseOrder > UCOL_NOT_FOUND && getCETag(baseOrder) == CONTRACTION_TAG) {
  1.1703 +                firstMarkOffset = trailOffset;
  1.1704 +            }
  1.1705 +        }
  1.1706 +
  1.1707 +        data->appendOffset(baseOffset, status);
  1.1708 +    }
  1.1709 +
  1.1710 +    data->appendOffset(firstMarkOffset, status);
  1.1711 +
  1.1712 +    for (int32_t i = 0; i < trailCount; i += 1) {
  1.1713 +        data->appendOffset(trailOffset, status);
  1.1714 +    }
  1.1715 +
  1.1716 +    data->offsetRepeatValue = trailOffset;
  1.1717 +
  1.1718 +    data->offsetReturn = data->offsetStore - 1;   /* point at the last offset just appended */
  1.1719 +    if (data->offsetReturn == data->offsetBuffer) {
  1.1720 +        data->offsetStore = data->offsetBuffer;
  1.1721 +    }
  1.1722 +
  1.1723 +    data->pos        = data->writableBuffer.getTerminatedBuffer() + 1 + normLen;   /* one past the last normalized unit; index 0 holds the inserted NUL */
  1.1724 +    data->origFlags  = data->flags;   /* saved so the flags can be restored when leaving the side buffer */
  1.1725 +    data->flags     |= UCOL_ITER_INNORMBUF;
  1.1726 +    data->flags     &= ~(UCOL_ITER_NORM | UCOL_ITER_HASLEN);
  1.1727 +}
  1.1726 +
  1.1727 +
  1.1728 +/**
  1.1729 +* Incremental FCD check for previous iteration and normalize. Called from
  1.1730 +* getPrevCE when normalization state is suspect.
  1.1731 +* When entering, the state is known to be this:
  1.1732 +* o  We are working in the main buffer of the collIterate, not the side
  1.1733 +*    writable buffer. When in the side buffer, normalization mode is always
  1.1734 +*    off, so we won't get here.
  1.1735 +* o  The leading combining class from the current character is 0 or the
  1.1736 +*    trailing combining class of the previous char was zero.
  1.1737 +*    True because the previous call to this function will have always exited
  1.1738 +*    that way, and we get called for every char where cc might be non-zero.
  1.1739 +* @param data collation iterate struct
  1.1740 +* @return normalization status, TRUE for normalization to be done, FALSE
  1.1741 +*         otherwise
  1.1742 +*/
  1.1743 +static
  1.1744 +inline UBool collPrevIterFCD(collIterate *data)
  1.1745 +{
  1.1746 +    const UChar *src, *start;
  1.1747 +    uint8_t     leadingCC;
  1.1748 +    uint8_t     trailingCC = 0;
  1.1749 +    uint16_t    fcd;
  1.1750 +    UBool       result = FALSE;
  1.1751 +
  1.1752 +    start = data->string;
  1.1753 +    src = data->pos + 1;   /* one past the current char, as the limit for the backward read */
  1.1754 +
  1.1755 +    /* Get the FCD value of the current character; its leading combining class is what matters here. */
  1.1756 +    fcd = g_nfcImpl->previousFCD16(start, src);   /* src is compared with start and stored below, so it appears to be moved backward by this call */
  1.1757 +
  1.1758 +    leadingCC = (uint8_t)(fcd >> SECOND_LAST_BYTE_SHIFT_);
  1.1759 +
  1.1760 +    if (leadingCC != 0) {
  1.1761 +        /*
  1.1762 +        The current char has a non-zero leading combining class.
  1.1763 +        Scan backward until we find a char with a trailing cc of zero.
  1.1764 +        */
  1.1765 +        for (;;)
  1.1766 +        {
  1.1767 +            if (start == src) {
  1.1768 +                data->fcdPosition = NULL;   /* reached the start of the string: NULL marks "no preceding base character" */
  1.1769 +                return result;
  1.1770 +            }
  1.1771 +
  1.1772 +            fcd = g_nfcImpl->previousFCD16(start, src);
  1.1773 +
  1.1774 +            trailingCC = (uint8_t)(fcd & LAST_BYTE_MASK_);
  1.1775 +
  1.1776 +            if (trailingCC == 0) {
  1.1777 +                break;
  1.1778 +            }
  1.1779 +
  1.1780 +            if (leadingCC < trailingCC) {
  1.1781 +                result = TRUE;   /* combining classes out of order; keep scanning to find the start of the run */
  1.1782 +            }
  1.1783 +
  1.1784 +            leadingCC = (uint8_t)(fcd >> SECOND_LAST_BYTE_SHIFT_);
  1.1785 +        }
  1.1786 +    }
  1.1787 +
  1.1788 +    data->fcdPosition = (UChar *)src;   /* record where the backward scan stopped */
  1.1789 +
  1.1790 +    return result;
  1.1791 +}
  1.1792 +
  1.1793 +/** gets a code unit from the string at a given offset
  1.1794 + *  Handles both normal and iterative cases.
  1.1795 + *  No error checking - caller beware!
  1.1796 + *  Returns 0xfffd when the iterator yields a negative value or when the
  1.1797 + *  iterate has neither a pointer nor an iterator.
  1.1798 + */
  1.1799 +static inline
  1.1800 +UChar peekCodeUnit(collIterate *source, int32_t offset) {
  1.1801 +    if(source->pos != NULL) {
  1.1802 +        return *(source->pos + offset);   // unchecked read relative to pos
  1.1803 +    } else if(source->iterator != NULL) {
  1.1804 +        UChar32 c;
  1.1805 +        if(offset != 0) {
  1.1806 +            source->iterator->move(source->iterator, offset, UITER_CURRENT);
  1.1807 +            c = source->iterator->next(source->iterator);
  1.1808 +            source->iterator->move(source->iterator, -offset-1, UITER_CURRENT);   // undo the move plus the one unit next() is expected to consume
  1.1809 +        } else {
  1.1810 +            c = source->iterator->current(source->iterator);
  1.1811 +        }
  1.1812 +        return c >= 0 ? (UChar)c : 0xfffd;  // If the caller works properly, we should never see c<0.
  1.1813 +    } else {
  1.1814 +        return 0xfffd;
  1.1815 +    }
  1.1816 +}
  1.1815 +
  1.1816 +// Code point version. Treats the offset as a _code point_ delta.
  1.1817 +// We cannot use U16_FWD_1_UNSAFE and similar because we might not have well-formed UTF-16.
  1.1818 +// We cannot use U16_FWD_1 and similar because we do not know the start and limit of the buffer.
  1.1819 +static inline
  1.1820 +UChar32 peekCodePoint(collIterate *source, int32_t offset) {
  1.1821 +    UChar32 c;
  1.1822 +    if(source->pos != NULL) {
  1.1823 +        const UChar *p = source->pos;
  1.1824 +        if(offset >= 0) {
  1.1825 +            // Skip forward over offset code points (a lead followed by a trail counts as one).
  1.1826 +            while(--offset >= 0) {
  1.1827 +                if(U16_IS_LEAD(*p++) && U16_IS_TRAIL(*p)) {
  1.1828 +                    ++p;
  1.1829 +                }
  1.1830 +            }
  1.1831 +            // Read the code point there.
  1.1832 +            c = *p++;
  1.1833 +            UChar trail;
  1.1834 +            if(U16_IS_LEAD(c) && U16_IS_TRAIL(trail = *p)) {
  1.1835 +                c = U16_GET_SUPPLEMENTARY(c, trail);
  1.1836 +            }
  1.1837 +        } else /* offset<0 */ {
  1.1838 +            // Skip backward over (-offset-1) code points.
  1.1839 +            while(++offset < 0) {
  1.1840 +                if(U16_IS_TRAIL(*--p) && U16_IS_LEAD(*(p - 1))) {
  1.1841 +                    --p;
  1.1842 +                }
  1.1843 +            }
  1.1844 +            // Read the code point before that.
  1.1845 +            c = *--p;
  1.1846 +            UChar lead;
  1.1847 +            if(U16_IS_TRAIL(c) && U16_IS_LEAD(lead = *(p - 1))) {
  1.1848 +                c = U16_GET_SUPPLEMENTARY(lead, c);
  1.1849 +            }
  1.1850 +        }
  1.1851 +    } else if(source->iterator != NULL) {
  1.1852 +        if(offset >= 0) {
  1.1853 +            // Skip forward over offset code points.
  1.1854 +            int32_t fwd = offset;
  1.1855 +            while(fwd-- > 0) {
  1.1856 +                uiter_next32(source->iterator);
  1.1857 +            }
  1.1858 +            // Read the code point there.
  1.1859 +            c = uiter_current32(source->iterator);
  1.1860 +            // Return to the starting point, skipping backward over offset code points.
  1.1861 +            while(offset-- > 0) {
  1.1862 +                uiter_previous32(source->iterator);
  1.1863 +            }
  1.1864 +        } else /* offset<0 */ {
  1.1865 +            // Read backward, reading -offset code points, remember only the last-read one.
  1.1866 +            int32_t back = offset;
  1.1867 +            do {
  1.1868 +                c = uiter_previous32(source->iterator);
  1.1869 +            } while(++back < 0);
  1.1870 +            // Return to the starting position, skipping forward over -offset code points.
  1.1871 +            do {
  1.1872 +                uiter_next32(source->iterator);
  1.1873 +            } while(++offset < 0);
  1.1874 +        }
  1.1875 +    } else {
  1.1876 +        c = U_SENTINEL;   // neither a pointer nor an iterator is available
  1.1877 +    }
  1.1878 +    return c;
  1.1879 +}
  1.1880 +
  1.1881 +/**
  1.1882 +* Determines if we are at the start of the data string in the backwards
  1.1883 +* collation iterator
  1.1884 +* @param data collation iterator
  1.1885 +* @return TRUE if we are at the start
  1.1886 +*/
  1.1887 +static
  1.1888 +inline UBool isAtStartPrevIterate(collIterate *data) {
  1.1889 +    if(data->pos == NULL && data->iterator != NULL) {   // iterator-only mode: ask the iterator
  1.1890 +        return !data->iterator->hasPrevious(data->iterator);
  1.1891 +    }
  1.1892 +    //return (collIter_bos(data)) ||
  1.1893 +    return (data->pos == data->string) ||   // at the first unit of the main string, or ...
  1.1894 +              ((data->flags & UCOL_ITER_INNORMBUF) && (data->pos != NULL) &&   // ... in the side buffer,
  1.1895 +              *(data->pos - 1) == 0 && data->fcdPosition == NULL);   // just after a NUL unit, with fcdPosition NULL
  1.1896 +}
  1.1897 +
  1.1898 +static
  1.1899 +inline void goBackOne(collIterate *data) {   // steps the iterate back by one code unit
  1.1900 +# if 0
  1.1901 +    // somehow, it looks like we need to keep iterator synced up
  1.1902 +    // at all times, as above.
  1.1903 +    // (disabled variant: moved the iterator back regardless of UCOL_USE_ITERATOR)
  1.1904 +    if(data->pos) {
  1.1905 +        data->pos--;
  1.1906 +    }
  1.1907 +    if(data->iterator) {
  1.1908 +        data->iterator->previous(data->iterator);
  1.1909 +    }
  1.1910 +#endif
  1.1911 +    if(data->iterator && (data->flags & UCOL_USE_ITERATOR)) {   // only touch the iterator while it is the active source
  1.1912 +        data->iterator->previous(data->iterator);
  1.1913 +    }
  1.1914 +    if(data->pos) {   // pointer position is decremented whenever it is non-NULL; no check against the string start
  1.1915 +        data->pos --;
  1.1916 +    }
  1.1917 +}
  1.1917 +
  1.1918 +/**
  1.1919 +* Inline function that gets a simple CE.
  1.1920 +* So what it does is that it will first check the expansion buffer. If the
  1.1921 +* expansion buffer is not empty, ie the end pointer to the expansion buffer
  1.1922 +* is different from the string pointer, we return the collation element at the
  1.1923 +* return pointer and decrement it.
  1.1924 +* For more complicated CEs it resorts to getComplicatedCE.
  1.1925 +* @param coll collator data
  1.1926 +* @param data collation iterator struct
  1.1927 +* @param status error status
  1.1928 +*/
  1.1929 +static
  1.1930 +inline uint32_t ucol_IGetPrevCE(const UCollator *coll, collIterate *data,
  1.1931 +                               UErrorCode *status)
  1.1932 +{
  1.1933 +    uint32_t result = (uint32_t)UCOL_NULLORDER;
  1.1934 +
  1.1935 +    if (data->offsetReturn != NULL) {
  1.1936 +        if (data->offsetRepeatCount > 0) {
  1.1937 +                data->offsetRepeatCount -= 1;
  1.1938 +        } else {
  1.1939 +            if (data->offsetReturn == data->offsetBuffer) {
  1.1940 +                data->offsetReturn = NULL;
  1.1941 +                data->offsetStore  = data->offsetBuffer;
  1.1942 +            } else {
  1.1943 +                data->offsetReturn -= 1;
  1.1944 +            }
  1.1945 +        }
  1.1946 +    }
  1.1947 +
  1.1948 +    if ((data->extendCEs && data->toReturn > data->extendCEs) ||
  1.1949 +            (!data->extendCEs && data->toReturn > data->CEs))
  1.1950 +    {
  1.1951 +        data->toReturn -= 1;
  1.1952 +        result = *(data->toReturn);
  1.1953 +        if (data->CEs == data->toReturn || data->extendCEs == data->toReturn) {
  1.1954 +            data->CEpos = data->toReturn;
  1.1955 +        }
  1.1956 +    }
  1.1957 +    else {
  1.1958 +        UChar ch = 0;
  1.1959 +
  1.1960 +        do {
  1.1961 +            /*
  1.1962 +            Loop handles case when incremental normalize switches to or from the
  1.1963 +            side buffer / original string, and we need to start again to get the
  1.1964 +            next character.
  1.1965 +            */
  1.1966 +            for (;;) {
  1.1967 +                if (data->flags & UCOL_ITER_HASLEN) {
  1.1968 +                    /*
  1.1969 +                    Normal path for strings when length is specified.
  1.1970 +                    Not in side buffer because it is always null terminated.
  1.1971 +                    */
  1.1972 +                    if (data->pos <= data->string) {
  1.1973 +                        /* End of the main source string */
  1.1974 +                        return UCOL_NO_MORE_CES;
  1.1975 +                    }
  1.1976 +                    data->pos --;
  1.1977 +                    ch = *data->pos;
  1.1978 +                }
  1.1979 +                // we are using an iterator to go back. Pray for us!
  1.1980 +                else if (data->flags & UCOL_USE_ITERATOR) {
  1.1981 +                  UChar32 iterCh = data->iterator->previous(data->iterator);
  1.1982 +                  if(iterCh == U_SENTINEL) {
  1.1983 +                    return UCOL_NO_MORE_CES;
  1.1984 +                  } else {
  1.1985 +                    ch = (UChar)iterCh;
  1.1986 +                  }
  1.1987 +                }
  1.1988 +                else {
  1.1989 +                    data->pos --;
  1.1990 +                    ch = *data->pos;
  1.1991 +                    /* we are in the side buffer. */
  1.1992 +                    if (ch == 0) {
  1.1993 +                        /*
  1.1994 +                        At the start of the normalize side buffer.
  1.1995 +                        Go back to string.
  1.1996 +                        Because pointer points to the last accessed character,
  1.1997 +                        hence we have to increment it by one here.
  1.1998 +                        */
  1.1999 +                        data->flags = data->origFlags;
  1.2000 +                        data->offsetRepeatValue = 0;
  1.2001 +
  1.2002 +                         if (data->fcdPosition == NULL) {
  1.2003 +                            data->pos = data->string;
  1.2004 +                            return UCOL_NO_MORE_CES;
  1.2005 +                        }
  1.2006 +                        else {
  1.2007 +                            data->pos   = data->fcdPosition + 1;
  1.2008 +                        }
  1.2009 +
  1.2010 +                       continue;
  1.2011 +                    }
  1.2012 +                }
  1.2013 +
  1.2014 +                if(data->flags&UCOL_HIRAGANA_Q) {
  1.2015 +                  if(ch>=0x3040 && ch<=0x309f) {
  1.2016 +                    data->flags |= UCOL_WAS_HIRAGANA;
  1.2017 +                  } else {
  1.2018 +                    data->flags &= ~UCOL_WAS_HIRAGANA;
  1.2019 +                  }
  1.2020 +                }
  1.2021 +
  1.2022 +                /*
  1.2023 +                * got a character to determine if there's fcd and/or normalization
  1.2024 +                * stuff to do.
  1.2025 +                * if the current character is not fcd.
  1.2026 +                * if current character is at the start of the string
  1.2027 +                * Trailing combining class == 0.
  1.2028 +                * Note if pos is in the writablebuffer, norm is always 0
  1.2029 +                */
  1.2030 +                if (ch < ZERO_CC_LIMIT_ ||
  1.2031 +                  // this should propel us out of the loop in the iterator case
  1.2032 +                    (data->flags & UCOL_ITER_NORM) == 0 ||
  1.2033 +                    (data->fcdPosition != NULL && data->fcdPosition <= data->pos)
  1.2034 +                    || data->string == data->pos) {
  1.2035 +                    break;
  1.2036 +                }
  1.2037 +
  1.2038 +                if (ch < NFC_ZERO_CC_BLOCK_LIMIT_) {
  1.2039 +                    /* if next character is FCD */
  1.2040 +                    if (data->pos == data->string) {
  1.2041 +                        /* First char of string is always OK for FCD check */
  1.2042 +                        break;
  1.2043 +                    }
  1.2044 +
  1.2045 +                    /* Not first char of string, do the FCD fast test */
  1.2046 +                    if (*(data->pos - 1) < NFC_ZERO_CC_BLOCK_LIMIT_) {
  1.2047 +                        break;
  1.2048 +                    }
  1.2049 +                }
  1.2050 +
  1.2051 +                /* Need a more complete FCD check and possible normalization. */
  1.2052 +                if (collPrevIterFCD(data)) {
  1.2053 +                    collPrevIterNormalize(data);
  1.2054 +                }
  1.2055 +
  1.2056 +                if ((data->flags & UCOL_ITER_INNORMBUF) == 0) {
  1.2057 +                    /*  No normalization. Go ahead and process the char. */
  1.2058 +                    break;
  1.2059 +                }
  1.2060 +
  1.2061 +                /*
  1.2062 +                Some normalization happened.
  1.2063 +                Next loop picks up a char from the normalization buffer.
  1.2064 +                */
  1.2065 +            }
  1.2066 +
  1.2067 +            /* attempt to handle contractions, after removal of the backwards
  1.2068 +            contraction
  1.2069 +            */
  1.2070 +            if (ucol_contractionEndCP(ch, coll) && !isAtStartPrevIterate(data)) {
  1.2071 +                result = ucol_prv_getSpecialPrevCE(coll, ch, UCOL_CONTRACTION, data, status);
  1.2072 +            } else {
  1.2073 +                if (ch <= 0xFF) {
  1.2074 +                    result = coll->latinOneMapping[ch];
  1.2075 +                }
  1.2076 +                else {
  1.2077 +                    // Always use UCA for [3400..9FFF], [AC00..D7AF]
  1.2078 +                    // **** [FA0E..FA2F] ?? ****
  1.2079 +                    if ((data->flags & UCOL_FORCE_HAN_IMPLICIT) != 0 &&
  1.2080 +                        (ch >= 0x3400 && ch <= 0xD7AF)) {
  1.2081 +                        if (ch > 0x9FFF && ch < 0xAC00) {
  1.2082 +                            // between the two target ranges; do normal lookup
  1.2083 +                            // **** this range is YI, Modifier tone letters, ****
  1.2084 +                            // **** Latin-D, Syloti Nagari, Phagas-pa.       ****
  1.2085 +                            // **** Latin-D might be tailored, so we need to ****
  1.2086 +                            // **** do the normal lookup for these guys.     ****
  1.2087 +                             result = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch);
  1.2088 +                        } else {
  1.2089 +                            result = UCOL_NOT_FOUND;
  1.2090 +                        }
  1.2091 +                    } else {
  1.2092 +                        result = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch);
  1.2093 +                    }
  1.2094 +                }
  1.2095 +                if (result > UCOL_NOT_FOUND) {
  1.2096 +                    result = ucol_prv_getSpecialPrevCE(coll, ch, result, data, status);
  1.2097 +                }
  1.2098 +                if (result == UCOL_NOT_FOUND) { // Not found in master list
  1.2099 +                    if (!isAtStartPrevIterate(data) &&
  1.2100 +                        ucol_contractionEndCP(ch, data->coll))
  1.2101 +                    {
  1.2102 +                        result = UCOL_CONTRACTION;
  1.2103 +                    } else {
  1.2104 +                        if(coll->UCA) {
  1.2105 +                            result = UTRIE_GET32_FROM_LEAD(&coll->UCA->mapping, ch);
  1.2106 +                        }
  1.2107 +                    }
  1.2108 +
  1.2109 +                    if (result > UCOL_NOT_FOUND) {
  1.2110 +                        if(coll->UCA) {
  1.2111 +                            result = ucol_prv_getSpecialPrevCE(coll->UCA, ch, result, data, status);
  1.2112 +                        }
  1.2113 +                    }
  1.2114 +                }
  1.2115 +            }
  1.2116 +        } while ( result == UCOL_IGNORABLE && ch >= UCOL_FIRST_HANGUL && ch <= UCOL_LAST_HANGUL );
  1.2117 +
  1.2118 +        if(result == UCOL_NOT_FOUND) {
  1.2119 +            result = getPrevImplicit(ch, data);
  1.2120 +        }
  1.2121 +    }
  1.2122 +
  1.2123 +    return result;
  1.2124 +}
  1.2125 +
  1.2126 +
  1.2127 +/*   Exported, out-of-line wrapper around ucol_IGetPrevCE for use from other files.  */
  1.2128 +U_CFUNC uint32_t  U_EXPORT2
  1.2129 +ucol_getPrevCE(const UCollator *coll, collIterate *data, UErrorCode *status) {
  1.2130 +    const uint32_t previousCE = ucol_IGetPrevCE(coll, data, status);
  1.2131 +    return previousCE;
  1.2132 +}
  1.2133 +
  1.2134 +
  1.2135 +/* this should be connected to special Jamo handling */
  1.2136 +/* Returns the first CE for the single code unit u; 0 if *status reports a failure after setup. */
  1.2137 +U_CFUNC uint32_t  U_EXPORT2
  1.2138 +ucol_getFirstCE(const UCollator *coll, UChar u, UErrorCode *status) {
  1.2139 +    // Iterate forwards over a string of length 1 that consists only of u.
  1.2140 +    collIterate singleUnitIter;
  1.2141 +    IInit_collIterate(coll, &u, 1, &singleUnitIter, status);
  1.2142 +    // Do not touch the iterator when its initialization failed.
  1.2143 +    return U_FAILURE(*status) ? 0 : ucol_IGetNextCE(coll, &singleUnitIter, status);
  1.2144 +}
  1.2145 +
  1.2146 +/**
  1.2147 +* Inserts the argument character into the end of the buffer pushing back the
  1.2148 +* null terminator.
  1.2149 +* @param data collIterate struct data
  1.2150 +* @param ch character to be appended
  1.2151 +* @return the position of the new addition, or NULL if getTerminatedBuffer() returned NULL
  1.2152 +*/
  1.2153 +static inline const UChar * insertBufferEnd(collIterate *data, UChar ch)
  1.2154 +{
  1.2155 +    int32_t oldLength = data->writableBuffer.length();
  1.2156 +    const UChar *terminated = data->writableBuffer.append(ch).getTerminatedBuffer();
  1.2157 +    return terminated == NULL ? NULL : terminated + oldLength; // never offset a NULL result
  1.2158 +}
  1.2159 +
  1.2160 +/**
  1.2161 +* Inserts the argument string into the end of the buffer pushing back the
  1.2162 +* null terminator.
  1.2163 +* @param data collIterate struct data
  1.2164 +* @param str string to be appended
  1.2165 +* @param length of the string to be appended
  1.2166 +* @return the position of the new addition, or NULL if getTerminatedBuffer() returned NULL
  1.2167 +*/
  1.2168 +static inline const UChar * insertBufferEnd(collIterate *data, const UChar *str, int32_t length)
  1.2169 +{
  1.2170 +    int32_t oldLength = data->writableBuffer.length();
  1.2171 +    const UChar *terminated = data->writableBuffer.append(str, length).getTerminatedBuffer();
  1.2172 +    return terminated == NULL ? NULL : terminated + oldLength; // never offset a NULL result
  1.2173 +}
  1.2174 +
  1.2175 +/**
  1.2176 +* Special normalization function for contraction in the forwards iterator.
  1.2177 +* Appends the NFD form of the text from data->pos - 1 up to, but excluding,
  1.2178 +* data->fcdPosition to the writable buffer. If the iterator is not yet in the
  1.2179 +* buffer, the buffer is first reset to the single unit found at data->pos - 2.
  1.2180 +* pos will now point to positions in the buffer; fcdPosition is only read.
  1.2181 +* Flags will be changed accordingly; the previous flags are kept in origFlags.
  1.2182 +* @param data collation iterator data
  1.2183 +*/
  1.2184 +static
  1.2185 +inline void normalizeNextContraction(collIterate *data)
  1.2186 +{
  1.2187 +    int32_t     strsize;   /* buffer length before the normalized text is appended */
  1.2188 +    UErrorCode  status     = U_ZERO_ERROR;   /* local only; not passed back to the caller */
  1.2189 +    /* because the pointer points to the next character */
  1.2190 +    const UChar *pStart    = data->pos - 1;
  1.2191 +    const UChar *pEnd;
  1.2192 +    /* not in the buffer yet: restart the buffer with the unit that precedes pStart */
  1.2193 +    if ((data->flags & UCOL_ITER_INNORMBUF) == 0) {
  1.2194 +        data->writableBuffer.setTo(*(pStart - 1));
  1.2195 +        strsize               = 1;
  1.2196 +    }
  1.2197 +    else {
  1.2198 +        strsize = data->writableBuffer.length();
  1.2199 +    }
  1.2200 +
  1.2201 +    pEnd = data->fcdPosition;
  1.2202 +    /* normalize the range [pStart, pEnd) and append the result to the buffer */
  1.2203 +    data->writableBuffer.append(
  1.2204 +        data->nfd->normalize(UnicodeString(FALSE, pStart, (int32_t)(pEnd - pStart)), status));
  1.2205 +    if(U_FAILURE(status)) { // pos and flags are left as they were
  1.2206 +        return;
  1.2207 +    }
  1.2208 +    /* continue reading at the first appended unit, which now lives in the buffer */
  1.2209 +    data->pos        = data->writableBuffer.getTerminatedBuffer() + strsize;
  1.2210 +    data->origFlags  = data->flags;
  1.2211 +    data->flags     |= UCOL_ITER_INNORMBUF;
  1.2212 +    data->flags     &= ~(UCOL_ITER_NORM | UCOL_ITER_HASLEN);
  1.2213 +}
  1.2214 +
  1.2215 +/**
  1.2216 +* Contraction character management function that returns the next character
  1.2217 +* for the forwards iterator.
  1.2218 +* If the iterator is inside the normalization buffer and not at its terminating
  1.2219 +* zero, or is in the data string before fcdPosition, the unit at pos is returned.
  1.2220 +* Else it checks next character in data string to see if it is normalizable.
  1.2221 +* If it is not, the character is simply copied into the buffer, else
  1.2222 +* the whole normalized substring is copied into the buffer, including the
  1.2223 +* current character.
  1.2224 +* @param data collation element iterator data
  1.2225 +* @return next character, or (UChar)-1 if appending to the buffer yielded NULL
  1.2226 +*/
  1.2227 +static
  1.2228 +inline UChar getNextNormalizedChar(collIterate *data)
  1.2229 +{
  1.2230 +    UChar  nextch;
  1.2231 +    UChar  ch;
  1.2232 +    // Here we need to add the iterator code. One problem is the way
  1.2233 +    // end of string is handled. If we just return next char, it could
  1.2234 +    // be the sentinel. Most of the cases already check for this, but we
  1.2235 +    // need to be sure.
  1.2236 +    if ((data->flags & (UCOL_ITER_NORM | UCOL_ITER_INNORMBUF)) == 0 ) {
  1.2237 +         /* if no normalization and not in buffer. */
  1.2238 +      if(data->flags & UCOL_USE_ITERATOR) {
  1.2239 +         return (UChar)data->iterator->next(data->iterator);
  1.2240 +      } else {
  1.2241 +         return *(data->pos ++);
  1.2242 +      }
  1.2243 +    }
  1.2244 +
  1.2245 +    //if (data->flags & UCOL_ITER_NORM && data->flags & UCOL_USE_ITERATOR) {
  1.2246 +      //normalizeIterator(data);
  1.2247 +    //}
  1.2248 +
  1.2249 +    UBool  innormbuf = (UBool)(data->flags & UCOL_ITER_INNORMBUF);
  1.2250 +    if ((innormbuf && *data->pos != 0) ||
  1.2251 +        (data->fcdPosition != NULL && !innormbuf &&
  1.2252 +        data->pos < data->fcdPosition)) {
  1.2253 +        /*
  1.2254 +        if next character is in normalized buffer, or in the part of the
  1.2255 +        string before fcdPosition, no further normalization is required
  1.2256 +        */
  1.2257 +        return *(data->pos ++);
  1.2258 +    }
  1.2259 +
  1.2260 +    if (data->flags & UCOL_ITER_HASLEN) {
  1.2261 +        /* in data string; the last unit is returned without looking past endp */
  1.2262 +        if (data->pos + 1 == data->endp) {
  1.2263 +            return *(data->pos ++);
  1.2264 +        }
  1.2265 +    }
  1.2266 +    else {
  1.2267 +        if (innormbuf) {
  1.2268 +          // inside the normalization buffer, but at the end
  1.2269 +          // (since we encountered zero). This means, in the
  1.2270 +          // case we're using char iterator, that we need to
  1.2271 +          // do another round of normalization.
  1.2272 +          //if(data->origFlags & UCOL_USE_ITERATOR) {
  1.2273 +            // we need to restore original flags,
  1.2274 +            // otherwise, we'll lose them
  1.2275 +            //data->flags = data->origFlags;
  1.2276 +            //normalizeIterator(data);
  1.2277 +            //return *(data->pos++);
  1.2278 +          //} else {
  1.2279 +            /*
  1.2280 +            in writable buffer, at this point fcdPosition can not be
  1.2281 +            pointing to the end of the data string. see contracting tag.
  1.2282 +            */
  1.2283 +          if(data->fcdPosition) {
  1.2284 +            if (*(data->fcdPosition + 1) == 0 ||
  1.2285 +                data->fcdPosition + 1 == data->endp) {
  1.2286 +                /* at the end of the string, dump it into the normalizer */
  1.2287 +                const UChar *appended = insertBufferEnd(data, *(data->fcdPosition));
  1.2288 +                if (appended == NULL) { // must be tested before the + 1 below
  1.2289 +                    return (UChar)-1; // Return to indicate error.
  1.2290 +                }
  1.2291 +                data->pos = appended + 1;
  1.2292 +                return *(data->fcdPosition ++);
  1.2293 +            }
  1.2294 +            data->pos = data->fcdPosition;
  1.2295 +          } else if(data->origFlags & UCOL_USE_ITERATOR) {
  1.2296 +            // if we are here, we're using a normalizing iterator.
  1.2297 +            // we should just continue further.
  1.2298 +            data->flags = data->origFlags;
  1.2299 +            data->pos = NULL;
  1.2300 +            return (UChar)data->iterator->next(data->iterator);
  1.2301 +          }
  1.2302 +          //}
  1.2303 +        }
  1.2304 +        else {
  1.2305 +            if (*(data->pos + 1) == 0) {
  1.2306 +                return *(data->pos ++);
  1.2307 +            }
  1.2308 +        }
  1.2309 +    }
  1.2310 +
  1.2311 +    ch = *data->pos ++;
  1.2312 +    nextch = *data->pos;
  1.2313 +
  1.2314 +    /*
  1.2315 +    * if the current character is not fcd.
  1.2316 +    * Trailing combining class == 0.
  1.2317 +    */
  1.2318 +    if ((data->fcdPosition == NULL || data->fcdPosition < data->pos) &&
  1.2319 +        (nextch >= NFC_ZERO_CC_BLOCK_LIMIT_ ||
  1.2320 +         ch >= NFC_ZERO_CC_BLOCK_LIMIT_)) {
  1.2321 +            /*
  1.2322 +            Need a more complete FCD check and possible normalization.
  1.2323 +            normalize substring will be appended to buffer
  1.2324 +            */
  1.2325 +        if (collIterFCD(data)) {
  1.2326 +            normalizeNextContraction(data);
  1.2327 +            return *(data->pos ++);
  1.2328 +        }
  1.2329 +        else if (innormbuf) {
  1.2330 +            /* fcdposition shifted even when there's no normalization, if we
  1.2331 +            don't input the rest into this, we'll get the wrong position when
  1.2332 +            we reach the end of the writableBuffer */
  1.2333 +            int32_t length = (int32_t)(data->fcdPosition - data->pos + 1);
  1.2334 +            data->pos = insertBufferEnd(data, data->pos - 1, length);
  1.2335 +            // Check if data->pos received a null pointer
  1.2336 +            if (data->pos == NULL) {
  1.2337 +                return (UChar)-1; // Return to indicate error.
  1.2338 +            }
  1.2339 +            return *(data->pos ++);
  1.2340 +        }
  1.2341 +    }
  1.2342 +
  1.2343 +    if (innormbuf) {
  1.2344 +        /*
  1.2345 +        no normalization is to be done hence only one character will be
  1.2346 +        appended to the buffer.
  1.2347 +        */
  1.2348 +        const UChar *appended = insertBufferEnd(data, ch);
  1.2349 +        if (appended == NULL) { // must be tested before the + 1 below
  1.2350 +            return (UChar)-1; // Return to indicate error.
  1.2351 +        }
  1.2352 +        data->pos = appended + 1;
  1.2353 +    }
  1.2354 +
  1.2355 +    /* points back to the pos in string */
  1.2356 +    return ch;
  1.2357 +}
  1.2358 +
  1.2359 +
  1.2360 +
  1.2361 +/**
  1.2362 +* Copies the argument buffer into writableBuffer, records the string position
  1.2363 +* in fcdPosition when needed, and repositions pos at the start of the buffer.
  1.2364 +* @param source data string source
  1.2365 +* @param buffer character buffer
  1.2366 +*/
  1.2367 +static
  1.2368 +inline void setDiscontiguosAttribute(collIterate *source, const UnicodeString &buffer)
  1.2369 +{
  1.2370 +    /* The skipped characters have to be considered again later, so they are
  1.2371 +    placed in the normalization buffer and pos is redirected to them.
  1.2372 +    - pos in the string: the buffer becomes a copy of the skipped characters,
  1.2373 +      fcdPosition remembers pos, and the flags switch to buffer mode.
  1.2374 +    - pos already in the normalization buffer: everything in front of pos is
  1.2375 +      replaced by the copy.
  1.2376 +    Either way pos ends up at the start of the normalization buffer. The
  1.2377 +    copies are made so that getNextCE and ucol_prv_getSpecialCE do not
  1.2378 +    require any changes. */
  1.2379 +    if ((source->flags & UCOL_ITER_INNORMBUF) == 0) {
  1.2380 +        source->fcdPosition  = source->pos;
  1.2381 +        source->origFlags    = source->flags;
  1.2382 +        source->flags       |= UCOL_ITER_INNORMBUF;
  1.2383 +        source->flags       &= ~(UCOL_ITER_NORM | UCOL_ITER_HASLEN | UCOL_USE_ITERATOR);
  1.2384 +        source->writableBuffer = buffer;
  1.2385 +    }
  1.2386 +    else {
  1.2387 +        int32_t consumedLength = source->pos - source->writableBuffer.getBuffer();
  1.2388 +        source->writableBuffer.replace(0, consumedLength, buffer);
  1.2389 +    }
  1.2390 +
  1.2391 +    source->pos = source->writableBuffer.getTerminatedBuffer();
  1.2392 +}
  1.2393 +
  1.2394 +/**
  1.2395 +* Function to get the discontiguous collation element within the source.
  1.2396 +* Note this function will set the position to the appropriate places.
  1.2397 +* @param coll current collator used
  1.2398 +* @param source data string source
  1.2399 +* @param constart index to the start character in the contraction table
  1.2400 +* @return CE from coll->contractionCEs: of the matching entry, or of a table start if the search ends without one
  1.2401 +*/
  1.2402 +static
  1.2403 +uint32_t getDiscontiguous(const UCollator *coll, collIterate *source,
  1.2404 +                                const UChar *constart)
  1.2405 +{
  1.2406 +    /* source->pos currently points to the second combining character after
  1.2407 +       the start character */
  1.2408 +          const UChar *temppos      = source->pos;
  1.2409 +          UnicodeString buffer; /* collects the characters that were skipped over */
  1.2410 +    const UChar   *tempconstart = constart; /* start of the contraction table being searched */
  1.2411 +          uint8_t  tempflags    = source->flags; /* restored if no match is found */
  1.2412 +          UBool    multicontraction = FALSE;
  1.2413 +          collIterateState discState;
  1.2414 +
  1.2415 +          backupState(source, &discState);
  1.2416 +
  1.2417 +    buffer.setTo(peekCodePoint(source, -1));
  1.2418 +    for (;;) {
  1.2419 +        UChar    *UCharOffset;
  1.2420 +        UChar     schar,
  1.2421 +                  tchar;
  1.2422 +        uint32_t  result;
  1.2423 +        /* stop at the end of the text or at a code point with combining class 0 */
  1.2424 +        if (((source->flags & UCOL_ITER_HASLEN) && source->pos >= source->endp)
  1.2425 +            || (peekCodeUnit(source, 0) == 0  &&
  1.2426 +            //|| (*source->pos == 0  &&
  1.2427 +                ((source->flags & UCOL_ITER_INNORMBUF) == 0 ||
  1.2428 +                 source->fcdPosition == NULL ||
  1.2429 +                 source->fcdPosition == source->endp ||
  1.2430 +                 *(source->fcdPosition) == 0 ||
  1.2431 +                 u_getCombiningClass(*(source->fcdPosition)) == 0)) ||
  1.2432 +                 /* end of string in null terminated string or stopped by a
  1.2433 +                 null character, note fcd does not always point to a base
  1.2434 +                 character after the discontiguous change */
  1.2435 +                 u_getCombiningClass(peekCodePoint(source, 0)) == 0) {
  1.2436 +                 //u_getCombiningClass(*(source->pos)) == 0) {
  1.2437 +            //constart = (UChar *)coll->image + getContractOffset(CE);
  1.2438 +            if (multicontraction) {
  1.2439 +                source->pos    = temppos - 1;
  1.2440 +                setDiscontiguosAttribute(source, buffer);
  1.2441 +                return *(coll->contractionCEs +
  1.2442 +                                    (tempconstart - coll->contractionIndex));
  1.2443 +            }
  1.2444 +            constart = tempconstart;
  1.2445 +            break;
  1.2446 +        }
  1.2447 +
  1.2448 +        UCharOffset = (UChar *)(tempconstart + 1); /* skip the backward offset*/
  1.2449 +        schar = getNextNormalizedChar(source);
  1.2450 +        /* advance through the table entries while they are smaller than schar */
  1.2451 +        while (schar > (tchar = *UCharOffset)) {
  1.2452 +            UCharOffset++;
  1.2453 +        }
  1.2454 +
  1.2455 +        if (schar != tchar) {
  1.2456 +            /* not the correct codepoint. we stuff the current codepoint into
  1.2457 +            the discontiguous buffer and try the next character */
  1.2458 +            buffer.append(schar);
  1.2459 +            continue;
  1.2460 +        }
  1.2461 +        else {
  1.2462 +            if (u_getCombiningClass(schar) == /* same class as the code point two back: also skipped */
  1.2463 +                u_getCombiningClass(peekCodePoint(source, -2))) {
  1.2464 +                buffer.append(schar);
  1.2465 +                continue;
  1.2466 +            }
  1.2467 +            result = *(coll->contractionCEs +
  1.2468 +                                      (UCharOffset - coll->contractionIndex));
  1.2469 +        }
  1.2470 +
  1.2471 +        if (result == UCOL_NOT_FOUND) {
  1.2472 +          break;
  1.2473 +        } else if (isContraction(result)) {
  1.2474 +            /* this is a multi-contraction: continue the search in the table it refers to */
  1.2475 +            tempconstart = (UChar *)coll->image + getContractOffset(result);
  1.2476 +            if (*(coll->contractionCEs + (constart - coll->contractionIndex))
  1.2477 +                != UCOL_NOT_FOUND) {
  1.2478 +                multicontraction = TRUE;
  1.2479 +                temppos       = source->pos + 1;
  1.2480 +            }
  1.2481 +        } else {
  1.2482 +            setDiscontiguosAttribute(source, buffer);
  1.2483 +            return result;
  1.2484 +        }
  1.2485 +    }
  1.2486 +
  1.2487 +    /* no problems simply reverting just like that,
  1.2488 +    if we are in string before getting into this function, points back to
  1.2489 +    string hence no problem.
  1.2490 +    if we are in normalization buffer before getting into this function,
  1.2491 +    since we'll never use another normalization within this function, we
  1.2492 +    know that fcdposition points to a base character. the normalization buffer
  1.2493 +    never change, hence this revert works. */
  1.2494 +    loadState(source, &discState, TRUE);
  1.2495 +    goBackOne(source);
  1.2496 +
  1.2497 +    //source->pos   = temppos - 1;
  1.2498 +    source->flags = tempflags;
  1.2499 +    return *(coll->contractionCEs + (constart - coll->contractionIndex));
  1.2500 +}
  1.2501 +
  1.2502 +/* Produces the implicit CEs for cp from uprv_uca_getImplicitPrimary (Mark's getImplicitPrimary code). */
  1.2503 +static inline uint32_t getImplicit(UChar32 cp, collIterate *collationSource) {
  1.2504 +    const uint32_t implicitPrimary = uprv_uca_getImplicitPrimary(cp);
  1.2505 +    *collationSource->CEpos = ((implicitPrimary & 0x0000FFFF) << 16) | 0x000000C0; // stored at CEpos, not returned
  1.2506 +    collationSource->CEpos++;
  1.2507 +    collationSource->offsetRepeatCount++;
  1.2508 +    return (implicitPrimary & UCOL_PRIMARYMASK) | 0x00000505; // This was 'order'
  1.2509 +}
  1.2510 +
  1.2511 +/**
  1.2512 +* Replaces the unit at index 0 of the buffer (the front null terminator) with the argument
  1.2513 +* character and inserts a new null terminator in front; pos then points just past ch.
  1.2514 +* @param data collation element iterator data
  1.2515 +* @param ch character to be inserted
  1.2516 +*/
  1.2517 +static inline void insertBufferFront(collIterate *data, UChar ch) {
  1.2518 +    UnicodeString &buffer = data->writableBuffer;
  1.2519 +    buffer.setCharAt(0, ch).insert(0, (UChar)0);
  1.2520 +    data->pos = buffer.getTerminatedBuffer() + 2;
  1.2521 +}
  1.2522 +
  1.2523 +/**
  1.2524 +* Special normalization function for contraction in the previous iterator.
  1.2525 +* Normalizes (NFD) the text from fcdPosition + 1 (or the string start if
  1.2526 +* fcdPosition is NULL) through pos into the buffer, after a leading NUL and
  1.2527 +* before the saved endOfBuffer text. pos then points just past the normalized
  1.2528 +* text inside the buffer, and the flags are changed accordingly.
  1.2529 +* @param data collation iterator data
  1.2530 +* @param status receives the error code of the normalization
  1.2531 +*/
  1.2532 +static
  1.2533 +inline void normalizePrevContraction(collIterate *data, UErrorCode *status)
  1.2534 +{
  1.2535 +    const UChar *pEnd = data->pos + 1;         /* End normalize + 1 */
  1.2536 +    const UChar *pStart;
  1.2537 +    /* NOTE(review): HASLEN is used below as the test for "buffer not used yet" -- confirm */
  1.2538 +    UnicodeString endOfBuffer;
  1.2539 +    if (data->flags & UCOL_ITER_HASLEN) {
  1.2540 +        /*
  1.2541 +        normalization buffer not used yet, we'll pull down the next
  1.2542 +        character into the end of the buffer
  1.2543 +        */
  1.2544 +        endOfBuffer.setTo(*pEnd);
  1.2545 +    }
  1.2546 +    else {
  1.2547 +        endOfBuffer.setTo(data->writableBuffer, 1);  // after the leading NUL
  1.2548 +    }
  1.2549 +    /* the range to normalize starts right after fcdPosition, or at the string start */
  1.2550 +    if (data->fcdPosition == NULL) {
  1.2551 +        pStart = data->string;
  1.2552 +    }
  1.2553 +    else {
  1.2554 +        pStart = data->fcdPosition + 1;
  1.2555 +    }
  1.2556 +    int32_t normLen =
  1.2557 +        data->nfd->normalize(UnicodeString(FALSE, pStart, (int32_t)(pEnd - pStart)),
  1.2558 +                             data->writableBuffer,
  1.2559 +                             *status).
  1.2560 +        length();
  1.2561 +    if(U_FAILURE(*status)) { // pos and flags are left as they were
  1.2562 +        return;
  1.2563 +    }
  1.2564 +    /*
  1.2565 +    this puts the null termination in front of the normalized string instead
  1.2566 +    of the end; pos is set just past the normalized text
  1.2567 +    */
  1.2568 +    data->pos =
  1.2569 +        data->writableBuffer.insert(0, (UChar)0).append(endOfBuffer).getTerminatedBuffer() +
  1.2570 +        1 + normLen;
  1.2571 +    data->origFlags  = data->flags;
  1.2572 +    data->flags     |= UCOL_ITER_INNORMBUF;
  1.2573 +    data->flags     &= ~(UCOL_ITER_NORM | UCOL_ITER_HASLEN);
  1.2574 +}
  1.2575 +
  1.2576 +/**
  1.2577 +* Contraction character management function that returns the previous character
  1.2578 +* for the backwards iterator.
  1.2579 +* Returns the previous character directly if normalization is off, or if the
  1.2580 +* previous character is in the buffer and is not its leading NUL.
  1.2581 +* Else it checks previous character in data string to see if it is
  1.2582 +* normalizable. If it is not, the character is simply copied into the buffer,
  1.2583 +* else the whole normalized substring is copied into the buffer, including the
  1.2584 +* current character.
  1.2585 +* @param data collation element iterator data
  1.2586 +* @param status receives the error code of a normalization, if one is needed
  1.2587 +* @return previous character
  1.2588 +*/
  1.2589 +static
  1.2590 +inline UChar getPrevNormalizedChar(collIterate *data, UErrorCode *status)
  1.2591 +{
  1.2592 +    UChar  prevch;
  1.2593 +    UChar  ch;
  1.2594 +    const UChar *start;
  1.2595 +    UBool  innormbuf = (UBool)(data->flags & UCOL_ITER_INNORMBUF);
  1.2596 +    if ((data->flags & (UCOL_ITER_NORM | UCOL_ITER_INNORMBUF)) == 0 ||
  1.2597 +        (innormbuf && *(data->pos - 1) != 0)) {
  1.2598 +        /*
  1.2599 +        if no normalization.
  1.2600 +        if previous character is in normalized buffer, no further normalization
  1.2601 +        is required
  1.2602 +        */
  1.2603 +      if(data->flags & UCOL_USE_ITERATOR) {
  1.2604 +        data->iterator->move(data->iterator, -1, UITER_CURRENT);
  1.2605 +        return (UChar)data->iterator->next(data->iterator);
  1.2606 +      } else {
  1.2607 +        return *(data->pos - 1);
  1.2608 +      }
  1.2609 +    }
  1.2610 +
  1.2611 +    start = data->pos;
  1.2612 +    if ((data->fcdPosition==NULL)||(data->flags & UCOL_ITER_HASLEN)) {
  1.2613 +        /* in data string */
  1.2614 +        if ((start - 1) == data->string) {
  1.2615 +            return *(start - 1);
  1.2616 +        }
  1.2617 +        start --;
  1.2618 +        ch     = *start;
  1.2619 +        prevch = *(start - 1);
  1.2620 +    }
  1.2621 +    else {
  1.2622 +        /*
  1.2623 +        in writable buffer, at this point fcdPosition can not be NULL.
  1.2624 +        see contracting tag.
  1.2625 +        */
  1.2626 +        if (data->fcdPosition == data->string) {
  1.2627 +            /* at the start of the string, just dump it into the normalizer */
  1.2628 +            insertBufferFront(data, *(data->fcdPosition));
  1.2629 +            data->fcdPosition = NULL;
  1.2630 +            return *(data->pos - 1);
  1.2631 +        }
  1.2632 +        start  = data->fcdPosition;
  1.2633 +        ch     = *start;
  1.2634 +        prevch = *(start - 1);
  1.2635 +    }
  1.2636 +    /*
  1.2637 +    * if the current character is not fcd.
  1.2638 +    * NOTE(review): on the buffer branch start == fcdPosition, so the test below can only pass on the string branch -- confirm.
  1.2639 +    */
  1.2640 +    if (data->fcdPosition > start &&
  1.2641 +       (ch >= NFC_ZERO_CC_BLOCK_LIMIT_ || prevch >= NFC_ZERO_CC_BLOCK_LIMIT_))
  1.2642 +    {
  1.2643 +        /*
  1.2644 +        Need a more complete FCD check and possible normalization.
  1.2645 +        The normalized substring will be placed in the buffer.
  1.2646 +        */
  1.2647 +        const UChar *backuppos = data->pos;
  1.2648 +        data->pos = start;
  1.2649 +        if (collPrevIterFCD(data)) {
  1.2650 +            normalizePrevContraction(data, status);
  1.2651 +            return *(data->pos - 1);
  1.2652 +        }
  1.2653 +        data->pos = backuppos;
  1.2654 +        data->fcdPosition ++;
  1.2655 +    }
  1.2656 +
  1.2657 +    if (innormbuf) {
  1.2658 +    /*
  1.2659 +    no normalization is to be done hence only one character will be
  1.2660 +    inserted at the front of the buffer.
  1.2661 +    */
  1.2662 +        insertBufferFront(data, ch);
  1.2663 +        data->fcdPosition --;
  1.2664 +    }
  1.2665 +
  1.2666 +    return ch;
  1.2667 +}
  1.2668 +
  1.2669 +/* This function handles the special CEs like contractions, expansions, surrogates, Thai */
  1.2670 +/* It is called by getNextCE */
  1.2671 +
  1.2672 +/* The following should be even */
  1.2673 +#define UCOL_MAX_DIGITS_FOR_NUMBER 254
  1.2674 +
  1.2675 +uint32_t ucol_prv_getSpecialCE(const UCollator *coll, UChar ch, uint32_t CE, collIterate *source, UErrorCode *status) {
  1.2676 +    collIterateState entryState;
  1.2677 +    backupState(source, &entryState);
  1.2678 +    UChar32 cp = ch;
  1.2679 +
  1.2680 +    for (;;) {
  1.2681 +        // This loop will repeat only in the case of contractions, and only when a contraction
  1.2682 +        //   is found and the first CE resulting from that contraction is itself a special
  1.2683 +        //   (an expansion, for example.)  All other special CE types are fully handled the
  1.2684 +        //   first time through, and the loop exits.
  1.2685 +
  1.2686 +        const uint32_t *CEOffset = NULL;
  1.2687 +        switch(getCETag(CE)) {
  1.2688 +        case NOT_FOUND_TAG:
  1.2689 +            /* This one is not found, and we'll let somebody else bother about it... no more games */
  1.2690 +            return CE;
  1.2691 +        case SPEC_PROC_TAG:
  1.2692 +            {
  1.2693 +                // Special processing is getting a CE that is preceded by a certain prefix
  1.2694 +                // Currently this is only needed for optimizing Japanese length and iteration marks.
  1.2695 +                // When we encouter a special processing tag, we go backwards and try to see if
  1.2696 +                // we have a match.
  1.2697 +                // Contraction tables are used - so the whole process is not unlike contraction.
  1.2698 +                // prefix data is stored backwards in the table.
  1.2699 +                const UChar *UCharOffset;
  1.2700 +                UChar schar, tchar;
  1.2701 +                collIterateState prefixState;
  1.2702 +                backupState(source, &prefixState);
  1.2703 +                loadState(source, &entryState, TRUE);
  1.2704 +                goBackOne(source); // We want to look at the point where we entered - actually one
  1.2705 +                // before that...
  1.2706 +
  1.2707 +                for(;;) {
  1.2708 +                    // This loop will run once per source string character, for as long as we
  1.2709 +                    //  are matching a potential contraction sequence
  1.2710 +
  1.2711 +                    // First we position ourselves at the begining of contraction sequence
  1.2712 +                    const UChar *ContractionStart = UCharOffset = (UChar *)coll->image+getContractOffset(CE);
  1.2713 +                    if (collIter_bos(source)) {
  1.2714 +                        CE = *(coll->contractionCEs + (UCharOffset - coll->contractionIndex));
  1.2715 +                        break;
  1.2716 +                    }
  1.2717 +                    schar = getPrevNormalizedChar(source, status);
  1.2718 +                    goBackOne(source);
  1.2719 +
  1.2720 +                    while(schar > (tchar = *UCharOffset)) { /* since the contraction codepoints should be ordered, we skip all that are smaller */
  1.2721 +                        UCharOffset++;
  1.2722 +                    }
  1.2723 +
  1.2724 +                    if (schar == tchar) {
  1.2725 +                        // Found the source string char in the table.
  1.2726 +                        //  Pick up the corresponding CE from the table.
  1.2727 +                        CE = *(coll->contractionCEs +
  1.2728 +                            (UCharOffset - coll->contractionIndex));
  1.2729 +                    }
  1.2730 +                    else
  1.2731 +                    {
  1.2732 +                        // Source string char was not in the table.
  1.2733 +                        //   We have not found the prefix.
  1.2734 +                        CE = *(coll->contractionCEs +
  1.2735 +                            (ContractionStart - coll->contractionIndex));
  1.2736 +                    }
  1.2737 +
  1.2738 +                    if(!isPrefix(CE)) {
  1.2739 +                        // The source string char was in the contraction table, and the corresponding
  1.2740 +                        //   CE is not a prefix CE.  We found the prefix, break
  1.2741 +                        //   out of loop, this CE will end up being returned.  This is the normal
  1.2742 +                        //   way out of prefix handling when the source actually contained
  1.2743 +                        //   the prefix.
  1.2744 +                        break;
  1.2745 +                    }
  1.2746 +                }
  1.2747 +                if(CE != UCOL_NOT_FOUND) { // we found something and we can merilly continue
  1.2748 +                    loadState(source, &prefixState, TRUE);
  1.2749 +                    if(source->origFlags & UCOL_USE_ITERATOR) {
  1.2750 +                        source->flags = source->origFlags;
  1.2751 +                    }
  1.2752 +                } else { // prefix search was a failure, we have to backup all the way to the start
  1.2753 +                    loadState(source, &entryState, TRUE);
  1.2754 +                }
  1.2755 +                break;
  1.2756 +            }
  1.2757 +        case CONTRACTION_TAG:
  1.2758 +            {
  1.2759 +                /* This should handle contractions */
  1.2760 +                collIterateState state;
  1.2761 +                backupState(source, &state);
  1.2762 +                uint32_t firstCE = *(coll->contractionCEs + ((UChar *)coll->image+getContractOffset(CE) - coll->contractionIndex)); //UCOL_NOT_FOUND;
  1.2763 +                const UChar *UCharOffset;
  1.2764 +                UChar schar, tchar;
  1.2765 +
  1.2766 +                for (;;) {
  1.2767 +                    /* This loop will run once per source string character, for as long as we     */
  1.2768 +                    /*  are matching a potential contraction sequence                  */
  1.2769 +
  1.2770 +                    /* First we position ourselves at the beginning of contraction sequence */
  1.2771 +                    const UChar *ContractionStart = UCharOffset = (UChar *)coll->image+getContractOffset(CE);
  1.2772 +
  1.2773 +                    if (collIter_eos(source)) {
  1.2774 +                        // Ran off the end of the source string.
  1.2775 +                        CE = *(coll->contractionCEs + (UCharOffset - coll->contractionIndex));
  1.2776 +                        // So we'll pick whatever we have at the point...
  1.2777 +                        if (CE == UCOL_NOT_FOUND) {
  1.2778 +                            // back up the source over all the chars we scanned going into this contraction.
  1.2779 +                            CE = firstCE;
  1.2780 +                            loadState(source, &state, TRUE);
  1.2781 +                            if(source->origFlags & UCOL_USE_ITERATOR) {
  1.2782 +                                source->flags = source->origFlags;
  1.2783 +                            }
  1.2784 +                        }
  1.2785 +                        break;
  1.2786 +                    }
  1.2787 +
  1.2788 +                    uint8_t maxCC = (uint8_t)(*(UCharOffset)&0xFF); /*get the discontiguos stuff */ /* skip the backward offset, see above */
  1.2789 +                    uint8_t allSame = (uint8_t)(*(UCharOffset++)>>8);
  1.2790 +
  1.2791 +                    schar = getNextNormalizedChar(source);
  1.2792 +                    while(schar > (tchar = *UCharOffset)) { /* since the contraction codepoints should be ordered, we skip all that are smaller */
  1.2793 +                        UCharOffset++;
  1.2794 +                    }
  1.2795 +
  1.2796 +                    if (schar == tchar) {
  1.2797 +                        // Found the source string char in the contraction table.
  1.2798 +                        //  Pick up the corresponding CE from the table.
  1.2799 +                        CE = *(coll->contractionCEs +
  1.2800 +                            (UCharOffset - coll->contractionIndex));
  1.2801 +                    }
  1.2802 +                    else
  1.2803 +                    {
  1.2804 +                        // Source string char was not in contraction table.
  1.2805 +                        //   Unless we have a discontiguous contraction, we have finished
  1.2806 +                        //   with this contraction.
  1.2807 +                        // in order to do the proper detection, we
  1.2808 +                        // need to see if we're dealing with a supplementary
  1.2809 +                        /* We test whether the next two char are surrogate pairs.
  1.2810 +                        * This test is done if the iterator is not NULL.
  1.2811 +                        * If there is no surrogate pair, the iterator
  1.2812 +                        * goes back one if needed. */
  1.2813 +                        UChar32 miss = schar;
  1.2814 +                        if (source->iterator) {
  1.2815 +                            UChar32 surrNextChar; /* the next char in the iteration to test */
  1.2816 +                            int32_t prevPos; /* holds the previous position before move forward of the source iterator */
  1.2817 +                            if(U16_IS_LEAD(schar) && source->iterator->hasNext(source->iterator)) {
  1.2818 +                                prevPos = source->iterator->index;
  1.2819 +                                surrNextChar = getNextNormalizedChar(source);
  1.2820 +                                if (U16_IS_TRAIL(surrNextChar)) {
  1.2821 +                                    miss = U16_GET_SUPPLEMENTARY(schar, surrNextChar);
  1.2822 +                                } else if (prevPos < source->iterator->index){
  1.2823 +                                    goBackOne(source);
  1.2824 +                                }
  1.2825 +                            }
  1.2826 +                        } else if (U16_IS_LEAD(schar)) {
  1.2827 +                            miss = U16_GET_SUPPLEMENTARY(schar, getNextNormalizedChar(source));
  1.2828 +                        }
  1.2829 +
  1.2830 +                        uint8_t sCC;
  1.2831 +                        if (miss < 0x300 ||
  1.2832 +                            maxCC == 0 ||
  1.2833 +                            (sCC = i_getCombiningClass(miss, coll)) == 0 ||
  1.2834 +                            sCC>maxCC ||
  1.2835 +                            (allSame != 0 && sCC == maxCC) ||
  1.2836 +                            collIter_eos(source))
  1.2837 +                        {
  1.2838 +                            //  Contraction can not be discontiguous.
  1.2839 +                            goBackOne(source);  // back up the source string by one,
  1.2840 +                            //  because  the character we just looked at was
  1.2841 +                            //  not part of the contraction.   */
  1.2842 +                            if(U_IS_SUPPLEMENTARY(miss)) {
  1.2843 +                                goBackOne(source);
  1.2844 +                            }
  1.2845 +                            CE = *(coll->contractionCEs +
  1.2846 +                                (ContractionStart - coll->contractionIndex));
  1.2847 +                        } else {
  1.2848 +                            //
  1.2849 +                            // Contraction is possibly discontiguous.
  1.2850 +                            //   Scan more of source string looking for a match
  1.2851 +                            //
  1.2852 +                            UChar tempchar;
  1.2853 +                            /* find the next character if schar is not a base character
  1.2854 +                            and we are not yet at the end of the string */
  1.2855 +                            tempchar = getNextNormalizedChar(source);
  1.2856 +                            // probably need another supplementary thingie here
  1.2857 +                            goBackOne(source);
  1.2858 +                            if (i_getCombiningClass(tempchar, coll) == 0) {
  1.2859 +                                goBackOne(source);
  1.2860 +                                if(U_IS_SUPPLEMENTARY(miss)) {
  1.2861 +                                    goBackOne(source);
  1.2862 +                                }
  1.2863 +                                /* Spit out the last char of the string, wasn't tasty enough */
  1.2864 +                                CE = *(coll->contractionCEs +
  1.2865 +                                    (ContractionStart - coll->contractionIndex));
  1.2866 +                            } else {
  1.2867 +                                CE = getDiscontiguous(coll, source, ContractionStart);
  1.2868 +                            }
  1.2869 +                        }
  1.2870 +                    } // else after if(schar == tchar)
  1.2871 +
  1.2872 +                    if(CE == UCOL_NOT_FOUND) {
  1.2873 +                        /* The Source string did not match the contraction that we were checking.  */
  1.2874 +                        /*  Back up the source position to undo the effects of having partially    */
  1.2875 +                        /*   scanned through what ultimately proved to not be a contraction.       */
  1.2876 +                        loadState(source, &state, TRUE);
  1.2877 +                        CE = firstCE;
  1.2878 +                        break;
  1.2879 +                    }
  1.2880 +
  1.2881 +                    if(!isContraction(CE)) {
  1.2882 +                        // The source string char was in the contraction table, and the corresponding
  1.2883 +                        //   CE is not a contraction CE.  We completed the contraction, break
  1.2884 +                        //   out of loop, this CE will end up being returned.  This is the normal
  1.2885 +                        //   way out of contraction handling when the source actually contained
  1.2886 +                        //   the contraction.
  1.2887 +                        break;
  1.2888 +                    }
  1.2889 +
  1.2890 +
  1.2891 +                    // The source string char was in the contraction table, and the corresponding
  1.2892 +                    //   CE IS a contraction CE.  We will continue looping to check the source
  1.2893 +                    //   string for the remaining chars in the contraction.
  1.2894 +                    uint32_t tempCE = *(coll->contractionCEs + (ContractionStart - coll->contractionIndex));
  1.2895 +                    if(tempCE != UCOL_NOT_FOUND) {
  1.2896 +                        // We have scanned a section of source string for which there is a
  1.2897 +                        //  CE from the contraction table.  Remember the CE and scan position, so
  1.2898 +                        //  that we can return to this point if further scanning fails to
  1.2899 +                        //  match a longer contraction sequence.
  1.2900 +                        firstCE = tempCE;
  1.2901 +
  1.2902 +                        goBackOne(source);
  1.2903 +                        backupState(source, &state);
  1.2904 +                        getNextNormalizedChar(source);
  1.2905 +
  1.2906 +                        // Another way to do this is:
  1.2907 +                        //collIterateState tempState;
  1.2908 +                        //backupState(source, &tempState);
  1.2909 +                        //goBackOne(source);
  1.2910 +                        //backupState(source, &state);
  1.2911 +                        //loadState(source, &tempState, TRUE);
  1.2912 +
  1.2913 +                        // The problem is that for incomplete contractions we have to remember the previous
  1.2914 +                        // position. Before, the only thing I needed to do was state.pos--;
  1.2915 +                        // After iterator introduction and especially after introduction of normalizing
  1.2916 +                        // iterators, it became much more difficult to decrease the saved state.
  1.2917 +                        // I'm not yet sure which of the two methods above is faster.
  1.2918 +                    }
  1.2919 +                } // for(;;)
  1.2920 +                break;
  1.2921 +            } // case CONTRACTION_TAG:
  1.2922 +        case LONG_PRIMARY_TAG:
  1.2923 +            {
  1.2924 +                *(source->CEpos++) = ((CE & 0xFF)<<24)|UCOL_CONTINUATION_MARKER;
  1.2925 +                CE = ((CE & 0xFFFF00) << 8) | (UCOL_BYTE_COMMON << 8) | UCOL_BYTE_COMMON;
  1.2926 +                source->offsetRepeatCount += 1;
  1.2927 +                return CE;
  1.2928 +            }
  1.2929 +        case EXPANSION_TAG:
  1.2930 +            {
  1.2931 +                /* This should handle expansion. */
  1.2932 +                /* NOTE: we can encounter both continuations and expansions in an expansion! */
  1.2933 +                /* I have to decide where continuations are going to be dealt with */
  1.2934 +                uint32_t size;
  1.2935 +                uint32_t i;    /* general counter */
  1.2936 +
  1.2937 +                CEOffset = (uint32_t *)coll->image+getExpansionOffset(CE); /* find the offset to expansion table */
  1.2938 +                size = getExpansionCount(CE);
  1.2939 +                CE = *CEOffset++;
  1.2940 +              //source->offsetRepeatCount = -1;
  1.2941 +
  1.2942 +                if(size != 0) { /* if there are less than 16 elements in expansion, we don't terminate */
  1.2943 +                    for(i = 1; i<size; i++) {
  1.2944 +                        *(source->CEpos++) = *CEOffset++;
  1.2945 +                        source->offsetRepeatCount += 1;
  1.2946 +                    }
  1.2947 +                } else { /* else, we do */
  1.2948 +                    while(*CEOffset != 0) {
  1.2949 +                        *(source->CEpos++) = *CEOffset++;
  1.2950 +                        source->offsetRepeatCount += 1;
  1.2951 +                    }
  1.2952 +                }
  1.2953 +
  1.2954 +                return CE;
  1.2955 +            }
  1.2956 +        case DIGIT_TAG:
  1.2957 +            {
  1.2958 +                /*
  1.2959 +                We do a check to see if we want to collate digits as numbers; if so we generate
  1.2960 +                a custom collation key. Otherwise we pull out the value stored in the expansion table.
  1.2961 +                */
  1.2962 +                //uint32_t size;
  1.2963 +                uint32_t i;    /* general counter */
  1.2964 +
  1.2965 +                if (source->coll->numericCollation == UCOL_ON){
  1.2966 +                    collIterateState digitState = {0,0,0,0,0,0,0,0,0};
  1.2967 +                    UChar32 char32 = 0;
  1.2968 +                    int32_t digVal = 0;
  1.2969 +
  1.2970 +                    uint32_t digIndx = 0;
  1.2971 +                    uint32_t endIndex = 0;
  1.2972 +                    uint32_t trailingZeroIndex = 0;
  1.2973 +
  1.2974 +                    uint8_t collateVal = 0;
  1.2975 +
  1.2976 +                    UBool nonZeroValReached = FALSE;
  1.2977 +
  1.2978 +                    uint8_t numTempBuf[UCOL_MAX_DIGITS_FOR_NUMBER/2 + 3]; // I just need a temporary place to store my generated CEs.
  1.2979 +                    /*
  1.2980 +                         We parse the source string until we hit a char that's NOT a digit.
  1.2981 +                        Use this u_charDigitValue. This might be slow because we have to
  1.2982 +                        handle surrogates...
  1.2983 +                    */
  1.2984 +            /*
  1.2985 +                    if (U16_IS_LEAD(ch)){
  1.2986 +                      if (!collIter_eos(source)) {
  1.2987 +                        backupState(source, &digitState);
  1.2988 +                        UChar trail = getNextNormalizedChar(source);
  1.2989 +                        if(U16_IS_TRAIL(trail)) {
  1.2990 +                          char32 = U16_GET_SUPPLEMENTARY(ch, trail);
  1.2991 +                        } else {
  1.2992 +                          loadState(source, &digitState, TRUE);
  1.2993 +                          char32 = ch;
  1.2994 +                        }
  1.2995 +                      } else {
  1.2996 +                        char32 = ch;
  1.2997 +                      }
  1.2998 +                    } else {
  1.2999 +                      char32 = ch;
  1.3000 +                    }
  1.3001 +                    digVal = u_charDigitValue(char32);
  1.3002 +            */
  1.3003 +                    digVal = u_charDigitValue(cp); // if we have arrived here, we have
  1.3004 +                    // already processed possible supplementaries that triggered the digit tag -
  1.3005 +                    // all supplementaries are marked in the UCA.
  1.3006 +                    /*
  1.3007 +                        We  pad a zero in front of the first element anyways. This takes
  1.3008 +                        care of the (probably) most common case where people are sorting things followed
  1.3009 +                        by a single digit
  1.3010 +                    */
  1.3011 +                    digIndx++;
  1.3012 +                    for(;;){
  1.3013 +                        // Make sure we have enough space. No longer needed;
  1.3014 +                        // at this point digIndx now has a max value of UCOL_MAX_DIGITS_FOR_NUMBER
  1.3015 +                        // (it has been pre-incremented) so we just ensure that numTempBuf is big enough
  1.3016 +                        // (UCOL_MAX_DIGITS_FOR_NUMBER/2 + 3).
  1.3017 +
  1.3018 +                        // Skipping over leading zeroes.
  1.3019 +                        if (digVal != 0) {
  1.3020 +                            nonZeroValReached = TRUE;
  1.3021 +                        }
  1.3022 +                        if (nonZeroValReached) {
  1.3023 +                            /*
  1.3024 +                            We parse the digit string into base 100 numbers (this fits into a byte).
  1.3025 +                            We only add to the buffer in twos, thus if we are parsing an odd character,
  1.3026 +                            that serves as the 'tens' digit while the if we are parsing an even one, that
  1.3027 +                            is the 'ones' digit. We dumped the parsed base 100 value (collateVal) into
  1.3028 +                            a buffer. We multiply each collateVal by 2 (to give us room) and add 5 (to avoid
  1.3029 +                            overlapping magic CE byte values). The last byte we subtract 1 to ensure it is less
  1.3030 +                            than all the other bytes.
  1.3031 +                            */
  1.3032 +
  1.3033 +                            if (digIndx % 2 == 1){
  1.3034 +                                collateVal += (uint8_t)digVal;
  1.3035 +
  1.3036 +                                // We don't enter the low-order-digit case unless we've already seen
  1.3037 +                                // the high order, or for the first digit, which is always non-zero.
  1.3038 +                                if (collateVal != 0)
  1.3039 +                                    trailingZeroIndex = 0;
  1.3040 +
  1.3041 +                                numTempBuf[(digIndx/2) + 2] = collateVal*2 + 6;
  1.3042 +                                collateVal = 0;
  1.3043 +                            }
  1.3044 +                            else{
  1.3045 +                                // We drop the collation value into the buffer so if we need to do
  1.3046 +                                // a "front patch" we don't have to check to see if we're hitting the
  1.3047 +                                // last element.
  1.3048 +                                collateVal = (uint8_t)(digVal * 10);
  1.3049 +
  1.3050 +                                // Check for trailing zeroes.
  1.3051 +                                if (collateVal == 0)
  1.3052 +                                {
  1.3053 +                                    if (!trailingZeroIndex)
  1.3054 +                                        trailingZeroIndex = (digIndx/2) + 2;
  1.3055 +                                }
  1.3056 +                                else
  1.3057 +                                    trailingZeroIndex = 0;
  1.3058 +
  1.3059 +                                numTempBuf[(digIndx/2) + 2] = collateVal*2 + 6;
  1.3060 +                            }
  1.3061 +                            digIndx++;
  1.3062 +                        }
  1.3063 +
  1.3064 +                        // Get next character.
  1.3065 +                        if (!collIter_eos(source)){
  1.3066 +                            ch = getNextNormalizedChar(source);
  1.3067 +                            if (U16_IS_LEAD(ch)){
  1.3068 +                                if (!collIter_eos(source)) {
  1.3069 +                                    backupState(source, &digitState);
  1.3070 +                                    UChar trail = getNextNormalizedChar(source);
  1.3071 +                                    if(U16_IS_TRAIL(trail)) {
  1.3072 +                                        char32 = U16_GET_SUPPLEMENTARY(ch, trail);
  1.3073 +                                    } else {
  1.3074 +                                        loadState(source, &digitState, TRUE);
  1.3075 +                                        char32 = ch;
  1.3076 +                                    }
  1.3077 +                                }
  1.3078 +                            } else {
  1.3079 +                                char32 = ch;
  1.3080 +                            }
  1.3081 +
  1.3082 +                            if ((digVal = u_charDigitValue(char32)) == -1 || digIndx > UCOL_MAX_DIGITS_FOR_NUMBER){
  1.3083 +                                // Resetting position to point to the next unprocessed char. We
  1.3084 +                                // overshot it when doing our test/set for numbers.
  1.3085 +                                if (char32 > 0xFFFF) { // For surrogates.
  1.3086 +                                    loadState(source, &digitState, TRUE);
  1.3087 +                                    //goBackOne(source);
  1.3088 +                                }
  1.3089 +                                goBackOne(source);
  1.3090 +                                break;
  1.3091 +                            }
  1.3092 +                        } else {
  1.3093 +                            break;
  1.3094 +                        }
  1.3095 +                    }
  1.3096 +
  1.3097 +                    if (nonZeroValReached == FALSE){
  1.3098 +                        digIndx = 2;
  1.3099 +                        numTempBuf[2] = 6;
  1.3100 +                    }
  1.3101 +
  1.3102 +                    endIndex = trailingZeroIndex ? trailingZeroIndex : ((digIndx/2) + 2) ;
  1.3103 +                    if (digIndx % 2 != 0){
  1.3104 +                        /*
  1.3105 +                        We missed a value. Since digIndx isn't even, we stuck too many values into the buffer (this is what
  1.3106 +                        we get for padding the first byte with a zero). "Front-patch" now by pushing all nybbles forward.
  1.3107 +                        Doing it this way ensures that at least 50% of the time (statistically speaking) we'll only be doing a
  1.3108 +                        single pass and optimizes for strings with single digits. I'm just assuming that's the more common case.
  1.3109 +                        */
  1.3110 +
  1.3111 +                        for(i = 2; i < endIndex; i++){
  1.3112 +                            numTempBuf[i] =     (((((numTempBuf[i] - 6)/2) % 10) * 10) +
  1.3113 +                                (((numTempBuf[i+1])-6)/2) / 10) * 2 + 6;
  1.3114 +                        }
  1.3115 +                        --digIndx;
  1.3116 +                    }
  1.3117 +
  1.3118 +                    // Subtract one off of the last byte.
  1.3119 +                    numTempBuf[endIndex-1] -= 1;
  1.3120 +
  1.3121 +                    /*
  1.3122 +                    We want to skip over the first two slots in the buffer. The first slot
  1.3123 +                    is reserved for the header byte UCOL_CODAN_PLACEHOLDER. The second slot is for the
  1.3124 +                    sign/exponent byte: 0x80 + (decimalPos/2) & 7f.
  1.3125 +                    */
  1.3126 +                    numTempBuf[0] = UCOL_CODAN_PLACEHOLDER;
  1.3127 +                    numTempBuf[1] = (uint8_t)(0x80 + ((digIndx/2) & 0x7F));
  1.3128 +
  1.3129 +                    // Now transfer the collation key to our collIterate struct.
  1.3130 +                    // The total size for our collation key is endIndx bumped up to the next largest even value divided by two.
  1.3131 +                    //size = ((endIndex+1) & ~1)/2;
  1.3132 +                    CE = (((numTempBuf[0] << 8) | numTempBuf[1]) << UCOL_PRIMARYORDERSHIFT) | //Primary weight
  1.3133 +                        (UCOL_BYTE_COMMON << UCOL_SECONDARYORDERSHIFT) | // Secondary weight
  1.3134 +                        UCOL_BYTE_COMMON; // Tertiary weight.
  1.3135 +                    i = 2; // Reset the index into the buffer.
  1.3136 +                    while(i < endIndex)
  1.3137 +                    {
  1.3138 +                        uint32_t primWeight = numTempBuf[i++] << 8;
  1.3139 +                        if ( i < endIndex)
  1.3140 +                            primWeight |= numTempBuf[i++];
  1.3141 +                        *(source->CEpos++) = (primWeight << UCOL_PRIMARYORDERSHIFT) | UCOL_CONTINUATION_MARKER;
  1.3142 +                    }
  1.3143 +
  1.3144 +                } else {
  1.3145 +                    // no numeric mode, we'll just switch to whatever we stashed and continue
  1.3146 +                    CEOffset = (uint32_t *)coll->image+getExpansionOffset(CE); /* find the offset to expansion table */
  1.3147 +                    CE = *CEOffset++;
  1.3148 +                    break;
  1.3149 +                }
  1.3150 +                return CE;
  1.3151 +            }
  1.3152 +            /* various implicits optimization */
  1.3153 +        case IMPLICIT_TAG:        /* everything that is not defined otherwise */
  1.3154 +            /* UCA is filled with these. Tailorings are NOT_FOUND */
  1.3155 +            return getImplicit(cp, source);
  1.3156 +        case CJK_IMPLICIT_TAG:    /* 0x3400-0x4DB5, 0x4E00-0x9FA5, 0xF900-0xFA2D*/
  1.3157 +            // TODO: remove CJK_IMPLICIT_TAG completely - handled by the getImplicit
  1.3158 +            return getImplicit(cp, source);
  1.3159 +        case HANGUL_SYLLABLE_TAG: /* AC00-D7AF*/
  1.3160 +            {
  1.3161 +                static const uint32_t
  1.3162 +                    SBase = 0xAC00, LBase = 0x1100, VBase = 0x1161, TBase = 0x11A7;
  1.3163 +                //const uint32_t LCount = 19;
  1.3164 +                static const uint32_t VCount = 21;
  1.3165 +                static const uint32_t TCount = 28;
  1.3166 +                //const uint32_t NCount = VCount * TCount;   // 588
  1.3167 +                //const uint32_t SCount = LCount * NCount;   // 11172
  1.3168 +                uint32_t L = ch - SBase;
  1.3169 +
  1.3170 +                // divide into pieces
  1.3171 +
  1.3172 +                uint32_t T = L % TCount; // we do it in this order since some compilers can do % and / in one operation
  1.3173 +                L /= TCount;
  1.3174 +                uint32_t V = L % VCount;
  1.3175 +                L /= VCount;
  1.3176 +
  1.3177 +                // offset them
  1.3178 +
  1.3179 +                L += LBase;
  1.3180 +                V += VBase;
  1.3181 +                T += TBase;
  1.3182 +
  1.3183 +                // return the first CE, but first put the rest into the expansion buffer
  1.3184 +                if (!source->coll->image->jamoSpecial) { // FAST PATH
  1.3185 +
  1.3186 +                    *(source->CEpos++) = UTRIE_GET32_FROM_LEAD(&coll->mapping, V);
  1.3187 +                    if (T != TBase) {
  1.3188 +                        *(source->CEpos++) = UTRIE_GET32_FROM_LEAD(&coll->mapping, T);
  1.3189 +                    }
  1.3190 +
  1.3191 +                    return UTRIE_GET32_FROM_LEAD(&coll->mapping, L);
  1.3192 +
  1.3193 +                } else { // Jamo is Special
  1.3194 +                    // Since Hanguls pass the FCD check, it is
  1.3195 +                    // guaranteed that we won't be in
  1.3196 +                    // the normalization buffer if something like this happens
  1.3197 +
  1.3198 +                    // However, if we are using a uchar iterator and normalization
  1.3199 +                    // is ON, the Hangul that lead us here is going to be in that
  1.3200 +                    // normalization buffer. Here we want to restore the uchar
  1.3201 +                    // iterator state and pull out of the normalization buffer
  1.3202 +                    if(source->iterator != NULL && source->flags & UCOL_ITER_INNORMBUF) {
  1.3203 +                        source->flags = source->origFlags; // restore the iterator
  1.3204 +                        source->pos = NULL;
  1.3205 +                    }
  1.3206 +
  1.3207 +                    // Move Jamos into normalization buffer
  1.3208 +                    UChar *buffer = source->writableBuffer.getBuffer(4);
  1.3209 +                    int32_t bufferLength;
  1.3210 +                    buffer[0] = (UChar)L;
  1.3211 +                    buffer[1] = (UChar)V;
  1.3212 +                    if (T != TBase) {
  1.3213 +                        buffer[2] = (UChar)T;
  1.3214 +                        bufferLength = 3;
  1.3215 +                    } else {
  1.3216 +                        bufferLength = 2;
  1.3217 +                    }
  1.3218 +                    source->writableBuffer.releaseBuffer(bufferLength);
  1.3219 +
  1.3220 +                    // Indicate where to continue in main input string after exhausting the writableBuffer
  1.3221 +                    source->fcdPosition       = source->pos;
  1.3222 +
  1.3223 +                    source->pos   = source->writableBuffer.getTerminatedBuffer();
  1.3224 +                    source->origFlags   = source->flags;
  1.3225 +                    source->flags       |= UCOL_ITER_INNORMBUF;
  1.3226 +                    source->flags       &= ~(UCOL_ITER_NORM | UCOL_ITER_HASLEN);
  1.3227 +
  1.3228 +                    return(UCOL_IGNORABLE);
  1.3229 +                }
  1.3230 +            }
  1.3231 +        case SURROGATE_TAG:
  1.3232 +            /* we encountered a leading surrogate. We shall get the CE by using the following code unit */
  1.3233 +            /* two things can happen here: next code point can be a trailing surrogate - we will use it */
  1.3234 +            /* to retrieve the CE, or it is not a trailing surrogate (or the string is done). In that case */
  1.3235 +            /* we treat it like an unassigned code point. */
  1.3236 +            {
  1.3237 +                UChar trail;
  1.3238 +                collIterateState state;
  1.3239 +                backupState(source, &state);
  1.3240 +                if (collIter_eos(source) || !(U16_IS_TRAIL((trail = getNextNormalizedChar(source))))) {
  1.3241 +                    // we could have stepped one char forward and it might have turned out that it
  1.3242 +                    // was not a trail surrogate. In that case, we have to backup.
  1.3243 +                    loadState(source, &state, TRUE);
  1.3244 +                    return UCOL_NOT_FOUND;
  1.3245 +                } else {
  1.3246 +                    /* TODO: CE contain the data from the previous CE + the mask. It should at least be unmasked */
  1.3247 +                    CE = UTRIE_GET32_FROM_OFFSET_TRAIL(&coll->mapping, CE&0xFFFFFF, trail);
  1.3248 +                    if(CE == UCOL_NOT_FOUND) { // there are tailored surrogates in this block, but not this one.
  1.3249 +                        // We need to backup
  1.3250 +                        loadState(source, &state, TRUE);
  1.3251 +                        return CE;
  1.3252 +                    }
  1.3253 +                    // calculate the supplementary code point value, if surrogate was not tailored
  1.3254 +                    cp = ((((uint32_t)ch)<<10UL)+(trail)-(((uint32_t)0xd800<<10UL)+0xdc00-0x10000));
  1.3255 +                }
  1.3256 +            }
  1.3257 +            break;
  1.3258 +        case LEAD_SURROGATE_TAG:  /* D800-DBFF*/
  1.3259 +            UChar nextChar;
  1.3260 +            if( source->flags & UCOL_USE_ITERATOR) {
  1.3261 +                if(U_IS_TRAIL(nextChar = (UChar)source->iterator->current(source->iterator))) {
  1.3262 +                    cp = U16_GET_SUPPLEMENTARY(ch, nextChar);
  1.3263 +                    source->iterator->next(source->iterator);
  1.3264 +                    return getImplicit(cp, source);
  1.3265 +                }
  1.3266 +            } else if((((source->flags & UCOL_ITER_HASLEN) == 0 ) || (source->pos<source->endp)) &&
  1.3267 +                      U_IS_TRAIL((nextChar=*source->pos))) {
  1.3268 +                cp = U16_GET_SUPPLEMENTARY(ch, nextChar);
  1.3269 +                source->pos++;
  1.3270 +                return getImplicit(cp, source);
  1.3271 +            }
  1.3272 +            return UCOL_NOT_FOUND;
  1.3273 +        case TRAIL_SURROGATE_TAG: /* DC00-DFFF*/
  1.3274 +            return UCOL_NOT_FOUND; /* broken surrogate sequence */
  1.3275 +        case CHARSET_TAG:
  1.3276 +            /* not yet implemented */
  1.3277 +            /* probably after 1.8 */
  1.3278 +            return UCOL_NOT_FOUND;
  1.3279 +        default:
  1.3280 +            *status = U_INTERNAL_PROGRAM_ERROR;
  1.3281 +            CE=0;
  1.3282 +            break;
  1.3283 +    }
  1.3284 +    if (CE <= UCOL_NOT_FOUND) break;
  1.3285 +  }
  1.3286 +  return CE;
  1.3287 +}
  1.3288 +
  1.3289 +
  1.3290 +/* getPrevImplicit: builds two implicit CEs for code point cp (now uses Mark's getImplicitPrimary code); one is stored in the CE buffer, the other is returned. */
  1.3291 +static
  1.3292 +inline uint32_t getPrevImplicit(UChar32 cp, collIterate *collationSource) {
  1.3293 +    uint32_t r = uprv_uca_getImplicitPrimary(cp); // r feeds both CEs: masked with UCOL_PRIMARYMASK below, low 16 bits in the return value
  1.3294 +    // Stored CE: r masked with UCOL_PRIMARYMASK, OR-ed with 0x00000505 (presumably default lower-level weights -- TODO confirm).
  1.3295 +    *(collationSource->CEpos++) = (r & UCOL_PRIMARYMASK) | 0x00000505;
  1.3296 +    collationSource->toReturn = collationSource->CEpos; // CEpos was post-incremented, so toReturn is one past the CE just stored
  1.3297 +    // Offset bookkeeping for the CEs produced above.
  1.3298 +    // **** doesn't work if using iterator ****
  1.3299 +    if (collationSource->flags & UCOL_ITER_INNORMBUF) { // flag set: no offsets are appended, only the repeat count is set
  1.3300 +        collationSource->offsetRepeatCount = 1;
  1.3301 +    } else {
  1.3302 +        int32_t firstOffset = (int32_t)(collationSource->pos - collationSource->string); // current position relative to the start of the string
  1.3303 +        // NOTE(review): errorCode is local and never inspected, so appendOffset failures are not reported to the caller -- confirm this is intended.
  1.3304 +        UErrorCode errorCode = U_ZERO_ERROR;
  1.3305 +        collationSource->appendOffset(firstOffset, errorCode);
  1.3306 +        collationSource->appendOffset(firstOffset + 1, errorCode);
  1.3307 +        // offsetReturn is set to offsetStore - 1 (assumes appendOffset advances offsetStore -- verify); slot 0 of offsetBuffer is then overwritten.
  1.3308 +        collationSource->offsetReturn = collationSource->offsetStore - 1;
  1.3309 +        *(collationSource->offsetBuffer) = firstOffset;
  1.3310 +        if (collationSource->offsetReturn == collationSource->offsetBuffer) { // offsetReturn already at the buffer start: reset the store pointer too
  1.3311 +            collationSource->offsetStore = collationSource->offsetBuffer;
  1.3312 +        }
  1.3313 +    }
  1.3314 +    // Returned CE: low 16 bits of r shifted into the top half, OR-ed with 0xC0 (presumably the continuation marker -- TODO confirm).
  1.3315 +    return ((r & 0x0000FFFF)<<16) | 0x000000C0;
  1.3316 +}
  1.3317 +
  1.3318 +/**
  1.3319 + * This function handles the special CEs like contractions, expansions,
  1.3320 + * surrogates, Thai.
  1.3321 + * It is called by getPrevCE.
  1.3322 + */
  1.3323 +uint32_t ucol_prv_getSpecialPrevCE(const UCollator *coll, UChar ch, uint32_t CE,
  1.3324 +                          collIterate *source,
  1.3325 +                          UErrorCode *status)
  1.3326 +{
  1.3327 +    const uint32_t *CEOffset    = NULL;
  1.3328 +          UChar    *UCharOffset = NULL;
  1.3329 +          UChar    schar;
  1.3330 +    const UChar    *constart    = NULL;
  1.3331 +          uint32_t size;
  1.3332 +          UChar    buffer[UCOL_MAX_BUFFER];
  1.3333 +          uint32_t *endCEBuffer;
  1.3334 +          UChar   *strbuffer;
  1.3335 +          int32_t noChars = 0;
  1.3336 +          int32_t CECount = 0;
  1.3337 +
  1.3338 +    for(;;)
  1.3339 +    {
  1.3340 +        /* the only ces that loops are thai and contractions */
  1.3341 +        switch (getCETag(CE))
  1.3342 +        {
  1.3343 +        case NOT_FOUND_TAG:  /* this tag always returns */
  1.3344 +            return CE;
  1.3345 +
  1.3346 +        case SPEC_PROC_TAG:
  1.3347 +            {
  1.3348 +                // Special processing is getting a CE that is preceded by a certain prefix
  1.3349 +                // Currently this is only needed for optimizing Japanese length and iteration marks.
  1.3350 +                // When we encouter a special processing tag, we go backwards and try to see if
  1.3351 +                // we have a match.
  1.3352 +                // Contraction tables are used - so the whole process is not unlike contraction.
  1.3353 +                // prefix data is stored backwards in the table.
  1.3354 +                const UChar *UCharOffset;
  1.3355 +                UChar schar, tchar;
  1.3356 +                collIterateState prefixState;
  1.3357 +                backupState(source, &prefixState);
  1.3358 +                for(;;) {
  1.3359 +                    // This loop will run once per source string character, for as long as we
  1.3360 +                    //  are matching a potential contraction sequence
  1.3361 +
  1.3362 +                    // First we position ourselves at the begining of contraction sequence
  1.3363 +                    const UChar *ContractionStart = UCharOffset = (UChar *)coll->image+getContractOffset(CE);
  1.3364 +
  1.3365 +                    if (collIter_bos(source)) {
  1.3366 +                        CE = *(coll->contractionCEs + (UCharOffset - coll->contractionIndex));
  1.3367 +                        break;
  1.3368 +                    }
  1.3369 +                    schar = getPrevNormalizedChar(source, status);
  1.3370 +                    goBackOne(source);
  1.3371 +
  1.3372 +                    while(schar > (tchar = *UCharOffset)) { /* since the contraction codepoints should be ordered, we skip all that are smaller */
  1.3373 +                        UCharOffset++;
  1.3374 +                    }
  1.3375 +
  1.3376 +                    if (schar == tchar) {
  1.3377 +                        // Found the source string char in the table.
  1.3378 +                        //  Pick up the corresponding CE from the table.
  1.3379 +                        CE = *(coll->contractionCEs +
  1.3380 +                            (UCharOffset - coll->contractionIndex));
  1.3381 +                    }
  1.3382 +                    else
  1.3383 +                    {
  1.3384 +                        // if there is a completely ignorable code point in the middle of
  1.3385 +                        // a prefix, we need to act as if it's not there
  1.3386 +                        // assumption: 'real' noncharacters (*fffe, *ffff, fdd0-fdef are set to zero)
  1.3387 +                        // lone surrogates cannot be set to zero as it would break other processing
  1.3388 +                        uint32_t isZeroCE = UTRIE_GET32_FROM_LEAD(&coll->mapping, schar);
  1.3389 +                        // it's easy for BMP code points
  1.3390 +                        if(isZeroCE == 0) {
  1.3391 +                            continue;
  1.3392 +                        } else if(U16_IS_SURROGATE(schar)) {
  1.3393 +                            // for supplementary code points, we have to check the next one
  1.3394 +                            // situations where we are going to ignore
  1.3395 +                            // 1. beginning of the string: schar is a lone surrogate
  1.3396 +                            // 2. schar is a lone surrogate
  1.3397 +                            // 3. schar is a trail surrogate in a valid surrogate sequence
  1.3398 +                            //    that is explicitly set to zero.
  1.3399 +                            if (!collIter_bos(source)) {
  1.3400 +                                UChar lead;
  1.3401 +                                if(!U16_IS_SURROGATE_LEAD(schar) && U16_IS_LEAD(lead = getPrevNormalizedChar(source, status))) {
  1.3402 +                                    isZeroCE = UTRIE_GET32_FROM_LEAD(&coll->mapping, lead);
  1.3403 +                                    if(isSpecial(isZeroCE) && getCETag(isZeroCE) == SURROGATE_TAG) {
  1.3404 +                                        uint32_t finalCE = UTRIE_GET32_FROM_OFFSET_TRAIL(&coll->mapping, isZeroCE&0xFFFFFF, schar);
  1.3405 +                                        if(finalCE == 0) {
  1.3406 +                                            // this is a real, assigned completely ignorable code point
  1.3407 +                                            goBackOne(source);
  1.3408 +                                            continue;
  1.3409 +                                        }
  1.3410 +                                    }
  1.3411 +                                } else {
  1.3412 +                                    // lone surrogate, treat like unassigned
  1.3413 +                                    return UCOL_NOT_FOUND;
  1.3414 +                                }
  1.3415 +                            } else {
  1.3416 +                                // lone surrogate at the beggining, treat like unassigned
  1.3417 +                                return UCOL_NOT_FOUND;
  1.3418 +                            }
  1.3419 +                        }
  1.3420 +                        // Source string char was not in the table.
  1.3421 +                        //   We have not found the prefix.
  1.3422 +                        CE = *(coll->contractionCEs +
  1.3423 +                            (ContractionStart - coll->contractionIndex));
  1.3424 +                    }
  1.3425 +
  1.3426 +                    if(!isPrefix(CE)) {
  1.3427 +                        // The source string char was in the contraction table, and the corresponding
  1.3428 +                        //   CE is not a prefix CE.  We found the prefix, break
  1.3429 +                        //   out of loop, this CE will end up being returned.  This is the normal
  1.3430 +                        //   way out of prefix handling when the source actually contained
  1.3431 +                        //   the prefix.
  1.3432 +                        break;
  1.3433 +                    }
  1.3434 +                }
  1.3435 +                loadState(source, &prefixState, TRUE);
  1.3436 +                break;
  1.3437 +            }
  1.3438 +
  1.3439 +        case CONTRACTION_TAG: {
  1.3440 +            /* to ensure that the backwards and forwards iteration matches, we
  1.3441 +            take the current region of most possible match and pass it through
  1.3442 +            the forward iteration. this will ensure that the obstinate problem of
  1.3443 +            overlapping contractions will not occur.
  1.3444 +            */
  1.3445 +            schar = peekCodeUnit(source, 0);
  1.3446 +            constart = (UChar *)coll->image + getContractOffset(CE);
  1.3447 +            if (isAtStartPrevIterate(source)
  1.3448 +                /* commented away contraction end checks after adding the checks
  1.3449 +                in getPrevCE  */) {
  1.3450 +                    /* start of string or this is not the end of any contraction */
  1.3451 +                    CE = *(coll->contractionCEs +
  1.3452 +                        (constart - coll->contractionIndex));
  1.3453 +                    break;
  1.3454 +            }
  1.3455 +            strbuffer = buffer;
  1.3456 +            UCharOffset = strbuffer + (UCOL_MAX_BUFFER - 1);
  1.3457 +            *(UCharOffset --) = 0;
  1.3458 +            noChars = 0;
  1.3459 +            // have to swap thai characters
  1.3460 +            while (ucol_unsafeCP(schar, coll)) {
  1.3461 +                *(UCharOffset) = schar;
  1.3462 +                noChars++;
  1.3463 +                UCharOffset --;
  1.3464 +                schar = getPrevNormalizedChar(source, status);
  1.3465 +                goBackOne(source);
  1.3466 +                // TODO: when we exhaust the contraction buffer,
  1.3467 +                // it needs to get reallocated. The problem is
  1.3468 +                // that the size depends on the string which is
  1.3469 +                // not iterated over. However, since we're travelling
  1.3470 +                // backwards, we already had to set the iterator at
  1.3471 +                // the end - so we might as well know where we are?
  1.3472 +                if (UCharOffset + 1 == buffer) {
  1.3473 +                    /* we have exhausted the buffer */
  1.3474 +                    int32_t newsize = 0;
  1.3475 +                    if(source->pos) { // actually dealing with a position
  1.3476 +                        newsize = (int32_t)(source->pos - source->string + 1);
  1.3477 +                    } else { // iterator
  1.3478 +                        newsize = 4 * UCOL_MAX_BUFFER;
  1.3479 +                    }
  1.3480 +                    strbuffer = (UChar *)uprv_malloc(sizeof(UChar) *
  1.3481 +                        (newsize + UCOL_MAX_BUFFER));
  1.3482 +                    /* test for NULL */
  1.3483 +                    if (strbuffer == NULL) {
  1.3484 +                        *status = U_MEMORY_ALLOCATION_ERROR;
  1.3485 +                        return UCOL_NO_MORE_CES;
  1.3486 +                    }
  1.3487 +                    UCharOffset = strbuffer + newsize;
  1.3488 +                    uprv_memcpy(UCharOffset, buffer,
  1.3489 +                        UCOL_MAX_BUFFER * sizeof(UChar));
  1.3490 +                    UCharOffset --;
  1.3491 +                }
  1.3492 +                if ((source->pos && (source->pos == source->string ||
  1.3493 +                    ((source->flags & UCOL_ITER_INNORMBUF) &&
  1.3494 +                    *(source->pos - 1) == 0 && source->fcdPosition == NULL)))
  1.3495 +                    || (source->iterator && !source->iterator->hasPrevious(source->iterator))) {
  1.3496 +                        break;
  1.3497 +                }
  1.3498 +            }
  1.3499 +            /* adds the initial base character to the string */
  1.3500 +            *(UCharOffset) = schar;
  1.3501 +            noChars++;
  1.3502 +
  1.3503 +            int32_t offsetBias;
  1.3504 +
  1.3505 +            // **** doesn't work if using iterator ****
  1.3506 +            if (source->flags & UCOL_ITER_INNORMBUF) {
  1.3507 +                offsetBias = -1;
  1.3508 +            } else {
  1.3509 +                offsetBias = (int32_t)(source->pos - source->string);
  1.3510 +            }
  1.3511 +
  1.3512 +            /* a new collIterate is used to simplify things, since using the current
  1.3513 +            collIterate will mean that the forward and backwards iteration will
  1.3514 +            share and change the same buffers. we don't want to get into that. */
  1.3515 +            collIterate temp;
  1.3516 +            int32_t rawOffset;
  1.3517 +
  1.3518 +            IInit_collIterate(coll, UCharOffset, noChars, &temp, status);
  1.3519 +            if(U_FAILURE(*status)) {
  1.3520 +                return (uint32_t)UCOL_NULLORDER;
  1.3521 +            }
  1.3522 +            temp.flags &= ~UCOL_ITER_NORM;
  1.3523 +            temp.flags |= source->flags & UCOL_FORCE_HAN_IMPLICIT;
  1.3524 +
  1.3525 +            rawOffset = (int32_t)(temp.pos - temp.string); // should always be zero?
  1.3526 +            CE = ucol_IGetNextCE(coll, &temp, status);
  1.3527 +
  1.3528 +            if (source->extendCEs) {
  1.3529 +                endCEBuffer = source->extendCEs + source->extendCEsSize;
  1.3530 +                CECount = (int32_t)((source->CEpos - source->extendCEs)/sizeof(uint32_t));
  1.3531 +            } else {
  1.3532 +                endCEBuffer = source->CEs + UCOL_EXPAND_CE_BUFFER_SIZE;
  1.3533 +                CECount = (int32_t)((source->CEpos - source->CEs)/sizeof(uint32_t));
  1.3534 +            }
  1.3535 +
  1.3536 +            while (CE != UCOL_NO_MORE_CES) {
  1.3537 +                *(source->CEpos ++) = CE;
  1.3538 +
  1.3539 +                if (offsetBias >= 0) {
  1.3540 +                    source->appendOffset(rawOffset + offsetBias, *status);
  1.3541 +                }
  1.3542 +
  1.3543 +                CECount++;
  1.3544 +                if (source->CEpos == endCEBuffer) {
  1.3545 +                    /* ran out of CE space, reallocate to new buffer.
  1.3546 +                    If reallocation fails, reset pointers and bail out,
  1.3547 +                    there's no guarantee of the right character position after
  1.3548 +                    this bail*/
  1.3549 +                    if (!increaseCEsCapacity(source)) {
  1.3550 +                        *status = U_MEMORY_ALLOCATION_ERROR;
  1.3551 +                        break;
  1.3552 +                    }
  1.3553 +
  1.3554 +                    endCEBuffer = source->extendCEs + source->extendCEsSize;
  1.3555 +                }
  1.3556 +
  1.3557 +                if ((temp.flags & UCOL_ITER_INNORMBUF) != 0) {
  1.3558 +                    rawOffset = (int32_t)(temp.fcdPosition - temp.string);
  1.3559 +                } else {
  1.3560 +                    rawOffset = (int32_t)(temp.pos - temp.string);
  1.3561 +                }
  1.3562 +
  1.3563 +                CE = ucol_IGetNextCE(coll, &temp, status);
  1.3564 +            }
  1.3565 +
  1.3566 +            if (strbuffer != buffer) {
  1.3567 +                uprv_free(strbuffer);
  1.3568 +            }
  1.3569 +            if (U_FAILURE(*status)) {
  1.3570 +                return (uint32_t)UCOL_NULLORDER;
  1.3571 +            }
  1.3572 +
  1.3573 +            if (source->offsetRepeatValue != 0) {
  1.3574 +                if (CECount > noChars) {
  1.3575 +                    source->offsetRepeatCount += temp.offsetRepeatCount;
  1.3576 +                } else {
  1.3577 +                    // **** does this really skip the right offsets? ****
  1.3578 +                    source->offsetReturn -= (noChars - CECount);
  1.3579 +                }
  1.3580 +            }
  1.3581 +
  1.3582 +            if (offsetBias >= 0) {
  1.3583 +                source->offsetReturn = source->offsetStore - 1;
  1.3584 +                if (source->offsetReturn == source->offsetBuffer) {
  1.3585 +                    source->offsetStore = source->offsetBuffer;
  1.3586 +                }
  1.3587 +            }
  1.3588 +
  1.3589 +            source->toReturn = source->CEpos - 1;
  1.3590 +            if (source->toReturn == source->CEs) {
  1.3591 +                source->CEpos = source->CEs;
  1.3592 +            }
  1.3593 +
  1.3594 +            return *(source->toReturn);
  1.3595 +        }
  1.3596 +        case LONG_PRIMARY_TAG:
  1.3597 +            {
  1.3598 +                *(source->CEpos++) = ((CE & 0xFFFF00) << 8) | (UCOL_BYTE_COMMON << 8) | UCOL_BYTE_COMMON;
  1.3599 +                *(source->CEpos++) = ((CE & 0xFF)<<24)|UCOL_CONTINUATION_MARKER;
  1.3600 +                source->toReturn = source->CEpos - 1;
  1.3601 +
  1.3602 +                if (source->flags & UCOL_ITER_INNORMBUF) {
  1.3603 +                    source->offsetRepeatCount = 1;
  1.3604 +                } else {
  1.3605 +                    int32_t firstOffset = (int32_t)(source->pos - source->string);
  1.3606 +
  1.3607 +                    source->appendOffset(firstOffset, *status);
  1.3608 +                    source->appendOffset(firstOffset + 1, *status);
  1.3609 +
  1.3610 +                    source->offsetReturn = source->offsetStore - 1;
  1.3611 +                    *(source->offsetBuffer) = firstOffset;
  1.3612 +                    if (source->offsetReturn == source->offsetBuffer) {
  1.3613 +                        source->offsetStore = source->offsetBuffer;
  1.3614 +                    }
  1.3615 +                }
  1.3616 +
  1.3617 +
  1.3618 +                return *(source->toReturn);
  1.3619 +            }
  1.3620 +
  1.3621 +        case EXPANSION_TAG: /* this tag always returns */
  1.3622 +            {
  1.3623 +            /*
  1.3624 +            This should handle expansion.
  1.3625 +            NOTE: we can encounter both continuations and expansions in an expansion!
  1.3626 +            I have to decide where continuations are going to be dealt with
  1.3627 +            */
  1.3628 +            int32_t firstOffset = (int32_t)(source->pos - source->string);
  1.3629 +
  1.3630 +            // **** doesn't work if using iterator ****
  1.3631 +            if (source->offsetReturn != NULL) {
  1.3632 +                if (! (source->flags & UCOL_ITER_INNORMBUF) && source->offsetReturn == source->offsetBuffer) {
  1.3633 +                    source->offsetStore = source->offsetBuffer;
  1.3634 +                }else {
  1.3635 +                  firstOffset = -1;
  1.3636 +                }
  1.3637 +            }
  1.3638 +
  1.3639 +            /* find the offset to expansion table */
  1.3640 +            CEOffset = (uint32_t *)coll->image + getExpansionOffset(CE);
  1.3641 +            size     = getExpansionCount(CE);
  1.3642 +            if (size != 0) {
  1.3643 +                /*
  1.3644 +                if there are less than 16 elements in expansion, we don't terminate
  1.3645 +                */
  1.3646 +                uint32_t count;
  1.3647 +
  1.3648 +                for (count = 0; count < size; count++) {
  1.3649 +                    *(source->CEpos ++) = *CEOffset++;
  1.3650 +
  1.3651 +                    if (firstOffset >= 0) {
  1.3652 +                        source->appendOffset(firstOffset + 1, *status);
  1.3653 +                    }
  1.3654 +                }
  1.3655 +            } else {
  1.3656 +                /* else, we do */
  1.3657 +                while (*CEOffset != 0) {
  1.3658 +                    *(source->CEpos ++) = *CEOffset ++;
  1.3659 +
  1.3660 +                    if (firstOffset >= 0) {
  1.3661 +                        source->appendOffset(firstOffset + 1, *status);
  1.3662 +                    }
  1.3663 +                }
  1.3664 +            }
  1.3665 +
  1.3666 +            if (firstOffset >= 0) {
  1.3667 +                source->offsetReturn = source->offsetStore - 1;
  1.3668 +                *(source->offsetBuffer) = firstOffset;
  1.3669 +                if (source->offsetReturn == source->offsetBuffer) {
  1.3670 +                    source->offsetStore = source->offsetBuffer;
  1.3671 +                }
  1.3672 +            } else {
  1.3673 +                source->offsetRepeatCount += size - 1;
  1.3674 +            }
  1.3675 +
  1.3676 +            source->toReturn = source->CEpos - 1;
  1.3677 +            // in case of one element expansion, we
  1.3678 +            // want to immediately return CEpos
  1.3679 +            if(source->toReturn == source->CEs) {
  1.3680 +                source->CEpos = source->CEs;
  1.3681 +            }
  1.3682 +
  1.3683 +            return *(source->toReturn);
  1.3684 +            }
  1.3685 +
  1.3686 +        case DIGIT_TAG:
  1.3687 +            {
  1.3688 +                /*
  1.3689 +                We do a check to see if we want to collate digits as numbers; if so we generate
  1.3690 +                a custom collation key. Otherwise we pull out the value stored in the expansion table.
  1.3691 +                */
  1.3692 +                uint32_t i;    /* general counter */
  1.3693 +
  1.3694 +                if (source->coll->numericCollation == UCOL_ON){
  1.3695 +                    uint32_t digIndx = 0;
  1.3696 +                    uint32_t endIndex = 0;
  1.3697 +                    uint32_t leadingZeroIndex = 0;
  1.3698 +                    uint32_t trailingZeroCount = 0;
  1.3699 +
  1.3700 +                    uint8_t collateVal = 0;
  1.3701 +
  1.3702 +                    UBool nonZeroValReached = FALSE;
  1.3703 +
  1.3704 +                    uint8_t numTempBuf[UCOL_MAX_DIGITS_FOR_NUMBER/2 + 2]; // I just need a temporary place to store my generated CEs.
  1.3705 +                    /*
  1.3706 +                    We parse the source string until we hit a char that's NOT a digit.
  1.3707 +                    Use this u_charDigitValue. This might be slow because we have to
  1.3708 +                    handle surrogates...
  1.3709 +                    */
  1.3710 +                    /*
  1.3711 +                    We need to break up the digit string into collection elements of UCOL_MAX_DIGITS_FOR_NUMBER or less,
  1.3712 +                    with any chunks smaller than that being on the right end of the digit string - i.e. the first collation
  1.3713 +                    element we process when going backward. To determine how long that chunk might be, we may need to make
  1.3714 +                    two passes through the loop that collects digits - one to see how long the string is (and how much is
  1.3715 +                    leading zeros) to determine the length of that right-hand chunk, and a second (if the whole string has
  1.3716 +                    more than UCOL_MAX_DIGITS_FOR_NUMBER non-leading-zero digits) to actually process that collation
  1.3717 +                    element chunk after resetting the state to the initialState at the right side of the digit string.
  1.3718 +                    */
  1.3719 +                    uint32_t ceLimit = 0;
  1.3720 +                    UChar initial_ch = ch;
  1.3721 +                    collIterateState initialState = {0,0,0,0,0,0,0,0,0};
  1.3722 +                    backupState(source, &initialState);
  1.3723 +
  1.3724 +                    for(;;) {
  1.3725 +                        collIterateState state = {0,0,0,0,0,0,0,0,0};
  1.3726 +                        UChar32 char32 = 0;
  1.3727 +                        int32_t digVal = 0;
  1.3728 +
  1.3729 +                        if (U16_IS_TRAIL (ch)) {
  1.3730 +                            if (!collIter_bos(source)){
  1.3731 +                                UChar lead = getPrevNormalizedChar(source, status);
  1.3732 +                                if(U16_IS_LEAD(lead)) {
  1.3733 +                                    char32 = U16_GET_SUPPLEMENTARY(lead,ch);
  1.3734 +                                    goBackOne(source);
  1.3735 +                                } else {
  1.3736 +                                    char32 = ch;
  1.3737 +                                }
  1.3738 +                            } else {
  1.3739 +                                char32 = ch;
  1.3740 +                            }
  1.3741 +                        } else {
  1.3742 +                            char32 = ch;
  1.3743 +                        }
  1.3744 +                        digVal = u_charDigitValue(char32);
  1.3745 +
  1.3746 +                        for(;;) {
  1.3747 +                            // Make sure we have enough space. No longer needed;
  1.3748 +                            // at this point the largest value of digIndx when we need to save data in numTempBuf
  1.3749 +                            // is UCOL_MAX_DIGITS_FOR_NUMBER-1 (digIndx is post-incremented) so we just ensure
  1.3750 +                            // that numTempBuf is big enough (UCOL_MAX_DIGITS_FOR_NUMBER/2 + 2).
  1.3751 +
  1.3752 +                            // Skip over trailing zeroes, and keep a count of them.
  1.3753 +                            if (digVal != 0)
  1.3754 +                                nonZeroValReached = TRUE;
  1.3755 +
  1.3756 +                            if (nonZeroValReached) {
  1.3757 +                                /*
  1.3758 +                                We parse the digit string into base 100 numbers (this fits into a byte).
  1.3759 +                                We only add to the buffer in twos, thus if we are parsing an odd character,
  1.3760 +                                that serves as the 'tens' digit while the if we are parsing an even one, that
  1.3761 +                                is the 'ones' digit. We dumped the parsed base 100 value (collateVal) into
  1.3762 +                                a buffer. We multiply each collateVal by 2 (to give us room) and add 5 (to avoid
  1.3763 +                                overlapping magic CE byte values). The last byte we subtract 1 to ensure it is less
  1.3764 +                                than all the other bytes.
  1.3765 +
  1.3766 +                                Since we're doing in this reverse we want to put the first digit encountered into the
  1.3767 +                                ones place and the second digit encountered into the tens place.
  1.3768 +                                */
  1.3769 +
  1.3770 +                                if ((digIndx + trailingZeroCount) % 2 == 1) {
  1.3771 +                                    // High-order digit case (tens place)
  1.3772 +                                    collateVal += (uint8_t)(digVal * 10);
  1.3773 +
  1.3774 +                                    // We cannot set leadingZeroIndex unless it has been set for the
  1.3775 +                                    // low-order digit. Therefore, all we can do for the high-order
  1.3776 +                                    // digit is turn it off, never on.
  1.3777 +                                    // The only time we will have a high digit without a low is for
  1.3778 +                                    // the very first non-zero digit, so no zero check is necessary.
  1.3779 +                                    if (collateVal != 0)
  1.3780 +                                        leadingZeroIndex = 0;
  1.3781 +
  1.3782 +                                    // The first pass through, digIndx may exceed the limit, but in that case
  1.3783 +                                    // we no longer care about numTempBuf contents since they will be discarded
  1.3784 +                                    if ( digIndx < UCOL_MAX_DIGITS_FOR_NUMBER ) {
  1.3785 +                                        numTempBuf[(digIndx/2) + 2] = collateVal*2 + 6;
  1.3786 +                                    }
  1.3787 +                                    collateVal = 0;
  1.3788 +                                } else {
  1.3789 +                                    // Low-order digit case (ones place)
  1.3790 +                                    collateVal = (uint8_t)digVal;
  1.3791 +
  1.3792 +                                    // Check for leading zeroes.
  1.3793 +                                    if (collateVal == 0) {
  1.3794 +                                        if (!leadingZeroIndex)
  1.3795 +                                            leadingZeroIndex = (digIndx/2) + 2;
  1.3796 +                                    } else
  1.3797 +                                        leadingZeroIndex = 0;
  1.3798 +
  1.3799 +                                    // No need to write to buffer; the case of a last odd digit
  1.3800 +                                    // is handled below.
  1.3801 +                                }
  1.3802 +                                ++digIndx;
  1.3803 +                            } else
  1.3804 +                                ++trailingZeroCount;
  1.3805 +
  1.3806 +                            if (!collIter_bos(source)) {
  1.3807 +                                ch = getPrevNormalizedChar(source, status);
  1.3808 +                                //goBackOne(source);
  1.3809 +                                if (U16_IS_TRAIL(ch)) {
  1.3810 +                                    backupState(source, &state);
  1.3811 +                                    if (!collIter_bos(source)) {
  1.3812 +                                        goBackOne(source);
  1.3813 +                                        UChar lead = getPrevNormalizedChar(source, status);
  1.3814 +
  1.3815 +                                        if(U16_IS_LEAD(lead)) {
  1.3816 +                                            char32 = U16_GET_SUPPLEMENTARY(lead,ch);
  1.3817 +                                        } else {
  1.3818 +                                            loadState(source, &state, FALSE);
  1.3819 +                                            char32 = ch;
  1.3820 +                                        }
  1.3821 +                                    }
  1.3822 +                                } else
  1.3823 +                                    char32 = ch;
  1.3824 +
  1.3825 +                                if ((digVal = u_charDigitValue(char32)) == -1 || (ceLimit > 0 && (digIndx + trailingZeroCount) >= ceLimit)) {
  1.3826 +                                    if (char32 > 0xFFFF) {// For surrogates.
  1.3827 +                                        loadState(source, &state, FALSE);
  1.3828 +                                    }
  1.3829 +                                    // Don't need to "reverse" the goBackOne call,
  1.3830 +                                    // as this points to the next position to process..
  1.3831 +                                    //if (char32 > 0xFFFF) // For surrogates.
  1.3832 +                                    //getNextNormalizedChar(source);
  1.3833 +                                    break;
  1.3834 +                                }
  1.3835 +
  1.3836 +                                goBackOne(source);
  1.3837 +                            }else
  1.3838 +                                break;
  1.3839 +                        }
  1.3840 +
  1.3841 +                        if (digIndx + trailingZeroCount <= UCOL_MAX_DIGITS_FOR_NUMBER) {
  1.3842 +                            // our collation element is not too big, go ahead and finish with it
  1.3843 +                            break;
  1.3844 +                        }
  1.3845 +                        // our digit string is too long for a collation element;
  1.3846 +                        // set the limit for it, reset the state and begin again
  1.3847 +                        ceLimit = (digIndx + trailingZeroCount) % UCOL_MAX_DIGITS_FOR_NUMBER;
  1.3848 +                        if ( ceLimit == 0 ) {
  1.3849 +                            ceLimit = UCOL_MAX_DIGITS_FOR_NUMBER;
  1.3850 +                        }
  1.3851 +                        ch = initial_ch;
  1.3852 +                        loadState(source, &initialState, FALSE);
  1.3853 +                        digIndx = endIndex = leadingZeroIndex = trailingZeroCount = 0;
  1.3854 +                        collateVal = 0;
  1.3855 +                        nonZeroValReached = FALSE;
  1.3856 +                    }
  1.3857 +
  1.3858 +                    if (! nonZeroValReached) {
  1.3859 +                        digIndx = 2;
  1.3860 +                        trailingZeroCount = 0;
  1.3861 +                        numTempBuf[2] = 6;
  1.3862 +                    }
  1.3863 +
  1.3864 +                    if ((digIndx + trailingZeroCount) % 2 != 0) {
  1.3865 +                        numTempBuf[((digIndx)/2) + 2] = collateVal*2 + 6;
  1.3866 +                        digIndx += 1;       // The implicit leading zero
  1.3867 +                    }
  1.3868 +                    if (trailingZeroCount % 2 != 0) {
  1.3869 +                        // We had to consume one trailing zero for the low digit
  1.3870 +                        // of the least significant byte
  1.3871 +                        digIndx += 1;       // The trailing zero not in the exponent
  1.3872 +                        trailingZeroCount -= 1;
  1.3873 +                    }
  1.3874 +
  1.3875 +                    endIndex = leadingZeroIndex ? leadingZeroIndex : ((digIndx/2) + 2) ;
  1.3876 +
  1.3877 +                    // Subtract one off of the last byte. Really the first byte here, but it's reversed...
  1.3878 +                    numTempBuf[2] -= 1;
  1.3879 +
  1.3880 +                    /*
  1.3881 +                    We want to skip over the first two slots in the buffer. The first slot
  1.3882 +                    is reserved for the header byte UCOL_CODAN_PLACEHOLDER. The second slot is for the
  1.3883 +                    sign/exponent byte: 0x80 + (decimalPos/2) & 7f.
  1.3884 +                    The exponent must be adjusted by the number of leading zeroes, and the number of
  1.3885 +                    trailing zeroes.
  1.3886 +                    */
  1.3887 +                    numTempBuf[0] = UCOL_CODAN_PLACEHOLDER;
  1.3888 +                    uint32_t exponent = (digIndx+trailingZeroCount)/2;
  1.3889 +                    if (leadingZeroIndex)
  1.3890 +                        exponent -= ((digIndx/2) + 2 - leadingZeroIndex);
  1.3891 +                    numTempBuf[1] = (uint8_t)(0x80 + (exponent & 0x7F));
  1.3892 +
  1.3893 +                    // Now transfer the collation key to our collIterate struct.
  1.3894 +                    // The total size for our collation key is half of endIndex, rounded up.
  1.3895 +                    int32_t size = (endIndex+1)/2;
  1.3896 +                    if(!ensureCEsCapacity(source, size)) {
  1.3897 +                        return (uint32_t)UCOL_NULLORDER;
  1.3898 +                    }
  1.3899 +                    *(source->CEpos++) = (((numTempBuf[0] << 8) | numTempBuf[1]) << UCOL_PRIMARYORDERSHIFT) | //Primary weight
  1.3900 +                        (UCOL_BYTE_COMMON << UCOL_SECONDARYORDERSHIFT) | // Secondary weight
  1.3901 +                        UCOL_BYTE_COMMON; // Tertiary weight.
  1.3902 +                    i = endIndex - 1; // Reset the index into the buffer.
  1.3903 +                    while(i >= 2) {
  1.3904 +                        uint32_t primWeight = numTempBuf[i--] << 8;
  1.3905 +                        if ( i >= 2)
  1.3906 +                            primWeight |= numTempBuf[i--];
  1.3907 +                        *(source->CEpos++) = (primWeight << UCOL_PRIMARYORDERSHIFT) | UCOL_CONTINUATION_MARKER;
  1.3908 +                    }
  1.3909 +
  1.3910 +                    source->toReturn = source->CEpos -1;
  1.3911 +                    return *(source->toReturn);
  1.3912 +                } else {
  1.3913 +                    CEOffset = (uint32_t *)coll->image + getExpansionOffset(CE);
  1.3914 +                    CE = *(CEOffset++);
  1.3915 +                    break;
  1.3916 +                }
  1.3917 +            }
  1.3918 +
  1.3919 +        case HANGUL_SYLLABLE_TAG: /* AC00-D7AF*/
  1.3920 +            {
  1.3921 +                static const uint32_t
  1.3922 +                    SBase = 0xAC00, LBase = 0x1100, VBase = 0x1161, TBase = 0x11A7;
  1.3923 +                //const uint32_t LCount = 19;
  1.3924 +                static const uint32_t VCount = 21;
  1.3925 +                static const uint32_t TCount = 28;
  1.3926 +                //const uint32_t NCount = VCount * TCount;   /* 588 */
  1.3927 +                //const uint32_t SCount = LCount * NCount;   /* 11172 */
  1.3928 +
  1.3929 +                uint32_t L = ch - SBase;
  1.3930 +                /*
  1.3931 +                divide into pieces.
  1.3932 +                we do it in this order since some compilers can do % and / in one
  1.3933 +                operation
  1.3934 +                */
  1.3935 +                uint32_t T = L % TCount;
  1.3936 +                L /= TCount;
  1.3937 +                uint32_t V = L % VCount;
  1.3938 +                L /= VCount;
  1.3939 +
  1.3940 +                /* offset them */
  1.3941 +                L += LBase;
  1.3942 +                V += VBase;
  1.3943 +                T += TBase;
  1.3944 +
  1.3945 +                int32_t firstOffset = (int32_t)(source->pos - source->string);
  1.3946 +                source->appendOffset(firstOffset, *status);
  1.3947 +
  1.3948 +                /*
  1.3949 +                 * return the first CE, but first put the rest into the expansion buffer
  1.3950 +                 */
  1.3951 +                if (!source->coll->image->jamoSpecial) {
  1.3952 +                    *(source->CEpos++) = UTRIE_GET32_FROM_LEAD(&coll->mapping, L);
  1.3953 +                    *(source->CEpos++) = UTRIE_GET32_FROM_LEAD(&coll->mapping, V);
  1.3954 +                    source->appendOffset(firstOffset + 1, *status);
  1.3955 +
  1.3956 +                    if (T != TBase) {
  1.3957 +                        *(source->CEpos++) = UTRIE_GET32_FROM_LEAD(&coll->mapping, T);
  1.3958 +                        source->appendOffset(firstOffset + 1, *status);
  1.3959 +                    }
  1.3960 +
  1.3961 +                    source->toReturn = source->CEpos - 1;
  1.3962 +
  1.3963 +                    source->offsetReturn = source->offsetStore - 1;
  1.3964 +                    if (source->offsetReturn == source->offsetBuffer) {
  1.3965 +                        source->offsetStore = source->offsetBuffer;
  1.3966 +                    }
  1.3967 +
  1.3968 +                    return *(source->toReturn);
  1.3969 +                } else {
  1.3970 +                    // Since Hanguls pass the FCD check, it is
  1.3971 +                    // guaranteed that we won't be in
  1.3972 +                    // the normalization buffer if something like this happens
  1.3973 +
  1.3974 +                    // Move Jamos into normalization buffer
  1.3975 +                    UChar *tempbuffer = source->writableBuffer.getBuffer(5);
  1.3976 +                    int32_t tempbufferLength, jamoOffset;
  1.3977 +                    tempbuffer[0] = 0;
  1.3978 +                    tempbuffer[1] = (UChar)L;
  1.3979 +                    tempbuffer[2] = (UChar)V;
  1.3980 +                    if (T != TBase) {
  1.3981 +                        tempbuffer[3] = (UChar)T;
  1.3982 +                        tempbufferLength = 4;
  1.3983 +                    } else {
  1.3984 +                        tempbufferLength = 3;
  1.3985 +                    }
  1.3986 +                    source->writableBuffer.releaseBuffer(tempbufferLength);
  1.3987 +
  1.3988 +                    // Indicate where to continue in main input string after exhausting the writableBuffer
  1.3989 +                    if (source->pos  == source->string) {
  1.3990 +                        jamoOffset = 0;
  1.3991 +                        source->fcdPosition = NULL;
  1.3992 +                    } else {
  1.3993 +                        jamoOffset = source->pos - source->string;
  1.3994 +                        source->fcdPosition       = source->pos-1;
  1.3995 +                    }
  1.3996 +                    
  1.3997 +                    // Append offsets for the additional chars
  1.3998 +                    // (not the 0, and not the L whose offsets match the original Hangul)
  1.3999 +                    int32_t jamoRemaining = tempbufferLength - 2;
  1.4000 +                    jamoOffset++; // appended offsets should match end of original Hangul
  1.4001 +                    while (jamoRemaining-- > 0) {
  1.4002 +                        source->appendOffset(jamoOffset, *status);
  1.4003 +                    }
  1.4004 +
  1.4005 +                    source->offsetRepeatValue = jamoOffset;
  1.4006 +
  1.4007 +                    source->offsetReturn = source->offsetStore - 1;
  1.4008 +                    if (source->offsetReturn == source->offsetBuffer) {
  1.4009 +                        source->offsetStore = source->offsetBuffer;
  1.4010 +                    }
  1.4011 +
  1.4012 +                    source->pos               = source->writableBuffer.getTerminatedBuffer() + tempbufferLength;
  1.4013 +                    source->origFlags         = source->flags;
  1.4014 +                    source->flags            |= UCOL_ITER_INNORMBUF;
  1.4015 +                    source->flags            &= ~(UCOL_ITER_NORM | UCOL_ITER_HASLEN);
  1.4016 +
  1.4017 +                    return(UCOL_IGNORABLE);
  1.4018 +                }
  1.4019 +            }
  1.4020 +
  1.4021 +        case IMPLICIT_TAG:        /* everything that is not defined otherwise */
  1.4022 +            return getPrevImplicit(ch, source);
  1.4023 +
  1.4024 +            // TODO: Remove CJK implicits as they are handled by the getImplicitPrimary function
  1.4025 +        case CJK_IMPLICIT_TAG:    /* 0x3400-0x4DB5, 0x4E00-0x9FA5, 0xF900-0xFA2D*/
  1.4026 +            return getPrevImplicit(ch, source);
  1.4027 +
  1.4028 +        case SURROGATE_TAG:  /* This is a surrogate pair */
  1.4029 +            /* essentially an engaged lead surrogate. */
  1.4030 +            /* if you have encountered it here, it means that a */
  1.4031 +            /* broken sequence was encountered and this is an error */
  1.4032 +            return UCOL_NOT_FOUND;
  1.4033 +
  1.4034 +        case LEAD_SURROGATE_TAG:  /* D800-DBFF*/
  1.4035 +            return UCOL_NOT_FOUND; /* broken surrogate sequence */
  1.4036 +
  1.4037 +        case TRAIL_SURROGATE_TAG: /* DC00-DFFF*/
  1.4038 +            {
  1.4039 +                UChar32 cp = 0;
  1.4040 +                UChar  prevChar;
  1.4041 +                const UChar *prev;
  1.4042 +                if (isAtStartPrevIterate(source)) {
  1.4043 +                    /* we are at the start of the string, wrong place to be at */
  1.4044 +                    return UCOL_NOT_FOUND;
  1.4045 +                }
  1.4046 +                if (source->pos != source->writableBuffer.getBuffer()) {
  1.4047 +                    prev     = source->pos - 1;
  1.4048 +                } else {
  1.4049 +                    prev     = source->fcdPosition;
  1.4050 +                }
  1.4051 +                prevChar = *prev;
  1.4052 +
  1.4053 +                /* Handles Han and Supplementary characters here.*/
  1.4054 +                if (U16_IS_LEAD(prevChar)) {
  1.4055 +                    cp = ((((uint32_t)prevChar)<<10UL)+(ch)-(((uint32_t)0xd800<<10UL)+0xdc00-0x10000));
  1.4056 +                    source->pos = prev;
  1.4057 +                } else {
  1.4058 +                    return UCOL_NOT_FOUND; /* like unassigned */
  1.4059 +                }
  1.4060 +
  1.4061 +                return getPrevImplicit(cp, source);
  1.4062 +            }
  1.4063 +
  1.4064 +            /* UCA is filled with these. Tailorings are NOT_FOUND */
  1.4065 +            /* not yet implemented */
  1.4066 +        case CHARSET_TAG:  /* this tag always returns */
  1.4067 +            /* probably after 1.8 */
  1.4068 +            return UCOL_NOT_FOUND;
  1.4069 +
  1.4070 +        default:           /* this tag always returns */
  1.4071 +            *status = U_INTERNAL_PROGRAM_ERROR;
  1.4072 +            CE=0;
  1.4073 +            break;
  1.4074 +        }
  1.4075 +
  1.4076 +        if (CE <= UCOL_NOT_FOUND) {
  1.4077 +            break;
  1.4078 +        }
  1.4079 +    }
  1.4080 +
  1.4081 +    return CE;
  1.4082 +}
  1.4083 +
  1.4084 +/* Reverses part of a buffer in place. It is implemented as a macro below; the original          */
  1.4085 +/* function form is kept commented out for reference. We need this operation when doing          */
  1.4086 +/* continuation secondaries in French.                                                           */
  1.4087 +/*
  1.4088 +void uprv_ucol_reverse_buffer(uint8_t *start, uint8_t *end) {
  1.4089 +  uint8_t temp;
  1.4090 +  while(start<end) {
  1.4091 +    temp = *start;
  1.4092 +    *start++ = *end;
  1.4093 +    *end-- = temp;
  1.4094 +  }
  1.4095 +}
  1.4096 +*/
  1.4097 +
  1.4098 +#define uprv_ucol_reverse_buffer(TYPE, start, end) { /* reverses the TYPE elements from *start through *end (inclusive) in place; start and end are modified, so they must be assignable pointers */ \
  1.4099 +  TYPE tempA; \
  1.4100 +while((start)<(end)) { /* swap the outermost pair, then move both pointers inward until they meet */ \
  1.4101 +    tempA = *(start); \
  1.4102 +    *(start)++ = *(end); \
  1.4103 +    *(end)-- = tempA; \
  1.4104 +} \
  1.4105 +}
  1.4106 +
  1.4107 +/****************************************************************************/
  1.4108 +/* Following are the sortkey generation functions                           */
  1.4109 +/*                                                                          */
  1.4110 +/****************************************************************************/
  1.4111 +
  1.4112 +U_CAPI int32_t U_EXPORT2  // Merges two sort keys level by level into dest. Returns the number of bytes written (including the terminating 0), the required length if dest is too small, or 0 on bad arguments.
  1.4113 +ucol_mergeSortkeys(const uint8_t *src1, int32_t src1Length,
  1.4114 +                   const uint8_t *src2, int32_t src2Length,
  1.4115 +                   uint8_t *dest, int32_t destCapacity) {
  1.4116 +    /* check arguments: each source key must be non-NULL and have either length -1 or a positive length whose last byte is 0; dest may be NULL only if destCapacity is 0 */
  1.4117 +    if( src1==NULL || src1Length<-1 || src1Length==0 || (src1Length>0 && src1[src1Length-1]!=0) ||
  1.4118 +        src2==NULL || src2Length<-1 || src2Length==0 || (src2Length>0 && src2[src2Length-1]!=0) ||
  1.4119 +        destCapacity<0 || (destCapacity>0 && dest==NULL)
  1.4120 +    ) {
  1.4121 +        /* error, attempt to write a zero byte and return 0 */
  1.4122 +        if(dest!=NULL && destCapacity>0) {
  1.4123 +            *dest=0;
  1.4124 +        }
  1.4125 +        return 0;
  1.4126 +    }
  1.4127 +
  1.4128 +    /* check lengths and capacity; a negative length (-1) means the key is 0-terminated and is measured here, counting the terminator */
  1.4129 +    if(src1Length<0) {
  1.4130 +        src1Length=(int32_t)uprv_strlen((const char *)src1)+1;
  1.4131 +    }
  1.4132 +    if(src2Length<0) {
  1.4133 +        src2Length=(int32_t)uprv_strlen((const char *)src2)+1;
  1.4134 +    }
  1.4135 +
  1.4136 +    int32_t destLength=src1Length+src2Length;
  1.4137 +    if(destLength>destCapacity) {
  1.4138 +        /* the merged sort key does not fit into the destination */
  1.4139 +        return destLength;  // nothing has been written to dest at this point
  1.4140 +    }
  1.4141 +
  1.4142 +    /* merge the sort keys with the same number of levels */
  1.4143 +    uint8_t *p=dest;
  1.4144 +    for(;;) {
  1.4145 +        /* copy level from src1 not including 00 or 01 */
  1.4146 +        uint8_t b;
  1.4147 +        while((b=*src1)>=2) {
  1.4148 +            ++src1;
  1.4149 +            *p++=b;
  1.4150 +        }
  1.4151 +
  1.4152 +        /* add a 02 merge separator */
  1.4153 +        *p++=2;
  1.4154 +
  1.4155 +        /* copy level from src2 not including 00 or 01 */
  1.4156 +        while((b=*src2)>=2) {
  1.4157 +            ++src2;
  1.4158 +            *p++=b;
  1.4159 +        }
  1.4160 +
  1.4161 +        /* if both sort keys have another level, then add a 01 level separator and continue */
  1.4162 +        if(*src1==1 && *src2==1) {
  1.4163 +            ++src1;
  1.4164 +            ++src2;
  1.4165 +            *p++=1;
  1.4166 +        } else {
  1.4167 +            break;
  1.4168 +        }
  1.4169 +    }
  1.4170 +
  1.4171 +    /*
  1.4172 +     * here, at least one sort key is finished now, but the other one
  1.4173 +     * might have some contents left from containing more levels;
  1.4174 +     * that contents is just appended to the result
  1.4175 +     */
  1.4176 +    if(*src1!=0) {
  1.4177 +        /* src1 is not finished, therefore *src2==0, and src1 is appended */
  1.4178 +        src2=src1;
  1.4179 +    }
  1.4180 +    /* append src2, "the other, unfinished sort key" */
  1.4181 +    while((*p++=*src2++)!=0) {}  // copies up to and including the terminating 0 byte
  1.4182 +
  1.4183 +    /* the actual length might be less than destLength if either sort key contained illegally embedded zero bytes */
  1.4184 +    return (int32_t)(p-dest);
  1.4185 +}
  1.4186 +
  1.4187 +U_NAMESPACE_BEGIN
  1.4188 +
  1.4189 +class SortKeyByteSink : public ByteSink {  // ByteSink over a byte buffer that counts every appended byte, including bytes that did not fit
  1.4190 +public:
  1.4191 +    SortKeyByteSink(char *dest, int32_t destCapacity)
  1.4192 +            : buffer_(dest), capacity_(destCapacity),
  1.4193 +              appended_(0) {
  1.4194 +        if (buffer_ == NULL) {  // no buffer: force the capacity to 0
  1.4195 +            capacity_ = 0;
  1.4196 +        } else if(capacity_ < 0) {  // negative capacity: treat like no buffer at all
  1.4197 +            buffer_ = NULL;
  1.4198 +            capacity_ = 0;
  1.4199 +        }
  1.4200 +    }
  1.4201 +    virtual ~SortKeyByteSink();
  1.4202 +
  1.4203 +    virtual void Append(const char *bytes, int32_t n);
  1.4204 +    void Append(uint32_t b) {  // appends one byte (b is truncated to char)
  1.4205 +        if (appended_ < capacity_ || Resize(1, appended_)) {  // store only if there is room or Resize() makes room
  1.4206 +            buffer_[appended_] = (char)b;
  1.4207 +        }
  1.4208 +        ++appended_;  // counted even when the byte could not be stored
  1.4209 +    }
  1.4210 +    void Append(uint32_t b1, uint32_t b2) {  // appends two bytes (each truncated to char)
  1.4211 +        int32_t a2 = appended_ + 2;
  1.4212 +        if (a2 <= capacity_ || Resize(2, appended_)) {
  1.4213 +            buffer_[appended_] = (char)b1;
  1.4214 +            buffer_[appended_ + 1] = (char)b2;
  1.4215 +        } else if(appended_ < capacity_) {  // room for exactly one more byte: store b1, drop b2
  1.4216 +            buffer_[appended_] = (char)b1;
  1.4217 +        }
  1.4218 +        appended_ = a2;  // both bytes are counted regardless of how many were stored
  1.4219 +    }
  1.4220 +    virtual char *GetAppendBuffer(int32_t min_capacity,
  1.4221 +                                  int32_t desired_capacity_hint,
  1.4222 +                                  char *scratch, int32_t scratch_capacity,
  1.4223 +                                  int32_t *result_capacity);
  1.4224 +    int32_t NumberOfBytesAppended() const { return appended_; }  // may exceed the buffer capacity
  1.4225 +    /** @return FALSE if memory allocation failed */
  1.4226 +    UBool IsOk() const { return buffer_ != NULL; }
  1.4227 +
  1.4228 +protected:
  1.4229 +    virtual void AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length) = 0;  // called by Append() when n bytes do not fit at offset length
  1.4230 +    virtual UBool Resize(int32_t appendCapacity, int32_t length) = 0;  // asks the subclass to make room; a TRUE result is treated as "safe to write"
  1.4231 +
  1.4232 +    void SetNotOk() {  // drops the buffer so that IsOk() returns FALSE
  1.4233 +        buffer_ = NULL;
  1.4234 +        capacity_ = 0;
  1.4235 +    }
  1.4236 +
  1.4237 +    char *buffer_;  // destination bytes; NULL when the sink is not ok
  1.4238 +    int32_t capacity_;  // number of bytes that may be stored in buffer_
  1.4239 +    int32_t appended_;  // total number of bytes appended so far; can be larger than capacity_
  1.4240 +
  1.4241 +private:
  1.4242 +    SortKeyByteSink(const SortKeyByteSink &); // copy constructor not implemented
  1.4243 +    SortKeyByteSink &operator=(const SortKeyByteSink &); // assignment operator not implemented
  1.4244 +};
  1.4245 +
  1.4246 +SortKeyByteSink::~SortKeyByteSink() {}  // out-of-line definition of the virtual destructor; the body is empty
  1.4247 +
  1.4248 +void  // Appends n bytes: always counts them, copies them if they fit, otherwise hands them to AppendBeyondCapacity().
  1.4249 +SortKeyByteSink::Append(const char *bytes, int32_t n) {
  1.4250 +    if (n <= 0 || bytes == NULL) {  // nothing to append
  1.4251 +        return;
  1.4252 +    }
  1.4253 +    int32_t length = appended_;  // offset at which the new bytes start
  1.4254 +    appended_ += n;  // count the bytes before deciding whether they can be stored
  1.4255 +    if ((buffer_ + length) == bytes) {
  1.4256 +        return;  // the caller used GetAppendBuffer() and wrote the bytes already
  1.4257 +    }
  1.4258 +    int32_t available = capacity_ - length;
  1.4259 +    if (n <= available) {
  1.4260 +        uprv_memcpy(buffer_ + length, bytes, n);
  1.4261 +    } else {
  1.4262 +        AppendBeyondCapacity(bytes, n, length);  // the subclass decides whether to grow or truncate
  1.4263 +    }
  1.4264 +}
  1.4265 +
  1.4266 +char *  // Returns a buffer the caller may write into before calling Append(); its size is stored in *result_capacity.
  1.4267 +SortKeyByteSink::GetAppendBuffer(int32_t min_capacity,
  1.4268 +                                 int32_t desired_capacity_hint,
  1.4269 +                                 char *scratch,
  1.4270 +                                 int32_t scratch_capacity,
  1.4271 +                                 int32_t *result_capacity) {
  1.4272 +    if (min_capacity < 1 || scratch_capacity < min_capacity) {  // reject requests that the scratch buffer could not satisfy
  1.4273 +        *result_capacity = 0;
  1.4274 +        return NULL;
  1.4275 +    }
  1.4276 +    int32_t available = capacity_ - appended_;  // negative once more bytes were appended than fit
  1.4277 +    if (available >= min_capacity) {  // enough room left in our own buffer
  1.4278 +        *result_capacity = available;
  1.4279 +        return buffer_ + appended_;
  1.4280 +    } else if (Resize(desired_capacity_hint, appended_)) {  // otherwise try to grow the buffer
  1.4281 +        *result_capacity = capacity_ - appended_;
  1.4282 +        return buffer_ + appended_;
  1.4283 +    } else {  // fall back to the caller's scratch buffer
  1.4284 +        *result_capacity = scratch_capacity;
  1.4285 +        return scratch;
  1.4286 +    }
  1.4287 +}
  1.4288 +
  1.4289 +class FixedSortKeyByteSink : public SortKeyByteSink {  // sink over a fixed-size buffer: Resize() always fails and bytes that do not fit are dropped
  1.4290 +public:
  1.4291 +    FixedSortKeyByteSink(char *dest, int32_t destCapacity)
  1.4292 +            : SortKeyByteSink(dest, destCapacity) {}
  1.4293 +    virtual ~FixedSortKeyByteSink();
  1.4294 +
  1.4295 +private:
  1.4296 +    virtual void AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length);
  1.4297 +    virtual UBool Resize(int32_t appendCapacity, int32_t length);
  1.4298 +};
  1.4299 +
  1.4300 +FixedSortKeyByteSink::~FixedSortKeyByteSink() {}  // out-of-line definition of the virtual destructor; the body is empty
  1.4301 +
  1.4302 +void  // Copies as many of the bytes as still fit into the buffer; the remainder is discarded.
  1.4303 +FixedSortKeyByteSink::AppendBeyondCapacity(const char *bytes, int32_t /*n*/, int32_t length) {
  1.4304 +    // buffer_ != NULL && bytes != NULL && n > 0 && appended_ > capacity_
  1.4305 +    // Fill the buffer completely.
  1.4306 +    int32_t available = capacity_ - length;
  1.4307 +    if (available > 0) {  // skipped when the buffer was already full before this append
  1.4308 +        uprv_memcpy(buffer_ + length, bytes, available);
  1.4309 +    }
  1.4310 +}
  1.4311 +
  1.4312 +UBool  // The buffer cannot grow: always reports failure and ignores both arguments.
  1.4313 +FixedSortKeyByteSink::Resize(int32_t /*appendCapacity*/, int32_t /*length*/) {
  1.4314 +    return FALSE;
  1.4315 +}
  1.4316 +
  1.4317 +class CollationKeyByteSink : public SortKeyByteSink {  // sink that writes into a CollationKey's byte buffer and grows it through key_.reallocate()
  1.4318 +public:
  1.4319 +    CollationKeyByteSink(CollationKey &key)
  1.4320 +            : SortKeyByteSink(reinterpret_cast<char *>(key.getBytes()), key.getCapacity()),
  1.4321 +              key_(key) {}
  1.4322 +    virtual ~CollationKeyByteSink();
  1.4323 +
  1.4324 +private:
  1.4325 +    virtual void AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length);
  1.4326 +    virtual UBool Resize(int32_t appendCapacity, int32_t length);
  1.4327 +
  1.4328 +    CollationKey &key_;  // the key whose bytes this sink fills; held by reference
  1.4329 +};
  1.4330 +
  1.4331 +CollationKeyByteSink::~CollationKeyByteSink() {}  // out-of-line definition of the virtual destructor; the body is empty
  1.4332 +
  1.4333 +void  // Grows the buffer for n more bytes and copies them to offset length; copies nothing if growing fails.
  1.4334 +CollationKeyByteSink::AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length) {
  1.4335 +    // buffer_ != NULL && bytes != NULL && n > 0 && appended_ > capacity_
  1.4336 +    if (Resize(n, length)) {
  1.4337 +        uprv_memcpy(buffer_ + length, bytes, n);
  1.4338 +    }
  1.4339 +}
  1.4340 +
  1.4341 +UBool  // Asks key_ for a larger buffer; returns FALSE if that fails now or the sink was already not ok.
  1.4342 +CollationKeyByteSink::Resize(int32_t appendCapacity, int32_t length) {
  1.4343 +    if (buffer_ == NULL) {
  1.4344 +        return FALSE;  // allocation failed before already
  1.4345 +    }
  1.4346 +    int32_t newCapacity = 2 * capacity_;  // first candidate: double the current capacity
  1.4347 +    int32_t altCapacity = length + 2 * appendCapacity;  // second candidate: current length plus twice the requested amount
  1.4348 +    if (newCapacity < altCapacity) {  // take the larger of the two candidates
  1.4349 +        newCapacity = altCapacity;
  1.4350 +    }
  1.4351 +    if (newCapacity < 200) {  // never request fewer than 200 bytes
  1.4352 +        newCapacity = 200;
  1.4353 +    }
  1.4354 +    uint8_t *newBuffer = key_.reallocate(newCapacity, length);  // NOTE(review): length is presumably the number of existing bytes reallocate() preserves -- confirm
  1.4355 +    if (newBuffer == NULL) {
  1.4356 +        SetNotOk();  // remember the failure; later Resize() calls return FALSE immediately
  1.4357 +        return FALSE;
  1.4358 +    }
  1.4359 +    buffer_ = reinterpret_cast<char *>(newBuffer);
  1.4360 +    capacity_ = newCapacity;
  1.4361 +    return TRUE;
  1.4362 +}
  1.4363 +
  1.4364 +/**
  1.4365 + * uint8_t byte buffer, similar to CharString but simpler.
  1.4366 + * Bytes are added one at a time with appendByte(); once growing the buffer
  1.4367 + * has failed, isOk() returns FALSE and further bytes are not stored.
  1.4368 + */
  1.4367 +class SortKeyLevel : public UMemory {
  1.4368 +public:
  1.4369 +    SortKeyLevel() : len(0), ok(TRUE) {}  // starts empty and ok
  1.4370 +    ~SortKeyLevel() {}
  1.4371 +
  1.4372 +    /** @return FALSE if memory allocation failed */
  1.4373 +    UBool isOk() const { return ok; }
  1.4374 +    UBool isEmpty() const { return len == 0; }
  1.4375 +    int32_t length() const { return len; }  // number of bytes stored so far
  1.4376 +    const uint8_t *data() const { return buffer.getAlias(); }
  1.4377 +    uint8_t operator[](int32_t index) const { return buffer[index]; }  // no bounds check is done here
  1.4378 +
  1.4379 +    void appendByte(uint32_t b);
  1.4380 +
  1.4381 +    void appendTo(ByteSink &sink) const {  // appends all len stored bytes to sink
  1.4382 +        sink.Append(reinterpret_cast<const char *>(buffer.getAlias()), len);
  1.4383 +    }
  1.4384 +
  1.4385 +    uint8_t &lastByte() {  // writable reference to the most recently stored byte; the level must not be empty
  1.4386 +        U_ASSERT(len > 0);
  1.4387 +        return buffer[len - 1];
  1.4388 +    }
  1.4389 +
  1.4390 +    uint8_t *getLastFewBytes(int32_t n) {  // pointer to the last n stored bytes, or NULL if not ok or fewer than n bytes are stored
  1.4391 +        if (ok && len >= n) {
  1.4392 +            return buffer.getAlias() + len - n;
  1.4393 +        } else {
  1.4394 +            return NULL;
  1.4395 +        }
  1.4396 +    }
  1.4397 +
  1.4398 +private:
  1.4399 +    MaybeStackArray<uint8_t, 40> buffer;  // byte storage; NOTE(review): 40 is presumably the initial in-object capacity -- confirm
  1.4400 +    int32_t len;  // number of bytes stored in buffer
  1.4401 +    UBool ok;  // set to FALSE by ensureCapacity() when resizing fails
  1.4402 +
  1.4403 +    UBool ensureCapacity(int32_t appendCapacity);
  1.4404 +
  1.4405 +    SortKeyLevel(const SortKeyLevel &other); // forbid copying of this class
  1.4406 +    SortKeyLevel &operator=(const SortKeyLevel &other); // forbid copying of this class
  1.4407 +};
  1.4408 +
  1.4409 +void SortKeyLevel::appendByte(uint32_t b) {  // stores the low 8 bits of b, growing the buffer if it is full; the byte is silently dropped if growing fails
  1.4410 +    if(len < buffer.getCapacity() || ensureCapacity(1)) {
  1.4411 +        buffer[len++] = (uint8_t)b;
  1.4412 +    }
  1.4413 +}
  1.4414 +
  1.4415 +UBool SortKeyLevel::ensureCapacity(int32_t appendCapacity) {  // grows the buffer; returns FALSE if resizing fails now or has failed before
  1.4416 +    if(!ok) {  // an earlier resize failed: do not try again
  1.4417 +        return FALSE;
  1.4418 +    }
  1.4419 +    int32_t newCapacity = 2 * buffer.getCapacity();  // first candidate: double the current capacity
  1.4420 +    int32_t altCapacity = len + 2 * appendCapacity;  // second candidate: current length plus twice the requested amount
  1.4421 +    if (newCapacity < altCapacity) {  // take the larger of the two candidates
  1.4422 +        newCapacity = altCapacity;
  1.4423 +    }
  1.4424 +    if (newCapacity < 200) {  // never request fewer than 200 bytes
  1.4425 +        newCapacity = 200;
  1.4426 +    }
  1.4427 +    if(buffer.resize(newCapacity, len)==NULL) {
  1.4428 +        return ok = FALSE;  // record the failure in ok and report it in one step
  1.4429 +    }
  1.4430 +    return TRUE;
  1.4431 +}
  1.4432 +
  1.4433 +U_NAMESPACE_END
  1.4434 +
  1.4435 +/* sortkey API */
  1.4436 +U_CAPI int32_t U_EXPORT2  // Writes the sort key for source into result (at most resultLength bytes) and returns the number of bytes the key generator appended; returns 0 if source is NULL or generation fails.
  1.4437 +ucol_getSortKey(const    UCollator    *coll,
  1.4438 +        const    UChar        *source,
  1.4439 +        int32_t        sourceLength,
  1.4440 +        uint8_t        *result,
  1.4441 +        int32_t        resultLength)
  1.4442 +{
  1.4443 +    UTRACE_ENTRY(UTRACE_UCOL_GET_SORTKEY);
  1.4444 +    if (UTRACE_LEVEL(UTRACE_VERBOSE)) {
  1.4445 +        UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, source string = %vh ", coll, source,
  1.4446 +            ((sourceLength==-1 && source!=NULL) ? u_strlen(source) : sourceLength));
  1.4447 +    }
  1.4448 +
  1.4449 +    if(coll->delegate != NULL) {  // a delegate is set: forward the whole call to it as a Collator
  1.4450 +      return ((const Collator*)coll->delegate)->getSortKey(source, sourceLength, result, resultLength);
  1.4451 +    }
  1.4452 +
  1.4453 +    UErrorCode status = U_ZERO_ERROR;
  1.4454 +    int32_t keySize   = 0;  // stays 0 unless key generation succeeds
  1.4455 +
  1.4456 +    if(source != NULL) {
  1.4457 +        // source == NULL is actually an error situation, but we would need to
  1.4458 +        // have an error code to return it. Until we introduce a new
  1.4459 +        // API, it stays like this
  1.4460 +
  1.4461 +        /* this uses the function pointer that is set in updateinternalstate */
  1.4462 +        /* currently, there are two funcs: */
  1.4463 +        /*ucol_calcSortKey(...);*/
  1.4464 +        /*ucol_calcSortKeySimpleTertiary(...);*/
  1.4465 +
  1.4466 +        uint8_t noDest[1] = { 0 };  // dummy destination substituted when result is NULL
  1.4467 +        if(result == NULL) {
  1.4468 +            // Distinguish pure preflighting from an allocation error.
  1.4469 +            result = noDest;
  1.4470 +            resultLength = 0;  // capacity 0, so no byte is ever stored in noDest
  1.4471 +        }
  1.4472 +        FixedSortKeyByteSink sink(reinterpret_cast<char *>(result), resultLength);
  1.4473 +        coll->sortKeyGen(coll, source, sourceLength, sink, &status);
  1.4474 +        if(U_SUCCESS(status)) {
  1.4475 +            keySize = sink.NumberOfBytesAppended();  // counts all appended bytes, so it can exceed resultLength
  1.4476 +        }
  1.4477 +    }
  1.4478 +    UTRACE_DATA2(UTRACE_VERBOSE, "Sort Key = %vb", result, keySize);
  1.4479 +    UTRACE_EXIT_STATUS(status);
  1.4480 +    return keySize;
  1.4481 +}
  1.4482 +
  1.4483 +U_CFUNC int32_t  // Generates the sort key for source into key through a CollationKeyByteSink and returns the number of bytes appended.
  1.4484 +ucol_getCollationKey(const UCollator *coll,
  1.4485 +                     const UChar *source, int32_t sourceLength,
  1.4486 +                     CollationKey &key,
  1.4487 +                     UErrorCode &errorCode) {
  1.4488 +    CollationKeyByteSink sink(key);
  1.4489 +    coll->sortKeyGen(coll, source, sourceLength, sink, &errorCode);
  1.4490 +    return sink.NumberOfBytesAppended();  // NOTE(review): returned even when errorCode reports a failure; callers presumably check errorCode -- confirm
  1.4491 +}
  1.4492 +
  1.4493 +// Is this primary weight compressible? Decided solely from the lead byte primary1.
  1.4494 +// Returns false for multi-lead-byte scripts (digits, Latin, Han, implicit).
  1.4495 +// TODO: This should use per-lead-byte flags from FractionalUCA.txt.
  1.4496 +static inline UBool
  1.4497 +isCompressible(const UCollator * /*coll*/, uint8_t primary1) {  // the collator argument is unnamed and unused
  1.4498 +    return UCOL_BYTE_FIRST_NON_LATIN_PRIMARY <= primary1 && primary1 <= maxRegularPrimary;  // inclusive range test on the lead byte
  1.4499 +}
  1.4500 +
  1.4501 +static  // Opens a new case-level byte in `cases` whenever caseShift has dropped to 0.
  1.4502 +inline void doCaseShift(SortKeyLevel &cases, uint32_t &caseShift) {
  1.4503 +    if (caseShift  == 0) {  // callers pre-decrement caseShift for each bit they write, so 0 means no position is left
  1.4504 +        cases.appendByte(UCOL_CASE_BYTE_START);  // fresh byte that callers then OR bits into through lastByte()
  1.4505 +        caseShift = UCOL_CASE_SHIFT_START;  // restart the shift counter for the new byte
  1.4506 +    }
  1.4507 +}
  1.4508 +
  1.4509 +// Packs the secondary buffer when processing French locale: appends the bytes to result in reverse order, run-length compressing UCOL_COMMON2.
  1.4510 +static void
  1.4511 +packFrench(const uint8_t *secondaries, int32_t secsize, SortKeyByteSink &result) {
  1.4512 +    secondaries += secsize;  // We read the secondary-level bytes back to front.
  1.4513 +    uint8_t secondary;
  1.4514 +    int32_t count2 = 0;  // length of the pending run of UCOL_COMMON2 bytes
  1.4515 +    int32_t i = 0;
  1.4516 +    // we use i here since the key size already accounts for terminators, so we'll discard the increment
  1.4517 +    for(i = 0; i<secsize; i++) {
  1.4518 +        secondary = *(secondaries-i-1);  // i-th byte counted from the end of the buffer
  1.4519 +        /* This is compression code. */
  1.4520 +        if (secondary == UCOL_COMMON2) {
  1.4521 +            ++count2;  // extend the run; nothing is written until the run ends
  1.4522 +        } else {
  1.4523 +            if (count2 > 0) {  // a run just ended: flush it before writing the current byte
  1.4524 +                if (secondary > UCOL_COMMON2) { // not necessary for 4th level.
  1.4525 +                    while (count2 > UCOL_TOP_COUNT2) {  // run too long for one byte: emit full-count bytes first
  1.4526 +                        result.Append(UCOL_COMMON_TOP2 - UCOL_TOP_COUNT2);
  1.4527 +                        count2 -= (uint32_t)UCOL_TOP_COUNT2;
  1.4528 +                    }
  1.4529 +                    result.Append(UCOL_COMMON_TOP2 - (count2-1));  // remainder encoded downward from UCOL_COMMON_TOP2
  1.4530 +                } else {
  1.4531 +                    while (count2 > UCOL_BOT_COUNT2) {  // same splitting, using the bottom range
  1.4532 +                        result.Append(UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2);
  1.4533 +                        count2 -= (uint32_t)UCOL_BOT_COUNT2;
  1.4534 +                    }
  1.4535 +                    result.Append(UCOL_COMMON_BOT2 + (count2-1));  // remainder encoded upward from UCOL_COMMON_BOT2
  1.4536 +                }
  1.4537 +                count2 = 0;
  1.4538 +            }
  1.4539 +            result.Append(secondary);  // non-common bytes are copied through unchanged
  1.4540 +        }
  1.4541 +    }
  1.4542 +    if (count2 > 0) {  // a run that reaches the end of the data is always encoded from the bottom range
  1.4543 +        while (count2 > UCOL_BOT_COUNT2) {
  1.4544 +            result.Append(UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2);
  1.4545 +            count2 -= (uint32_t)UCOL_BOT_COUNT2;
  1.4546 +        }
  1.4547 +        result.Append(UCOL_COMMON_BOT2 + (count2-1));
  1.4548 +    }
  1.4549 +}
  1.4550 +
  1.4551 +#define DEFAULT_ERROR_SIZE_FOR_CALCSORTKEY 0
  1.4552 +
  1.4553 +/* This is the sortkey work horse function: it walks the CEs of source and writes every enabled level into result, each level preceded by UCOL_LEVELTERMINATOR */
  1.4554 +U_CFUNC void U_CALLCONV
  1.4555 +ucol_calcSortKey(const    UCollator    *coll,
  1.4556 +        const    UChar        *source,
  1.4557 +        int32_t        sourceLength,
  1.4558 +        SortKeyByteSink &result,
  1.4559 +        UErrorCode *status)
  1.4560 +{
  1.4561 +    if(U_FAILURE(*status)) {  // nothing is written when the caller already has a failure
  1.4562 +        return;
  1.4563 +    }
  1.4564 +
  1.4565 +    SortKeyByteSink &primaries = result;  // primary bytes go straight to the output; the other levels are buffered below
  1.4566 +    SortKeyLevel secondaries;
  1.4567 +    SortKeyLevel tertiaries;
  1.4568 +    SortKeyLevel cases;
  1.4569 +    SortKeyLevel quads;
  1.4570 +
  1.4571 +    UnicodeString normSource;
  1.4572 +
  1.4573 +    int32_t len = (sourceLength == -1 ? u_strlen(source) : sourceLength);  // -1 means the length is measured with u_strlen
  1.4574 +
  1.4575 +    UColAttributeValue strength = coll->strength;
  1.4576 +
  1.4577 +    uint8_t compareSec   = (uint8_t)((strength >= UCOL_SECONDARY)?0:0xFF);  // 0 = level enabled; 0xFF makes "byte > compareXxx" always false
  1.4578 +    uint8_t compareTer   = (uint8_t)((strength >= UCOL_TERTIARY)?0:0xFF);
  1.4579 +    uint8_t compareQuad  = (uint8_t)((strength >= UCOL_QUATERNARY)?0:0xFF);
  1.4580 +    UBool  compareIdent = (strength == UCOL_IDENTICAL);
  1.4581 +    UBool  doCase = (coll->caseLevel == UCOL_ON);
  1.4582 +    UBool  isFrenchSec = (coll->frenchCollation == UCOL_ON) && (compareSec == 0);
  1.4583 +    UBool  shifted = (coll->alternateHandling == UCOL_SHIFTED);
  1.4584 +    //UBool  qShifted = shifted && (compareQuad == 0);
  1.4585 +    UBool  doHiragana = (coll->hiraganaQ == UCOL_ON) && (compareQuad == 0);
  1.4586 +
  1.4587 +    uint32_t variableTopValue = coll->variableTopValue;
  1.4588 +    // TODO: UCOL_COMMON_BOT4 should be a function of qShifted. If we have no
  1.4589 +    // qShifted, we don't need to set UCOL_COMMON_BOT4 so high.
  1.4590 +    uint8_t UCOL_COMMON_BOT4 = (uint8_t)((coll->variableTopValue>>8)+1);
  1.4591 +    uint8_t UCOL_HIRAGANA_QUAD = 0;
  1.4592 +    if(doHiragana) {
  1.4593 +        UCOL_HIRAGANA_QUAD=UCOL_COMMON_BOT4++;
  1.4594 +        /* allocate one more space for hiragana, value for hiragana */
  1.4595 +    }
  1.4596 +    uint8_t UCOL_BOT_COUNT4 = (uint8_t)(0xFF - UCOL_COMMON_BOT4);  // longest run of common quaternaries one byte can encode
  1.4597 +
  1.4598 +    /* support for special features like caselevel and funky secondaries */
  1.4599 +    int32_t lastSecondaryLength = 0;
  1.4600 +    uint32_t caseShift = 0;
  1.4601 +
  1.4602 +    /* If we need to normalize, we'll do it all at once at the beginning! */
  1.4603 +    const Normalizer2 *norm2;
  1.4604 +    if(compareIdent) {  // identical strength uses the NFD instance
  1.4605 +        norm2 = Normalizer2Factory::getNFDInstance(*status);
  1.4606 +    } else if(coll->normalizationMode != UCOL_OFF) {  // otherwise the FCD instance, when normalization is on
  1.4607 +        norm2 = Normalizer2Factory::getFCDInstance(*status);
  1.4608 +    } else {
  1.4609 +        norm2 = NULL;
  1.4610 +    }
  1.4611 +    if(norm2 != NULL) {
  1.4612 +        normSource.setTo(FALSE, source, len);
  1.4613 +        int32_t qcYesLength = norm2->spanQuickCheckYes(normSource, *status);
  1.4614 +        if(qcYesLength != len) {  // only the part after the quick-check-yes prefix is normalized
  1.4615 +            UnicodeString unnormalized = normSource.tempSubString(qcYesLength);
  1.4616 +            normSource.truncate(qcYesLength);
  1.4617 +            norm2->normalizeSecondAndAppend(normSource, unnormalized, *status);
  1.4618 +            source = normSource.getBuffer();  // from here on iterate over the normalized copy
  1.4619 +            len = normSource.length();
  1.4620 +        }
  1.4621 +    }
  1.4622 +    collIterate s;
  1.4623 +    IInit_collIterate(coll, source, len, &s, status);
  1.4624 +    if(U_FAILURE(*status)) {  // also catches failures reported by the normalizer calls above
  1.4625 +        return;
  1.4626 +    }
  1.4627 +    s.flags &= ~UCOL_ITER_NORM;  // source passed the FCD test or else was normalized.
  1.4628 +
  1.4629 +    uint32_t order = 0;
  1.4630 +
  1.4631 +    uint8_t primary1 = 0;
  1.4632 +    uint8_t primary2 = 0;
  1.4633 +    uint8_t secondary = 0;
  1.4634 +    uint8_t tertiary = 0;
  1.4635 +    uint8_t caseSwitch = coll->caseSwitch;
  1.4636 +    uint8_t tertiaryMask = coll->tertiaryMask;
  1.4637 +    int8_t tertiaryAddition = coll->tertiaryAddition;
  1.4638 +    uint8_t tertiaryTop = coll->tertiaryTop;
  1.4639 +    uint8_t tertiaryBottom = coll->tertiaryBottom;
  1.4640 +    uint8_t tertiaryCommon = coll->tertiaryCommon;
  1.4641 +    uint8_t caseBits = 0;
  1.4642 +
  1.4643 +    UBool wasShifted = FALSE;
  1.4644 +    UBool notIsContinuation = FALSE;
  1.4645 +
  1.4646 +    uint32_t count2 = 0, count3 = 0, count4 = 0;  // pending run lengths of common secondary/tertiary/quaternary weights
  1.4647 +    uint8_t leadPrimary = 0;  // lead byte of the current compressed primary run; 0 when no run is open
  1.4648 +
  1.4649 +    for(;;) {
  1.4650 +        order = ucol_IGetNextCE(coll, &s, status);
  1.4651 +        if(order == UCOL_NO_MORE_CES) {
  1.4652 +            break;
  1.4653 +        }
  1.4654 +
  1.4655 +        if(order == 0) {  // an all-zero CE contributes nothing to any level
  1.4656 +            continue;
  1.4657 +        }
  1.4658 +
  1.4659 +        notIsContinuation = !isContinuation(order);
  1.4660 +
  1.4661 +        if(notIsContinuation) {
  1.4662 +            tertiary = (uint8_t)(order & UCOL_BYTE_SIZE_MASK);
  1.4663 +        } else {
  1.4664 +            tertiary = (uint8_t)((order & UCOL_REMOVE_CONTINUATION));
  1.4665 +        }
  1.4666 +
  1.4667 +        secondary = (uint8_t)((order >>= 8) & UCOL_BYTE_SIZE_MASK);  // note: order itself is shifted in place here and on the next line
  1.4668 +        primary2 = (uint8_t)((order >>= 8) & UCOL_BYTE_SIZE_MASK);
  1.4669 +        primary1 = (uint8_t)(order >> 8);  // order now holds only the upper 16 bits of the CE
  1.4670 +
  1.4671 +        uint8_t originalPrimary1 = primary1;  // kept because isCompressible() is asked about the unpermuted lead byte
  1.4672 +        if(notIsContinuation && coll->leadBytePermutationTable != NULL) {
  1.4673 +            primary1 = coll->leadBytePermutationTable[primary1];
  1.4674 +        }
  1.4675 +
  1.4676 +        if((shifted && ((notIsContinuation && order <= variableTopValue && primary1 > 0)
  1.4677 +                        || (!notIsContinuation && wasShifted)))
  1.4678 +            || (wasShifted && primary1 == 0)) /* amendment to the UCA says that primary ignorables */
  1.4679 +        {
  1.4680 +            /* and other ignorables should be removed if following a shifted code point */
  1.4681 +            if(primary1 == 0) { /* if we were shifted and we got an ignorable code point */
  1.4682 +                /* we should just completely ignore it */
  1.4683 +                continue;
  1.4684 +            }
  1.4685 +            if(compareQuad == 0) {
  1.4686 +                if(count4 > 0) {  // flush the pending run of common quaternaries first
  1.4687 +                    while (count4 > UCOL_BOT_COUNT4) {
  1.4688 +                        quads.appendByte(UCOL_COMMON_BOT4 + UCOL_BOT_COUNT4);
  1.4689 +                        count4 -= UCOL_BOT_COUNT4;
  1.4690 +                    }
  1.4691 +                    quads.appendByte(UCOL_COMMON_BOT4 + (count4-1));
  1.4692 +                    count4 = 0;
  1.4693 +                }
  1.4694 +                /* We are dealing with a variable and we're treating them as shifted */
  1.4695 +                /* This is a shifted ignorable */
  1.4696 +                if(primary1 != 0) { /* we need to check this since we could be in continuation */
  1.4697 +                    quads.appendByte(primary1);
  1.4698 +                }
  1.4699 +                if(primary2 != 0) {
  1.4700 +                    quads.appendByte(primary2);
  1.4701 +                }
  1.4702 +            }
  1.4703 +            wasShifted = TRUE;
  1.4704 +        } else {
  1.4705 +            wasShifted = FALSE;
  1.4706 +            /* Note: This code assumes that the table is well built i.e. not having 0 bytes where they are not supposed to be. */
  1.4707 +            /* Usually, we'll have non-zero primary1 & primary2, except in cases of a-z and friends, when primary2 will   */
  1.4708 +            /* be zero with non zero primary1. What follows is the regular and simple sortkey calc */
  1.4709 +            if(primary1 != UCOL_IGNORABLE) {
  1.4710 +                if(notIsContinuation) {
  1.4711 +                    if(leadPrimary == primary1) {  // same lead byte as the open run: write only the second byte
  1.4712 +                        primaries.Append(primary2);
  1.4713 +                    } else {
  1.4714 +                        if(leadPrimary != 0) {  // close the open run with a marker chosen by the direction of the lead-byte change
  1.4715 +                            primaries.Append((primary1 > leadPrimary) ? UCOL_BYTE_UNSHIFTED_MAX : UCOL_BYTE_UNSHIFTED_MIN);
  1.4716 +                        }
  1.4717 +                        if(primary2 == UCOL_IGNORABLE) {
  1.4718 +                            /* one byter, not compressed */
  1.4719 +                            primaries.Append(primary1);
  1.4720 +                            leadPrimary = 0;
  1.4721 +                        } else if(isCompressible(coll, originalPrimary1)) {
  1.4722 +                            /* compress */
  1.4723 +                            primaries.Append(leadPrimary = primary1, primary2);
  1.4724 +                        } else {
  1.4725 +                            leadPrimary = 0;
  1.4726 +                            primaries.Append(primary1, primary2);
  1.4727 +                        }
  1.4728 +                    }
  1.4729 +                } else { /* we are in continuation, so we're gonna add primary to the key don't care about compression */
  1.4730 +                    if(primary2 == UCOL_IGNORABLE) {
  1.4731 +                        primaries.Append(primary1);
  1.4732 +                    } else {
  1.4733 +                        primaries.Append(primary1, primary2);
  1.4734 +                    }
  1.4735 +                }
  1.4736 +            }
  1.4737 +
  1.4738 +            if(secondary > compareSec) {  // never true when the secondary level is disabled (compareSec == 0xFF)
  1.4739 +                if(!isFrenchSec) {
  1.4740 +                    /* This is compression code. */
  1.4741 +                    if (secondary == UCOL_COMMON2 && notIsContinuation) {
  1.4742 +                        ++count2;
  1.4743 +                    } else {
  1.4744 +                        if (count2 > 0) {
  1.4745 +                            if (secondary > UCOL_COMMON2) { // not necessary for 4th level.
  1.4746 +                                while (count2 > UCOL_TOP_COUNT2) {
  1.4747 +                                    secondaries.appendByte(UCOL_COMMON_TOP2 - UCOL_TOP_COUNT2);
  1.4748 +                                    count2 -= (uint32_t)UCOL_TOP_COUNT2;
  1.4749 +                                }
  1.4750 +                                secondaries.appendByte(UCOL_COMMON_TOP2 - (count2-1));
  1.4751 +                            } else {
  1.4752 +                                while (count2 > UCOL_BOT_COUNT2) {
  1.4753 +                                    secondaries.appendByte(UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2);
  1.4754 +                                    count2 -= (uint32_t)UCOL_BOT_COUNT2;
  1.4755 +                                }
  1.4756 +                                secondaries.appendByte(UCOL_COMMON_BOT2 + (count2-1));
  1.4757 +                            }
  1.4758 +                            count2 = 0;
  1.4759 +                        }
  1.4760 +                        secondaries.appendByte(secondary);
  1.4761 +                    }
  1.4762 +                } else {
  1.4763 +                    /* Do the special handling for French secondaries */
  1.4764 +                    /* We need to get continuation elements and do intermediate restore */
  1.4765 +                    /* abc1c2c3de with french secondaries need to be edc1c2c3ba NOT edc3c2c1ba */
  1.4766 +                    if(notIsContinuation) {
  1.4767 +                        if (lastSecondaryLength > 1) {  // the previous CE had continuations: pre-reverse its bytes in place
  1.4768 +                            uint8_t *frenchStartPtr = secondaries.getLastFewBytes(lastSecondaryLength);
  1.4769 +                            if (frenchStartPtr != NULL) {
  1.4770 +                                /* reverse secondaries from frenchStartPtr up to frenchEndPtr */
  1.4771 +                                uint8_t *frenchEndPtr = frenchStartPtr + lastSecondaryLength - 1;
  1.4772 +                                uprv_ucol_reverse_buffer(uint8_t, frenchStartPtr, frenchEndPtr);
  1.4773 +                            }
  1.4774 +                        }
  1.4775 +                        lastSecondaryLength = 1;
  1.4776 +                    } else {
  1.4777 +                        ++lastSecondaryLength;
  1.4778 +                    }
  1.4779 +                    secondaries.appendByte(secondary);  // stored uncompressed; packFrench() compresses while reversing at the end
  1.4780 +                }
  1.4781 +            }
  1.4782 +
  1.4783 +            if(doCase && (primary1 > 0 || strength >= UCOL_SECONDARY)) {
  1.4784 +                // do the case level if we need to do it. We don't want to calculate
  1.4785 +                // case level for primary ignorables if we have only primary strength and case level
  1.4786 +                // otherwise we would break well formedness of CEs
  1.4787 +                doCaseShift(cases, caseShift);
  1.4788 +                if(notIsContinuation) {
  1.4789 +                    caseBits = (uint8_t)(tertiary & 0xC0);  // the top two bits of the tertiary byte carry the case information
  1.4790 +
  1.4791 +                    if(tertiary != 0) {
  1.4792 +                        if(coll->caseFirst == UCOL_UPPER_FIRST) {
  1.4793 +                            if((caseBits & 0xC0) == 0) {
  1.4794 +                                cases.lastByte() |= 1 << (--caseShift);
  1.4795 +                            } else {
  1.4796 +                                cases.lastByte() |= 0 << (--caseShift);  // ORs in nothing; the statement exists to consume one bit position
  1.4797 +                                /* second bit */
  1.4798 +                                doCaseShift(cases, caseShift);
  1.4799 +                                cases.lastByte() |= ((caseBits>>6)&1) << (--caseShift);
  1.4800 +                            }
  1.4801 +                        } else {
  1.4802 +                            if((caseBits & 0xC0) == 0) {
  1.4803 +                                cases.lastByte() |= 0 << (--caseShift);  // ORs in nothing; the statement exists to consume one bit position
  1.4804 +                            } else {
  1.4805 +                                cases.lastByte() |= 1 << (--caseShift);
  1.4806 +                                /* second bit */
  1.4807 +                                doCaseShift(cases, caseShift);
  1.4808 +                                cases.lastByte() |= ((caseBits>>7)&1) << (--caseShift);
  1.4809 +                            }
  1.4810 +                        }
  1.4811 +                    }
  1.4812 +                }
  1.4813 +            } else {
  1.4814 +                if(notIsContinuation) {
  1.4815 +                    tertiary ^= caseSwitch;
  1.4816 +                }
  1.4817 +            }
  1.4818 +
  1.4819 +            tertiary &= tertiaryMask;
  1.4820 +            if(tertiary > compareTer) {  // never true when the tertiary level is disabled (compareTer == 0xFF)
  1.4821 +                /* This is compression code. */
  1.4822 +                /* sequence size check is included in the if clause */
  1.4823 +                if (tertiary == tertiaryCommon && notIsContinuation) {
  1.4824 +                    ++count3;
  1.4825 +                } else {
  1.4826 +                    if(tertiary > tertiaryCommon && tertiaryCommon == UCOL_COMMON3_NORMAL) {
  1.4827 +                        tertiary += tertiaryAddition;
  1.4828 +                    } else if(tertiary <= tertiaryCommon && tertiaryCommon == UCOL_COMMON3_UPPERFIRST) {
  1.4829 +                        tertiary -= tertiaryAddition;
  1.4830 +                    }
  1.4831 +                    if (count3 > 0) {
  1.4832 +                        if ((tertiary > tertiaryCommon)) {
  1.4833 +                            while (count3 > coll->tertiaryTopCount) {
  1.4834 +                                tertiaries.appendByte(tertiaryTop - coll->tertiaryTopCount);
  1.4835 +                                count3 -= (uint32_t)coll->tertiaryTopCount;
  1.4836 +                            }
  1.4837 +                            tertiaries.appendByte(tertiaryTop - (count3-1));
  1.4838 +                        } else {
  1.4839 +                            while (count3 > coll->tertiaryBottomCount) {
  1.4840 +                                tertiaries.appendByte(tertiaryBottom + coll->tertiaryBottomCount);
  1.4841 +                                count3 -= (uint32_t)coll->tertiaryBottomCount;
  1.4842 +                            }
  1.4843 +                            tertiaries.appendByte(tertiaryBottom + (count3-1));
  1.4844 +                        }
  1.4845 +                        count3 = 0;
  1.4846 +                    }
  1.4847 +                    tertiaries.appendByte(tertiary);
  1.4848 +                }
  1.4849 +            }
  1.4850 +
  1.4851 +            if(/*qShifted*/(compareQuad==0)  && notIsContinuation) {
  1.4852 +                if(s.flags & UCOL_WAS_HIRAGANA) { // This was Hiragana and we need to note it
  1.4853 +                    if(count4>0) { // Close this part
  1.4854 +                        while (count4 > UCOL_BOT_COUNT4) {
  1.4855 +                            quads.appendByte(UCOL_COMMON_BOT4 + UCOL_BOT_COUNT4);
  1.4856 +                            count4 -= UCOL_BOT_COUNT4;
  1.4857 +                        }
  1.4858 +                        quads.appendByte(UCOL_COMMON_BOT4 + (count4-1));
  1.4859 +                        count4 = 0;
  1.4860 +                    }
  1.4861 +                    quads.appendByte(UCOL_HIRAGANA_QUAD); // Add the Hiragana
  1.4862 +                } else { // This wasn't Hiragana, so we can continue adding stuff
  1.4863 +                    count4++;
  1.4864 +                }
  1.4865 +            }
  1.4866 +        }
  1.4867 +    }
  1.4868 +
  1.4869 +    /* Here, we are generally done with processing */
  1.4870 +    /* bailing out would not be too productive */
  1.4871 +
  1.4872 +    UBool ok = TRUE;  // cleared when any level buffer or the output sink reports a problem
  1.4873 +    if(U_SUCCESS(*status)) {
  1.4874 +        /* we have done all the CE's, now let's put them together to form a key */
  1.4875 +        if(compareSec == 0) {
  1.4876 +            if (count2 > 0) {  // flush the trailing run of common secondaries (bottom encoding)
  1.4877 +                while (count2 > UCOL_BOT_COUNT2) {
  1.4878 +                    secondaries.appendByte(UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2);
  1.4879 +                    count2 -= (uint32_t)UCOL_BOT_COUNT2;
  1.4880 +                }
  1.4881 +                secondaries.appendByte(UCOL_COMMON_BOT2 + (count2-1));
  1.4882 +            }
  1.4883 +            result.Append(UCOL_LEVELTERMINATOR);
  1.4884 +            if(!secondaries.isOk()) {
  1.4885 +                ok = FALSE;
  1.4886 +            } else if(!isFrenchSec) {
  1.4887 +                secondaries.appendTo(result);
  1.4888 +            } else {
  1.4889 +                // If there are any unresolved continuation secondaries,
  1.4890 +                // reverse them here so that we can reverse the whole secondary thing.
  1.4891 +                if (lastSecondaryLength > 1) {
  1.4892 +                    uint8_t *frenchStartPtr = secondaries.getLastFewBytes(lastSecondaryLength);
  1.4893 +                    if (frenchStartPtr != NULL) {
  1.4894 +                        /* reverse secondaries from frenchStartPtr up to frenchEndPtr */
  1.4895 +                        uint8_t *frenchEndPtr = frenchStartPtr + lastSecondaryLength - 1;
  1.4896 +                        uprv_ucol_reverse_buffer(uint8_t, frenchStartPtr, frenchEndPtr);
  1.4897 +                    }
  1.4898 +                }
  1.4899 +                packFrench(secondaries.data(), secondaries.length(), result);
  1.4900 +            }
  1.4901 +        }
  1.4902 +
  1.4903 +        if(doCase) {
  1.4904 +            ok &= cases.isOk();
  1.4905 +            result.Append(UCOL_LEVELTERMINATOR);
  1.4906 +            cases.appendTo(result);
  1.4907 +        }
  1.4908 +
  1.4909 +        if(compareTer == 0) {
  1.4910 +            if (count3 > 0) {
  1.4911 +                if (coll->tertiaryCommon != UCOL_COMMON_BOT3) {
  1.4912 +                    while (count3 >= coll->tertiaryTopCount) {  // NOTE(review): uses >= and (tertiaryTop - count3) below, unlike the > / (count3-1) form elsewhere - confirm intentional
  1.4913 +                        tertiaries.appendByte(tertiaryTop - coll->tertiaryTopCount);
  1.4914 +                        count3 -= (uint32_t)coll->tertiaryTopCount;
  1.4915 +                    }
  1.4916 +                    tertiaries.appendByte(tertiaryTop - count3);
  1.4917 +                } else {
  1.4918 +                    while (count3 > coll->tertiaryBottomCount) {
  1.4919 +                        tertiaries.appendByte(tertiaryBottom + coll->tertiaryBottomCount);
  1.4920 +                        count3 -= (uint32_t)coll->tertiaryBottomCount;
  1.4921 +                    }
  1.4922 +                    tertiaries.appendByte(tertiaryBottom + (count3-1));
  1.4923 +                }
  1.4924 +            }
  1.4925 +            ok &= tertiaries.isOk();
  1.4926 +            result.Append(UCOL_LEVELTERMINATOR);
  1.4927 +            tertiaries.appendTo(result);
  1.4928 +
  1.4929 +            if(compareQuad == 0/*qShifted == TRUE*/) {
  1.4930 +                if(count4 > 0) {
  1.4931 +                    while (count4 > UCOL_BOT_COUNT4) {
  1.4932 +                        quads.appendByte(UCOL_COMMON_BOT4 + UCOL_BOT_COUNT4);
  1.4933 +                        count4 -= UCOL_BOT_COUNT4;
  1.4934 +                    }
  1.4935 +                    quads.appendByte(UCOL_COMMON_BOT4 + (count4-1));
  1.4936 +                }
  1.4937 +                ok &= quads.isOk();
  1.4938 +                result.Append(UCOL_LEVELTERMINATOR);
  1.4939 +                quads.appendTo(result);
  1.4940 +            }
  1.4941 +
  1.4942 +            if(compareIdent) {  // identical level: written from the iterator's string (NFD-normalized above when needed)
  1.4943 +                result.Append(UCOL_LEVELTERMINATOR);
  1.4944 +                u_writeIdenticalLevelRun(s.string, len, result);
  1.4945 +            }
  1.4946 +        }
  1.4947 +        result.Append(0);  // final zero byte closing the key
  1.4948 +    }
  1.4949 +
  1.4950 +    /* To avoid memory leak, free the offset buffer if necessary. */
  1.4951 +    ucol_freeOffsetBuffer(&s);
  1.4952 +
  1.4953 +    ok &= result.IsOk();
  1.4954 +    if(!ok && U_SUCCESS(*status)) { *status = U_MEMORY_ALLOCATION_ERROR; }  // a buffer/sink problem is reported only if no earlier error is set
  1.4955 +}
  1.4956 +
  1.4957 +
  1.4958 +U_CFUNC void U_CALLCONV
  1.4959 +ucol_calcSortKeySimpleTertiary(const    UCollator    *coll,
  1.4960 +        const    UChar        *source,
  1.4961 +        int32_t        sourceLength,
  1.4962 +        SortKeyByteSink &result,
  1.4963 +        UErrorCode *status)
  1.4964 +{
  1.4965 +    U_ALIGN_CODE(16);
  1.4966 +
  1.4967 +    if(U_FAILURE(*status)) {
  1.4968 +        return;
  1.4969 +    }
  1.4970 +
  1.4971 +    SortKeyByteSink &primaries = result;
  1.4972 +    SortKeyLevel secondaries;
  1.4973 +    SortKeyLevel tertiaries;
  1.4974 +
  1.4975 +    UnicodeString normSource;
  1.4976 +
  1.4977 +    int32_t len =  sourceLength;
  1.4978 +
  1.4979 +    /* If we need to normalize, we'll do it all at once at the beginning! */
  1.4980 +    if(coll->normalizationMode != UCOL_OFF) {
  1.4981 +        normSource.setTo(len < 0, source, len);
  1.4982 +        const Normalizer2 *norm2 = Normalizer2Factory::getFCDInstance(*status);
  1.4983 +        int32_t qcYesLength = norm2->spanQuickCheckYes(normSource, *status);
  1.4984 +        if(qcYesLength != normSource.length()) {
  1.4985 +            UnicodeString unnormalized = normSource.tempSubString(qcYesLength);
  1.4986 +            normSource.truncate(qcYesLength);
  1.4987 +            norm2->normalizeSecondAndAppend(normSource, unnormalized, *status);
  1.4988 +            source = normSource.getBuffer();
  1.4989 +            len = normSource.length();
  1.4990 +        }
  1.4991 +    }
  1.4992 +    collIterate s;
  1.4993 +    IInit_collIterate(coll, (UChar *)source, len, &s, status);
  1.4994 +    if(U_FAILURE(*status)) {
  1.4995 +        return;
  1.4996 +    }
  1.4997 +    s.flags &= ~UCOL_ITER_NORM;  // source passed the FCD test or else was normalized.
  1.4998 +
  1.4999 +    uint32_t order = 0;
  1.5000 +
  1.5001 +    uint8_t primary1 = 0;
  1.5002 +    uint8_t primary2 = 0;
  1.5003 +    uint8_t secondary = 0;
  1.5004 +    uint8_t tertiary = 0;
  1.5005 +    uint8_t caseSwitch = coll->caseSwitch;
  1.5006 +    uint8_t tertiaryMask = coll->tertiaryMask;
  1.5007 +    int8_t tertiaryAddition = coll->tertiaryAddition;
  1.5008 +    uint8_t tertiaryTop = coll->tertiaryTop;
  1.5009 +    uint8_t tertiaryBottom = coll->tertiaryBottom;
  1.5010 +    uint8_t tertiaryCommon = coll->tertiaryCommon;
  1.5011 +
  1.5012 +    UBool notIsContinuation = FALSE;
  1.5013 +
  1.5014 +    uint32_t count2 = 0, count3 = 0;
  1.5015 +    uint8_t leadPrimary = 0;
  1.5016 +
  1.5017 +    for(;;) {
  1.5018 +        order = ucol_IGetNextCE(coll, &s, status);
  1.5019 +
  1.5020 +        if(order == 0) {
  1.5021 +            continue;
  1.5022 +        }
  1.5023 +
  1.5024 +        if(order == UCOL_NO_MORE_CES) {
  1.5025 +            break;
  1.5026 +        }
  1.5027 +
  1.5028 +        notIsContinuation = !isContinuation(order);
  1.5029 +
  1.5030 +        if(notIsContinuation) {
  1.5031 +            tertiary = (uint8_t)((order & tertiaryMask));
  1.5032 +        } else {
  1.5033 +            tertiary = (uint8_t)((order & UCOL_REMOVE_CONTINUATION));
  1.5034 +        }
  1.5035 +
  1.5036 +        secondary = (uint8_t)((order >>= 8) & UCOL_BYTE_SIZE_MASK);
  1.5037 +        primary2 = (uint8_t)((order >>= 8) & UCOL_BYTE_SIZE_MASK);
  1.5038 +        primary1 = (uint8_t)(order >> 8);
  1.5039 +
  1.5040 +        uint8_t originalPrimary1 = primary1;
  1.5041 +        if (coll->leadBytePermutationTable != NULL && notIsContinuation) {
  1.5042 +            primary1 = coll->leadBytePermutationTable[primary1];
  1.5043 +        }
  1.5044 +
  1.5045 +        /* Note: This code assumes that the table is well built i.e. not having 0 bytes where they are not supposed to be. */
  1.5046 +        /* Usually, we'll have non-zero primary1 & primary2, except in cases of a-z and friends, when primary2 will   */
  1.5047 +        /* be zero with non zero primary1. primary3 is different than 0 only for long primaries - see above.               */
  1.5048 +        /* regular and simple sortkey calc */
  1.5049 +        if(primary1 != UCOL_IGNORABLE) {
  1.5050 +            if(notIsContinuation) {
  1.5051 +                if(leadPrimary == primary1) {
  1.5052 +                    primaries.Append(primary2);
  1.5053 +                } else {
  1.5054 +                    if(leadPrimary != 0) {
  1.5055 +                        primaries.Append((primary1 > leadPrimary) ? UCOL_BYTE_UNSHIFTED_MAX : UCOL_BYTE_UNSHIFTED_MIN);
  1.5056 +                    }
  1.5057 +                    if(primary2 == UCOL_IGNORABLE) {
  1.5058 +                        /* one byter, not compressed */
  1.5059 +                        primaries.Append(primary1);
  1.5060 +                        leadPrimary = 0;
  1.5061 +                    } else if(isCompressible(coll, originalPrimary1)) {
  1.5062 +                        /* compress */
  1.5063 +                        primaries.Append(leadPrimary = primary1, primary2);
  1.5064 +                    } else {
  1.5065 +                        leadPrimary = 0;
  1.5066 +                        primaries.Append(primary1, primary2);
  1.5067 +                    }
  1.5068 +                }
  1.5069 +            } else { /* we are in continuation, so we're gonna add primary to the key don't care about compression */
  1.5070 +                if(primary2 == UCOL_IGNORABLE) {
  1.5071 +                    primaries.Append(primary1);
  1.5072 +                } else {
  1.5073 +                    primaries.Append(primary1, primary2);
  1.5074 +                }
  1.5075 +            }
  1.5076 +        }
  1.5077 +
  1.5078 +        if(secondary > 0) { /* I think that != 0 test should be != IGNORABLE */
  1.5079 +            /* This is compression code. */
  1.5080 +            if (secondary == UCOL_COMMON2 && notIsContinuation) {
  1.5081 +                ++count2;
  1.5082 +            } else {
  1.5083 +                if (count2 > 0) {
  1.5084 +                    if (secondary > UCOL_COMMON2) { // not necessary for 4th level.
  1.5085 +                        while (count2 > UCOL_TOP_COUNT2) {
  1.5086 +                            secondaries.appendByte(UCOL_COMMON_TOP2 - UCOL_TOP_COUNT2);
  1.5087 +                            count2 -= (uint32_t)UCOL_TOP_COUNT2;
  1.5088 +                        }
  1.5089 +                        secondaries.appendByte(UCOL_COMMON_TOP2 - (count2-1));
  1.5090 +                    } else {
  1.5091 +                        while (count2 > UCOL_BOT_COUNT2) {
  1.5092 +                            secondaries.appendByte(UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2);
  1.5093 +                            count2 -= (uint32_t)UCOL_BOT_COUNT2;
  1.5094 +                        }
  1.5095 +                        secondaries.appendByte(UCOL_COMMON_BOT2 + (count2-1));
  1.5096 +                    }
  1.5097 +                    count2 = 0;
  1.5098 +                }
  1.5099 +                secondaries.appendByte(secondary);
  1.5100 +            }
  1.5101 +        }
  1.5102 +
  1.5103 +        if(notIsContinuation) {
  1.5104 +            tertiary ^= caseSwitch;
  1.5105 +        }
  1.5106 +
  1.5107 +        if(tertiary > 0) {
  1.5108 +            /* This is compression code. */
  1.5109 +            /* sequence size check is included in the if clause */
  1.5110 +            if (tertiary == tertiaryCommon && notIsContinuation) {
  1.5111 +                ++count3;
  1.5112 +            } else {
  1.5113 +                if(tertiary > tertiaryCommon && tertiaryCommon == UCOL_COMMON3_NORMAL) {
  1.5114 +                    tertiary += tertiaryAddition;
  1.5115 +                } else if (tertiary <= tertiaryCommon && tertiaryCommon == UCOL_COMMON3_UPPERFIRST) {
  1.5116 +                    tertiary -= tertiaryAddition;
  1.5117 +                }
  1.5118 +                if (count3 > 0) {
  1.5119 +                    if ((tertiary > tertiaryCommon)) {
  1.5120 +                        while (count3 > coll->tertiaryTopCount) {
  1.5121 +                            tertiaries.appendByte(tertiaryTop - coll->tertiaryTopCount);
  1.5122 +                            count3 -= (uint32_t)coll->tertiaryTopCount;
  1.5123 +                        }
  1.5124 +                        tertiaries.appendByte(tertiaryTop - (count3-1));
  1.5125 +                    } else {
  1.5126 +                        while (count3 > coll->tertiaryBottomCount) {
  1.5127 +                            tertiaries.appendByte(tertiaryBottom + coll->tertiaryBottomCount);
  1.5128 +                            count3 -= (uint32_t)coll->tertiaryBottomCount;
  1.5129 +                        }
  1.5130 +                        tertiaries.appendByte(tertiaryBottom + (count3-1));
  1.5131 +                    }
  1.5132 +                    count3 = 0;
  1.5133 +                }
  1.5134 +                tertiaries.appendByte(tertiary);
  1.5135 +            }
  1.5136 +        }
  1.5137 +    }
  1.5138 +
  1.5139 +    UBool ok = TRUE;
  1.5140 +    if(U_SUCCESS(*status)) {
  1.5141 +        /* we have done all the CE's, now let's put them together to form a key */
  1.5142 +        if (count2 > 0) {
  1.5143 +            while (count2 > UCOL_BOT_COUNT2) {
  1.5144 +                secondaries.appendByte(UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2);
  1.5145 +                count2 -= (uint32_t)UCOL_BOT_COUNT2;
  1.5146 +            }
  1.5147 +            secondaries.appendByte(UCOL_COMMON_BOT2 + (count2-1));
  1.5148 +        }
  1.5149 +        ok &= secondaries.isOk();
  1.5150 +        result.Append(UCOL_LEVELTERMINATOR);
  1.5151 +        secondaries.appendTo(result);
  1.5152 +
  1.5153 +        if (count3 > 0) {
  1.5154 +            if (coll->tertiaryCommon != UCOL_COMMON3_NORMAL) {
  1.5155 +                while (count3 >= coll->tertiaryTopCount) {
  1.5156 +                    tertiaries.appendByte(tertiaryTop - coll->tertiaryTopCount);
  1.5157 +                    count3 -= (uint32_t)coll->tertiaryTopCount;
  1.5158 +                }
  1.5159 +                tertiaries.appendByte(tertiaryTop - count3);
  1.5160 +            } else {
  1.5161 +                while (count3 > coll->tertiaryBottomCount) {
  1.5162 +                    tertiaries.appendByte(tertiaryBottom + coll->tertiaryBottomCount);
  1.5163 +                    count3 -= (uint32_t)coll->tertiaryBottomCount;
  1.5164 +                }
  1.5165 +                tertiaries.appendByte(tertiaryBottom + (count3-1));
  1.5166 +            }
  1.5167 +        }
  1.5168 +        ok &= tertiaries.isOk();
  1.5169 +        result.Append(UCOL_LEVELTERMINATOR);
  1.5170 +        tertiaries.appendTo(result);
  1.5171 +
  1.5172 +        result.Append(0);
  1.5173 +    }
  1.5174 +
  1.5175 +    /* To avoid memory leak, free the offset buffer if necessary. */
  1.5176 +    ucol_freeOffsetBuffer(&s);
  1.5177 +
  1.5178 +    ok &= result.IsOk();
  1.5179 +    if(!ok && U_SUCCESS(*status)) { *status = U_MEMORY_ALLOCATION_ERROR; }
  1.5180 +}
  1.5181 +
  1.5182 +static inline
  1.5183 +UBool isShiftedCE(uint32_t CE, uint32_t LVT, UBool *wasShifted) {
  1.5184 +    // Tells whether CE is to be treated as shifted. LVT is the variable-top boundary
  1.5185 +    // (0 turns the boundary test off); *wasShifted is the state carried from CE to CE.
  1.5186 +    const UBool isCont = isContinuation(CE);
  1.5187 +    const uint8_t leadByte = (uint8_t)(CE >> 24);
  1.5188 +    UBool shifted = FALSE;
  1.5189 +    if(LVT != 0) {
  1.5190 +        // A continuation inherits the state left by the CE before it; any other CE
  1.5191 +        // is shifted when its lead primary byte is non-zero and it is not above LVT.
  1.5192 +        shifted = isCont ? *wasShifted : (UBool)(leadByte != 0 && (CE & 0xFFFF0000) <= LVT);
  1.5193 +    }
  1.5194 +    // A CE with a zero lead primary byte seen while *wasShifted is set is shifted too.
  1.5195 +    if(!shifted && !(*wasShifted && leadByte == 0)) {
  1.5196 +        *wasShifted = FALSE;
  1.5197 +        return FALSE;
  1.5198 +    }
  1.5199 +    // With a zero lead byte, *wasShifted is left exactly as it was on entry.
  1.5200 +    if(leadByte != 0) { *wasShifted = TRUE; }
  1.5201 +    return TRUE;
  1.5202 +}
  1.5203 +static inline
  1.5204 +void terminatePSKLevel(int32_t level, int32_t maxLevel, int32_t &i, uint8_t *dest) {
  1.5205 +    // Writes one closing byte at dest[i] and advances i past it: the level
  1.5206 +    // separator while level is below maxLevel, otherwise the final zero byte.
  1.5207 +    // No bounds check is made here, so dest[i] must be writable on entry.
  1.5208 +    const UBool moreLevels = (UBool)(level < maxLevel);
  1.5209 +    dest[i++] = (uint8_t)(moreLevels ? UCOL_LEVELTERMINATOR : 0);
  1.5210 +}
  1.5211 +
  1.5212 +/** Level identifiers for partial sort key generation. The current level is kept in the low three bits of state[1] (see UCOL_PSK_LEVEL_MASK), so identifiers must stay within 0..7. */
  1.5213 +enum {
  1.5214 +  UCOL_PSK_PRIMARY = 0,     /** primary weights; the starting level when state[1] is zero */
  1.5215 +    UCOL_PSK_SECONDARY = 1, /** secondary weights (read backwards when French collation is on) */
  1.5216 +    UCOL_PSK_CASE = 2,      /** case level; only produced when the UCOL_CASE_LEVEL attribute is on */
  1.5217 +    UCOL_PSK_TERTIARY = 3,  /** tertiary weights */
  1.5218 +    UCOL_PSK_QUATERNARY = 4, /** quaternary weights */
  1.5219 +    UCOL_PSK_QUIN = 5,      /** This is an extra level, not used - but the three-bit level field has room for it */
  1.5220 +    UCOL_PSK_IDENTICAL = 6, /** identical level */
  1.5221 +    UCOL_PSK_NULL = 7,      /** level for the end of the sort key. Will just produce zeros */
  1.5222 +    UCOL_PSK_LIMIT          /** one past the last level identifier; not itself a level */
  1.5223 +};
  1.5224 +
  1.5225 +/** Collation state enum. A *_SHIFT value is how far to shift state[1] right
  1.5226 + *  to bring a field down to bit 0; the matching *_MASK value should then be
  1.5227 + *  ANDed with the shifted state, i.e. field = (state[1] >> X_SHIFT) & X_MASK.
  1.5228 + *  All of this data is stored in the state[1] field.
  1.5229 + */
  1.5230 +enum {
  1.5231 +    UCOL_PSK_LEVEL_SHIFT = 0,      /** level identifier. stores an enum value from above */
  1.5232 +    UCOL_PSK_LEVEL_MASK = 7,       /** three bits */
  1.5233 +    UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_SHIFT = 3, /** number of bytes of primary or quaternary already written */
  1.5234 +    UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_MASK = 1,
  1.5235 +    /** can be only 0 or 1, since we get up to two bytes from primary or quaternary.
  1.5236 +     *  This field is also used to denote that the French secondary level is finished
  1.5237 +     */
  1.5238 +    UCOL_PSK_WAS_SHIFTED_SHIFT = 4,/** was the last value shifted */
  1.5239 +    UCOL_PSK_WAS_SHIFTED_MASK = 1, /** can be 0 or 1 (Boolean) */
  1.5240 +    UCOL_PSK_USED_FRENCH_SHIFT = 5,/** how many French bytes have we already written */
  1.5241 +    UCOL_PSK_USED_FRENCH_MASK = 3, /** two bits, so the field holds 0..3. NOTE(review): this comment used to say "up to 4 bytes" - confirm. See comment just below */
  1.5242 +    /** When we do French we need to reverse secondary values. However, continuations
  1.5243 +     *  need to stay the same. So if you had abc1c2c3de, you need to have edc1c2c3ba
  1.5244 +     */
  1.5245 +    UCOL_PSK_BOCSU_BYTES_SHIFT = 7, /** number of bytes already written from a bocsu sequence */
  1.5246 +    UCOL_PSK_BOCSU_BYTES_MASK = 3,  /** two bits, so the field holds 0..3 */
  1.5247 +    UCOL_PSK_CONSUMED_CES_SHIFT = 9, /** elements consumed since the last saved iterator state */
  1.5248 +    UCOL_PSK_CONSUMED_CES_MASK = 0x7FFFF /** 19 bits, i.e. bits 9..27 of state[1]. NOTE(review): the ucol_nextSortKeyPart comment says 9..31 - confirm */
  1.5249 +};
  1.5250 +
  1.5251 +// Macro calculating the number of expansion CEs still available in collIterate s (CEpos - toReturn). The whole expansion is parenthesized so it stays one operand inside larger expressions.
  1.5252 +#define uprv_numAvailableExpCEs(s) ((s).CEpos - (s).toReturn)
  1.5253 +
  1.5254 +
  1.5255 +/** main sortkey part procedure. On the first call,
  1.5256 + *  you should pass in a collator, an iterator, empty state
  1.5257 + *  state[0] == state[1] == 0, a buffer to hold results
  1.5258 + *  number of bytes you need and an error code pointer.
  1.5259 + *  Make sure your buffer is big enough to hold the wanted
  1.5260 + *  number of sortkey bytes. I don't check.
  1.5261 + *  The only meaningful status you can get back is
  1.5262 + *  U_BUFFER_OVERFLOW_ERROR, which basically means that you
  1.5263 + *  have been dealt a raw deal and that you probably won't
  1.5264 + *  be able to use partial sortkey generation for this
  1.5265 + *  particular combination of string and collator. This
  1.5266 + *  is highly unlikely, but you should still check the error code.
  1.5267 + *  Any other status means that you're not in a sane situation
  1.5268 + *  anymore. After the first call, preserve state values and
  1.5269 + *  use them on subsequent calls to obtain more bytes of a sortkey.
  1.5270 + *  Use until the number of bytes written is smaller than the requested
  1.5271 + *  number of bytes. Generated sortkey is not compatible with the
  1.5272 + *  one generated by ucol_getSortKey, as we don't do any compression.
  1.5273 + *  However, levels are still terminated by a 1 (one) and the sortkey
  1.5274 + *  is terminated by a 0 (zero). Identical level is the same as in the
  1.5275 + *  regular sortkey - internal bocu-1 implementation is used.
  1.5276 + *  For curious, although you cannot do much about this, here is
  1.5277 + *  the structure of state words.
  1.5278 + *  state[0] - iterator state. Depends on the iterator implementation,
  1.5279 + *             but allows the iterator to continue where it stopped in
  1.5280 + *             the last iteration.
  1.5281 + *  state[1] - collation processing state. Here is the distribution
  1.5282 + *             of the bits:
  1.5283 + *   0, 1, 2 - level of the sortkey - primary, secondary, case, tertiary
  1.5284 + *             quaternary, quin (we don't use this one), identical and
  1.5285 + *             null (producing only zeroes - first one to terminate the
  1.5286 + *             sortkey and subsequent to fill the buffer).
  1.5287 + *   3       - byte count. Number of bytes written on the primary level.
  1.5288 + *   4       - was shifted. Whether the previous iteration finished in the
  1.5289 + *             shifted state.
  1.5290 + *   5, 6    - French continuation bytes written. See the comment in the enum
  1.5291 + *   7,8     - Bocsu bytes used. Number of bytes from a bocu sequence on
  1.5292 + *             the identical level.
  1.5293 + *   9..31   - CEs consumed. Number of getCE or next32 operations performed
  1.5294 + *             since the last successful update of the iterator state.
  1.5295 + */
  1.5296 +U_CAPI int32_t U_EXPORT2
  1.5297 +ucol_nextSortKeyPart(const UCollator *coll,
  1.5298 +                     UCharIterator *iter,
  1.5299 +                     uint32_t state[2],
  1.5300 +                     uint8_t *dest, int32_t count,
  1.5301 +                     UErrorCode *status)
  1.5302 +{
  1.5303 +    /* error checking */
  1.5304 +    if(status==NULL || U_FAILURE(*status)) {
  1.5305 +        return 0;
  1.5306 +    }
  1.5307 +    UTRACE_ENTRY(UTRACE_UCOL_NEXTSORTKEYPART);
  1.5308 +    if( coll==NULL || iter==NULL ||
  1.5309 +        state==NULL ||
  1.5310 +        count<0 || (count>0 && dest==NULL)
  1.5311 +    ) {
  1.5312 +        *status=U_ILLEGAL_ARGUMENT_ERROR;
  1.5313 +        UTRACE_EXIT_STATUS(status);
  1.5314 +        return 0;
  1.5315 +    }
  1.5316 +
  1.5317 +    UTRACE_DATA6(UTRACE_VERBOSE, "coll=%p, iter=%p, state=%d %d, dest=%p, count=%d",
  1.5318 +                  coll, iter, state[0], state[1], dest, count);
  1.5319 +
  1.5320 +    if(count==0) {
  1.5321 +        /* nothing to do */
  1.5322 +        UTRACE_EXIT_VALUE(0);
  1.5323 +        return 0;
  1.5324 +    }
  1.5325 +    /** Setting up situation according to the state we got from the previous iteration */
  1.5326 +    // The state of the iterator from the previous invocation
  1.5327 +    uint32_t iterState = state[0];
  1.5328 +    // Has the last iteration ended in the shifted state
  1.5329 +    UBool wasShifted = ((state[1] >> UCOL_PSK_WAS_SHIFTED_SHIFT) & UCOL_PSK_WAS_SHIFTED_MASK)?TRUE:FALSE;
  1.5330 +    // What is the current level of the sortkey?
  1.5331 +    int32_t level= (state[1] >> UCOL_PSK_LEVEL_SHIFT) & UCOL_PSK_LEVEL_MASK;
  1.5332 +    // Have we written only one byte from a two byte primary in the previous iteration?
  1.5333 +    // Also on secondary level - have we finished with the French secondary?
  1.5334 +    int32_t byteCountOrFrenchDone = (state[1] >> UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_SHIFT) & UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_MASK;
  1.5335 +    // number of bytes in the continuation buffer for French
  1.5336 +    int32_t usedFrench = (state[1] >> UCOL_PSK_USED_FRENCH_SHIFT) & UCOL_PSK_USED_FRENCH_MASK;
  1.5337 +    // Number of bytes already written from a bocsu sequence. Since
  1.5338 +    // the longes bocsu sequence is 4 long, this can be up to 3.
  1.5339 +    int32_t bocsuBytesUsed = (state[1] >> UCOL_PSK_BOCSU_BYTES_SHIFT) & UCOL_PSK_BOCSU_BYTES_MASK;
  1.5340 +    // Number of elements that need to be consumed in this iteration because
  1.5341 +    // the iterator returned UITER_NO_STATE at the end of the last iteration,
  1.5342 +    // so we had to save the last valid state.
  1.5343 +    int32_t cces = (state[1] >> UCOL_PSK_CONSUMED_CES_SHIFT) & UCOL_PSK_CONSUMED_CES_MASK;
  1.5344 +
  1.5345 +    /** values that depend on the collator attributes */
  1.5346 +    // strength of the collator.
  1.5347 +    int32_t strength = ucol_getAttribute(coll, UCOL_STRENGTH, status);
  1.5348 +    // maximal level of the partial sortkey. Need to take whether case level is done
  1.5349 +    int32_t maxLevel = 0;
  1.5350 +    if(strength < UCOL_TERTIARY) {
  1.5351 +        if(ucol_getAttribute(coll, UCOL_CASE_LEVEL, status) == UCOL_ON) {
  1.5352 +            maxLevel = UCOL_PSK_CASE;
  1.5353 +        } else {
  1.5354 +            maxLevel = strength;
  1.5355 +        }
  1.5356 +    } else {
  1.5357 +        if(strength == UCOL_TERTIARY) {
  1.5358 +            maxLevel = UCOL_PSK_TERTIARY;
  1.5359 +        } else if(strength == UCOL_QUATERNARY) {
  1.5360 +            maxLevel = UCOL_PSK_QUATERNARY;
  1.5361 +        } else { // identical
  1.5362 +            maxLevel = UCOL_IDENTICAL;
  1.5363 +        }
  1.5364 +    }
  1.5365 +    // value for the quaternary level if Hiragana is encountered. Used for JIS X 4061 collation
  1.5366 +    uint8_t UCOL_HIRAGANA_QUAD =
  1.5367 +      (ucol_getAttribute(coll, UCOL_HIRAGANA_QUATERNARY_MODE, status) == UCOL_ON)?0xFE:0xFF;
  1.5368 +    // Boundary value that decides whether a CE is shifted or not
  1.5369 +    uint32_t LVT = (coll->alternateHandling == UCOL_SHIFTED)?(coll->variableTopValue<<16):0;
  1.5370 +    // Are we doing French collation?
  1.5371 +    UBool doingFrench = (ucol_getAttribute(coll, UCOL_FRENCH_COLLATION, status) == UCOL_ON);
  1.5372 +
  1.5373 +    /** initializing the collation state */
  1.5374 +    UBool notIsContinuation = FALSE;
  1.5375 +    uint32_t CE = UCOL_NO_MORE_CES;
  1.5376 +
  1.5377 +    collIterate s;
  1.5378 +    IInit_collIterate(coll, NULL, -1, &s, status);
  1.5379 +    if(U_FAILURE(*status)) {
  1.5380 +        UTRACE_EXIT_STATUS(*status);
  1.5381 +        return 0;
  1.5382 +    }
  1.5383 +    s.iterator = iter;
  1.5384 +    s.flags |= UCOL_USE_ITERATOR;
  1.5385 +    // This variable tells us whether we have produced some other levels in this iteration
  1.5386 +    // before we moved to the identical level. In that case, we need to switch the
  1.5387 +    // type of the iterator.
  1.5388 +    UBool doingIdenticalFromStart = FALSE;
  1.5389 +    // Normalizing iterator
  1.5390 +    // The division for the array length may truncate the array size to
  1.5391 +    // a little less than UNORM_ITER_SIZE, but that size is dimensioned too high
  1.5392 +    // for all platforms anyway.
  1.5393 +    UAlignedMemory stackNormIter[UNORM_ITER_SIZE/sizeof(UAlignedMemory)];
  1.5394 +    UNormIterator *normIter = NULL;
  1.5395 +    // If the normalization is turned on for the collator and we are below identical level
  1.5396 +    // we will use a FCD normalizing iterator
  1.5397 +    if(ucol_getAttribute(coll, UCOL_NORMALIZATION_MODE, status) == UCOL_ON && level < UCOL_PSK_IDENTICAL) {
  1.5398 +        normIter = unorm_openIter(stackNormIter, sizeof(stackNormIter), status);
  1.5399 +        s.iterator = unorm_setIter(normIter, iter, UNORM_FCD, status);
  1.5400 +        s.flags &= ~UCOL_ITER_NORM;
  1.5401 +        if(U_FAILURE(*status)) {
  1.5402 +            UTRACE_EXIT_STATUS(*status);
  1.5403 +            return 0;
  1.5404 +        }
  1.5405 +    } else if(level == UCOL_PSK_IDENTICAL) {
  1.5406 +        // for identical level, we need a NFD iterator. We need to instantiate it here, since we
  1.5407 +        // will be updating the state - and this cannot be done on an ordinary iterator.
  1.5408 +        normIter = unorm_openIter(stackNormIter, sizeof(stackNormIter), status);
  1.5409 +        s.iterator = unorm_setIter(normIter, iter, UNORM_NFD, status);
  1.5410 +        s.flags &= ~UCOL_ITER_NORM;
  1.5411 +        if(U_FAILURE(*status)) {
  1.5412 +            UTRACE_EXIT_STATUS(*status);
  1.5413 +            return 0;
  1.5414 +        }
  1.5415 +        doingIdenticalFromStart = TRUE;
  1.5416 +    }
  1.5417 +
  1.5418 +    // This is the tentative new state of the iterator. The problem
  1.5419 +    // is that the iterator might return an undefined state, in
  1.5420 +    // which case we should save the last valid state and increase
  1.5421 +    // the iterator skip value.
  1.5422 +    uint32_t newState = 0;
  1.5423 +
  1.5424 +    // First, we set the iterator to the last valid position
  1.5425 +    // from the last iteration. This was saved in state[0].
  1.5426 +    if(iterState == 0) {
  1.5427 +        /* initial state */
  1.5428 +        if(level == UCOL_PSK_SECONDARY && doingFrench && !byteCountOrFrenchDone) {
  1.5429 +            s.iterator->move(s.iterator, 0, UITER_LIMIT);
  1.5430 +        } else {
  1.5431 +            s.iterator->move(s.iterator, 0, UITER_START);
  1.5432 +        }
  1.5433 +    } else {
  1.5434 +        /* reset to previous state */
  1.5435 +        s.iterator->setState(s.iterator, iterState, status);
  1.5436 +        if(U_FAILURE(*status)) {
  1.5437 +            UTRACE_EXIT_STATUS(*status);
  1.5438 +            return 0;
  1.5439 +        }
  1.5440 +    }
  1.5441 +
  1.5442 +
  1.5443 +
  1.5444 +    // This variable tells us whether we can attempt to update the state
  1.5445 +    // of iterator. Situations where we don't want to update iterator state
  1.5446 +    // are the existence of expansion CEs that are not yet processed, and
  1.5447 +    // finishing the case level without enough space in the buffer to insert
  1.5448 +    // a level terminator.
  1.5449 +    UBool canUpdateState = TRUE;
  1.5450 +
  1.5451 +    // Consume all the CEs that were consumed at the end of the previous
  1.5452 +    // iteration without updating the iterator state. On identical level,
  1.5453 +    // consume the code points.
  1.5454 +    int32_t counter = cces;
  1.5455 +    if(level < UCOL_PSK_IDENTICAL) {
  1.5456 +        while(counter-->0) {
  1.5457 +            // If we're doing French and we are on the secondary level,
  1.5458 +            // we go backwards.
  1.5459 +            if(level == UCOL_PSK_SECONDARY && doingFrench) {
  1.5460 +                CE = ucol_IGetPrevCE(coll, &s, status);
  1.5461 +            } else {
  1.5462 +                CE = ucol_IGetNextCE(coll, &s, status);
  1.5463 +            }
  1.5464 +            if(CE==UCOL_NO_MORE_CES) {
  1.5465 +                /* should not happen */
  1.5466 +                *status=U_INTERNAL_PROGRAM_ERROR;
  1.5467 +                UTRACE_EXIT_STATUS(*status);
  1.5468 +                return 0;
  1.5469 +            }
  1.5470 +            if(uprv_numAvailableExpCEs(s)) {
  1.5471 +                canUpdateState = FALSE;
  1.5472 +            }
  1.5473 +        }
  1.5474 +    } else {
  1.5475 +        while(counter-->0) {
  1.5476 +            uiter_next32(s.iterator);
  1.5477 +        }
  1.5478 +    }
  1.5479 +
  1.5480 +    // French secondary needs to know whether the iterator state of zero came from previous level OR
  1.5481 +    // from a new invocation...
  1.5482 +    UBool wasDoingPrimary = FALSE;
  1.5483 +    // destination buffer byte counter. When this guy
  1.5484 +    // gets to count, we're done with the iteration
  1.5485 +    int32_t i = 0;
  1.5486 +    // used to count the zero bytes written after we
  1.5487 +    // have finished with the sort key
  1.5488 +    int32_t j = 0;
  1.5489 +
  1.5490 +
  1.5491 +    // Hm.... I think we're ready to plunge in. Basic story is as following:
  1.5492 +    // we have a fall through case based on level. This is used for initial
  1.5493 +    // positioning on iteration start. Every level processor contains a
  1.5494 +    // for(;;) which will be broken when we exhaust all the CEs. Other
  1.5495 +    // way to exit is a goto saveState, which happens when we have filled
  1.5496 +    // out our buffer.
  1.5497 +    switch(level) {
  1.5498 +    case UCOL_PSK_PRIMARY:
  1.5499 +        wasDoingPrimary = TRUE;
  1.5500 +        for(;;) {
  1.5501 +            if(i==count) {
  1.5502 +                goto saveState;
  1.5503 +            }
  1.5504 +            // We should save the state only if we
  1.5505 +            // are sure that we are done with the
  1.5506 +            // previous iterator state
  1.5507 +            if(canUpdateState && byteCountOrFrenchDone == 0) {
  1.5508 +                newState = s.iterator->getState(s.iterator);
  1.5509 +                if(newState != UITER_NO_STATE) {
  1.5510 +                    iterState = newState;
  1.5511 +                    cces = 0;
  1.5512 +                }
  1.5513 +            }
  1.5514 +            CE = ucol_IGetNextCE(coll, &s, status);
  1.5515 +            cces++;
  1.5516 +            if(CE==UCOL_NO_MORE_CES) {
  1.5517 +                // Add the level separator
  1.5518 +                terminatePSKLevel(level, maxLevel, i, dest);
  1.5519 +                byteCountOrFrenchDone=0;
  1.5520 +                // Restart the iteration an move to the
  1.5521 +                // second level
  1.5522 +                s.iterator->move(s.iterator, 0, UITER_START);
  1.5523 +                cces = 0;
  1.5524 +                level = UCOL_PSK_SECONDARY;
  1.5525 +                break;
  1.5526 +            }
  1.5527 +            if(!isContinuation(CE)){
  1.5528 +                if(coll->leadBytePermutationTable != NULL){
  1.5529 +                    CE = (coll->leadBytePermutationTable[CE>>24] << 24) | (CE & 0x00FFFFFF);
  1.5530 +                }
  1.5531 +            }
  1.5532 +            if(!isShiftedCE(CE, LVT, &wasShifted)) {
  1.5533 +                CE >>= UCOL_PRIMARYORDERSHIFT; /* get primary */
  1.5534 +                if(CE != 0) {
  1.5535 +                    if(byteCountOrFrenchDone == 0) {
  1.5536 +                        // get the second byte of primary
  1.5537 +                        dest[i++]=(uint8_t)(CE >> 8);
  1.5538 +                    } else {
  1.5539 +                        byteCountOrFrenchDone = 0;
  1.5540 +                    }
  1.5541 +                    if((CE &=0xff)!=0) {
  1.5542 +                        if(i==count) {
  1.5543 +                            /* overflow */
  1.5544 +                            byteCountOrFrenchDone = 1;
  1.5545 +                            cces--;
  1.5546 +                            goto saveState;
  1.5547 +                        }
  1.5548 +                        dest[i++]=(uint8_t)CE;
  1.5549 +                    }
  1.5550 +                }
  1.5551 +            }
  1.5552 +            if(uprv_numAvailableExpCEs(s)) {
  1.5553 +                canUpdateState = FALSE;
  1.5554 +            } else {
  1.5555 +                canUpdateState = TRUE;
  1.5556 +            }
  1.5557 +        }
  1.5558 +        /* fall through to next level */
  1.5559 +    case UCOL_PSK_SECONDARY:
  1.5560 +        if(strength >= UCOL_SECONDARY) {
  1.5561 +            if(!doingFrench) {
  1.5562 +                for(;;) {
  1.5563 +                    if(i == count) {
  1.5564 +                        goto saveState;
  1.5565 +                    }
  1.5566 +                    // We should save the state only if we
  1.5567 +                    // are sure that we are done with the
  1.5568 +                    // previous iterator state
  1.5569 +                    if(canUpdateState) {
  1.5570 +                        newState = s.iterator->getState(s.iterator);
  1.5571 +                        if(newState != UITER_NO_STATE) {
  1.5572 +                            iterState = newState;
  1.5573 +                            cces = 0;
  1.5574 +                        }
  1.5575 +                    }
  1.5576 +                    CE = ucol_IGetNextCE(coll, &s, status);
  1.5577 +                    cces++;
  1.5578 +                    if(CE==UCOL_NO_MORE_CES) {
  1.5579 +                        // Add the level separator
  1.5580 +                        terminatePSKLevel(level, maxLevel, i, dest);
  1.5581 +                        byteCountOrFrenchDone = 0;
  1.5582 +                        // Restart the iteration an move to the
  1.5583 +                        // second level
  1.5584 +                        s.iterator->move(s.iterator, 0, UITER_START);
  1.5585 +                        cces = 0;
  1.5586 +                        level = UCOL_PSK_CASE;
  1.5587 +                        break;
  1.5588 +                    }
  1.5589 +                    if(!isShiftedCE(CE, LVT, &wasShifted)) {
  1.5590 +                        CE >>= 8; /* get secondary */
  1.5591 +                        if(CE != 0) {
  1.5592 +                            dest[i++]=(uint8_t)CE;
  1.5593 +                        }
  1.5594 +                    }
  1.5595 +                    if(uprv_numAvailableExpCEs(s)) {
  1.5596 +                        canUpdateState = FALSE;
  1.5597 +                    } else {
  1.5598 +                        canUpdateState = TRUE;
  1.5599 +                    }
  1.5600 +                }
  1.5601 +            } else { // French secondary processing
  1.5602 +                uint8_t frenchBuff[UCOL_MAX_BUFFER];
  1.5603 +                int32_t frenchIndex = 0;
  1.5604 +                // Here we are going backwards.
  1.5605 +                // If the iterator is at the beggining, it should be
  1.5606 +                // moved to end.
  1.5607 +                if(wasDoingPrimary) {
  1.5608 +                    s.iterator->move(s.iterator, 0, UITER_LIMIT);
  1.5609 +                    cces = 0;
  1.5610 +                }
  1.5611 +                for(;;) {
  1.5612 +                    if(i == count) {
  1.5613 +                        goto saveState;
  1.5614 +                    }
  1.5615 +                    if(canUpdateState) {
  1.5616 +                        newState = s.iterator->getState(s.iterator);
  1.5617 +                        if(newState != UITER_NO_STATE) {
  1.5618 +                            iterState = newState;
  1.5619 +                            cces = 0;
  1.5620 +                        }
  1.5621 +                    }
  1.5622 +                    CE = ucol_IGetPrevCE(coll, &s, status);
  1.5623 +                    cces++;
  1.5624 +                    if(CE==UCOL_NO_MORE_CES) {
  1.5625 +                        // Add the level separator
  1.5626 +                        terminatePSKLevel(level, maxLevel, i, dest);
  1.5627 +                        byteCountOrFrenchDone = 0;
  1.5628 +                        // Restart the iteration an move to the next level
  1.5629 +                        s.iterator->move(s.iterator, 0, UITER_START);
  1.5630 +                        level = UCOL_PSK_CASE;
  1.5631 +                        break;
  1.5632 +                    }
  1.5633 +                    if(isContinuation(CE)) { // if it's a continuation, we want to save it and
  1.5634 +                        // reverse when we get a first non-continuation CE.
  1.5635 +                        CE >>= 8;
  1.5636 +                        frenchBuff[frenchIndex++] = (uint8_t)CE;
  1.5637 +                    } else if(!isShiftedCE(CE, LVT, &wasShifted)) {
  1.5638 +                        CE >>= 8; /* get secondary */
  1.5639 +                        if(!frenchIndex) {
  1.5640 +                            if(CE != 0) {
  1.5641 +                                dest[i++]=(uint8_t)CE;
  1.5642 +                            }
  1.5643 +                        } else {
  1.5644 +                            frenchBuff[frenchIndex++] = (uint8_t)CE;
  1.5645 +                            frenchIndex -= usedFrench;
  1.5646 +                            usedFrench = 0;
  1.5647 +                            while(i < count && frenchIndex) {
  1.5648 +                                dest[i++] = frenchBuff[--frenchIndex];
  1.5649 +                                usedFrench++;
  1.5650 +                            }
  1.5651 +                        }
  1.5652 +                    }
  1.5653 +                    if(uprv_numAvailableExpCEs(s)) {
  1.5654 +                        canUpdateState = FALSE;
  1.5655 +                    } else {
  1.5656 +                        canUpdateState = TRUE;
  1.5657 +                    }
  1.5658 +                }
  1.5659 +            }
  1.5660 +        } else {
  1.5661 +            level = UCOL_PSK_CASE;
  1.5662 +        }
  1.5663 +        /* fall through to next level */
  1.5664 +    case UCOL_PSK_CASE:
  1.5665 +        if(ucol_getAttribute(coll, UCOL_CASE_LEVEL, status) == UCOL_ON) {
  1.5666 +            uint32_t caseShift = UCOL_CASE_SHIFT_START;
  1.5667 +            uint8_t caseByte = UCOL_CASE_BYTE_START;
  1.5668 +            uint8_t caseBits = 0;
  1.5669 +
  1.5670 +            for(;;) {
  1.5671 +                U_ASSERT(caseShift <= UCOL_CASE_SHIFT_START);
  1.5672 +                if(i == count) {
  1.5673 +                    goto saveState;
  1.5674 +                }
  1.5675 +                // We should save the state only if we
  1.5676 +                // are sure that we are done with the
  1.5677 +                // previous iterator state
  1.5678 +                if(canUpdateState) {
  1.5679 +                    newState = s.iterator->getState(s.iterator);
  1.5680 +                    if(newState != UITER_NO_STATE) {
  1.5681 +                        iterState = newState;
  1.5682 +                        cces = 0;
  1.5683 +                    }
  1.5684 +                }
  1.5685 +                CE = ucol_IGetNextCE(coll, &s, status);
  1.5686 +                cces++;
  1.5687 +                if(CE==UCOL_NO_MORE_CES) {
  1.5688 +                    // On the case level we might have an unfinished
  1.5689 +                    // case byte. Add one if it's started.
  1.5690 +                    if(caseShift != UCOL_CASE_SHIFT_START) {
  1.5691 +                        dest[i++] = caseByte;
  1.5692 +                    }
  1.5693 +                    cces = 0;
  1.5694 +                    // We have finished processing CEs on this level.
  1.5695 +                    // However, we don't know if we have enough space
  1.5696 +                    // to add a case level terminator.
  1.5697 +                    if(i < count) {
  1.5698 +                        // Add the level separator
  1.5699 +                        terminatePSKLevel(level, maxLevel, i, dest);
  1.5700 +                        // Restart the iteration and move to the
  1.5701 +                        // next level
  1.5702 +                        s.iterator->move(s.iterator, 0, UITER_START);
  1.5703 +                        level = UCOL_PSK_TERTIARY;
  1.5704 +                    } else {
  1.5705 +                        canUpdateState = FALSE;
  1.5706 +                    }
  1.5707 +                    break;
  1.5708 +                }
  1.5709 +
  1.5710 +                if(!isShiftedCE(CE, LVT, &wasShifted)) {
  1.5711 +                    if(!isContinuation(CE) && ((CE & UCOL_PRIMARYMASK) != 0 || strength > UCOL_PRIMARY)) {
  1.5712 +                        // do the case level if we need to do it. We don't want to calculate
  1.5713 +                        // case level for primary ignorables if we have only primary strength and case level
  1.5714 +                        // otherwise we would break well formedness of CEs
  1.5715 +                        CE = (uint8_t)(CE & UCOL_BYTE_SIZE_MASK);
  1.5716 +                        caseBits = (uint8_t)(CE & 0xC0);
  1.5717 +                        // this copies the case level logic from the
  1.5718 +                        // sort key generation code
  1.5719 +                        if(CE != 0) {
  1.5720 +                            if (caseShift == 0) {
  1.5721 +                                dest[i++] = caseByte;
  1.5722 +                                caseShift = UCOL_CASE_SHIFT_START;
  1.5723 +                                caseByte = UCOL_CASE_BYTE_START;
  1.5724 +                            }
  1.5725 +                            if(coll->caseFirst == UCOL_UPPER_FIRST) {
  1.5726 +                                if((caseBits & 0xC0) == 0) {
  1.5727 +                                    caseByte |= 1 << (--caseShift);
  1.5728 +                                } else {
  1.5729 +                                    caseByte |= 0 << (--caseShift);
  1.5730 +                                    /* second bit */
  1.5731 +                                    if(caseShift == 0) {
  1.5732 +                                        dest[i++] = caseByte;
  1.5733 +                                        caseShift = UCOL_CASE_SHIFT_START;
  1.5734 +                                        caseByte = UCOL_CASE_BYTE_START;
  1.5735 +                                    }
  1.5736 +                                    caseByte |= ((caseBits>>6)&1) << (--caseShift);
  1.5737 +                                }
  1.5738 +                            } else {
  1.5739 +                                if((caseBits & 0xC0) == 0) {
  1.5740 +                                    caseByte |= 0 << (--caseShift);
  1.5741 +                                } else {
  1.5742 +                                    caseByte |= 1 << (--caseShift);
  1.5743 +                                    /* second bit */
  1.5744 +                                    if(caseShift == 0) {
  1.5745 +                                        dest[i++] = caseByte;
  1.5746 +                                        caseShift = UCOL_CASE_SHIFT_START;
  1.5747 +                                        caseByte = UCOL_CASE_BYTE_START;
  1.5748 +                                    }
  1.5749 +                                    caseByte |= ((caseBits>>7)&1) << (--caseShift);
  1.5750 +                                }
  1.5751 +                            }
  1.5752 +                        }
  1.5753 +
  1.5754 +                    }
  1.5755 +                }
  1.5756 +                // Not sure this is correct for the case level - revisit
  1.5757 +                if(uprv_numAvailableExpCEs(s)) {
  1.5758 +                    canUpdateState = FALSE;
  1.5759 +                } else {
  1.5760 +                    canUpdateState = TRUE;
  1.5761 +                }
  1.5762 +            }
  1.5763 +        } else {
  1.5764 +            level = UCOL_PSK_TERTIARY;
  1.5765 +        }
  1.5766 +        /* fall through to next level */
  1.5767 +    case UCOL_PSK_TERTIARY:
  1.5768 +        if(strength >= UCOL_TERTIARY) {
  1.5769 +            for(;;) {
  1.5770 +                if(i == count) {
  1.5771 +                    goto saveState;
  1.5772 +                }
  1.5773 +                // We should save the state only if we
  1.5774 +                // are sure that we are done with the
  1.5775 +                // previous iterator state
  1.5776 +                if(canUpdateState) {
  1.5777 +                    newState = s.iterator->getState(s.iterator);
  1.5778 +                    if(newState != UITER_NO_STATE) {
  1.5779 +                        iterState = newState;
  1.5780 +                        cces = 0;
  1.5781 +                    }
  1.5782 +                }
  1.5783 +                CE = ucol_IGetNextCE(coll, &s, status);
  1.5784 +                cces++;
  1.5785 +                if(CE==UCOL_NO_MORE_CES) {
  1.5786 +                    // Add the level separator
  1.5787 +                    terminatePSKLevel(level, maxLevel, i, dest);
  1.5788 +                    byteCountOrFrenchDone = 0;
  1.5789 +                    // Restart the iteration an move to the
  1.5790 +                    // second level
  1.5791 +                    s.iterator->move(s.iterator, 0, UITER_START);
  1.5792 +                    cces = 0;
  1.5793 +                    level = UCOL_PSK_QUATERNARY;
  1.5794 +                    break;
  1.5795 +                }
  1.5796 +                if(!isShiftedCE(CE, LVT, &wasShifted)) {
  1.5797 +                    notIsContinuation = !isContinuation(CE);
  1.5798 +
  1.5799 +                    if(notIsContinuation) {
  1.5800 +                        CE = (uint8_t)(CE & UCOL_BYTE_SIZE_MASK);
  1.5801 +                        CE ^= coll->caseSwitch;
  1.5802 +                        CE &= coll->tertiaryMask;
  1.5803 +                    } else {
  1.5804 +                        CE = (uint8_t)((CE & UCOL_REMOVE_CONTINUATION));
  1.5805 +                    }
  1.5806 +
  1.5807 +                    if(CE != 0) {
  1.5808 +                        dest[i++]=(uint8_t)CE;
  1.5809 +                    }
  1.5810 +                }
  1.5811 +                if(uprv_numAvailableExpCEs(s)) {
  1.5812 +                    canUpdateState = FALSE;
  1.5813 +                } else {
  1.5814 +                    canUpdateState = TRUE;
  1.5815 +                }
  1.5816 +            }
  1.5817 +        } else {
  1.5818 +            // if we're not doing tertiary
  1.5819 +            // skip to the end
  1.5820 +            level = UCOL_PSK_NULL;
  1.5821 +        }
  1.5822 +        /* fall through to next level */
  1.5823 +    case UCOL_PSK_QUATERNARY:
  1.5824 +        if(strength >= UCOL_QUATERNARY) {
  1.5825 +            for(;;) {
  1.5826 +                if(i == count) {
  1.5827 +                    goto saveState;
  1.5828 +                }
  1.5829 +                // We should save the state only if we
  1.5830 +                // are sure that we are done with the
  1.5831 +                // previous iterator state
  1.5832 +                if(canUpdateState) {
  1.5833 +                    newState = s.iterator->getState(s.iterator);
  1.5834 +                    if(newState != UITER_NO_STATE) {
  1.5835 +                        iterState = newState;
  1.5836 +                        cces = 0;
  1.5837 +                    }
  1.5838 +                }
  1.5839 +                CE = ucol_IGetNextCE(coll, &s, status);
  1.5840 +                cces++;
  1.5841 +                if(CE==UCOL_NO_MORE_CES) {
  1.5842 +                    // Add the level separator
  1.5843 +                    terminatePSKLevel(level, maxLevel, i, dest);
  1.5844 +                    //dest[i++] = UCOL_LEVELTERMINATOR;
  1.5845 +                    byteCountOrFrenchDone = 0;
  1.5846 +                    // Restart the iteration an move to the
  1.5847 +                    // second level
  1.5848 +                    s.iterator->move(s.iterator, 0, UITER_START);
  1.5849 +                    cces = 0;
  1.5850 +                    level = UCOL_PSK_QUIN;
  1.5851 +                    break;
  1.5852 +                }
  1.5853 +                if(CE==0)
  1.5854 +                    continue;
  1.5855 +                if(isShiftedCE(CE, LVT, &wasShifted)) {
  1.5856 +                    CE >>= 16; /* get primary */
  1.5857 +                    if(CE != 0) {
  1.5858 +                        if(byteCountOrFrenchDone == 0) {
  1.5859 +                            dest[i++]=(uint8_t)(CE >> 8);
  1.5860 +                        } else {
  1.5861 +                            byteCountOrFrenchDone = 0;
  1.5862 +                        }
  1.5863 +                        if((CE &=0xff)!=0) {
  1.5864 +                            if(i==count) {
  1.5865 +                                /* overflow */
  1.5866 +                                byteCountOrFrenchDone = 1;
  1.5867 +                                goto saveState;
  1.5868 +                            }
  1.5869 +                            dest[i++]=(uint8_t)CE;
  1.5870 +                        }
  1.5871 +                    }
  1.5872 +                } else {
  1.5873 +                    notIsContinuation = !isContinuation(CE);
  1.5874 +                    if(notIsContinuation) {
  1.5875 +                        if(s.flags & UCOL_WAS_HIRAGANA) { // This was Hiragana and we need to note it
  1.5876 +                            dest[i++] = UCOL_HIRAGANA_QUAD;
  1.5877 +                        } else {
  1.5878 +                            dest[i++] = 0xFF;
  1.5879 +                        }
  1.5880 +                    }
  1.5881 +                }
  1.5882 +                if(uprv_numAvailableExpCEs(s)) {
  1.5883 +                    canUpdateState = FALSE;
  1.5884 +                } else {
  1.5885 +                    canUpdateState = TRUE;
  1.5886 +                }
  1.5887 +            }
  1.5888 +        } else {
  1.5889 +            // if we're not doing quaternary
  1.5890 +            // skip to the end
  1.5891 +            level = UCOL_PSK_NULL;
  1.5892 +        }
  1.5893 +        /* fall through to next level */
  1.5894 +    case UCOL_PSK_QUIN:
  1.5895 +        level = UCOL_PSK_IDENTICAL;
  1.5896 +        /* fall through to next level */
  1.5897 +    case UCOL_PSK_IDENTICAL:
  1.5898 +        if(strength >= UCOL_IDENTICAL) {
  1.5899 +            UChar32 first, second;
  1.5900 +            int32_t bocsuBytesWritten = 0;
  1.5901 +            // We always need to do identical on
  1.5902 +            // the NFD form of the string.
  1.5903 +            if(normIter == NULL) {
  1.5904 +                // we arrived from the level below and
  1.5905 +                // normalization was not turned on.
  1.5906 +                // therefore, we need to make a fresh NFD iterator
  1.5907 +                normIter = unorm_openIter(stackNormIter, sizeof(stackNormIter), status);
  1.5908 +                s.iterator = unorm_setIter(normIter, iter, UNORM_NFD, status);
  1.5909 +            } else if(!doingIdenticalFromStart) {
  1.5910 +                // there is an iterator, but we did some other levels.
  1.5911 +                // therefore, we have a FCD iterator - need to make
  1.5912 +                // a NFD one.
  1.5913 +                // normIter being at the beginning does not guarantee
  1.5914 +                // that the underlying iterator is at the beginning
  1.5915 +                iter->move(iter, 0, UITER_START);
  1.5916 +                s.iterator = unorm_setIter(normIter, iter, UNORM_NFD, status);
  1.5917 +            }
  1.5918 +            // At this point we have a NFD iterator that is positioned
  1.5919 +            // in the right place
  1.5920 +            if(U_FAILURE(*status)) {
  1.5921 +                UTRACE_EXIT_STATUS(*status);
  1.5922 +                return 0;
  1.5923 +            }
  1.5924 +            first = uiter_previous32(s.iterator);
  1.5925 +            // maybe we're at the start of the string
  1.5926 +            if(first == U_SENTINEL) {
  1.5927 +                first = 0;
  1.5928 +            } else {
  1.5929 +                uiter_next32(s.iterator);
  1.5930 +            }
  1.5931 +
  1.5932 +            j = 0;
  1.5933 +            for(;;) {
  1.5934 +                if(i == count) {
  1.5935 +                    if(j+1 < bocsuBytesWritten) {
  1.5936 +                        bocsuBytesUsed = j+1;
  1.5937 +                    }
  1.5938 +                    goto saveState;
  1.5939 +                }
  1.5940 +
  1.5941 +                // On identical level, we will always save
  1.5942 +                // the state if we reach this point, since
  1.5943 +                // we don't depend on getNextCE for content
  1.5944 +                // all the content is in our buffer and we
  1.5945 +                // already either stored the full buffer OR
  1.5946 +                // otherwise we won't arrive here.
  1.5947 +                newState = s.iterator->getState(s.iterator);
  1.5948 +                if(newState != UITER_NO_STATE) {
  1.5949 +                    iterState = newState;
  1.5950 +                    cces = 0;
  1.5951 +                }
  1.5952 +
  1.5953 +                uint8_t buff[4];
  1.5954 +                second = uiter_next32(s.iterator);
  1.5955 +                cces++;
  1.5956 +
  1.5957 +                // end condition for identical level
  1.5958 +                if(second == U_SENTINEL) {
  1.5959 +                    terminatePSKLevel(level, maxLevel, i, dest);
  1.5960 +                    level = UCOL_PSK_NULL;
  1.5961 +                    break;
  1.5962 +                }
  1.5963 +                bocsuBytesWritten = u_writeIdenticalLevelRunTwoChars(first, second, buff);
  1.5964 +                first = second;
  1.5965 +
  1.5966 +                j = 0;
  1.5967 +                if(bocsuBytesUsed != 0) {
  1.5968 +                    while(bocsuBytesUsed-->0) {
  1.5969 +                        j++;
  1.5970 +                    }
  1.5971 +                }
  1.5972 +
  1.5973 +                while(i < count && j < bocsuBytesWritten) {
  1.5974 +                    dest[i++] = buff[j++];
  1.5975 +                }
  1.5976 +            }
  1.5977 +
  1.5978 +        } else {
  1.5979 +            level = UCOL_PSK_NULL;
  1.5980 +        }
  1.5981 +        /* fall through to next level */
  1.5982 +    case UCOL_PSK_NULL:
  1.5983 +        j = i;
  1.5984 +        while(j<count) {
  1.5985 +            dest[j++]=0;
  1.5986 +        }
  1.5987 +        break;
  1.5988 +    default:
  1.5989 +        *status = U_INTERNAL_PROGRAM_ERROR;
  1.5990 +        UTRACE_EXIT_STATUS(*status);
  1.5991 +        return 0;
  1.5992 +    }
  1.5993 +
  1.5994 +saveState:
  1.5995 +    // Now we need to return stuff. First we want to see whether we have
  1.5996 +    // done everything for the current state of iterator.
  1.5997 +    if(byteCountOrFrenchDone
  1.5998 +        || canUpdateState == FALSE
  1.5999 +        || (newState = s.iterator->getState(s.iterator)) == UITER_NO_STATE)
  1.6000 +    {
  1.6001 +        // Any of above mean that the previous transaction
  1.6002 +        // wasn't finished and that we should store the
  1.6003 +        // previous iterator state.
  1.6004 +        state[0] = iterState;
  1.6005 +    } else {
  1.6006 +        // The transaction is complete. We will continue in the next iteration.
  1.6007 +        state[0] = s.iterator->getState(s.iterator);
  1.6008 +        cces = 0;
  1.6009 +    }
  1.6010 +    // Store the number of bocsu bytes written.
  1.6011 +    if((bocsuBytesUsed & UCOL_PSK_BOCSU_BYTES_MASK) != bocsuBytesUsed) {
  1.6012 +        *status = U_INDEX_OUTOFBOUNDS_ERROR;
  1.6013 +    }
  1.6014 +    state[1] = (bocsuBytesUsed & UCOL_PSK_BOCSU_BYTES_MASK) << UCOL_PSK_BOCSU_BYTES_SHIFT;
  1.6015 +
  1.6016 +    // Next we put in the level of comparison
  1.6017 +    state[1] |= ((level & UCOL_PSK_LEVEL_MASK) << UCOL_PSK_LEVEL_SHIFT);
  1.6018 +
  1.6019 +    // If we are doing French, we need to store whether we have just finished the French level
  1.6020 +    if(level == UCOL_PSK_SECONDARY && doingFrench) {
  1.6021 +        state[1] |= (((int32_t)(state[0] == 0) & UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_MASK) << UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_SHIFT);
  1.6022 +    } else {
  1.6023 +        state[1] |= ((byteCountOrFrenchDone & UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_MASK) << UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_SHIFT);
  1.6024 +    }
  1.6025 +
  1.6026 +    // Was the latest CE shifted
  1.6027 +    if(wasShifted) {
  1.6028 +        state[1] |= 1 << UCOL_PSK_WAS_SHIFTED_SHIFT;
  1.6029 +    }
  1.6030 +    // Check for cces overflow
  1.6031 +    if((cces & UCOL_PSK_CONSUMED_CES_MASK) != cces) {
  1.6032 +        *status = U_INDEX_OUTOFBOUNDS_ERROR;
  1.6033 +    }
  1.6034 +    // Store cces
  1.6035 +    state[1] |= ((cces & UCOL_PSK_CONSUMED_CES_MASK) << UCOL_PSK_CONSUMED_CES_SHIFT);
  1.6036 +
  1.6037 +    // Check for French overflow
  1.6038 +    if((usedFrench & UCOL_PSK_USED_FRENCH_MASK) != usedFrench) {
  1.6039 +        *status = U_INDEX_OUTOFBOUNDS_ERROR;
  1.6040 +    }
  1.6041 +    // Store number of bytes written in the French secondary continuation sequence
  1.6042 +    state[1] |= ((usedFrench & UCOL_PSK_USED_FRENCH_MASK) << UCOL_PSK_USED_FRENCH_SHIFT);
  1.6043 +
  1.6044 +
  1.6045 +    // If we have used normalizing iterator, get rid of it
  1.6046 +    if(normIter != NULL) {
  1.6047 +        unorm_closeIter(normIter);
  1.6048 +    }
  1.6049 +
  1.6050 +    /* To avoid memory leak, free the offset buffer if necessary. */
  1.6051 +    ucol_freeOffsetBuffer(&s);
  1.6052 +    
  1.6053 +    // Return number of meaningful sortkey bytes.
  1.6054 +    UTRACE_DATA4(UTRACE_VERBOSE, "dest = %vb, state=%d %d",
  1.6055 +                  dest,i, state[0], state[1]);
  1.6056 +    UTRACE_EXIT_VALUE(i);
  1.6057 +    return i;
  1.6058 +}
  1.6059 +
  1.6060 +/** Produce a bound for a given sortkey and a number of levels.
  1.6061 + *  Copies the bytes of source that precede the noOfLevels-th level terminator (or the end of the key), then appends the boundType suffix and a 0 byte.
  1.6062 + *  @return the number of bytes written, or the buffer size needed when result is NULL or too small; 0 when an error is detected. */
  1.6063 +U_CAPI int32_t U_EXPORT2
  1.6064 +ucol_getBound(const uint8_t       *source,       // sort key to scan; NULL is rejected with U_ILLEGAL_ARGUMENT_ERROR
  1.6065 +        int32_t             sourceLength,        // a 0 byte only ends the scan once the index has reached this value
  1.6066 +        UColBoundMode       boundType,           // numeric value == number of suffix bytes appended
  1.6067 +        uint32_t            noOfLevels,          // number of level terminators to look for
  1.6068 +        uint8_t             *result,             // output buffer; NULL just queries the required size
  1.6069 +        int32_t             resultLength,        // capacity of result in bytes
  1.6070 +        UErrorCode          *status)             // in/out error code; may receive U_SORT_KEY_TOO_SHORT_WARNING
  1.6071 +{
  1.6072 +    // consistency checks
  1.6073 +    if(status == NULL || U_FAILURE(*status)) {
  1.6074 +        return 0;
  1.6075 +    }
  1.6076 +    if(source == NULL) {
  1.6077 +        *status = U_ILLEGAL_ARGUMENT_ERROR;
  1.6078 +        return 0;
  1.6079 +    }
  1.6080 +
  1.6081 +    int32_t sourceIndex = 0;
  1.6082 +    // Scan the string until we skip enough of the key OR reach the end of the key
  1.6083 +    do {
  1.6084 +        sourceIndex++;
  1.6085 +        if(source[sourceIndex] == UCOL_LEVELTERMINATOR) {
  1.6086 +            noOfLevels--;
  1.6087 +        }
  1.6088 +    } while (noOfLevels > 0
  1.6089 +        && (source[sourceIndex] != 0 || sourceIndex < sourceLength));
  1.6090 +
  1.6091 +    if((source[sourceIndex] == 0 || sourceIndex == sourceLength)
  1.6092 +        && noOfLevels > 0) {
  1.6093 +            *status = U_SORT_KEY_TOO_SHORT_WARNING;
  1.6094 +    }
  1.6095 +    // The output is sourceIndex copied bytes + boundType suffix bytes + one 0 terminator,
  1.6096 +    // so result must have room for all of them (hence the +1) before anything is written.
  1.6097 +    // READ ME: this code assumes that the values for boundType
  1.6098 +    // enum will not change. They are set so that the enum value
  1.6099 +    // corresponds to the number of extra bytes each bound type
  1.6100 +    // needs.
  1.6101 +    if(result != NULL && resultLength >= sourceIndex+boundType+1) {
  1.6102 +        uprv_memcpy(result, source, sourceIndex);
  1.6103 +        switch(boundType) {
  1.6104 +            // Lower bound just gets terminated. No extra bytes
  1.6105 +        case UCOL_BOUND_LOWER: // = 0
  1.6106 +            break;
  1.6107 +            // Upper bound needs one extra byte
  1.6108 +        case UCOL_BOUND_UPPER: // = 1
  1.6109 +            result[sourceIndex++] = 2;
  1.6110 +            break;
  1.6111 +            // Upper long bound needs two extra bytes
  1.6112 +        case UCOL_BOUND_UPPER_LONG: // = 2
  1.6113 +            result[sourceIndex++] = 0xFF;
  1.6114 +            result[sourceIndex++] = 0xFF;
  1.6115 +            break;
  1.6116 +        default:
  1.6117 +            *status = U_ILLEGAL_ARGUMENT_ERROR;
  1.6118 +            return 0;
  1.6119 +        }
  1.6120 +        result[sourceIndex++] = 0; // terminator; covered by the +1 in the capacity test above
  1.6121 +
  1.6122 +        return sourceIndex;
  1.6123 +    } else {
  1.6124 +        return sourceIndex+boundType+1; // size needed: copied bytes + suffix + terminator
  1.6125 +    }
  1.6126 +}
  1.6127 +
  1.6128 +/****************************************************************************/
  1.6129 +/* Following are the functions that deal with the properties of a collator  */
  1.6130 +/* there are new APIs and some compatibility APIs                           */
  1.6131 +/****************************************************************************/
  1.6132 +// Packs the bytes of one CE into the latinOneCEs words for ch; each non-zero byte is OR-ed in at the current shift, which then drops by 8.
  1.6133 +static inline void
  1.6134 +ucol_addLatinOneEntry(UCollator *coll, UChar ch, uint32_t CE,
  1.6135 +                    int32_t *primShift, int32_t *secShift, int32_t *terShift)
  1.6136 +{
  1.6137 +    // latinOneCEs is three runs of latinOneTableLen words: primaries, secondaries, tertiaries
  1.6138 +    const int32_t primSlot = ch;
  1.6139 +    const int32_t secSlot = coll->latinOneTableLen+ch;
  1.6140 +    const int32_t terSlot = 2*coll->latinOneTableLen+ch;
  1.6141 +    const UBool continuation = isContinuation(CE);
  1.6142 +    // split the CE into its bytes, most significant first
  1.6143 +    uint8_t primary1 = (uint8_t)(CE >> 24);
  1.6144 +    const uint8_t primary2 = (uint8_t)((CE >> 16) & UCOL_BYTE_SIZE_MASK);
  1.6145 +    const uint8_t secondary = (uint8_t)((CE >> 8) & UCOL_BYTE_SIZE_MASK);
  1.6146 +    uint8_t tertiary;
  1.6147 +    if(continuation) { // mask with UCOL_REMOVE_CONTINUATION, then UCOL_REMOVE_CASE
  1.6148 +        tertiary = (uint8_t)((CE & UCOL_REMOVE_CONTINUATION));
  1.6149 +        tertiary &= UCOL_REMOVE_CASE;
  1.6150 +    } else {
  1.6151 +        tertiary = (uint8_t)((CE & coll->tertiaryMask));
  1.6152 +        tertiary ^= coll->caseSwitch;
  1.6153 +    }
  1.6154 +    // the lead primary byte of a non-continuation CE goes through the permutation table, if any
  1.6155 +    if(primary1 != 0) {
  1.6156 +        if (!continuation && coll->leadBytePermutationTable != NULL) {
  1.6157 +            primary1 = coll->leadBytePermutationTable[primary1];
  1.6158 +        }
  1.6159 +        coll->latinOneCEs[primSlot] |= (primary1 << *primShift);
  1.6160 +        *primShift -= 8;
  1.6161 +    }
  1.6162 +    if(primary2 != 0) {
  1.6163 +        if(*primShift < 0) { // primary shift exhausted: flag all three words as bail-out
  1.6164 +            coll->latinOneCEs[primSlot] = UCOL_BAIL_OUT_CE;
  1.6165 +            coll->latinOneCEs[secSlot] = UCOL_BAIL_OUT_CE;
  1.6166 +            coll->latinOneCEs[terSlot] = UCOL_BAIL_OUT_CE;
  1.6167 +            return;
  1.6168 +        }
  1.6169 +        coll->latinOneCEs[primSlot] |= (primary2 << *primShift);
  1.6170 +        *primShift -= 8;
  1.6171 +    }
  1.6172 +    if(secondary != 0) {
  1.6173 +        if(coll->frenchCollation == UCOL_ON && !continuation) { // reverse secondary
  1.6174 +            coll->latinOneCEs[secSlot] >>= 8; // earlier secondaries move down one byte
  1.6175 +            coll->latinOneCEs[secSlot] |= (secondary << 24);
  1.6176 +        } else { // normal case
  1.6177 +            coll->latinOneCEs[secSlot] |= (secondary << *secShift);
  1.6178 +        }
  1.6179 +        *secShift -= 8;
  1.6180 +    }
  1.6181 +    if(tertiary != 0) {
  1.6182 +        coll->latinOneCEs[terSlot] |= (tertiary << *terShift);
  1.6183 +        *terShift -= 8;
  1.6184 +    }
  1.6185 +}
  1.6186 +// Replaces coll->latinOneCEs with a zero-filled table of 3*size words, carrying over min(size, old length) entries of each of the three segments.
  1.6187 +static inline UBool
  1.6188 +ucol_resizeLatinOneTable(UCollator *coll, int32_t size, UErrorCode *status) {
  1.6189 +    uint32_t *resized = (uint32_t *)uprv_malloc(size*sizeof(uint32_t)*3);
  1.6190 +    if(resized == NULL) {
  1.6191 +      *status = U_MEMORY_ALLOCATION_ERROR;
  1.6192 +      coll->latinOneFailed = TRUE;
  1.6193 +      return FALSE;
  1.6194 +    }
  1.6195 +    uprv_memset(resized, 0, size*sizeof(uint32_t)*3);
  1.6196 +    int32_t bytesKept = ((size<coll->latinOneTableLen)?size:coll->latinOneTableLen)*sizeof(uint32_t);
  1.6197 +    for(int32_t segment = 0; segment < 3; segment++) { // segment stride changes from the old length to size
  1.6198 +        uprv_memcpy(resized+segment*size, coll->latinOneCEs+segment*coll->latinOneTableLen, bytesKept);
  1.6199 +    }
  1.6200 +    uprv_free(coll->latinOneCEs);
  1.6201 +    coll->latinOneCEs = resized;
  1.6202 +    coll->latinOneTableLen = size;
  1.6203 +    return TRUE;
  1.6204 +}
  1.6205 +
  1.6206 +static UBool
  1.6207 +ucol_setUpLatinOne(UCollator *coll, UErrorCode *status) {
  1.6208 +    UBool result = TRUE;
  1.6209 +    if(coll->latinOneCEs == NULL) {
  1.6210 +        coll->latinOneCEs = (uint32_t *)uprv_malloc(sizeof(uint32_t)*UCOL_LATINONETABLELEN*3);
  1.6211 +        if(coll->latinOneCEs == NULL) {
  1.6212 +            *status = U_MEMORY_ALLOCATION_ERROR;
  1.6213 +            return FALSE;
  1.6214 +        }
  1.6215 +        coll->latinOneTableLen = UCOL_LATINONETABLELEN;
  1.6216 +    }
  1.6217 +    UChar ch = 0;
  1.6218 +    UCollationElements *it = ucol_openElements(coll, &ch, 1, status);
  1.6219 +    // Check for null pointer 
  1.6220 +    if (U_FAILURE(*status)) {
  1.6221 +        ucol_closeElements(it);
  1.6222 +        return FALSE;
  1.6223 +    }
  1.6224 +    uprv_memset(coll->latinOneCEs, 0, sizeof(uint32_t)*coll->latinOneTableLen*3);
  1.6225 +
  1.6226 +    int32_t primShift = 24, secShift = 24, terShift = 24;
  1.6227 +    uint32_t CE = 0;
  1.6228 +    int32_t contractionOffset = UCOL_ENDOFLATINONERANGE+1;
  1.6229 +
  1.6230 +    // TODO: make safe if you get more than you wanted...
  1.6231 +    for(ch = 0; ch <= UCOL_ENDOFLATINONERANGE; ch++) {
  1.6232 +        primShift = 24; secShift = 24; terShift = 24;
  1.6233 +        if(ch < 0x100) {
  1.6234 +            CE = coll->latinOneMapping[ch];
  1.6235 +        } else {
  1.6236 +            CE = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch);
  1.6237 +            if(CE == UCOL_NOT_FOUND && coll->UCA) {
  1.6238 +                CE = UTRIE_GET32_FROM_LEAD(&coll->UCA->mapping, ch);
  1.6239 +            }
  1.6240 +        }
  1.6241 +        if(CE < UCOL_NOT_FOUND) {
  1.6242 +            ucol_addLatinOneEntry(coll, ch, CE, &primShift, &secShift, &terShift);
  1.6243 +        } else {
  1.6244 +            switch (getCETag(CE)) {
  1.6245 +            case EXPANSION_TAG:
  1.6246 +            case DIGIT_TAG:
  1.6247 +                ucol_setText(it, &ch, 1, status);
  1.6248 +                while((int32_t)(CE = ucol_next(it, status)) != UCOL_NULLORDER) {
  1.6249 +                    if(primShift < 0 || secShift < 0 || terShift < 0) {
  1.6250 +                        coll->latinOneCEs[ch] = UCOL_BAIL_OUT_CE;
  1.6251 +                        coll->latinOneCEs[coll->latinOneTableLen+ch] = UCOL_BAIL_OUT_CE;
  1.6252 +                        coll->latinOneCEs[2*coll->latinOneTableLen+ch] = UCOL_BAIL_OUT_CE;
  1.6253 +                        break;
  1.6254 +                    }
  1.6255 +                    ucol_addLatinOneEntry(coll, ch, CE, &primShift, &secShift, &terShift);
  1.6256 +                }
  1.6257 +                break;
  1.6258 +            case CONTRACTION_TAG:
  1.6259 +                // here is the trick
  1.6260 +                // F2 is contraction. We do something very similar to contractions
  1.6261 +                // but have two indices, one in the real contraction table and the
  1.6262 +                // other to where we stuffed things. This hopes that we don't have
  1.6263 +                // many contractions (this should work for latin-1 tables).
  1.6264 +                {
  1.6265 +                    if((CE & 0x00FFF000) != 0) {
  1.6266 +                        *status = U_UNSUPPORTED_ERROR;
  1.6267 +                        goto cleanup_after_failure;
  1.6268 +                    }
  1.6269 +
  1.6270 +                    const UChar *UCharOffset = (UChar *)coll->image+getContractOffset(CE);
  1.6271 +
  1.6272 +                    CE |= (contractionOffset & 0xFFF) << 12; // insert the offset in latin-1 table
  1.6273 +
  1.6274 +                    coll->latinOneCEs[ch] = CE;
  1.6275 +                    coll->latinOneCEs[coll->latinOneTableLen+ch] = CE;
  1.6276 +                    coll->latinOneCEs[2*coll->latinOneTableLen+ch] = CE;
  1.6277 +
  1.6278 +                    // We're going to jump into contraction table, pick the elements
  1.6279 +                    // and use them
  1.6280 +                    do {
  1.6281 +                        CE = *(coll->contractionCEs +
  1.6282 +                            (UCharOffset - coll->contractionIndex));
  1.6283 +                        if(CE > UCOL_NOT_FOUND && getCETag(CE) == EXPANSION_TAG) {
  1.6284 +                            uint32_t size;
  1.6285 +                            uint32_t i;    /* general counter */
  1.6286 +                            uint32_t *CEOffset = (uint32_t *)coll->image+getExpansionOffset(CE); /* find the offset to expansion table */
  1.6287 +                            size = getExpansionCount(CE);
  1.6288 +                            //CE = *CEOffset++;
  1.6289 +                            if(size != 0) { /* if there are less than 16 elements in expansion, we don't terminate */
  1.6290 +                                for(i = 0; i<size; i++) {
  1.6291 +                                    if(primShift < 0 || secShift < 0 || terShift < 0) {
  1.6292 +                                        coll->latinOneCEs[(UChar)contractionOffset] = UCOL_BAIL_OUT_CE;
  1.6293 +                                        coll->latinOneCEs[coll->latinOneTableLen+(UChar)contractionOffset] = UCOL_BAIL_OUT_CE;
  1.6294 +                                        coll->latinOneCEs[2*coll->latinOneTableLen+(UChar)contractionOffset] = UCOL_BAIL_OUT_CE;
  1.6295 +                                        break;
  1.6296 +                                    }
  1.6297 +                                    ucol_addLatinOneEntry(coll, (UChar)contractionOffset, *CEOffset++, &primShift, &secShift, &terShift);
  1.6298 +                                }
  1.6299 +                            } else { /* else, we do */
  1.6300 +                                while(*CEOffset != 0) {
  1.6301 +                                    if(primShift < 0 || secShift < 0 || terShift < 0) {
  1.6302 +                                        coll->latinOneCEs[(UChar)contractionOffset] = UCOL_BAIL_OUT_CE;
  1.6303 +                                        coll->latinOneCEs[coll->latinOneTableLen+(UChar)contractionOffset] = UCOL_BAIL_OUT_CE;
  1.6304 +                                        coll->latinOneCEs[2*coll->latinOneTableLen+(UChar)contractionOffset] = UCOL_BAIL_OUT_CE;
  1.6305 +                                        break;
  1.6306 +                                    }
  1.6307 +                                    ucol_addLatinOneEntry(coll, (UChar)contractionOffset, *CEOffset++, &primShift, &secShift, &terShift);
  1.6308 +                                }
  1.6309 +                            }
  1.6310 +                            contractionOffset++;
  1.6311 +                        } else if(CE < UCOL_NOT_FOUND) {
  1.6312 +                            ucol_addLatinOneEntry(coll, (UChar)contractionOffset++, CE, &primShift, &secShift, &terShift);
  1.6313 +                        } else {
  1.6314 +                            coll->latinOneCEs[(UChar)contractionOffset] = UCOL_BAIL_OUT_CE;
  1.6315 +                            coll->latinOneCEs[coll->latinOneTableLen+(UChar)contractionOffset] = UCOL_BAIL_OUT_CE;
  1.6316 +                            coll->latinOneCEs[2*coll->latinOneTableLen+(UChar)contractionOffset] = UCOL_BAIL_OUT_CE;
  1.6317 +                            contractionOffset++;
  1.6318 +                        }
  1.6319 +                        UCharOffset++;
  1.6320 +                        primShift = 24; secShift = 24; terShift = 24;
  1.6321 +                        if(contractionOffset == coll->latinOneTableLen) { // we need to reallocate
  1.6322 +                            if(!ucol_resizeLatinOneTable(coll, 2*coll->latinOneTableLen, status)) {
  1.6323 +                                goto cleanup_after_failure;
  1.6324 +                            }
  1.6325 +                        }
  1.6326 +                    } while(*UCharOffset != 0xFFFF);
  1.6327 +                }
  1.6328 +                break;;
  1.6329 +            case SPEC_PROC_TAG:
  1.6330 +                {
  1.6331 +                    // 0xB7 is a precontext character defined in UCA5.1; special
  1.6332 +                    // handling is implemented in order to save the LatinOne table for
  1.6333 +                    // most locales.
  1.6334 +                    if (ch==0xb7) {
  1.6335 +                        ucol_addLatinOneEntry(coll, ch, CE, &primShift, &secShift, &terShift);
  1.6336 +                    }
  1.6337 +                    else {
  1.6338 +                        goto cleanup_after_failure;
  1.6339 +                    }
  1.6340 +                }
  1.6341 +                break;
  1.6342 +            default:
  1.6343 +                goto cleanup_after_failure;
  1.6344 +            }
  1.6345 +        }
  1.6346 +    }
  1.6347 +    // compact table
  1.6348 +    if(contractionOffset < coll->latinOneTableLen) {
  1.6349 +        if(!ucol_resizeLatinOneTable(coll, contractionOffset, status)) {
  1.6350 +            goto cleanup_after_failure;
  1.6351 +        }
  1.6352 +    }
  1.6353 +    ucol_closeElements(it);
  1.6354 +    return result;
  1.6355 +
  1.6356 +cleanup_after_failure:
  1.6357 +    // status should already be set before arriving here.
  1.6358 +    coll->latinOneFailed = TRUE;
  1.6359 +    ucol_closeElements(it);
  1.6360 +    return FALSE;
  1.6361 +}
  1.6362 +
  1.6363 +void ucol_updateInternalState(UCollator *coll, UErrorCode *status) {
  1.6364 +    if(U_SUCCESS(*status)) {
  1.6365 +        if(coll->caseFirst == UCOL_UPPER_FIRST) {
  1.6366 +            coll->caseSwitch = UCOL_CASE_SWITCH;
  1.6367 +        } else {
  1.6368 +            coll->caseSwitch = UCOL_NO_CASE_SWITCH;
  1.6369 +        }
  1.6370 +
  1.6371 +        if(coll->caseLevel == UCOL_ON || coll->caseFirst == UCOL_OFF) {
  1.6372 +            coll->tertiaryMask = UCOL_REMOVE_CASE;
  1.6373 +            coll->tertiaryCommon = UCOL_COMMON3_NORMAL;
  1.6374 +            coll->tertiaryAddition = (int8_t)UCOL_FLAG_BIT_MASK_CASE_SW_OFF; /* Should be 0x80 */
  1.6375 +            coll->tertiaryTop = UCOL_COMMON_TOP3_CASE_SW_OFF;
  1.6376 +            coll->tertiaryBottom = UCOL_COMMON_BOT3;
  1.6377 +        } else {
  1.6378 +            coll->tertiaryMask = UCOL_KEEP_CASE;
  1.6379 +            coll->tertiaryAddition = UCOL_FLAG_BIT_MASK_CASE_SW_ON;
  1.6380 +            if(coll->caseFirst == UCOL_UPPER_FIRST) {
  1.6381 +                coll->tertiaryCommon = UCOL_COMMON3_UPPERFIRST;
  1.6382 +                coll->tertiaryTop = UCOL_COMMON_TOP3_CASE_SW_UPPER;
  1.6383 +                coll->tertiaryBottom = UCOL_COMMON_BOTTOM3_CASE_SW_UPPER;
  1.6384 +            } else {
  1.6385 +                coll->tertiaryCommon = UCOL_COMMON3_NORMAL;
  1.6386 +                coll->tertiaryTop = UCOL_COMMON_TOP3_CASE_SW_LOWER;
  1.6387 +                coll->tertiaryBottom = UCOL_COMMON_BOTTOM3_CASE_SW_LOWER;
  1.6388 +            }
  1.6389 +        }
  1.6390 +
  1.6391 +        /* Set the compression values */
  1.6392 +        uint8_t tertiaryTotal = (uint8_t)(coll->tertiaryTop - coll->tertiaryBottom - 1);
  1.6393 +        coll->tertiaryTopCount = (uint8_t)(UCOL_PROPORTION3*tertiaryTotal); /* we multilply double with int, but need only int */
  1.6394 +        coll->tertiaryBottomCount = (uint8_t)(tertiaryTotal - coll->tertiaryTopCount);
  1.6395 +
  1.6396 +        if(coll->caseLevel == UCOL_OFF && coll->strength == UCOL_TERTIARY
  1.6397 +            && coll->frenchCollation == UCOL_OFF && coll->alternateHandling == UCOL_NON_IGNORABLE)
  1.6398 +        {
  1.6399 +            coll->sortKeyGen = ucol_calcSortKeySimpleTertiary;
  1.6400 +        } else {
  1.6401 +            coll->sortKeyGen = ucol_calcSortKey;
  1.6402 +        }
  1.6403 +        if(coll->caseLevel == UCOL_OFF && coll->strength <= UCOL_TERTIARY && coll->numericCollation == UCOL_OFF
  1.6404 +            && coll->alternateHandling == UCOL_NON_IGNORABLE && !coll->latinOneFailed)
  1.6405 +        {
  1.6406 +            if(coll->latinOneCEs == NULL || coll->latinOneRegenTable) {
  1.6407 +                if(ucol_setUpLatinOne(coll, status)) { // if we succeed in building latin1 table, we'll use it
  1.6408 +                    //fprintf(stderr, "F");
  1.6409 +                    coll->latinOneUse = TRUE;
  1.6410 +                } else {
  1.6411 +                    coll->latinOneUse = FALSE;
  1.6412 +                }
  1.6413 +                if(*status == U_UNSUPPORTED_ERROR) {
  1.6414 +                    *status = U_ZERO_ERROR;
  1.6415 +                }
  1.6416 +            } else { // latin1Table exists and it doesn't need to be regenerated, just use it
  1.6417 +                coll->latinOneUse = TRUE;
  1.6418 +            }
  1.6419 +        } else {
  1.6420 +            coll->latinOneUse = FALSE;
  1.6421 +        }
  1.6422 +    }
  1.6423 +}
  1.6424 +
  1.6425 +U_CAPI uint32_t  U_EXPORT2
  1.6426 +ucol_setVariableTop(UCollator *coll, const UChar *varTop, int32_t len, UErrorCode *status) {
  1.6427 +    if(U_FAILURE(*status) || coll == NULL) {
  1.6428 +        return 0;
  1.6429 +    }
  1.6430 +    if(len == -1) {
  1.6431 +        len = u_strlen(varTop);
  1.6432 +    }
  1.6433 +    if(len == 0) {
  1.6434 +        *status = U_ILLEGAL_ARGUMENT_ERROR;
  1.6435 +        return 0;
  1.6436 +    }
  1.6437 +
  1.6438 +    if(coll->delegate!=NULL) {
  1.6439 +      return ((Collator*)coll->delegate)->setVariableTop(varTop, len, *status);
  1.6440 +    }
  1.6441 +
  1.6442 +
  1.6443 +    collIterate s;
  1.6444 +    IInit_collIterate(coll, varTop, len, &s, status);
  1.6445 +    if(U_FAILURE(*status)) {
  1.6446 +        return 0;
  1.6447 +    }
  1.6448 +
  1.6449 +    uint32_t CE = ucol_IGetNextCE(coll, &s, status);
  1.6450 +
  1.6451 +    /* here we check if we have consumed all characters */
  1.6452 +    /* you can put in either one character or a contraction */
  1.6453 +    /* you shouldn't put more... */
  1.6454 +    if(s.pos != s.endp || CE == UCOL_NO_MORE_CES) {
  1.6455 +        *status = U_CE_NOT_FOUND_ERROR;
  1.6456 +        return 0;
  1.6457 +    }
  1.6458 +
  1.6459 +    uint32_t nextCE = ucol_IGetNextCE(coll, &s, status);
  1.6460 +
  1.6461 +    if(isContinuation(nextCE) && (nextCE & UCOL_PRIMARYMASK) != 0) {
  1.6462 +        *status = U_PRIMARY_TOO_LONG_ERROR;
  1.6463 +        return 0;
  1.6464 +    }
  1.6465 +    if(coll->variableTopValue != (CE & UCOL_PRIMARYMASK)>>16) {
  1.6466 +        coll->variableTopValueisDefault = FALSE;
  1.6467 +        coll->variableTopValue = (CE & UCOL_PRIMARYMASK)>>16;
  1.6468 +    }
  1.6469 +    
  1.6470 +    /* To avoid memory leak, free the offset buffer if necessary. */
  1.6471 +    ucol_freeOffsetBuffer(&s);
  1.6472 +
  1.6473 +    return CE & UCOL_PRIMARYMASK;
  1.6474 +}
  1.6475 +
  1.6476 +U_CAPI uint32_t U_EXPORT2 ucol_getVariableTop(const UCollator *coll, UErrorCode *status) {
  1.6477 +    if(U_FAILURE(*status) || coll == NULL) {
  1.6478 +        return 0;
  1.6479 +    }
  1.6480 +    if(coll->delegate!=NULL) {
  1.6481 +      return ((const Collator*)coll->delegate)->getVariableTop(*status);
  1.6482 +    }
  1.6483 +    return coll->variableTopValue<<16;
  1.6484 +}
  1.6485 +
  1.6486 +U_CAPI void  U_EXPORT2
  1.6487 +ucol_restoreVariableTop(UCollator *coll, const uint32_t varTop, UErrorCode *status) {
  1.6488 +    if(U_FAILURE(*status) || coll == NULL) {
  1.6489 +        return;
  1.6490 +    }
  1.6491 +
  1.6492 +    if(coll->variableTopValue != (varTop & UCOL_PRIMARYMASK)>>16) {
  1.6493 +        coll->variableTopValueisDefault = FALSE;
  1.6494 +        coll->variableTopValue = (varTop & UCOL_PRIMARYMASK)>>16;
  1.6495 +    }
  1.6496 +}
  1.6497 +/* Attribute setter API */
  1.6498 +U_CAPI void  U_EXPORT2
  1.6499 +ucol_setAttribute(UCollator *coll, UColAttribute attr, UColAttributeValue value, UErrorCode *status) {
  1.6500 +    if(U_FAILURE(*status) || coll == NULL) {
  1.6501 +      return;
  1.6502 +    }
  1.6503 +
  1.6504 +    if(coll->delegate != NULL) {
  1.6505 +      ((Collator*)coll->delegate)->setAttribute(attr,value,*status);
  1.6506 +      return;
  1.6507 +    }
  1.6508 +
  1.6509 +    UColAttributeValue oldFrench = coll->frenchCollation;
  1.6510 +    UColAttributeValue oldCaseFirst = coll->caseFirst;
  1.6511 +    switch(attr) {
  1.6512 +    case UCOL_NUMERIC_COLLATION: /* sort substrings of digits as numbers */
  1.6513 +        if(value == UCOL_ON) {
  1.6514 +            coll->numericCollation = UCOL_ON;
  1.6515 +            coll->numericCollationisDefault = FALSE;
  1.6516 +        } else if (value == UCOL_OFF) {
  1.6517 +            coll->numericCollation = UCOL_OFF;
  1.6518 +            coll->numericCollationisDefault = FALSE;
  1.6519 +        } else if (value == UCOL_DEFAULT) {
  1.6520 +            coll->numericCollationisDefault = TRUE;
  1.6521 +            coll->numericCollation = (UColAttributeValue)coll->options->numericCollation;
  1.6522 +        } else {
  1.6523 +            *status = U_ILLEGAL_ARGUMENT_ERROR;
  1.6524 +        }
  1.6525 +        break;
  1.6526 +    case UCOL_HIRAGANA_QUATERNARY_MODE: /* special quaternary values for Hiragana */
  1.6527 +        if(value == UCOL_ON || value == UCOL_OFF || value == UCOL_DEFAULT) {
  1.6528 +            // This attribute is an implementation detail of the CLDR Japanese tailoring.
  1.6529 +            // The implementation might change to use a different mechanism
  1.6530 +            // to achieve the same Japanese sort order.
  1.6531 +            // Since ICU 50, this attribute is not settable any more via API functions.
  1.6532 +        } else {
  1.6533 +            *status = U_ILLEGAL_ARGUMENT_ERROR;
  1.6534 +        }
  1.6535 +        break;
  1.6536 +    case UCOL_FRENCH_COLLATION: /* attribute for direction of secondary weights*/
  1.6537 +        if(value == UCOL_ON) {
  1.6538 +            coll->frenchCollation = UCOL_ON;
  1.6539 +            coll->frenchCollationisDefault = FALSE;
  1.6540 +        } else if (value == UCOL_OFF) {
  1.6541 +            coll->frenchCollation = UCOL_OFF;
  1.6542 +            coll->frenchCollationisDefault = FALSE;
  1.6543 +        } else if (value == UCOL_DEFAULT) {
  1.6544 +            coll->frenchCollationisDefault = TRUE;
  1.6545 +            coll->frenchCollation = (UColAttributeValue)coll->options->frenchCollation;
  1.6546 +        } else {
  1.6547 +            *status = U_ILLEGAL_ARGUMENT_ERROR  ;
  1.6548 +        }
  1.6549 +        break;
  1.6550 +    case UCOL_ALTERNATE_HANDLING: /* attribute for handling variable elements*/
  1.6551 +        if(value == UCOL_SHIFTED) {
  1.6552 +            coll->alternateHandling = UCOL_SHIFTED;
  1.6553 +            coll->alternateHandlingisDefault = FALSE;
  1.6554 +        } else if (value == UCOL_NON_IGNORABLE) {
  1.6555 +            coll->alternateHandling = UCOL_NON_IGNORABLE;
  1.6556 +            coll->alternateHandlingisDefault = FALSE;
  1.6557 +        } else if (value == UCOL_DEFAULT) {
  1.6558 +            coll->alternateHandlingisDefault = TRUE;
  1.6559 +            coll->alternateHandling = (UColAttributeValue)coll->options->alternateHandling ;
  1.6560 +        } else {
  1.6561 +            *status = U_ILLEGAL_ARGUMENT_ERROR  ;
  1.6562 +        }
  1.6563 +        break;
  1.6564 +    case UCOL_CASE_FIRST: /* who goes first, lower case or uppercase */
  1.6565 +        if(value == UCOL_LOWER_FIRST) {
  1.6566 +            coll->caseFirst = UCOL_LOWER_FIRST;
  1.6567 +            coll->caseFirstisDefault = FALSE;
  1.6568 +        } else if (value == UCOL_UPPER_FIRST) {
  1.6569 +            coll->caseFirst = UCOL_UPPER_FIRST;
  1.6570 +            coll->caseFirstisDefault = FALSE;
  1.6571 +        } else if (value == UCOL_OFF) {
  1.6572 +            coll->caseFirst = UCOL_OFF;
  1.6573 +            coll->caseFirstisDefault = FALSE;
  1.6574 +        } else if (value == UCOL_DEFAULT) {
  1.6575 +            coll->caseFirst = (UColAttributeValue)coll->options->caseFirst;
  1.6576 +            coll->caseFirstisDefault = TRUE;
  1.6577 +        } else {
  1.6578 +            *status = U_ILLEGAL_ARGUMENT_ERROR  ;
  1.6579 +        }
  1.6580 +        break;
  1.6581 +    case UCOL_CASE_LEVEL: /* do we have an extra case level */
  1.6582 +        if(value == UCOL_ON) {
  1.6583 +            coll->caseLevel = UCOL_ON;
  1.6584 +            coll->caseLevelisDefault = FALSE;
  1.6585 +        } else if (value == UCOL_OFF) {
  1.6586 +            coll->caseLevel = UCOL_OFF;
  1.6587 +            coll->caseLevelisDefault = FALSE;
  1.6588 +        } else if (value == UCOL_DEFAULT) {
  1.6589 +            coll->caseLevel = (UColAttributeValue)coll->options->caseLevel;
  1.6590 +            coll->caseLevelisDefault = TRUE;
  1.6591 +        } else {
  1.6592 +            *status = U_ILLEGAL_ARGUMENT_ERROR  ;
  1.6593 +        }
  1.6594 +        break;
  1.6595 +    case UCOL_NORMALIZATION_MODE: /* attribute for normalization */
  1.6596 +        if(value == UCOL_ON) {
  1.6597 +            coll->normalizationMode = UCOL_ON;
  1.6598 +            coll->normalizationModeisDefault = FALSE;
  1.6599 +            initializeFCD(status);
  1.6600 +        } else if (value == UCOL_OFF) {
  1.6601 +            coll->normalizationMode = UCOL_OFF;
  1.6602 +            coll->normalizationModeisDefault = FALSE;
  1.6603 +        } else if (value == UCOL_DEFAULT) {
  1.6604 +            coll->normalizationModeisDefault = TRUE;
  1.6605 +            coll->normalizationMode = (UColAttributeValue)coll->options->normalizationMode;
  1.6606 +            if(coll->normalizationMode == UCOL_ON) {
  1.6607 +                initializeFCD(status);
  1.6608 +            }
  1.6609 +        } else {
  1.6610 +            *status = U_ILLEGAL_ARGUMENT_ERROR  ;
  1.6611 +        }
  1.6612 +        break;
  1.6613 +    case UCOL_STRENGTH:         /* attribute for strength */
  1.6614 +        if (value == UCOL_DEFAULT) {
  1.6615 +            coll->strengthisDefault = TRUE;
  1.6616 +            coll->strength = (UColAttributeValue)coll->options->strength;
  1.6617 +        } else if (value <= UCOL_IDENTICAL) {
  1.6618 +            coll->strengthisDefault = FALSE;
  1.6619 +            coll->strength = value;
  1.6620 +        } else {
  1.6621 +            *status = U_ILLEGAL_ARGUMENT_ERROR  ;
  1.6622 +        }
  1.6623 +        break;
  1.6624 +    case UCOL_ATTRIBUTE_COUNT:
  1.6625 +    default:
  1.6626 +        *status = U_ILLEGAL_ARGUMENT_ERROR;
  1.6627 +        break;
  1.6628 +    }
  1.6629 +    if(oldFrench != coll->frenchCollation || oldCaseFirst != coll->caseFirst) {
  1.6630 +        coll->latinOneRegenTable = TRUE;
  1.6631 +    } else {
  1.6632 +        coll->latinOneRegenTable = FALSE;
  1.6633 +    }
  1.6634 +    ucol_updateInternalState(coll, status);
  1.6635 +}
  1.6636 +
  1.6637 +U_CAPI UColAttributeValue  U_EXPORT2
  1.6638 +ucol_getAttribute(const UCollator *coll, UColAttribute attr, UErrorCode *status) {
  1.6639 +    if(U_FAILURE(*status) || coll == NULL) {
  1.6640 +      return UCOL_DEFAULT;
  1.6641 +    }
  1.6642 +
  1.6643 +    if(coll->delegate != NULL) {
  1.6644 +      return ((Collator*)coll->delegate)->getAttribute(attr,*status);
  1.6645 +    }
  1.6646 +
  1.6647 +    switch(attr) {
  1.6648 +    case UCOL_NUMERIC_COLLATION:
  1.6649 +      return coll->numericCollation;
  1.6650 +    case UCOL_HIRAGANA_QUATERNARY_MODE:
  1.6651 +      return coll->hiraganaQ;
  1.6652 +    case UCOL_FRENCH_COLLATION: /* attribute for direction of secondary weights*/
  1.6653 +        return coll->frenchCollation;
  1.6654 +    case UCOL_ALTERNATE_HANDLING: /* attribute for handling variable elements*/
  1.6655 +        return coll->alternateHandling;
  1.6656 +    case UCOL_CASE_FIRST: /* who goes first, lower case or uppercase */
  1.6657 +        return coll->caseFirst;
  1.6658 +    case UCOL_CASE_LEVEL: /* do we have an extra case level */
  1.6659 +        return coll->caseLevel;
  1.6660 +    case UCOL_NORMALIZATION_MODE: /* attribute for normalization */
  1.6661 +        return coll->normalizationMode;
  1.6662 +    case UCOL_STRENGTH:         /* attribute for strength */
  1.6663 +        return coll->strength;
  1.6664 +    case UCOL_ATTRIBUTE_COUNT:
  1.6665 +    default:
  1.6666 +        *status = U_ILLEGAL_ARGUMENT_ERROR;
  1.6667 +        break;
  1.6668 +    }
  1.6669 +    return UCOL_DEFAULT;
  1.6670 +}
  1.6671 +
  1.6672 +U_CAPI void U_EXPORT2
  1.6673 +ucol_setStrength(    UCollator                *coll,
  1.6674 +            UCollationStrength        strength)
  1.6675 +{
  1.6676 +    UErrorCode status = U_ZERO_ERROR;
  1.6677 +    ucol_setAttribute(coll, UCOL_STRENGTH, strength, &status);
  1.6678 +}
  1.6679 +
  1.6680 +U_CAPI UCollationStrength U_EXPORT2
  1.6681 +ucol_getStrength(const UCollator *coll)
  1.6682 +{
  1.6683 +    UErrorCode status = U_ZERO_ERROR;
  1.6684 +    return ucol_getAttribute(coll, UCOL_STRENGTH, &status);
  1.6685 +}
  1.6686 +
  1.6687 +U_CAPI int32_t U_EXPORT2 
  1.6688 +ucol_getReorderCodes(const UCollator *coll,
  1.6689 +                    int32_t *dest,
  1.6690 +                    int32_t destCapacity,
  1.6691 +                    UErrorCode *status) {
  1.6692 +    if (U_FAILURE(*status)) {
  1.6693 +        return 0;
  1.6694 +    }
  1.6695 +
  1.6696 +    if(coll->delegate!=NULL) {
  1.6697 +      return ((const Collator*)coll->delegate)->getReorderCodes(dest, destCapacity, *status);
  1.6698 +    }
  1.6699 +
  1.6700 +    if (destCapacity < 0 || (destCapacity > 0 && dest == NULL)) {
  1.6701 +        *status = U_ILLEGAL_ARGUMENT_ERROR;
  1.6702 +        return 0;
  1.6703 +    }
  1.6704 +
  1.6705 +#ifdef UCOL_DEBUG
  1.6706 +    printf("coll->reorderCodesLength = %d\n", coll->reorderCodesLength);
  1.6707 +    printf("coll->defaultReorderCodesLength = %d\n", coll->defaultReorderCodesLength);
  1.6708 +#endif
  1.6709 +
  1.6710 +    if (coll->reorderCodesLength > destCapacity) {
  1.6711 +        *status = U_BUFFER_OVERFLOW_ERROR;
  1.6712 +        return coll->reorderCodesLength;
  1.6713 +    }
  1.6714 +    for (int32_t i = 0; i < coll->reorderCodesLength; i++) {
  1.6715 +        dest[i] = coll->reorderCodes[i];
  1.6716 +    }
  1.6717 +    return coll->reorderCodesLength;
  1.6718 +}
  1.6719 +
  1.6720 +U_CAPI void U_EXPORT2 
  1.6721 +ucol_setReorderCodes(UCollator* coll,
  1.6722 +                    const int32_t* reorderCodes,
  1.6723 +                    int32_t reorderCodesLength,
  1.6724 +                    UErrorCode *status) {
  1.6725 +    if (U_FAILURE(*status)) {
  1.6726 +        return;
  1.6727 +    }
  1.6728 +
  1.6729 +    if (reorderCodesLength < 0 || (reorderCodesLength > 0 && reorderCodes == NULL)) {
  1.6730 +        *status = U_ILLEGAL_ARGUMENT_ERROR;
  1.6731 +        return;
  1.6732 +    }
  1.6733 +
  1.6734 +    if(coll->delegate!=NULL) {
  1.6735 +      ((Collator*)coll->delegate)->setReorderCodes(reorderCodes, reorderCodesLength, *status);
  1.6736 +      return;
  1.6737 +    }
  1.6738 +    
  1.6739 +    if (coll->reorderCodes != NULL && coll->freeReorderCodesOnClose == TRUE) {
  1.6740 +        uprv_free(coll->reorderCodes);
  1.6741 +    }
  1.6742 +    coll->reorderCodes = NULL;
  1.6743 +    coll->freeReorderCodesOnClose = FALSE;
  1.6744 +    coll->reorderCodesLength = 0;
  1.6745 +    if (reorderCodesLength == 0) {
  1.6746 +        if (coll->leadBytePermutationTable != NULL && coll->freeLeadBytePermutationTableOnClose == TRUE) {
  1.6747 +            uprv_free(coll->leadBytePermutationTable);
  1.6748 +        }
  1.6749 +        coll->leadBytePermutationTable = NULL;
  1.6750 +        coll->freeLeadBytePermutationTableOnClose = FALSE;
  1.6751 +        return;
  1.6752 +    }
  1.6753 +    coll->reorderCodes = (int32_t*) uprv_malloc(reorderCodesLength * sizeof(int32_t));
  1.6754 +    if (coll->reorderCodes == NULL) {
  1.6755 +        *status = U_MEMORY_ALLOCATION_ERROR;
  1.6756 +        return;
  1.6757 +    }
  1.6758 +    coll->freeReorderCodesOnClose = TRUE;
  1.6759 +    for (int32_t i = 0; i < reorderCodesLength; i++) {
  1.6760 +        coll->reorderCodes[i] = reorderCodes[i];
  1.6761 +    }
  1.6762 +    coll->reorderCodesLength = reorderCodesLength;
  1.6763 +    ucol_buildPermutationTable(coll, status);
  1.6764 +}
  1.6765 +
  1.6766 +U_CAPI int32_t U_EXPORT2 
  1.6767 +ucol_getEquivalentReorderCodes(int32_t reorderCode,
  1.6768 +                    int32_t* dest,
  1.6769 +                    int32_t destCapacity,
  1.6770 +                    UErrorCode *pErrorCode) {
  1.6771 +    bool equivalentCodesSet[USCRIPT_CODE_LIMIT];
  1.6772 +    uint16_t leadBytes[256];
  1.6773 +    int leadBytesCount;
  1.6774 +    int leadByteIndex;
  1.6775 +    int16_t reorderCodesForLeadByte[USCRIPT_CODE_LIMIT];
  1.6776 +    int reorderCodesForLeadByteCount;
  1.6777 +    int reorderCodeIndex;
  1.6778 +    
  1.6779 +    int32_t equivalentCodesCount = 0;
  1.6780 +    int setIndex;
  1.6781 +    
  1.6782 +    if (U_FAILURE(*pErrorCode)) {
  1.6783 +        return 0;
  1.6784 +    }
  1.6785 +
  1.6786 +    if (destCapacity < 0 || (destCapacity > 0 && dest == NULL)) {
  1.6787 +        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
  1.6788 +        return 0;
  1.6789 +    }
  1.6790 +
  1.6791 +    uprv_memset(equivalentCodesSet, 0, USCRIPT_CODE_LIMIT * sizeof(bool));
  1.6792 +
  1.6793 +    const UCollator* uca = ucol_initUCA(pErrorCode);
  1.6794 +    if (U_FAILURE(*pErrorCode)) {
  1.6795 +	return 0;
  1.6796 +    }
  1.6797 +    leadBytesCount = ucol_getLeadBytesForReorderCode(uca, reorderCode, leadBytes, 256);
  1.6798 +    for (leadByteIndex = 0; leadByteIndex < leadBytesCount; leadByteIndex++) {
  1.6799 +        reorderCodesForLeadByteCount = ucol_getReorderCodesForLeadByte(
  1.6800 +            uca, leadBytes[leadByteIndex], reorderCodesForLeadByte, USCRIPT_CODE_LIMIT);
  1.6801 +        for (reorderCodeIndex = 0; reorderCodeIndex < reorderCodesForLeadByteCount; reorderCodeIndex++) {
  1.6802 +            equivalentCodesSet[reorderCodesForLeadByte[reorderCodeIndex]] = true;
  1.6803 +        }
  1.6804 +    }
  1.6805 +    
  1.6806 +    for (setIndex = 0; setIndex < USCRIPT_CODE_LIMIT; setIndex++) {
  1.6807 +        if (equivalentCodesSet[setIndex] == true) {
  1.6808 +            equivalentCodesCount++;
  1.6809 +        }
  1.6810 +    }
  1.6811 +    
  1.6812 +    if (destCapacity == 0) {
  1.6813 +        return equivalentCodesCount;
  1.6814 +    }
  1.6815 +    
  1.6816 +    equivalentCodesCount = 0;
  1.6817 +    for (setIndex = 0; setIndex < USCRIPT_CODE_LIMIT; setIndex++) {
  1.6818 +        if (equivalentCodesSet[setIndex] == true) {
  1.6819 +            dest[equivalentCodesCount++] = setIndex;
  1.6820 +            if (equivalentCodesCount >= destCapacity) {
  1.6821 +                break;
  1.6822 +            }
  1.6823 +        }        
  1.6824 +    }
  1.6825 +    return equivalentCodesCount;
  1.6826 +}
  1.6827 +
  1.6828 +
  1.6829 +/****************************************************************************/
  1.6830 +/* Following are misc functions                                             */
  1.6831 +/* there are new APIs and some compatibility APIs                           */
  1.6832 +/****************************************************************************/
  1.6833 +
  1.6834 +U_CAPI void U_EXPORT2
  1.6835 +ucol_getVersion(const UCollator* coll,
  1.6836 +                UVersionInfo versionInfo)
  1.6837 +{
  1.6838 +    if(coll->delegate!=NULL) {
  1.6839 +      ((const Collator*)coll->delegate)->getVersion(versionInfo);
  1.6840 +      return;
  1.6841 +    }
  1.6842 +    /* RunTime version  */
  1.6843 +    uint8_t rtVersion = UCOL_RUNTIME_VERSION;
  1.6844 +    /* Builder version*/
  1.6845 +    uint8_t bdVersion = coll->image->version[0];
  1.6846 +
  1.6847 +    /* Charset Version. Need to get the version from cnv files
  1.6848 +     * makeconv should populate cnv files with version and
  1.6849 +     * an api has to be provided in ucnv.h to obtain this version
  1.6850 +     */
  1.6851 +    uint8_t csVersion = 0;
  1.6852 +
  1.6853 +    /* combine the version info */
  1.6854 +    uint16_t cmbVersion = (uint16_t)((rtVersion<<11) | (bdVersion<<6) | (csVersion));
  1.6855 +
  1.6856 +    /* Tailoring rules */
  1.6857 +    versionInfo[0] = (uint8_t)(cmbVersion>>8);
  1.6858 +    versionInfo[1] = (uint8_t)cmbVersion;
  1.6859 +    versionInfo[2] = coll->image->version[1];
  1.6860 +    if(coll->UCA) {
  1.6861 +        /* Include the minor number when getting the UCA version. (major & 1f) << 3 | (minor & 7) */
  1.6862 +        versionInfo[3] = (coll->UCA->image->UCAVersion[0] & 0x1f) << 3 | (coll->UCA->image->UCAVersion[1] & 0x07);
  1.6863 +    } else {
  1.6864 +        versionInfo[3] = 0;
  1.6865 +    }
  1.6866 +}
  1.6867 +
  1.6868 +
  1.6869 +/* This internal API checks whether a character is tailored or not */
  1.6870 +U_CAPI UBool  U_EXPORT2
  1.6871 +ucol_isTailored(const UCollator *coll, const UChar u, UErrorCode *status) {
  1.6872 +    if(U_FAILURE(*status) || coll == NULL || coll == coll->UCA) {
  1.6873 +        return FALSE;
  1.6874 +    }
  1.6875 +
  1.6876 +    uint32_t CE = UCOL_NOT_FOUND;
  1.6877 +    const UChar *ContractionStart = NULL;
  1.6878 +    if(u < 0x100) { /* latin-1 */
  1.6879 +        CE = coll->latinOneMapping[u];
  1.6880 +        if(coll->UCA && CE == coll->UCA->latinOneMapping[u]) {
  1.6881 +            return FALSE;
  1.6882 +        }
  1.6883 +    } else { /* regular */
  1.6884 +        CE = UTRIE_GET32_FROM_LEAD(&coll->mapping, u);
  1.6885 +    }
  1.6886 +
  1.6887 +    if(isContraction(CE)) {
  1.6888 +        ContractionStart = (UChar *)coll->image+getContractOffset(CE);
  1.6889 +        CE = *(coll->contractionCEs + (ContractionStart- coll->contractionIndex));
  1.6890 +    }
  1.6891 +
  1.6892 +    return (UBool)(CE != UCOL_NOT_FOUND);
  1.6893 +}
  1.6894 +
  1.6895 +
  1.6896 +/****************************************************************************/
  1.6897 +/* Following are the string compare functions                               */
  1.6898 +/*                                                                          */
  1.6899 +/****************************************************************************/
  1.6900 +
  1.6901 +
  1.6902 +/*  ucol_checkIdent    internal function.  Does byte level string compare.   */
  1.6903 +/*                     Used by strcoll if strength == identical and strings  */
  1.6904 +/*                     are otherwise equal.                                  */
  1.6905 +/*                                                                           */
  1.6906 +/*                     Comparison must be done on NFD normalized strings.    */
  1.6907 +/*                     FCD is not good enough.                               */
  1.6908 +
  1.6909 +static
  1.6910 +UCollationResult    ucol_checkIdent(collIterate *sColl, collIterate *tColl, UBool normalize, UErrorCode *status)
  1.6911 +{
  1.6912 +    // When we arrive here, we can have normal strings or UCharIterators. Currently they are both
  1.6913 +    // of same type, but that doesn't really mean that it will stay that way.
  1.6914 +    int32_t            comparison;
  1.6915 +
  1.6916 +    if (sColl->flags & UCOL_USE_ITERATOR) {
  1.6917 +        // The division for the array length may truncate the array size to
  1.6918 +        // a little less than UNORM_ITER_SIZE, but that size is dimensioned too high
  1.6919 +        // for all platforms anyway.
  1.6920 +        UAlignedMemory stackNormIter1[UNORM_ITER_SIZE/sizeof(UAlignedMemory)];
  1.6921 +        UAlignedMemory stackNormIter2[UNORM_ITER_SIZE/sizeof(UAlignedMemory)];
  1.6922 +        UNormIterator *sNIt = NULL, *tNIt = NULL;
  1.6923 +        sNIt = unorm_openIter(stackNormIter1, sizeof(stackNormIter1), status);
  1.6924 +        tNIt = unorm_openIter(stackNormIter2, sizeof(stackNormIter2), status);
  1.6925 +        sColl->iterator->move(sColl->iterator, 0, UITER_START);
  1.6926 +        tColl->iterator->move(tColl->iterator, 0, UITER_START);
  1.6927 +        UCharIterator *sIt = unorm_setIter(sNIt, sColl->iterator, UNORM_NFD, status);
  1.6928 +        UCharIterator *tIt = unorm_setIter(tNIt, tColl->iterator, UNORM_NFD, status);
  1.6929 +        comparison = u_strCompareIter(sIt, tIt, TRUE);
  1.6930 +        unorm_closeIter(sNIt);
  1.6931 +        unorm_closeIter(tNIt);
  1.6932 +    } else {
  1.6933 +        int32_t sLen      = (sColl->flags & UCOL_ITER_HASLEN) ? (int32_t)(sColl->endp - sColl->string) : -1;
  1.6934 +        const UChar *sBuf = sColl->string;
  1.6935 +        int32_t tLen      = (tColl->flags & UCOL_ITER_HASLEN) ? (int32_t)(tColl->endp - tColl->string) : -1;
  1.6936 +        const UChar *tBuf = tColl->string;
  1.6937 +
  1.6938 +        if (normalize) {
  1.6939 +            *status = U_ZERO_ERROR;
  1.6940 +            // Note: We could use Normalizer::compare() or similar, but for short strings
  1.6941 +            // which may not be in FCD it might be faster to just NFD them.
  1.6942 +            // Note: spanQuickCheckYes() + normalizeSecondAndAppend() rather than
  1.6943 +            // NFD'ing immediately might be faster for long strings,
  1.6944 +            // but string comparison is usually done on relatively short strings.
  1.6945 +            sColl->nfd->normalize(UnicodeString((sColl->flags & UCOL_ITER_HASLEN) == 0, sBuf, sLen),
  1.6946 +                                  sColl->writableBuffer,
  1.6947 +                                  *status);
  1.6948 +            tColl->nfd->normalize(UnicodeString((tColl->flags & UCOL_ITER_HASLEN) == 0, tBuf, tLen),
  1.6949 +                                  tColl->writableBuffer,
  1.6950 +                                  *status);
  1.6951 +            if(U_FAILURE(*status)) {
  1.6952 +                return UCOL_LESS;
  1.6953 +            }
  1.6954 +            comparison = sColl->writableBuffer.compareCodePointOrder(tColl->writableBuffer);
  1.6955 +        } else {
  1.6956 +            comparison = u_strCompare(sBuf, sLen, tBuf, tLen, TRUE);
  1.6957 +        }
  1.6958 +    }
  1.6959 +
  1.6960 +    if (comparison < 0) {
  1.6961 +        return UCOL_LESS;
  1.6962 +    } else if (comparison == 0) {
  1.6963 +        return UCOL_EQUAL;
  1.6964 +    } else /* comparison > 0 */ {
  1.6965 +        return UCOL_GREATER;
  1.6966 +    }
  1.6967 +}
  1.6968 +
  1.6969 +/*  CEBuf - A struct and some inline functions to handle the saving    */
  1.6970 +/*          of CEs in a buffer within ucol_strcoll                     */
  1.6971 +
  1.6972 +#define UCOL_CEBUF_SIZE 512
  1.6973 +typedef struct ucol_CEBuf {
  1.6974 +    uint32_t    *buf;
  1.6975 +    uint32_t    *endp;
  1.6976 +    uint32_t    *pos;
  1.6977 +    uint32_t     localArray[UCOL_CEBUF_SIZE];
  1.6978 +} ucol_CEBuf;
  1.6979 +
  1.6980 +
  1.6981 +static
  1.6982 +inline void UCOL_INIT_CEBUF(ucol_CEBuf *b) {
  1.6983 +    (b)->buf = (b)->pos = (b)->localArray;
  1.6984 +    (b)->endp = (b)->buf + UCOL_CEBUF_SIZE;
  1.6985 +}
  1.6986 +/* Doubles the storage of a CE buffer by moving the stored CEs into a new heap block; on allocation failure sets *status and leaves the buffer as it was. */
  1.6987 +static
  1.6988 +void ucol_CEBuf_Expand(ucol_CEBuf *b, collIterate *ci, UErrorCode *status) {
  1.6989 +    /* Flag the iterate: ucol_strcollRegular tests this bit before it looks for heap buffers to free. */
  1.6990 +    ci->flags |= UCOL_ITER_ALLOCATED;
  1.6991 +
  1.6992 +    const uint32_t used = (uint32_t)(b->pos - b->buf);
  1.6993 +    const uint32_t capacity = used * 2;
  1.6994 +    uint32_t *grown = (uint32_t *)uprv_malloc(capacity * sizeof(uint32_t));
  1.6995 +    if(grown == NULL) {
  1.6996 +        *status = U_MEMORY_ALLOCATION_ERROR;
  1.6997 +        return;
  1.6998 +    }
  1.6999 +
  1.7000 +    uprv_memcpy(grown, b->buf, used * sizeof(uint32_t));
  1.7001 +    uint32_t *previous = b->buf;
  1.7002 +    b->buf = grown;
  1.7003 +    b->pos = grown + used;
  1.7004 +    b->endp = grown + capacity;
  1.7005 +    /* The initial localArray lives inside the struct and must never be passed to uprv_free. */
  1.7006 +    if (previous != b->localArray) {
  1.7007 +        uprv_free(previous);
  1.7008 +    }
  1.7009 +}
  1.7010 +/* Appends one CE, growing the buffer first when it is full. The CE is not stored while *status holds a failure. */
  1.7011 +static
  1.7012 +inline void UCOL_CEBUF_PUT(ucol_CEBuf *b, uint32_t ce, collIterate *ci, UErrorCode *status) {
  1.7013 +    if (b->endp == b->pos) {        /* no free slot left */
  1.7014 +        ucol_CEBuf_Expand(b, ci, status);
  1.7015 +    }
  1.7016 +    if (U_FAILURE(*status)) { return; }   /* a failed expansion or an earlier error */
  1.7017 +    *b->pos = ce;
  1.7018 +    b->pos++;
  1.7019 +}
  1.7020 +
  1.7021 +/* Fallback string compare: builds a complete sort key for each side and compares the keys. */
  1.7022 +/* It is used when the incremental compare gets in trouble and needs to bail out.          */
  1.7023 +static UCollationResult ucol_compareUsingSortKeys(collIterate *sColl,
  1.7024 +                                                  collIterate *tColl,
  1.7025 +                                                  UErrorCode *status)
  1.7026 +{
  1.7027 +    const UCollator *coll = sColl->coll;
  1.7028 +    // Sort keys are first built in these stack buffers; longer keys move to the heap.
  1.7029 +    uint8_t sStack[UCOL_MAX_BUFFER], tStack[UCOL_MAX_BUFFER];
  1.7030 +    uint8_t *sKey = sStack;
  1.7031 +    uint8_t *tKey = tStack;
  1.7032 +    int32_t sKeyLen = 0, tKeyLen = 0;
  1.7033 +    int32_t order = UCOL_EQUAL;     // stays "equal" when an allocation fails below
  1.7034 +    // Text to be keyed: either the iterate's own string or a copy drained from its iterator.
  1.7035 +    UnicodeString sText, tText;
  1.7036 +    const UChar *source = NULL, *target = NULL;
  1.7037 +    int32_t sourceLength, targetLength;
  1.7038 +
  1.7039 +    if((sColl->flags & UCOL_USE_ITERATOR) == 0) { // no iterators: use the strings in place
  1.7040 +        source = sColl->string;
  1.7041 +        target = tColl->string;
  1.7042 +        // The length is -1 when the iterate carries no explicit end pointer.
  1.7043 +        sourceLength = (sColl->flags&UCOL_ITER_HASLEN) ? (int32_t)(sColl->endp-sColl->string) : -1;
  1.7044 +        targetLength = (tColl->flags&UCOL_ITER_HASLEN) ? (int32_t)(tColl->endp-tColl->string) : -1;
  1.7045 +    } else {
  1.7046 +        // Rewind both iterators, then copy everything they deliver into local strings.
  1.7047 +        sColl->iterator->move(sColl->iterator, 0, UITER_START);
  1.7048 +        tColl->iterator->move(tColl->iterator, 0, UITER_START);
  1.7049 +        for(UChar32 c = sColl->iterator->next(sColl->iterator); c >= 0; c = sColl->iterator->next(sColl->iterator)) {
  1.7050 +            sText.append((UChar)c);
  1.7051 +        }
  1.7052 +        for(UChar32 c = tColl->iterator->next(tColl->iterator); c >= 0; c = tColl->iterator->next(tColl->iterator)) {
  1.7053 +            tText.append((UChar)c);
  1.7054 +        }
  1.7055 +        source = sText.getBuffer();
  1.7056 +        sourceLength = sText.length();
  1.7057 +        target = tText.getBuffer();
  1.7058 +        targetLength = tText.length();
  1.7059 +    }
  1.7060 +
  1.7061 +    // First try the stack buffer; a returned length above its size means the key did not fit.
  1.7062 +    sKeyLen = ucol_getSortKey(coll, source, sourceLength, sKey, UCOL_MAX_BUFFER);
  1.7063 +    if(sKeyLen > UCOL_MAX_BUFFER) {
  1.7064 +        sKey = (uint8_t*)uprv_malloc(sKeyLen*sizeof(uint8_t));
  1.7065 +        if(sKey == NULL) {
  1.7066 +            *status = U_MEMORY_ALLOCATION_ERROR;
  1.7067 +            goto releaseKeys;
  1.7068 +        }
  1.7069 +        sKeyLen = ucol_getSortKey(coll, source, sourceLength, sKey, sKeyLen);
  1.7070 +    }
  1.7071 +
  1.7072 +    tKeyLen = ucol_getSortKey(coll, target, targetLength, tKey, UCOL_MAX_BUFFER);
  1.7073 +    if(tKeyLen > UCOL_MAX_BUFFER) {
  1.7074 +        tKey = (uint8_t*)uprv_malloc(tKeyLen*sizeof(uint8_t));
  1.7075 +        if(tKey == NULL) {
  1.7076 +            *status = U_MEMORY_ALLOCATION_ERROR;
  1.7077 +            goto releaseKeys;
  1.7078 +        }
  1.7079 +        tKeyLen = ucol_getSortKey(coll, target, targetLength, tKey, tKeyLen);
  1.7080 +    }
  1.7081 +
  1.7082 +    order = uprv_strcmp((const char*)sKey, (const char*)tKey);
  1.7083 +
  1.7084 +releaseKeys:
  1.7085 +    // Only heap blocks are released; after a failed allocation the pointer is NULL.
  1.7086 +    if(sKey != sStack && sKey != NULL) {
  1.7087 +        uprv_free(sKey);
  1.7088 +    }
  1.7089 +
  1.7090 +    if(tKey != tStack && tKey != NULL) {
  1.7091 +        uprv_free(tKey);
  1.7092 +    }
  1.7093 +
  1.7094 +    // Translate the strcmp-style ordering into the collation result constants.
  1.7095 +    // An untouched 'order' reports UCOL_EQUAL.
  1.7096 +    if(order == 0) {
  1.7097 +        return UCOL_EQUAL;
  1.7098 +    }
  1.7099 +    return (order < 0) ? UCOL_LESS : UCOL_GREATER;
  1.7100 +}
  1.7101 +/* Compares two prepared collIterate objects: primaries are compared while the CEs are fetched and buffered; the secondary, case, */
  1.7102 +/* tertiary, quaternary and identical levels then re-scan the buffers. A failure in *status ends the comparison after the primary pass. */
  1.7103 +static UCollationResult
  1.7104 +ucol_strcollRegular(collIterate *sColl, collIterate *tColl, UErrorCode *status)
  1.7105 +{
  1.7106 +    U_ALIGN_CODE(16);
  1.7107 +
  1.7108 +    const UCollator *coll = sColl->coll;
  1.7109 +
  1.7110 +
  1.7111 +    // setting up the collator parameters
  1.7112 +    UColAttributeValue strength = coll->strength;
  1.7113 +    UBool initialCheckSecTer = (strength  >= UCOL_SECONDARY);
  1.7114 +
  1.7115 +    UBool checkSecTer = initialCheckSecTer;
  1.7116 +    UBool checkTertiary = (strength  >= UCOL_TERTIARY);
  1.7117 +    UBool checkQuad = (strength  >= UCOL_QUATERNARY);
  1.7118 +    UBool checkIdent = (strength == UCOL_IDENTICAL);
  1.7119 +    UBool checkCase = (coll->caseLevel == UCOL_ON);
  1.7120 +    UBool isFrenchSec = (coll->frenchCollation == UCOL_ON) && checkSecTer;
  1.7121 +    UBool shifted = (coll->alternateHandling == UCOL_SHIFTED);
  1.7122 +    UBool qShifted = shifted && checkQuad;
  1.7123 +    UBool doHiragana = (coll->hiraganaQ == UCOL_ON) && checkQuad;
  1.7124 +
  1.7125 +    if(doHiragana && shifted) { // this combination is not handled CE by CE; compare complete sort keys instead
  1.7126 +        return (ucol_compareUsingSortKeys(sColl, tColl, status));
  1.7127 +    }
  1.7128 +    uint8_t caseSwitch = coll->caseSwitch;
  1.7129 +    uint8_t tertiaryMask = coll->tertiaryMask;
  1.7130 +
  1.7131 +    // This is the lowest primary value that will not be ignored if shifted
  1.7132 +    uint32_t LVT = (shifted)?(coll->variableTopValue<<16):0;
  1.7133 +
  1.7134 +    UCollationResult result = UCOL_EQUAL;
  1.7135 +    UCollationResult hirResult = UCOL_EQUAL;
  1.7136 +
  1.7137 +    // Preparing the CE buffers. They will be filled during the primary phase
  1.7138 +    ucol_CEBuf   sCEs;
  1.7139 +    ucol_CEBuf   tCEs;
  1.7140 +    UCOL_INIT_CEBUF(&sCEs);
  1.7141 +    UCOL_INIT_CEBUF(&tCEs);
  1.7142 +
  1.7143 +    uint32_t secS = 0, secT = 0;
  1.7144 +    uint32_t sOrder=0, tOrder=0;
  1.7145 +
  1.7146 +    // Non shifted primary processing is quite simple
  1.7147 +    if(!shifted) {
  1.7148 +        for(;;) {
  1.7149 +            // We fetch CEs until we hit a non ignorable primary or end.
  1.7150 +            uint32_t sPrimary;
  1.7151 +            do {
  1.7152 +                // We get the next CE
  1.7153 +                sOrder = ucol_IGetNextCE(coll, sColl, status);
  1.7154 +                // Stuff it in the buffer
  1.7155 +                UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status);
  1.7156 +                // And keep just the primary part.
  1.7157 +                sPrimary = sOrder & UCOL_PRIMARYMASK;
  1.7158 +            } while(sPrimary == 0);
  1.7159 +
  1.7160 +            // see the comments on the above block
  1.7161 +            uint32_t tPrimary;
  1.7162 +            do {
  1.7163 +                tOrder = ucol_IGetNextCE(coll, tColl, status);
  1.7164 +                UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status);
  1.7165 +                tPrimary = tOrder & UCOL_PRIMARYMASK;
  1.7166 +            } while(tPrimary == 0);
  1.7167 +
  1.7168 +            // if both primaries are the same
  1.7169 +            if(sPrimary == tPrimary) {
  1.7170 +                // and there are no more CEs, we advance to the next level
  1.7171 +                if(sPrimary == UCOL_NO_MORE_CES_PRIMARY) {
  1.7172 +                    break;
  1.7173 +                }
  1.7174 +                if(doHiragana && hirResult == UCOL_EQUAL) {
  1.7175 +                    if((sColl->flags & UCOL_WAS_HIRAGANA) != (tColl->flags & UCOL_WAS_HIRAGANA)) {
  1.7176 +                        hirResult = ((sColl->flags & UCOL_WAS_HIRAGANA) > (tColl->flags & UCOL_WAS_HIRAGANA))
  1.7177 +                            ? UCOL_LESS:UCOL_GREATER;
  1.7178 +                    }
  1.7179 +                }
  1.7180 +            } else {
  1.7181 +                // only need to check one for continuation
  1.7182 +                // if one is then the other must be or the preceding CE would be a prefix of the other
  1.7183 +                if (coll->leadBytePermutationTable != NULL && !isContinuation(sOrder)) {
  1.7184 +                    sPrimary = (coll->leadBytePermutationTable[sPrimary>>24] << 24) | (sPrimary & 0x00FFFFFF);
  1.7185 +                    tPrimary = (coll->leadBytePermutationTable[tPrimary>>24] << 24) | (tPrimary & 0x00FFFFFF);
  1.7186 +                }
  1.7187 +                // if two primaries are different, we are done
  1.7188 +                result = (sPrimary < tPrimary) ?  UCOL_LESS: UCOL_GREATER;
  1.7189 +                goto commonReturn;
  1.7190 +            }
  1.7191 +        } // no primary difference... do the rest from the buffers
  1.7192 +    } else { // shifted - do a slightly more complicated processing :)
  1.7193 +        for(;;) {
  1.7194 +            UBool sInShifted = FALSE;
  1.7195 +            UBool tInShifted = FALSE;
  1.7196 +            // This version of code can be refactored. However, it seems easier to understand this way.
  1.7197 +            // Source loop. Same as the target loop.
  1.7198 +            for(;;) {
  1.7199 +                sOrder = ucol_IGetNextCE(coll, sColl, status);
  1.7200 +                if(sOrder == UCOL_NO_MORE_CES) {
  1.7201 +                    UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status);
  1.7202 +                    break;
  1.7203 +                } else if(sOrder == 0 || (sInShifted && (sOrder & UCOL_PRIMARYMASK) == 0)) {
  1.7204 +                    /* UCA amendment - ignore ignorables that follow shifted code points */
  1.7205 +                    continue;
  1.7206 +                } else if(isContinuation(sOrder)) {
  1.7207 +                    if((sOrder & UCOL_PRIMARYMASK) > 0) { /* There is primary value */
  1.7208 +                        if(sInShifted) {
  1.7209 +                            sOrder = (sOrder & UCOL_PRIMARYMASK) | 0xC0; /* preserve interesting continuation */
  1.7210 +                            UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status);
  1.7211 +                            continue;
  1.7212 +                        } else {
  1.7213 +                            UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status);
  1.7214 +                            break;
  1.7215 +                        }
  1.7216 +                    } else { /* Just lower level values */
  1.7217 +                        if(sInShifted) {
  1.7218 +                            continue;
  1.7219 +                        } else {
  1.7220 +                            UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status);
  1.7221 +                            continue;
  1.7222 +                        }
  1.7223 +                    }
  1.7224 +                } else { /* regular */
  1.7225 +                    if(coll->leadBytePermutationTable != NULL){
  1.7226 +                        sOrder = (coll->leadBytePermutationTable[sOrder>>24] << 24) | (sOrder & 0x00FFFFFF);
  1.7227 +                    }
  1.7228 +                    if((sOrder & UCOL_PRIMARYMASK) > LVT) {
  1.7229 +                        UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status);
  1.7230 +                        break;
  1.7231 +                    } else {
  1.7232 +                        if((sOrder & UCOL_PRIMARYMASK) > 0) {
  1.7233 +                            sInShifted = TRUE;
  1.7234 +                            sOrder &= UCOL_PRIMARYMASK;
  1.7235 +                            UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status);
  1.7236 +                            continue;
  1.7237 +                        } else {
  1.7238 +                            UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status);
  1.7239 +                            sInShifted = FALSE;
  1.7240 +                            continue;
  1.7241 +                        }
  1.7242 +                    }
  1.7243 +                }
  1.7244 +            }
  1.7245 +            sOrder &= UCOL_PRIMARYMASK;
  1.7246 +            sInShifted = FALSE;
  1.7247 +
  1.7248 +            for(;;) {
  1.7249 +                tOrder = ucol_IGetNextCE(coll, tColl, status);
  1.7250 +                if(tOrder == UCOL_NO_MORE_CES) {
  1.7251 +                    UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status);
  1.7252 +                    break;
  1.7253 +                } else if(tOrder == 0 || (tInShifted && (tOrder & UCOL_PRIMARYMASK) == 0)) {
  1.7254 +                    /* UCA amendment - ignore ignorables that follow shifted code points */
  1.7255 +                    continue;
  1.7256 +                } else if(isContinuation(tOrder)) {
  1.7257 +                    if((tOrder & UCOL_PRIMARYMASK) > 0) { /* There is primary value */
  1.7258 +                        if(tInShifted) {
  1.7259 +                            tOrder = (tOrder & UCOL_PRIMARYMASK) | 0xC0; /* preserve interesting continuation */
  1.7260 +                            UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status);
  1.7261 +                            continue;
  1.7262 +                        } else {
  1.7263 +                            UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status);
  1.7264 +                            break;
  1.7265 +                        }
  1.7266 +                    } else { /* Just lower level values */
  1.7267 +                        if(tInShifted) {
  1.7268 +                            continue;
  1.7269 +                        } else {
  1.7270 +                            UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status);
  1.7271 +                            continue;
  1.7272 +                        }
  1.7273 +                    }
  1.7274 +                } else { /* regular */
  1.7275 +                    if(coll->leadBytePermutationTable != NULL){
  1.7276 +                        tOrder = (coll->leadBytePermutationTable[tOrder>>24] << 24) | (tOrder & 0x00FFFFFF);
  1.7277 +                    }
  1.7278 +                    if((tOrder & UCOL_PRIMARYMASK) > LVT) {
  1.7279 +                        UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status);
  1.7280 +                        break;
  1.7281 +                    } else {
  1.7282 +                        if((tOrder & UCOL_PRIMARYMASK) > 0) {
  1.7283 +                            tInShifted = TRUE;
  1.7284 +                            tOrder &= UCOL_PRIMARYMASK;
  1.7285 +                            UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status);
  1.7286 +                            continue;
  1.7287 +                        } else {
  1.7288 +                            UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status);
  1.7289 +                            tInShifted = FALSE;
  1.7290 +                            continue;
  1.7291 +                        }
  1.7292 +                    }
  1.7293 +                }
  1.7294 +            }
  1.7295 +            tOrder &= UCOL_PRIMARYMASK;
  1.7296 +            tInShifted = FALSE;
  1.7297 +
  1.7298 +            if(sOrder == tOrder) {
  1.7299 +                /*
  1.7300 +                if(doHiragana && hirResult == UCOL_EQUAL) {
  1.7301 +                if((sColl.flags & UCOL_WAS_HIRAGANA) != (tColl.flags & UCOL_WAS_HIRAGANA)) {
  1.7302 +                hirResult = ((sColl.flags & UCOL_WAS_HIRAGANA) > (tColl.flags & UCOL_WAS_HIRAGANA))
  1.7303 +                ? UCOL_LESS:UCOL_GREATER;
  1.7304 +                }
  1.7305 +                }
  1.7306 +                */
  1.7307 +                if(sOrder == UCOL_NO_MORE_CES_PRIMARY) {
  1.7308 +                    break;
  1.7309 +                } else {
  1.7310 +                    sOrder = 0;
  1.7311 +                    tOrder = 0;
  1.7312 +                    continue;
  1.7313 +                }
  1.7314 +            } else {
  1.7315 +                result = (sOrder < tOrder) ? UCOL_LESS : UCOL_GREATER;
  1.7316 +                goto commonReturn;
  1.7317 +            }
  1.7318 +        } /* no primary difference... do the rest from the buffers */
  1.7319 +    }
  1.7320 +    if(U_FAILURE(*status)) { goto commonReturn; } /* after an error UCOL_CEBUF_PUT stores nothing, so the buffers lack the terminator the scans below rely on */
  1.7321 +    /* now, we're gonna reexamine collected CEs */
  1.7322 +    uint32_t    *sCE;
  1.7323 +    uint32_t    *tCE;
  1.7324 +
  1.7325 +    /* This is the secondary level of comparison */
  1.7326 +    if(checkSecTer) {
  1.7327 +        if(!isFrenchSec) { /* normal */
  1.7328 +            sCE = sCEs.buf;
  1.7329 +            tCE = tCEs.buf;
  1.7330 +            for(;;) {
  1.7331 +                while (secS == 0) {
  1.7332 +                    secS = *(sCE++) & UCOL_SECONDARYMASK;
  1.7333 +                }
  1.7334 +
  1.7335 +                while(secT == 0) {
  1.7336 +                    secT = *(tCE++) & UCOL_SECONDARYMASK;
  1.7337 +                }
  1.7338 +
  1.7339 +                if(secS == secT) {
  1.7340 +                    if(secS == UCOL_NO_MORE_CES_SECONDARY) {
  1.7341 +                        break;
  1.7342 +                    } else {
  1.7343 +                        secS = 0; secT = 0;
  1.7344 +                        continue;
  1.7345 +                    }
  1.7346 +                } else {
  1.7347 +                    result = (secS < secT) ? UCOL_LESS : UCOL_GREATER;
  1.7348 +                    goto commonReturn;
  1.7349 +                }
  1.7350 +            }
  1.7351 +        } else { /* do the French */
  1.7352 +            uint32_t *sCESave = NULL;
  1.7353 +            uint32_t *tCESave = NULL;
  1.7354 +            sCE = sCEs.pos-2; /* this could also be sCEs-- if needs to be optimized */
  1.7355 +            tCE = tCEs.pos-2;
  1.7356 +            for(;;) {
  1.7357 +                while (secS == 0 && sCE >= sCEs.buf) {
  1.7358 +                    if(sCESave == NULL) {
  1.7359 +                        secS = *(sCE--);
  1.7360 +                        if(isContinuation(secS)) {
  1.7361 +                            while(isContinuation(secS = *(sCE--)))
  1.7362 +                                ;
  1.7363 +                            /* after this, secS has the start of continuation, and sCE points before that */
  1.7364 +                            sCESave = sCE; /* we save it, so that we know where to come back AND that we need to go forward */
  1.7365 +                            sCE+=2;  /* need to point to the first continuation CP */
  1.7366 +                            /* However, now you can just continue doing stuff */
  1.7367 +                        }
  1.7368 +                    } else {
  1.7369 +                        secS = *(sCE++);
  1.7370 +                        if(!isContinuation(secS)) { /* This means we have finished with this cont */
  1.7371 +                            sCE = sCESave;            /* reset the pointer to before continuation */
  1.7372 +                            sCESave = NULL;
  1.7373 +                            secS = 0;  /* Fetch a fresh CE before the continuation sequence. */
  1.7374 +                            continue;
  1.7375 +                        }
  1.7376 +                    }
  1.7377 +                    secS &= UCOL_SECONDARYMASK; /* remove the continuation bit */
  1.7378 +                }
  1.7379 +
  1.7380 +                while(secT == 0 && tCE >= tCEs.buf) {
  1.7381 +                    if(tCESave == NULL) {
  1.7382 +                        secT = *(tCE--);
  1.7383 +                        if(isContinuation(secT)) {
  1.7384 +                            while(isContinuation(secT = *(tCE--)))
  1.7385 +                                ;
  1.7386 +                            /* after this, secT has the start of continuation, and tCE points before that */
  1.7387 +                            tCESave = tCE; /* we save it, so that we know where to come back AND that we need to go forward */
  1.7388 +                            tCE+=2;  /* need to point to the first continuation CP */
  1.7389 +                            /* However, now you can just continue doing stuff */
  1.7390 +                        }
  1.7391 +                    } else {
  1.7392 +                        secT = *(tCE++);
  1.7393 +                        if(!isContinuation(secT)) { /* This means we have finished with this cont */
  1.7394 +                            tCE = tCESave;          /* reset the pointer to before continuation */
  1.7395 +                            tCESave = NULL;
  1.7396 +                            secT = 0;  /* Fetch a fresh CE before the continuation sequence. */
  1.7397 +                            continue;
  1.7398 +                        }
  1.7399 +                    }
  1.7400 +                    secT &= UCOL_SECONDARYMASK; /* remove the continuation bit */
  1.7401 +                }
  1.7402 +
  1.7403 +                if(secS == secT) {
  1.7404 +                    if(secS == UCOL_NO_MORE_CES_SECONDARY || (sCE < sCEs.buf && tCE < tCEs.buf)) {
  1.7405 +                        break;
  1.7406 +                    } else {
  1.7407 +                        secS = 0; secT = 0;
  1.7408 +                        continue;
  1.7409 +                    }
  1.7410 +                } else {
  1.7411 +                    result = (secS < secT) ? UCOL_LESS : UCOL_GREATER;
  1.7412 +                    goto commonReturn;
  1.7413 +                }
  1.7414 +            }
  1.7415 +        }
  1.7416 +    }
  1.7417 +
  1.7418 +    /* doing the case bit */
  1.7419 +    if(checkCase) {
  1.7420 +        sCE = sCEs.buf;
  1.7421 +        tCE = tCEs.buf;
  1.7422 +        for(;;) {
  1.7423 +            while((secS & UCOL_REMOVE_CASE) == 0) {
  1.7424 +                if(!isContinuation(*sCE++)) {
  1.7425 +                    secS =*(sCE-1);
  1.7426 +                    if(((secS & UCOL_PRIMARYMASK) != 0) || strength > UCOL_PRIMARY) {
  1.7427 +                        // primary ignorables should not be considered on the case level when the strength is primary
  1.7428 +                        // otherwise, the CEs stop being well-formed
  1.7429 +                        secS &= UCOL_TERT_CASE_MASK;
  1.7430 +                        secS ^= caseSwitch;
  1.7431 +                    } else {
  1.7432 +                        secS = 0;
  1.7433 +                    }
  1.7434 +                } else {
  1.7435 +                    secS = 0;
  1.7436 +                }
  1.7437 +            }
  1.7438 +
  1.7439 +            while((secT & UCOL_REMOVE_CASE) == 0) {
  1.7440 +                if(!isContinuation(*tCE++)) {
  1.7441 +                    secT = *(tCE-1);
  1.7442 +                    if(((secT & UCOL_PRIMARYMASK) != 0) || strength > UCOL_PRIMARY) {
  1.7443 +                        // primary ignorables should not be considered on the case level when the strength is primary
  1.7444 +                        // otherwise, the CEs stop being well-formed
  1.7445 +                        secT &= UCOL_TERT_CASE_MASK;
  1.7446 +                        secT ^= caseSwitch;
  1.7447 +                    } else {
  1.7448 +                        secT = 0;
  1.7449 +                    }
  1.7450 +                } else {
  1.7451 +                    secT = 0;
  1.7452 +                }
  1.7453 +            }
  1.7454 +
  1.7455 +            if((secS & UCOL_CASE_BIT_MASK) < (secT & UCOL_CASE_BIT_MASK)) {
  1.7456 +                result = UCOL_LESS;
  1.7457 +                goto commonReturn;
  1.7458 +            } else if((secS & UCOL_CASE_BIT_MASK) > (secT & UCOL_CASE_BIT_MASK)) {
  1.7459 +                result = UCOL_GREATER;
  1.7460 +                goto commonReturn;
  1.7461 +            }
  1.7462 +
  1.7463 +            if((secS & UCOL_REMOVE_CASE) == UCOL_NO_MORE_CES_TERTIARY || (secT & UCOL_REMOVE_CASE) == UCOL_NO_MORE_CES_TERTIARY ) {
  1.7464 +                break;
  1.7465 +            } else {
  1.7466 +                secS = 0;
  1.7467 +                secT = 0;
  1.7468 +            }
  1.7469 +        }
  1.7470 +    }
  1.7471 +
  1.7472 +    /* Tertiary level */
  1.7473 +    if(checkTertiary) {
  1.7474 +        secS = 0;
  1.7475 +        secT = 0;
  1.7476 +        sCE = sCEs.buf;
  1.7477 +        tCE = tCEs.buf;
  1.7478 +        for(;;) {
  1.7479 +            while((secS & UCOL_REMOVE_CASE) == 0) {
  1.7480 +                sOrder = *sCE++;
  1.7481 +                secS = sOrder & tertiaryMask;
  1.7482 +                if(!isContinuation(sOrder)) {
  1.7483 +                    secS ^= caseSwitch;
  1.7484 +                } else {
  1.7485 +                    secS &= UCOL_REMOVE_CASE;
  1.7486 +                }
  1.7487 +            }
  1.7488 +
  1.7489 +            while((secT & UCOL_REMOVE_CASE)  == 0) {
  1.7490 +                tOrder = *tCE++;
  1.7491 +                secT = tOrder & tertiaryMask;
  1.7492 +                if(!isContinuation(tOrder)) {
  1.7493 +                    secT ^= caseSwitch;
  1.7494 +                } else {
  1.7495 +                    secT &= UCOL_REMOVE_CASE;
  1.7496 +                }
  1.7497 +            }
  1.7498 +
  1.7499 +            if(secS == secT) {
  1.7500 +                if((secS & UCOL_REMOVE_CASE) == 1) {
  1.7501 +                    break;
  1.7502 +                } else {
  1.7503 +                    secS = 0; secT = 0;
  1.7504 +                    continue;
  1.7505 +                }
  1.7506 +            } else {
  1.7507 +                result = (secS < secT) ? UCOL_LESS : UCOL_GREATER;
  1.7508 +                goto commonReturn;
  1.7509 +            }
  1.7510 +        }
  1.7511 +    }
  1.7512 +
  1.7513 +
  1.7514 +    if(qShifted /*checkQuad*/) {
  1.7515 +        UBool sInShifted = TRUE;
  1.7516 +        UBool tInShifted = TRUE;
  1.7517 +        secS = 0;
  1.7518 +        secT = 0;
  1.7519 +        sCE = sCEs.buf;
  1.7520 +        tCE = tCEs.buf;
  1.7521 +        for(;;) {
  1.7522 +            while((secS == 0 && secS != UCOL_NO_MORE_CES) || (isContinuation(secS) && !sInShifted)) {
  1.7523 +                secS = *(sCE++);
  1.7524 +                if(isContinuation(secS)) {
  1.7525 +                    if(!sInShifted) {
  1.7526 +                        continue;
  1.7527 +                    }
  1.7528 +                } else if(secS > LVT || (secS & UCOL_PRIMARYMASK) == 0) { /* non continuation */
  1.7529 +                    secS = UCOL_PRIMARYMASK;
  1.7530 +                    sInShifted = FALSE;
  1.7531 +                } else {
  1.7532 +                    sInShifted = TRUE;
  1.7533 +                }
  1.7534 +            }
  1.7535 +            secS &= UCOL_PRIMARYMASK;
  1.7536 +
  1.7537 +
  1.7538 +            while((secT == 0 && secT != UCOL_NO_MORE_CES) || (isContinuation(secT) && !tInShifted)) {
  1.7539 +                secT = *(tCE++);
  1.7540 +                if(isContinuation(secT)) {
  1.7541 +                    if(!tInShifted) {
  1.7542 +                        continue;
  1.7543 +                    }
  1.7544 +                } else if(secT > LVT || (secT & UCOL_PRIMARYMASK) == 0) {
  1.7545 +                    secT = UCOL_PRIMARYMASK;
  1.7546 +                    tInShifted = FALSE;
  1.7547 +                } else {
  1.7548 +                    tInShifted = TRUE;
  1.7549 +                }
  1.7550 +            }
  1.7551 +            secT &= UCOL_PRIMARYMASK;
  1.7552 +
  1.7553 +            if(secS == secT) {
  1.7554 +                if(secS == UCOL_NO_MORE_CES_PRIMARY) {
  1.7555 +                    break;
  1.7556 +                } else {
  1.7557 +                    secS = 0; secT = 0;
  1.7558 +                    continue;
  1.7559 +                }
  1.7560 +            } else {
  1.7561 +                result = (secS < secT) ? UCOL_LESS : UCOL_GREATER;
  1.7562 +                goto commonReturn;
  1.7563 +            }
  1.7564 +        }
  1.7565 +    } else if(doHiragana && hirResult != UCOL_EQUAL) {
  1.7566 +        // If we're fine on quaternaries, we might be different
  1.7567 +        // on Hiragana. This, however, might fail us in shifted.
  1.7568 +        result = hirResult;
  1.7569 +        goto commonReturn;
  1.7570 +    }
  1.7571 +
  1.7572 +    /*  For IDENTICAL comparisons, we use a bitwise character comparison */
  1.7573 +    /*  as a tiebreaker if all else is equal.                                */
  1.7574 +    /*  Getting here  should be quite rare - strings are not identical -     */
  1.7575 +    /*     that is checked first, but compared == through all other checks.  */
  1.7576 +    if(checkIdent)
  1.7577 +    {
  1.7578 +        //result = ucol_checkIdent(&sColl, &tColl, coll->normalizationMode == UCOL_ON);
  1.7579 +        result = ucol_checkIdent(sColl, tColl, TRUE, status);
  1.7580 +    }
  1.7581 +
  1.7582 +commonReturn:
  1.7583 +    if ((sColl->flags | tColl->flags) & UCOL_ITER_ALLOCATED) { // set by ucol_CEBuf_Expand when a buffer was grown
  1.7584 +        if (sCEs.buf != sCEs.localArray ) {
  1.7585 +            uprv_free(sCEs.buf);
  1.7586 +        }
  1.7587 +        if (tCEs.buf != tCEs.localArray ) {
  1.7588 +            uprv_free(tCEs.buf);
  1.7589 +        }
  1.7590 +    }
  1.7591 +
  1.7592 +    return result;
  1.7593 +}
  1.7594 +/* String-based entry point: wraps both strings in collIterate objects and hands them to the iterate-based comparison. */
  1.7595 +static UCollationResult
  1.7596 +ucol_strcollRegular(const UCollator *coll,
  1.7597 +                    const UChar *source, int32_t sourceLength,
  1.7598 +                    const UChar *target, int32_t targetLength,
  1.7599 +                    UErrorCode *status) {
  1.7600 +    collIterate sourceIter, targetIter;
  1.7601 +    IInit_collIterate(coll, source, sourceLength, &sourceIter, status);
  1.7602 +    IInit_collIterate(coll, target, targetLength, &targetIter, status);
  1.7603 +    // Both initializations are attempted before the status is inspected.
  1.7604 +    if(U_SUCCESS(*status)) {
  1.7605 +        return ucol_strcollRegular(&sourceIter, &targetIter, status);
  1.7606 +    }
  1.7607 +    return UCOL_LESS;   // value reported when setting up an iterate failed
  1.7608 +}
  1.7609 +/* Resolves a Latin-1 contraction CE against the text at s[*index]; advances *index past consumed code units and returns UCOL_BAIL_OUT_CE for text above Latin-1. */
  1.7610 +static inline uint32_t
  1.7611 +ucol_getLatinOneContraction(const UCollator *coll, int32_t strength,
  1.7612 +                          uint32_t CE, const UChar *s, int32_t *index, int32_t len)
  1.7613 +{
  1.7614 +    // The low 12 bits of the CE locate this contraction's code units in the collator image.
  1.7615 +    const UChar *candidates = (UChar *)coll->image+getContractOffset(CE&0xFFF);
  1.7616 +    // Bits 12..23 give the contraction's first slot; the table is indexed by strength*latinOneTableLen + slot.
  1.7617 +    const int32_t firstSlot = strength*coll->latinOneTableLen + (int32_t)((CE & 0x00FFF000) >> 12);
  1.7618 +    int32_t candidate = 1;  // slot 0 holds the CE returned when no contraction matches
  1.7619 +
  1.7620 +    for(;;) {
  1.7621 +        // The text ends at the terminator (len == -1) or at the explicit length.
  1.7622 +        UBool exhausted;
  1.7623 +        if(len == -1) {
  1.7624 +            exhausted = (s[*index] == 0);
  1.7625 +        } else {
  1.7626 +            exhausted = (*index == len);
  1.7627 +        }
  1.7628 +        if(exhausted) {
  1.7629 +            return(coll->latinOneCEs[firstSlot]);
  1.7630 +        }
  1.7631 +        const UChar schar = s[*index];
  1.7632 +
  1.7633 +        // Since the contraction code units should be ordered, all smaller ones are skipped.
  1.7634 +        // 'candidate' is deliberately not reset when the loop repeats after an ignorable.
  1.7635 +        UChar tchar = candidates[candidate];
  1.7636 +        while(schar > tchar) {
  1.7637 +            tchar = candidates[++candidate];
  1.7638 +        }
  1.7639 +
  1.7640 +        if(schar == tchar) {
  1.7641 +            // Contraction found: consume the code unit and return the matching CE.
  1.7642 +            (*index)++;
  1.7643 +            return(coll->latinOneCEs[firstSlot+candidate]);
  1.7644 +        }
  1.7645 +        // Any bit in the upper byte means the code unit is outside the Latin-1 range.
  1.7646 +        if(schar & 0xFF00 /*> UCOL_ENDOFLATIN1RANGE*/) {
  1.7647 +            return UCOL_BAIL_OUT_CE;
  1.7648 +        }
  1.7649 +
  1.7650 +        // Completely ignorable code units (zero CE in the mapping) are stepped over; anything else ends the attempt.
  1.7651 +        const uint32_t mapped = UTRIE_GET32_FROM_LEAD(&coll->mapping, schar);
  1.7652 +        if(mapped != 0) {
  1.7653 +            return(coll->latinOneCEs[firstSlot]);
  1.7654 +        }
  1.7655 +        (*index)++;
  1.7656 +    }
  1.7657 +}
  1.7658 +
  1.7659 +
  1.7660 +/**
  1.7661 + * This is a fast strcoll, geared towards text in Latin-1.
  1.7662 + * It supports contractions of size two, French secondaries
  1.7663 + * and case switching. You can use it with strengths primary
  1.7664 + * to tertiary. It does not support shifted and case level.
  1.7665 + * It relies on the table built by setupLatin1Table. If it
  1.7666 + * doesn't understand something, it will go to the regular
  1.7667 + * strcoll. A length (sLen/tLen) of -1 means the string is NUL-terminated.
  1.7668 + * Returns UCOL_LESS/UCOL_GREATER/UCOL_EQUAL for source relative to target, or the result of ucol_strcollRegular() after a bail-out. */
  1.7669 +static UCollationResult
  1.7670 +ucol_strcollUseLatin1( const UCollator    *coll,
  1.7671 +              const UChar        *source,
  1.7672 +              int32_t            sLen,
  1.7673 +              const UChar        *target,
  1.7674 +              int32_t            tLen,
  1.7675 +              UErrorCode *status)
  1.7676 +{
  1.7677 +    U_ALIGN_CODE(16);
  1.7678 +    int32_t strength = coll->strength;
  1.7679 +
  1.7680 +    int32_t sIndex = 0, tIndex = 0;
  1.7681 +    UChar sChar = 0, tChar = 0;
  1.7682 +    uint32_t sOrder=0, tOrder=0; // pending table value for each string, compared top byte first; 0 means a new value must be fetched
  1.7683 +
  1.7684 +    UBool endOfSource = FALSE;
  1.7685 +
  1.7686 +    uint32_t *elements = coll->latinOneCEs; // section used for the current level; advanced by latinOneTableLen before each further level
  1.7687 +
  1.7688 +    UBool haveContractions = FALSE; // if we have contractions in our string
  1.7689 +                                    // we cannot do French secondary
  1.7690 +
  1.7691 +    // Do the primary level
  1.7692 +    for(;;) {
  1.7693 +        while(sOrder==0) { // this loop skips primary ignorables
  1.7694 +            // sOrder=getNextlatinOneCE(source);
  1.7695 +            if(sLen==-1) {   // handling zero terminated strings
  1.7696 +                sChar=source[sIndex++]; // note: sIndex also steps past the terminator when it is read
  1.7697 +                if(sChar==0) {
  1.7698 +                    endOfSource = TRUE;
  1.7699 +                    break;
  1.7700 +                }
  1.7701 +            } else {        // handling strings with known length
  1.7702 +                if(sIndex==sLen) {
  1.7703 +                    endOfSource = TRUE;
  1.7704 +                    break;
  1.7705 +                }
  1.7706 +                sChar=source[sIndex++];
  1.7707 +            }
  1.7708 +            if(sChar&0xFF00) { // if we encounter non-latin-1, we bail out (sChar > 0xFF, but this is faster on win32)
  1.7709 +                //fprintf(stderr, "R");
  1.7710 +                return ucol_strcollRegular(coll, source, sLen, target, tLen, status);
  1.7711 +            }
  1.7712 +            sOrder = elements[sChar];
  1.7713 +            if(sOrder >= UCOL_NOT_FOUND) { // if we got a special
  1.7714 +                // specials can basically be either contractions or bail-out signs. If we get anything
  1.7715 +                // else, we'll bail out anyway
  1.7716 +                if(getCETag(sOrder) == CONTRACTION_TAG) {
  1.7717 +                    sOrder = ucol_getLatinOneContraction(coll, UCOL_PRIMARY, sOrder, source, &sIndex, sLen);
  1.7718 +                    haveContractions = TRUE; // if there are contractions, we cannot do French secondary
  1.7719 +                    // However, if there are contractions in the table, but we always use just one char,
  1.7720 +                    // we might be able to do French. This should be checked out.
  1.7721 +                }
  1.7722 +                if(sOrder >= UCOL_NOT_FOUND /*== UCOL_BAIL_OUT_CE*/) {
  1.7723 +                    //fprintf(stderr, "S");
  1.7724 +                    return ucol_strcollRegular(coll, source, sLen, target, tLen, status);
  1.7725 +                }
  1.7726 +            }
  1.7727 +        }
  1.7728 +
  1.7729 +        while(tOrder==0) {  // this loop skips primary ignorables
  1.7730 +            // tOrder=getNextlatinOneCE(target);
  1.7731 +            if(tLen==-1) {    // handling zero terminated strings
  1.7732 +                tChar=target[tIndex++];
  1.7733 +                if(tChar==0) {
  1.7734 +                    if(endOfSource) { // this is different than source loop,
  1.7735 +                        // as we already know that source loop is done here,
  1.7736 +                        // so we can either finish the primary loop if both
  1.7737 +                        // strings are done or announce the result if only
  1.7738 +                        // target is done. Same below.
  1.7739 +                        goto endOfPrimLoop;
  1.7740 +                    } else {
  1.7741 +                        return UCOL_GREATER; // target ran out while source still has a pending value
  1.7742 +                    }
  1.7743 +                }
  1.7744 +            } else {          // handling strings with known length
  1.7745 +                if(tIndex==tLen) {
  1.7746 +                    if(endOfSource) {
  1.7747 +                        goto endOfPrimLoop;
  1.7748 +                    } else {
  1.7749 +                        return UCOL_GREATER;
  1.7750 +                    }
  1.7751 +                }
  1.7752 +                tChar=target[tIndex++];
  1.7753 +            }
  1.7754 +            if(tChar&0xFF00) { // if we encounter non-latin-1, we bail out (tChar > 0xFF, but this is faster on win32)
  1.7755 +                //fprintf(stderr, "R");
  1.7756 +                return ucol_strcollRegular(coll, source, sLen, target, tLen, status);
  1.7757 +            }
  1.7758 +            tOrder = elements[tChar];
  1.7759 +            if(tOrder >= UCOL_NOT_FOUND) {
  1.7760 +                // Handling specials, see the comments for source
  1.7761 +                if(getCETag(tOrder) == CONTRACTION_TAG) {
  1.7762 +                    tOrder = ucol_getLatinOneContraction(coll, UCOL_PRIMARY, tOrder, target, &tIndex, tLen);
  1.7763 +                    haveContractions = TRUE;
  1.7764 +                }
  1.7765 +                if(tOrder >= UCOL_NOT_FOUND /*== UCOL_BAIL_OUT_CE*/) {
  1.7766 +                    //fprintf(stderr, "S");
  1.7767 +                    return ucol_strcollRegular(coll, source, sLen, target, tLen, status);
  1.7768 +                }
  1.7769 +            }
  1.7770 +        }
  1.7771 +        if(endOfSource) { // source is finished, but target is not, say the result.
  1.7772 +            return UCOL_LESS;
  1.7773 +        }
  1.7774 +
  1.7775 +        if(sOrder == tOrder) { // if we have same CEs, we continue the loop
  1.7776 +            sOrder = 0; tOrder = 0;
  1.7777 +            continue;
  1.7778 +        } else {
  1.7779 +            // compare current top bytes
  1.7780 +            if(((sOrder^tOrder)&0xFF000000)!=0) {
  1.7781 +                // top bytes differ, return difference
  1.7782 +                if(sOrder < tOrder) {
  1.7783 +                    return UCOL_LESS;
  1.7784 +                } else if(sOrder > tOrder) {
  1.7785 +                    return UCOL_GREATER;
  1.7786 +                }
  1.7787 +                // instead of return (int32_t)(sOrder>>24)-(int32_t)(tOrder>>24);
  1.7788 +                // since we must return enum value
  1.7789 +            }
  1.7790 +
  1.7791 +            // top bytes match, continue with following bytes
  1.7792 +            sOrder<<=8; // if one side becomes 0 here, its loop above fetches a new value while the other side keeps its remaining bytes
  1.7793 +            tOrder<<=8;
  1.7794 +        }
  1.7795 +    }
  1.7796 +
  1.7797 +endOfPrimLoop:
  1.7798 +    // after primary loop, we definitely know the sizes of strings,
  1.7799 +    // so we set it and use simpler loop for secondaries and tertiaries
  1.7800 +    sLen = sIndex; tLen = tIndex; // for NUL-terminated input the index already stepped past the terminator, so it is counted; NOTE(review): later loops then read elements[0] for it - presumably 0, confirm
  1.7801 +    if(strength >= UCOL_SECONDARY) {
  1.7802 +        // adjust the table beginning
  1.7803 +        elements += coll->latinOneTableLen;
  1.7804 +        endOfSource = FALSE;
  1.7805 +
  1.7806 +        if(coll->frenchCollation == UCOL_OFF) { // non French
  1.7807 +            // This loop is a simplified copy of primary loop
  1.7808 +            // at this point we know that whole strings are latin-1, so we don't
  1.7809 +            // check for that. We also know that we only have contractions as
  1.7810 +            // specials.
  1.7811 +            sIndex = 0; tIndex = 0;
  1.7812 +            for(;;) {
  1.7813 +                while(sOrder==0) {
  1.7814 +                    if(sIndex==sLen) {
  1.7815 +                        endOfSource = TRUE;
  1.7816 +                        break;
  1.7817 +                    }
  1.7818 +                    sChar=source[sIndex++];
  1.7819 +                    sOrder = elements[sChar];
  1.7820 +                    if(sOrder > UCOL_NOT_FOUND) { // note: strict '>' here, whereas the primary loop tests '>='
  1.7821 +                        sOrder = ucol_getLatinOneContraction(coll, UCOL_SECONDARY, sOrder, source, &sIndex, sLen);
  1.7822 +                    }
  1.7823 +                }
  1.7824 +
  1.7825 +                while(tOrder==0) {
  1.7826 +                    if(tIndex==tLen) {
  1.7827 +                        if(endOfSource) {
  1.7828 +                            goto endOfSecLoop;
  1.7829 +                        } else {
  1.7830 +                            return UCOL_GREATER;
  1.7831 +                        }
  1.7832 +                    }
  1.7833 +                    tChar=target[tIndex++];
  1.7834 +                    tOrder = elements[tChar];
  1.7835 +                    if(tOrder > UCOL_NOT_FOUND) {
  1.7836 +                        tOrder = ucol_getLatinOneContraction(coll, UCOL_SECONDARY, tOrder, target, &tIndex, tLen);
  1.7837 +                    }
  1.7838 +                }
  1.7839 +                if(endOfSource) {
  1.7840 +                    return UCOL_LESS;
  1.7841 +                }
  1.7842 +
  1.7843 +                if(sOrder == tOrder) {
  1.7844 +                    sOrder = 0; tOrder = 0;
  1.7845 +                    continue;
  1.7846 +                } else {
  1.7847 +                    // see primary loop for comments on this
  1.7848 +                    if(((sOrder^tOrder)&0xFF000000)!=0) {
  1.7849 +                        if(sOrder < tOrder) {
  1.7850 +                            return UCOL_LESS;
  1.7851 +                        } else if(sOrder > tOrder) {
  1.7852 +                            return UCOL_GREATER;
  1.7853 +                        }
  1.7854 +                    }
  1.7855 +                    sOrder<<=8;
  1.7856 +                    tOrder<<=8;
  1.7857 +                }
  1.7858 +            }
  1.7859 +        } else { // French
  1.7860 +            if(haveContractions) { // if we have contractions, we have to bail out
  1.7861 +                // since we don't really know how to handle them here
  1.7862 +                return ucol_strcollRegular(coll, source, sLen, target, tLen, status); // sLen/tLen are the lengths recomputed after the primary loop
  1.7863 +            }
  1.7864 +            // For French, we go backwards
  1.7865 +            sIndex = sLen; tIndex = tLen;
  1.7866 +            for(;;) {
  1.7867 +                while(sOrder==0) {
  1.7868 +                    if(sIndex==0) {
  1.7869 +                        endOfSource = TRUE;
  1.7870 +                        break;
  1.7871 +                    }
  1.7872 +                    sChar=source[--sIndex];
  1.7873 +                    sOrder = elements[sChar];
  1.7874 +                    // don't even look for contractions
  1.7875 +                }
  1.7876 +
  1.7877 +                while(tOrder==0) {
  1.7878 +                    if(tIndex==0) {
  1.7879 +                        if(endOfSource) {
  1.7880 +                            goto endOfSecLoop;
  1.7881 +                        } else {
  1.7882 +                            return UCOL_GREATER;
  1.7883 +                        }
  1.7884 +                    }
  1.7885 +                    tChar=target[--tIndex];
  1.7886 +                    tOrder = elements[tChar];
  1.7887 +                    // don't even look for contractions
  1.7888 +                }
  1.7889 +                if(endOfSource) {
  1.7890 +                    return UCOL_LESS;
  1.7891 +                }
  1.7892 +
  1.7893 +                if(sOrder == tOrder) {
  1.7894 +                    sOrder = 0; tOrder = 0;
  1.7895 +                    continue;
  1.7896 +                } else {
  1.7897 +                    // see the primary loop for comments
  1.7898 +                    if(((sOrder^tOrder)&0xFF000000)!=0) {
  1.7899 +                        if(sOrder < tOrder) {
  1.7900 +                            return UCOL_LESS;
  1.7901 +                        } else if(sOrder > tOrder) {
  1.7902 +                            return UCOL_GREATER;
  1.7903 +                        }
  1.7904 +                    }
  1.7905 +                    sOrder<<=8;
  1.7906 +                    tOrder<<=8;
  1.7907 +                }
  1.7908 +            }
  1.7909 +        }
  1.7910 +    }
  1.7911 +
  1.7912 +endOfSecLoop:
  1.7913 +    if(strength >= UCOL_TERTIARY) {
  1.7914 +        // tertiary loop is the same as secondary (except no French)
  1.7915 +        elements += coll->latinOneTableLen; // NOTE(review): assumes the secondary block above already advanced elements once - confirm strength ordering guarantees that
  1.7916 +        sIndex = 0; tIndex = 0;
  1.7917 +        endOfSource = FALSE;
  1.7918 +        for(;;) {
  1.7919 +            while(sOrder==0) {
  1.7920 +                if(sIndex==sLen) {
  1.7921 +                    endOfSource = TRUE;
  1.7922 +                    break;
  1.7923 +                }
  1.7924 +                sChar=source[sIndex++];
  1.7925 +                sOrder = elements[sChar];
  1.7926 +                if(sOrder > UCOL_NOT_FOUND) {
  1.7927 +                    sOrder = ucol_getLatinOneContraction(coll, UCOL_TERTIARY, sOrder, source, &sIndex, sLen);
  1.7928 +                }
  1.7929 +            }
  1.7930 +            while(tOrder==0) {
  1.7931 +                if(tIndex==tLen) {
  1.7932 +                    if(endOfSource) {
  1.7933 +                        return UCOL_EQUAL; // if both strings are at the end, they are equal
  1.7934 +                    } else {
  1.7935 +                        return UCOL_GREATER;
  1.7936 +                    }
  1.7937 +                }
  1.7938 +                tChar=target[tIndex++];
  1.7939 +                tOrder = elements[tChar];
  1.7940 +                if(tOrder > UCOL_NOT_FOUND) {
  1.7941 +                    tOrder = ucol_getLatinOneContraction(coll, UCOL_TERTIARY, tOrder, target, &tIndex, tLen);
  1.7942 +                }
  1.7943 +            }
  1.7944 +            if(endOfSource) {
  1.7945 +                return UCOL_LESS;
  1.7946 +            }
  1.7947 +            if(sOrder == tOrder) {
  1.7948 +                sOrder = 0; tOrder = 0;
  1.7949 +                continue;
  1.7950 +            } else {
  1.7951 +                if(((sOrder^tOrder)&0xff000000)!=0) {
  1.7952 +                    if(sOrder < tOrder) {
  1.7953 +                        return UCOL_LESS;
  1.7954 +                    } else if(sOrder > tOrder) {
  1.7955 +                        return UCOL_GREATER;
  1.7956 +                    }
  1.7957 +                }
  1.7958 +                sOrder<<=8;
  1.7959 +                tOrder<<=8;
  1.7960 +            }
  1.7961 +        }
  1.7962 +    }
  1.7963 +    return UCOL_EQUAL; // reached when strength stops before tertiary and every compared level matched
  1.7964 +}
  1.7965 +
  1.7966 +/*
  1.7967 +  General UTF-8 comparison path: wraps source and target in UTF-8 UCharIterators and hands them to the collIterate-based ucol_strcollRegular().
  1.7968 +  Note: ucol_strcollUTF8 supports NUL-terminated input. Calculating the length of a NUL-terminated string takes extra CPU cycles; here the lengths are passed to uiter_setUTF8() as received.
  1.7969 +*/
  1.7970 +static UCollationResult
  1.7971 +ucol_strcollRegularUTF8(
  1.7972 +                    const UCollator *coll,
  1.7973 +                    const char      *source,
  1.7974 +                    int32_t         sourceLength,
  1.7975 +                    const char      *target,
  1.7976 +                    int32_t         targetLength,
  1.7977 +                    UErrorCode      *status)
  1.7978 +{
  1.7979 +    UCharIterator src;
  1.7980 +    UCharIterator tgt;
  1.7981 +
  1.7982 +    uiter_setUTF8(&src, source, sourceLength);
  1.7983 +    uiter_setUTF8(&tgt, target, targetLength);
  1.7984 +
  1.7985 +    // Preparing the context objects for iterating over strings
  1.7986 +    collIterate sColl, tColl;
  1.7987 +    IInit_collIterate(coll, NULL, -1, &sColl, status); // no UChar text is attached (NULL, -1); the character iterators set below supply the input
  1.7988 +    IInit_collIterate(coll, NULL, -1, &tColl, status);
  1.7989 +    if(U_FAILURE(*status)) { // on failure the function reports UCOL_EQUAL and leaves the error in *status
  1.7990 +        UTRACE_EXIT_VALUE_STATUS(UCOL_EQUAL, *status) // NOTE(review): no UTRACE_ENTRY is visible in this function - confirm this macro is valid here when tracing is enabled
  1.7991 +        return UCOL_EQUAL;
  1.7992 +    }
  1.7993 +    // The division for the array length may truncate the array size to
  1.7994 +    // a little less than UNORM_ITER_SIZE, but that size is dimensioned too high
  1.7995 +    // for all platforms anyway.
  1.7996 +    UAlignedMemory stackNormIter1[UNORM_ITER_SIZE/sizeof(UAlignedMemory)];
  1.7997 +    UAlignedMemory stackNormIter2[UNORM_ITER_SIZE/sizeof(UAlignedMemory)];
  1.7998 +    UNormIterator *sNormIter = NULL, *tNormIter = NULL;
  1.7999 +
  1.8000 +    sColl.iterator = &src;
  1.8001 +    sColl.flags |= UCOL_USE_ITERATOR;
  1.8002 +    tColl.flags |= UCOL_USE_ITERATOR;
  1.8003 +    tColl.iterator = &tgt;
  1.8004 +
  1.8005 +    if(ucol_getAttribute(coll, UCOL_NORMALIZATION_MODE, status) == UCOL_ON) { // with normalization on, each iterator is wrapped in a normalizing iterator (UNORM_FCD) and UCOL_ITER_NORM is cleared on the collIterate
  1.8006 +        sNormIter = unorm_openIter(stackNormIter1, sizeof(stackNormIter1), status);
  1.8007 +        sColl.iterator = unorm_setIter(sNormIter, &src, UNORM_FCD, status);
  1.8008 +        sColl.flags &= ~UCOL_ITER_NORM;
  1.8009 +
  1.8010 +        tNormIter = unorm_openIter(stackNormIter2, sizeof(stackNormIter2), status);
  1.8011 +        tColl.iterator = unorm_setIter(tNormIter, &tgt, UNORM_FCD, status);
  1.8012 +        tColl.flags &= ~UCOL_ITER_NORM;
  1.8013 +    }
  1.8014 +    // NOTE(review): *status is not re-checked after the iterator setup, and no unorm_closeIter() call appears in this function - confirm nothing leaks if unorm_openIter() cannot use the stack buffers
  1.8015 +    return ucol_strcollRegular(&sColl, &tColl, status);
  1.8016 +}
  1.8017 +/* UTF-8 flavour of the Latin-1 contraction helper: given a contraction special CE, decodes the code point at byte offset *index of s and returns the matching entry of coll->latinOneCEs, or UCOL_BAIL_OUT_CE when that code point cannot be handled by the Latin-1 fast path. */
  1.8018 +static inline uint32_t
  1.8019 +ucol_getLatinOneContractionUTF8(const UCollator *coll, int32_t strength, // strength selects the table section: entries are read at strength*coll->latinOneTableLen + ...
  1.8020 +                          uint32_t CE, const char *s, int32_t *index, int32_t len) // *index is a byte offset, advanced past each code point consumed here; len < 0 means s is NUL-terminated
  1.8021 +{
  1.8022 +    const UChar *UCharOffset = (UChar *)coll->image+getContractOffset(CE&0xFFF); // 16-bit candidate code units for this contraction, located by passing the low 12 bits of CE to getContractOffset()
  1.8023 +    int32_t latinOneOffset = (CE & 0x00FFF000) >> 12; // bits 12..23 of CE: index of this contraction's first result within one section of latinOneCEs
  1.8024 +    int32_t offset = 1; // candidates are scanned from index 1; result index 0 is what is returned when nothing matches
  1.8025 +    UChar32 schar = 0, tchar = 0; // schar: code point decoded from the string, tchar: current candidate from the table
  1.8026 +
  1.8027 +    for(;;) {
  1.8028 +        if (*index == len) { // end of a length-delimited string: nothing to contract with
  1.8029 +            return(coll->latinOneCEs[strength*coll->latinOneTableLen+latinOneOffset]);
  1.8030 +        }
  1.8031 +        U8_GET_OR_FFFD((const uint8_t*)s, 0, *index, len, schar); // peek at the code point; *index is only moved by U8_FWD_1 below
  1.8032 +        if (len < 0 && schar == 0) { // terminator of a NUL-terminated string: nothing to contract with
  1.8033 +            return(coll->latinOneCEs[strength*coll->latinOneTableLen+latinOneOffset]);
  1.8034 +        }
  1.8035 +        if(schar > 0xFFFF) { return UCOL_BAIL_OUT_CE; } // fix: a supplementary code point can never equal a 16-bit candidate and no candidate could stop the scan below, so bail out like for any other non-Latin-1 character
  1.8036 +        while(schar > (tchar = *(UCharOffset+offset))) { /* since the contraction codepoints should be ordered, we skip all that are smaller */
  1.8037 +            offset++;
  1.8038 +        }
  1.8039 +
  1.8040 +        if (schar == tchar) { // the next code point completes a contraction: consume it and return the entry stored for that candidate
  1.8041 +            U8_FWD_1(s, *index, len);
  1.8042 +            return(coll->latinOneCEs[strength*coll->latinOneTableLen+latinOneOffset+offset]);
  1.8043 +        }
  1.8044 +        else
  1.8045 +        {
  1.8046 +            if(schar & 0xFF00 /*> UCOL_ENDOFLATIN1RANGE*/) { // schar is at most 0xFFFF here, so this mask catches everything above Latin-1
  1.8047 +                return UCOL_BAIL_OUT_CE;
  1.8048 +            }
  1.8049 +            // skip completely ignorables
  1.8050 +            uint32_t isZeroCE = UTRIE_GET32_FROM_LEAD(&coll->mapping, schar); // value the collator's mapping trie holds for schar
  1.8051 +            if(isZeroCE == 0) { // we have to ignore completely ignorables: step over it and test the following code point (offset is not reset)
  1.8052 +                U8_FWD_1(s, *index, len);
  1.8053 +                continue;
  1.8054 +            }
  1.8055 +
  1.8056 +            return(coll->latinOneCEs[strength*coll->latinOneTableLen+latinOneOffset]); // no contraction: return the no-match entry, *index unchanged for this code point
  1.8057 +        }
  1.8058 +    }
  1.8059 +}
  1.8060 +
  1.8061 +static inline UCollationResult
  1.8062 +ucol_strcollUseLatin1UTF8(
  1.8063 +                const UCollator *coll,
  1.8064 +                const char      *source,
  1.8065 +                int32_t         sLen,
  1.8066 +                const char      *target,
  1.8067 +                int32_t         tLen,
  1.8068 +                UErrorCode      *status)
  1.8069 +{
  1.8070 +    U_ALIGN_CODE(16);
  1.8071 +    int32_t strength = coll->strength;
  1.8072 +
  1.8073 +    int32_t sIndex = 0, tIndex = 0;
  1.8074 +    UChar32 sChar = 0, tChar = 0;
  1.8075 +    uint32_t sOrder=0, tOrder=0;
  1.8076 +
  1.8077 +    UBool endOfSource = FALSE;
  1.8078 +
  1.8079 +    uint32_t *elements = coll->latinOneCEs;
  1.8080 +
  1.8081 +    UBool haveContractions = FALSE; // if we have contractions in our string
  1.8082 +                                    // we cannot do French secondary
  1.8083 +
  1.8084 +    // Do the primary level
  1.8085 +    for(;;) {
  1.8086 +        while(sOrder==0) { // this loop skips primary ignorables
  1.8087 +            // sOrder=getNextlatinOneCE(source);
  1.8088 +            if (sIndex == sLen) {
  1.8089 +                endOfSource = TRUE;
  1.8090 +                break;
  1.8091 +            }
  1.8092 +            U8_NEXT_OR_FFFD(source, sIndex, sLen ,sChar);
  1.8093 +            if (sLen < 0 && sChar == 0) {
  1.8094 +                endOfSource = TRUE;
  1.8095 +                sLen = sIndex;
  1.8096 +                break;
  1.8097 +            }
  1.8098 +            if(sChar&0xFFFFFF00) { // if we encounter non-latin-1, we bail out (sChar > 0xFF, but this is faster on win32)
  1.8099 +                //fprintf(stderr, "R");
  1.8100 +                return ucol_strcollRegularUTF8(coll, source, sLen, target, tLen, status);
  1.8101 +            }
  1.8102 +            sOrder = elements[sChar];
  1.8103 +            if(sOrder >= UCOL_NOT_FOUND) { // if we got a special
  1.8104 +                // specials can basically be either contractions or bail-out signs. If we get anything
  1.8105 +                // else, we'll bail out anywasy
  1.8106 +                if(getCETag(sOrder) == CONTRACTION_TAG) {
  1.8107 +                    sOrder = ucol_getLatinOneContractionUTF8(coll, UCOL_PRIMARY, sOrder, source, &sIndex, sLen);
  1.8108 +                    haveContractions = TRUE; // if there are contractions, we cannot do French secondary
  1.8109 +                    // However, if there are contractions in the table, but we always use just one char,
  1.8110 +                    // we might be able to do French. This should be checked out.
  1.8111 +                }
  1.8112 +                if(sOrder >= UCOL_NOT_FOUND /*== UCOL_BAIL_OUT_CE*/) {
  1.8113 +                    //fprintf(stderr, "S");
  1.8114 +                    return ucol_strcollRegularUTF8(coll, source, sLen, target, tLen, status);
  1.8115 +                }
  1.8116 +            }
  1.8117 +        }
  1.8118 +
  1.8119 +        while(tOrder==0) {  // this loop skips primary ignorables
  1.8120 +            // tOrder=getNextlatinOneCE(target);
  1.8121 +            if (tIndex == tLen) {
  1.8122 +                if(endOfSource) {
  1.8123 +                    goto endOfPrimLoopU8;
  1.8124 +                } else {
  1.8125 +                    return UCOL_GREATER;
  1.8126 +                }
  1.8127 +            }
  1.8128 +            U8_NEXT_OR_FFFD(target, tIndex, tLen, tChar);
  1.8129 +            if (tLen < 0 && tChar == 0) {
  1.8130 +                if(endOfSource) {
  1.8131 +                    tLen = tIndex;
  1.8132 +                    goto endOfPrimLoopU8;
  1.8133 +                } else {
  1.8134 +                    return UCOL_GREATER;
  1.8135 +                }
  1.8136 +            }
  1.8137 +            if(tChar&0xFFFFFF00) { // if we encounter non-latin-1, we bail out (sChar > 0xFF, but this is faster on win32)
  1.8138 +                //fprintf(stderr, "R");
  1.8139 +                return ucol_strcollRegularUTF8(coll, source, sLen, target, tLen, status);
  1.8140 +            }
  1.8141 +            tOrder = elements[tChar];
  1.8142 +            if(tOrder >= UCOL_NOT_FOUND) {
  1.8143 +                // Handling specials, see the comments for source
  1.8144 +                if(getCETag(tOrder) == CONTRACTION_TAG) {
  1.8145 +                    tOrder = ucol_getLatinOneContractionUTF8(coll, UCOL_PRIMARY, tOrder, target, &tIndex, tLen);
  1.8146 +                    haveContractions = TRUE;
  1.8147 +                }
  1.8148 +                if(tOrder >= UCOL_NOT_FOUND /*== UCOL_BAIL_OUT_CE*/) {
  1.8149 +                    //fprintf(stderr, "S");
  1.8150 +                    return ucol_strcollRegularUTF8(coll, source, sLen, target, tLen, status);
  1.8151 +                }
  1.8152 +            }
  1.8153 +        }
  1.8154 +        if(endOfSource) { // source is finished, but target is not, say the result.
  1.8155 +            return UCOL_LESS;
  1.8156 +        }
  1.8157 +
  1.8158 +        if(sOrder == tOrder) { // if we have same CEs, we continue the loop
  1.8159 +            sOrder = 0; tOrder = 0;
  1.8160 +            continue;
  1.8161 +        } else {
  1.8162 +            // compare current top bytes
  1.8163 +            if(((sOrder^tOrder)&0xFF000000)!=0) {
  1.8164 +                // top bytes differ, return difference
  1.8165 +                if(sOrder < tOrder) {
  1.8166 +                    return UCOL_LESS;
  1.8167 +                } else if(sOrder > tOrder) {
  1.8168 +                    return UCOL_GREATER;
  1.8169 +                }
  1.8170 +                // instead of return (int32_t)(sOrder>>24)-(int32_t)(tOrder>>24);
  1.8171 +                // since we must return enum value
  1.8172 +            }
  1.8173 +
  1.8174 +            // top bytes match, continue with following bytes
  1.8175 +            sOrder<<=8;
  1.8176 +            tOrder<<=8;
  1.8177 +        }
  1.8178 +    }
  1.8179 +
  1.8180 +endOfPrimLoopU8:
  1.8181 +    // after primary loop, we definitely know the sizes of strings,
  1.8182 +    // so we set it and use simpler loop for secondaries and tertiaries
  1.8183 +    sLen = sIndex; tLen = tIndex;
  1.8184 +    if(strength >= UCOL_SECONDARY) {
  1.8185 +        // adjust the table beggining
  1.8186 +        elements += coll->latinOneTableLen;
  1.8187 +        endOfSource = FALSE;
  1.8188 +
  1.8189 +        if(coll->frenchCollation == UCOL_OFF) { // non French
  1.8190 +            // This loop is a simplified copy of primary loop
  1.8191 +            // at this point we know that whole strings are latin-1, so we don't
  1.8192 +            // check for that. We also know that we only have contractions as
  1.8193 +            // specials.
  1.8194 +            sIndex = 0; tIndex = 0;
  1.8195 +            for(;;) {
  1.8196 +                while(sOrder==0) {
  1.8197 +                    if(sIndex==sLen) {
  1.8198 +                        endOfSource = TRUE;
  1.8199 +                        break;
  1.8200 +                    }
  1.8201 +                    U_ASSERT(sLen >= 0);
  1.8202 +                    U8_NEXT_OR_FFFD(source, sIndex, sLen, sChar);
  1.8203 +                    U_ASSERT(sChar >= 0 && sChar <= 0xFF);
  1.8204 +                    sOrder = elements[sChar];
  1.8205 +                    if(sOrder > UCOL_NOT_FOUND) {
  1.8206 +                        sOrder = ucol_getLatinOneContractionUTF8(coll, UCOL_SECONDARY, sOrder, source, &sIndex, sLen);
  1.8207 +                    }
  1.8208 +                }
  1.8209 +
  1.8210 +                while(tOrder==0) {
  1.8211 +                    if(tIndex==tLen) {
  1.8212 +                        if(endOfSource) {
  1.8213 +                            goto endOfSecLoopU8;
  1.8214 +                        } else {
  1.8215 +                            return UCOL_GREATER;
  1.8216 +                        }
  1.8217 +                    }
  1.8218 +                    U_ASSERT(tLen >= 0);
  1.8219 +                    U8_NEXT_OR_FFFD(target, tIndex, tLen, tChar);
  1.8220 +                    U_ASSERT(tChar >= 0 && tChar <= 0xFF);
  1.8221 +                    tOrder = elements[tChar];
  1.8222 +                    if(tOrder > UCOL_NOT_FOUND) {
  1.8223 +                        tOrder = ucol_getLatinOneContractionUTF8(coll, UCOL_SECONDARY, tOrder, target, &tIndex, tLen);
  1.8224 +                    }
  1.8225 +                }
  1.8226 +                if(endOfSource) {
  1.8227 +                    return UCOL_LESS;
  1.8228 +                }
  1.8229 +
  1.8230 +                if(sOrder == tOrder) {
  1.8231 +                    sOrder = 0; tOrder = 0;
  1.8232 +                    continue;
  1.8233 +                } else {
  1.8234 +                    // see primary loop for comments on this
  1.8235 +                    if(((sOrder^tOrder)&0xFF000000)!=0) {
  1.8236 +                        if(sOrder < tOrder) {
  1.8237 +                            return UCOL_LESS;
  1.8238 +                        } else if(sOrder > tOrder) {
  1.8239 +                            return UCOL_GREATER;
  1.8240 +                        }
  1.8241 +                    }
  1.8242 +                    sOrder<<=8;
  1.8243 +                    tOrder<<=8;
  1.8244 +                }
  1.8245 +            }
  1.8246 +        } else { // French
  1.8247 +            if(haveContractions) { // if we have contractions, we have to bail out
  1.8248 +                // since we don't really know how to handle them here
  1.8249 +                return ucol_strcollRegularUTF8(coll, source, sLen, target, tLen, status);
  1.8250 +            }
  1.8251 +            // For French, we go backwards
  1.8252 +            sIndex = sLen; tIndex = tLen;
  1.8253 +            for(;;) {
  1.8254 +                while(sOrder==0) {
  1.8255 +                    if(sIndex==0) {
  1.8256 +                        endOfSource = TRUE;
  1.8257 +                        break;
  1.8258 +                    }
  1.8259 +                    U8_PREV_OR_FFFD(source, 0, sIndex, sChar);
  1.8260 +                    U_ASSERT(sChar >= 0 && sChar <= 0xFF);
  1.8261 +                    sOrder = elements[sChar];
  1.8262 +                    // don't even look for contractions
  1.8263 +                }
  1.8264 +
  1.8265 +                while(tOrder==0) {
  1.8266 +                    if(tIndex==0) {
  1.8267 +                        if(endOfSource) {
  1.8268 +                            goto endOfSecLoopU8;
  1.8269 +                        } else {
  1.8270 +                            return UCOL_GREATER;
  1.8271 +                        }
  1.8272 +                    }
  1.8273 +                    U8_PREV_OR_FFFD(target, 0, tIndex, tChar);
  1.8274 +                    U_ASSERT(tChar >= 0 && tChar <= 0xFF);
  1.8275 +                    tOrder = elements[tChar];
  1.8276 +                    // don't even look for contractions
  1.8277 +                }
  1.8278 +                if(endOfSource) {
  1.8279 +                    return UCOL_LESS;
  1.8280 +                }
  1.8281 +
  1.8282 +                if(sOrder == tOrder) {
  1.8283 +                    sOrder = 0; tOrder = 0;
  1.8284 +                    continue;
  1.8285 +                } else {
  1.8286 +                    // see the primary loop for comments
  1.8287 +                    if(((sOrder^tOrder)&0xFF000000)!=0) {
  1.8288 +                        if(sOrder < tOrder) {
  1.8289 +                            return UCOL_LESS;
  1.8290 +                        } else if(sOrder > tOrder) {
  1.8291 +                            return UCOL_GREATER;
  1.8292 +                        }
  1.8293 +                    }
  1.8294 +                    sOrder<<=8;
  1.8295 +                    tOrder<<=8;
  1.8296 +                }
  1.8297 +            }
  1.8298 +        }
  1.8299 +    }
  1.8300 +
  1.8301 +endOfSecLoopU8:
  1.8302 +    if(strength >= UCOL_TERTIARY) {
  1.8303 +        // tertiary loop is the same as secondary (except no French)
  1.8304 +        elements += coll->latinOneTableLen;
  1.8305 +        sIndex = 0; tIndex = 0;
  1.8306 +        endOfSource = FALSE;
  1.8307 +        for(;;) {
  1.8308 +            while(sOrder==0) {
  1.8309 +                if(sIndex==sLen) {
  1.8310 +                    endOfSource = TRUE;
  1.8311 +                    break;
  1.8312 +                }
  1.8313 +                U_ASSERT(sLen >= 0);
  1.8314 +                U8_NEXT_OR_FFFD(source, sIndex, sLen, sChar);
  1.8315 +                U_ASSERT(sChar >= 0 && sChar <= 0xFF);
  1.8316 +                sOrder = elements[sChar];
  1.8317 +                if(sOrder > UCOL_NOT_FOUND) {
  1.8318 +                    sOrder = ucol_getLatinOneContractionUTF8(coll, UCOL_TERTIARY, sOrder, source, &sIndex, sLen);
  1.8319 +                }
  1.8320 +            }
  1.8321 +            while(tOrder==0) {
  1.8322 +                if(tIndex==tLen) {
  1.8323 +                    if(endOfSource) {
  1.8324 +                        return UCOL_EQUAL; // if both strings are at the end, they are equal
  1.8325 +                    } else {
  1.8326 +                        return UCOL_GREATER;
  1.8327 +                    }
  1.8328 +                }
  1.8329 +                U_ASSERT(tLen >= 0);
  1.8330 +                U8_NEXT_OR_FFFD(target, tIndex, tLen, tChar);
  1.8331 +                U_ASSERT(tChar >= 0 && tChar <= 0xFF);
  1.8332 +                tOrder = elements[tChar];
  1.8333 +                if(tOrder > UCOL_NOT_FOUND) {
  1.8334 +                    tOrder = ucol_getLatinOneContractionUTF8(coll, UCOL_TERTIARY, tOrder, target, &tIndex, tLen);
  1.8335 +                }
  1.8336 +            }
  1.8337 +            if(endOfSource) {
  1.8338 +                return UCOL_LESS;
  1.8339 +            }
  1.8340 +            if(sOrder == tOrder) {
  1.8341 +                sOrder = 0; tOrder = 0;
  1.8342 +                continue;
  1.8343 +            } else {
  1.8344 +                if(((sOrder^tOrder)&0xff000000)!=0) {
  1.8345 +                    if(sOrder < tOrder) {
  1.8346 +                        return UCOL_LESS;
  1.8347 +                    } else if(sOrder > tOrder) {
  1.8348 +                        return UCOL_GREATER;
  1.8349 +                    }
  1.8350 +                }
  1.8351 +                sOrder<<=8;
  1.8352 +                tOrder<<=8;
  1.8353 +            }
  1.8354 +        }
  1.8355 +    }
  1.8356 +    return UCOL_EQUAL;
  1.8357 +}
  1.8358 +
  1.8359 +/*
  1.8360 + * ucol_strcollIter  Compares the text delivered by two UCharIterators.
  1.8361 + *   status  in/out error code; nothing is done if it is NULL or already a failure.
  1.8362 + *   sIter == tIter is answered with UCOL_EQUAL right away; otherwise a NULL coll,
  1.8363 + *   sIter or tIter sets U_ILLEGAL_ARGUMENT_ERROR.
  1.8364 + * Every error path returns UCOL_EQUAL, so callers have to check *status.
  1.8365 + */
  1.8366 +U_CAPI UCollationResult U_EXPORT2
  1.8367 +ucol_strcollIter(const UCollator *coll, UCharIterator *sIter, UCharIterator *tIter, UErrorCode *status)
  1.8368 +{
  1.8369 +    if(!status || U_FAILURE(*status)) {
  1.8370 +        return UCOL_EQUAL;
  1.8371 +    }
  1.8372 +
  1.8373 +    UTRACE_ENTRY(UTRACE_UCOL_STRCOLLITER);
  1.8374 +    UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, sIter=%p, tIter=%p", coll, sIter, tIter);
  1.8375 +
  1.8376 +    if (sIter == tIter) {
  1.8377 +        UTRACE_EXIT_VALUE_STATUS(UCOL_EQUAL, *status)
  1.8378 +        return UCOL_EQUAL;
  1.8379 +    }
  1.8380 +    if(sIter == NULL || tIter == NULL || coll == NULL) {
  1.8381 +        *status = U_ILLEGAL_ARGUMENT_ERROR;
  1.8382 +        UTRACE_EXIT_VALUE_STATUS(UCOL_EQUAL, *status)
  1.8383 +        return UCOL_EQUAL;
  1.8384 +    }
  1.8385 +
  1.8386 +    UCollationResult result = UCOL_EQUAL;
  1.8387 +    // Preparing the context objects for iterating over strings
  1.8388 +    collIterate sColl, tColl;
  1.8389 +    IInit_collIterate(coll, NULL, -1, &sColl, status);
  1.8390 +    IInit_collIterate(coll, NULL, -1, &tColl, status);
  1.8391 +    if(U_FAILURE(*status)) {
  1.8392 +        UTRACE_EXIT_VALUE_STATUS(UCOL_EQUAL, *status)
  1.8393 +        return UCOL_EQUAL;
  1.8394 +    }
  1.8395 +    // The division for the array length may truncate the array size to
  1.8396 +    // a little less than UNORM_ITER_SIZE, but that size is dimensioned too high
  1.8397 +    // for all platforms anyway.
  1.8398 +    UAlignedMemory stackNormIter1[UNORM_ITER_SIZE/sizeof(UAlignedMemory)];
  1.8399 +    UAlignedMemory stackNormIter2[UNORM_ITER_SIZE/sizeof(UAlignedMemory)];
  1.8400 +    UNormIterator *sNormIter = NULL, *tNormIter = NULL;
  1.8401 +
  1.8402 +    sColl.iterator = sIter;
  1.8403 +    sColl.flags |= UCOL_USE_ITERATOR;
  1.8404 +    tColl.flags |= UCOL_USE_ITERATOR;
  1.8405 +    tColl.iterator = tIter;
  1.8406 +
  1.8407 +    if(ucol_getAttribute(coll, UCOL_NORMALIZATION_MODE, status) == UCOL_ON) {
  1.8408 +        sNormIter = unorm_openIter(stackNormIter1, sizeof(stackNormIter1), status);
  1.8409 +        sColl.iterator = unorm_setIter(sNormIter, sIter, UNORM_FCD, status);
  1.8410 +        sColl.flags &= ~UCOL_ITER_NORM;
  1.8411 +        tNormIter = unorm_openIter(stackNormIter2, sizeof(stackNormIter2), status);
  1.8412 +        tColl.iterator = unorm_setIter(tNormIter, tIter, UNORM_FCD, status);
  1.8413 +        tColl.flags &= ~UCOL_ITER_NORM;
  1.8414 +    }
  1.8415 +
  1.8416 +    UChar32 sChar = U_SENTINEL, tChar = U_SENTINEL;
  1.8417 +    // The normalization setup above reports errors only through status. Stop before
  1.8418 +    // dereferencing an iterator that unorm_setIter() may not have delivered.
  1.8419 +    if(U_FAILURE(*status)) {
  1.8420 +        goto end_compare;
  1.8421 +    }
  1.8422 +
  1.8423 +    while((sChar = sColl.iterator->next(sColl.iterator)) ==
  1.8424 +        (tChar = tColl.iterator->next(tColl.iterator))) {
  1.8425 +            if(sChar == U_SENTINEL) {
  1.8426 +                result = UCOL_EQUAL;
  1.8427 +                goto end_compare;
  1.8428 +            }
  1.8429 +    }
  1.8430 +
  1.8431 +    if(sChar == U_SENTINEL) {
  1.8432 +        tChar = tColl.iterator->previous(tColl.iterator);
  1.8433 +    }
  1.8434 +    if(tChar == U_SENTINEL) {
  1.8435 +        sChar = sColl.iterator->previous(sColl.iterator);
  1.8436 +    }
  1.8437 +
  1.8438 +    sChar = sColl.iterator->previous(sColl.iterator);
  1.8439 +    tChar = tColl.iterator->previous(tColl.iterator);
  1.8440 +
  1.8441 +    if (ucol_unsafeCP((UChar)sChar, coll) || ucol_unsafeCP((UChar)tChar, coll)) {
  1.8442 +        // We are stopped in the middle of a contraction.
  1.8443 +        // Scan backwards through the == part of the string looking for the start of the contraction.
  1.8444 +        //   It doesn't matter which string we scan, since they are the same in this region.
  1.8445 +        do {
  1.8446 +            sChar = sColl.iterator->previous(sColl.iterator);
  1.8447 +            tChar = tColl.iterator->previous(tColl.iterator);
  1.8448 +        } while (sChar != U_SENTINEL && ucol_unsafeCP((UChar)sChar, coll));
  1.8449 +    }
  1.8450 +
  1.8451 +    if(U_SUCCESS(*status)) {
  1.8452 +        result = ucol_strcollRegular(&sColl, &tColl, status);
  1.8453 +    }
  1.8454 +end_compare:
  1.8455 +    if(sNormIter || tNormIter) {
  1.8456 +        unorm_closeIter(sNormIter);
  1.8457 +        unorm_closeIter(tNormIter);
  1.8458 +    }
  1.8459 +    UTRACE_EXIT_VALUE_STATUS(result, *status)
  1.8460 +    return result;
  1.8461 +}
  1.8462 +
  1.8463 +
  1.8464 +/*
  1.8465 + * ucol_strcoll     Main public API string comparison function
  1.8466 + *   coll           collator to use; it is dereferenced without a NULL check
  1.8467 + *   source/target  UTF-16 text; NULL is tolerated only together with a length of 0
  1.8468 + *   sourceLength/targetLength  length in UChars, or -1 for NUL-terminated text
  1.8469 + * Returns the collation order of source relative to target. There is no status
  1.8470 + * parameter, so rejected arguments yield UCOL_EQUAL and internal error codes are dropped.
  1.8471 + */
  1.8472 +U_CAPI UCollationResult U_EXPORT2
  1.8473 +ucol_strcoll( const UCollator    *coll,
  1.8474 +              const UChar        *source,
  1.8475 +              int32_t            sourceLength,
  1.8476 +              const UChar        *target,
  1.8477 +              int32_t            targetLength)
  1.8478 +{
  1.8479 +    U_ALIGN_CODE(16);
  1.8480 +
  1.8481 +    UTRACE_ENTRY(UTRACE_UCOL_STRCOLL);
  1.8482 +    if (UTRACE_LEVEL(UTRACE_VERBOSE)) {
  1.8483 +        UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, source=%p, target=%p", coll, source, target);
  1.8484 +        UTRACE_DATA2(UTRACE_VERBOSE, "source string = %vh ", source, sourceLength);
  1.8485 +        UTRACE_DATA2(UTRACE_VERBOSE, "target string = %vh ", target, targetLength);
  1.8486 +    }
  1.8487 +
  1.8488 +    if((source == NULL && sourceLength != 0) || (target == NULL && targetLength != 0)) {
  1.8489 +        // do not crash, but return. Should have
  1.8490 +        // status argument to return error.
  1.8491 +        UTRACE_EXIT_VALUE(UCOL_EQUAL);
  1.8492 +        return UCOL_EQUAL;
  1.8493 +    }
  1.8494 +
  1.8495 +    /* Quick check if source and target are same strings. */
  1.8496 +    /* They should either both be NULL terminated or the explicit length should be set on both. */
  1.8497 +    if (source==target && sourceLength==targetLength) {
  1.8498 +        UTRACE_EXIT_VALUE(UCOL_EQUAL);
  1.8499 +        return UCOL_EQUAL;
  1.8500 +    }
  1.8501 +
  1.8502 +    if(coll->delegate != NULL) {
  1.8503 +        // Hand over to the C++ Collator and close the trace like every other exit path.
  1.8504 +        UErrorCode status = U_ZERO_ERROR;
  1.8505 +        UCollationResult delegateResult = ((const Collator*)coll->delegate)->compare(source,sourceLength,target,targetLength, status);
  1.8506 +        UTRACE_EXIT_VALUE(delegateResult);
  1.8507 +        return delegateResult;
  1.8508 +    }
  1.8509 +
  1.8510 +    /* Scan the strings.  Find:                                                             */
  1.8511 +    /*    The length of any leading portion that is equal                                   */
  1.8512 +    /*    Whether they are exactly equal.  (in which case we just return)                   */
  1.8513 +    const UChar    *pSrc    = source;
  1.8514 +    const UChar    *pTarg   = target;
  1.8515 +    int32_t        equalLength;
  1.8516 +
  1.8517 +    if (sourceLength == -1 && targetLength == -1) {
  1.8518 +        // Both strings are null terminated.
  1.8519 +        //    Scan through any leading equal portion.
  1.8520 +        while (*pSrc == *pTarg && *pSrc != 0) {
  1.8521 +            pSrc++;
  1.8522 +            pTarg++;
  1.8523 +        }
  1.8524 +        if (*pSrc == 0 && *pTarg == 0) {
  1.8525 +            UTRACE_EXIT_VALUE(UCOL_EQUAL);
  1.8526 +            return UCOL_EQUAL;
  1.8527 +        }
  1.8528 +        equalLength = (int32_t)(pSrc - source);
  1.8529 +    } else {
  1.8530 +        // One or both strings has an explicit length.
  1.8531 +        const UChar    *pSrcEnd = source + sourceLength;
  1.8532 +        const UChar    *pTargEnd = target + targetLength;
  1.8533 +
  1.8534 +        // Scan while the strings are bitwise ==, or until one is exhausted.
  1.8535 +        for (;;) {
  1.8536 +            if (pSrc == pSrcEnd || pTarg == pTargEnd) {
  1.8537 +                break;
  1.8538 +            }
  1.8539 +            if ((*pSrc == 0 && sourceLength == -1) || (*pTarg == 0 && targetLength == -1)) {
  1.8540 +                break;
  1.8541 +            }
  1.8542 +            if (*pSrc != *pTarg) {
  1.8543 +                break;
  1.8544 +            }
  1.8545 +            pSrc++;
  1.8546 +            pTarg++;
  1.8547 +        }
  1.8548 +        equalLength = (int32_t)(pSrc - source);
  1.8549 +
  1.8550 +        // If we made it all the way through both strings, we are done.  They are ==
  1.8551 +        if ((pSrc ==pSrcEnd  || (pSrcEnd <pSrc  && *pSrc==0))  &&   /* At end of src string, however it was specified. */
  1.8552 +            (pTarg==pTargEnd || (pTargEnd<pTarg && *pTarg==0)))     /* and also at end of dest string                  */
  1.8553 +        {
  1.8554 +            UTRACE_EXIT_VALUE(UCOL_EQUAL);
  1.8555 +            return UCOL_EQUAL;
  1.8556 +        }
  1.8557 +    }
  1.8558 +    if (equalLength > 0) {
  1.8559 +        /* There is an identical portion at the beginning of the two strings.        */
  1.8560 +        /*   If the identical portion ends within a contraction or a combining       */
  1.8561 +        /*   character sequence, back up to the start of that sequence.              */
  1.8562 +        // pSrc and pTarg were left at the end of the identical prefix by the scan above.
  1.8563 +        if ((pSrc  != source+sourceLength && ucol_unsafeCP(*pSrc, coll)) ||
  1.8564 +            (pTarg != target+targetLength && ucol_unsafeCP(*pTarg, coll))) {
  1.8565 +            // We are stopped in the middle of a contraction.
  1.8566 +            // Scan backwards through the == part of the string looking for the start of the contraction.
  1.8567 +            //   It doesn't matter which string we scan, since they are the same in this region.
  1.8568 +            do {
  1.8569 +                equalLength--;
  1.8570 +                pSrc--;
  1.8571 +            } while (equalLength>0 && ucol_unsafeCP(*pSrc, coll));
  1.8572 +        }
  1.8573 +
  1.8574 +        source += equalLength;
  1.8575 +        target += equalLength;
  1.8576 +        if (sourceLength > 0) {
  1.8577 +            sourceLength -= equalLength;
  1.8578 +        }
  1.8579 +        if (targetLength > 0) {
  1.8580 +            targetLength -= equalLength;
  1.8581 +        }
  1.8582 +    }
  1.8583 +
  1.8584 +    UErrorCode status = U_ZERO_ERROR;
  1.8585 +    UCollationResult returnVal;
  1.8586 +    if(!coll->latinOneUse || (sourceLength > 0 && (*source & 0xff00)) || (targetLength > 0 && (*target & 0xff00))) {
  1.8587 +        returnVal = ucol_strcollRegular(coll, source, sourceLength, target, targetLength, &status);
  1.8588 +    } else {
  1.8589 +        returnVal = ucol_strcollUseLatin1(coll, source, sourceLength, target, targetLength, &status);
  1.8590 +    }
  1.8591 +    UTRACE_EXIT_VALUE(returnVal);
  1.8592 +    return returnVal;
  1.8593 +}
  1.8594 +
  1.8595 +/*
  1.8596 + * ucol_strcollUTF8  Compares two UTF-8 strings.
  1.8597 + *   source/target   UTF-8 text; NULL with a nonzero length sets U_ILLEGAL_ARGUMENT_ERROR
  1.8598 + *   sourceLength/targetLength  length in bytes, or -1 for NUL-terminated text
  1.8599 + *   status          in/out error code; it is dereferenced unconditionally
  1.8600 + * Returns the collation order; UCOL_EQUAL if status already failed or arguments are rejected.
  1.8601 + */
  1.8602 +U_CAPI UCollationResult U_EXPORT2
  1.8603 +ucol_strcollUTF8(
  1.8604 +        const UCollator *coll,
  1.8605 +        const char      *source, int32_t sourceLength,
  1.8606 +        const char      *target, int32_t targetLength,
  1.8607 +        UErrorCode      *status)
  1.8608 +{
  1.8609 +    U_ALIGN_CODE(16);
  1.8610 +
  1.8611 +    UTRACE_ENTRY(UTRACE_UCOL_STRCOLLUTF8);
  1.8612 +    if (UTRACE_LEVEL(UTRACE_VERBOSE)) {
  1.8613 +        UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, source=%p, target=%p", coll, source, target);
  1.8614 +        UTRACE_DATA2(UTRACE_VERBOSE, "source string = %vb ", source, sourceLength);
  1.8615 +        UTRACE_DATA2(UTRACE_VERBOSE, "target string = %vb ", target, targetLength);
  1.8616 +    }
  1.8617 +
  1.8618 +    if (U_FAILURE(*status)) {
  1.8619 +        UTRACE_EXIT_VALUE_STATUS(UCOL_EQUAL, *status);
  1.8620 +        return UCOL_EQUAL;
  1.8621 +    }
  1.8622 +
  1.8623 +    if((source == NULL && sourceLength != 0) || (target == NULL && targetLength != 0)) {
  1.8624 +        *status = U_ILLEGAL_ARGUMENT_ERROR;
  1.8625 +        UTRACE_EXIT_VALUE_STATUS(UCOL_EQUAL, *status);
  1.8626 +        return UCOL_EQUAL;
  1.8627 +    }
  1.8628 +
  1.8629 +    /* Quick check if source and target are same strings. */
  1.8630 +    /* They should either both be NULL terminated or the explicit length should be set on both. */
  1.8631 +    if (source==target && sourceLength==targetLength) {
  1.8632 +        UTRACE_EXIT_VALUE_STATUS(UCOL_EQUAL, *status);
  1.8633 +        return UCOL_EQUAL;
  1.8634 +    }
  1.8635 +
  1.8636 +    if(coll->delegate != NULL) {
  1.8637 +        // Hand over to the C++ Collator and close the trace like every other exit path.
  1.8638 +        UCollationResult delegateResult = ((const Collator*)coll->delegate)->compareUTF8(
  1.8639 +            StringPiece(source, (sourceLength < 0) ? uprv_strlen(source) : sourceLength),
  1.8640 +            StringPiece(target, (targetLength < 0) ? uprv_strlen(target) : targetLength),
  1.8641 +            *status);
  1.8642 +        UTRACE_EXIT_VALUE_STATUS(delegateResult, *status);
  1.8643 +        return delegateResult;
  1.8644 +    }
  1.8645 +
  1.8646 +    /* Scan the strings.  Find:                                                             */
  1.8647 +    /*    The length of any leading portion that is equal                                   */
  1.8648 +    /*    Whether they are exactly equal.  (in which case we just return)                   */
  1.8649 +    const char  *pSrc = source;
  1.8650 +    const char  *pTarg = target;
  1.8651 +    UBool       bSrcLimit = FALSE;
  1.8652 +    UBool       bTargLimit = FALSE;
  1.8653 +
  1.8654 +    if (sourceLength == -1 && targetLength == -1) {
  1.8655 +        // Both strings are null terminated.
  1.8656 +        //    Scan through any leading equal portion.
  1.8657 +        while (*pSrc == *pTarg && *pSrc != 0) {
  1.8658 +            pSrc++;
  1.8659 +            pTarg++;
  1.8660 +        }
  1.8661 +        if (*pSrc == 0 && *pTarg == 0) {
  1.8662 +            UTRACE_EXIT_VALUE_STATUS(UCOL_EQUAL, *status);
  1.8663 +            return UCOL_EQUAL;
  1.8664 +        }
  1.8665 +        bSrcLimit = (*pSrc == 0);
  1.8666 +        bTargLimit = (*pTarg == 0);
  1.8667 +    } else {
  1.8668 +        // One or both strings has an explicit length.
  1.8669 +        const char *pSrcEnd = source + sourceLength;
  1.8670 +        const char *pTargEnd = target + targetLength;
  1.8671 +
  1.8672 +        // Scan while the strings are bitwise ==, or until one is exhausted.
  1.8673 +        for (;;) {
  1.8674 +            if (pSrc == pSrcEnd || pTarg == pTargEnd) {
  1.8675 +                break;
  1.8676 +            }
  1.8677 +            if ((*pSrc == 0 && sourceLength == -1) || (*pTarg == 0 && targetLength == -1)) {
  1.8678 +                break;
  1.8679 +            }
  1.8680 +            if (*pSrc != *pTarg) {
  1.8681 +                break;
  1.8682 +            }
  1.8683 +            pSrc++;
  1.8684 +            pTarg++;
  1.8685 +        }
  1.8686 +        bSrcLimit = (pSrc ==pSrcEnd  || (pSrcEnd <pSrc  && *pSrc==0));
  1.8687 +        bTargLimit = (pTarg==pTargEnd || (pTargEnd<pTarg && *pTarg==0));
  1.8688 +
  1.8689 +        // If we made it all the way through both strings (however their ends were specified), they are ==
  1.8690 +        if (bSrcLimit && bTargLimit) {
  1.8691 +            UTRACE_EXIT_VALUE_STATUS(UCOL_EQUAL, *status);
  1.8692 +            return UCOL_EQUAL;
  1.8693 +        }
  1.8694 +    }
  1.8695 +
  1.8696 +    U_ASSERT(!(bSrcLimit && bTargLimit));
  1.8697 +    int32_t    equalLength = (int32_t)(pSrc - source);
  1.8698 +    UBool       bSawNonLatin1 = FALSE;
  1.8699 +
  1.8700 +    if (equalLength > 0) {
  1.8701 +        // Align position to the start of UTF-8 code point.
  1.8702 +        if (bTargLimit) {
  1.8703 +            U8_SET_CP_START((const uint8_t*)source, 0, equalLength);
  1.8704 +        } else {
  1.8705 +            U8_SET_CP_START((const uint8_t*)target, 0, equalLength);
  1.8706 +        }
  1.8707 +        pSrc = source + equalLength;
  1.8708 +        pTarg = target + equalLength;
  1.8709 +    }
  1.8710 +
  1.8711 +    if (equalLength > 0) {  // tested again: U8_SET_CP_START above may have changed equalLength
  1.8712 +        /* There is an identical portion at the beginning of the two strings.        */
  1.8713 +        /*   If the identical portion ends within a contraction or a combining       */
  1.8714 +        /*   character sequence, back up to the start of that sequence.              */
  1.8715 +        UBool bUnsafeCP = FALSE;
  1.8716 +        UChar32 uc32 = -1;
  1.8717 +        if (!bSrcLimit) {
  1.8718 +            U8_GET_OR_FFFD((const uint8_t*)source, 0, equalLength, sourceLength, uc32);
  1.8719 +            if (uc32 >= 0x10000 || ucol_unsafeCP((UChar)uc32, coll)) {
  1.8720 +                bUnsafeCP = TRUE;
  1.8721 +            }
  1.8722 +            bSawNonLatin1 |= (uc32 > 0xff);
  1.8723 +        }
  1.8724 +        if (!bTargLimit) {
  1.8725 +            U8_GET_OR_FFFD((const uint8_t*)target, 0, equalLength, targetLength, uc32);
  1.8726 +            if (uc32 >= 0x10000 || ucol_unsafeCP((UChar)uc32, coll)) {
  1.8727 +                bUnsafeCP = TRUE;
  1.8728 +            }
  1.8729 +            bSawNonLatin1 |= (uc32 > 0xff);
  1.8730 +        }
  1.8731 +
  1.8732 +        if (bUnsafeCP) {
  1.8733 +            while (equalLength > 0) {
  1.8734 +                // We are stopped in the middle of a contraction.
  1.8735 +                // Scan backwards through the == part of the string looking for the start of the contraction.
  1.8736 +                //   It doesn't matter which string we scan, since they are the same in this region.
  1.8737 +                U8_PREV_OR_FFFD((const uint8_t*)source, 0, equalLength, uc32);
  1.8738 +                bSawNonLatin1 |= (uc32 > 0xff);
  1.8739 +                if (uc32 < 0x10000 && !ucol_unsafeCP((UChar)uc32, coll)) {
  1.8740 +                    break;
  1.8741 +                }
  1.8742 +            }
  1.8743 +        }
  1.8744 +        source += equalLength;
  1.8745 +        target += equalLength;
  1.8746 +        if (sourceLength > 0) {
  1.8747 +            sourceLength -= equalLength;
  1.8748 +        }
  1.8749 +        if (targetLength > 0) {
  1.8750 +            targetLength -= equalLength;
  1.8751 +        }
  1.8752 +    } else {
  1.8753 +        // Lead byte of Latin 1 character is 0x00 - 0xC3
  1.8754 +        bSawNonLatin1 = (source && (sourceLength != 0) && (uint8_t)*source > 0xc3);
  1.8755 +        bSawNonLatin1 |= (target && (targetLength != 0) && (uint8_t)*target > 0xc3);
  1.8756 +    }
  1.8757 +
  1.8758 +    UCollationResult returnVal;
  1.8759 +    if(!coll->latinOneUse || bSawNonLatin1) {
  1.8760 +        returnVal = ucol_strcollRegularUTF8(coll, source, sourceLength, target, targetLength, status);
  1.8761 +    } else {
  1.8762 +        returnVal = ucol_strcollUseLatin1UTF8(coll, source, sourceLength, target, targetLength, status);
  1.8763 +    }
  1.8764 +    UTRACE_EXIT_VALUE_STATUS(returnVal, *status);
  1.8765 +    return returnVal;
  1.8766 +}
  1.8767 +
  1.8768 +
  1.8769 +/* Convenience wrapper: TRUE exactly when ucol_strcoll() reports UCOL_GREATER. */
  1.8770 +U_CAPI UBool U_EXPORT2
  1.8771 +ucol_greater(const UCollator *coll,
  1.8772 +             const UChar *source,
  1.8773 +             int32_t sourceLength,
  1.8774 +             const UChar *target,
  1.8775 +             int32_t targetLength)
  1.8776 +{
  1.8777 +    const UCollationResult order = ucol_strcoll(coll, source, sourceLength, target, targetLength);
  1.8778 +    return order == UCOL_GREATER;
  1.8779 +}
  1.8780 +
  1.8781 +/* Convenience wrapper: TRUE unless ucol_strcoll() reports UCOL_LESS. */
  1.8782 +U_CAPI UBool U_EXPORT2
  1.8783 +ucol_greaterOrEqual(const UCollator *coll,
  1.8784 +                    const UChar *source,
  1.8785 +                    int32_t sourceLength,
  1.8786 +                    const UChar *target,
  1.8787 +                    int32_t targetLength)
  1.8788 +{
  1.8789 +    const UCollationResult order = ucol_strcoll(coll, source, sourceLength, target, targetLength);
  1.8790 +    return order != UCOL_LESS;
  1.8791 +}
  1.8792 +
  1.8793 +/* Convenience wrapper: TRUE exactly when ucol_strcoll() reports UCOL_EQUAL. */
  1.8794 +U_CAPI UBool U_EXPORT2
  1.8795 +ucol_equal(const UCollator *coll,
  1.8796 +           const UChar *source,
  1.8797 +           int32_t sourceLength,
  1.8798 +           const UChar *target,
  1.8799 +           int32_t targetLength)
  1.8800 +{
  1.8801 +    const UCollationResult order = ucol_strcoll(coll, source, sourceLength, target, targetLength);
  1.8802 +    return order == UCOL_EQUAL;
  1.8803 +}
  1.8804 +
  1.8805 +/* Copies coll->UCA->image->UCAVersion into info; info is left untouched if coll or coll->UCA is NULL. */
  1.8806 +U_CAPI void U_EXPORT2
  1.8807 +ucol_getUCAVersion(const UCollator* coll, UVersionInfo info) {
  1.8808 +    if(coll == NULL || coll->UCA == NULL) { return; }
  1.8809 +    uprv_memcpy(info, coll->UCA->image->UCAVersion, sizeof(UVersionInfo));
  1.8810 +}
  1.8811 +
  1.8812 +#endif /* #if !UCONFIG_NO_COLLATION */
The Tor Browser / file diff

diff: intl/icu/source/i18n/ucol.cpp

intl/icu/source/i18n/ucol.cpp