intl/icu/source/i18n/coleitr.cpp

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 /*
michael@0 2 *******************************************************************************
michael@0 3 * Copyright (C) 1996-2011, International Business Machines Corporation and *
michael@0 4 * others. All Rights Reserved. *
michael@0 5 *******************************************************************************
michael@0 6 */
michael@0 7
michael@0 8 /*
michael@0 9 * File coleitr.cpp
michael@0 10 *
michael@0 11 *
michael@0 12 *
michael@0 13 * Created by: Helena Shih
michael@0 14 *
michael@0 15 * Modification History:
michael@0 16 *
michael@0 17 * Date Name Description
michael@0 18 *
michael@0 19 * 6/23/97 helena Adding comments to make code more readable.
michael@0 20 * 08/03/98 erm Synched with 1.2 version of CollationElementIterator.java
michael@0 21 * 12/10/99 aliu Ported Thai collation support from Java.
michael@0 22 * 01/25/01 swquek Modified to a C++ wrapper calling C APIs (ucoliter.h)
michael@0 23 * 02/19/01 swquek Removed CollationElementsIterator() since it is
michael@0 24 * private constructor and no calls are made to it
michael@0 25 */
michael@0 26
michael@0 27 #include "unicode/utypes.h"
michael@0 28
michael@0 29 #if !UCONFIG_NO_COLLATION
michael@0 30
michael@0 31 #include "unicode/coleitr.h"
michael@0 32 #include "unicode/ustring.h"
michael@0 33 #include "ucol_imp.h"
michael@0 34 #include "uassert.h"
michael@0 35 #include "cmemory.h"
michael@0 36
michael@0 37
michael@0 38 /* Constants --------------------------------------------------------------- */
michael@0 39
michael@0 40 U_NAMESPACE_BEGIN
michael@0 41
michael@0 42 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationElementIterator)
michael@0 43
michael@0 44 /* CollationElementIterator public constructor/destructor ------------------ */
michael@0 45
michael@0 46 CollationElementIterator::CollationElementIterator(
michael@0 47 const CollationElementIterator& other)
michael@0 48 : UObject(other), isDataOwned_(TRUE)
michael@0 49 {
michael@0 50 UErrorCode status = U_ZERO_ERROR;
michael@0 51 m_data_ = ucol_openElements(other.m_data_->iteratordata_.coll, NULL, 0,
michael@0 52 &status);
michael@0 53
michael@0 54 *this = other;
michael@0 55 }
michael@0 56
michael@0 57 CollationElementIterator::~CollationElementIterator()
michael@0 58 {
michael@0 59 if (isDataOwned_) {
michael@0 60 ucol_closeElements(m_data_);
michael@0 61 }
michael@0 62 }
michael@0 63
michael@0 64 /* CollationElementIterator public methods --------------------------------- */
michael@0 65
michael@0 66 int32_t CollationElementIterator::getOffset() const
michael@0 67 {
michael@0 68 return ucol_getOffset(m_data_);
michael@0 69 }
michael@0 70
michael@0 71 /**
michael@0 72 * Get the ordering priority of the next character in the string.
michael@0 73 * @return the next character's ordering. Returns NULLORDER if an error has
michael@0 74 * occured or if the end of string has been reached
michael@0 75 */
michael@0 76 int32_t CollationElementIterator::next(UErrorCode& status)
michael@0 77 {
michael@0 78 return ucol_next(m_data_, &status);
michael@0 79 }
michael@0 80
michael@0 81 UBool CollationElementIterator::operator!=(
michael@0 82 const CollationElementIterator& other) const
michael@0 83 {
michael@0 84 return !(*this == other);
michael@0 85 }
michael@0 86
michael@0 87 UBool CollationElementIterator::operator==(
michael@0 88 const CollationElementIterator& that) const
michael@0 89 {
michael@0 90 if (this == &that || m_data_ == that.m_data_) {
michael@0 91 return TRUE;
michael@0 92 }
michael@0 93
michael@0 94 // option comparison
michael@0 95 if (m_data_->iteratordata_.coll != that.m_data_->iteratordata_.coll)
michael@0 96 {
michael@0 97 return FALSE;
michael@0 98 }
michael@0 99
michael@0 100 // the constructor and setText always sets a length
michael@0 101 // and we only compare the string not the contents of the normalization
michael@0 102 // buffer
michael@0 103 int thislength = (int)(m_data_->iteratordata_.endp - m_data_->iteratordata_.string);
michael@0 104 int thatlength = (int)(that.m_data_->iteratordata_.endp - that.m_data_->iteratordata_.string);
michael@0 105
michael@0 106 if (thislength != thatlength) {
michael@0 107 return FALSE;
michael@0 108 }
michael@0 109
michael@0 110 if (uprv_memcmp(m_data_->iteratordata_.string,
michael@0 111 that.m_data_->iteratordata_.string,
michael@0 112 thislength * U_SIZEOF_UCHAR) != 0) {
michael@0 113 return FALSE;
michael@0 114 }
michael@0 115 if (getOffset() != that.getOffset()) {
michael@0 116 return FALSE;
michael@0 117 }
michael@0 118
michael@0 119 // checking normalization buffer
michael@0 120 if ((m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) == 0) {
michael@0 121 if ((that.m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) != 0) {
michael@0 122 return FALSE;
michael@0 123 }
michael@0 124 // both are in the normalization buffer
michael@0 125 if (m_data_->iteratordata_.pos
michael@0 126 - m_data_->iteratordata_.writableBuffer.getBuffer()
michael@0 127 != that.m_data_->iteratordata_.pos
michael@0 128 - that.m_data_->iteratordata_.writableBuffer.getBuffer()) {
michael@0 129 // not in the same position in the normalization buffer
michael@0 130 return FALSE;
michael@0 131 }
michael@0 132 }
michael@0 133 else if ((that.m_data_->iteratordata_.flags & UCOL_ITER_HASLEN) == 0) {
michael@0 134 return FALSE;
michael@0 135 }
michael@0 136 // checking ce position
michael@0 137 return (m_data_->iteratordata_.CEpos - m_data_->iteratordata_.CEs)
michael@0 138 == (that.m_data_->iteratordata_.CEpos
michael@0 139 - that.m_data_->iteratordata_.CEs);
michael@0 140 }
michael@0 141
michael@0 142 /**
michael@0 143 * Get the ordering priority of the previous collation element in the string.
michael@0 144 * @param status the error code status.
michael@0 145 * @return the previous element's ordering. Returns NULLORDER if an error has
michael@0 146 * occured or if the start of string has been reached.
michael@0 147 */
michael@0 148 int32_t CollationElementIterator::previous(UErrorCode& status)
michael@0 149 {
michael@0 150 return ucol_previous(m_data_, &status);
michael@0 151 }
michael@0 152
michael@0 153 /**
michael@0 154 * Resets the cursor to the beginning of the string.
michael@0 155 */
michael@0 156 void CollationElementIterator::reset()
michael@0 157 {
michael@0 158 ucol_reset(m_data_);
michael@0 159 }
michael@0 160
michael@0 161 void CollationElementIterator::setOffset(int32_t newOffset,
michael@0 162 UErrorCode& status)
michael@0 163 {
michael@0 164 ucol_setOffset(m_data_, newOffset, &status);
michael@0 165 }
michael@0 166
michael@0 167 /**
michael@0 168 * Sets the source to the new source string.
michael@0 169 */
michael@0 170 void CollationElementIterator::setText(const UnicodeString& source,
michael@0 171 UErrorCode& status)
michael@0 172 {
michael@0 173 if (U_FAILURE(status)) {
michael@0 174 return;
michael@0 175 }
michael@0 176
michael@0 177 int32_t length = source.length();
michael@0 178 UChar *string = NULL;
michael@0 179 if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) {
michael@0 180 uprv_free((UChar *)m_data_->iteratordata_.string);
michael@0 181 }
michael@0 182 m_data_->isWritable = TRUE;
michael@0 183 if (length > 0) {
michael@0 184 string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
michael@0 185 /* test for NULL */
michael@0 186 if (string == NULL) {
michael@0 187 status = U_MEMORY_ALLOCATION_ERROR;
michael@0 188 return;
michael@0 189 }
michael@0 190 u_memcpy(string, source.getBuffer(), length);
michael@0 191 }
michael@0 192 else {
michael@0 193 string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
michael@0 194 /* test for NULL */
michael@0 195 if (string == NULL) {
michael@0 196 status = U_MEMORY_ALLOCATION_ERROR;
michael@0 197 return;
michael@0 198 }
michael@0 199 *string = 0;
michael@0 200 }
michael@0 201 /* Free offsetBuffer before initializing it. */
michael@0 202 ucol_freeOffsetBuffer(&(m_data_->iteratordata_));
michael@0 203 uprv_init_collIterate(m_data_->iteratordata_.coll, string, length,
michael@0 204 &m_data_->iteratordata_, &status);
michael@0 205
michael@0 206 m_data_->reset_ = TRUE;
michael@0 207 }
michael@0 208
michael@0 209 // Sets the source to the new character iterator.
michael@0 210 void CollationElementIterator::setText(CharacterIterator& source,
michael@0 211 UErrorCode& status)
michael@0 212 {
michael@0 213 if (U_FAILURE(status))
michael@0 214 return;
michael@0 215
michael@0 216 int32_t length = source.getLength();
michael@0 217 UChar *buffer = NULL;
michael@0 218
michael@0 219 if (length == 0) {
michael@0 220 buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
michael@0 221 /* test for NULL */
michael@0 222 if (buffer == NULL) {
michael@0 223 status = U_MEMORY_ALLOCATION_ERROR;
michael@0 224 return;
michael@0 225 }
michael@0 226 *buffer = 0;
michael@0 227 }
michael@0 228 else {
michael@0 229 buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
michael@0 230 /* test for NULL */
michael@0 231 if (buffer == NULL) {
michael@0 232 status = U_MEMORY_ALLOCATION_ERROR;
michael@0 233 return;
michael@0 234 }
michael@0 235 /*
michael@0 236 Using this constructor will prevent buffer from being removed when
michael@0 237 string gets removed
michael@0 238 */
michael@0 239 UnicodeString string;
michael@0 240 source.getText(string);
michael@0 241 u_memcpy(buffer, string.getBuffer(), length);
michael@0 242 }
michael@0 243
michael@0 244 if (m_data_->isWritable && m_data_->iteratordata_.string != NULL) {
michael@0 245 uprv_free((UChar *)m_data_->iteratordata_.string);
michael@0 246 }
michael@0 247 m_data_->isWritable = TRUE;
michael@0 248 /* Free offsetBuffer before initializing it. */
michael@0 249 ucol_freeOffsetBuffer(&(m_data_->iteratordata_));
michael@0 250 uprv_init_collIterate(m_data_->iteratordata_.coll, buffer, length,
michael@0 251 &m_data_->iteratordata_, &status);
michael@0 252 m_data_->reset_ = TRUE;
michael@0 253 }
michael@0 254
michael@0 255 int32_t CollationElementIterator::strengthOrder(int32_t order) const
michael@0 256 {
michael@0 257 UCollationStrength s = ucol_getStrength(m_data_->iteratordata_.coll);
michael@0 258 // Mask off the unwanted differences.
michael@0 259 if (s == UCOL_PRIMARY) {
michael@0 260 order &= RuleBasedCollator::PRIMARYDIFFERENCEONLY;
michael@0 261 }
michael@0 262 else if (s == UCOL_SECONDARY) {
michael@0 263 order &= RuleBasedCollator::SECONDARYDIFFERENCEONLY;
michael@0 264 }
michael@0 265
michael@0 266 return order;
michael@0 267 }
michael@0 268
michael@0 269 /* CollationElementIterator private constructors/destructors --------------- */
michael@0 270
michael@0 271 /**
michael@0 272 * This is the "real" constructor for this class; it constructs an iterator
michael@0 273 * over the source text using the specified collator
michael@0 274 */
michael@0 275 CollationElementIterator::CollationElementIterator(
michael@0 276 const UnicodeString& sourceText,
michael@0 277 const RuleBasedCollator* order,
michael@0 278 UErrorCode& status)
michael@0 279 : isDataOwned_(TRUE)
michael@0 280 {
michael@0 281 if (U_FAILURE(status)) {
michael@0 282 return;
michael@0 283 }
michael@0 284
michael@0 285 int32_t length = sourceText.length();
michael@0 286 UChar *string = NULL;
michael@0 287
michael@0 288 if (length > 0) {
michael@0 289 string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
michael@0 290 /* test for NULL */
michael@0 291 if (string == NULL) {
michael@0 292 status = U_MEMORY_ALLOCATION_ERROR;
michael@0 293 return;
michael@0 294 }
michael@0 295 /*
michael@0 296 Using this constructor will prevent buffer from being removed when
michael@0 297 string gets removed
michael@0 298 */
michael@0 299 u_memcpy(string, sourceText.getBuffer(), length);
michael@0 300 }
michael@0 301 else {
michael@0 302 string = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
michael@0 303 /* test for NULL */
michael@0 304 if (string == NULL) {
michael@0 305 status = U_MEMORY_ALLOCATION_ERROR;
michael@0 306 return;
michael@0 307 }
michael@0 308 *string = 0;
michael@0 309 }
michael@0 310 m_data_ = ucol_openElements(order->ucollator, string, length, &status);
michael@0 311
michael@0 312 /* Test for buffer overflows */
michael@0 313 if (U_FAILURE(status)) {
michael@0 314 return;
michael@0 315 }
michael@0 316 m_data_->isWritable = TRUE;
michael@0 317 }
michael@0 318
michael@0 319 /**
michael@0 320 * This is the "real" constructor for this class; it constructs an iterator over
michael@0 321 * the source text using the specified collator
michael@0 322 */
michael@0 323 CollationElementIterator::CollationElementIterator(
michael@0 324 const CharacterIterator& sourceText,
michael@0 325 const RuleBasedCollator* order,
michael@0 326 UErrorCode& status)
michael@0 327 : isDataOwned_(TRUE)
michael@0 328 {
michael@0 329 if (U_FAILURE(status))
michael@0 330 return;
michael@0 331
michael@0 332 // **** should I just drop this test? ****
michael@0 333 /*
michael@0 334 if ( sourceText.endIndex() != 0 )
michael@0 335 {
michael@0 336 // A CollationElementIterator is really a two-layered beast.
michael@0 337 // Internally it uses a Normalizer to munge the source text into a form
michael@0 338 // where all "composed" Unicode characters (such as \u00FC) are split into a
michael@0 339 // normal character and a combining accent character.
michael@0 340 // Afterward, CollationElementIterator does its own processing to handle
michael@0 341 // expanding and contracting collation sequences, ignorables, and so on.
michael@0 342
michael@0 343 Normalizer::EMode decomp = order->getStrength() == Collator::IDENTICAL
michael@0 344 ? Normalizer::NO_OP : order->getDecomposition();
michael@0 345
michael@0 346 text = new Normalizer(sourceText, decomp);
michael@0 347 if (text == NULL)
michael@0 348 status = U_MEMORY_ALLOCATION_ERROR;
michael@0 349 }
michael@0 350 */
michael@0 351 int32_t length = sourceText.getLength();
michael@0 352 UChar *buffer;
michael@0 353 if (length > 0) {
michael@0 354 buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR * length);
michael@0 355 /* test for NULL */
michael@0 356 if (buffer == NULL) {
michael@0 357 status = U_MEMORY_ALLOCATION_ERROR;
michael@0 358 return;
michael@0 359 }
michael@0 360 /*
michael@0 361 Using this constructor will prevent buffer from being removed when
michael@0 362 string gets removed
michael@0 363 */
michael@0 364 UnicodeString string(buffer, length, length);
michael@0 365 ((CharacterIterator &)sourceText).getText(string);
michael@0 366 const UChar *temp = string.getBuffer();
michael@0 367 u_memcpy(buffer, temp, length);
michael@0 368 }
michael@0 369 else {
michael@0 370 buffer = (UChar *)uprv_malloc(U_SIZEOF_UCHAR);
michael@0 371 /* test for NULL */
michael@0 372 if (buffer == NULL) {
michael@0 373 status = U_MEMORY_ALLOCATION_ERROR;
michael@0 374 return;
michael@0 375 }
michael@0 376 *buffer = 0;
michael@0 377 }
michael@0 378 m_data_ = ucol_openElements(order->ucollator, buffer, length, &status);
michael@0 379
michael@0 380 /* Test for buffer overflows */
michael@0 381 if (U_FAILURE(status)) {
michael@0 382 return;
michael@0 383 }
michael@0 384 m_data_->isWritable = TRUE;
michael@0 385 }
michael@0 386
michael@0 387 /* CollationElementIterator protected methods ----------------------------- */
michael@0 388
michael@0 389 const CollationElementIterator& CollationElementIterator::operator=(
michael@0 390 const CollationElementIterator& other)
michael@0 391 {
michael@0 392 if (this != &other)
michael@0 393 {
michael@0 394 UCollationElements *ucolelem = this->m_data_;
michael@0 395 UCollationElements *otherucolelem = other.m_data_;
michael@0 396 collIterate *coliter = &(ucolelem->iteratordata_);
michael@0 397 collIterate *othercoliter = &(otherucolelem->iteratordata_);
michael@0 398 int length = 0;
michael@0 399
michael@0 400 // checking only UCOL_ITER_HASLEN is not enough here as we may be in
michael@0 401 // the normalization buffer
michael@0 402 length = (int)(othercoliter->endp - othercoliter->string);
michael@0 403
michael@0 404 ucolelem->reset_ = otherucolelem->reset_;
michael@0 405 ucolelem->isWritable = TRUE;
michael@0 406
michael@0 407 /* create a duplicate of string */
michael@0 408 if (length > 0) {
michael@0 409 coliter->string = (UChar *)uprv_malloc(length * U_SIZEOF_UCHAR);
michael@0 410 if(coliter->string != NULL) {
michael@0 411 uprv_memcpy((UChar *)coliter->string, othercoliter->string,
michael@0 412 length * U_SIZEOF_UCHAR);
michael@0 413 } else { // Error: couldn't allocate memory. No copying should be done
michael@0 414 length = 0;
michael@0 415 }
michael@0 416 }
michael@0 417 else {
michael@0 418 coliter->string = NULL;
michael@0 419 }
michael@0 420
michael@0 421 /* start and end of string */
michael@0 422 coliter->endp = coliter->string == NULL ? NULL : coliter->string + length;
michael@0 423
michael@0 424 /* handle writable buffer here */
michael@0 425
michael@0 426 if (othercoliter->flags & UCOL_ITER_INNORMBUF) {
michael@0 427 coliter->writableBuffer = othercoliter->writableBuffer;
michael@0 428 coliter->writableBuffer.getTerminatedBuffer();
michael@0 429 }
michael@0 430
michael@0 431 /* current position */
michael@0 432 if (othercoliter->pos >= othercoliter->string &&
michael@0 433 othercoliter->pos <= othercoliter->endp)
michael@0 434 {
michael@0 435 U_ASSERT(coliter->string != NULL);
michael@0 436 coliter->pos = coliter->string +
michael@0 437 (othercoliter->pos - othercoliter->string);
michael@0 438 }
michael@0 439 else {
michael@0 440 coliter->pos = coliter->writableBuffer.getTerminatedBuffer() +
michael@0 441 (othercoliter->pos - othercoliter->writableBuffer.getBuffer());
michael@0 442 }
michael@0 443
michael@0 444 /* CE buffer */
michael@0 445 int32_t CEsize;
michael@0 446 if (coliter->extendCEs) {
michael@0 447 uprv_memcpy(coliter->CEs, othercoliter->CEs, sizeof(uint32_t) * UCOL_EXPAND_CE_BUFFER_SIZE);
michael@0 448 CEsize = sizeof(othercoliter->extendCEs);
michael@0 449 if (CEsize > 0) {
michael@0 450 othercoliter->extendCEs = (uint32_t *)uprv_malloc(CEsize);
michael@0 451 uprv_memcpy(coliter->extendCEs, othercoliter->extendCEs, CEsize);
michael@0 452 }
michael@0 453 coliter->toReturn = coliter->extendCEs +
michael@0 454 (othercoliter->toReturn - othercoliter->extendCEs);
michael@0 455 coliter->CEpos = coliter->extendCEs + CEsize;
michael@0 456 } else {
michael@0 457 CEsize = (int32_t)(othercoliter->CEpos - othercoliter->CEs);
michael@0 458 if (CEsize > 0) {
michael@0 459 uprv_memcpy(coliter->CEs, othercoliter->CEs, CEsize);
michael@0 460 }
michael@0 461 coliter->toReturn = coliter->CEs +
michael@0 462 (othercoliter->toReturn - othercoliter->CEs);
michael@0 463 coliter->CEpos = coliter->CEs + CEsize;
michael@0 464 }
michael@0 465
michael@0 466 if (othercoliter->fcdPosition != NULL) {
michael@0 467 U_ASSERT(coliter->string != NULL);
michael@0 468 coliter->fcdPosition = coliter->string +
michael@0 469 (othercoliter->fcdPosition
michael@0 470 - othercoliter->string);
michael@0 471 }
michael@0 472 else {
michael@0 473 coliter->fcdPosition = NULL;
michael@0 474 }
michael@0 475 coliter->flags = othercoliter->flags/*| UCOL_ITER_HASLEN*/;
michael@0 476 coliter->origFlags = othercoliter->origFlags;
michael@0 477 coliter->coll = othercoliter->coll;
michael@0 478 this->isDataOwned_ = TRUE;
michael@0 479 }
michael@0 480
michael@0 481 return *this;
michael@0 482 }
michael@0 483
michael@0 484 U_NAMESPACE_END
michael@0 485
michael@0 486 #endif /* #if !UCONFIG_NO_COLLATION */
michael@0 487
michael@0 488 /* eof */

mercurial