intl/icu/source/common/normalizer2.cpp

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

michael@0 1 /*
michael@0 2 *******************************************************************************
michael@0 3 *
michael@0 4 * Copyright (C) 2009-2013, International Business Machines
michael@0 5 * Corporation and others. All Rights Reserved.
michael@0 6 *
michael@0 7 *******************************************************************************
michael@0 8 * file name: normalizer2.cpp
michael@0 9 * encoding: US-ASCII
michael@0 10 * tab size: 8 (not used)
michael@0 11 * indentation:4
michael@0 12 *
michael@0 13 * created on: 2009nov22
michael@0 14 * created by: Markus W. Scherer
michael@0 15 */
michael@0 16
michael@0 17 #include "unicode/utypes.h"
michael@0 18
michael@0 19 #if !UCONFIG_NO_NORMALIZATION
michael@0 20
michael@0 21 #include "unicode/localpointer.h"
michael@0 22 #include "unicode/normalizer2.h"
michael@0 23 #include "unicode/unistr.h"
michael@0 24 #include "unicode/unorm.h"
michael@0 25 #include "cpputils.h"
michael@0 26 #include "cstring.h"
michael@0 27 #include "mutex.h"
michael@0 28 #include "normalizer2impl.h"
michael@0 29 #include "uassert.h"
michael@0 30 #include "ucln_cmn.h"
michael@0 31 #include "uhash.h"
michael@0 32
michael@0 33 U_NAMESPACE_BEGIN
michael@0 34
michael@0 35 // Public API dispatch via Normalizer2 subclasses -------------------------- ***
michael@0 36
michael@0 37 Normalizer2::~Normalizer2() {}
michael@0 38
michael@0 39 UBool
michael@0 40 Normalizer2::getRawDecomposition(UChar32, UnicodeString &) const {
michael@0 41 return FALSE;
michael@0 42 }
michael@0 43
michael@0 44 UChar32
michael@0 45 Normalizer2::composePair(UChar32, UChar32) const {
michael@0 46 return U_SENTINEL;
michael@0 47 }
michael@0 48
michael@0 49 uint8_t
michael@0 50 Normalizer2::getCombiningClass(UChar32 /*c*/) const {
michael@0 51 return 0;
michael@0 52 }
michael@0 53
michael@0 54 // Normalizer2 implementation for the old UNORM_NONE.
michael@0 55 class NoopNormalizer2 : public Normalizer2 {
michael@0 56 virtual ~NoopNormalizer2();
michael@0 57
michael@0 58 virtual UnicodeString &
michael@0 59 normalize(const UnicodeString &src,
michael@0 60 UnicodeString &dest,
michael@0 61 UErrorCode &errorCode) const {
michael@0 62 if(U_SUCCESS(errorCode)) {
michael@0 63 if(&dest!=&src) {
michael@0 64 dest=src;
michael@0 65 } else {
michael@0 66 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 67 }
michael@0 68 }
michael@0 69 return dest;
michael@0 70 }
michael@0 71 virtual UnicodeString &
michael@0 72 normalizeSecondAndAppend(UnicodeString &first,
michael@0 73 const UnicodeString &second,
michael@0 74 UErrorCode &errorCode) const {
michael@0 75 if(U_SUCCESS(errorCode)) {
michael@0 76 if(&first!=&second) {
michael@0 77 first.append(second);
michael@0 78 } else {
michael@0 79 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 80 }
michael@0 81 }
michael@0 82 return first;
michael@0 83 }
michael@0 84 virtual UnicodeString &
michael@0 85 append(UnicodeString &first,
michael@0 86 const UnicodeString &second,
michael@0 87 UErrorCode &errorCode) const {
michael@0 88 if(U_SUCCESS(errorCode)) {
michael@0 89 if(&first!=&second) {
michael@0 90 first.append(second);
michael@0 91 } else {
michael@0 92 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 93 }
michael@0 94 }
michael@0 95 return first;
michael@0 96 }
michael@0 97 virtual UBool
michael@0 98 getDecomposition(UChar32, UnicodeString &) const {
michael@0 99 return FALSE;
michael@0 100 }
michael@0 101 // No need to override the default getRawDecomposition().
michael@0 102 virtual UBool
michael@0 103 isNormalized(const UnicodeString &, UErrorCode &) const {
michael@0 104 return TRUE;
michael@0 105 }
michael@0 106 virtual UNormalizationCheckResult
michael@0 107 quickCheck(const UnicodeString &, UErrorCode &) const {
michael@0 108 return UNORM_YES;
michael@0 109 }
michael@0 110 virtual int32_t
michael@0 111 spanQuickCheckYes(const UnicodeString &s, UErrorCode &) const {
michael@0 112 return s.length();
michael@0 113 }
michael@0 114 virtual UBool hasBoundaryBefore(UChar32) const { return TRUE; }
michael@0 115 virtual UBool hasBoundaryAfter(UChar32) const { return TRUE; }
michael@0 116 virtual UBool isInert(UChar32) const { return TRUE; }
michael@0 117 };
michael@0 118
michael@0 119 NoopNormalizer2::~NoopNormalizer2() {}
michael@0 120
michael@0 121 // Intermediate class:
michael@0 122 // Has Normalizer2Impl and does boilerplate argument checking and setup.
michael@0 123 class Normalizer2WithImpl : public Normalizer2 {
michael@0 124 public:
michael@0 125 Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {}
michael@0 126 virtual ~Normalizer2WithImpl();
michael@0 127
michael@0 128 // normalize
michael@0 129 virtual UnicodeString &
michael@0 130 normalize(const UnicodeString &src,
michael@0 131 UnicodeString &dest,
michael@0 132 UErrorCode &errorCode) const {
michael@0 133 if(U_FAILURE(errorCode)) {
michael@0 134 dest.setToBogus();
michael@0 135 return dest;
michael@0 136 }
michael@0 137 const UChar *sArray=src.getBuffer();
michael@0 138 if(&dest==&src || sArray==NULL) {
michael@0 139 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 140 dest.setToBogus();
michael@0 141 return dest;
michael@0 142 }
michael@0 143 dest.remove();
michael@0 144 ReorderingBuffer buffer(impl, dest);
michael@0 145 if(buffer.init(src.length(), errorCode)) {
michael@0 146 normalize(sArray, sArray+src.length(), buffer, errorCode);
michael@0 147 }
michael@0 148 return dest;
michael@0 149 }
michael@0 150 virtual void
michael@0 151 normalize(const UChar *src, const UChar *limit,
michael@0 152 ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
michael@0 153
michael@0 154 // normalize and append
michael@0 155 virtual UnicodeString &
michael@0 156 normalizeSecondAndAppend(UnicodeString &first,
michael@0 157 const UnicodeString &second,
michael@0 158 UErrorCode &errorCode) const {
michael@0 159 return normalizeSecondAndAppend(first, second, TRUE, errorCode);
michael@0 160 }
michael@0 161 virtual UnicodeString &
michael@0 162 append(UnicodeString &first,
michael@0 163 const UnicodeString &second,
michael@0 164 UErrorCode &errorCode) const {
michael@0 165 return normalizeSecondAndAppend(first, second, FALSE, errorCode);
michael@0 166 }
michael@0 167 UnicodeString &
michael@0 168 normalizeSecondAndAppend(UnicodeString &first,
michael@0 169 const UnicodeString &second,
michael@0 170 UBool doNormalize,
michael@0 171 UErrorCode &errorCode) const {
michael@0 172 uprv_checkCanGetBuffer(first, errorCode);
michael@0 173 if(U_FAILURE(errorCode)) {
michael@0 174 return first;
michael@0 175 }
michael@0 176 const UChar *secondArray=second.getBuffer();
michael@0 177 if(&first==&second || secondArray==NULL) {
michael@0 178 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 179 return first;
michael@0 180 }
michael@0 181 int32_t firstLength=first.length();
michael@0 182 UnicodeString safeMiddle;
michael@0 183 {
michael@0 184 ReorderingBuffer buffer(impl, first);
michael@0 185 if(buffer.init(firstLength+second.length(), errorCode)) {
michael@0 186 normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize,
michael@0 187 safeMiddle, buffer, errorCode);
michael@0 188 }
michael@0 189 } // The ReorderingBuffer destructor finalizes the first string.
michael@0 190 if(U_FAILURE(errorCode)) {
michael@0 191 // Restore the modified suffix of the first string.
michael@0 192 first.replace(firstLength-safeMiddle.length(), 0x7fffffff, safeMiddle);
michael@0 193 }
michael@0 194 return first;
michael@0 195 }
michael@0 196 virtual void
michael@0 197 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
michael@0 198 UnicodeString &safeMiddle,
michael@0 199 ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
michael@0 200 virtual UBool
michael@0 201 getDecomposition(UChar32 c, UnicodeString &decomposition) const {
michael@0 202 UChar buffer[4];
michael@0 203 int32_t length;
michael@0 204 const UChar *d=impl.getDecomposition(c, buffer, length);
michael@0 205 if(d==NULL) {
michael@0 206 return FALSE;
michael@0 207 }
michael@0 208 if(d==buffer) {
michael@0 209 decomposition.setTo(buffer, length); // copy the string (Jamos from Hangul syllable c)
michael@0 210 } else {
michael@0 211 decomposition.setTo(FALSE, d, length); // read-only alias
michael@0 212 }
michael@0 213 return TRUE;
michael@0 214 }
michael@0 215 virtual UBool
michael@0 216 getRawDecomposition(UChar32 c, UnicodeString &decomposition) const {
michael@0 217 UChar buffer[30];
michael@0 218 int32_t length;
michael@0 219 const UChar *d=impl.getRawDecomposition(c, buffer, length);
michael@0 220 if(d==NULL) {
michael@0 221 return FALSE;
michael@0 222 }
michael@0 223 if(d==buffer) {
michael@0 224 decomposition.setTo(buffer, length); // copy the string (algorithmic decomposition)
michael@0 225 } else {
michael@0 226 decomposition.setTo(FALSE, d, length); // read-only alias
michael@0 227 }
michael@0 228 return TRUE;
michael@0 229 }
michael@0 230 virtual UChar32
michael@0 231 composePair(UChar32 a, UChar32 b) const {
michael@0 232 return impl.composePair(a, b);
michael@0 233 }
michael@0 234
michael@0 235 virtual uint8_t
michael@0 236 getCombiningClass(UChar32 c) const {
michael@0 237 return impl.getCC(impl.getNorm16(c));
michael@0 238 }
michael@0 239
michael@0 240 // quick checks
michael@0 241 virtual UBool
michael@0 242 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
michael@0 243 if(U_FAILURE(errorCode)) {
michael@0 244 return FALSE;
michael@0 245 }
michael@0 246 const UChar *sArray=s.getBuffer();
michael@0 247 if(sArray==NULL) {
michael@0 248 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 249 return FALSE;
michael@0 250 }
michael@0 251 const UChar *sLimit=sArray+s.length();
michael@0 252 return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode);
michael@0 253 }
michael@0 254 virtual UNormalizationCheckResult
michael@0 255 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
michael@0 256 return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO;
michael@0 257 }
michael@0 258 virtual int32_t
michael@0 259 spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const {
michael@0 260 if(U_FAILURE(errorCode)) {
michael@0 261 return 0;
michael@0 262 }
michael@0 263 const UChar *sArray=s.getBuffer();
michael@0 264 if(sArray==NULL) {
michael@0 265 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 266 return 0;
michael@0 267 }
michael@0 268 return (int32_t)(spanQuickCheckYes(sArray, sArray+s.length(), errorCode)-sArray);
michael@0 269 }
michael@0 270 virtual const UChar *
michael@0 271 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const = 0;
michael@0 272
michael@0 273 virtual UNormalizationCheckResult getQuickCheck(UChar32) const {
michael@0 274 return UNORM_YES;
michael@0 275 }
michael@0 276
michael@0 277 const Normalizer2Impl &impl;
michael@0 278 };
michael@0 279
michael@0 280 Normalizer2WithImpl::~Normalizer2WithImpl() {}
michael@0 281
michael@0 282 class DecomposeNormalizer2 : public Normalizer2WithImpl {
michael@0 283 public:
michael@0 284 DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
michael@0 285 virtual ~DecomposeNormalizer2();
michael@0 286
michael@0 287 private:
michael@0 288 virtual void
michael@0 289 normalize(const UChar *src, const UChar *limit,
michael@0 290 ReorderingBuffer &buffer, UErrorCode &errorCode) const {
michael@0 291 impl.decompose(src, limit, &buffer, errorCode);
michael@0 292 }
michael@0 293 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
michael@0 294 virtual void
michael@0 295 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
michael@0 296 UnicodeString &safeMiddle,
michael@0 297 ReorderingBuffer &buffer, UErrorCode &errorCode) const {
michael@0 298 impl.decomposeAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
michael@0 299 }
michael@0 300 virtual const UChar *
michael@0 301 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
michael@0 302 return impl.decompose(src, limit, NULL, errorCode);
michael@0 303 }
michael@0 304 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
michael@0 305 virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
michael@0 306 return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO;
michael@0 307 }
michael@0 308 virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasDecompBoundary(c, TRUE); }
michael@0 309 virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasDecompBoundary(c, FALSE); }
michael@0 310 virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); }
michael@0 311 };
michael@0 312
michael@0 313 DecomposeNormalizer2::~DecomposeNormalizer2() {}
michael@0 314
michael@0 315 class ComposeNormalizer2 : public Normalizer2WithImpl {
michael@0 316 public:
michael@0 317 ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) :
michael@0 318 Normalizer2WithImpl(ni), onlyContiguous(fcc) {}
michael@0 319 virtual ~ComposeNormalizer2();
michael@0 320
michael@0 321 private:
michael@0 322 virtual void
michael@0 323 normalize(const UChar *src, const UChar *limit,
michael@0 324 ReorderingBuffer &buffer, UErrorCode &errorCode) const {
michael@0 325 impl.compose(src, limit, onlyContiguous, TRUE, buffer, errorCode);
michael@0 326 }
michael@0 327 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
michael@0 328 virtual void
michael@0 329 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
michael@0 330 UnicodeString &safeMiddle,
michael@0 331 ReorderingBuffer &buffer, UErrorCode &errorCode) const {
michael@0 332 impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, safeMiddle, buffer, errorCode);
michael@0 333 }
michael@0 334
michael@0 335 virtual UBool
michael@0 336 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
michael@0 337 if(U_FAILURE(errorCode)) {
michael@0 338 return FALSE;
michael@0 339 }
michael@0 340 const UChar *sArray=s.getBuffer();
michael@0 341 if(sArray==NULL) {
michael@0 342 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 343 return FALSE;
michael@0 344 }
michael@0 345 UnicodeString temp;
michael@0 346 ReorderingBuffer buffer(impl, temp);
michael@0 347 if(!buffer.init(5, errorCode)) { // small destCapacity for substring normalization
michael@0 348 return FALSE;
michael@0 349 }
michael@0 350 return impl.compose(sArray, sArray+s.length(), onlyContiguous, FALSE, buffer, errorCode);
michael@0 351 }
michael@0 352 virtual UNormalizationCheckResult
michael@0 353 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
michael@0 354 if(U_FAILURE(errorCode)) {
michael@0 355 return UNORM_MAYBE;
michael@0 356 }
michael@0 357 const UChar *sArray=s.getBuffer();
michael@0 358 if(sArray==NULL) {
michael@0 359 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 360 return UNORM_MAYBE;
michael@0 361 }
michael@0 362 UNormalizationCheckResult qcResult=UNORM_YES;
michael@0 363 impl.composeQuickCheck(sArray, sArray+s.length(), onlyContiguous, &qcResult);
michael@0 364 return qcResult;
michael@0 365 }
michael@0 366 virtual const UChar *
michael@0 367 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &) const {
michael@0 368 return impl.composeQuickCheck(src, limit, onlyContiguous, NULL);
michael@0 369 }
michael@0 370 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
michael@0 371 virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
michael@0 372 return impl.getCompQuickCheck(impl.getNorm16(c));
michael@0 373 }
michael@0 374 virtual UBool hasBoundaryBefore(UChar32 c) const {
michael@0 375 return impl.hasCompBoundaryBefore(c);
michael@0 376 }
michael@0 377 virtual UBool hasBoundaryAfter(UChar32 c) const {
michael@0 378 return impl.hasCompBoundaryAfter(c, onlyContiguous, FALSE);
michael@0 379 }
michael@0 380 virtual UBool isInert(UChar32 c) const {
michael@0 381 return impl.hasCompBoundaryAfter(c, onlyContiguous, TRUE);
michael@0 382 }
michael@0 383
michael@0 384 const UBool onlyContiguous;
michael@0 385 };
michael@0 386
michael@0 387 ComposeNormalizer2::~ComposeNormalizer2() {}
michael@0 388
michael@0 389 class FCDNormalizer2 : public Normalizer2WithImpl {
michael@0 390 public:
michael@0 391 FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
michael@0 392 virtual ~FCDNormalizer2();
michael@0 393
michael@0 394 private:
michael@0 395 virtual void
michael@0 396 normalize(const UChar *src, const UChar *limit,
michael@0 397 ReorderingBuffer &buffer, UErrorCode &errorCode) const {
michael@0 398 impl.makeFCD(src, limit, &buffer, errorCode);
michael@0 399 }
michael@0 400 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
michael@0 401 virtual void
michael@0 402 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
michael@0 403 UnicodeString &safeMiddle,
michael@0 404 ReorderingBuffer &buffer, UErrorCode &errorCode) const {
michael@0 405 impl.makeFCDAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
michael@0 406 }
michael@0 407 virtual const UChar *
michael@0 408 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
michael@0 409 return impl.makeFCD(src, limit, NULL, errorCode);
michael@0 410 }
michael@0 411 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
michael@0 412 virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasFCDBoundaryBefore(c); }
michael@0 413 virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasFCDBoundaryAfter(c); }
michael@0 414 virtual UBool isInert(UChar32 c) const { return impl.isFCDInert(c); }
michael@0 415 };
michael@0 416
michael@0 417 FCDNormalizer2::~FCDNormalizer2() {}
michael@0 418
michael@0 419 // instance cache ---------------------------------------------------------- ***
michael@0 420
michael@0 421 struct Norm2AllModes : public UMemory {
michael@0 422 static Norm2AllModes *createInstance(const char *packageName,
michael@0 423 const char *name,
michael@0 424 UErrorCode &errorCode);
michael@0 425 Norm2AllModes() : comp(impl, FALSE), decomp(impl), fcd(impl), fcc(impl, TRUE) {}
michael@0 426
michael@0 427 Normalizer2Impl impl;
michael@0 428 ComposeNormalizer2 comp;
michael@0 429 DecomposeNormalizer2 decomp;
michael@0 430 FCDNormalizer2 fcd;
michael@0 431 ComposeNormalizer2 fcc;
michael@0 432 };
michael@0 433
michael@0 434 Norm2AllModes *
michael@0 435 Norm2AllModes::createInstance(const char *packageName,
michael@0 436 const char *name,
michael@0 437 UErrorCode &errorCode) {
michael@0 438 if(U_FAILURE(errorCode)) {
michael@0 439 return NULL;
michael@0 440 }
michael@0 441 LocalPointer<Norm2AllModes> allModes(new Norm2AllModes);
michael@0 442 if(allModes.isNull()) {
michael@0 443 errorCode=U_MEMORY_ALLOCATION_ERROR;
michael@0 444 return NULL;
michael@0 445 }
michael@0 446 allModes->impl.load(packageName, name, errorCode);
michael@0 447 return U_SUCCESS(errorCode) ? allModes.orphan() : NULL;
michael@0 448 }
michael@0 449
michael@0 450 U_CDECL_BEGIN
michael@0 451 static UBool U_CALLCONV uprv_normalizer2_cleanup();
michael@0 452 U_CDECL_END
michael@0 453
michael@0 454
michael@0 455 static Norm2AllModes *nfcSingleton;
michael@0 456 static Norm2AllModes *nfkcSingleton;
michael@0 457 static Norm2AllModes *nfkc_cfSingleton;
michael@0 458 static Normalizer2 *noopSingleton;
michael@0 459 static UHashtable *cache=NULL;
michael@0 460
michael@0 461 static icu::UInitOnce nfcInitOnce = U_INITONCE_INITIALIZER;
michael@0 462 static icu::UInitOnce nfkcInitOnce = U_INITONCE_INITIALIZER;
michael@0 463 static icu::UInitOnce nfkc_cfInitOnce = U_INITONCE_INITIALIZER;
michael@0 464 static icu::UInitOnce noopInitOnce = U_INITONCE_INITIALIZER;
michael@0 465
michael@0 466 // UInitOnce singleton initialization function
michael@0 467 static void U_CALLCONV initSingletons(const char *what, UErrorCode &errorCode) {
michael@0 468 if (uprv_strcmp(what, "nfc") == 0) {
michael@0 469 nfcSingleton = Norm2AllModes::createInstance(NULL, "nfc", errorCode);
michael@0 470 } else if (uprv_strcmp(what, "nfkc") == 0) {
michael@0 471 nfkcSingleton = Norm2AllModes::createInstance(NULL, "nfkc", errorCode);
michael@0 472 } else if (uprv_strcmp(what, "nfkc_cf") == 0) {
michael@0 473 nfkc_cfSingleton = Norm2AllModes::createInstance(NULL, "nfkc_cf", errorCode);
michael@0 474 } else if (uprv_strcmp(what, "noop") == 0) {
michael@0 475 noopSingleton = new NoopNormalizer2;
michael@0 476 } else {
michael@0 477 U_ASSERT(FALSE); // Unknown singleton
michael@0 478 }
michael@0 479 ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup);
michael@0 480 }
michael@0 481
michael@0 482 U_CDECL_BEGIN
michael@0 483
michael@0 484 static void U_CALLCONV deleteNorm2AllModes(void *allModes) {
michael@0 485 delete (Norm2AllModes *)allModes;
michael@0 486 }
michael@0 487
michael@0 488 static UBool U_CALLCONV uprv_normalizer2_cleanup() {
michael@0 489 delete nfcSingleton;
michael@0 490 nfcSingleton = NULL;
michael@0 491 delete nfkcSingleton;
michael@0 492 nfkcSingleton = NULL;
michael@0 493 delete nfkc_cfSingleton;
michael@0 494 nfkc_cfSingleton = NULL;
michael@0 495 delete noopSingleton;
michael@0 496 noopSingleton = NULL;
michael@0 497 uhash_close(cache);
michael@0 498 cache=NULL;
michael@0 499 nfcInitOnce.reset();
michael@0 500 nfkcInitOnce.reset();
michael@0 501 nfkc_cfInitOnce.reset();
michael@0 502 noopInitOnce.reset();
michael@0 503 return TRUE;
michael@0 504 }
michael@0 505
michael@0 506 U_CDECL_END
michael@0 507
michael@0 508 const Normalizer2 *Normalizer2Factory::getNFCInstance(UErrorCode &errorCode) {
michael@0 509 umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode);
michael@0 510 return nfcSingleton!=NULL ? &nfcSingleton->comp : NULL;
michael@0 511 }
michael@0 512
michael@0 513 const Normalizer2 *Normalizer2Factory::getNFDInstance(UErrorCode &errorCode) {
michael@0 514 umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode);
michael@0 515 return nfcSingleton!=NULL ? &nfcSingleton->decomp : NULL;
michael@0 516 }
michael@0 517
michael@0 518 const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) {
michael@0 519 umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode);
michael@0 520 return nfcSingleton!=NULL ? &nfcSingleton->fcd : NULL;
michael@0 521 }
michael@0 522
michael@0 523 const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) {
michael@0 524 umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode);
michael@0 525 return nfcSingleton!=NULL ? &nfcSingleton->fcc : NULL;
michael@0 526 }
michael@0 527
michael@0 528 const Normalizer2 *Normalizer2Factory::getNFKCInstance(UErrorCode &errorCode) {
michael@0 529 umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode);
michael@0 530 return nfkcSingleton!=NULL ? &nfkcSingleton->comp : NULL;
michael@0 531 }
michael@0 532
michael@0 533 const Normalizer2 *Normalizer2Factory::getNFKDInstance(UErrorCode &errorCode) {
michael@0 534 umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode);
michael@0 535 return nfkcSingleton!=NULL ? &nfkcSingleton->decomp : NULL;
michael@0 536 }
michael@0 537
michael@0 538 const Normalizer2 *Normalizer2Factory::getNFKC_CFInstance(UErrorCode &errorCode) {
michael@0 539 umtx_initOnce(nfkc_cfInitOnce, &initSingletons, "nfkc_cf", errorCode);
michael@0 540 return nfkc_cfSingleton!=NULL ? &nfkc_cfSingleton->comp : NULL;
michael@0 541 }
michael@0 542
michael@0 543 const Normalizer2 *Normalizer2Factory::getNoopInstance(UErrorCode &errorCode) {
michael@0 544 umtx_initOnce(noopInitOnce, &initSingletons, "noop", errorCode);
michael@0 545 return noopSingleton;
michael@0 546 }
michael@0 547
michael@0 548 const Normalizer2 *
michael@0 549 Normalizer2Factory::getInstance(UNormalizationMode mode, UErrorCode &errorCode) {
michael@0 550 if(U_FAILURE(errorCode)) {
michael@0 551 return NULL;
michael@0 552 }
michael@0 553 switch(mode) {
michael@0 554 case UNORM_NFD:
michael@0 555 return getNFDInstance(errorCode);
michael@0 556 case UNORM_NFKD:
michael@0 557 return getNFKDInstance(errorCode);
michael@0 558 case UNORM_NFC:
michael@0 559 return getNFCInstance(errorCode);
michael@0 560 case UNORM_NFKC:
michael@0 561 return getNFKCInstance(errorCode);
michael@0 562 case UNORM_FCD:
michael@0 563 return getFCDInstance(errorCode);
michael@0 564 default: // UNORM_NONE
michael@0 565 return getNoopInstance(errorCode);
michael@0 566 }
michael@0 567 }
michael@0 568
michael@0 569 const Normalizer2Impl *
michael@0 570 Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) {
michael@0 571 umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode);
michael@0 572 return nfcSingleton!=NULL ? &nfcSingleton->impl : NULL;
michael@0 573 }
michael@0 574
michael@0 575 const Normalizer2Impl *
michael@0 576 Normalizer2Factory::getNFKCImpl(UErrorCode &errorCode) {
michael@0 577 umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode);
michael@0 578 return nfkcSingleton!=NULL ? &nfkcSingleton->impl : NULL;
michael@0 579 }
michael@0 580
michael@0 581 const Normalizer2Impl *
michael@0 582 Normalizer2Factory::getNFKC_CFImpl(UErrorCode &errorCode) {
michael@0 583 umtx_initOnce(nfkc_cfInitOnce, &initSingletons, "nfkc_cf", errorCode);
michael@0 584 return nfkc_cfSingleton!=NULL ? &nfkc_cfSingleton->impl : NULL;
michael@0 585 }
michael@0 586
michael@0 587 const Normalizer2Impl *
michael@0 588 Normalizer2Factory::getImpl(const Normalizer2 *norm2) {
michael@0 589 return &((Normalizer2WithImpl *)norm2)->impl;
michael@0 590 }
michael@0 591
michael@0 592 const Normalizer2 *
michael@0 593 Normalizer2::getNFCInstance(UErrorCode &errorCode) {
michael@0 594 return Normalizer2Factory::getNFCInstance(errorCode);
michael@0 595 }
michael@0 596
michael@0 597 const Normalizer2 *
michael@0 598 Normalizer2::getNFDInstance(UErrorCode &errorCode) {
michael@0 599 return Normalizer2Factory::getNFDInstance(errorCode);
michael@0 600 }
michael@0 601
michael@0 602 const Normalizer2 *
michael@0 603 Normalizer2::getNFKCInstance(UErrorCode &errorCode) {
michael@0 604 return Normalizer2Factory::getNFKCInstance(errorCode);
michael@0 605 }
michael@0 606
michael@0 607 const Normalizer2 *
michael@0 608 Normalizer2::getNFKDInstance(UErrorCode &errorCode) {
michael@0 609 return Normalizer2Factory::getNFKDInstance(errorCode);
michael@0 610 }
michael@0 611
michael@0 612 const Normalizer2 *
michael@0 613 Normalizer2::getNFKCCasefoldInstance(UErrorCode &errorCode) {
michael@0 614 return Normalizer2Factory::getNFKC_CFInstance(errorCode);
michael@0 615 }
michael@0 616
michael@0 617 const Normalizer2 *
michael@0 618 Normalizer2::getInstance(const char *packageName,
michael@0 619 const char *name,
michael@0 620 UNormalization2Mode mode,
michael@0 621 UErrorCode &errorCode) {
michael@0 622 if(U_FAILURE(errorCode)) {
michael@0 623 return NULL;
michael@0 624 }
michael@0 625 if(name==NULL || *name==0) {
michael@0 626 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 627 return NULL;
michael@0 628 }
michael@0 629 Norm2AllModes *allModes=NULL;
michael@0 630 if(packageName==NULL) {
michael@0 631 if(0==uprv_strcmp(name, "nfc")) {
michael@0 632 umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode);
michael@0 633 allModes=nfcSingleton;
michael@0 634 } else if(0==uprv_strcmp(name, "nfkc")) {
michael@0 635 umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode);
michael@0 636 allModes=nfkcSingleton;
michael@0 637 } else if(0==uprv_strcmp(name, "nfkc_cf")) {
michael@0 638 umtx_initOnce(nfkc_cfInitOnce, &initSingletons, "nfkc_cf", errorCode);
michael@0 639 allModes=nfkc_cfSingleton;
michael@0 640 }
michael@0 641 }
michael@0 642 if(allModes==NULL && U_SUCCESS(errorCode)) {
michael@0 643 {
michael@0 644 Mutex lock;
michael@0 645 if(cache!=NULL) {
michael@0 646 allModes=(Norm2AllModes *)uhash_get(cache, name);
michael@0 647 }
michael@0 648 }
michael@0 649 if(allModes==NULL) {
michael@0 650 LocalPointer<Norm2AllModes> localAllModes(
michael@0 651 Norm2AllModes::createInstance(packageName, name, errorCode));
michael@0 652 if(U_SUCCESS(errorCode)) {
michael@0 653 Mutex lock;
michael@0 654 if(cache==NULL) {
michael@0 655 cache=uhash_open(uhash_hashChars, uhash_compareChars, NULL, &errorCode);
michael@0 656 if(U_FAILURE(errorCode)) {
michael@0 657 return NULL;
michael@0 658 }
michael@0 659 uhash_setKeyDeleter(cache, uprv_free);
michael@0 660 uhash_setValueDeleter(cache, deleteNorm2AllModes);
michael@0 661 }
michael@0 662 void *temp=uhash_get(cache, name);
michael@0 663 if(temp==NULL) {
michael@0 664 int32_t keyLength=uprv_strlen(name)+1;
michael@0 665 char *nameCopy=(char *)uprv_malloc(keyLength);
michael@0 666 if(nameCopy==NULL) {
michael@0 667 errorCode=U_MEMORY_ALLOCATION_ERROR;
michael@0 668 return NULL;
michael@0 669 }
michael@0 670 uprv_memcpy(nameCopy, name, keyLength);
michael@0 671 uhash_put(cache, nameCopy, allModes=localAllModes.orphan(), &errorCode);
michael@0 672 } else {
michael@0 673 // race condition
michael@0 674 allModes=(Norm2AllModes *)temp;
michael@0 675 }
michael@0 676 }
michael@0 677 }
michael@0 678 }
michael@0 679 if(allModes!=NULL && U_SUCCESS(errorCode)) {
michael@0 680 switch(mode) {
michael@0 681 case UNORM2_COMPOSE:
michael@0 682 return &allModes->comp;
michael@0 683 case UNORM2_DECOMPOSE:
michael@0 684 return &allModes->decomp;
michael@0 685 case UNORM2_FCD:
michael@0 686 return &allModes->fcd;
michael@0 687 case UNORM2_COMPOSE_CONTIGUOUS:
michael@0 688 return &allModes->fcc;
michael@0 689 default:
michael@0 690 break; // do nothing
michael@0 691 }
michael@0 692 }
michael@0 693 return NULL;
michael@0 694 }
michael@0 695
michael@0 696 U_NAMESPACE_END
michael@0 697
michael@0 698 // C API ------------------------------------------------------------------- ***
michael@0 699
michael@0 700 U_NAMESPACE_USE
michael@0 701
michael@0 702 U_CAPI const UNormalizer2 * U_EXPORT2
michael@0 703 unorm2_getNFCInstance(UErrorCode *pErrorCode) {
michael@0 704 return (const UNormalizer2 *)Normalizer2::getNFCInstance(*pErrorCode);
michael@0 705 }
michael@0 706
michael@0 707 U_CAPI const UNormalizer2 * U_EXPORT2
michael@0 708 unorm2_getNFDInstance(UErrorCode *pErrorCode) {
michael@0 709 return (const UNormalizer2 *)Normalizer2::getNFDInstance(*pErrorCode);
michael@0 710 }
michael@0 711
michael@0 712 U_CAPI const UNormalizer2 * U_EXPORT2
michael@0 713 unorm2_getNFKCInstance(UErrorCode *pErrorCode) {
michael@0 714 return (const UNormalizer2 *)Normalizer2::getNFKCInstance(*pErrorCode);
michael@0 715 }
michael@0 716
michael@0 717 U_CAPI const UNormalizer2 * U_EXPORT2
michael@0 718 unorm2_getNFKDInstance(UErrorCode *pErrorCode) {
michael@0 719 return (const UNormalizer2 *)Normalizer2::getNFKDInstance(*pErrorCode);
michael@0 720 }
michael@0 721
michael@0 722 U_CAPI const UNormalizer2 * U_EXPORT2
michael@0 723 unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode) {
michael@0 724 return (const UNormalizer2 *)Normalizer2::getNFKCCasefoldInstance(*pErrorCode);
michael@0 725 }
michael@0 726
michael@0 727 U_CAPI const UNormalizer2 * U_EXPORT2
michael@0 728 unorm2_getInstance(const char *packageName,
michael@0 729 const char *name,
michael@0 730 UNormalization2Mode mode,
michael@0 731 UErrorCode *pErrorCode) {
michael@0 732 return (const UNormalizer2 *)Normalizer2::getInstance(packageName, name, mode, *pErrorCode);
michael@0 733 }
michael@0 734
michael@0 735 U_CAPI void U_EXPORT2
michael@0 736 unorm2_close(UNormalizer2 *norm2) {
michael@0 737 delete (Normalizer2 *)norm2;
michael@0 738 }
michael@0 739
michael@0 740 U_CAPI int32_t U_EXPORT2
michael@0 741 unorm2_normalize(const UNormalizer2 *norm2,
michael@0 742 const UChar *src, int32_t length,
michael@0 743 UChar *dest, int32_t capacity,
michael@0 744 UErrorCode *pErrorCode) {
michael@0 745 if(U_FAILURE(*pErrorCode)) {
michael@0 746 return 0;
michael@0 747 }
michael@0 748 if( (src==NULL ? length!=0 : length<-1) ||
michael@0 749 (dest==NULL ? capacity!=0 : capacity<0) ||
michael@0 750 (src==dest && src!=NULL)
michael@0 751 ) {
michael@0 752 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 753 return 0;
michael@0 754 }
michael@0 755 UnicodeString destString(dest, 0, capacity);
michael@0 756 // length==0: Nothing to do, and n2wi->normalize(NULL, NULL, buffer, ...) would crash.
michael@0 757 if(length!=0) {
michael@0 758 const Normalizer2 *n2=(const Normalizer2 *)norm2;
michael@0 759 const Normalizer2WithImpl *n2wi=dynamic_cast<const Normalizer2WithImpl *>(n2);
michael@0 760 if(n2wi!=NULL) {
michael@0 761 // Avoid duplicate argument checking and support NUL-terminated src.
michael@0 762 ReorderingBuffer buffer(n2wi->impl, destString);
michael@0 763 if(buffer.init(length, *pErrorCode)) {
michael@0 764 n2wi->normalize(src, length>=0 ? src+length : NULL, buffer, *pErrorCode);
michael@0 765 }
michael@0 766 } else {
michael@0 767 UnicodeString srcString(length<0, src, length);
michael@0 768 n2->normalize(srcString, destString, *pErrorCode);
michael@0 769 }
michael@0 770 }
michael@0 771 return destString.extract(dest, capacity, *pErrorCode);
michael@0 772 }
michael@0 773
michael@0 774 static int32_t
michael@0 775 normalizeSecondAndAppend(const UNormalizer2 *norm2,
michael@0 776 UChar *first, int32_t firstLength, int32_t firstCapacity,
michael@0 777 const UChar *second, int32_t secondLength,
michael@0 778 UBool doNormalize,
michael@0 779 UErrorCode *pErrorCode) {
michael@0 780 if(U_FAILURE(*pErrorCode)) {
michael@0 781 return 0;
michael@0 782 }
michael@0 783 if( (second==NULL ? secondLength!=0 : secondLength<-1) ||
michael@0 784 (first==NULL ? (firstCapacity!=0 || firstLength!=0) :
michael@0 785 (firstCapacity<0 || firstLength<-1)) ||
michael@0 786 (first==second && first!=NULL)
michael@0 787 ) {
michael@0 788 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 789 return 0;
michael@0 790 }
michael@0 791 UnicodeString firstString(first, firstLength, firstCapacity);
michael@0 792 firstLength=firstString.length(); // In case it was -1.
michael@0 793 // secondLength==0: Nothing to do, and n2wi->normalizeAndAppend(NULL, NULL, buffer, ...) would crash.
michael@0 794 if(secondLength!=0) {
michael@0 795 const Normalizer2 *n2=(const Normalizer2 *)norm2;
michael@0 796 const Normalizer2WithImpl *n2wi=dynamic_cast<const Normalizer2WithImpl *>(n2);
michael@0 797 if(n2wi!=NULL) {
michael@0 798 // Avoid duplicate argument checking and support NUL-terminated src.
michael@0 799 UnicodeString safeMiddle;
michael@0 800 {
michael@0 801 ReorderingBuffer buffer(n2wi->impl, firstString);
michael@0 802 if(buffer.init(firstLength+secondLength+1, *pErrorCode)) { // destCapacity>=-1
michael@0 803 n2wi->normalizeAndAppend(second, secondLength>=0 ? second+secondLength : NULL,
michael@0 804 doNormalize, safeMiddle, buffer, *pErrorCode);
michael@0 805 }
michael@0 806 } // The ReorderingBuffer destructor finalizes firstString.
michael@0 807 if(U_FAILURE(*pErrorCode) || firstString.length()>firstCapacity) {
michael@0 808 // Restore the modified suffix of the first string.
michael@0 809 // This does not restore first[] array contents between firstLength and firstCapacity.
michael@0 810 // (That might be uninitialized memory, as far as we know.)
michael@0 811 if(first!=NULL) { /* don't dereference NULL */
michael@0 812 safeMiddle.extract(0, 0x7fffffff, first+firstLength-safeMiddle.length());
michael@0 813 if(firstLength<firstCapacity) {
michael@0 814 first[firstLength]=0; // NUL-terminate in case it was originally.
michael@0 815 }
michael@0 816 }
michael@0 817 }
michael@0 818 } else {
michael@0 819 UnicodeString secondString(secondLength<0, second, secondLength);
michael@0 820 if(doNormalize) {
michael@0 821 n2->normalizeSecondAndAppend(firstString, secondString, *pErrorCode);
michael@0 822 } else {
michael@0 823 n2->append(firstString, secondString, *pErrorCode);
michael@0 824 }
michael@0 825 }
michael@0 826 }
michael@0 827 return firstString.extract(first, firstCapacity, *pErrorCode);
michael@0 828 }
michael@0 829
michael@0 830 U_CAPI int32_t U_EXPORT2
michael@0 831 unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2,
michael@0 832 UChar *first, int32_t firstLength, int32_t firstCapacity,
michael@0 833 const UChar *second, int32_t secondLength,
michael@0 834 UErrorCode *pErrorCode) {
michael@0 835 return normalizeSecondAndAppend(norm2,
michael@0 836 first, firstLength, firstCapacity,
michael@0 837 second, secondLength,
michael@0 838 TRUE, pErrorCode);
michael@0 839 }
michael@0 840
michael@0 841 U_CAPI int32_t U_EXPORT2
michael@0 842 unorm2_append(const UNormalizer2 *norm2,
michael@0 843 UChar *first, int32_t firstLength, int32_t firstCapacity,
michael@0 844 const UChar *second, int32_t secondLength,
michael@0 845 UErrorCode *pErrorCode) {
michael@0 846 return normalizeSecondAndAppend(norm2,
michael@0 847 first, firstLength, firstCapacity,
michael@0 848 second, secondLength,
michael@0 849 FALSE, pErrorCode);
michael@0 850 }
michael@0 851
michael@0 852 U_CAPI int32_t U_EXPORT2
michael@0 853 unorm2_getDecomposition(const UNormalizer2 *norm2,
michael@0 854 UChar32 c, UChar *decomposition, int32_t capacity,
michael@0 855 UErrorCode *pErrorCode) {
michael@0 856 if(U_FAILURE(*pErrorCode)) {
michael@0 857 return 0;
michael@0 858 }
michael@0 859 if(decomposition==NULL ? capacity!=0 : capacity<0) {
michael@0 860 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 861 return 0;
michael@0 862 }
michael@0 863 UnicodeString destString(decomposition, 0, capacity);
michael@0 864 if(reinterpret_cast<const Normalizer2 *>(norm2)->getDecomposition(c, destString)) {
michael@0 865 return destString.extract(decomposition, capacity, *pErrorCode);
michael@0 866 } else {
michael@0 867 return -1;
michael@0 868 }
michael@0 869 }
michael@0 870
michael@0 871 U_CAPI int32_t U_EXPORT2
michael@0 872 unorm2_getRawDecomposition(const UNormalizer2 *norm2,
michael@0 873 UChar32 c, UChar *decomposition, int32_t capacity,
michael@0 874 UErrorCode *pErrorCode) {
michael@0 875 if(U_FAILURE(*pErrorCode)) {
michael@0 876 return 0;
michael@0 877 }
michael@0 878 if(decomposition==NULL ? capacity!=0 : capacity<0) {
michael@0 879 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 880 return 0;
michael@0 881 }
michael@0 882 UnicodeString destString(decomposition, 0, capacity);
michael@0 883 if(reinterpret_cast<const Normalizer2 *>(norm2)->getRawDecomposition(c, destString)) {
michael@0 884 return destString.extract(decomposition, capacity, *pErrorCode);
michael@0 885 } else {
michael@0 886 return -1;
michael@0 887 }
michael@0 888 }
michael@0 889
michael@0 890 U_CAPI UChar32 U_EXPORT2
michael@0 891 unorm2_composePair(const UNormalizer2 *norm2, UChar32 a, UChar32 b) {
michael@0 892 return reinterpret_cast<const Normalizer2 *>(norm2)->composePair(a, b);
michael@0 893 }
michael@0 894
michael@0 895 U_CAPI uint8_t U_EXPORT2
michael@0 896 unorm2_getCombiningClass(const UNormalizer2 *norm2, UChar32 c) {
michael@0 897 return reinterpret_cast<const Normalizer2 *>(norm2)->getCombiningClass(c);
michael@0 898 }
michael@0 899
michael@0 900 U_CAPI UBool U_EXPORT2
michael@0 901 unorm2_isNormalized(const UNormalizer2 *norm2,
michael@0 902 const UChar *s, int32_t length,
michael@0 903 UErrorCode *pErrorCode) {
michael@0 904 if(U_FAILURE(*pErrorCode)) {
michael@0 905 return 0;
michael@0 906 }
michael@0 907 if((s==NULL && length!=0) || length<-1) {
michael@0 908 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 909 return 0;
michael@0 910 }
michael@0 911 UnicodeString sString(length<0, s, length);
michael@0 912 return ((const Normalizer2 *)norm2)->isNormalized(sString, *pErrorCode);
michael@0 913 }
michael@0 914
michael@0 915 U_CAPI UNormalizationCheckResult U_EXPORT2
michael@0 916 unorm2_quickCheck(const UNormalizer2 *norm2,
michael@0 917 const UChar *s, int32_t length,
michael@0 918 UErrorCode *pErrorCode) {
michael@0 919 if(U_FAILURE(*pErrorCode)) {
michael@0 920 return UNORM_NO;
michael@0 921 }
michael@0 922 if((s==NULL && length!=0) || length<-1) {
michael@0 923 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 924 return UNORM_NO;
michael@0 925 }
michael@0 926 UnicodeString sString(length<0, s, length);
michael@0 927 return ((const Normalizer2 *)norm2)->quickCheck(sString, *pErrorCode);
michael@0 928 }
michael@0 929
michael@0 930 U_CAPI int32_t U_EXPORT2
michael@0 931 unorm2_spanQuickCheckYes(const UNormalizer2 *norm2,
michael@0 932 const UChar *s, int32_t length,
michael@0 933 UErrorCode *pErrorCode) {
michael@0 934 if(U_FAILURE(*pErrorCode)) {
michael@0 935 return 0;
michael@0 936 }
michael@0 937 if((s==NULL && length!=0) || length<-1) {
michael@0 938 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
michael@0 939 return 0;
michael@0 940 }
michael@0 941 UnicodeString sString(length<0, s, length);
michael@0 942 return ((const Normalizer2 *)norm2)->spanQuickCheckYes(sString, *pErrorCode);
michael@0 943 }
michael@0 944
michael@0 945 U_CAPI UBool U_EXPORT2
michael@0 946 unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c) {
michael@0 947 return ((const Normalizer2 *)norm2)->hasBoundaryBefore(c);
michael@0 948 }
michael@0 949
michael@0 950 U_CAPI UBool U_EXPORT2
michael@0 951 unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c) {
michael@0 952 return ((const Normalizer2 *)norm2)->hasBoundaryAfter(c);
michael@0 953 }
michael@0 954
michael@0 955 U_CAPI UBool U_EXPORT2
michael@0 956 unorm2_isInert(const UNormalizer2 *norm2, UChar32 c) {
michael@0 957 return ((const Normalizer2 *)norm2)->isInert(c);
michael@0 958 }
michael@0 959
michael@0 960 // Some properties APIs ---------------------------------------------------- ***
michael@0 961
michael@0 962 U_CAPI uint8_t U_EXPORT2
michael@0 963 u_getCombiningClass(UChar32 c) {
michael@0 964 UErrorCode errorCode=U_ZERO_ERROR;
michael@0 965 const Normalizer2 *nfd=Normalizer2Factory::getNFDInstance(errorCode);
michael@0 966 if(U_SUCCESS(errorCode)) {
michael@0 967 return nfd->getCombiningClass(c);
michael@0 968 } else {
michael@0 969 return 0;
michael@0 970 }
michael@0 971 }
michael@0 972
michael@0 973 U_CFUNC UNormalizationCheckResult
michael@0 974 unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) {
michael@0 975 if(mode<=UNORM_NONE || UNORM_FCD<=mode) {
michael@0 976 return UNORM_YES;
michael@0 977 }
michael@0 978 UErrorCode errorCode=U_ZERO_ERROR;
michael@0 979 const Normalizer2 *norm2=Normalizer2Factory::getInstance(mode, errorCode);
michael@0 980 if(U_SUCCESS(errorCode)) {
michael@0 981 return ((const Normalizer2WithImpl *)norm2)->getQuickCheck(c);
michael@0 982 } else {
michael@0 983 return UNORM_MAYBE;
michael@0 984 }
michael@0 985 }
michael@0 986
michael@0 987 U_CFUNC uint16_t
michael@0 988 unorm_getFCD16(UChar32 c) {
michael@0 989 UErrorCode errorCode=U_ZERO_ERROR;
michael@0 990 const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode);
michael@0 991 if(U_SUCCESS(errorCode)) {
michael@0 992 return impl->getFCD16(c);
michael@0 993 } else {
michael@0 994 return 0;
michael@0 995 }
michael@0 996 }
michael@0 997
michael@0 998 #endif // !UCONFIG_NO_NORMALIZATION

mercurial