intl/icu/source/common/normalizer2.cpp

Wed, 31 Dec 2014 07:22:50 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 07:22:50 +0100
branch
TOR_BUG_3246
changeset 4
fc2d59ddac77
permissions
-rw-r--r--

Correct previous dual key logic pending first delivery installment.

     1 /*
     2 *******************************************************************************
     3 *
     4 *   Copyright (C) 2009-2013, International Business Machines
     5 *   Corporation and others.  All Rights Reserved.
     6 *
     7 *******************************************************************************
     8 *   file name:  normalizer2.cpp
     9 *   encoding:   US-ASCII
    10 *   tab size:   8 (not used)
    11 *   indentation:4
    12 *
    13 *   created on: 2009nov22
    14 *   created by: Markus W. Scherer
    15 */
    17 #include "unicode/utypes.h"
    19 #if !UCONFIG_NO_NORMALIZATION
    21 #include "unicode/localpointer.h"
    22 #include "unicode/normalizer2.h"
    23 #include "unicode/unistr.h"
    24 #include "unicode/unorm.h"
    25 #include "cpputils.h"
    26 #include "cstring.h"
    27 #include "mutex.h"
    28 #include "normalizer2impl.h"
    29 #include "uassert.h"
    30 #include "ucln_cmn.h"
    31 #include "uhash.h"
    33 U_NAMESPACE_BEGIN
    35 // Public API dispatch via Normalizer2 subclasses -------------------------- ***
    37 Normalizer2::~Normalizer2() {}
    39 UBool
    40 Normalizer2::getRawDecomposition(UChar32, UnicodeString &) const {
    41     return FALSE;
    42 }
    44 UChar32
    45 Normalizer2::composePair(UChar32, UChar32) const {
    46     return U_SENTINEL;
    47 }
    49 uint8_t
    50 Normalizer2::getCombiningClass(UChar32 /*c*/) const {
    51     return 0;
    52 }
    54 // Normalizer2 implementation for the old UNORM_NONE.
    55 class NoopNormalizer2 : public Normalizer2 {
    56     virtual ~NoopNormalizer2();
    58     virtual UnicodeString &
    59     normalize(const UnicodeString &src,
    60               UnicodeString &dest,
    61               UErrorCode &errorCode) const {
    62         if(U_SUCCESS(errorCode)) {
    63             if(&dest!=&src) {
    64                 dest=src;
    65             } else {
    66                 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    67             }
    68         }
    69         return dest;
    70     }
    71     virtual UnicodeString &
    72     normalizeSecondAndAppend(UnicodeString &first,
    73                              const UnicodeString &second,
    74                              UErrorCode &errorCode) const {
    75         if(U_SUCCESS(errorCode)) {
    76             if(&first!=&second) {
    77                 first.append(second);
    78             } else {
    79                 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    80             }
    81         }
    82         return first;
    83     }
    84     virtual UnicodeString &
    85     append(UnicodeString &first,
    86            const UnicodeString &second,
    87            UErrorCode &errorCode) const {
    88         if(U_SUCCESS(errorCode)) {
    89             if(&first!=&second) {
    90                 first.append(second);
    91             } else {
    92                 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    93             }
    94         }
    95         return first;
    96     }
    97     virtual UBool
    98     getDecomposition(UChar32, UnicodeString &) const {
    99         return FALSE;
   100     }
   101     // No need to override the default getRawDecomposition().
   102     virtual UBool
   103     isNormalized(const UnicodeString &, UErrorCode &) const {
   104         return TRUE;
   105     }
   106     virtual UNormalizationCheckResult
   107     quickCheck(const UnicodeString &, UErrorCode &) const {
   108         return UNORM_YES;
   109     }
   110     virtual int32_t
   111     spanQuickCheckYes(const UnicodeString &s, UErrorCode &) const {
   112         return s.length();
   113     }
   114     virtual UBool hasBoundaryBefore(UChar32) const { return TRUE; }
   115     virtual UBool hasBoundaryAfter(UChar32) const { return TRUE; }
   116     virtual UBool isInert(UChar32) const { return TRUE; }
   117 };
   119 NoopNormalizer2::~NoopNormalizer2() {}
   121 // Intermediate class:
   122 // Has Normalizer2Impl and does boilerplate argument checking and setup.
   123 class Normalizer2WithImpl : public Normalizer2 {
   124 public:
   125     Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {}
   126     virtual ~Normalizer2WithImpl();
   128     // normalize
   129     virtual UnicodeString &
   130     normalize(const UnicodeString &src,
   131               UnicodeString &dest,
   132               UErrorCode &errorCode) const {
   133         if(U_FAILURE(errorCode)) {
   134             dest.setToBogus();
   135             return dest;
   136         }
   137         const UChar *sArray=src.getBuffer();
   138         if(&dest==&src || sArray==NULL) {
   139             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
   140             dest.setToBogus();
   141             return dest;
   142         }
   143         dest.remove();
   144         ReorderingBuffer buffer(impl, dest);
   145         if(buffer.init(src.length(), errorCode)) {
   146             normalize(sArray, sArray+src.length(), buffer, errorCode);
   147         }
   148         return dest;
   149     }
   150     virtual void
   151     normalize(const UChar *src, const UChar *limit,
   152               ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
   154     // normalize and append
   155     virtual UnicodeString &
   156     normalizeSecondAndAppend(UnicodeString &first,
   157                              const UnicodeString &second,
   158                              UErrorCode &errorCode) const {
   159         return normalizeSecondAndAppend(first, second, TRUE, errorCode);
   160     }
   161     virtual UnicodeString &
   162     append(UnicodeString &first,
   163            const UnicodeString &second,
   164            UErrorCode &errorCode) const {
   165         return normalizeSecondAndAppend(first, second, FALSE, errorCode);
   166     }
   167     UnicodeString &
   168     normalizeSecondAndAppend(UnicodeString &first,
   169                              const UnicodeString &second,
   170                              UBool doNormalize,
   171                              UErrorCode &errorCode) const {
   172         uprv_checkCanGetBuffer(first, errorCode);
   173         if(U_FAILURE(errorCode)) {
   174             return first;
   175         }
   176         const UChar *secondArray=second.getBuffer();
   177         if(&first==&second || secondArray==NULL) {
   178             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
   179             return first;
   180         }
   181         int32_t firstLength=first.length();
   182         UnicodeString safeMiddle;
   183         {
   184             ReorderingBuffer buffer(impl, first);
   185             if(buffer.init(firstLength+second.length(), errorCode)) {
   186                 normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize,
   187                                    safeMiddle, buffer, errorCode);
   188             }
   189         }  // The ReorderingBuffer destructor finalizes the first string.
   190         if(U_FAILURE(errorCode)) {
   191             // Restore the modified suffix of the first string.
   192             first.replace(firstLength-safeMiddle.length(), 0x7fffffff, safeMiddle);
   193         }
   194         return first;
   195     }
   196     virtual void
   197     normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
   198                        UnicodeString &safeMiddle,
   199                        ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
   200     virtual UBool
   201     getDecomposition(UChar32 c, UnicodeString &decomposition) const {
   202         UChar buffer[4];
   203         int32_t length;
   204         const UChar *d=impl.getDecomposition(c, buffer, length);
   205         if(d==NULL) {
   206             return FALSE;
   207         }
   208         if(d==buffer) {
   209             decomposition.setTo(buffer, length);  // copy the string (Jamos from Hangul syllable c)
   210         } else {
   211             decomposition.setTo(FALSE, d, length);  // read-only alias
   212         }
   213         return TRUE;
   214     }
   215     virtual UBool
   216     getRawDecomposition(UChar32 c, UnicodeString &decomposition) const {
   217         UChar buffer[30];
   218         int32_t length;
   219         const UChar *d=impl.getRawDecomposition(c, buffer, length);
   220         if(d==NULL) {
   221             return FALSE;
   222         }
   223         if(d==buffer) {
   224             decomposition.setTo(buffer, length);  // copy the string (algorithmic decomposition)
   225         } else {
   226             decomposition.setTo(FALSE, d, length);  // read-only alias
   227         }
   228         return TRUE;
   229     }
   230     virtual UChar32
   231     composePair(UChar32 a, UChar32 b) const {
   232         return impl.composePair(a, b);
   233     }
   235     virtual uint8_t
   236     getCombiningClass(UChar32 c) const {
   237         return impl.getCC(impl.getNorm16(c));
   238     }
   240     // quick checks
   241     virtual UBool
   242     isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
   243         if(U_FAILURE(errorCode)) {
   244             return FALSE;
   245         }
   246         const UChar *sArray=s.getBuffer();
   247         if(sArray==NULL) {
   248             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
   249             return FALSE;
   250         }
   251         const UChar *sLimit=sArray+s.length();
   252         return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode);
   253     }
   254     virtual UNormalizationCheckResult
   255     quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
   256         return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO;
   257     }
   258     virtual int32_t
   259     spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const {
   260         if(U_FAILURE(errorCode)) {
   261             return 0;
   262         }
   263         const UChar *sArray=s.getBuffer();
   264         if(sArray==NULL) {
   265             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
   266             return 0;
   267         }
   268         return (int32_t)(spanQuickCheckYes(sArray, sArray+s.length(), errorCode)-sArray);
   269     }
   270     virtual const UChar *
   271     spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const = 0;
   273     virtual UNormalizationCheckResult getQuickCheck(UChar32) const {
   274         return UNORM_YES;
   275     }
   277     const Normalizer2Impl &impl;
   278 };
   280 Normalizer2WithImpl::~Normalizer2WithImpl() {}
   282 class DecomposeNormalizer2 : public Normalizer2WithImpl {
   283 public:
   284     DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
   285     virtual ~DecomposeNormalizer2();
   287 private:
   288     virtual void
   289     normalize(const UChar *src, const UChar *limit,
   290               ReorderingBuffer &buffer, UErrorCode &errorCode) const {
   291         impl.decompose(src, limit, &buffer, errorCode);
   292     }
   293     using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.
   294     virtual void
   295     normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
   296                        UnicodeString &safeMiddle,
   297                        ReorderingBuffer &buffer, UErrorCode &errorCode) const {
   298         impl.decomposeAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
   299     }
   300     virtual const UChar *
   301     spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
   302         return impl.decompose(src, limit, NULL, errorCode);
   303     }
   304     using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function.
   305     virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
   306         return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO;
   307     }
   308     virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasDecompBoundary(c, TRUE); }
   309     virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasDecompBoundary(c, FALSE); }
   310     virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); }
   311 };
   313 DecomposeNormalizer2::~DecomposeNormalizer2() {}
   315 class ComposeNormalizer2 : public Normalizer2WithImpl {
   316 public:
   317     ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) :
   318         Normalizer2WithImpl(ni), onlyContiguous(fcc) {}
   319     virtual ~ComposeNormalizer2();
   321 private:
   322     virtual void
   323     normalize(const UChar *src, const UChar *limit,
   324               ReorderingBuffer &buffer, UErrorCode &errorCode) const {
   325         impl.compose(src, limit, onlyContiguous, TRUE, buffer, errorCode);
   326     }
   327     using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.
   328     virtual void
   329     normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
   330                        UnicodeString &safeMiddle,
   331                        ReorderingBuffer &buffer, UErrorCode &errorCode) const {
   332         impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, safeMiddle, buffer, errorCode);
   333     }
   335     virtual UBool
   336     isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
   337         if(U_FAILURE(errorCode)) {
   338             return FALSE;
   339         }
   340         const UChar *sArray=s.getBuffer();
   341         if(sArray==NULL) {
   342             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
   343             return FALSE;
   344         }
   345         UnicodeString temp;
   346         ReorderingBuffer buffer(impl, temp);
   347         if(!buffer.init(5, errorCode)) {  // small destCapacity for substring normalization
   348             return FALSE;
   349         }
   350         return impl.compose(sArray, sArray+s.length(), onlyContiguous, FALSE, buffer, errorCode);
   351     }
   352     virtual UNormalizationCheckResult
   353     quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
   354         if(U_FAILURE(errorCode)) {
   355             return UNORM_MAYBE;
   356         }
   357         const UChar *sArray=s.getBuffer();
   358         if(sArray==NULL) {
   359             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
   360             return UNORM_MAYBE;
   361         }
   362         UNormalizationCheckResult qcResult=UNORM_YES;
   363         impl.composeQuickCheck(sArray, sArray+s.length(), onlyContiguous, &qcResult);
   364         return qcResult;
   365     }
   366     virtual const UChar *
   367     spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &) const {
   368         return impl.composeQuickCheck(src, limit, onlyContiguous, NULL);
   369     }
   370     using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function.
   371     virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
   372         return impl.getCompQuickCheck(impl.getNorm16(c));
   373     }
   374     virtual UBool hasBoundaryBefore(UChar32 c) const {
   375         return impl.hasCompBoundaryBefore(c);
   376     }
   377     virtual UBool hasBoundaryAfter(UChar32 c) const {
   378         return impl.hasCompBoundaryAfter(c, onlyContiguous, FALSE);
   379     }
   380     virtual UBool isInert(UChar32 c) const {
   381         return impl.hasCompBoundaryAfter(c, onlyContiguous, TRUE);
   382     }
   384     const UBool onlyContiguous;
   385 };
   387 ComposeNormalizer2::~ComposeNormalizer2() {}
   389 class FCDNormalizer2 : public Normalizer2WithImpl {
   390 public:
   391     FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
   392     virtual ~FCDNormalizer2();
   394 private:
   395     virtual void
   396     normalize(const UChar *src, const UChar *limit,
   397               ReorderingBuffer &buffer, UErrorCode &errorCode) const {
   398         impl.makeFCD(src, limit, &buffer, errorCode);
   399     }
   400     using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.
   401     virtual void
   402     normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
   403                        UnicodeString &safeMiddle,
   404                        ReorderingBuffer &buffer, UErrorCode &errorCode) const {
   405         impl.makeFCDAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
   406     }
   407     virtual const UChar *
   408     spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
   409         return impl.makeFCD(src, limit, NULL, errorCode);
   410     }
   411     using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function.
   412     virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasFCDBoundaryBefore(c); }
   413     virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasFCDBoundaryAfter(c); }
   414     virtual UBool isInert(UChar32 c) const { return impl.isFCDInert(c); }
   415 };
   417 FCDNormalizer2::~FCDNormalizer2() {}
   419 // instance cache ---------------------------------------------------------- ***
   421 struct Norm2AllModes : public UMemory {
   422     static Norm2AllModes *createInstance(const char *packageName,
   423                                          const char *name,
   424                                          UErrorCode &errorCode);
   425     Norm2AllModes() : comp(impl, FALSE), decomp(impl), fcd(impl), fcc(impl, TRUE) {}
   427     Normalizer2Impl impl;
   428     ComposeNormalizer2 comp;
   429     DecomposeNormalizer2 decomp;
   430     FCDNormalizer2 fcd;
   431     ComposeNormalizer2 fcc;
   432 };
   434 Norm2AllModes *
   435 Norm2AllModes::createInstance(const char *packageName,
   436                               const char *name,
   437                               UErrorCode &errorCode) {
   438     if(U_FAILURE(errorCode)) {
   439         return NULL;
   440     }
   441     LocalPointer<Norm2AllModes> allModes(new Norm2AllModes);
   442     if(allModes.isNull()) {
   443         errorCode=U_MEMORY_ALLOCATION_ERROR;
   444         return NULL;
   445     }
   446     allModes->impl.load(packageName, name, errorCode);
   447     return U_SUCCESS(errorCode) ? allModes.orphan() : NULL;
   448 }
   450 U_CDECL_BEGIN
   451 static UBool U_CALLCONV uprv_normalizer2_cleanup();
   452 U_CDECL_END
   455 static Norm2AllModes *nfcSingleton;
   456 static Norm2AllModes *nfkcSingleton;
   457 static Norm2AllModes *nfkc_cfSingleton;
   458 static Normalizer2   *noopSingleton;
   459 static UHashtable    *cache=NULL;
   461 static icu::UInitOnce nfcInitOnce = U_INITONCE_INITIALIZER;
   462 static icu::UInitOnce nfkcInitOnce = U_INITONCE_INITIALIZER;
   463 static icu::UInitOnce nfkc_cfInitOnce = U_INITONCE_INITIALIZER;
   464 static icu::UInitOnce noopInitOnce = U_INITONCE_INITIALIZER;
   466 // UInitOnce singleton initialization function
   467 static void U_CALLCONV initSingletons(const char *what, UErrorCode &errorCode) {
   468     if (uprv_strcmp(what, "nfc") == 0) {
   469         nfcSingleton     = Norm2AllModes::createInstance(NULL, "nfc", errorCode);
   470     } else if (uprv_strcmp(what, "nfkc") == 0) {
   471         nfkcSingleton    = Norm2AllModes::createInstance(NULL, "nfkc", errorCode);
   472     } else if (uprv_strcmp(what, "nfkc_cf") == 0) {
   473         nfkc_cfSingleton = Norm2AllModes::createInstance(NULL, "nfkc_cf", errorCode);
   474     } else if (uprv_strcmp(what, "noop") == 0) {
   475         noopSingleton    = new NoopNormalizer2;
   476     } else {
   477         U_ASSERT(FALSE);   // Unknown singleton
   478     }
   479     ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup);
   480 }
   482 U_CDECL_BEGIN
   484 static void U_CALLCONV deleteNorm2AllModes(void *allModes) {
   485     delete (Norm2AllModes *)allModes;
   486 }
   488 static UBool U_CALLCONV uprv_normalizer2_cleanup() {
   489     delete nfcSingleton;
   490     nfcSingleton = NULL;
   491     delete nfkcSingleton;
   492     nfkcSingleton = NULL;
   493     delete nfkc_cfSingleton;
   494     nfkc_cfSingleton = NULL;
   495     delete noopSingleton;
   496     noopSingleton = NULL;
   497     uhash_close(cache);
   498     cache=NULL;
   499     nfcInitOnce.reset(); 
   500     nfkcInitOnce.reset(); 
   501     nfkc_cfInitOnce.reset(); 
   502     noopInitOnce.reset(); 
   503     return TRUE;
   504 }
   506 U_CDECL_END
   508 const Normalizer2 *Normalizer2Factory::getNFCInstance(UErrorCode &errorCode) {
   509     umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode);
   510     return nfcSingleton!=NULL ? &nfcSingleton->comp : NULL;
   511 }
   513 const Normalizer2 *Normalizer2Factory::getNFDInstance(UErrorCode &errorCode) {
   514     umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode);
   515     return nfcSingleton!=NULL ? &nfcSingleton->decomp : NULL;
   516 }
   518 const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) {
   519     umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode);
   520     return nfcSingleton!=NULL ? &nfcSingleton->fcd : NULL;
   521 }
   523 const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) {
   524     umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode);
   525     return nfcSingleton!=NULL ? &nfcSingleton->fcc : NULL;
   526 }
   528 const Normalizer2 *Normalizer2Factory::getNFKCInstance(UErrorCode &errorCode) {
   529     umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode);
   530     return nfkcSingleton!=NULL ? &nfkcSingleton->comp : NULL;
   531 }
   533 const Normalizer2 *Normalizer2Factory::getNFKDInstance(UErrorCode &errorCode) {
   534     umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode);
   535     return nfkcSingleton!=NULL ? &nfkcSingleton->decomp : NULL;
   536 }
   538 const Normalizer2 *Normalizer2Factory::getNFKC_CFInstance(UErrorCode &errorCode) {
   539     umtx_initOnce(nfkc_cfInitOnce, &initSingletons, "nfkc_cf", errorCode);
   540     return nfkc_cfSingleton!=NULL ? &nfkc_cfSingleton->comp : NULL;
   541 }
   543 const Normalizer2 *Normalizer2Factory::getNoopInstance(UErrorCode &errorCode) {
   544     umtx_initOnce(noopInitOnce, &initSingletons, "noop", errorCode);
   545     return noopSingleton;
   546 }
   548 const Normalizer2 *
   549 Normalizer2Factory::getInstance(UNormalizationMode mode, UErrorCode &errorCode) {
   550     if(U_FAILURE(errorCode)) {
   551         return NULL;
   552     }
   553     switch(mode) {
   554     case UNORM_NFD:
   555         return getNFDInstance(errorCode);
   556     case UNORM_NFKD:
   557         return getNFKDInstance(errorCode);
   558     case UNORM_NFC:
   559         return getNFCInstance(errorCode);
   560     case UNORM_NFKC:
   561         return getNFKCInstance(errorCode);
   562     case UNORM_FCD:
   563         return getFCDInstance(errorCode);
   564     default:  // UNORM_NONE
   565         return getNoopInstance(errorCode);
   566     }
   567 }
   569 const Normalizer2Impl *
   570 Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) {
   571     umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode);
   572     return nfcSingleton!=NULL ? &nfcSingleton->impl : NULL;
   573 }
   575 const Normalizer2Impl *
   576 Normalizer2Factory::getNFKCImpl(UErrorCode &errorCode) {
   577     umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode);
   578     return nfkcSingleton!=NULL ? &nfkcSingleton->impl : NULL;
   579 }
   581 const Normalizer2Impl *
   582 Normalizer2Factory::getNFKC_CFImpl(UErrorCode &errorCode) {
   583     umtx_initOnce(nfkc_cfInitOnce, &initSingletons, "nfkc_cf", errorCode);
   584     return nfkc_cfSingleton!=NULL ? &nfkc_cfSingleton->impl : NULL;
   585 }
   587 const Normalizer2Impl *
   588 Normalizer2Factory::getImpl(const Normalizer2 *norm2) {
   589     return &((Normalizer2WithImpl *)norm2)->impl;
   590 }
   592 const Normalizer2 *
   593 Normalizer2::getNFCInstance(UErrorCode &errorCode) {
   594     return Normalizer2Factory::getNFCInstance(errorCode);
   595 }
   597 const Normalizer2 *
   598 Normalizer2::getNFDInstance(UErrorCode &errorCode) {
   599     return Normalizer2Factory::getNFDInstance(errorCode);
   600 }
   602 const Normalizer2 *
   603 Normalizer2::getNFKCInstance(UErrorCode &errorCode) {
   604     return Normalizer2Factory::getNFKCInstance(errorCode);
   605 }
   607 const Normalizer2 *
   608 Normalizer2::getNFKDInstance(UErrorCode &errorCode) {
   609     return Normalizer2Factory::getNFKDInstance(errorCode);
   610 }
   612 const Normalizer2 *
   613 Normalizer2::getNFKCCasefoldInstance(UErrorCode &errorCode) {
   614     return Normalizer2Factory::getNFKC_CFInstance(errorCode);
   615 }
   617 const Normalizer2 *
   618 Normalizer2::getInstance(const char *packageName,
   619                          const char *name,
   620                          UNormalization2Mode mode,
   621                          UErrorCode &errorCode) {
   622     if(U_FAILURE(errorCode)) {
   623         return NULL;
   624     }
   625     if(name==NULL || *name==0) {
   626         errorCode=U_ILLEGAL_ARGUMENT_ERROR;
   627         return NULL;
   628     }
   629     Norm2AllModes *allModes=NULL;
   630     if(packageName==NULL) {
   631         if(0==uprv_strcmp(name, "nfc")) {
   632             umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode);
   633             allModes=nfcSingleton;
   634         } else if(0==uprv_strcmp(name, "nfkc")) {
   635             umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode);
   636             allModes=nfkcSingleton;
   637         } else if(0==uprv_strcmp(name, "nfkc_cf")) {
   638             umtx_initOnce(nfkc_cfInitOnce, &initSingletons, "nfkc_cf", errorCode);
   639             allModes=nfkc_cfSingleton;
   640         }
   641     }
   642     if(allModes==NULL && U_SUCCESS(errorCode)) {
   643         {
   644             Mutex lock;
   645             if(cache!=NULL) {
   646                 allModes=(Norm2AllModes *)uhash_get(cache, name);
   647             }
   648         }
   649         if(allModes==NULL) {
   650             LocalPointer<Norm2AllModes> localAllModes(
   651                 Norm2AllModes::createInstance(packageName, name, errorCode));
   652             if(U_SUCCESS(errorCode)) {
   653                 Mutex lock;
   654                 if(cache==NULL) {
   655                     cache=uhash_open(uhash_hashChars, uhash_compareChars, NULL, &errorCode);
   656                     if(U_FAILURE(errorCode)) {
   657                         return NULL;
   658                     }
   659                     uhash_setKeyDeleter(cache, uprv_free);
   660                     uhash_setValueDeleter(cache, deleteNorm2AllModes);
   661                 }
   662                 void *temp=uhash_get(cache, name);
   663                 if(temp==NULL) {
   664                     int32_t keyLength=uprv_strlen(name)+1;
   665                     char *nameCopy=(char *)uprv_malloc(keyLength);
   666                     if(nameCopy==NULL) {
   667                         errorCode=U_MEMORY_ALLOCATION_ERROR;
   668                         return NULL;
   669                     }
   670                     uprv_memcpy(nameCopy, name, keyLength);
   671                     uhash_put(cache, nameCopy, allModes=localAllModes.orphan(), &errorCode);
   672                 } else {
   673                     // race condition
   674                     allModes=(Norm2AllModes *)temp;
   675                 }
   676             }
   677         }
   678     }
   679     if(allModes!=NULL && U_SUCCESS(errorCode)) {
   680         switch(mode) {
   681         case UNORM2_COMPOSE:
   682             return &allModes->comp;
   683         case UNORM2_DECOMPOSE:
   684             return &allModes->decomp;
   685         case UNORM2_FCD:
   686             return &allModes->fcd;
   687         case UNORM2_COMPOSE_CONTIGUOUS:
   688             return &allModes->fcc;
   689         default:
   690             break;  // do nothing
   691         }
   692     }
   693     return NULL;
   694 }
   696 U_NAMESPACE_END
   698 // C API ------------------------------------------------------------------- ***
   700 U_NAMESPACE_USE
   702 U_CAPI const UNormalizer2 * U_EXPORT2
   703 unorm2_getNFCInstance(UErrorCode *pErrorCode) {
   704     return (const UNormalizer2 *)Normalizer2::getNFCInstance(*pErrorCode);
   705 }
   707 U_CAPI const UNormalizer2 * U_EXPORT2
   708 unorm2_getNFDInstance(UErrorCode *pErrorCode) {
   709     return (const UNormalizer2 *)Normalizer2::getNFDInstance(*pErrorCode);
   710 }
   712 U_CAPI const UNormalizer2 * U_EXPORT2
   713 unorm2_getNFKCInstance(UErrorCode *pErrorCode) {
   714     return (const UNormalizer2 *)Normalizer2::getNFKCInstance(*pErrorCode);
   715 }
   717 U_CAPI const UNormalizer2 * U_EXPORT2
   718 unorm2_getNFKDInstance(UErrorCode *pErrorCode) {
   719     return (const UNormalizer2 *)Normalizer2::getNFKDInstance(*pErrorCode);
   720 }
   722 U_CAPI const UNormalizer2 * U_EXPORT2
   723 unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode) {
   724     return (const UNormalizer2 *)Normalizer2::getNFKCCasefoldInstance(*pErrorCode);
   725 }
   727 U_CAPI const UNormalizer2 * U_EXPORT2
   728 unorm2_getInstance(const char *packageName,
   729                    const char *name,
   730                    UNormalization2Mode mode,
   731                    UErrorCode *pErrorCode) {
   732     return (const UNormalizer2 *)Normalizer2::getInstance(packageName, name, mode, *pErrorCode);
   733 }
   735 U_CAPI void U_EXPORT2
   736 unorm2_close(UNormalizer2 *norm2) {
   737     delete (Normalizer2 *)norm2;
   738 }
   740 U_CAPI int32_t U_EXPORT2
   741 unorm2_normalize(const UNormalizer2 *norm2,
   742                  const UChar *src, int32_t length,
   743                  UChar *dest, int32_t capacity,
   744                  UErrorCode *pErrorCode) {
   745     if(U_FAILURE(*pErrorCode)) {
   746         return 0;
   747     }
   748     if( (src==NULL ? length!=0 : length<-1) ||
   749         (dest==NULL ? capacity!=0 : capacity<0) ||
   750         (src==dest && src!=NULL)
   751     ) {
   752         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
   753         return 0;
   754     }
   755     UnicodeString destString(dest, 0, capacity);
   756     // length==0: Nothing to do, and n2wi->normalize(NULL, NULL, buffer, ...) would crash.
   757     if(length!=0) {
   758         const Normalizer2 *n2=(const Normalizer2 *)norm2;
   759         const Normalizer2WithImpl *n2wi=dynamic_cast<const Normalizer2WithImpl *>(n2);
   760         if(n2wi!=NULL) {
   761             // Avoid duplicate argument checking and support NUL-terminated src.
   762             ReorderingBuffer buffer(n2wi->impl, destString);
   763             if(buffer.init(length, *pErrorCode)) {
   764                 n2wi->normalize(src, length>=0 ? src+length : NULL, buffer, *pErrorCode);
   765             }
   766         } else {
   767             UnicodeString srcString(length<0, src, length);
   768             n2->normalize(srcString, destString, *pErrorCode);
   769         }
   770     }
   771     return destString.extract(dest, capacity, *pErrorCode);
   772 }
   774 static int32_t
   775 normalizeSecondAndAppend(const UNormalizer2 *norm2,
   776                          UChar *first, int32_t firstLength, int32_t firstCapacity,
   777                          const UChar *second, int32_t secondLength,
   778                          UBool doNormalize,
   779                          UErrorCode *pErrorCode) {
   780     if(U_FAILURE(*pErrorCode)) {
   781         return 0;
   782     }
   783     if( (second==NULL ? secondLength!=0 : secondLength<-1) ||
   784         (first==NULL ? (firstCapacity!=0 || firstLength!=0) :
   785                        (firstCapacity<0 || firstLength<-1)) ||
   786         (first==second && first!=NULL)
   787     ) {
   788         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
   789         return 0;
   790     }
   791     UnicodeString firstString(first, firstLength, firstCapacity);
   792     firstLength=firstString.length();  // In case it was -1.
   793     // secondLength==0: Nothing to do, and n2wi->normalizeAndAppend(NULL, NULL, buffer, ...) would crash.
   794     if(secondLength!=0) {
   795         const Normalizer2 *n2=(const Normalizer2 *)norm2;
   796         const Normalizer2WithImpl *n2wi=dynamic_cast<const Normalizer2WithImpl *>(n2);
   797         if(n2wi!=NULL) {
   798             // Avoid duplicate argument checking and support NUL-terminated src.
   799             UnicodeString safeMiddle;
   800             {
   801                 ReorderingBuffer buffer(n2wi->impl, firstString);
   802                 if(buffer.init(firstLength+secondLength+1, *pErrorCode)) {  // destCapacity>=-1
   803                     n2wi->normalizeAndAppend(second, secondLength>=0 ? second+secondLength : NULL,
   804                                              doNormalize, safeMiddle, buffer, *pErrorCode);
   805                 }
   806             }  // The ReorderingBuffer destructor finalizes firstString.
   807             if(U_FAILURE(*pErrorCode) || firstString.length()>firstCapacity) {
   808                 // Restore the modified suffix of the first string.
   809                 // This does not restore first[] array contents between firstLength and firstCapacity.
   810                 // (That might be uninitialized memory, as far as we know.)
   811                 if(first!=NULL) { /* don't dereference NULL */
   812                   safeMiddle.extract(0, 0x7fffffff, first+firstLength-safeMiddle.length());
   813                   if(firstLength<firstCapacity) {
   814                     first[firstLength]=0;  // NUL-terminate in case it was originally.
   815                   }
   816                 }
   817             }
   818         } else {
   819             UnicodeString secondString(secondLength<0, second, secondLength);
   820             if(doNormalize) {
   821                 n2->normalizeSecondAndAppend(firstString, secondString, *pErrorCode);
   822             } else {
   823                 n2->append(firstString, secondString, *pErrorCode);
   824             }
   825         }
   826     }
   827     return firstString.extract(first, firstCapacity, *pErrorCode);
   828 }
   830 U_CAPI int32_t U_EXPORT2
   831 unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2,
   832                                 UChar *first, int32_t firstLength, int32_t firstCapacity,
   833                                 const UChar *second, int32_t secondLength,
   834                                 UErrorCode *pErrorCode) {
   835     return normalizeSecondAndAppend(norm2,
   836                                     first, firstLength, firstCapacity,
   837                                     second, secondLength,
   838                                     TRUE, pErrorCode);
   839 }
   841 U_CAPI int32_t U_EXPORT2
   842 unorm2_append(const UNormalizer2 *norm2,
   843               UChar *first, int32_t firstLength, int32_t firstCapacity,
   844               const UChar *second, int32_t secondLength,
   845               UErrorCode *pErrorCode) {
   846     return normalizeSecondAndAppend(norm2,
   847                                     first, firstLength, firstCapacity,
   848                                     second, secondLength,
   849                                     FALSE, pErrorCode);
   850 }
   852 U_CAPI int32_t U_EXPORT2
   853 unorm2_getDecomposition(const UNormalizer2 *norm2,
   854                         UChar32 c, UChar *decomposition, int32_t capacity,
   855                         UErrorCode *pErrorCode) {
   856     if(U_FAILURE(*pErrorCode)) {
   857         return 0;
   858     }
   859     if(decomposition==NULL ? capacity!=0 : capacity<0) {
   860         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
   861         return 0;
   862     }
   863     UnicodeString destString(decomposition, 0, capacity);
   864     if(reinterpret_cast<const Normalizer2 *>(norm2)->getDecomposition(c, destString)) {
   865         return destString.extract(decomposition, capacity, *pErrorCode);
   866     } else {
   867         return -1;
   868     }
   869 }
   871 U_CAPI int32_t U_EXPORT2
   872 unorm2_getRawDecomposition(const UNormalizer2 *norm2,
   873                            UChar32 c, UChar *decomposition, int32_t capacity,
   874                            UErrorCode *pErrorCode) {
   875     if(U_FAILURE(*pErrorCode)) {
   876         return 0;
   877     }
   878     if(decomposition==NULL ? capacity!=0 : capacity<0) {
   879         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
   880         return 0;
   881     }
   882     UnicodeString destString(decomposition, 0, capacity);
   883     if(reinterpret_cast<const Normalizer2 *>(norm2)->getRawDecomposition(c, destString)) {
   884         return destString.extract(decomposition, capacity, *pErrorCode);
   885     } else {
   886         return -1;
   887     }
   888 }
   890 U_CAPI UChar32 U_EXPORT2
   891 unorm2_composePair(const UNormalizer2 *norm2, UChar32 a, UChar32 b) {
   892     return reinterpret_cast<const Normalizer2 *>(norm2)->composePair(a, b);
   893 }
   895 U_CAPI uint8_t U_EXPORT2
   896 unorm2_getCombiningClass(const UNormalizer2 *norm2, UChar32 c) {
   897     return reinterpret_cast<const Normalizer2 *>(norm2)->getCombiningClass(c);
   898 }
   900 U_CAPI UBool U_EXPORT2
   901 unorm2_isNormalized(const UNormalizer2 *norm2,
   902                     const UChar *s, int32_t length,
   903                     UErrorCode *pErrorCode) {
   904     if(U_FAILURE(*pErrorCode)) {
   905         return 0;
   906     }
   907     if((s==NULL && length!=0) || length<-1) {
   908         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
   909         return 0;
   910     }
   911     UnicodeString sString(length<0, s, length);
   912     return ((const Normalizer2 *)norm2)->isNormalized(sString, *pErrorCode);
   913 }
   915 U_CAPI UNormalizationCheckResult U_EXPORT2
   916 unorm2_quickCheck(const UNormalizer2 *norm2,
   917                   const UChar *s, int32_t length,
   918                   UErrorCode *pErrorCode) {
   919     if(U_FAILURE(*pErrorCode)) {
   920         return UNORM_NO;
   921     }
   922     if((s==NULL && length!=0) || length<-1) {
   923         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
   924         return UNORM_NO;
   925     }
   926     UnicodeString sString(length<0, s, length);
   927     return ((const Normalizer2 *)norm2)->quickCheck(sString, *pErrorCode);
   928 }
   930 U_CAPI int32_t U_EXPORT2
   931 unorm2_spanQuickCheckYes(const UNormalizer2 *norm2,
   932                          const UChar *s, int32_t length,
   933                          UErrorCode *pErrorCode) {
   934     if(U_FAILURE(*pErrorCode)) {
   935         return 0;
   936     }
   937     if((s==NULL && length!=0) || length<-1) {
   938         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
   939         return 0;
   940     }
   941     UnicodeString sString(length<0, s, length);
   942     return ((const Normalizer2 *)norm2)->spanQuickCheckYes(sString, *pErrorCode);
   943 }
   945 U_CAPI UBool U_EXPORT2
   946 unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c) {
   947     return ((const Normalizer2 *)norm2)->hasBoundaryBefore(c);
   948 }
   950 U_CAPI UBool U_EXPORT2
   951 unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c) {
   952     return ((const Normalizer2 *)norm2)->hasBoundaryAfter(c);
   953 }
   955 U_CAPI UBool U_EXPORT2
   956 unorm2_isInert(const UNormalizer2 *norm2, UChar32 c) {
   957     return ((const Normalizer2 *)norm2)->isInert(c);
   958 }
   960 // Some properties APIs ---------------------------------------------------- ***
   962 U_CAPI uint8_t U_EXPORT2
   963 u_getCombiningClass(UChar32 c) {
   964     UErrorCode errorCode=U_ZERO_ERROR;
   965     const Normalizer2 *nfd=Normalizer2Factory::getNFDInstance(errorCode);
   966     if(U_SUCCESS(errorCode)) {
   967         return nfd->getCombiningClass(c);
   968     } else {
   969         return 0;
   970     }
   971 }
   973 U_CFUNC UNormalizationCheckResult
   974 unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) {
   975     if(mode<=UNORM_NONE || UNORM_FCD<=mode) {
   976         return UNORM_YES;
   977     }
   978     UErrorCode errorCode=U_ZERO_ERROR;
   979     const Normalizer2 *norm2=Normalizer2Factory::getInstance(mode, errorCode);
   980     if(U_SUCCESS(errorCode)) {
   981         return ((const Normalizer2WithImpl *)norm2)->getQuickCheck(c);
   982     } else {
   983         return UNORM_MAYBE;
   984     }
   985 }
   987 U_CFUNC uint16_t
   988 unorm_getFCD16(UChar32 c) {
   989     UErrorCode errorCode=U_ZERO_ERROR;
   990     const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode);
   991     if(U_SUCCESS(errorCode)) {
   992         return impl->getFCD16(c);
   993     } else {
   994         return 0;
   995     }
   996 }
   998 #endif  // !UCONFIG_NO_NORMALIZATION

mercurial