michael@0: /* michael@0: ******************************************************************************* michael@0: * michael@0: * Copyright (C) 2009-2013, International Business Machines michael@0: * Corporation and others. All Rights Reserved. michael@0: * michael@0: ******************************************************************************* michael@0: * file name: normalizer2.cpp michael@0: * encoding: US-ASCII michael@0: * tab size: 8 (not used) michael@0: * indentation:4 michael@0: * michael@0: * created on: 2009nov22 michael@0: * created by: Markus W. Scherer michael@0: */ michael@0: michael@0: #include "unicode/utypes.h" michael@0: michael@0: #if !UCONFIG_NO_NORMALIZATION michael@0: michael@0: #include "unicode/localpointer.h" michael@0: #include "unicode/normalizer2.h" michael@0: #include "unicode/unistr.h" michael@0: #include "unicode/unorm.h" michael@0: #include "cpputils.h" michael@0: #include "cstring.h" michael@0: #include "mutex.h" michael@0: #include "normalizer2impl.h" michael@0: #include "uassert.h" michael@0: #include "ucln_cmn.h" michael@0: #include "uhash.h" michael@0: michael@0: U_NAMESPACE_BEGIN michael@0: michael@0: // Public API dispatch via Normalizer2 subclasses -------------------------- *** michael@0: michael@0: Normalizer2::~Normalizer2() {} michael@0: michael@0: UBool michael@0: Normalizer2::getRawDecomposition(UChar32, UnicodeString &) const { michael@0: return FALSE; michael@0: } michael@0: michael@0: UChar32 michael@0: Normalizer2::composePair(UChar32, UChar32) const { michael@0: return U_SENTINEL; michael@0: } michael@0: michael@0: uint8_t michael@0: Normalizer2::getCombiningClass(UChar32 /*c*/) const { michael@0: return 0; michael@0: } michael@0: michael@0: // Normalizer2 implementation for the old UNORM_NONE. michael@0: class NoopNormalizer2 : public Normalizer2 { michael@0: virtual ~NoopNormalizer2(); michael@0: michael@0: virtual UnicodeString & michael@0: normalize(const UnicodeString &src, michael@0: UnicodeString &dest, michael@0: UErrorCode &errorCode) const { michael@0: if(U_SUCCESS(errorCode)) { michael@0: if(&dest!=&src) { michael@0: dest=src; michael@0: } else { michael@0: errorCode=U_ILLEGAL_ARGUMENT_ERROR; michael@0: } michael@0: } michael@0: return dest; michael@0: } michael@0: virtual UnicodeString & michael@0: normalizeSecondAndAppend(UnicodeString &first, michael@0: const UnicodeString &second, michael@0: UErrorCode &errorCode) const { michael@0: if(U_SUCCESS(errorCode)) { michael@0: if(&first!=&second) { michael@0: first.append(second); michael@0: } else { michael@0: errorCode=U_ILLEGAL_ARGUMENT_ERROR; michael@0: } michael@0: } michael@0: return first; michael@0: } michael@0: virtual UnicodeString & michael@0: append(UnicodeString &first, michael@0: const UnicodeString &second, michael@0: UErrorCode &errorCode) const { michael@0: if(U_SUCCESS(errorCode)) { michael@0: if(&first!=&second) { michael@0: first.append(second); michael@0: } else { michael@0: errorCode=U_ILLEGAL_ARGUMENT_ERROR; michael@0: } michael@0: } michael@0: return first; michael@0: } michael@0: virtual UBool michael@0: getDecomposition(UChar32, UnicodeString &) const { michael@0: return FALSE; michael@0: } michael@0: // No need to override the default getRawDecomposition(). michael@0: virtual UBool michael@0: isNormalized(const UnicodeString &, UErrorCode &) const { michael@0: return TRUE; michael@0: } michael@0: virtual UNormalizationCheckResult michael@0: quickCheck(const UnicodeString &, UErrorCode &) const { michael@0: return UNORM_YES; michael@0: } michael@0: virtual int32_t michael@0: spanQuickCheckYes(const UnicodeString &s, UErrorCode &) const { michael@0: return s.length(); michael@0: } michael@0: virtual UBool hasBoundaryBefore(UChar32) const { return TRUE; } michael@0: virtual UBool hasBoundaryAfter(UChar32) const { return TRUE; } michael@0: virtual UBool isInert(UChar32) const { return TRUE; } michael@0: }; michael@0: michael@0: NoopNormalizer2::~NoopNormalizer2() {} michael@0: michael@0: // Intermediate class: michael@0: // Has Normalizer2Impl and does boilerplate argument checking and setup. michael@0: class Normalizer2WithImpl : public Normalizer2 { michael@0: public: michael@0: Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {} michael@0: virtual ~Normalizer2WithImpl(); michael@0: michael@0: // normalize michael@0: virtual UnicodeString & michael@0: normalize(const UnicodeString &src, michael@0: UnicodeString &dest, michael@0: UErrorCode &errorCode) const { michael@0: if(U_FAILURE(errorCode)) { michael@0: dest.setToBogus(); michael@0: return dest; michael@0: } michael@0: const UChar *sArray=src.getBuffer(); michael@0: if(&dest==&src || sArray==NULL) { michael@0: errorCode=U_ILLEGAL_ARGUMENT_ERROR; michael@0: dest.setToBogus(); michael@0: return dest; michael@0: } michael@0: dest.remove(); michael@0: ReorderingBuffer buffer(impl, dest); michael@0: if(buffer.init(src.length(), errorCode)) { michael@0: normalize(sArray, sArray+src.length(), buffer, errorCode); michael@0: } michael@0: return dest; michael@0: } michael@0: virtual void michael@0: normalize(const UChar *src, const UChar *limit, michael@0: ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0; michael@0: michael@0: // normalize and append michael@0: virtual UnicodeString & michael@0: normalizeSecondAndAppend(UnicodeString &first, michael@0: const UnicodeString &second, michael@0: UErrorCode &errorCode) const { michael@0: return normalizeSecondAndAppend(first, second, TRUE, errorCode); michael@0: } michael@0: virtual UnicodeString & michael@0: append(UnicodeString &first, michael@0: const UnicodeString &second, michael@0: UErrorCode &errorCode) const { michael@0: return normalizeSecondAndAppend(first, second, FALSE, errorCode); michael@0: } michael@0: UnicodeString & michael@0: normalizeSecondAndAppend(UnicodeString &first, michael@0: const UnicodeString &second, michael@0: UBool doNormalize, michael@0: UErrorCode &errorCode) const { michael@0: uprv_checkCanGetBuffer(first, errorCode); michael@0: if(U_FAILURE(errorCode)) { michael@0: return first; michael@0: } michael@0: const UChar *secondArray=second.getBuffer(); michael@0: if(&first==&second || secondArray==NULL) { michael@0: errorCode=U_ILLEGAL_ARGUMENT_ERROR; michael@0: return first; michael@0: } michael@0: int32_t firstLength=first.length(); michael@0: UnicodeString safeMiddle; michael@0: { michael@0: ReorderingBuffer buffer(impl, first); michael@0: if(buffer.init(firstLength+second.length(), errorCode)) { michael@0: normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize, michael@0: safeMiddle, buffer, errorCode); michael@0: } michael@0: } // The ReorderingBuffer destructor finalizes the first string. michael@0: if(U_FAILURE(errorCode)) { michael@0: // Restore the modified suffix of the first string. michael@0: first.replace(firstLength-safeMiddle.length(), 0x7fffffff, safeMiddle); michael@0: } michael@0: return first; michael@0: } michael@0: virtual void michael@0: normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, michael@0: UnicodeString &safeMiddle, michael@0: ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0; michael@0: virtual UBool michael@0: getDecomposition(UChar32 c, UnicodeString &decomposition) const { michael@0: UChar buffer[4]; michael@0: int32_t length; michael@0: const UChar *d=impl.getDecomposition(c, buffer, length); michael@0: if(d==NULL) { michael@0: return FALSE; michael@0: } michael@0: if(d==buffer) { michael@0: decomposition.setTo(buffer, length); // copy the string (Jamos from Hangul syllable c) michael@0: } else { michael@0: decomposition.setTo(FALSE, d, length); // read-only alias michael@0: } michael@0: return TRUE; michael@0: } michael@0: virtual UBool michael@0: getRawDecomposition(UChar32 c, UnicodeString &decomposition) const { michael@0: UChar buffer[30]; michael@0: int32_t length; michael@0: const UChar *d=impl.getRawDecomposition(c, buffer, length); michael@0: if(d==NULL) { michael@0: return FALSE; michael@0: } michael@0: if(d==buffer) { michael@0: decomposition.setTo(buffer, length); // copy the string (algorithmic decomposition) michael@0: } else { michael@0: decomposition.setTo(FALSE, d, length); // read-only alias michael@0: } michael@0: return TRUE; michael@0: } michael@0: virtual UChar32 michael@0: composePair(UChar32 a, UChar32 b) const { michael@0: return impl.composePair(a, b); michael@0: } michael@0: michael@0: virtual uint8_t michael@0: getCombiningClass(UChar32 c) const { michael@0: return impl.getCC(impl.getNorm16(c)); michael@0: } michael@0: michael@0: // quick checks michael@0: virtual UBool michael@0: isNormalized(const UnicodeString &s, UErrorCode &errorCode) const { michael@0: if(U_FAILURE(errorCode)) { michael@0: return FALSE; michael@0: } michael@0: const UChar *sArray=s.getBuffer(); michael@0: if(sArray==NULL) { michael@0: errorCode=U_ILLEGAL_ARGUMENT_ERROR; michael@0: return FALSE; michael@0: } michael@0: const UChar *sLimit=sArray+s.length(); michael@0: return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode); michael@0: } michael@0: virtual UNormalizationCheckResult michael@0: quickCheck(const UnicodeString &s, UErrorCode &errorCode) const { michael@0: return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO; michael@0: } michael@0: virtual int32_t michael@0: spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const { michael@0: if(U_FAILURE(errorCode)) { michael@0: return 0; michael@0: } michael@0: const UChar *sArray=s.getBuffer(); michael@0: if(sArray==NULL) { michael@0: errorCode=U_ILLEGAL_ARGUMENT_ERROR; michael@0: return 0; michael@0: } michael@0: return (int32_t)(spanQuickCheckYes(sArray, sArray+s.length(), errorCode)-sArray); michael@0: } michael@0: virtual const UChar * michael@0: spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const = 0; michael@0: michael@0: virtual UNormalizationCheckResult getQuickCheck(UChar32) const { michael@0: return UNORM_YES; michael@0: } michael@0: michael@0: const Normalizer2Impl &impl; michael@0: }; michael@0: michael@0: Normalizer2WithImpl::~Normalizer2WithImpl() {} michael@0: michael@0: class DecomposeNormalizer2 : public Normalizer2WithImpl { michael@0: public: michael@0: DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {} michael@0: virtual ~DecomposeNormalizer2(); michael@0: michael@0: private: michael@0: virtual void michael@0: normalize(const UChar *src, const UChar *limit, michael@0: ReorderingBuffer &buffer, UErrorCode &errorCode) const { michael@0: impl.decompose(src, limit, &buffer, errorCode); michael@0: } michael@0: using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. michael@0: virtual void michael@0: normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, michael@0: UnicodeString &safeMiddle, michael@0: ReorderingBuffer &buffer, UErrorCode &errorCode) const { michael@0: impl.decomposeAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode); michael@0: } michael@0: virtual const UChar * michael@0: spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const { michael@0: return impl.decompose(src, limit, NULL, errorCode); michael@0: } michael@0: using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. michael@0: virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const { michael@0: return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO; michael@0: } michael@0: virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasDecompBoundary(c, TRUE); } michael@0: virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasDecompBoundary(c, FALSE); } michael@0: virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); } michael@0: }; michael@0: michael@0: DecomposeNormalizer2::~DecomposeNormalizer2() {} michael@0: michael@0: class ComposeNormalizer2 : public Normalizer2WithImpl { michael@0: public: michael@0: ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) : michael@0: Normalizer2WithImpl(ni), onlyContiguous(fcc) {} michael@0: virtual ~ComposeNormalizer2(); michael@0: michael@0: private: michael@0: virtual void michael@0: normalize(const UChar *src, const UChar *limit, michael@0: ReorderingBuffer &buffer, UErrorCode &errorCode) const { michael@0: impl.compose(src, limit, onlyContiguous, TRUE, buffer, errorCode); michael@0: } michael@0: using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. michael@0: virtual void michael@0: normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, michael@0: UnicodeString &safeMiddle, michael@0: ReorderingBuffer &buffer, UErrorCode &errorCode) const { michael@0: impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, safeMiddle, buffer, errorCode); michael@0: } michael@0: michael@0: virtual UBool michael@0: isNormalized(const UnicodeString &s, UErrorCode &errorCode) const { michael@0: if(U_FAILURE(errorCode)) { michael@0: return FALSE; michael@0: } michael@0: const UChar *sArray=s.getBuffer(); michael@0: if(sArray==NULL) { michael@0: errorCode=U_ILLEGAL_ARGUMENT_ERROR; michael@0: return FALSE; michael@0: } michael@0: UnicodeString temp; michael@0: ReorderingBuffer buffer(impl, temp); michael@0: if(!buffer.init(5, errorCode)) { // small destCapacity for substring normalization michael@0: return FALSE; michael@0: } michael@0: return impl.compose(sArray, sArray+s.length(), onlyContiguous, FALSE, buffer, errorCode); michael@0: } michael@0: virtual UNormalizationCheckResult michael@0: quickCheck(const UnicodeString &s, UErrorCode &errorCode) const { michael@0: if(U_FAILURE(errorCode)) { michael@0: return UNORM_MAYBE; michael@0: } michael@0: const UChar *sArray=s.getBuffer(); michael@0: if(sArray==NULL) { michael@0: errorCode=U_ILLEGAL_ARGUMENT_ERROR; michael@0: return UNORM_MAYBE; michael@0: } michael@0: UNormalizationCheckResult qcResult=UNORM_YES; michael@0: impl.composeQuickCheck(sArray, sArray+s.length(), onlyContiguous, &qcResult); michael@0: return qcResult; michael@0: } michael@0: virtual const UChar * michael@0: spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &) const { michael@0: return impl.composeQuickCheck(src, limit, onlyContiguous, NULL); michael@0: } michael@0: using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. michael@0: virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const { michael@0: return impl.getCompQuickCheck(impl.getNorm16(c)); michael@0: } michael@0: virtual UBool hasBoundaryBefore(UChar32 c) const { michael@0: return impl.hasCompBoundaryBefore(c); michael@0: } michael@0: virtual UBool hasBoundaryAfter(UChar32 c) const { michael@0: return impl.hasCompBoundaryAfter(c, onlyContiguous, FALSE); michael@0: } michael@0: virtual UBool isInert(UChar32 c) const { michael@0: return impl.hasCompBoundaryAfter(c, onlyContiguous, TRUE); michael@0: } michael@0: michael@0: const UBool onlyContiguous; michael@0: }; michael@0: michael@0: ComposeNormalizer2::~ComposeNormalizer2() {} michael@0: michael@0: class FCDNormalizer2 : public Normalizer2WithImpl { michael@0: public: michael@0: FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {} michael@0: virtual ~FCDNormalizer2(); michael@0: michael@0: private: michael@0: virtual void michael@0: normalize(const UChar *src, const UChar *limit, michael@0: ReorderingBuffer &buffer, UErrorCode &errorCode) const { michael@0: impl.makeFCD(src, limit, &buffer, errorCode); michael@0: } michael@0: using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. michael@0: virtual void michael@0: normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, michael@0: UnicodeString &safeMiddle, michael@0: ReorderingBuffer &buffer, UErrorCode &errorCode) const { michael@0: impl.makeFCDAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode); michael@0: } michael@0: virtual const UChar * michael@0: spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const { michael@0: return impl.makeFCD(src, limit, NULL, errorCode); michael@0: } michael@0: using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. michael@0: virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasFCDBoundaryBefore(c); } michael@0: virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasFCDBoundaryAfter(c); } michael@0: virtual UBool isInert(UChar32 c) const { return impl.isFCDInert(c); } michael@0: }; michael@0: michael@0: FCDNormalizer2::~FCDNormalizer2() {} michael@0: michael@0: // instance cache ---------------------------------------------------------- *** michael@0: michael@0: struct Norm2AllModes : public UMemory { michael@0: static Norm2AllModes *createInstance(const char *packageName, michael@0: const char *name, michael@0: UErrorCode &errorCode); michael@0: Norm2AllModes() : comp(impl, FALSE), decomp(impl), fcd(impl), fcc(impl, TRUE) {} michael@0: michael@0: Normalizer2Impl impl; michael@0: ComposeNormalizer2 comp; michael@0: DecomposeNormalizer2 decomp; michael@0: FCDNormalizer2 fcd; michael@0: ComposeNormalizer2 fcc; michael@0: }; michael@0: michael@0: Norm2AllModes * michael@0: Norm2AllModes::createInstance(const char *packageName, michael@0: const char *name, michael@0: UErrorCode &errorCode) { michael@0: if(U_FAILURE(errorCode)) { michael@0: return NULL; michael@0: } michael@0: LocalPointer allModes(new Norm2AllModes); michael@0: if(allModes.isNull()) { michael@0: errorCode=U_MEMORY_ALLOCATION_ERROR; michael@0: return NULL; michael@0: } michael@0: allModes->impl.load(packageName, name, errorCode); michael@0: return U_SUCCESS(errorCode) ? allModes.orphan() : NULL; michael@0: } michael@0: michael@0: U_CDECL_BEGIN michael@0: static UBool U_CALLCONV uprv_normalizer2_cleanup(); michael@0: U_CDECL_END michael@0: michael@0: michael@0: static Norm2AllModes *nfcSingleton; michael@0: static Norm2AllModes *nfkcSingleton; michael@0: static Norm2AllModes *nfkc_cfSingleton; michael@0: static Normalizer2 *noopSingleton; michael@0: static UHashtable *cache=NULL; michael@0: michael@0: static icu::UInitOnce nfcInitOnce = U_INITONCE_INITIALIZER; michael@0: static icu::UInitOnce nfkcInitOnce = U_INITONCE_INITIALIZER; michael@0: static icu::UInitOnce nfkc_cfInitOnce = U_INITONCE_INITIALIZER; michael@0: static icu::UInitOnce noopInitOnce = U_INITONCE_INITIALIZER; michael@0: michael@0: // UInitOnce singleton initialization function michael@0: static void U_CALLCONV initSingletons(const char *what, UErrorCode &errorCode) { michael@0: if (uprv_strcmp(what, "nfc") == 0) { michael@0: nfcSingleton = Norm2AllModes::createInstance(NULL, "nfc", errorCode); michael@0: } else if (uprv_strcmp(what, "nfkc") == 0) { michael@0: nfkcSingleton = Norm2AllModes::createInstance(NULL, "nfkc", errorCode); michael@0: } else if (uprv_strcmp(what, "nfkc_cf") == 0) { michael@0: nfkc_cfSingleton = Norm2AllModes::createInstance(NULL, "nfkc_cf", errorCode); michael@0: } else if (uprv_strcmp(what, "noop") == 0) { michael@0: noopSingleton = new NoopNormalizer2; michael@0: } else { michael@0: U_ASSERT(FALSE); // Unknown singleton michael@0: } michael@0: ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup); michael@0: } michael@0: michael@0: U_CDECL_BEGIN michael@0: michael@0: static void U_CALLCONV deleteNorm2AllModes(void *allModes) { michael@0: delete (Norm2AllModes *)allModes; michael@0: } michael@0: michael@0: static UBool U_CALLCONV uprv_normalizer2_cleanup() { michael@0: delete nfcSingleton; michael@0: nfcSingleton = NULL; michael@0: delete nfkcSingleton; michael@0: nfkcSingleton = NULL; michael@0: delete nfkc_cfSingleton; michael@0: nfkc_cfSingleton = NULL; michael@0: delete noopSingleton; michael@0: noopSingleton = NULL; michael@0: uhash_close(cache); michael@0: cache=NULL; michael@0: nfcInitOnce.reset(); michael@0: nfkcInitOnce.reset(); michael@0: nfkc_cfInitOnce.reset(); michael@0: noopInitOnce.reset(); michael@0: return TRUE; michael@0: } michael@0: michael@0: U_CDECL_END michael@0: michael@0: const Normalizer2 *Normalizer2Factory::getNFCInstance(UErrorCode &errorCode) { michael@0: umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode); michael@0: return nfcSingleton!=NULL ? &nfcSingleton->comp : NULL; michael@0: } michael@0: michael@0: const Normalizer2 *Normalizer2Factory::getNFDInstance(UErrorCode &errorCode) { michael@0: umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode); michael@0: return nfcSingleton!=NULL ? &nfcSingleton->decomp : NULL; michael@0: } michael@0: michael@0: const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) { michael@0: umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode); michael@0: return nfcSingleton!=NULL ? &nfcSingleton->fcd : NULL; michael@0: } michael@0: michael@0: const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) { michael@0: umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode); michael@0: return nfcSingleton!=NULL ? &nfcSingleton->fcc : NULL; michael@0: } michael@0: michael@0: const Normalizer2 *Normalizer2Factory::getNFKCInstance(UErrorCode &errorCode) { michael@0: umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode); michael@0: return nfkcSingleton!=NULL ? &nfkcSingleton->comp : NULL; michael@0: } michael@0: michael@0: const Normalizer2 *Normalizer2Factory::getNFKDInstance(UErrorCode &errorCode) { michael@0: umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode); michael@0: return nfkcSingleton!=NULL ? &nfkcSingleton->decomp : NULL; michael@0: } michael@0: michael@0: const Normalizer2 *Normalizer2Factory::getNFKC_CFInstance(UErrorCode &errorCode) { michael@0: umtx_initOnce(nfkc_cfInitOnce, &initSingletons, "nfkc_cf", errorCode); michael@0: return nfkc_cfSingleton!=NULL ? &nfkc_cfSingleton->comp : NULL; michael@0: } michael@0: michael@0: const Normalizer2 *Normalizer2Factory::getNoopInstance(UErrorCode &errorCode) { michael@0: umtx_initOnce(noopInitOnce, &initSingletons, "noop", errorCode); michael@0: return noopSingleton; michael@0: } michael@0: michael@0: const Normalizer2 * michael@0: Normalizer2Factory::getInstance(UNormalizationMode mode, UErrorCode &errorCode) { michael@0: if(U_FAILURE(errorCode)) { michael@0: return NULL; michael@0: } michael@0: switch(mode) { michael@0: case UNORM_NFD: michael@0: return getNFDInstance(errorCode); michael@0: case UNORM_NFKD: michael@0: return getNFKDInstance(errorCode); michael@0: case UNORM_NFC: michael@0: return getNFCInstance(errorCode); michael@0: case UNORM_NFKC: michael@0: return getNFKCInstance(errorCode); michael@0: case UNORM_FCD: michael@0: return getFCDInstance(errorCode); michael@0: default: // UNORM_NONE michael@0: return getNoopInstance(errorCode); michael@0: } michael@0: } michael@0: michael@0: const Normalizer2Impl * michael@0: Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) { michael@0: umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode); michael@0: return nfcSingleton!=NULL ? &nfcSingleton->impl : NULL; michael@0: } michael@0: michael@0: const Normalizer2Impl * michael@0: Normalizer2Factory::getNFKCImpl(UErrorCode &errorCode) { michael@0: umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode); michael@0: return nfkcSingleton!=NULL ? &nfkcSingleton->impl : NULL; michael@0: } michael@0: michael@0: const Normalizer2Impl * michael@0: Normalizer2Factory::getNFKC_CFImpl(UErrorCode &errorCode) { michael@0: umtx_initOnce(nfkc_cfInitOnce, &initSingletons, "nfkc_cf", errorCode); michael@0: return nfkc_cfSingleton!=NULL ? &nfkc_cfSingleton->impl : NULL; michael@0: } michael@0: michael@0: const Normalizer2Impl * michael@0: Normalizer2Factory::getImpl(const Normalizer2 *norm2) { michael@0: return &((Normalizer2WithImpl *)norm2)->impl; michael@0: } michael@0: michael@0: const Normalizer2 * michael@0: Normalizer2::getNFCInstance(UErrorCode &errorCode) { michael@0: return Normalizer2Factory::getNFCInstance(errorCode); michael@0: } michael@0: michael@0: const Normalizer2 * michael@0: Normalizer2::getNFDInstance(UErrorCode &errorCode) { michael@0: return Normalizer2Factory::getNFDInstance(errorCode); michael@0: } michael@0: michael@0: const Normalizer2 * michael@0: Normalizer2::getNFKCInstance(UErrorCode &errorCode) { michael@0: return Normalizer2Factory::getNFKCInstance(errorCode); michael@0: } michael@0: michael@0: const Normalizer2 * michael@0: Normalizer2::getNFKDInstance(UErrorCode &errorCode) { michael@0: return Normalizer2Factory::getNFKDInstance(errorCode); michael@0: } michael@0: michael@0: const Normalizer2 * michael@0: Normalizer2::getNFKCCasefoldInstance(UErrorCode &errorCode) { michael@0: return Normalizer2Factory::getNFKC_CFInstance(errorCode); michael@0: } michael@0: michael@0: const Normalizer2 * michael@0: Normalizer2::getInstance(const char *packageName, michael@0: const char *name, michael@0: UNormalization2Mode mode, michael@0: UErrorCode &errorCode) { michael@0: if(U_FAILURE(errorCode)) { michael@0: return NULL; michael@0: } michael@0: if(name==NULL || *name==0) { michael@0: errorCode=U_ILLEGAL_ARGUMENT_ERROR; michael@0: return NULL; michael@0: } michael@0: Norm2AllModes *allModes=NULL; michael@0: if(packageName==NULL) { michael@0: if(0==uprv_strcmp(name, "nfc")) { michael@0: umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode); michael@0: allModes=nfcSingleton; michael@0: } else if(0==uprv_strcmp(name, "nfkc")) { michael@0: umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode); michael@0: allModes=nfkcSingleton; michael@0: } else if(0==uprv_strcmp(name, "nfkc_cf")) { michael@0: umtx_initOnce(nfkc_cfInitOnce, &initSingletons, "nfkc_cf", errorCode); michael@0: allModes=nfkc_cfSingleton; michael@0: } michael@0: } michael@0: if(allModes==NULL && U_SUCCESS(errorCode)) { michael@0: { michael@0: Mutex lock; michael@0: if(cache!=NULL) { michael@0: allModes=(Norm2AllModes *)uhash_get(cache, name); michael@0: } michael@0: } michael@0: if(allModes==NULL) { michael@0: LocalPointer localAllModes( michael@0: Norm2AllModes::createInstance(packageName, name, errorCode)); michael@0: if(U_SUCCESS(errorCode)) { michael@0: Mutex lock; michael@0: if(cache==NULL) { michael@0: cache=uhash_open(uhash_hashChars, uhash_compareChars, NULL, &errorCode); michael@0: if(U_FAILURE(errorCode)) { michael@0: return NULL; michael@0: } michael@0: uhash_setKeyDeleter(cache, uprv_free); michael@0: uhash_setValueDeleter(cache, deleteNorm2AllModes); michael@0: } michael@0: void *temp=uhash_get(cache, name); michael@0: if(temp==NULL) { michael@0: int32_t keyLength=uprv_strlen(name)+1; michael@0: char *nameCopy=(char *)uprv_malloc(keyLength); michael@0: if(nameCopy==NULL) { michael@0: errorCode=U_MEMORY_ALLOCATION_ERROR; michael@0: return NULL; michael@0: } michael@0: uprv_memcpy(nameCopy, name, keyLength); michael@0: uhash_put(cache, nameCopy, allModes=localAllModes.orphan(), &errorCode); michael@0: } else { michael@0: // race condition michael@0: allModes=(Norm2AllModes *)temp; michael@0: } michael@0: } michael@0: } michael@0: } michael@0: if(allModes!=NULL && U_SUCCESS(errorCode)) { michael@0: switch(mode) { michael@0: case UNORM2_COMPOSE: michael@0: return &allModes->comp; michael@0: case UNORM2_DECOMPOSE: michael@0: return &allModes->decomp; michael@0: case UNORM2_FCD: michael@0: return &allModes->fcd; michael@0: case UNORM2_COMPOSE_CONTIGUOUS: michael@0: return &allModes->fcc; michael@0: default: michael@0: break; // do nothing michael@0: } michael@0: } michael@0: return NULL; michael@0: } michael@0: michael@0: U_NAMESPACE_END michael@0: michael@0: // C API ------------------------------------------------------------------- *** michael@0: michael@0: U_NAMESPACE_USE michael@0: michael@0: U_CAPI const UNormalizer2 * U_EXPORT2 michael@0: unorm2_getNFCInstance(UErrorCode *pErrorCode) { michael@0: return (const UNormalizer2 *)Normalizer2::getNFCInstance(*pErrorCode); michael@0: } michael@0: michael@0: U_CAPI const UNormalizer2 * U_EXPORT2 michael@0: unorm2_getNFDInstance(UErrorCode *pErrorCode) { michael@0: return (const UNormalizer2 *)Normalizer2::getNFDInstance(*pErrorCode); michael@0: } michael@0: michael@0: U_CAPI const UNormalizer2 * U_EXPORT2 michael@0: unorm2_getNFKCInstance(UErrorCode *pErrorCode) { michael@0: return (const UNormalizer2 *)Normalizer2::getNFKCInstance(*pErrorCode); michael@0: } michael@0: michael@0: U_CAPI const UNormalizer2 * U_EXPORT2 michael@0: unorm2_getNFKDInstance(UErrorCode *pErrorCode) { michael@0: return (const UNormalizer2 *)Normalizer2::getNFKDInstance(*pErrorCode); michael@0: } michael@0: michael@0: U_CAPI const UNormalizer2 * U_EXPORT2 michael@0: unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode) { michael@0: return (const UNormalizer2 *)Normalizer2::getNFKCCasefoldInstance(*pErrorCode); michael@0: } michael@0: michael@0: U_CAPI const UNormalizer2 * U_EXPORT2 michael@0: unorm2_getInstance(const char *packageName, michael@0: const char *name, michael@0: UNormalization2Mode mode, michael@0: UErrorCode *pErrorCode) { michael@0: return (const UNormalizer2 *)Normalizer2::getInstance(packageName, name, mode, *pErrorCode); michael@0: } michael@0: michael@0: U_CAPI void U_EXPORT2 michael@0: unorm2_close(UNormalizer2 *norm2) { michael@0: delete (Normalizer2 *)norm2; michael@0: } michael@0: michael@0: U_CAPI int32_t U_EXPORT2 michael@0: unorm2_normalize(const UNormalizer2 *norm2, michael@0: const UChar *src, int32_t length, michael@0: UChar *dest, int32_t capacity, michael@0: UErrorCode *pErrorCode) { michael@0: if(U_FAILURE(*pErrorCode)) { michael@0: return 0; michael@0: } michael@0: if( (src==NULL ? length!=0 : length<-1) || michael@0: (dest==NULL ? capacity!=0 : capacity<0) || michael@0: (src==dest && src!=NULL) michael@0: ) { michael@0: *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; michael@0: return 0; michael@0: } michael@0: UnicodeString destString(dest, 0, capacity); michael@0: // length==0: Nothing to do, and n2wi->normalize(NULL, NULL, buffer, ...) would crash. michael@0: if(length!=0) { michael@0: const Normalizer2 *n2=(const Normalizer2 *)norm2; michael@0: const Normalizer2WithImpl *n2wi=dynamic_cast(n2); michael@0: if(n2wi!=NULL) { michael@0: // Avoid duplicate argument checking and support NUL-terminated src. michael@0: ReorderingBuffer buffer(n2wi->impl, destString); michael@0: if(buffer.init(length, *pErrorCode)) { michael@0: n2wi->normalize(src, length>=0 ? src+length : NULL, buffer, *pErrorCode); michael@0: } michael@0: } else { michael@0: UnicodeString srcString(length<0, src, length); michael@0: n2->normalize(srcString, destString, *pErrorCode); michael@0: } michael@0: } michael@0: return destString.extract(dest, capacity, *pErrorCode); michael@0: } michael@0: michael@0: static int32_t michael@0: normalizeSecondAndAppend(const UNormalizer2 *norm2, michael@0: UChar *first, int32_t firstLength, int32_t firstCapacity, michael@0: const UChar *second, int32_t secondLength, michael@0: UBool doNormalize, michael@0: UErrorCode *pErrorCode) { michael@0: if(U_FAILURE(*pErrorCode)) { michael@0: return 0; michael@0: } michael@0: if( (second==NULL ? secondLength!=0 : secondLength<-1) || michael@0: (first==NULL ? (firstCapacity!=0 || firstLength!=0) : michael@0: (firstCapacity<0 || firstLength<-1)) || michael@0: (first==second && first!=NULL) michael@0: ) { michael@0: *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; michael@0: return 0; michael@0: } michael@0: UnicodeString firstString(first, firstLength, firstCapacity); michael@0: firstLength=firstString.length(); // In case it was -1. michael@0: // secondLength==0: Nothing to do, and n2wi->normalizeAndAppend(NULL, NULL, buffer, ...) would crash. michael@0: if(secondLength!=0) { michael@0: const Normalizer2 *n2=(const Normalizer2 *)norm2; michael@0: const Normalizer2WithImpl *n2wi=dynamic_cast(n2); michael@0: if(n2wi!=NULL) { michael@0: // Avoid duplicate argument checking and support NUL-terminated src. michael@0: UnicodeString safeMiddle; michael@0: { michael@0: ReorderingBuffer buffer(n2wi->impl, firstString); michael@0: if(buffer.init(firstLength+secondLength+1, *pErrorCode)) { // destCapacity>=-1 michael@0: n2wi->normalizeAndAppend(second, secondLength>=0 ? second+secondLength : NULL, michael@0: doNormalize, safeMiddle, buffer, *pErrorCode); michael@0: } michael@0: } // The ReorderingBuffer destructor finalizes firstString. michael@0: if(U_FAILURE(*pErrorCode) || firstString.length()>firstCapacity) { michael@0: // Restore the modified suffix of the first string. michael@0: // This does not restore first[] array contents between firstLength and firstCapacity. michael@0: // (That might be uninitialized memory, as far as we know.) michael@0: if(first!=NULL) { /* don't dereference NULL */ michael@0: safeMiddle.extract(0, 0x7fffffff, first+firstLength-safeMiddle.length()); michael@0: if(firstLengthnormalizeSecondAndAppend(firstString, secondString, *pErrorCode); michael@0: } else { michael@0: n2->append(firstString, secondString, *pErrorCode); michael@0: } michael@0: } michael@0: } michael@0: return firstString.extract(first, firstCapacity, *pErrorCode); michael@0: } michael@0: michael@0: U_CAPI int32_t U_EXPORT2 michael@0: unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2, michael@0: UChar *first, int32_t firstLength, int32_t firstCapacity, michael@0: const UChar *second, int32_t secondLength, michael@0: UErrorCode *pErrorCode) { michael@0: return normalizeSecondAndAppend(norm2, michael@0: first, firstLength, firstCapacity, michael@0: second, secondLength, michael@0: TRUE, pErrorCode); michael@0: } michael@0: michael@0: U_CAPI int32_t U_EXPORT2 michael@0: unorm2_append(const UNormalizer2 *norm2, michael@0: UChar *first, int32_t firstLength, int32_t firstCapacity, michael@0: const UChar *second, int32_t secondLength, michael@0: UErrorCode *pErrorCode) { michael@0: return normalizeSecondAndAppend(norm2, michael@0: first, firstLength, firstCapacity, michael@0: second, secondLength, michael@0: FALSE, pErrorCode); michael@0: } michael@0: michael@0: U_CAPI int32_t U_EXPORT2 michael@0: unorm2_getDecomposition(const UNormalizer2 *norm2, michael@0: UChar32 c, UChar *decomposition, int32_t capacity, michael@0: UErrorCode *pErrorCode) { michael@0: if(U_FAILURE(*pErrorCode)) { michael@0: return 0; michael@0: } michael@0: if(decomposition==NULL ? capacity!=0 : capacity<0) { michael@0: *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; michael@0: return 0; michael@0: } michael@0: UnicodeString destString(decomposition, 0, capacity); michael@0: if(reinterpret_cast(norm2)->getDecomposition(c, destString)) { michael@0: return destString.extract(decomposition, capacity, *pErrorCode); michael@0: } else { michael@0: return -1; michael@0: } michael@0: } michael@0: michael@0: U_CAPI int32_t U_EXPORT2 michael@0: unorm2_getRawDecomposition(const UNormalizer2 *norm2, michael@0: UChar32 c, UChar *decomposition, int32_t capacity, michael@0: UErrorCode *pErrorCode) { michael@0: if(U_FAILURE(*pErrorCode)) { michael@0: return 0; michael@0: } michael@0: if(decomposition==NULL ? capacity!=0 : capacity<0) { michael@0: *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; michael@0: return 0; michael@0: } michael@0: UnicodeString destString(decomposition, 0, capacity); michael@0: if(reinterpret_cast(norm2)->getRawDecomposition(c, destString)) { michael@0: return destString.extract(decomposition, capacity, *pErrorCode); michael@0: } else { michael@0: return -1; michael@0: } michael@0: } michael@0: michael@0: U_CAPI UChar32 U_EXPORT2 michael@0: unorm2_composePair(const UNormalizer2 *norm2, UChar32 a, UChar32 b) { michael@0: return reinterpret_cast(norm2)->composePair(a, b); michael@0: } michael@0: michael@0: U_CAPI uint8_t U_EXPORT2 michael@0: unorm2_getCombiningClass(const UNormalizer2 *norm2, UChar32 c) { michael@0: return reinterpret_cast(norm2)->getCombiningClass(c); michael@0: } michael@0: michael@0: U_CAPI UBool U_EXPORT2 michael@0: unorm2_isNormalized(const UNormalizer2 *norm2, michael@0: const UChar *s, int32_t length, michael@0: UErrorCode *pErrorCode) { michael@0: if(U_FAILURE(*pErrorCode)) { michael@0: return 0; michael@0: } michael@0: if((s==NULL && length!=0) || length<-1) { michael@0: *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; michael@0: return 0; michael@0: } michael@0: UnicodeString sString(length<0, s, length); michael@0: return ((const Normalizer2 *)norm2)->isNormalized(sString, *pErrorCode); michael@0: } michael@0: michael@0: U_CAPI UNormalizationCheckResult U_EXPORT2 michael@0: unorm2_quickCheck(const UNormalizer2 *norm2, michael@0: const UChar *s, int32_t length, michael@0: UErrorCode *pErrorCode) { michael@0: if(U_FAILURE(*pErrorCode)) { michael@0: return UNORM_NO; michael@0: } michael@0: if((s==NULL && length!=0) || length<-1) { michael@0: *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; michael@0: return UNORM_NO; michael@0: } michael@0: UnicodeString sString(length<0, s, length); michael@0: return ((const Normalizer2 *)norm2)->quickCheck(sString, *pErrorCode); michael@0: } michael@0: michael@0: U_CAPI int32_t U_EXPORT2 michael@0: unorm2_spanQuickCheckYes(const UNormalizer2 *norm2, michael@0: const UChar *s, int32_t length, michael@0: UErrorCode *pErrorCode) { michael@0: if(U_FAILURE(*pErrorCode)) { michael@0: return 0; michael@0: } michael@0: if((s==NULL && length!=0) || length<-1) { michael@0: *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; michael@0: return 0; michael@0: } michael@0: UnicodeString sString(length<0, s, length); michael@0: return ((const Normalizer2 *)norm2)->spanQuickCheckYes(sString, *pErrorCode); michael@0: } michael@0: michael@0: U_CAPI UBool U_EXPORT2 michael@0: unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c) { michael@0: return ((const Normalizer2 *)norm2)->hasBoundaryBefore(c); michael@0: } michael@0: michael@0: U_CAPI UBool U_EXPORT2 michael@0: unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c) { michael@0: return ((const Normalizer2 *)norm2)->hasBoundaryAfter(c); michael@0: } michael@0: michael@0: U_CAPI UBool U_EXPORT2 michael@0: unorm2_isInert(const UNormalizer2 *norm2, UChar32 c) { michael@0: return ((const Normalizer2 *)norm2)->isInert(c); michael@0: } michael@0: michael@0: // Some properties APIs ---------------------------------------------------- *** michael@0: michael@0: U_CAPI uint8_t U_EXPORT2 michael@0: u_getCombiningClass(UChar32 c) { michael@0: UErrorCode errorCode=U_ZERO_ERROR; michael@0: const Normalizer2 *nfd=Normalizer2Factory::getNFDInstance(errorCode); michael@0: if(U_SUCCESS(errorCode)) { michael@0: return nfd->getCombiningClass(c); michael@0: } else { michael@0: return 0; michael@0: } michael@0: } michael@0: michael@0: U_CFUNC UNormalizationCheckResult michael@0: unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) { michael@0: if(mode<=UNORM_NONE || UNORM_FCD<=mode) { michael@0: return UNORM_YES; michael@0: } michael@0: UErrorCode errorCode=U_ZERO_ERROR; michael@0: const Normalizer2 *norm2=Normalizer2Factory::getInstance(mode, errorCode); michael@0: if(U_SUCCESS(errorCode)) { michael@0: return ((const Normalizer2WithImpl *)norm2)->getQuickCheck(c); michael@0: } else { michael@0: return UNORM_MAYBE; michael@0: } michael@0: } michael@0: michael@0: U_CFUNC uint16_t michael@0: unorm_getFCD16(UChar32 c) { michael@0: UErrorCode errorCode=U_ZERO_ERROR; michael@0: const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode); michael@0: if(U_SUCCESS(errorCode)) { michael@0: return impl->getFCD16(c); michael@0: } else { michael@0: return 0; michael@0: } michael@0: } michael@0: michael@0: #endif // !UCONFIG_NO_NORMALIZATION