michael@0: /* michael@0: ******************************************************************************* michael@0: * Copyright (C) 1997-2013, International Business Machines Corporation and michael@0: * others. All Rights Reserved. michael@0: ******************************************************************************* michael@0: * michael@0: * File TXTBDRY.CPP michael@0: * michael@0: * Modification History: michael@0: * michael@0: * Date Name Description michael@0: * 02/18/97 aliu Converted from OpenClass. Added DONE. michael@0: * 01/13/2000 helena Added UErrorCode parameter to createXXXInstance methods. michael@0: ***************************************************************************************** michael@0: */ michael@0: michael@0: // ***************************************************************************** michael@0: // This file was generated from the java source file BreakIterator.java michael@0: // ***************************************************************************** michael@0: michael@0: #include "unicode/utypes.h" michael@0: michael@0: #if !UCONFIG_NO_BREAK_ITERATION michael@0: michael@0: #include "unicode/rbbi.h" michael@0: #include "unicode/brkiter.h" michael@0: #include "unicode/udata.h" michael@0: #include "unicode/ures.h" michael@0: #include "unicode/ustring.h" michael@0: #include "ucln_cmn.h" michael@0: #include "cstring.h" michael@0: #include "umutex.h" michael@0: #include "servloc.h" michael@0: #include "locbased.h" michael@0: #include "uresimp.h" michael@0: #include "uassert.h" michael@0: #include "ubrkimpl.h" michael@0: michael@0: // ***************************************************************************** michael@0: // class BreakIterator michael@0: // This class implements methods for finding the location of boundaries in text. michael@0: // Instances of BreakIterator maintain a current position and scan over text michael@0: // returning the index of characters where boundaries occur. michael@0: // ***************************************************************************** michael@0: michael@0: U_NAMESPACE_BEGIN michael@0: michael@0: // ------------------------------------- michael@0: michael@0: BreakIterator* michael@0: BreakIterator::buildInstance(const Locale& loc, const char *type, int32_t kind, UErrorCode &status) michael@0: { michael@0: char fnbuff[256]; michael@0: char ext[4]={'\0'}; michael@0: char actualLocale[ULOC_FULLNAME_CAPACITY]; michael@0: int32_t size; michael@0: const UChar* brkfname = NULL; michael@0: UResourceBundle brkRulesStack; michael@0: UResourceBundle brkNameStack; michael@0: UResourceBundle *brkRules = &brkRulesStack; michael@0: UResourceBundle *brkName = &brkNameStack; michael@0: RuleBasedBreakIterator *result = NULL; michael@0: michael@0: if (U_FAILURE(status)) michael@0: return NULL; michael@0: michael@0: ures_initStackObject(brkRules); michael@0: ures_initStackObject(brkName); michael@0: michael@0: // Get the locale michael@0: UResourceBundle *b = ures_open(U_ICUDATA_BRKITR, loc.getName(), &status); michael@0: /* this is a hack for now. Should be fixed when the data is fetched from michael@0: brk_index.txt */ michael@0: if(status==U_USING_DEFAULT_WARNING){ michael@0: status=U_ZERO_ERROR; michael@0: ures_openFillIn(b, U_ICUDATA_BRKITR, "", &status); michael@0: } michael@0: michael@0: // Get the "boundaries" array. michael@0: if (U_SUCCESS(status)) { michael@0: brkRules = ures_getByKeyWithFallback(b, "boundaries", brkRules, &status); michael@0: // Get the string object naming the rules file michael@0: brkName = ures_getByKeyWithFallback(brkRules, type, brkName, &status); michael@0: // Get the actual string michael@0: brkfname = ures_getString(brkName, &size, &status); michael@0: U_ASSERT((size_t)size=sizeof(fnbuff)) { michael@0: size=0; michael@0: if (U_SUCCESS(status)) { michael@0: status = U_BUFFER_OVERFLOW_ERROR; michael@0: } michael@0: } michael@0: michael@0: // Use the string if we found it michael@0: if (U_SUCCESS(status) && brkfname) { michael@0: uprv_strncpy(actualLocale, michael@0: ures_getLocaleInternal(brkName, &status), michael@0: sizeof(actualLocale)/sizeof(actualLocale[0])); michael@0: michael@0: UChar* extStart=u_strchr(brkfname, 0x002e); michael@0: int len = 0; michael@0: if(extStart!=NULL){ michael@0: len = (int)(extStart-brkfname); michael@0: u_UCharsToChars(extStart+1, ext, sizeof(ext)); // nul terminates the buff michael@0: u_UCharsToChars(brkfname, fnbuff, len); michael@0: } michael@0: fnbuff[len]=0; // nul terminate michael@0: } michael@0: } michael@0: michael@0: ures_close(brkRules); michael@0: ures_close(brkName); michael@0: michael@0: UDataMemory* file = udata_open(U_ICUDATA_BRKITR, ext, fnbuff, &status); michael@0: if (U_FAILURE(status)) { michael@0: ures_close(b); michael@0: return NULL; michael@0: } michael@0: michael@0: // Create a RuleBasedBreakIterator michael@0: result = new RuleBasedBreakIterator(file, status); michael@0: michael@0: // If there is a result, set the valid locale and actual locale, and the kind michael@0: if (U_SUCCESS(status) && result != NULL) { michael@0: U_LOCALE_BASED(locBased, *(BreakIterator*)result); michael@0: locBased.setLocaleIDs(ures_getLocaleByType(b, ULOC_VALID_LOCALE, &status), actualLocale); michael@0: result->setBreakType(kind); michael@0: } michael@0: michael@0: ures_close(b); michael@0: michael@0: if (U_FAILURE(status) && result != NULL) { // Sometimes redundant check, but simple michael@0: delete result; michael@0: return NULL; michael@0: } michael@0: michael@0: if (result == NULL) { michael@0: udata_close(file); michael@0: if (U_SUCCESS(status)) { michael@0: status = U_MEMORY_ALLOCATION_ERROR; michael@0: } michael@0: } michael@0: michael@0: return result; michael@0: } michael@0: michael@0: // Creates a break iterator for word breaks. michael@0: BreakIterator* U_EXPORT2 michael@0: BreakIterator::createWordInstance(const Locale& key, UErrorCode& status) michael@0: { michael@0: return createInstance(key, UBRK_WORD, status); michael@0: } michael@0: michael@0: // ------------------------------------- michael@0: michael@0: // Creates a break iterator for line breaks. michael@0: BreakIterator* U_EXPORT2 michael@0: BreakIterator::createLineInstance(const Locale& key, UErrorCode& status) michael@0: { michael@0: return createInstance(key, UBRK_LINE, status); michael@0: } michael@0: michael@0: // ------------------------------------- michael@0: michael@0: // Creates a break iterator for character breaks. michael@0: BreakIterator* U_EXPORT2 michael@0: BreakIterator::createCharacterInstance(const Locale& key, UErrorCode& status) michael@0: { michael@0: return createInstance(key, UBRK_CHARACTER, status); michael@0: } michael@0: michael@0: // ------------------------------------- michael@0: michael@0: // Creates a break iterator for sentence breaks. michael@0: BreakIterator* U_EXPORT2 michael@0: BreakIterator::createSentenceInstance(const Locale& key, UErrorCode& status) michael@0: { michael@0: return createInstance(key, UBRK_SENTENCE, status); michael@0: } michael@0: michael@0: // ------------------------------------- michael@0: michael@0: // Creates a break iterator for title casing breaks. michael@0: BreakIterator* U_EXPORT2 michael@0: BreakIterator::createTitleInstance(const Locale& key, UErrorCode& status) michael@0: { michael@0: return createInstance(key, UBRK_TITLE, status); michael@0: } michael@0: michael@0: // ------------------------------------- michael@0: michael@0: // Gets all the available locales that has localized text boundary data. michael@0: const Locale* U_EXPORT2 michael@0: BreakIterator::getAvailableLocales(int32_t& count) michael@0: { michael@0: return Locale::getAvailableLocales(count); michael@0: } michael@0: michael@0: // ------------------------------------------ michael@0: // michael@0: // Default constructor and destructor michael@0: // michael@0: //------------------------------------------- michael@0: michael@0: BreakIterator::BreakIterator() michael@0: { michael@0: *validLocale = *actualLocale = 0; michael@0: } michael@0: michael@0: BreakIterator::~BreakIterator() michael@0: { michael@0: } michael@0: michael@0: // ------------------------------------------ michael@0: // michael@0: // Registration michael@0: // michael@0: //------------------------------------------- michael@0: #if !UCONFIG_NO_SERVICE michael@0: michael@0: // ------------------------------------- michael@0: michael@0: class ICUBreakIteratorFactory : public ICUResourceBundleFactory { michael@0: public: michael@0: virtual ~ICUBreakIteratorFactory(); michael@0: protected: michael@0: virtual UObject* handleCreate(const Locale& loc, int32_t kind, const ICUService* /*service*/, UErrorCode& status) const { michael@0: return BreakIterator::makeInstance(loc, kind, status); michael@0: } michael@0: }; michael@0: michael@0: ICUBreakIteratorFactory::~ICUBreakIteratorFactory() {} michael@0: michael@0: // ------------------------------------- michael@0: michael@0: class ICUBreakIteratorService : public ICULocaleService { michael@0: public: michael@0: ICUBreakIteratorService() michael@0: : ICULocaleService(UNICODE_STRING("Break Iterator", 14)) michael@0: { michael@0: UErrorCode status = U_ZERO_ERROR; michael@0: registerFactory(new ICUBreakIteratorFactory(), status); michael@0: } michael@0: michael@0: virtual ~ICUBreakIteratorService(); michael@0: michael@0: virtual UObject* cloneInstance(UObject* instance) const { michael@0: return ((BreakIterator*)instance)->clone(); michael@0: } michael@0: michael@0: virtual UObject* handleDefault(const ICUServiceKey& key, UnicodeString* /*actualID*/, UErrorCode& status) const { michael@0: LocaleKey& lkey = (LocaleKey&)key; michael@0: int32_t kind = lkey.kind(); michael@0: Locale loc; michael@0: lkey.currentLocale(loc); michael@0: return BreakIterator::makeInstance(loc, kind, status); michael@0: } michael@0: michael@0: virtual UBool isDefault() const { michael@0: return countFactories() == 1; michael@0: } michael@0: }; michael@0: michael@0: ICUBreakIteratorService::~ICUBreakIteratorService() {} michael@0: michael@0: // ------------------------------------- michael@0: michael@0: // defined in ucln_cmn.h michael@0: U_NAMESPACE_END michael@0: michael@0: static icu::UInitOnce gInitOnce; michael@0: static icu::ICULocaleService* gService = NULL; michael@0: michael@0: michael@0: michael@0: /** michael@0: * Release all static memory held by breakiterator. michael@0: */ michael@0: U_CDECL_BEGIN michael@0: static UBool U_CALLCONV breakiterator_cleanup(void) { michael@0: #if !UCONFIG_NO_SERVICE michael@0: if (gService) { michael@0: delete gService; michael@0: gService = NULL; michael@0: } michael@0: gInitOnce.reset(); michael@0: #endif michael@0: return TRUE; michael@0: } michael@0: U_CDECL_END michael@0: U_NAMESPACE_BEGIN michael@0: michael@0: static void U_CALLCONV michael@0: initService(void) { michael@0: gService = new ICUBreakIteratorService(); michael@0: ucln_common_registerCleanup(UCLN_COMMON_BREAKITERATOR, breakiterator_cleanup); michael@0: } michael@0: michael@0: static ICULocaleService* michael@0: getService(void) michael@0: { michael@0: umtx_initOnce(gInitOnce, &initService); michael@0: return gService; michael@0: } michael@0: michael@0: michael@0: // ------------------------------------- michael@0: michael@0: static inline UBool michael@0: hasService(void) michael@0: { michael@0: return !gInitOnce.isReset() && getService() != NULL; michael@0: } michael@0: michael@0: // ------------------------------------- michael@0: michael@0: URegistryKey U_EXPORT2 michael@0: BreakIterator::registerInstance(BreakIterator* toAdopt, const Locale& locale, UBreakIteratorType kind, UErrorCode& status) michael@0: { michael@0: ICULocaleService *service = getService(); michael@0: if (service == NULL) { michael@0: status = U_MEMORY_ALLOCATION_ERROR; michael@0: return NULL; michael@0: } michael@0: return service->registerInstance(toAdopt, locale, kind, status); michael@0: } michael@0: michael@0: // ------------------------------------- michael@0: michael@0: UBool U_EXPORT2 michael@0: BreakIterator::unregister(URegistryKey key, UErrorCode& status) michael@0: { michael@0: if (U_SUCCESS(status)) { michael@0: if (hasService()) { michael@0: return gService->unregister(key, status); michael@0: } michael@0: status = U_MEMORY_ALLOCATION_ERROR; michael@0: } michael@0: return FALSE; michael@0: } michael@0: michael@0: // ------------------------------------- michael@0: michael@0: StringEnumeration* U_EXPORT2 michael@0: BreakIterator::getAvailableLocales(void) michael@0: { michael@0: ICULocaleService *service = getService(); michael@0: if (service == NULL) { michael@0: return NULL; michael@0: } michael@0: return service->getAvailableLocales(); michael@0: } michael@0: #endif /* UCONFIG_NO_SERVICE */ michael@0: michael@0: // ------------------------------------- michael@0: michael@0: BreakIterator* michael@0: BreakIterator::createInstance(const Locale& loc, int32_t kind, UErrorCode& status) michael@0: { michael@0: if (U_FAILURE(status)) { michael@0: return NULL; michael@0: } michael@0: michael@0: #if !UCONFIG_NO_SERVICE michael@0: if (hasService()) { michael@0: Locale actualLoc(""); michael@0: BreakIterator *result = (BreakIterator*)gService->get(loc, kind, &actualLoc, status); michael@0: // TODO: The way the service code works in ICU 2.8 is that if michael@0: // there is a real registered break iterator, the actualLoc michael@0: // will be populated, but if the handleDefault path is taken michael@0: // (because nothing is registered that can handle the michael@0: // requested locale) then the actualLoc comes back empty. In michael@0: // that case, the returned object already has its actual/valid michael@0: // locale data populated (by makeInstance, which is what michael@0: // handleDefault calls), so we don't touch it. YES, A COMMENT michael@0: // THIS LONG is a sign of bad code -- so the action item is to michael@0: // revisit this in ICU 3.0 and clean it up/fix it/remove it. michael@0: if (U_SUCCESS(status) && (result != NULL) && *actualLoc.getName() != 0) { michael@0: U_LOCALE_BASED(locBased, *result); michael@0: locBased.setLocaleIDs(actualLoc.getName(), actualLoc.getName()); michael@0: } michael@0: return result; michael@0: } michael@0: else michael@0: #endif michael@0: { michael@0: return makeInstance(loc, kind, status); michael@0: } michael@0: } michael@0: michael@0: // ------------------------------------- michael@0: michael@0: BreakIterator* michael@0: BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status) michael@0: { michael@0: michael@0: if (U_FAILURE(status)) { michael@0: return NULL; michael@0: } michael@0: michael@0: BreakIterator *result = NULL; michael@0: switch (kind) { michael@0: case UBRK_CHARACTER: michael@0: result = BreakIterator::buildInstance(loc, "grapheme", kind, status); michael@0: break; michael@0: case UBRK_WORD: michael@0: result = BreakIterator::buildInstance(loc, "word", kind, status); michael@0: break; michael@0: case UBRK_LINE: michael@0: result = BreakIterator::buildInstance(loc, "line", kind, status); michael@0: break; michael@0: case UBRK_SENTENCE: michael@0: result = BreakIterator::buildInstance(loc, "sentence", kind, status); michael@0: break; michael@0: case UBRK_TITLE: michael@0: result = BreakIterator::buildInstance(loc, "title", kind, status); michael@0: break; michael@0: default: michael@0: status = U_ILLEGAL_ARGUMENT_ERROR; michael@0: } michael@0: michael@0: if (U_FAILURE(status)) { michael@0: return NULL; michael@0: } michael@0: michael@0: return result; michael@0: } michael@0: michael@0: Locale michael@0: BreakIterator::getLocale(ULocDataLocaleType type, UErrorCode& status) const { michael@0: U_LOCALE_BASED(locBased, *this); michael@0: return locBased.getLocale(type, status); michael@0: } michael@0: michael@0: const char * michael@0: BreakIterator::getLocaleID(ULocDataLocaleType type, UErrorCode& status) const { michael@0: U_LOCALE_BASED(locBased, *this); michael@0: return locBased.getLocaleID(type, status); michael@0: } michael@0: michael@0: michael@0: // This implementation of getRuleStatus is a do-nothing stub, here to michael@0: // provide a default implementation for any derived BreakIterator classes that michael@0: // do not implement it themselves. michael@0: int32_t BreakIterator::getRuleStatus() const { michael@0: return 0; michael@0: } michael@0: michael@0: // This implementation of getRuleStatusVec is a do-nothing stub, here to michael@0: // provide a default implementation for any derived BreakIterator classes that michael@0: // do not implement it themselves. michael@0: int32_t BreakIterator::getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status) { michael@0: if (U_FAILURE(status)) { michael@0: return 0; michael@0: } michael@0: if (capacity < 1) { michael@0: status = U_BUFFER_OVERFLOW_ERROR; michael@0: return 1; michael@0: } michael@0: *fillInVec = 0; michael@0: return 1; michael@0: } michael@0: michael@0: U_NAMESPACE_END michael@0: michael@0: #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ michael@0: michael@0: //eof