1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/common/brkiter.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,468 @@ 1.4 +/* 1.5 +******************************************************************************* 1.6 +* Copyright (C) 1997-2013, International Business Machines Corporation and 1.7 +* others. All Rights Reserved. 1.8 +******************************************************************************* 1.9 +* 1.10 +* File TXTBDRY.CPP 1.11 +* 1.12 +* Modification History: 1.13 +* 1.14 +* Date Name Description 1.15 +* 02/18/97 aliu Converted from OpenClass. Added DONE. 1.16 +* 01/13/2000 helena Added UErrorCode parameter to createXXXInstance methods. 1.17 +***************************************************************************************** 1.18 +*/ 1.19 + 1.20 +// ***************************************************************************** 1.21 +// This file was generated from the java source file BreakIterator.java 1.22 +// ***************************************************************************** 1.23 + 1.24 +#include "unicode/utypes.h" 1.25 + 1.26 +#if !UCONFIG_NO_BREAK_ITERATION 1.27 + 1.28 +#include "unicode/rbbi.h" 1.29 +#include "unicode/brkiter.h" 1.30 +#include "unicode/udata.h" 1.31 +#include "unicode/ures.h" 1.32 +#include "unicode/ustring.h" 1.33 +#include "ucln_cmn.h" 1.34 +#include "cstring.h" 1.35 +#include "umutex.h" 1.36 +#include "servloc.h" 1.37 +#include "locbased.h" 1.38 +#include "uresimp.h" 1.39 +#include "uassert.h" 1.40 +#include "ubrkimpl.h" 1.41 + 1.42 +// ***************************************************************************** 1.43 +// class BreakIterator 1.44 +// This class implements methods for finding the location of boundaries in text. 1.45 +// Instances of BreakIterator maintain a current position and scan over text 1.46 +// returning the index of characters where boundaries occur. 1.47 +// ***************************************************************************** 1.48 + 1.49 +U_NAMESPACE_BEGIN 1.50 + 1.51 +// ------------------------------------- 1.52 + 1.53 +BreakIterator* 1.54 +BreakIterator::buildInstance(const Locale& loc, const char *type, int32_t kind, UErrorCode &status) 1.55 +{ 1.56 + char fnbuff[256]; 1.57 + char ext[4]={'\0'}; 1.58 + char actualLocale[ULOC_FULLNAME_CAPACITY]; 1.59 + int32_t size; 1.60 + const UChar* brkfname = NULL; 1.61 + UResourceBundle brkRulesStack; 1.62 + UResourceBundle brkNameStack; 1.63 + UResourceBundle *brkRules = &brkRulesStack; 1.64 + UResourceBundle *brkName = &brkNameStack; 1.65 + RuleBasedBreakIterator *result = NULL; 1.66 + 1.67 + if (U_FAILURE(status)) 1.68 + return NULL; 1.69 + 1.70 + ures_initStackObject(brkRules); 1.71 + ures_initStackObject(brkName); 1.72 + 1.73 + // Get the locale 1.74 + UResourceBundle *b = ures_open(U_ICUDATA_BRKITR, loc.getName(), &status); 1.75 + /* this is a hack for now. Should be fixed when the data is fetched from 1.76 + brk_index.txt */ 1.77 + if(status==U_USING_DEFAULT_WARNING){ 1.78 + status=U_ZERO_ERROR; 1.79 + ures_openFillIn(b, U_ICUDATA_BRKITR, "", &status); 1.80 + } 1.81 + 1.82 + // Get the "boundaries" array. 1.83 + if (U_SUCCESS(status)) { 1.84 + brkRules = ures_getByKeyWithFallback(b, "boundaries", brkRules, &status); 1.85 + // Get the string object naming the rules file 1.86 + brkName = ures_getByKeyWithFallback(brkRules, type, brkName, &status); 1.87 + // Get the actual string 1.88 + brkfname = ures_getString(brkName, &size, &status); 1.89 + U_ASSERT((size_t)size<sizeof(fnbuff)); 1.90 + if ((size_t)size>=sizeof(fnbuff)) { 1.91 + size=0; 1.92 + if (U_SUCCESS(status)) { 1.93 + status = U_BUFFER_OVERFLOW_ERROR; 1.94 + } 1.95 + } 1.96 + 1.97 + // Use the string if we found it 1.98 + if (U_SUCCESS(status) && brkfname) { 1.99 + uprv_strncpy(actualLocale, 1.100 + ures_getLocaleInternal(brkName, &status), 1.101 + sizeof(actualLocale)/sizeof(actualLocale[0])); 1.102 + 1.103 + UChar* extStart=u_strchr(brkfname, 0x002e); 1.104 + int len = 0; 1.105 + if(extStart!=NULL){ 1.106 + len = (int)(extStart-brkfname); 1.107 + u_UCharsToChars(extStart+1, ext, sizeof(ext)); // nul terminates the buff 1.108 + u_UCharsToChars(brkfname, fnbuff, len); 1.109 + } 1.110 + fnbuff[len]=0; // nul terminate 1.111 + } 1.112 + } 1.113 + 1.114 + ures_close(brkRules); 1.115 + ures_close(brkName); 1.116 + 1.117 + UDataMemory* file = udata_open(U_ICUDATA_BRKITR, ext, fnbuff, &status); 1.118 + if (U_FAILURE(status)) { 1.119 + ures_close(b); 1.120 + return NULL; 1.121 + } 1.122 + 1.123 + // Create a RuleBasedBreakIterator 1.124 + result = new RuleBasedBreakIterator(file, status); 1.125 + 1.126 + // If there is a result, set the valid locale and actual locale, and the kind 1.127 + if (U_SUCCESS(status) && result != NULL) { 1.128 + U_LOCALE_BASED(locBased, *(BreakIterator*)result); 1.129 + locBased.setLocaleIDs(ures_getLocaleByType(b, ULOC_VALID_LOCALE, &status), actualLocale); 1.130 + result->setBreakType(kind); 1.131 + } 1.132 + 1.133 + ures_close(b); 1.134 + 1.135 + if (U_FAILURE(status) && result != NULL) { // Sometimes redundant check, but simple 1.136 + delete result; 1.137 + return NULL; 1.138 + } 1.139 + 1.140 + if (result == NULL) { 1.141 + udata_close(file); 1.142 + if (U_SUCCESS(status)) { 1.143 + status = U_MEMORY_ALLOCATION_ERROR; 1.144 + } 1.145 + } 1.146 + 1.147 + return result; 1.148 +} 1.149 + 1.150 +// Creates a break iterator for word breaks. 1.151 +BreakIterator* U_EXPORT2 1.152 +BreakIterator::createWordInstance(const Locale& key, UErrorCode& status) 1.153 +{ 1.154 + return createInstance(key, UBRK_WORD, status); 1.155 +} 1.156 + 1.157 +// ------------------------------------- 1.158 + 1.159 +// Creates a break iterator for line breaks. 1.160 +BreakIterator* U_EXPORT2 1.161 +BreakIterator::createLineInstance(const Locale& key, UErrorCode& status) 1.162 +{ 1.163 + return createInstance(key, UBRK_LINE, status); 1.164 +} 1.165 + 1.166 +// ------------------------------------- 1.167 + 1.168 +// Creates a break iterator for character breaks. 1.169 +BreakIterator* U_EXPORT2 1.170 +BreakIterator::createCharacterInstance(const Locale& key, UErrorCode& status) 1.171 +{ 1.172 + return createInstance(key, UBRK_CHARACTER, status); 1.173 +} 1.174 + 1.175 +// ------------------------------------- 1.176 + 1.177 +// Creates a break iterator for sentence breaks. 1.178 +BreakIterator* U_EXPORT2 1.179 +BreakIterator::createSentenceInstance(const Locale& key, UErrorCode& status) 1.180 +{ 1.181 + return createInstance(key, UBRK_SENTENCE, status); 1.182 +} 1.183 + 1.184 +// ------------------------------------- 1.185 + 1.186 +// Creates a break iterator for title casing breaks. 1.187 +BreakIterator* U_EXPORT2 1.188 +BreakIterator::createTitleInstance(const Locale& key, UErrorCode& status) 1.189 +{ 1.190 + return createInstance(key, UBRK_TITLE, status); 1.191 +} 1.192 + 1.193 +// ------------------------------------- 1.194 + 1.195 +// Gets all the available locales that has localized text boundary data. 1.196 +const Locale* U_EXPORT2 1.197 +BreakIterator::getAvailableLocales(int32_t& count) 1.198 +{ 1.199 + return Locale::getAvailableLocales(count); 1.200 +} 1.201 + 1.202 +// ------------------------------------------ 1.203 +// 1.204 +// Default constructor and destructor 1.205 +// 1.206 +//------------------------------------------- 1.207 + 1.208 +BreakIterator::BreakIterator() 1.209 +{ 1.210 + *validLocale = *actualLocale = 0; 1.211 +} 1.212 + 1.213 +BreakIterator::~BreakIterator() 1.214 +{ 1.215 +} 1.216 + 1.217 +// ------------------------------------------ 1.218 +// 1.219 +// Registration 1.220 +// 1.221 +//------------------------------------------- 1.222 +#if !UCONFIG_NO_SERVICE 1.223 + 1.224 +// ------------------------------------- 1.225 + 1.226 +class ICUBreakIteratorFactory : public ICUResourceBundleFactory { 1.227 +public: 1.228 + virtual ~ICUBreakIteratorFactory(); 1.229 +protected: 1.230 + virtual UObject* handleCreate(const Locale& loc, int32_t kind, const ICUService* /*service*/, UErrorCode& status) const { 1.231 + return BreakIterator::makeInstance(loc, kind, status); 1.232 + } 1.233 +}; 1.234 + 1.235 +ICUBreakIteratorFactory::~ICUBreakIteratorFactory() {} 1.236 + 1.237 +// ------------------------------------- 1.238 + 1.239 +class ICUBreakIteratorService : public ICULocaleService { 1.240 +public: 1.241 + ICUBreakIteratorService() 1.242 + : ICULocaleService(UNICODE_STRING("Break Iterator", 14)) 1.243 + { 1.244 + UErrorCode status = U_ZERO_ERROR; 1.245 + registerFactory(new ICUBreakIteratorFactory(), status); 1.246 + } 1.247 + 1.248 + virtual ~ICUBreakIteratorService(); 1.249 + 1.250 + virtual UObject* cloneInstance(UObject* instance) const { 1.251 + return ((BreakIterator*)instance)->clone(); 1.252 + } 1.253 + 1.254 + virtual UObject* handleDefault(const ICUServiceKey& key, UnicodeString* /*actualID*/, UErrorCode& status) const { 1.255 + LocaleKey& lkey = (LocaleKey&)key; 1.256 + int32_t kind = lkey.kind(); 1.257 + Locale loc; 1.258 + lkey.currentLocale(loc); 1.259 + return BreakIterator::makeInstance(loc, kind, status); 1.260 + } 1.261 + 1.262 + virtual UBool isDefault() const { 1.263 + return countFactories() == 1; 1.264 + } 1.265 +}; 1.266 + 1.267 +ICUBreakIteratorService::~ICUBreakIteratorService() {} 1.268 + 1.269 +// ------------------------------------- 1.270 + 1.271 +// defined in ucln_cmn.h 1.272 +U_NAMESPACE_END 1.273 + 1.274 +static icu::UInitOnce gInitOnce; 1.275 +static icu::ICULocaleService* gService = NULL; 1.276 + 1.277 + 1.278 + 1.279 +/** 1.280 + * Release all static memory held by breakiterator. 1.281 + */ 1.282 +U_CDECL_BEGIN 1.283 +static UBool U_CALLCONV breakiterator_cleanup(void) { 1.284 +#if !UCONFIG_NO_SERVICE 1.285 + if (gService) { 1.286 + delete gService; 1.287 + gService = NULL; 1.288 + } 1.289 + gInitOnce.reset(); 1.290 +#endif 1.291 + return TRUE; 1.292 +} 1.293 +U_CDECL_END 1.294 +U_NAMESPACE_BEGIN 1.295 + 1.296 +static void U_CALLCONV 1.297 +initService(void) { 1.298 + gService = new ICUBreakIteratorService(); 1.299 + ucln_common_registerCleanup(UCLN_COMMON_BREAKITERATOR, breakiterator_cleanup); 1.300 +} 1.301 + 1.302 +static ICULocaleService* 1.303 +getService(void) 1.304 +{ 1.305 + umtx_initOnce(gInitOnce, &initService); 1.306 + return gService; 1.307 +} 1.308 + 1.309 + 1.310 +// ------------------------------------- 1.311 + 1.312 +static inline UBool 1.313 +hasService(void) 1.314 +{ 1.315 + return !gInitOnce.isReset() && getService() != NULL; 1.316 +} 1.317 + 1.318 +// ------------------------------------- 1.319 + 1.320 +URegistryKey U_EXPORT2 1.321 +BreakIterator::registerInstance(BreakIterator* toAdopt, const Locale& locale, UBreakIteratorType kind, UErrorCode& status) 1.322 +{ 1.323 + ICULocaleService *service = getService(); 1.324 + if (service == NULL) { 1.325 + status = U_MEMORY_ALLOCATION_ERROR; 1.326 + return NULL; 1.327 + } 1.328 + return service->registerInstance(toAdopt, locale, kind, status); 1.329 +} 1.330 + 1.331 +// ------------------------------------- 1.332 + 1.333 +UBool U_EXPORT2 1.334 +BreakIterator::unregister(URegistryKey key, UErrorCode& status) 1.335 +{ 1.336 + if (U_SUCCESS(status)) { 1.337 + if (hasService()) { 1.338 + return gService->unregister(key, status); 1.339 + } 1.340 + status = U_MEMORY_ALLOCATION_ERROR; 1.341 + } 1.342 + return FALSE; 1.343 +} 1.344 + 1.345 +// ------------------------------------- 1.346 + 1.347 +StringEnumeration* U_EXPORT2 1.348 +BreakIterator::getAvailableLocales(void) 1.349 +{ 1.350 + ICULocaleService *service = getService(); 1.351 + if (service == NULL) { 1.352 + return NULL; 1.353 + } 1.354 + return service->getAvailableLocales(); 1.355 +} 1.356 +#endif /* UCONFIG_NO_SERVICE */ 1.357 + 1.358 +// ------------------------------------- 1.359 + 1.360 +BreakIterator* 1.361 +BreakIterator::createInstance(const Locale& loc, int32_t kind, UErrorCode& status) 1.362 +{ 1.363 + if (U_FAILURE(status)) { 1.364 + return NULL; 1.365 + } 1.366 + 1.367 +#if !UCONFIG_NO_SERVICE 1.368 + if (hasService()) { 1.369 + Locale actualLoc(""); 1.370 + BreakIterator *result = (BreakIterator*)gService->get(loc, kind, &actualLoc, status); 1.371 + // TODO: The way the service code works in ICU 2.8 is that if 1.372 + // there is a real registered break iterator, the actualLoc 1.373 + // will be populated, but if the handleDefault path is taken 1.374 + // (because nothing is registered that can handle the 1.375 + // requested locale) then the actualLoc comes back empty. In 1.376 + // that case, the returned object already has its actual/valid 1.377 + // locale data populated (by makeInstance, which is what 1.378 + // handleDefault calls), so we don't touch it. YES, A COMMENT 1.379 + // THIS LONG is a sign of bad code -- so the action item is to 1.380 + // revisit this in ICU 3.0 and clean it up/fix it/remove it. 1.381 + if (U_SUCCESS(status) && (result != NULL) && *actualLoc.getName() != 0) { 1.382 + U_LOCALE_BASED(locBased, *result); 1.383 + locBased.setLocaleIDs(actualLoc.getName(), actualLoc.getName()); 1.384 + } 1.385 + return result; 1.386 + } 1.387 + else 1.388 +#endif 1.389 + { 1.390 + return makeInstance(loc, kind, status); 1.391 + } 1.392 +} 1.393 + 1.394 +// ------------------------------------- 1.395 + 1.396 +BreakIterator* 1.397 +BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status) 1.398 +{ 1.399 + 1.400 + if (U_FAILURE(status)) { 1.401 + return NULL; 1.402 + } 1.403 + 1.404 + BreakIterator *result = NULL; 1.405 + switch (kind) { 1.406 + case UBRK_CHARACTER: 1.407 + result = BreakIterator::buildInstance(loc, "grapheme", kind, status); 1.408 + break; 1.409 + case UBRK_WORD: 1.410 + result = BreakIterator::buildInstance(loc, "word", kind, status); 1.411 + break; 1.412 + case UBRK_LINE: 1.413 + result = BreakIterator::buildInstance(loc, "line", kind, status); 1.414 + break; 1.415 + case UBRK_SENTENCE: 1.416 + result = BreakIterator::buildInstance(loc, "sentence", kind, status); 1.417 + break; 1.418 + case UBRK_TITLE: 1.419 + result = BreakIterator::buildInstance(loc, "title", kind, status); 1.420 + break; 1.421 + default: 1.422 + status = U_ILLEGAL_ARGUMENT_ERROR; 1.423 + } 1.424 + 1.425 + if (U_FAILURE(status)) { 1.426 + return NULL; 1.427 + } 1.428 + 1.429 + return result; 1.430 +} 1.431 + 1.432 +Locale 1.433 +BreakIterator::getLocale(ULocDataLocaleType type, UErrorCode& status) const { 1.434 + U_LOCALE_BASED(locBased, *this); 1.435 + return locBased.getLocale(type, status); 1.436 +} 1.437 + 1.438 +const char * 1.439 +BreakIterator::getLocaleID(ULocDataLocaleType type, UErrorCode& status) const { 1.440 + U_LOCALE_BASED(locBased, *this); 1.441 + return locBased.getLocaleID(type, status); 1.442 +} 1.443 + 1.444 + 1.445 +// This implementation of getRuleStatus is a do-nothing stub, here to 1.446 +// provide a default implementation for any derived BreakIterator classes that 1.447 +// do not implement it themselves. 1.448 +int32_t BreakIterator::getRuleStatus() const { 1.449 + return 0; 1.450 +} 1.451 + 1.452 +// This implementation of getRuleStatusVec is a do-nothing stub, here to 1.453 +// provide a default implementation for any derived BreakIterator classes that 1.454 +// do not implement it themselves. 1.455 +int32_t BreakIterator::getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status) { 1.456 + if (U_FAILURE(status)) { 1.457 + return 0; 1.458 + } 1.459 + if (capacity < 1) { 1.460 + status = U_BUFFER_OVERFLOW_ERROR; 1.461 + return 1; 1.462 + } 1.463 + *fillInVec = 0; 1.464 + return 1; 1.465 +} 1.466 + 1.467 +U_NAMESPACE_END 1.468 + 1.469 +#endif /* #if !UCONFIG_NO_BREAK_ITERATION */ 1.470 + 1.471 +//eof