1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/common/ubrk.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,301 @@ 1.4 +/* 1.5 +******************************************************************************** 1.6 +* Copyright (C) 1996-2013, International Business Machines 1.7 +* Corporation and others. All Rights Reserved. 1.8 +******************************************************************************** 1.9 +*/ 1.10 + 1.11 +#include "unicode/utypes.h" 1.12 + 1.13 +#if !UCONFIG_NO_BREAK_ITERATION 1.14 + 1.15 +#include "unicode/ubrk.h" 1.16 + 1.17 +#include "unicode/brkiter.h" 1.18 +#include "unicode/uloc.h" 1.19 +#include "unicode/ustring.h" 1.20 +#include "unicode/uchriter.h" 1.21 +#include "unicode/rbbi.h" 1.22 +#include "rbbirb.h" 1.23 +#include "uassert.h" 1.24 + 1.25 +U_NAMESPACE_USE 1.26 + 1.27 +//------------------------------------------------------------------------------ 1.28 +// 1.29 +// ubrk_open Create a canned type of break iterator based on type (word, line, etc.) 1.30 +// and locale. 1.31 +// 1.32 +//------------------------------------------------------------------------------ 1.33 +U_CAPI UBreakIterator* U_EXPORT2 1.34 +ubrk_open(UBreakIteratorType type, 1.35 + const char *locale, 1.36 + const UChar *text, 1.37 + int32_t textLength, 1.38 + UErrorCode *status) 1.39 +{ 1.40 + 1.41 + if(U_FAILURE(*status)) return 0; 1.42 + 1.43 + BreakIterator *result = 0; 1.44 + 1.45 + switch(type) { 1.46 + 1.47 + case UBRK_CHARACTER: 1.48 + result = BreakIterator::createCharacterInstance(Locale(locale), *status); 1.49 + break; 1.50 + 1.51 + case UBRK_WORD: 1.52 + result = BreakIterator::createWordInstance(Locale(locale), *status); 1.53 + break; 1.54 + 1.55 + case UBRK_LINE: 1.56 + result = BreakIterator::createLineInstance(Locale(locale), *status); 1.57 + break; 1.58 + 1.59 + case UBRK_SENTENCE: 1.60 + result = BreakIterator::createSentenceInstance(Locale(locale), *status); 1.61 + break; 1.62 + 1.63 + case UBRK_TITLE: 1.64 + result = BreakIterator::createTitleInstance(Locale(locale), *status); 1.65 + break; 1.66 + 1.67 + default: 1.68 + *status = U_ILLEGAL_ARGUMENT_ERROR; 1.69 + } 1.70 + 1.71 + // check for allocation error 1.72 + if (U_FAILURE(*status)) { 1.73 + return 0; 1.74 + } 1.75 + if(result == 0) { 1.76 + *status = U_MEMORY_ALLOCATION_ERROR; 1.77 + return 0; 1.78 + } 1.79 + 1.80 + 1.81 + UBreakIterator *uBI = (UBreakIterator *)result; 1.82 + if (text != NULL) { 1.83 + ubrk_setText(uBI, text, textLength, status); 1.84 + } 1.85 + return uBI; 1.86 +} 1.87 + 1.88 + 1.89 + 1.90 +//------------------------------------------------------------------------------ 1.91 +// 1.92 +// ubrk_openRules open a break iterator from a set of break rules. 1.93 +// Invokes the rule builder. 1.94 +// 1.95 +//------------------------------------------------------------------------------ 1.96 +U_CAPI UBreakIterator* U_EXPORT2 1.97 +ubrk_openRules( const UChar *rules, 1.98 + int32_t rulesLength, 1.99 + const UChar *text, 1.100 + int32_t textLength, 1.101 + UParseError *parseErr, 1.102 + UErrorCode *status) { 1.103 + 1.104 + if (status == NULL || U_FAILURE(*status)){ 1.105 + return 0; 1.106 + } 1.107 + 1.108 + BreakIterator *result = 0; 1.109 + UnicodeString ruleString(rules, rulesLength); 1.110 + result = RBBIRuleBuilder::createRuleBasedBreakIterator(ruleString, parseErr, *status); 1.111 + if(U_FAILURE(*status)) { 1.112 + return 0; 1.113 + } 1.114 + 1.115 + UBreakIterator *uBI = (UBreakIterator *)result; 1.116 + if (text != NULL) { 1.117 + ubrk_setText(uBI, text, textLength, status); 1.118 + } 1.119 + return uBI; 1.120 +} 1.121 + 1.122 + 1.123 + 1.124 + 1.125 + 1.126 +U_CAPI UBreakIterator * U_EXPORT2 1.127 +ubrk_safeClone( 1.128 + const UBreakIterator *bi, 1.129 + void * /*stackBuffer*/, 1.130 + int32_t *pBufferSize, 1.131 + UErrorCode *status) 1.132 +{ 1.133 + if (status == NULL || U_FAILURE(*status)){ 1.134 + return NULL; 1.135 + } 1.136 + if (bi == NULL) { 1.137 + *status = U_ILLEGAL_ARGUMENT_ERROR; 1.138 + return NULL; 1.139 + } 1.140 + if (pBufferSize != NULL) { 1.141 + int32_t inputSize = *pBufferSize; 1.142 + *pBufferSize = 1; 1.143 + if (inputSize == 0) { 1.144 + return NULL; // preflighting for deprecated functionality 1.145 + } 1.146 + } 1.147 + BreakIterator *newBI = ((BreakIterator *)bi)->clone(); 1.148 + if (newBI == NULL) { 1.149 + *status = U_MEMORY_ALLOCATION_ERROR; 1.150 + } else { 1.151 + *status = U_SAFECLONE_ALLOCATED_WARNING; 1.152 + } 1.153 + return (UBreakIterator *)newBI; 1.154 +} 1.155 + 1.156 + 1.157 + 1.158 +U_CAPI void U_EXPORT2 1.159 +ubrk_close(UBreakIterator *bi) 1.160 +{ 1.161 + delete (BreakIterator *)bi; 1.162 +} 1.163 + 1.164 +U_CAPI void U_EXPORT2 1.165 +ubrk_setText(UBreakIterator* bi, 1.166 + const UChar* text, 1.167 + int32_t textLength, 1.168 + UErrorCode* status) 1.169 +{ 1.170 + BreakIterator *brit = (BreakIterator *)bi; 1.171 + UText ut = UTEXT_INITIALIZER; 1.172 + utext_openUChars(&ut, text, textLength, status); 1.173 + brit->setText(&ut, *status); 1.174 + // A stack allocated UText wrapping a UChar * string 1.175 + // can be dumped without explicitly closing it. 1.176 +} 1.177 + 1.178 + 1.179 + 1.180 +U_CAPI void U_EXPORT2 1.181 +ubrk_setUText(UBreakIterator *bi, 1.182 + UText *text, 1.183 + UErrorCode *status) 1.184 +{ 1.185 + RuleBasedBreakIterator *brit = (RuleBasedBreakIterator *)bi; 1.186 + brit->RuleBasedBreakIterator::setText(text, *status); 1.187 +} 1.188 + 1.189 + 1.190 + 1.191 + 1.192 + 1.193 +U_CAPI int32_t U_EXPORT2 1.194 +ubrk_current(const UBreakIterator *bi) 1.195 +{ 1.196 + 1.197 + return ((RuleBasedBreakIterator*)bi)->RuleBasedBreakIterator::current(); 1.198 +} 1.199 + 1.200 +U_CAPI int32_t U_EXPORT2 1.201 +ubrk_next(UBreakIterator *bi) 1.202 +{ 1.203 + 1.204 + return ((RuleBasedBreakIterator*)bi)->RuleBasedBreakIterator::next(); 1.205 +} 1.206 + 1.207 +U_CAPI int32_t U_EXPORT2 1.208 +ubrk_previous(UBreakIterator *bi) 1.209 +{ 1.210 + 1.211 + return ((RuleBasedBreakIterator*)bi)->RuleBasedBreakIterator::previous(); 1.212 +} 1.213 + 1.214 +U_CAPI int32_t U_EXPORT2 1.215 +ubrk_first(UBreakIterator *bi) 1.216 +{ 1.217 + 1.218 + return ((RuleBasedBreakIterator*)bi)->RuleBasedBreakIterator::first(); 1.219 +} 1.220 + 1.221 +U_CAPI int32_t U_EXPORT2 1.222 +ubrk_last(UBreakIterator *bi) 1.223 +{ 1.224 + 1.225 + return ((RuleBasedBreakIterator*)bi)->RuleBasedBreakIterator::last(); 1.226 +} 1.227 + 1.228 +U_CAPI int32_t U_EXPORT2 1.229 +ubrk_preceding(UBreakIterator *bi, 1.230 + int32_t offset) 1.231 +{ 1.232 + 1.233 + return ((RuleBasedBreakIterator*)bi)->RuleBasedBreakIterator::preceding(offset); 1.234 +} 1.235 + 1.236 +U_CAPI int32_t U_EXPORT2 1.237 +ubrk_following(UBreakIterator *bi, 1.238 + int32_t offset) 1.239 +{ 1.240 + 1.241 + return ((RuleBasedBreakIterator*)bi)->RuleBasedBreakIterator::following(offset); 1.242 +} 1.243 + 1.244 +U_CAPI const char* U_EXPORT2 1.245 +ubrk_getAvailable(int32_t index) 1.246 +{ 1.247 + 1.248 + return uloc_getAvailable(index); 1.249 +} 1.250 + 1.251 +U_CAPI int32_t U_EXPORT2 1.252 +ubrk_countAvailable() 1.253 +{ 1.254 + 1.255 + return uloc_countAvailable(); 1.256 +} 1.257 + 1.258 + 1.259 +U_CAPI UBool U_EXPORT2 1.260 +ubrk_isBoundary(UBreakIterator *bi, int32_t offset) 1.261 +{ 1.262 + return ((RuleBasedBreakIterator *)bi)->RuleBasedBreakIterator::isBoundary(offset); 1.263 +} 1.264 + 1.265 + 1.266 +U_CAPI int32_t U_EXPORT2 1.267 +ubrk_getRuleStatus(UBreakIterator *bi) 1.268 +{ 1.269 + return ((RuleBasedBreakIterator *)bi)->RuleBasedBreakIterator::getRuleStatus(); 1.270 +} 1.271 + 1.272 +U_CAPI int32_t U_EXPORT2 1.273 +ubrk_getRuleStatusVec(UBreakIterator *bi, int32_t *fillInVec, int32_t capacity, UErrorCode *status) 1.274 +{ 1.275 + return ((RuleBasedBreakIterator *)bi)->RuleBasedBreakIterator::getRuleStatusVec(fillInVec, capacity, *status); 1.276 +} 1.277 + 1.278 + 1.279 +U_CAPI const char* U_EXPORT2 1.280 +ubrk_getLocaleByType(const UBreakIterator *bi, 1.281 + ULocDataLocaleType type, 1.282 + UErrorCode* status) 1.283 +{ 1.284 + if (bi == NULL) { 1.285 + if (U_SUCCESS(*status)) { 1.286 + *status = U_ILLEGAL_ARGUMENT_ERROR; 1.287 + } 1.288 + return NULL; 1.289 + } 1.290 + return ((BreakIterator*)bi)->getLocaleID(type, *status); 1.291 +} 1.292 + 1.293 + 1.294 +void ubrk_refreshUText(UBreakIterator *bi, 1.295 + UText *text, 1.296 + UErrorCode *status) 1.297 +{ 1.298 + BreakIterator *bii = reinterpret_cast<BreakIterator *>(bi); 1.299 + bii->refreshInputText(text, *status); 1.300 +} 1.301 + 1.302 + 1.303 + 1.304 +#endif /* #if !UCONFIG_NO_BREAK_ITERATION */