intl/icu/source/common/ubrk.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/common/ubrk.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,301 @@
     1.4 +/*
     1.5 +********************************************************************************
     1.6 +*   Copyright (C) 1996-2013, International Business Machines
     1.7 +*   Corporation and others.  All Rights Reserved.
     1.8 +********************************************************************************
     1.9 +*/
    1.10 +
    1.11 +#include "unicode/utypes.h"
    1.12 +
    1.13 +#if !UCONFIG_NO_BREAK_ITERATION
    1.14 +
    1.15 +#include "unicode/ubrk.h"
    1.16 +
    1.17 +#include "unicode/brkiter.h"
    1.18 +#include "unicode/uloc.h"
    1.19 +#include "unicode/ustring.h"
    1.20 +#include "unicode/uchriter.h"
    1.21 +#include "unicode/rbbi.h"
    1.22 +#include "rbbirb.h"
    1.23 +#include "uassert.h"
    1.24 +
    1.25 +U_NAMESPACE_USE
    1.26 +
    1.27 +//------------------------------------------------------------------------------
    1.28 +//
    1.29 +//    ubrk_open      Create a canned type of break iterator based on type (word, line, etc.)
    1.30 +//                   and locale.
    1.31 +//
    1.32 +//------------------------------------------------------------------------------
    1.33 +U_CAPI UBreakIterator* U_EXPORT2
    1.34 +ubrk_open(UBreakIteratorType type,
    1.35 +      const char *locale,
    1.36 +      const UChar *text,
    1.37 +      int32_t textLength,
    1.38 +      UErrorCode *status)
    1.39 +{
    1.40 +
    1.41 +  if(U_FAILURE(*status)) return 0;
    1.42 +
    1.43 +  BreakIterator *result = 0;
    1.44 +
    1.45 +  switch(type) {
    1.46 +
    1.47 +  case UBRK_CHARACTER:
    1.48 +    result = BreakIterator::createCharacterInstance(Locale(locale), *status);
    1.49 +    break;
    1.50 +
    1.51 +  case UBRK_WORD:
    1.52 +    result = BreakIterator::createWordInstance(Locale(locale), *status);
    1.53 +    break;
    1.54 +
    1.55 +  case UBRK_LINE:
    1.56 +    result = BreakIterator::createLineInstance(Locale(locale), *status);
    1.57 +    break;
    1.58 +
    1.59 +  case UBRK_SENTENCE:
    1.60 +    result = BreakIterator::createSentenceInstance(Locale(locale), *status);
    1.61 +    break;
    1.62 +
    1.63 +  case UBRK_TITLE:
    1.64 +    result = BreakIterator::createTitleInstance(Locale(locale), *status);
    1.65 +    break;
    1.66 +
    1.67 +  default:
    1.68 +    *status = U_ILLEGAL_ARGUMENT_ERROR;
    1.69 +  }
    1.70 +
    1.71 +  // check for allocation error
    1.72 +  if (U_FAILURE(*status)) {
    1.73 +     return 0;
    1.74 +  }
    1.75 +  if(result == 0) {
    1.76 +    *status = U_MEMORY_ALLOCATION_ERROR;
    1.77 +    return 0;
    1.78 +  }
    1.79 +
    1.80 +
    1.81 +  UBreakIterator *uBI = (UBreakIterator *)result;
    1.82 +  if (text != NULL) {
    1.83 +      ubrk_setText(uBI, text, textLength, status);
    1.84 +  }
    1.85 +  return uBI;
    1.86 +}
    1.87 +
    1.88 +
    1.89 +
    1.90 +//------------------------------------------------------------------------------
    1.91 +//
    1.92 +//   ubrk_openRules      open a break iterator from a set of break rules.
    1.93 +//                       Invokes the rule builder.
    1.94 +//
    1.95 +//------------------------------------------------------------------------------
    1.96 +U_CAPI UBreakIterator* U_EXPORT2
    1.97 +ubrk_openRules(  const UChar        *rules,
    1.98 +                       int32_t       rulesLength,
    1.99 +                 const UChar        *text,
   1.100 +                       int32_t       textLength,
   1.101 +                       UParseError  *parseErr,
   1.102 +                       UErrorCode   *status)  {
   1.103 +
   1.104 +    if (status == NULL || U_FAILURE(*status)){
   1.105 +        return 0;
   1.106 +    }
   1.107 +
   1.108 +    BreakIterator *result = 0;
   1.109 +    UnicodeString ruleString(rules, rulesLength);
   1.110 +    result = RBBIRuleBuilder::createRuleBasedBreakIterator(ruleString, parseErr, *status);
   1.111 +    if(U_FAILURE(*status)) {
   1.112 +        return 0;
   1.113 +    }
   1.114 +
   1.115 +    UBreakIterator *uBI = (UBreakIterator *)result;
   1.116 +    if (text != NULL) {
   1.117 +        ubrk_setText(uBI, text, textLength, status);
   1.118 +    }
   1.119 +    return uBI;
   1.120 +}
   1.121 +
   1.122 +
   1.123 +
   1.124 +
   1.125 +
   1.126 +U_CAPI UBreakIterator * U_EXPORT2
   1.127 +ubrk_safeClone(
   1.128 +          const UBreakIterator *bi,
   1.129 +          void * /*stackBuffer*/,
   1.130 +          int32_t *pBufferSize,
   1.131 +          UErrorCode *status)
   1.132 +{
   1.133 +    if (status == NULL || U_FAILURE(*status)){
   1.134 +        return NULL;
   1.135 +    }
   1.136 +    if (bi == NULL) {
   1.137 +       *status = U_ILLEGAL_ARGUMENT_ERROR;
   1.138 +        return NULL;
   1.139 +    }
   1.140 +    if (pBufferSize != NULL) {
   1.141 +        int32_t inputSize = *pBufferSize;
   1.142 +        *pBufferSize = 1;
   1.143 +        if (inputSize == 0) {
   1.144 +            return NULL;  // preflighting for deprecated functionality
   1.145 +        }
   1.146 +    }
   1.147 +    BreakIterator *newBI = ((BreakIterator *)bi)->clone();
   1.148 +    if (newBI == NULL) {
   1.149 +        *status = U_MEMORY_ALLOCATION_ERROR;
   1.150 +    } else {
   1.151 +        *status = U_SAFECLONE_ALLOCATED_WARNING;
   1.152 +    }
   1.153 +    return (UBreakIterator *)newBI;
   1.154 +}
   1.155 +
   1.156 +
   1.157 +
   1.158 +U_CAPI void U_EXPORT2
   1.159 +ubrk_close(UBreakIterator *bi)
   1.160 +{
   1.161 +    delete (BreakIterator *)bi;
   1.162 +}
   1.163 +
   1.164 +U_CAPI void U_EXPORT2
   1.165 +ubrk_setText(UBreakIterator* bi,
   1.166 +             const UChar*    text,
   1.167 +             int32_t         textLength,
   1.168 +             UErrorCode*     status)
   1.169 +{
   1.170 +    BreakIterator *brit = (BreakIterator *)bi;
   1.171 +    UText  ut = UTEXT_INITIALIZER;
   1.172 +    utext_openUChars(&ut, text, textLength, status);
   1.173 +    brit->setText(&ut, *status);
   1.174 +    // A stack allocated UText wrapping a UChar * string
   1.175 +    //   can be dumped without explicitly closing it.
   1.176 +}
   1.177 +
   1.178 +
   1.179 +
   1.180 +U_CAPI void U_EXPORT2
   1.181 +ubrk_setUText(UBreakIterator *bi,
   1.182 +             UText          *text,
   1.183 +             UErrorCode     *status)
   1.184 +{
   1.185 +    RuleBasedBreakIterator *brit = (RuleBasedBreakIterator *)bi;
   1.186 +    brit->RuleBasedBreakIterator::setText(text, *status);
   1.187 +}
   1.188 +
   1.189 +
   1.190 +
   1.191 +
   1.192 +
   1.193 +U_CAPI int32_t U_EXPORT2
   1.194 +ubrk_current(const UBreakIterator *bi)
   1.195 +{
   1.196 +
   1.197 +  return ((RuleBasedBreakIterator*)bi)->RuleBasedBreakIterator::current();
   1.198 +}
   1.199 +
   1.200 +U_CAPI int32_t U_EXPORT2
   1.201 +ubrk_next(UBreakIterator *bi)
   1.202 +{
   1.203 +
   1.204 +  return ((RuleBasedBreakIterator*)bi)->RuleBasedBreakIterator::next();
   1.205 +}
   1.206 +
   1.207 +U_CAPI int32_t U_EXPORT2
   1.208 +ubrk_previous(UBreakIterator *bi)
   1.209 +{
   1.210 +
   1.211 +  return ((RuleBasedBreakIterator*)bi)->RuleBasedBreakIterator::previous();
   1.212 +}
   1.213 +
   1.214 +U_CAPI int32_t U_EXPORT2
   1.215 +ubrk_first(UBreakIterator *bi)
   1.216 +{
   1.217 +
   1.218 +  return ((RuleBasedBreakIterator*)bi)->RuleBasedBreakIterator::first();
   1.219 +}
   1.220 +
   1.221 +U_CAPI int32_t U_EXPORT2
   1.222 +ubrk_last(UBreakIterator *bi)
   1.223 +{
   1.224 +
   1.225 +  return ((RuleBasedBreakIterator*)bi)->RuleBasedBreakIterator::last();
   1.226 +}
   1.227 +
   1.228 +U_CAPI int32_t U_EXPORT2
   1.229 +ubrk_preceding(UBreakIterator *bi,
   1.230 +           int32_t offset)
   1.231 +{
   1.232 +
   1.233 +  return ((RuleBasedBreakIterator*)bi)->RuleBasedBreakIterator::preceding(offset);
   1.234 +}
   1.235 +
   1.236 +U_CAPI int32_t U_EXPORT2
   1.237 +ubrk_following(UBreakIterator *bi,
   1.238 +           int32_t offset)
   1.239 +{
   1.240 +
   1.241 +  return ((RuleBasedBreakIterator*)bi)->RuleBasedBreakIterator::following(offset);
   1.242 +}
   1.243 +
   1.244 +U_CAPI const char* U_EXPORT2
   1.245 +ubrk_getAvailable(int32_t index)
   1.246 +{
   1.247 +
   1.248 +  return uloc_getAvailable(index);
   1.249 +}
   1.250 +
   1.251 +U_CAPI int32_t U_EXPORT2
   1.252 +ubrk_countAvailable()
   1.253 +{
   1.254 +
   1.255 +  return uloc_countAvailable();
   1.256 +}
   1.257 +
   1.258 +
   1.259 +U_CAPI  UBool U_EXPORT2
   1.260 +ubrk_isBoundary(UBreakIterator *bi, int32_t offset)
   1.261 +{
   1.262 +    return ((RuleBasedBreakIterator *)bi)->RuleBasedBreakIterator::isBoundary(offset);
   1.263 +}
   1.264 +
   1.265 +
   1.266 +U_CAPI  int32_t U_EXPORT2
   1.267 +ubrk_getRuleStatus(UBreakIterator *bi)
   1.268 +{
   1.269 +    return ((RuleBasedBreakIterator *)bi)->RuleBasedBreakIterator::getRuleStatus();
   1.270 +}
   1.271 +
   1.272 +U_CAPI  int32_t U_EXPORT2
   1.273 +ubrk_getRuleStatusVec(UBreakIterator *bi, int32_t *fillInVec, int32_t capacity, UErrorCode *status)
   1.274 +{
   1.275 +    return ((RuleBasedBreakIterator *)bi)->RuleBasedBreakIterator::getRuleStatusVec(fillInVec, capacity, *status);
   1.276 +}
   1.277 +
   1.278 +
   1.279 +U_CAPI const char* U_EXPORT2
   1.280 +ubrk_getLocaleByType(const UBreakIterator *bi,
   1.281 +                     ULocDataLocaleType type,
   1.282 +                     UErrorCode* status)
   1.283 +{
   1.284 +    if (bi == NULL) {
   1.285 +        if (U_SUCCESS(*status)) {
   1.286 +            *status = U_ILLEGAL_ARGUMENT_ERROR;
   1.287 +        }
   1.288 +        return NULL;
   1.289 +    }
   1.290 +    return ((BreakIterator*)bi)->getLocaleID(type, *status);
   1.291 +}
   1.292 +
   1.293 +
   1.294 +void ubrk_refreshUText(UBreakIterator *bi,
   1.295 +                       UText          *text,
   1.296 +                       UErrorCode     *status)
   1.297 +{
   1.298 +    BreakIterator *bii = reinterpret_cast<BreakIterator *>(bi);
   1.299 +    bii->refreshInputText(text, *status);
   1.300 +}
   1.301 +
   1.302 +
   1.303 +
   1.304 +#endif /* #if !UCONFIG_NO_BREAK_ITERATION */

mercurial