1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/i18n/utrans.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,495 @@ 1.4 +/* 1.5 + ******************************************************************************* 1.6 + * Copyright (C) 1997-2009, International Business Machines 1.7 + * Corporation and others. All Rights Reserved. 1.8 + ******************************************************************************* 1.9 + * Date Name Description 1.10 + * 06/21/00 aliu Creation. 1.11 + ******************************************************************************* 1.12 + */ 1.13 + 1.14 +#include "unicode/utypes.h" 1.15 + 1.16 +#if !UCONFIG_NO_TRANSLITERATION 1.17 + 1.18 +#include "unicode/utrans.h" 1.19 +#include "unicode/putil.h" 1.20 +#include "unicode/rep.h" 1.21 +#include "unicode/translit.h" 1.22 +#include "unicode/unifilt.h" 1.23 +#include "unicode/uniset.h" 1.24 +#include "unicode/ustring.h" 1.25 +#include "unicode/uenum.h" 1.26 +#include "uenumimp.h" 1.27 +#include "cpputils.h" 1.28 +#include "rbt.h" 1.29 + 1.30 +// Following macro is to be followed by <return value>';' or just ';' 1.31 +#define utrans_ENTRY(s) if ((s)==NULL || U_FAILURE(*(s))) return 1.32 + 1.33 +/******************************************************************** 1.34 + * Replaceable-UReplaceableCallbacks glue 1.35 + ********************************************************************/ 1.36 + 1.37 +/** 1.38 + * Make a UReplaceable + UReplaceableCallbacks into a Replaceable object. 1.39 + */ 1.40 +U_NAMESPACE_BEGIN 1.41 +class ReplaceableGlue : public Replaceable { 1.42 + 1.43 + UReplaceable *rep; 1.44 + UReplaceableCallbacks *func; 1.45 + 1.46 +public: 1.47 + 1.48 + ReplaceableGlue(UReplaceable *replaceable, 1.49 + UReplaceableCallbacks *funcCallback); 1.50 + 1.51 + virtual ~ReplaceableGlue(); 1.52 + 1.53 + virtual void handleReplaceBetween(int32_t start, 1.54 + int32_t limit, 1.55 + const UnicodeString& text); 1.56 + 1.57 + virtual void extractBetween(int32_t start, 1.58 + int32_t limit, 1.59 + UnicodeString& target) const; 1.60 + 1.61 + virtual void copy(int32_t start, int32_t limit, int32_t dest); 1.62 + 1.63 + // virtual Replaceable *clone() const { return NULL; } same as default 1.64 + 1.65 + /** 1.66 + * ICU "poor man's RTTI", returns a UClassID for the actual class. 1.67 + * 1.68 + * @draft ICU 2.2 1.69 + */ 1.70 + virtual UClassID getDynamicClassID() const; 1.71 + 1.72 + /** 1.73 + * ICU "poor man's RTTI", returns a UClassID for this class. 1.74 + * 1.75 + * @draft ICU 2.2 1.76 + */ 1.77 + static UClassID U_EXPORT2 getStaticClassID(); 1.78 + 1.79 +protected: 1.80 + 1.81 + virtual int32_t getLength() const; 1.82 + 1.83 + virtual UChar getCharAt(int32_t offset) const; 1.84 + 1.85 + virtual UChar32 getChar32At(int32_t offset) const; 1.86 +}; 1.87 + 1.88 +UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ReplaceableGlue) 1.89 + 1.90 +ReplaceableGlue::ReplaceableGlue(UReplaceable *replaceable, 1.91 + UReplaceableCallbacks *funcCallback) 1.92 + : Replaceable() 1.93 +{ 1.94 + this->rep = replaceable; 1.95 + this->func = funcCallback; 1.96 +} 1.97 + 1.98 +ReplaceableGlue::~ReplaceableGlue() {} 1.99 + 1.100 +int32_t ReplaceableGlue::getLength() const { 1.101 + return (*func->length)(rep); 1.102 +} 1.103 + 1.104 +UChar ReplaceableGlue::getCharAt(int32_t offset) const { 1.105 + return (*func->charAt)(rep, offset); 1.106 +} 1.107 + 1.108 +UChar32 ReplaceableGlue::getChar32At(int32_t offset) const { 1.109 + return (*func->char32At)(rep, offset); 1.110 +} 1.111 + 1.112 +void ReplaceableGlue::handleReplaceBetween(int32_t start, 1.113 + int32_t limit, 1.114 + const UnicodeString& text) { 1.115 + (*func->replace)(rep, start, limit, text.getBuffer(), text.length()); 1.116 +} 1.117 + 1.118 +void ReplaceableGlue::extractBetween(int32_t start, 1.119 + int32_t limit, 1.120 + UnicodeString& target) const { 1.121 + (*func->extract)(rep, start, limit, target.getBuffer(limit-start)); 1.122 + target.releaseBuffer(limit-start); 1.123 +} 1.124 + 1.125 +void ReplaceableGlue::copy(int32_t start, int32_t limit, int32_t dest) { 1.126 + (*func->copy)(rep, start, limit, dest); 1.127 +} 1.128 +U_NAMESPACE_END 1.129 +/******************************************************************** 1.130 + * General API 1.131 + ********************************************************************/ 1.132 +U_NAMESPACE_USE 1.133 + 1.134 +U_CAPI UTransliterator* U_EXPORT2 1.135 +utrans_openU(const UChar *id, 1.136 + int32_t idLength, 1.137 + UTransDirection dir, 1.138 + const UChar *rules, 1.139 + int32_t rulesLength, 1.140 + UParseError *parseError, 1.141 + UErrorCode *status) { 1.142 + if(status==NULL || U_FAILURE(*status)) { 1.143 + return NULL; 1.144 + } 1.145 + if (id == NULL) { 1.146 + *status = U_ILLEGAL_ARGUMENT_ERROR; 1.147 + return NULL; 1.148 + } 1.149 + UParseError temp; 1.150 + 1.151 + if(parseError == NULL){ 1.152 + parseError = &temp; 1.153 + } 1.154 + 1.155 + UnicodeString ID(idLength<0, id, idLength); // r-o alias 1.156 + 1.157 + if(rules==NULL){ 1.158 + 1.159 + Transliterator *trans = NULL; 1.160 + 1.161 + trans = Transliterator::createInstance(ID, dir, *parseError, *status); 1.162 + 1.163 + if(U_FAILURE(*status)){ 1.164 + return NULL; 1.165 + } 1.166 + return (UTransliterator*) trans; 1.167 + }else{ 1.168 + UnicodeString ruleStr(rulesLength < 0, 1.169 + rules, 1.170 + rulesLength); // r-o alias 1.171 + 1.172 + Transliterator *trans = NULL; 1.173 + trans = Transliterator::createFromRules(ID, ruleStr, dir, *parseError, *status); 1.174 + if(U_FAILURE(*status)) { 1.175 + return NULL; 1.176 + } 1.177 + 1.178 + return (UTransliterator*) trans; 1.179 + } 1.180 +} 1.181 + 1.182 +U_CAPI UTransliterator* U_EXPORT2 1.183 +utrans_open(const char* id, 1.184 + UTransDirection dir, 1.185 + const UChar* rules, /* may be Null */ 1.186 + int32_t rulesLength, /* -1 if null-terminated */ 1.187 + UParseError* parseError, /* may be Null */ 1.188 + UErrorCode* status) { 1.189 + UnicodeString ID(id, -1, US_INV); // use invariant converter 1.190 + return utrans_openU(ID.getBuffer(), ID.length(), dir, 1.191 + rules, rulesLength, 1.192 + parseError, status); 1.193 +} 1.194 + 1.195 +U_CAPI UTransliterator* U_EXPORT2 1.196 +utrans_openInverse(const UTransliterator* trans, 1.197 + UErrorCode* status) { 1.198 + 1.199 + utrans_ENTRY(status) NULL; 1.200 + 1.201 + UTransliterator* result = 1.202 + (UTransliterator*) ((Transliterator*) trans)->createInverse(*status); 1.203 + 1.204 + return result; 1.205 +} 1.206 + 1.207 +U_CAPI UTransliterator* U_EXPORT2 1.208 +utrans_clone(const UTransliterator* trans, 1.209 + UErrorCode* status) { 1.210 + 1.211 + utrans_ENTRY(status) NULL; 1.212 + 1.213 + if (trans == NULL) { 1.214 + *status = U_ILLEGAL_ARGUMENT_ERROR; 1.215 + return NULL; 1.216 + } 1.217 + 1.218 + Transliterator *t = ((Transliterator*) trans)->clone(); 1.219 + if (t == NULL) { 1.220 + *status = U_MEMORY_ALLOCATION_ERROR; 1.221 + } 1.222 + return (UTransliterator*) t; 1.223 +} 1.224 + 1.225 +U_CAPI void U_EXPORT2 1.226 +utrans_close(UTransliterator* trans) { 1.227 + delete (Transliterator*) trans; 1.228 +} 1.229 + 1.230 +U_CAPI const UChar * U_EXPORT2 1.231 +utrans_getUnicodeID(const UTransliterator *trans, 1.232 + int32_t *resultLength) { 1.233 + // Transliterator keeps its ID NUL-terminated 1.234 + const UnicodeString &ID=((Transliterator*) trans)->getID(); 1.235 + if(resultLength!=NULL) { 1.236 + *resultLength=ID.length(); 1.237 + } 1.238 + return ID.getBuffer(); 1.239 +} 1.240 + 1.241 +U_CAPI int32_t U_EXPORT2 1.242 +utrans_getID(const UTransliterator* trans, 1.243 + char* buf, 1.244 + int32_t bufCapacity) { 1.245 + return ((Transliterator*) trans)->getID().extract(0, 0x7fffffff, buf, bufCapacity, US_INV); 1.246 +} 1.247 + 1.248 +U_CAPI void U_EXPORT2 1.249 +utrans_register(UTransliterator* adoptedTrans, 1.250 + UErrorCode* status) { 1.251 + utrans_ENTRY(status); 1.252 + // status currently ignored; may remove later 1.253 + Transliterator::registerInstance((Transliterator*) adoptedTrans); 1.254 +} 1.255 + 1.256 +U_CAPI void U_EXPORT2 1.257 +utrans_unregisterID(const UChar* id, int32_t idLength) { 1.258 + UnicodeString ID(idLength<0, id, idLength); // r-o alias 1.259 + Transliterator::unregister(ID); 1.260 +} 1.261 + 1.262 +U_CAPI void U_EXPORT2 1.263 +utrans_unregister(const char* id) { 1.264 + UnicodeString ID(id, -1, US_INV); // use invariant converter 1.265 + Transliterator::unregister(ID); 1.266 +} 1.267 + 1.268 +U_CAPI void U_EXPORT2 1.269 +utrans_setFilter(UTransliterator* trans, 1.270 + const UChar* filterPattern, 1.271 + int32_t filterPatternLen, 1.272 + UErrorCode* status) { 1.273 + 1.274 + utrans_ENTRY(status); 1.275 + UnicodeFilter* filter = NULL; 1.276 + if (filterPattern != NULL && *filterPattern != 0) { 1.277 + // Create read only alias of filterPattern: 1.278 + UnicodeString pat(filterPatternLen < 0, filterPattern, filterPatternLen); 1.279 + filter = new UnicodeSet(pat, *status); 1.280 + /* test for NULL */ 1.281 + if (filter == NULL) { 1.282 + *status = U_MEMORY_ALLOCATION_ERROR; 1.283 + return; 1.284 + } 1.285 + if (U_FAILURE(*status)) { 1.286 + delete filter; 1.287 + filter = NULL; 1.288 + } 1.289 + } 1.290 + ((Transliterator*) trans)->adoptFilter(filter); 1.291 +} 1.292 + 1.293 +U_CAPI int32_t U_EXPORT2 1.294 +utrans_countAvailableIDs(void) { 1.295 + return Transliterator::countAvailableIDs(); 1.296 +} 1.297 + 1.298 +U_CAPI int32_t U_EXPORT2 1.299 +utrans_getAvailableID(int32_t index, 1.300 + char* buf, // may be NULL 1.301 + int32_t bufCapacity) { 1.302 + return Transliterator::getAvailableID(index).extract(0, 0x7fffffff, buf, bufCapacity, US_INV); 1.303 +} 1.304 + 1.305 +/* Transliterator UEnumeration ---------------------------------------------- */ 1.306 + 1.307 +typedef struct UTransEnumeration { 1.308 + UEnumeration uenum; 1.309 + int32_t index, count; 1.310 +} UTransEnumeration; 1.311 + 1.312 +U_CDECL_BEGIN 1.313 +static int32_t U_CALLCONV 1.314 +utrans_enum_count(UEnumeration *uenum, UErrorCode *pErrorCode) { 1.315 + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 1.316 + return 0; 1.317 + } 1.318 + return ((UTransEnumeration *)uenum)->count; 1.319 +} 1.320 + 1.321 +static const UChar* U_CALLCONV 1.322 +utrans_enum_unext(UEnumeration *uenum, 1.323 + int32_t* resultLength, 1.324 + UErrorCode *pErrorCode) { 1.325 + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 1.326 + return 0; 1.327 + } 1.328 + 1.329 + UTransEnumeration *ute=(UTransEnumeration *)uenum; 1.330 + int32_t index=ute->index; 1.331 + if(index<ute->count) { 1.332 + const UnicodeString &ID=Transliterator::getAvailableID(index); 1.333 + ute->index=index+1; 1.334 + if(resultLength!=NULL) { 1.335 + *resultLength=ID.length(); 1.336 + } 1.337 + // Transliterator keeps its ID NUL-terminated 1.338 + return ID.getBuffer(); 1.339 + } 1.340 + 1.341 + if(resultLength!=NULL) { 1.342 + *resultLength=0; 1.343 + } 1.344 + return NULL; 1.345 +} 1.346 + 1.347 +static void U_CALLCONV 1.348 +utrans_enum_reset(UEnumeration *uenum, UErrorCode *pErrorCode) { 1.349 + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 1.350 + return; 1.351 + } 1.352 + 1.353 + UTransEnumeration *ute=(UTransEnumeration *)uenum; 1.354 + ute->index=0; 1.355 + ute->count=Transliterator::countAvailableIDs(); 1.356 +} 1.357 + 1.358 +static void U_CALLCONV 1.359 +utrans_enum_close(UEnumeration *uenum) { 1.360 + uprv_free(uenum); 1.361 +} 1.362 +U_CDECL_END 1.363 + 1.364 +static const UEnumeration utransEnumeration={ 1.365 + NULL, 1.366 + NULL, 1.367 + utrans_enum_close, 1.368 + utrans_enum_count, 1.369 + utrans_enum_unext, 1.370 + uenum_nextDefault, 1.371 + utrans_enum_reset 1.372 +}; 1.373 + 1.374 +U_CAPI UEnumeration * U_EXPORT2 1.375 +utrans_openIDs(UErrorCode *pErrorCode) { 1.376 + UTransEnumeration *ute; 1.377 + 1.378 + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 1.379 + return NULL; 1.380 + } 1.381 + 1.382 + ute=(UTransEnumeration *)uprv_malloc(sizeof(UTransEnumeration)); 1.383 + if(ute==NULL) { 1.384 + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 1.385 + return NULL; 1.386 + } 1.387 + 1.388 + ute->uenum=utransEnumeration; 1.389 + ute->index=0; 1.390 + ute->count=Transliterator::countAvailableIDs(); 1.391 + return (UEnumeration *)ute; 1.392 +} 1.393 + 1.394 +/******************************************************************** 1.395 + * Transliteration API 1.396 + ********************************************************************/ 1.397 + 1.398 +U_CAPI void U_EXPORT2 1.399 +utrans_trans(const UTransliterator* trans, 1.400 + UReplaceable* rep, 1.401 + UReplaceableCallbacks* repFunc, 1.402 + int32_t start, 1.403 + int32_t* limit, 1.404 + UErrorCode* status) { 1.405 + 1.406 + utrans_ENTRY(status); 1.407 + 1.408 + if (trans == 0 || rep == 0 || repFunc == 0 || limit == 0) { 1.409 + *status = U_ILLEGAL_ARGUMENT_ERROR; 1.410 + return; 1.411 + } 1.412 + 1.413 + ReplaceableGlue r(rep, repFunc); 1.414 + 1.415 + *limit = ((Transliterator*) trans)->transliterate(r, start, *limit); 1.416 +} 1.417 + 1.418 +U_CAPI void U_EXPORT2 1.419 +utrans_transIncremental(const UTransliterator* trans, 1.420 + UReplaceable* rep, 1.421 + UReplaceableCallbacks* repFunc, 1.422 + UTransPosition* pos, 1.423 + UErrorCode* status) { 1.424 + 1.425 + utrans_ENTRY(status); 1.426 + 1.427 + if (trans == 0 || rep == 0 || repFunc == 0 || pos == 0) { 1.428 + *status = U_ILLEGAL_ARGUMENT_ERROR; 1.429 + return; 1.430 + } 1.431 + 1.432 + ReplaceableGlue r(rep, repFunc); 1.433 + 1.434 + ((Transliterator*) trans)->transliterate(r, *pos, *status); 1.435 +} 1.436 + 1.437 +U_CAPI void U_EXPORT2 1.438 +utrans_transUChars(const UTransliterator* trans, 1.439 + UChar* text, 1.440 + int32_t* textLength, 1.441 + int32_t textCapacity, 1.442 + int32_t start, 1.443 + int32_t* limit, 1.444 + UErrorCode* status) { 1.445 + 1.446 + utrans_ENTRY(status); 1.447 + 1.448 + if (trans == 0 || text == 0 || limit == 0) { 1.449 + *status = U_ILLEGAL_ARGUMENT_ERROR; 1.450 + return; 1.451 + } 1.452 + 1.453 + int32_t textLen = (textLength == NULL || *textLength < 0) 1.454 + ? u_strlen(text) : *textLength; 1.455 + // writeable alias: for this ct, len CANNOT be -1 (why?) 1.456 + UnicodeString str(text, textLen, textCapacity); 1.457 + 1.458 + *limit = ((Transliterator*) trans)->transliterate(str, start, *limit); 1.459 + 1.460 + // Copy the string buffer back to text (only if necessary) 1.461 + // and fill in *neededCapacity (if neededCapacity != NULL). 1.462 + textLen = str.extract(text, textCapacity, *status); 1.463 + if(textLength != NULL) { 1.464 + *textLength = textLen; 1.465 + } 1.466 +} 1.467 + 1.468 +U_CAPI void U_EXPORT2 1.469 +utrans_transIncrementalUChars(const UTransliterator* trans, 1.470 + UChar* text, 1.471 + int32_t* textLength, 1.472 + int32_t textCapacity, 1.473 + UTransPosition* pos, 1.474 + UErrorCode* status) { 1.475 + 1.476 + utrans_ENTRY(status); 1.477 + 1.478 + if (trans == 0 || text == 0 || pos == 0) { 1.479 + *status = U_ILLEGAL_ARGUMENT_ERROR; 1.480 + return; 1.481 + } 1.482 + 1.483 + int32_t textLen = (textLength == NULL || *textLength < 0) 1.484 + ? u_strlen(text) : *textLength; 1.485 + // writeable alias: for this ct, len CANNOT be -1 (why?) 1.486 + UnicodeString str(text, textLen, textCapacity); 1.487 + 1.488 + ((Transliterator*) trans)->transliterate(str, *pos, *status); 1.489 + 1.490 + // Copy the string buffer back to text (only if necessary) 1.491 + // and fill in *neededCapacity (if neededCapacity != NULL). 1.492 + textLen = str.extract(text, textCapacity, *status); 1.493 + if(textLength != NULL) { 1.494 + *textLength = textLen; 1.495 + } 1.496 +} 1.497 + 1.498 +#endif /* #if !UCONFIG_NO_TRANSLITERATION */