1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/common/loclikely.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,1275 @@ 1.4 +/* 1.5 +******************************************************************************* 1.6 +* 1.7 +* Copyright (C) 1997-2012, International Business Machines 1.8 +* Corporation and others. All Rights Reserved. 1.9 +* 1.10 +******************************************************************************* 1.11 +* file name: loclikely.cpp 1.12 +* encoding: US-ASCII 1.13 +* tab size: 8 (not used) 1.14 +* indentation:4 1.15 +* 1.16 +* created on: 2010feb25 1.17 +* created by: Markus W. Scherer 1.18 +* 1.19 +* Code for likely and minimized locale subtags, separated out from other .cpp files 1.20 +* that then do not depend on resource bundle code and likely-subtags data. 1.21 +*/ 1.22 + 1.23 +#include "unicode/utypes.h" 1.24 +#include "unicode/putil.h" 1.25 +#include "unicode/uloc.h" 1.26 +#include "unicode/ures.h" 1.27 +#include "cmemory.h" 1.28 +#include "cstring.h" 1.29 +#include "ulocimp.h" 1.30 +#include "ustr_imp.h" 1.31 + 1.32 +/** 1.33 + * This function looks for the localeID in the likelySubtags resource. 1.34 + * 1.35 + * @param localeID The tag to find. 1.36 + * @param buffer A buffer to hold the matching entry 1.37 + * @param bufferLength The length of the output buffer 1.38 + * @return A pointer to "buffer" if found, or a null pointer if not. 1.39 + */ 1.40 +static const char* U_CALLCONV 1.41 +findLikelySubtags(const char* localeID, 1.42 + char* buffer, 1.43 + int32_t bufferLength, 1.44 + UErrorCode* err) { 1.45 + const char* result = NULL; 1.46 + 1.47 + if (!U_FAILURE(*err)) { 1.48 + int32_t resLen = 0; 1.49 + const UChar* s = NULL; 1.50 + UErrorCode tmpErr = U_ZERO_ERROR; 1.51 + UResourceBundle* subtags = ures_openDirect(NULL, "likelySubtags", &tmpErr); 1.52 + if (U_SUCCESS(tmpErr)) { 1.53 + s = ures_getStringByKey(subtags, localeID, &resLen, &tmpErr); 1.54 + 1.55 + if (U_FAILURE(tmpErr)) { 1.56 + /* 1.57 + * If a resource is missing, it's not really an error, it's 1.58 + * just that we don't have any data for that particular locale ID. 1.59 + */ 1.60 + if (tmpErr != U_MISSING_RESOURCE_ERROR) { 1.61 + *err = tmpErr; 1.62 + } 1.63 + } 1.64 + else if (resLen >= bufferLength) { 1.65 + /* The buffer should never overflow. */ 1.66 + *err = U_INTERNAL_PROGRAM_ERROR; 1.67 + } 1.68 + else { 1.69 + u_UCharsToChars(s, buffer, resLen + 1); 1.70 + result = buffer; 1.71 + } 1.72 + 1.73 + ures_close(subtags); 1.74 + } else { 1.75 + *err = tmpErr; 1.76 + } 1.77 + } 1.78 + 1.79 + return result; 1.80 +} 1.81 + 1.82 +/** 1.83 + * Append a tag to a buffer, adding the separator if necessary. The buffer 1.84 + * must be large enough to contain the resulting tag plus any separator 1.85 + * necessary. The tag must not be a zero-length string. 1.86 + * 1.87 + * @param tag The tag to add. 1.88 + * @param tagLength The length of the tag. 1.89 + * @param buffer The output buffer. 1.90 + * @param bufferLength The length of the output buffer. This is an input/ouput parameter. 1.91 + **/ 1.92 +static void U_CALLCONV 1.93 +appendTag( 1.94 + const char* tag, 1.95 + int32_t tagLength, 1.96 + char* buffer, 1.97 + int32_t* bufferLength) { 1.98 + 1.99 + if (*bufferLength > 0) { 1.100 + buffer[*bufferLength] = '_'; 1.101 + ++(*bufferLength); 1.102 + } 1.103 + 1.104 + uprv_memmove( 1.105 + &buffer[*bufferLength], 1.106 + tag, 1.107 + tagLength); 1.108 + 1.109 + *bufferLength += tagLength; 1.110 +} 1.111 + 1.112 +/** 1.113 + * These are the canonical strings for unknown languages, scripts and regions. 1.114 + **/ 1.115 +static const char* const unknownLanguage = "und"; 1.116 +static const char* const unknownScript = "Zzzz"; 1.117 +static const char* const unknownRegion = "ZZ"; 1.118 + 1.119 +/** 1.120 + * Create a tag string from the supplied parameters. The lang, script and region 1.121 + * parameters may be NULL pointers. If they are, their corresponding length parameters 1.122 + * must be less than or equal to 0. 1.123 + * 1.124 + * If any of the language, script or region parameters are empty, and the alternateTags 1.125 + * parameter is not NULL, it will be parsed for potential language, script and region tags 1.126 + * to be used when constructing the new tag. If the alternateTags parameter is NULL, or 1.127 + * it contains no language tag, the default tag for the unknown language is used. 1.128 + * 1.129 + * If the length of the new string exceeds the capacity of the output buffer, 1.130 + * the function copies as many bytes to the output buffer as it can, and returns 1.131 + * the error U_BUFFER_OVERFLOW_ERROR. 1.132 + * 1.133 + * If an illegal argument is provided, the function returns the error 1.134 + * U_ILLEGAL_ARGUMENT_ERROR. 1.135 + * 1.136 + * Note that this function can return the warning U_STRING_NOT_TERMINATED_WARNING if 1.137 + * the tag string fits in the output buffer, but the null terminator doesn't. 1.138 + * 1.139 + * @param lang The language tag to use. 1.140 + * @param langLength The length of the language tag. 1.141 + * @param script The script tag to use. 1.142 + * @param scriptLength The length of the script tag. 1.143 + * @param region The region tag to use. 1.144 + * @param regionLength The length of the region tag. 1.145 + * @param trailing Any trailing data to append to the new tag. 1.146 + * @param trailingLength The length of the trailing data. 1.147 + * @param alternateTags A string containing any alternate tags. 1.148 + * @param tag The output buffer. 1.149 + * @param tagCapacity The capacity of the output buffer. 1.150 + * @param err A pointer to a UErrorCode for error reporting. 1.151 + * @return The length of the tag string, which may be greater than tagCapacity, or -1 on error. 1.152 + **/ 1.153 +static int32_t U_CALLCONV 1.154 +createTagStringWithAlternates( 1.155 + const char* lang, 1.156 + int32_t langLength, 1.157 + const char* script, 1.158 + int32_t scriptLength, 1.159 + const char* region, 1.160 + int32_t regionLength, 1.161 + const char* trailing, 1.162 + int32_t trailingLength, 1.163 + const char* alternateTags, 1.164 + char* tag, 1.165 + int32_t tagCapacity, 1.166 + UErrorCode* err) { 1.167 + 1.168 + if (U_FAILURE(*err)) { 1.169 + goto error; 1.170 + } 1.171 + else if (tag == NULL || 1.172 + tagCapacity <= 0 || 1.173 + langLength >= ULOC_LANG_CAPACITY || 1.174 + scriptLength >= ULOC_SCRIPT_CAPACITY || 1.175 + regionLength >= ULOC_COUNTRY_CAPACITY) { 1.176 + goto error; 1.177 + } 1.178 + else { 1.179 + /** 1.180 + * ULOC_FULLNAME_CAPACITY will provide enough capacity 1.181 + * that we can build a string that contains the language, 1.182 + * script and region code without worrying about overrunning 1.183 + * the user-supplied buffer. 1.184 + **/ 1.185 + char tagBuffer[ULOC_FULLNAME_CAPACITY]; 1.186 + int32_t tagLength = 0; 1.187 + int32_t capacityRemaining = tagCapacity; 1.188 + UBool regionAppended = FALSE; 1.189 + 1.190 + if (langLength > 0) { 1.191 + appendTag( 1.192 + lang, 1.193 + langLength, 1.194 + tagBuffer, 1.195 + &tagLength); 1.196 + } 1.197 + else if (alternateTags == NULL) { 1.198 + /* 1.199 + * Append the value for an unknown language, if 1.200 + * we found no language. 1.201 + */ 1.202 + appendTag( 1.203 + unknownLanguage, 1.204 + (int32_t)uprv_strlen(unknownLanguage), 1.205 + tagBuffer, 1.206 + &tagLength); 1.207 + } 1.208 + else { 1.209 + /* 1.210 + * Parse the alternateTags string for the language. 1.211 + */ 1.212 + char alternateLang[ULOC_LANG_CAPACITY]; 1.213 + int32_t alternateLangLength = sizeof(alternateLang); 1.214 + 1.215 + alternateLangLength = 1.216 + uloc_getLanguage( 1.217 + alternateTags, 1.218 + alternateLang, 1.219 + alternateLangLength, 1.220 + err); 1.221 + if(U_FAILURE(*err) || 1.222 + alternateLangLength >= ULOC_LANG_CAPACITY) { 1.223 + goto error; 1.224 + } 1.225 + else if (alternateLangLength == 0) { 1.226 + /* 1.227 + * Append the value for an unknown language, if 1.228 + * we found no language. 1.229 + */ 1.230 + appendTag( 1.231 + unknownLanguage, 1.232 + (int32_t)uprv_strlen(unknownLanguage), 1.233 + tagBuffer, 1.234 + &tagLength); 1.235 + } 1.236 + else { 1.237 + appendTag( 1.238 + alternateLang, 1.239 + alternateLangLength, 1.240 + tagBuffer, 1.241 + &tagLength); 1.242 + } 1.243 + } 1.244 + 1.245 + if (scriptLength > 0) { 1.246 + appendTag( 1.247 + script, 1.248 + scriptLength, 1.249 + tagBuffer, 1.250 + &tagLength); 1.251 + } 1.252 + else if (alternateTags != NULL) { 1.253 + /* 1.254 + * Parse the alternateTags string for the script. 1.255 + */ 1.256 + char alternateScript[ULOC_SCRIPT_CAPACITY]; 1.257 + 1.258 + const int32_t alternateScriptLength = 1.259 + uloc_getScript( 1.260 + alternateTags, 1.261 + alternateScript, 1.262 + sizeof(alternateScript), 1.263 + err); 1.264 + 1.265 + if (U_FAILURE(*err) || 1.266 + alternateScriptLength >= ULOC_SCRIPT_CAPACITY) { 1.267 + goto error; 1.268 + } 1.269 + else if (alternateScriptLength > 0) { 1.270 + appendTag( 1.271 + alternateScript, 1.272 + alternateScriptLength, 1.273 + tagBuffer, 1.274 + &tagLength); 1.275 + } 1.276 + } 1.277 + 1.278 + if (regionLength > 0) { 1.279 + appendTag( 1.280 + region, 1.281 + regionLength, 1.282 + tagBuffer, 1.283 + &tagLength); 1.284 + 1.285 + regionAppended = TRUE; 1.286 + } 1.287 + else if (alternateTags != NULL) { 1.288 + /* 1.289 + * Parse the alternateTags string for the region. 1.290 + */ 1.291 + char alternateRegion[ULOC_COUNTRY_CAPACITY]; 1.292 + 1.293 + const int32_t alternateRegionLength = 1.294 + uloc_getCountry( 1.295 + alternateTags, 1.296 + alternateRegion, 1.297 + sizeof(alternateRegion), 1.298 + err); 1.299 + if (U_FAILURE(*err) || 1.300 + alternateRegionLength >= ULOC_COUNTRY_CAPACITY) { 1.301 + goto error; 1.302 + } 1.303 + else if (alternateRegionLength > 0) { 1.304 + appendTag( 1.305 + alternateRegion, 1.306 + alternateRegionLength, 1.307 + tagBuffer, 1.308 + &tagLength); 1.309 + 1.310 + regionAppended = TRUE; 1.311 + } 1.312 + } 1.313 + 1.314 + { 1.315 + const int32_t toCopy = 1.316 + tagLength >= tagCapacity ? tagCapacity : tagLength; 1.317 + 1.318 + /** 1.319 + * Copy the partial tag from our internal buffer to the supplied 1.320 + * target. 1.321 + **/ 1.322 + uprv_memcpy( 1.323 + tag, 1.324 + tagBuffer, 1.325 + toCopy); 1.326 + 1.327 + capacityRemaining -= toCopy; 1.328 + } 1.329 + 1.330 + if (trailingLength > 0) { 1.331 + if (*trailing != '@' && capacityRemaining > 0) { 1.332 + tag[tagLength++] = '_'; 1.333 + --capacityRemaining; 1.334 + if (capacityRemaining > 0 && !regionAppended) { 1.335 + /* extra separator is required */ 1.336 + tag[tagLength++] = '_'; 1.337 + --capacityRemaining; 1.338 + } 1.339 + } 1.340 + 1.341 + if (capacityRemaining > 0) { 1.342 + /* 1.343 + * Copy the trailing data into the supplied buffer. Use uprv_memmove, since we 1.344 + * don't know if the user-supplied buffers overlap. 1.345 + */ 1.346 + const int32_t toCopy = 1.347 + trailingLength >= capacityRemaining ? capacityRemaining : trailingLength; 1.348 + 1.349 + uprv_memmove( 1.350 + &tag[tagLength], 1.351 + trailing, 1.352 + toCopy); 1.353 + } 1.354 + } 1.355 + 1.356 + tagLength += trailingLength; 1.357 + 1.358 + return u_terminateChars( 1.359 + tag, 1.360 + tagCapacity, 1.361 + tagLength, 1.362 + err); 1.363 + } 1.364 + 1.365 +error: 1.366 + 1.367 + /** 1.368 + * An overflow indicates the locale ID passed in 1.369 + * is ill-formed. If we got here, and there was 1.370 + * no previous error, it's an implicit overflow. 1.371 + **/ 1.372 + if (*err == U_BUFFER_OVERFLOW_ERROR || 1.373 + U_SUCCESS(*err)) { 1.374 + *err = U_ILLEGAL_ARGUMENT_ERROR; 1.375 + } 1.376 + 1.377 + return -1; 1.378 +} 1.379 + 1.380 +/** 1.381 + * Create a tag string from the supplied parameters. The lang, script and region 1.382 + * parameters may be NULL pointers. If they are, their corresponding length parameters 1.383 + * must be less than or equal to 0. If the lang parameter is an empty string, the 1.384 + * default value for an unknown language is written to the output buffer. 1.385 + * 1.386 + * If the length of the new string exceeds the capacity of the output buffer, 1.387 + * the function copies as many bytes to the output buffer as it can, and returns 1.388 + * the error U_BUFFER_OVERFLOW_ERROR. 1.389 + * 1.390 + * If an illegal argument is provided, the function returns the error 1.391 + * U_ILLEGAL_ARGUMENT_ERROR. 1.392 + * 1.393 + * @param lang The language tag to use. 1.394 + * @param langLength The length of the language tag. 1.395 + * @param script The script tag to use. 1.396 + * @param scriptLength The length of the script tag. 1.397 + * @param region The region tag to use. 1.398 + * @param regionLength The length of the region tag. 1.399 + * @param trailing Any trailing data to append to the new tag. 1.400 + * @param trailingLength The length of the trailing data. 1.401 + * @param tag The output buffer. 1.402 + * @param tagCapacity The capacity of the output buffer. 1.403 + * @param err A pointer to a UErrorCode for error reporting. 1.404 + * @return The length of the tag string, which may be greater than tagCapacity. 1.405 + **/ 1.406 +static int32_t U_CALLCONV 1.407 +createTagString( 1.408 + const char* lang, 1.409 + int32_t langLength, 1.410 + const char* script, 1.411 + int32_t scriptLength, 1.412 + const char* region, 1.413 + int32_t regionLength, 1.414 + const char* trailing, 1.415 + int32_t trailingLength, 1.416 + char* tag, 1.417 + int32_t tagCapacity, 1.418 + UErrorCode* err) 1.419 +{ 1.420 + return createTagStringWithAlternates( 1.421 + lang, 1.422 + langLength, 1.423 + script, 1.424 + scriptLength, 1.425 + region, 1.426 + regionLength, 1.427 + trailing, 1.428 + trailingLength, 1.429 + NULL, 1.430 + tag, 1.431 + tagCapacity, 1.432 + err); 1.433 +} 1.434 + 1.435 +/** 1.436 + * Parse the language, script, and region subtags from a tag string, and copy the 1.437 + * results into the corresponding output parameters. The buffers are null-terminated, 1.438 + * unless overflow occurs. 1.439 + * 1.440 + * The langLength, scriptLength, and regionLength parameters are input/output 1.441 + * parameters, and must contain the capacity of their corresponding buffers on 1.442 + * input. On output, they will contain the actual length of the buffers, not 1.443 + * including the null terminator. 1.444 + * 1.445 + * If the length of any of the output subtags exceeds the capacity of the corresponding 1.446 + * buffer, the function copies as many bytes to the output buffer as it can, and returns 1.447 + * the error U_BUFFER_OVERFLOW_ERROR. It will not parse any more subtags once overflow 1.448 + * occurs. 1.449 + * 1.450 + * If an illegal argument is provided, the function returns the error 1.451 + * U_ILLEGAL_ARGUMENT_ERROR. 1.452 + * 1.453 + * @param localeID The locale ID to parse. 1.454 + * @param lang The language tag buffer. 1.455 + * @param langLength The length of the language tag. 1.456 + * @param script The script tag buffer. 1.457 + * @param scriptLength The length of the script tag. 1.458 + * @param region The region tag buffer. 1.459 + * @param regionLength The length of the region tag. 1.460 + * @param err A pointer to a UErrorCode for error reporting. 1.461 + * @return The number of chars of the localeID parameter consumed. 1.462 + **/ 1.463 +static int32_t U_CALLCONV 1.464 +parseTagString( 1.465 + const char* localeID, 1.466 + char* lang, 1.467 + int32_t* langLength, 1.468 + char* script, 1.469 + int32_t* scriptLength, 1.470 + char* region, 1.471 + int32_t* regionLength, 1.472 + UErrorCode* err) 1.473 +{ 1.474 + const char* position = localeID; 1.475 + int32_t subtagLength = 0; 1.476 + 1.477 + if(U_FAILURE(*err) || 1.478 + localeID == NULL || 1.479 + lang == NULL || 1.480 + langLength == NULL || 1.481 + script == NULL || 1.482 + scriptLength == NULL || 1.483 + region == NULL || 1.484 + regionLength == NULL) { 1.485 + goto error; 1.486 + } 1.487 + 1.488 + subtagLength = ulocimp_getLanguage(position, lang, *langLength, &position); 1.489 + u_terminateChars(lang, *langLength, subtagLength, err); 1.490 + 1.491 + /* 1.492 + * Note that we explicit consider U_STRING_NOT_TERMINATED_WARNING 1.493 + * to be an error, because it indicates the user-supplied tag is 1.494 + * not well-formed. 1.495 + */ 1.496 + if(U_FAILURE(*err)) { 1.497 + goto error; 1.498 + } 1.499 + 1.500 + *langLength = subtagLength; 1.501 + 1.502 + /* 1.503 + * If no language was present, use the value of unknownLanguage 1.504 + * instead. Otherwise, move past any separator. 1.505 + */ 1.506 + if (*langLength == 0) { 1.507 + uprv_strcpy( 1.508 + lang, 1.509 + unknownLanguage); 1.510 + *langLength = (int32_t)uprv_strlen(lang); 1.511 + } 1.512 + else if (_isIDSeparator(*position)) { 1.513 + ++position; 1.514 + } 1.515 + 1.516 + subtagLength = ulocimp_getScript(position, script, *scriptLength, &position); 1.517 + u_terminateChars(script, *scriptLength, subtagLength, err); 1.518 + 1.519 + if(U_FAILURE(*err)) { 1.520 + goto error; 1.521 + } 1.522 + 1.523 + *scriptLength = subtagLength; 1.524 + 1.525 + if (*scriptLength > 0) { 1.526 + if (uprv_strnicmp(script, unknownScript, *scriptLength) == 0) { 1.527 + /** 1.528 + * If the script part is the "unknown" script, then don't return it. 1.529 + **/ 1.530 + *scriptLength = 0; 1.531 + } 1.532 + 1.533 + /* 1.534 + * Move past any separator. 1.535 + */ 1.536 + if (_isIDSeparator(*position)) { 1.537 + ++position; 1.538 + } 1.539 + } 1.540 + 1.541 + subtagLength = ulocimp_getCountry(position, region, *regionLength, &position); 1.542 + u_terminateChars(region, *regionLength, subtagLength, err); 1.543 + 1.544 + if(U_FAILURE(*err)) { 1.545 + goto error; 1.546 + } 1.547 + 1.548 + *regionLength = subtagLength; 1.549 + 1.550 + if (*regionLength > 0) { 1.551 + if (uprv_strnicmp(region, unknownRegion, *regionLength) == 0) { 1.552 + /** 1.553 + * If the region part is the "unknown" region, then don't return it. 1.554 + **/ 1.555 + *regionLength = 0; 1.556 + } 1.557 + } else if (*position != 0 && *position != '@') { 1.558 + /* back up over consumed trailing separator */ 1.559 + --position; 1.560 + } 1.561 + 1.562 +exit: 1.563 + 1.564 + return (int32_t)(position - localeID); 1.565 + 1.566 +error: 1.567 + 1.568 + /** 1.569 + * If we get here, we have no explicit error, it's the result of an 1.570 + * illegal argument. 1.571 + **/ 1.572 + if (!U_FAILURE(*err)) { 1.573 + *err = U_ILLEGAL_ARGUMENT_ERROR; 1.574 + } 1.575 + 1.576 + goto exit; 1.577 +} 1.578 + 1.579 +static int32_t U_CALLCONV 1.580 +createLikelySubtagsString( 1.581 + const char* lang, 1.582 + int32_t langLength, 1.583 + const char* script, 1.584 + int32_t scriptLength, 1.585 + const char* region, 1.586 + int32_t regionLength, 1.587 + const char* variants, 1.588 + int32_t variantsLength, 1.589 + char* tag, 1.590 + int32_t tagCapacity, 1.591 + UErrorCode* err) 1.592 +{ 1.593 + /** 1.594 + * ULOC_FULLNAME_CAPACITY will provide enough capacity 1.595 + * that we can build a string that contains the language, 1.596 + * script and region code without worrying about overrunning 1.597 + * the user-supplied buffer. 1.598 + **/ 1.599 + char tagBuffer[ULOC_FULLNAME_CAPACITY]; 1.600 + char likelySubtagsBuffer[ULOC_FULLNAME_CAPACITY]; 1.601 + 1.602 + if(U_FAILURE(*err)) { 1.603 + goto error; 1.604 + } 1.605 + 1.606 + /** 1.607 + * Try the language with the script and region first. 1.608 + **/ 1.609 + if (scriptLength > 0 && regionLength > 0) { 1.610 + 1.611 + const char* likelySubtags = NULL; 1.612 + 1.613 + createTagString( 1.614 + lang, 1.615 + langLength, 1.616 + script, 1.617 + scriptLength, 1.618 + region, 1.619 + regionLength, 1.620 + NULL, 1.621 + 0, 1.622 + tagBuffer, 1.623 + sizeof(tagBuffer), 1.624 + err); 1.625 + if(U_FAILURE(*err)) { 1.626 + goto error; 1.627 + } 1.628 + 1.629 + likelySubtags = 1.630 + findLikelySubtags( 1.631 + tagBuffer, 1.632 + likelySubtagsBuffer, 1.633 + sizeof(likelySubtagsBuffer), 1.634 + err); 1.635 + if(U_FAILURE(*err)) { 1.636 + goto error; 1.637 + } 1.638 + 1.639 + if (likelySubtags != NULL) { 1.640 + /* Always use the language tag from the 1.641 + maximal string, since it may be more 1.642 + specific than the one provided. */ 1.643 + return createTagStringWithAlternates( 1.644 + NULL, 1.645 + 0, 1.646 + NULL, 1.647 + 0, 1.648 + NULL, 1.649 + 0, 1.650 + variants, 1.651 + variantsLength, 1.652 + likelySubtags, 1.653 + tag, 1.654 + tagCapacity, 1.655 + err); 1.656 + } 1.657 + } 1.658 + 1.659 + /** 1.660 + * Try the language with just the script. 1.661 + **/ 1.662 + if (scriptLength > 0) { 1.663 + 1.664 + const char* likelySubtags = NULL; 1.665 + 1.666 + createTagString( 1.667 + lang, 1.668 + langLength, 1.669 + script, 1.670 + scriptLength, 1.671 + NULL, 1.672 + 0, 1.673 + NULL, 1.674 + 0, 1.675 + tagBuffer, 1.676 + sizeof(tagBuffer), 1.677 + err); 1.678 + if(U_FAILURE(*err)) { 1.679 + goto error; 1.680 + } 1.681 + 1.682 + likelySubtags = 1.683 + findLikelySubtags( 1.684 + tagBuffer, 1.685 + likelySubtagsBuffer, 1.686 + sizeof(likelySubtagsBuffer), 1.687 + err); 1.688 + if(U_FAILURE(*err)) { 1.689 + goto error; 1.690 + } 1.691 + 1.692 + if (likelySubtags != NULL) { 1.693 + /* Always use the language tag from the 1.694 + maximal string, since it may be more 1.695 + specific than the one provided. */ 1.696 + return createTagStringWithAlternates( 1.697 + NULL, 1.698 + 0, 1.699 + NULL, 1.700 + 0, 1.701 + region, 1.702 + regionLength, 1.703 + variants, 1.704 + variantsLength, 1.705 + likelySubtags, 1.706 + tag, 1.707 + tagCapacity, 1.708 + err); 1.709 + } 1.710 + } 1.711 + 1.712 + /** 1.713 + * Try the language with just the region. 1.714 + **/ 1.715 + if (regionLength > 0) { 1.716 + 1.717 + const char* likelySubtags = NULL; 1.718 + 1.719 + createTagString( 1.720 + lang, 1.721 + langLength, 1.722 + NULL, 1.723 + 0, 1.724 + region, 1.725 + regionLength, 1.726 + NULL, 1.727 + 0, 1.728 + tagBuffer, 1.729 + sizeof(tagBuffer), 1.730 + err); 1.731 + if(U_FAILURE(*err)) { 1.732 + goto error; 1.733 + } 1.734 + 1.735 + likelySubtags = 1.736 + findLikelySubtags( 1.737 + tagBuffer, 1.738 + likelySubtagsBuffer, 1.739 + sizeof(likelySubtagsBuffer), 1.740 + err); 1.741 + if(U_FAILURE(*err)) { 1.742 + goto error; 1.743 + } 1.744 + 1.745 + if (likelySubtags != NULL) { 1.746 + /* Always use the language tag from the 1.747 + maximal string, since it may be more 1.748 + specific than the one provided. */ 1.749 + return createTagStringWithAlternates( 1.750 + NULL, 1.751 + 0, 1.752 + script, 1.753 + scriptLength, 1.754 + NULL, 1.755 + 0, 1.756 + variants, 1.757 + variantsLength, 1.758 + likelySubtags, 1.759 + tag, 1.760 + tagCapacity, 1.761 + err); 1.762 + } 1.763 + } 1.764 + 1.765 + /** 1.766 + * Finally, try just the language. 1.767 + **/ 1.768 + { 1.769 + const char* likelySubtags = NULL; 1.770 + 1.771 + createTagString( 1.772 + lang, 1.773 + langLength, 1.774 + NULL, 1.775 + 0, 1.776 + NULL, 1.777 + 0, 1.778 + NULL, 1.779 + 0, 1.780 + tagBuffer, 1.781 + sizeof(tagBuffer), 1.782 + err); 1.783 + if(U_FAILURE(*err)) { 1.784 + goto error; 1.785 + } 1.786 + 1.787 + likelySubtags = 1.788 + findLikelySubtags( 1.789 + tagBuffer, 1.790 + likelySubtagsBuffer, 1.791 + sizeof(likelySubtagsBuffer), 1.792 + err); 1.793 + if(U_FAILURE(*err)) { 1.794 + goto error; 1.795 + } 1.796 + 1.797 + if (likelySubtags != NULL) { 1.798 + /* Always use the language tag from the 1.799 + maximal string, since it may be more 1.800 + specific than the one provided. */ 1.801 + return createTagStringWithAlternates( 1.802 + NULL, 1.803 + 0, 1.804 + script, 1.805 + scriptLength, 1.806 + region, 1.807 + regionLength, 1.808 + variants, 1.809 + variantsLength, 1.810 + likelySubtags, 1.811 + tag, 1.812 + tagCapacity, 1.813 + err); 1.814 + } 1.815 + } 1.816 + 1.817 + return u_terminateChars( 1.818 + tag, 1.819 + tagCapacity, 1.820 + 0, 1.821 + err); 1.822 + 1.823 +error: 1.824 + 1.825 + if (!U_FAILURE(*err)) { 1.826 + *err = U_ILLEGAL_ARGUMENT_ERROR; 1.827 + } 1.828 + 1.829 + return -1; 1.830 +} 1.831 + 1.832 +#define CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength) \ 1.833 + { int32_t count = 0; \ 1.834 + int32_t i; \ 1.835 + for (i = 0; i < trailingLength; i++) { \ 1.836 + if (trailing[i] == '-' || trailing[i] == '_') { \ 1.837 + count = 0; \ 1.838 + if (count > 8) { \ 1.839 + goto error; \ 1.840 + } \ 1.841 + } else if (trailing[i] == '@') { \ 1.842 + break; \ 1.843 + } else if (count > 8) { \ 1.844 + goto error; \ 1.845 + } else { \ 1.846 + count++; \ 1.847 + } \ 1.848 + } \ 1.849 + } 1.850 + 1.851 +static int32_t 1.852 +_uloc_addLikelySubtags(const char* localeID, 1.853 + char* maximizedLocaleID, 1.854 + int32_t maximizedLocaleIDCapacity, 1.855 + UErrorCode* err) 1.856 +{ 1.857 + char lang[ULOC_LANG_CAPACITY]; 1.858 + int32_t langLength = sizeof(lang); 1.859 + char script[ULOC_SCRIPT_CAPACITY]; 1.860 + int32_t scriptLength = sizeof(script); 1.861 + char region[ULOC_COUNTRY_CAPACITY]; 1.862 + int32_t regionLength = sizeof(region); 1.863 + const char* trailing = ""; 1.864 + int32_t trailingLength = 0; 1.865 + int32_t trailingIndex = 0; 1.866 + int32_t resultLength = 0; 1.867 + 1.868 + if(U_FAILURE(*err)) { 1.869 + goto error; 1.870 + } 1.871 + else if (localeID == NULL || 1.872 + maximizedLocaleID == NULL || 1.873 + maximizedLocaleIDCapacity <= 0) { 1.874 + goto error; 1.875 + } 1.876 + 1.877 + trailingIndex = parseTagString( 1.878 + localeID, 1.879 + lang, 1.880 + &langLength, 1.881 + script, 1.882 + &scriptLength, 1.883 + region, 1.884 + ®ionLength, 1.885 + err); 1.886 + if(U_FAILURE(*err)) { 1.887 + /* Overflow indicates an illegal argument error */ 1.888 + if (*err == U_BUFFER_OVERFLOW_ERROR) { 1.889 + *err = U_ILLEGAL_ARGUMENT_ERROR; 1.890 + } 1.891 + 1.892 + goto error; 1.893 + } 1.894 + 1.895 + /* Find the length of the trailing portion. */ 1.896 + while (_isIDSeparator(localeID[trailingIndex])) { 1.897 + trailingIndex++; 1.898 + } 1.899 + trailing = &localeID[trailingIndex]; 1.900 + trailingLength = (int32_t)uprv_strlen(trailing); 1.901 + 1.902 + CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength); 1.903 + 1.904 + resultLength = 1.905 + createLikelySubtagsString( 1.906 + lang, 1.907 + langLength, 1.908 + script, 1.909 + scriptLength, 1.910 + region, 1.911 + regionLength, 1.912 + trailing, 1.913 + trailingLength, 1.914 + maximizedLocaleID, 1.915 + maximizedLocaleIDCapacity, 1.916 + err); 1.917 + 1.918 + if (resultLength == 0) { 1.919 + const int32_t localIDLength = (int32_t)uprv_strlen(localeID); 1.920 + 1.921 + /* 1.922 + * If we get here, we need to return localeID. 1.923 + */ 1.924 + uprv_memcpy( 1.925 + maximizedLocaleID, 1.926 + localeID, 1.927 + localIDLength <= maximizedLocaleIDCapacity ? 1.928 + localIDLength : maximizedLocaleIDCapacity); 1.929 + 1.930 + resultLength = 1.931 + u_terminateChars( 1.932 + maximizedLocaleID, 1.933 + maximizedLocaleIDCapacity, 1.934 + localIDLength, 1.935 + err); 1.936 + } 1.937 + 1.938 + return resultLength; 1.939 + 1.940 +error: 1.941 + 1.942 + if (!U_FAILURE(*err)) { 1.943 + *err = U_ILLEGAL_ARGUMENT_ERROR; 1.944 + } 1.945 + 1.946 + return -1; 1.947 +} 1.948 + 1.949 +static int32_t 1.950 +_uloc_minimizeSubtags(const char* localeID, 1.951 + char* minimizedLocaleID, 1.952 + int32_t minimizedLocaleIDCapacity, 1.953 + UErrorCode* err) 1.954 +{ 1.955 + /** 1.956 + * ULOC_FULLNAME_CAPACITY will provide enough capacity 1.957 + * that we can build a string that contains the language, 1.958 + * script and region code without worrying about overrunning 1.959 + * the user-supplied buffer. 1.960 + **/ 1.961 + char maximizedTagBuffer[ULOC_FULLNAME_CAPACITY]; 1.962 + int32_t maximizedTagBufferLength = sizeof(maximizedTagBuffer); 1.963 + 1.964 + char lang[ULOC_LANG_CAPACITY]; 1.965 + int32_t langLength = sizeof(lang); 1.966 + char script[ULOC_SCRIPT_CAPACITY]; 1.967 + int32_t scriptLength = sizeof(script); 1.968 + char region[ULOC_COUNTRY_CAPACITY]; 1.969 + int32_t regionLength = sizeof(region); 1.970 + const char* trailing = ""; 1.971 + int32_t trailingLength = 0; 1.972 + int32_t trailingIndex = 0; 1.973 + 1.974 + if(U_FAILURE(*err)) { 1.975 + goto error; 1.976 + } 1.977 + else if (localeID == NULL || 1.978 + minimizedLocaleID == NULL || 1.979 + minimizedLocaleIDCapacity <= 0) { 1.980 + goto error; 1.981 + } 1.982 + 1.983 + trailingIndex = 1.984 + parseTagString( 1.985 + localeID, 1.986 + lang, 1.987 + &langLength, 1.988 + script, 1.989 + &scriptLength, 1.990 + region, 1.991 + ®ionLength, 1.992 + err); 1.993 + if(U_FAILURE(*err)) { 1.994 + 1.995 + /* Overflow indicates an illegal argument error */ 1.996 + if (*err == U_BUFFER_OVERFLOW_ERROR) { 1.997 + *err = U_ILLEGAL_ARGUMENT_ERROR; 1.998 + } 1.999 + 1.1000 + goto error; 1.1001 + } 1.1002 + 1.1003 + /* Find the spot where the variants or the keywords begin, if any. */ 1.1004 + while (_isIDSeparator(localeID[trailingIndex])) { 1.1005 + trailingIndex++; 1.1006 + } 1.1007 + trailing = &localeID[trailingIndex]; 1.1008 + trailingLength = (int32_t)uprv_strlen(trailing); 1.1009 + 1.1010 + CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength); 1.1011 + 1.1012 + createTagString( 1.1013 + lang, 1.1014 + langLength, 1.1015 + script, 1.1016 + scriptLength, 1.1017 + region, 1.1018 + regionLength, 1.1019 + NULL, 1.1020 + 0, 1.1021 + maximizedTagBuffer, 1.1022 + maximizedTagBufferLength, 1.1023 + err); 1.1024 + if(U_FAILURE(*err)) { 1.1025 + goto error; 1.1026 + } 1.1027 + 1.1028 + /** 1.1029 + * First, we need to first get the maximization 1.1030 + * from AddLikelySubtags. 1.1031 + **/ 1.1032 + maximizedTagBufferLength = 1.1033 + uloc_addLikelySubtags( 1.1034 + maximizedTagBuffer, 1.1035 + maximizedTagBuffer, 1.1036 + maximizedTagBufferLength, 1.1037 + err); 1.1038 + 1.1039 + if(U_FAILURE(*err)) { 1.1040 + goto error; 1.1041 + } 1.1042 + 1.1043 + /** 1.1044 + * Start first with just the language. 1.1045 + **/ 1.1046 + { 1.1047 + char tagBuffer[ULOC_FULLNAME_CAPACITY]; 1.1048 + 1.1049 + const int32_t tagBufferLength = 1.1050 + createLikelySubtagsString( 1.1051 + lang, 1.1052 + langLength, 1.1053 + NULL, 1.1054 + 0, 1.1055 + NULL, 1.1056 + 0, 1.1057 + NULL, 1.1058 + 0, 1.1059 + tagBuffer, 1.1060 + sizeof(tagBuffer), 1.1061 + err); 1.1062 + 1.1063 + if(U_FAILURE(*err)) { 1.1064 + goto error; 1.1065 + } 1.1066 + else if (uprv_strnicmp( 1.1067 + maximizedTagBuffer, 1.1068 + tagBuffer, 1.1069 + tagBufferLength) == 0) { 1.1070 + 1.1071 + return createTagString( 1.1072 + lang, 1.1073 + langLength, 1.1074 + NULL, 1.1075 + 0, 1.1076 + NULL, 1.1077 + 0, 1.1078 + trailing, 1.1079 + trailingLength, 1.1080 + minimizedLocaleID, 1.1081 + minimizedLocaleIDCapacity, 1.1082 + err); 1.1083 + } 1.1084 + } 1.1085 + 1.1086 + /** 1.1087 + * Next, try the language and region. 1.1088 + **/ 1.1089 + if (regionLength > 0) { 1.1090 + 1.1091 + char tagBuffer[ULOC_FULLNAME_CAPACITY]; 1.1092 + 1.1093 + const int32_t tagBufferLength = 1.1094 + createLikelySubtagsString( 1.1095 + lang, 1.1096 + langLength, 1.1097 + NULL, 1.1098 + 0, 1.1099 + region, 1.1100 + regionLength, 1.1101 + NULL, 1.1102 + 0, 1.1103 + tagBuffer, 1.1104 + sizeof(tagBuffer), 1.1105 + err); 1.1106 + 1.1107 + if(U_FAILURE(*err)) { 1.1108 + goto error; 1.1109 + } 1.1110 + else if (uprv_strnicmp( 1.1111 + maximizedTagBuffer, 1.1112 + tagBuffer, 1.1113 + tagBufferLength) == 0) { 1.1114 + 1.1115 + return createTagString( 1.1116 + lang, 1.1117 + langLength, 1.1118 + NULL, 1.1119 + 0, 1.1120 + region, 1.1121 + regionLength, 1.1122 + trailing, 1.1123 + trailingLength, 1.1124 + minimizedLocaleID, 1.1125 + minimizedLocaleIDCapacity, 1.1126 + err); 1.1127 + } 1.1128 + } 1.1129 + 1.1130 + /** 1.1131 + * Finally, try the language and script. This is our last chance, 1.1132 + * since trying with all three subtags would only yield the 1.1133 + * maximal version that we already have. 1.1134 + **/ 1.1135 + if (scriptLength > 0 && regionLength > 0) { 1.1136 + char tagBuffer[ULOC_FULLNAME_CAPACITY]; 1.1137 + 1.1138 + const int32_t tagBufferLength = 1.1139 + createLikelySubtagsString( 1.1140 + lang, 1.1141 + langLength, 1.1142 + script, 1.1143 + scriptLength, 1.1144 + NULL, 1.1145 + 0, 1.1146 + NULL, 1.1147 + 0, 1.1148 + tagBuffer, 1.1149 + sizeof(tagBuffer), 1.1150 + err); 1.1151 + 1.1152 + if(U_FAILURE(*err)) { 1.1153 + goto error; 1.1154 + } 1.1155 + else if (uprv_strnicmp( 1.1156 + maximizedTagBuffer, 1.1157 + tagBuffer, 1.1158 + tagBufferLength) == 0) { 1.1159 + 1.1160 + return createTagString( 1.1161 + lang, 1.1162 + langLength, 1.1163 + script, 1.1164 + scriptLength, 1.1165 + NULL, 1.1166 + 0, 1.1167 + trailing, 1.1168 + trailingLength, 1.1169 + minimizedLocaleID, 1.1170 + minimizedLocaleIDCapacity, 1.1171 + err); 1.1172 + } 1.1173 + } 1.1174 + 1.1175 + { 1.1176 + /** 1.1177 + * If we got here, return the locale ID parameter. 1.1178 + **/ 1.1179 + const int32_t localeIDLength = (int32_t)uprv_strlen(localeID); 1.1180 + 1.1181 + uprv_memcpy( 1.1182 + minimizedLocaleID, 1.1183 + localeID, 1.1184 + localeIDLength <= minimizedLocaleIDCapacity ? 1.1185 + localeIDLength : minimizedLocaleIDCapacity); 1.1186 + 1.1187 + return u_terminateChars( 1.1188 + minimizedLocaleID, 1.1189 + minimizedLocaleIDCapacity, 1.1190 + localeIDLength, 1.1191 + err); 1.1192 + } 1.1193 + 1.1194 +error: 1.1195 + 1.1196 + if (!U_FAILURE(*err)) { 1.1197 + *err = U_ILLEGAL_ARGUMENT_ERROR; 1.1198 + } 1.1199 + 1.1200 + return -1; 1.1201 + 1.1202 + 1.1203 +} 1.1204 + 1.1205 +static UBool 1.1206 +do_canonicalize(const char* localeID, 1.1207 + char* buffer, 1.1208 + int32_t bufferCapacity, 1.1209 + UErrorCode* err) 1.1210 +{ 1.1211 + uloc_canonicalize( 1.1212 + localeID, 1.1213 + buffer, 1.1214 + bufferCapacity, 1.1215 + err); 1.1216 + 1.1217 + if (*err == U_STRING_NOT_TERMINATED_WARNING || 1.1218 + *err == U_BUFFER_OVERFLOW_ERROR) { 1.1219 + *err = U_ILLEGAL_ARGUMENT_ERROR; 1.1220 + 1.1221 + return FALSE; 1.1222 + } 1.1223 + else if (U_FAILURE(*err)) { 1.1224 + 1.1225 + return FALSE; 1.1226 + } 1.1227 + else { 1.1228 + return TRUE; 1.1229 + } 1.1230 +} 1.1231 + 1.1232 +U_CAPI int32_t U_EXPORT2 1.1233 +uloc_addLikelySubtags(const char* localeID, 1.1234 + char* maximizedLocaleID, 1.1235 + int32_t maximizedLocaleIDCapacity, 1.1236 + UErrorCode* err) 1.1237 +{ 1.1238 + char localeBuffer[ULOC_FULLNAME_CAPACITY]; 1.1239 + 1.1240 + if (!do_canonicalize( 1.1241 + localeID, 1.1242 + localeBuffer, 1.1243 + sizeof(localeBuffer), 1.1244 + err)) { 1.1245 + return -1; 1.1246 + } 1.1247 + else { 1.1248 + return _uloc_addLikelySubtags( 1.1249 + localeBuffer, 1.1250 + maximizedLocaleID, 1.1251 + maximizedLocaleIDCapacity, 1.1252 + err); 1.1253 + } 1.1254 +} 1.1255 + 1.1256 +U_CAPI int32_t U_EXPORT2 1.1257 +uloc_minimizeSubtags(const char* localeID, 1.1258 + char* minimizedLocaleID, 1.1259 + int32_t minimizedLocaleIDCapacity, 1.1260 + UErrorCode* err) 1.1261 +{ 1.1262 + char localeBuffer[ULOC_FULLNAME_CAPACITY]; 1.1263 + 1.1264 + if (!do_canonicalize( 1.1265 + localeID, 1.1266 + localeBuffer, 1.1267 + sizeof(localeBuffer), 1.1268 + err)) { 1.1269 + return -1; 1.1270 + } 1.1271 + else { 1.1272 + return _uloc_minimizeSubtags( 1.1273 + localeBuffer, 1.1274 + minimizedLocaleID, 1.1275 + minimizedLocaleIDCapacity, 1.1276 + err); 1.1277 + } 1.1278 +}