michael@0: /* michael@0: ******************************************************************************* michael@0: * michael@0: * Copyright (C) 1997-2012, International Business Machines michael@0: * Corporation and others. All Rights Reserved. michael@0: * michael@0: ******************************************************************************* michael@0: * file name: loclikely.cpp michael@0: * encoding: US-ASCII michael@0: * tab size: 8 (not used) michael@0: * indentation:4 michael@0: * michael@0: * created on: 2010feb25 michael@0: * created by: Markus W. Scherer michael@0: * michael@0: * Code for likely and minimized locale subtags, separated out from other .cpp files michael@0: * that then do not depend on resource bundle code and likely-subtags data. michael@0: */ michael@0: michael@0: #include "unicode/utypes.h" michael@0: #include "unicode/putil.h" michael@0: #include "unicode/uloc.h" michael@0: #include "unicode/ures.h" michael@0: #include "cmemory.h" michael@0: #include "cstring.h" michael@0: #include "ulocimp.h" michael@0: #include "ustr_imp.h" michael@0: michael@0: /** michael@0: * This function looks for the localeID in the likelySubtags resource. michael@0: * michael@0: * @param localeID The tag to find. michael@0: * @param buffer A buffer to hold the matching entry michael@0: * @param bufferLength The length of the output buffer michael@0: * @return A pointer to "buffer" if found, or a null pointer if not. michael@0: */ michael@0: static const char* U_CALLCONV michael@0: findLikelySubtags(const char* localeID, michael@0: char* buffer, michael@0: int32_t bufferLength, michael@0: UErrorCode* err) { michael@0: const char* result = NULL; michael@0: michael@0: if (!U_FAILURE(*err)) { michael@0: int32_t resLen = 0; michael@0: const UChar* s = NULL; michael@0: UErrorCode tmpErr = U_ZERO_ERROR; michael@0: UResourceBundle* subtags = ures_openDirect(NULL, "likelySubtags", &tmpErr); michael@0: if (U_SUCCESS(tmpErr)) { michael@0: s = ures_getStringByKey(subtags, localeID, &resLen, &tmpErr); michael@0: michael@0: if (U_FAILURE(tmpErr)) { michael@0: /* michael@0: * If a resource is missing, it's not really an error, it's michael@0: * just that we don't have any data for that particular locale ID. michael@0: */ michael@0: if (tmpErr != U_MISSING_RESOURCE_ERROR) { michael@0: *err = tmpErr; michael@0: } michael@0: } michael@0: else if (resLen >= bufferLength) { michael@0: /* The buffer should never overflow. */ michael@0: *err = U_INTERNAL_PROGRAM_ERROR; michael@0: } michael@0: else { michael@0: u_UCharsToChars(s, buffer, resLen + 1); michael@0: result = buffer; michael@0: } michael@0: michael@0: ures_close(subtags); michael@0: } else { michael@0: *err = tmpErr; michael@0: } michael@0: } michael@0: michael@0: return result; michael@0: } michael@0: michael@0: /** michael@0: * Append a tag to a buffer, adding the separator if necessary. The buffer michael@0: * must be large enough to contain the resulting tag plus any separator michael@0: * necessary. The tag must not be a zero-length string. michael@0: * michael@0: * @param tag The tag to add. michael@0: * @param tagLength The length of the tag. michael@0: * @param buffer The output buffer. michael@0: * @param bufferLength The length of the output buffer. This is an input/ouput parameter. michael@0: **/ michael@0: static void U_CALLCONV michael@0: appendTag( michael@0: const char* tag, michael@0: int32_t tagLength, michael@0: char* buffer, michael@0: int32_t* bufferLength) { michael@0: michael@0: if (*bufferLength > 0) { michael@0: buffer[*bufferLength] = '_'; michael@0: ++(*bufferLength); michael@0: } michael@0: michael@0: uprv_memmove( michael@0: &buffer[*bufferLength], michael@0: tag, michael@0: tagLength); michael@0: michael@0: *bufferLength += tagLength; michael@0: } michael@0: michael@0: /** michael@0: * These are the canonical strings for unknown languages, scripts and regions. michael@0: **/ michael@0: static const char* const unknownLanguage = "und"; michael@0: static const char* const unknownScript = "Zzzz"; michael@0: static const char* const unknownRegion = "ZZ"; michael@0: michael@0: /** michael@0: * Create a tag string from the supplied parameters. The lang, script and region michael@0: * parameters may be NULL pointers. If they are, their corresponding length parameters michael@0: * must be less than or equal to 0. michael@0: * michael@0: * If any of the language, script or region parameters are empty, and the alternateTags michael@0: * parameter is not NULL, it will be parsed for potential language, script and region tags michael@0: * to be used when constructing the new tag. If the alternateTags parameter is NULL, or michael@0: * it contains no language tag, the default tag for the unknown language is used. michael@0: * michael@0: * If the length of the new string exceeds the capacity of the output buffer, michael@0: * the function copies as many bytes to the output buffer as it can, and returns michael@0: * the error U_BUFFER_OVERFLOW_ERROR. michael@0: * michael@0: * If an illegal argument is provided, the function returns the error michael@0: * U_ILLEGAL_ARGUMENT_ERROR. michael@0: * michael@0: * Note that this function can return the warning U_STRING_NOT_TERMINATED_WARNING if michael@0: * the tag string fits in the output buffer, but the null terminator doesn't. michael@0: * michael@0: * @param lang The language tag to use. michael@0: * @param langLength The length of the language tag. michael@0: * @param script The script tag to use. michael@0: * @param scriptLength The length of the script tag. michael@0: * @param region The region tag to use. michael@0: * @param regionLength The length of the region tag. michael@0: * @param trailing Any trailing data to append to the new tag. michael@0: * @param trailingLength The length of the trailing data. michael@0: * @param alternateTags A string containing any alternate tags. michael@0: * @param tag The output buffer. michael@0: * @param tagCapacity The capacity of the output buffer. michael@0: * @param err A pointer to a UErrorCode for error reporting. michael@0: * @return The length of the tag string, which may be greater than tagCapacity, or -1 on error. michael@0: **/ michael@0: static int32_t U_CALLCONV michael@0: createTagStringWithAlternates( michael@0: const char* lang, michael@0: int32_t langLength, michael@0: const char* script, michael@0: int32_t scriptLength, michael@0: const char* region, michael@0: int32_t regionLength, michael@0: const char* trailing, michael@0: int32_t trailingLength, michael@0: const char* alternateTags, michael@0: char* tag, michael@0: int32_t tagCapacity, michael@0: UErrorCode* err) { michael@0: michael@0: if (U_FAILURE(*err)) { michael@0: goto error; michael@0: } michael@0: else if (tag == NULL || michael@0: tagCapacity <= 0 || michael@0: langLength >= ULOC_LANG_CAPACITY || michael@0: scriptLength >= ULOC_SCRIPT_CAPACITY || michael@0: regionLength >= ULOC_COUNTRY_CAPACITY) { michael@0: goto error; michael@0: } michael@0: else { michael@0: /** michael@0: * ULOC_FULLNAME_CAPACITY will provide enough capacity michael@0: * that we can build a string that contains the language, michael@0: * script and region code without worrying about overrunning michael@0: * the user-supplied buffer. michael@0: **/ michael@0: char tagBuffer[ULOC_FULLNAME_CAPACITY]; michael@0: int32_t tagLength = 0; michael@0: int32_t capacityRemaining = tagCapacity; michael@0: UBool regionAppended = FALSE; michael@0: michael@0: if (langLength > 0) { michael@0: appendTag( michael@0: lang, michael@0: langLength, michael@0: tagBuffer, michael@0: &tagLength); michael@0: } michael@0: else if (alternateTags == NULL) { michael@0: /* michael@0: * Append the value for an unknown language, if michael@0: * we found no language. michael@0: */ michael@0: appendTag( michael@0: unknownLanguage, michael@0: (int32_t)uprv_strlen(unknownLanguage), michael@0: tagBuffer, michael@0: &tagLength); michael@0: } michael@0: else { michael@0: /* michael@0: * Parse the alternateTags string for the language. michael@0: */ michael@0: char alternateLang[ULOC_LANG_CAPACITY]; michael@0: int32_t alternateLangLength = sizeof(alternateLang); michael@0: michael@0: alternateLangLength = michael@0: uloc_getLanguage( michael@0: alternateTags, michael@0: alternateLang, michael@0: alternateLangLength, michael@0: err); michael@0: if(U_FAILURE(*err) || michael@0: alternateLangLength >= ULOC_LANG_CAPACITY) { michael@0: goto error; michael@0: } michael@0: else if (alternateLangLength == 0) { michael@0: /* michael@0: * Append the value for an unknown language, if michael@0: * we found no language. michael@0: */ michael@0: appendTag( michael@0: unknownLanguage, michael@0: (int32_t)uprv_strlen(unknownLanguage), michael@0: tagBuffer, michael@0: &tagLength); michael@0: } michael@0: else { michael@0: appendTag( michael@0: alternateLang, michael@0: alternateLangLength, michael@0: tagBuffer, michael@0: &tagLength); michael@0: } michael@0: } michael@0: michael@0: if (scriptLength > 0) { michael@0: appendTag( michael@0: script, michael@0: scriptLength, michael@0: tagBuffer, michael@0: &tagLength); michael@0: } michael@0: else if (alternateTags != NULL) { michael@0: /* michael@0: * Parse the alternateTags string for the script. michael@0: */ michael@0: char alternateScript[ULOC_SCRIPT_CAPACITY]; michael@0: michael@0: const int32_t alternateScriptLength = michael@0: uloc_getScript( michael@0: alternateTags, michael@0: alternateScript, michael@0: sizeof(alternateScript), michael@0: err); michael@0: michael@0: if (U_FAILURE(*err) || michael@0: alternateScriptLength >= ULOC_SCRIPT_CAPACITY) { michael@0: goto error; michael@0: } michael@0: else if (alternateScriptLength > 0) { michael@0: appendTag( michael@0: alternateScript, michael@0: alternateScriptLength, michael@0: tagBuffer, michael@0: &tagLength); michael@0: } michael@0: } michael@0: michael@0: if (regionLength > 0) { michael@0: appendTag( michael@0: region, michael@0: regionLength, michael@0: tagBuffer, michael@0: &tagLength); michael@0: michael@0: regionAppended = TRUE; michael@0: } michael@0: else if (alternateTags != NULL) { michael@0: /* michael@0: * Parse the alternateTags string for the region. michael@0: */ michael@0: char alternateRegion[ULOC_COUNTRY_CAPACITY]; michael@0: michael@0: const int32_t alternateRegionLength = michael@0: uloc_getCountry( michael@0: alternateTags, michael@0: alternateRegion, michael@0: sizeof(alternateRegion), michael@0: err); michael@0: if (U_FAILURE(*err) || michael@0: alternateRegionLength >= ULOC_COUNTRY_CAPACITY) { michael@0: goto error; michael@0: } michael@0: else if (alternateRegionLength > 0) { michael@0: appendTag( michael@0: alternateRegion, michael@0: alternateRegionLength, michael@0: tagBuffer, michael@0: &tagLength); michael@0: michael@0: regionAppended = TRUE; michael@0: } michael@0: } michael@0: michael@0: { michael@0: const int32_t toCopy = michael@0: tagLength >= tagCapacity ? tagCapacity : tagLength; michael@0: michael@0: /** michael@0: * Copy the partial tag from our internal buffer to the supplied michael@0: * target. michael@0: **/ michael@0: uprv_memcpy( michael@0: tag, michael@0: tagBuffer, michael@0: toCopy); michael@0: michael@0: capacityRemaining -= toCopy; michael@0: } michael@0: michael@0: if (trailingLength > 0) { michael@0: if (*trailing != '@' && capacityRemaining > 0) { michael@0: tag[tagLength++] = '_'; michael@0: --capacityRemaining; michael@0: if (capacityRemaining > 0 && !regionAppended) { michael@0: /* extra separator is required */ michael@0: tag[tagLength++] = '_'; michael@0: --capacityRemaining; michael@0: } michael@0: } michael@0: michael@0: if (capacityRemaining > 0) { michael@0: /* michael@0: * Copy the trailing data into the supplied buffer. Use uprv_memmove, since we michael@0: * don't know if the user-supplied buffers overlap. michael@0: */ michael@0: const int32_t toCopy = michael@0: trailingLength >= capacityRemaining ? capacityRemaining : trailingLength; michael@0: michael@0: uprv_memmove( michael@0: &tag[tagLength], michael@0: trailing, michael@0: toCopy); michael@0: } michael@0: } michael@0: michael@0: tagLength += trailingLength; michael@0: michael@0: return u_terminateChars( michael@0: tag, michael@0: tagCapacity, michael@0: tagLength, michael@0: err); michael@0: } michael@0: michael@0: error: michael@0: michael@0: /** michael@0: * An overflow indicates the locale ID passed in michael@0: * is ill-formed. If we got here, and there was michael@0: * no previous error, it's an implicit overflow. michael@0: **/ michael@0: if (*err == U_BUFFER_OVERFLOW_ERROR || michael@0: U_SUCCESS(*err)) { michael@0: *err = U_ILLEGAL_ARGUMENT_ERROR; michael@0: } michael@0: michael@0: return -1; michael@0: } michael@0: michael@0: /** michael@0: * Create a tag string from the supplied parameters. The lang, script and region michael@0: * parameters may be NULL pointers. If they are, their corresponding length parameters michael@0: * must be less than or equal to 0. If the lang parameter is an empty string, the michael@0: * default value for an unknown language is written to the output buffer. michael@0: * michael@0: * If the length of the new string exceeds the capacity of the output buffer, michael@0: * the function copies as many bytes to the output buffer as it can, and returns michael@0: * the error U_BUFFER_OVERFLOW_ERROR. michael@0: * michael@0: * If an illegal argument is provided, the function returns the error michael@0: * U_ILLEGAL_ARGUMENT_ERROR. michael@0: * michael@0: * @param lang The language tag to use. michael@0: * @param langLength The length of the language tag. michael@0: * @param script The script tag to use. michael@0: * @param scriptLength The length of the script tag. michael@0: * @param region The region tag to use. michael@0: * @param regionLength The length of the region tag. michael@0: * @param trailing Any trailing data to append to the new tag. michael@0: * @param trailingLength The length of the trailing data. michael@0: * @param tag The output buffer. michael@0: * @param tagCapacity The capacity of the output buffer. michael@0: * @param err A pointer to a UErrorCode for error reporting. michael@0: * @return The length of the tag string, which may be greater than tagCapacity. michael@0: **/ michael@0: static int32_t U_CALLCONV michael@0: createTagString( michael@0: const char* lang, michael@0: int32_t langLength, michael@0: const char* script, michael@0: int32_t scriptLength, michael@0: const char* region, michael@0: int32_t regionLength, michael@0: const char* trailing, michael@0: int32_t trailingLength, michael@0: char* tag, michael@0: int32_t tagCapacity, michael@0: UErrorCode* err) michael@0: { michael@0: return createTagStringWithAlternates( michael@0: lang, michael@0: langLength, michael@0: script, michael@0: scriptLength, michael@0: region, michael@0: regionLength, michael@0: trailing, michael@0: trailingLength, michael@0: NULL, michael@0: tag, michael@0: tagCapacity, michael@0: err); michael@0: } michael@0: michael@0: /** michael@0: * Parse the language, script, and region subtags from a tag string, and copy the michael@0: * results into the corresponding output parameters. The buffers are null-terminated, michael@0: * unless overflow occurs. michael@0: * michael@0: * The langLength, scriptLength, and regionLength parameters are input/output michael@0: * parameters, and must contain the capacity of their corresponding buffers on michael@0: * input. On output, they will contain the actual length of the buffers, not michael@0: * including the null terminator. michael@0: * michael@0: * If the length of any of the output subtags exceeds the capacity of the corresponding michael@0: * buffer, the function copies as many bytes to the output buffer as it can, and returns michael@0: * the error U_BUFFER_OVERFLOW_ERROR. It will not parse any more subtags once overflow michael@0: * occurs. michael@0: * michael@0: * If an illegal argument is provided, the function returns the error michael@0: * U_ILLEGAL_ARGUMENT_ERROR. michael@0: * michael@0: * @param localeID The locale ID to parse. michael@0: * @param lang The language tag buffer. michael@0: * @param langLength The length of the language tag. michael@0: * @param script The script tag buffer. michael@0: * @param scriptLength The length of the script tag. michael@0: * @param region The region tag buffer. michael@0: * @param regionLength The length of the region tag. michael@0: * @param err A pointer to a UErrorCode for error reporting. michael@0: * @return The number of chars of the localeID parameter consumed. michael@0: **/ michael@0: static int32_t U_CALLCONV michael@0: parseTagString( michael@0: const char* localeID, michael@0: char* lang, michael@0: int32_t* langLength, michael@0: char* script, michael@0: int32_t* scriptLength, michael@0: char* region, michael@0: int32_t* regionLength, michael@0: UErrorCode* err) michael@0: { michael@0: const char* position = localeID; michael@0: int32_t subtagLength = 0; michael@0: michael@0: if(U_FAILURE(*err) || michael@0: localeID == NULL || michael@0: lang == NULL || michael@0: langLength == NULL || michael@0: script == NULL || michael@0: scriptLength == NULL || michael@0: region == NULL || michael@0: regionLength == NULL) { michael@0: goto error; michael@0: } michael@0: michael@0: subtagLength = ulocimp_getLanguage(position, lang, *langLength, &position); michael@0: u_terminateChars(lang, *langLength, subtagLength, err); michael@0: michael@0: /* michael@0: * Note that we explicit consider U_STRING_NOT_TERMINATED_WARNING michael@0: * to be an error, because it indicates the user-supplied tag is michael@0: * not well-formed. michael@0: */ michael@0: if(U_FAILURE(*err)) { michael@0: goto error; michael@0: } michael@0: michael@0: *langLength = subtagLength; michael@0: michael@0: /* michael@0: * If no language was present, use the value of unknownLanguage michael@0: * instead. Otherwise, move past any separator. michael@0: */ michael@0: if (*langLength == 0) { michael@0: uprv_strcpy( michael@0: lang, michael@0: unknownLanguage); michael@0: *langLength = (int32_t)uprv_strlen(lang); michael@0: } michael@0: else if (_isIDSeparator(*position)) { michael@0: ++position; michael@0: } michael@0: michael@0: subtagLength = ulocimp_getScript(position, script, *scriptLength, &position); michael@0: u_terminateChars(script, *scriptLength, subtagLength, err); michael@0: michael@0: if(U_FAILURE(*err)) { michael@0: goto error; michael@0: } michael@0: michael@0: *scriptLength = subtagLength; michael@0: michael@0: if (*scriptLength > 0) { michael@0: if (uprv_strnicmp(script, unknownScript, *scriptLength) == 0) { michael@0: /** michael@0: * If the script part is the "unknown" script, then don't return it. michael@0: **/ michael@0: *scriptLength = 0; michael@0: } michael@0: michael@0: /* michael@0: * Move past any separator. michael@0: */ michael@0: if (_isIDSeparator(*position)) { michael@0: ++position; michael@0: } michael@0: } michael@0: michael@0: subtagLength = ulocimp_getCountry(position, region, *regionLength, &position); michael@0: u_terminateChars(region, *regionLength, subtagLength, err); michael@0: michael@0: if(U_FAILURE(*err)) { michael@0: goto error; michael@0: } michael@0: michael@0: *regionLength = subtagLength; michael@0: michael@0: if (*regionLength > 0) { michael@0: if (uprv_strnicmp(region, unknownRegion, *regionLength) == 0) { michael@0: /** michael@0: * If the region part is the "unknown" region, then don't return it. michael@0: **/ michael@0: *regionLength = 0; michael@0: } michael@0: } else if (*position != 0 && *position != '@') { michael@0: /* back up over consumed trailing separator */ michael@0: --position; michael@0: } michael@0: michael@0: exit: michael@0: michael@0: return (int32_t)(position - localeID); michael@0: michael@0: error: michael@0: michael@0: /** michael@0: * If we get here, we have no explicit error, it's the result of an michael@0: * illegal argument. michael@0: **/ michael@0: if (!U_FAILURE(*err)) { michael@0: *err = U_ILLEGAL_ARGUMENT_ERROR; michael@0: } michael@0: michael@0: goto exit; michael@0: } michael@0: michael@0: static int32_t U_CALLCONV michael@0: createLikelySubtagsString( michael@0: const char* lang, michael@0: int32_t langLength, michael@0: const char* script, michael@0: int32_t scriptLength, michael@0: const char* region, michael@0: int32_t regionLength, michael@0: const char* variants, michael@0: int32_t variantsLength, michael@0: char* tag, michael@0: int32_t tagCapacity, michael@0: UErrorCode* err) michael@0: { michael@0: /** michael@0: * ULOC_FULLNAME_CAPACITY will provide enough capacity michael@0: * that we can build a string that contains the language, michael@0: * script and region code without worrying about overrunning michael@0: * the user-supplied buffer. michael@0: **/ michael@0: char tagBuffer[ULOC_FULLNAME_CAPACITY]; michael@0: char likelySubtagsBuffer[ULOC_FULLNAME_CAPACITY]; michael@0: michael@0: if(U_FAILURE(*err)) { michael@0: goto error; michael@0: } michael@0: michael@0: /** michael@0: * Try the language with the script and region first. michael@0: **/ michael@0: if (scriptLength > 0 && regionLength > 0) { michael@0: michael@0: const char* likelySubtags = NULL; michael@0: michael@0: createTagString( michael@0: lang, michael@0: langLength, michael@0: script, michael@0: scriptLength, michael@0: region, michael@0: regionLength, michael@0: NULL, michael@0: 0, michael@0: tagBuffer, michael@0: sizeof(tagBuffer), michael@0: err); michael@0: if(U_FAILURE(*err)) { michael@0: goto error; michael@0: } michael@0: michael@0: likelySubtags = michael@0: findLikelySubtags( michael@0: tagBuffer, michael@0: likelySubtagsBuffer, michael@0: sizeof(likelySubtagsBuffer), michael@0: err); michael@0: if(U_FAILURE(*err)) { michael@0: goto error; michael@0: } michael@0: michael@0: if (likelySubtags != NULL) { michael@0: /* Always use the language tag from the michael@0: maximal string, since it may be more michael@0: specific than the one provided. */ michael@0: return createTagStringWithAlternates( michael@0: NULL, michael@0: 0, michael@0: NULL, michael@0: 0, michael@0: NULL, michael@0: 0, michael@0: variants, michael@0: variantsLength, michael@0: likelySubtags, michael@0: tag, michael@0: tagCapacity, michael@0: err); michael@0: } michael@0: } michael@0: michael@0: /** michael@0: * Try the language with just the script. michael@0: **/ michael@0: if (scriptLength > 0) { michael@0: michael@0: const char* likelySubtags = NULL; michael@0: michael@0: createTagString( michael@0: lang, michael@0: langLength, michael@0: script, michael@0: scriptLength, michael@0: NULL, michael@0: 0, michael@0: NULL, michael@0: 0, michael@0: tagBuffer, michael@0: sizeof(tagBuffer), michael@0: err); michael@0: if(U_FAILURE(*err)) { michael@0: goto error; michael@0: } michael@0: michael@0: likelySubtags = michael@0: findLikelySubtags( michael@0: tagBuffer, michael@0: likelySubtagsBuffer, michael@0: sizeof(likelySubtagsBuffer), michael@0: err); michael@0: if(U_FAILURE(*err)) { michael@0: goto error; michael@0: } michael@0: michael@0: if (likelySubtags != NULL) { michael@0: /* Always use the language tag from the michael@0: maximal string, since it may be more michael@0: specific than the one provided. */ michael@0: return createTagStringWithAlternates( michael@0: NULL, michael@0: 0, michael@0: NULL, michael@0: 0, michael@0: region, michael@0: regionLength, michael@0: variants, michael@0: variantsLength, michael@0: likelySubtags, michael@0: tag, michael@0: tagCapacity, michael@0: err); michael@0: } michael@0: } michael@0: michael@0: /** michael@0: * Try the language with just the region. michael@0: **/ michael@0: if (regionLength > 0) { michael@0: michael@0: const char* likelySubtags = NULL; michael@0: michael@0: createTagString( michael@0: lang, michael@0: langLength, michael@0: NULL, michael@0: 0, michael@0: region, michael@0: regionLength, michael@0: NULL, michael@0: 0, michael@0: tagBuffer, michael@0: sizeof(tagBuffer), michael@0: err); michael@0: if(U_FAILURE(*err)) { michael@0: goto error; michael@0: } michael@0: michael@0: likelySubtags = michael@0: findLikelySubtags( michael@0: tagBuffer, michael@0: likelySubtagsBuffer, michael@0: sizeof(likelySubtagsBuffer), michael@0: err); michael@0: if(U_FAILURE(*err)) { michael@0: goto error; michael@0: } michael@0: michael@0: if (likelySubtags != NULL) { michael@0: /* Always use the language tag from the michael@0: maximal string, since it may be more michael@0: specific than the one provided. */ michael@0: return createTagStringWithAlternates( michael@0: NULL, michael@0: 0, michael@0: script, michael@0: scriptLength, michael@0: NULL, michael@0: 0, michael@0: variants, michael@0: variantsLength, michael@0: likelySubtags, michael@0: tag, michael@0: tagCapacity, michael@0: err); michael@0: } michael@0: } michael@0: michael@0: /** michael@0: * Finally, try just the language. michael@0: **/ michael@0: { michael@0: const char* likelySubtags = NULL; michael@0: michael@0: createTagString( michael@0: lang, michael@0: langLength, michael@0: NULL, michael@0: 0, michael@0: NULL, michael@0: 0, michael@0: NULL, michael@0: 0, michael@0: tagBuffer, michael@0: sizeof(tagBuffer), michael@0: err); michael@0: if(U_FAILURE(*err)) { michael@0: goto error; michael@0: } michael@0: michael@0: likelySubtags = michael@0: findLikelySubtags( michael@0: tagBuffer, michael@0: likelySubtagsBuffer, michael@0: sizeof(likelySubtagsBuffer), michael@0: err); michael@0: if(U_FAILURE(*err)) { michael@0: goto error; michael@0: } michael@0: michael@0: if (likelySubtags != NULL) { michael@0: /* Always use the language tag from the michael@0: maximal string, since it may be more michael@0: specific than the one provided. */ michael@0: return createTagStringWithAlternates( michael@0: NULL, michael@0: 0, michael@0: script, michael@0: scriptLength, michael@0: region, michael@0: regionLength, michael@0: variants, michael@0: variantsLength, michael@0: likelySubtags, michael@0: tag, michael@0: tagCapacity, michael@0: err); michael@0: } michael@0: } michael@0: michael@0: return u_terminateChars( michael@0: tag, michael@0: tagCapacity, michael@0: 0, michael@0: err); michael@0: michael@0: error: michael@0: michael@0: if (!U_FAILURE(*err)) { michael@0: *err = U_ILLEGAL_ARGUMENT_ERROR; michael@0: } michael@0: michael@0: return -1; michael@0: } michael@0: michael@0: #define CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength) \ michael@0: { int32_t count = 0; \ michael@0: int32_t i; \ michael@0: for (i = 0; i < trailingLength; i++) { \ michael@0: if (trailing[i] == '-' || trailing[i] == '_') { \ michael@0: count = 0; \ michael@0: if (count > 8) { \ michael@0: goto error; \ michael@0: } \ michael@0: } else if (trailing[i] == '@') { \ michael@0: break; \ michael@0: } else if (count > 8) { \ michael@0: goto error; \ michael@0: } else { \ michael@0: count++; \ michael@0: } \ michael@0: } \ michael@0: } michael@0: michael@0: static int32_t michael@0: _uloc_addLikelySubtags(const char* localeID, michael@0: char* maximizedLocaleID, michael@0: int32_t maximizedLocaleIDCapacity, michael@0: UErrorCode* err) michael@0: { michael@0: char lang[ULOC_LANG_CAPACITY]; michael@0: int32_t langLength = sizeof(lang); michael@0: char script[ULOC_SCRIPT_CAPACITY]; michael@0: int32_t scriptLength = sizeof(script); michael@0: char region[ULOC_COUNTRY_CAPACITY]; michael@0: int32_t regionLength = sizeof(region); michael@0: const char* trailing = ""; michael@0: int32_t trailingLength = 0; michael@0: int32_t trailingIndex = 0; michael@0: int32_t resultLength = 0; michael@0: michael@0: if(U_FAILURE(*err)) { michael@0: goto error; michael@0: } michael@0: else if (localeID == NULL || michael@0: maximizedLocaleID == NULL || michael@0: maximizedLocaleIDCapacity <= 0) { michael@0: goto error; michael@0: } michael@0: michael@0: trailingIndex = parseTagString( michael@0: localeID, michael@0: lang, michael@0: &langLength, michael@0: script, michael@0: &scriptLength, michael@0: region, michael@0: ®ionLength, michael@0: err); michael@0: if(U_FAILURE(*err)) { michael@0: /* Overflow indicates an illegal argument error */ michael@0: if (*err == U_BUFFER_OVERFLOW_ERROR) { michael@0: *err = U_ILLEGAL_ARGUMENT_ERROR; michael@0: } michael@0: michael@0: goto error; michael@0: } michael@0: michael@0: /* Find the length of the trailing portion. */ michael@0: while (_isIDSeparator(localeID[trailingIndex])) { michael@0: trailingIndex++; michael@0: } michael@0: trailing = &localeID[trailingIndex]; michael@0: trailingLength = (int32_t)uprv_strlen(trailing); michael@0: michael@0: CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength); michael@0: michael@0: resultLength = michael@0: createLikelySubtagsString( michael@0: lang, michael@0: langLength, michael@0: script, michael@0: scriptLength, michael@0: region, michael@0: regionLength, michael@0: trailing, michael@0: trailingLength, michael@0: maximizedLocaleID, michael@0: maximizedLocaleIDCapacity, michael@0: err); michael@0: michael@0: if (resultLength == 0) { michael@0: const int32_t localIDLength = (int32_t)uprv_strlen(localeID); michael@0: michael@0: /* michael@0: * If we get here, we need to return localeID. michael@0: */ michael@0: uprv_memcpy( michael@0: maximizedLocaleID, michael@0: localeID, michael@0: localIDLength <= maximizedLocaleIDCapacity ? michael@0: localIDLength : maximizedLocaleIDCapacity); michael@0: michael@0: resultLength = michael@0: u_terminateChars( michael@0: maximizedLocaleID, michael@0: maximizedLocaleIDCapacity, michael@0: localIDLength, michael@0: err); michael@0: } michael@0: michael@0: return resultLength; michael@0: michael@0: error: michael@0: michael@0: if (!U_FAILURE(*err)) { michael@0: *err = U_ILLEGAL_ARGUMENT_ERROR; michael@0: } michael@0: michael@0: return -1; michael@0: } michael@0: michael@0: static int32_t michael@0: _uloc_minimizeSubtags(const char* localeID, michael@0: char* minimizedLocaleID, michael@0: int32_t minimizedLocaleIDCapacity, michael@0: UErrorCode* err) michael@0: { michael@0: /** michael@0: * ULOC_FULLNAME_CAPACITY will provide enough capacity michael@0: * that we can build a string that contains the language, michael@0: * script and region code without worrying about overrunning michael@0: * the user-supplied buffer. michael@0: **/ michael@0: char maximizedTagBuffer[ULOC_FULLNAME_CAPACITY]; michael@0: int32_t maximizedTagBufferLength = sizeof(maximizedTagBuffer); michael@0: michael@0: char lang[ULOC_LANG_CAPACITY]; michael@0: int32_t langLength = sizeof(lang); michael@0: char script[ULOC_SCRIPT_CAPACITY]; michael@0: int32_t scriptLength = sizeof(script); michael@0: char region[ULOC_COUNTRY_CAPACITY]; michael@0: int32_t regionLength = sizeof(region); michael@0: const char* trailing = ""; michael@0: int32_t trailingLength = 0; michael@0: int32_t trailingIndex = 0; michael@0: michael@0: if(U_FAILURE(*err)) { michael@0: goto error; michael@0: } michael@0: else if (localeID == NULL || michael@0: minimizedLocaleID == NULL || michael@0: minimizedLocaleIDCapacity <= 0) { michael@0: goto error; michael@0: } michael@0: michael@0: trailingIndex = michael@0: parseTagString( michael@0: localeID, michael@0: lang, michael@0: &langLength, michael@0: script, michael@0: &scriptLength, michael@0: region, michael@0: ®ionLength, michael@0: err); michael@0: if(U_FAILURE(*err)) { michael@0: michael@0: /* Overflow indicates an illegal argument error */ michael@0: if (*err == U_BUFFER_OVERFLOW_ERROR) { michael@0: *err = U_ILLEGAL_ARGUMENT_ERROR; michael@0: } michael@0: michael@0: goto error; michael@0: } michael@0: michael@0: /* Find the spot where the variants or the keywords begin, if any. */ michael@0: while (_isIDSeparator(localeID[trailingIndex])) { michael@0: trailingIndex++; michael@0: } michael@0: trailing = &localeID[trailingIndex]; michael@0: trailingLength = (int32_t)uprv_strlen(trailing); michael@0: michael@0: CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength); michael@0: michael@0: createTagString( michael@0: lang, michael@0: langLength, michael@0: script, michael@0: scriptLength, michael@0: region, michael@0: regionLength, michael@0: NULL, michael@0: 0, michael@0: maximizedTagBuffer, michael@0: maximizedTagBufferLength, michael@0: err); michael@0: if(U_FAILURE(*err)) { michael@0: goto error; michael@0: } michael@0: michael@0: /** michael@0: * First, we need to first get the maximization michael@0: * from AddLikelySubtags. michael@0: **/ michael@0: maximizedTagBufferLength = michael@0: uloc_addLikelySubtags( michael@0: maximizedTagBuffer, michael@0: maximizedTagBuffer, michael@0: maximizedTagBufferLength, michael@0: err); michael@0: michael@0: if(U_FAILURE(*err)) { michael@0: goto error; michael@0: } michael@0: michael@0: /** michael@0: * Start first with just the language. michael@0: **/ michael@0: { michael@0: char tagBuffer[ULOC_FULLNAME_CAPACITY]; michael@0: michael@0: const int32_t tagBufferLength = michael@0: createLikelySubtagsString( michael@0: lang, michael@0: langLength, michael@0: NULL, michael@0: 0, michael@0: NULL, michael@0: 0, michael@0: NULL, michael@0: 0, michael@0: tagBuffer, michael@0: sizeof(tagBuffer), michael@0: err); michael@0: michael@0: if(U_FAILURE(*err)) { michael@0: goto error; michael@0: } michael@0: else if (uprv_strnicmp( michael@0: maximizedTagBuffer, michael@0: tagBuffer, michael@0: tagBufferLength) == 0) { michael@0: michael@0: return createTagString( michael@0: lang, michael@0: langLength, michael@0: NULL, michael@0: 0, michael@0: NULL, michael@0: 0, michael@0: trailing, michael@0: trailingLength, michael@0: minimizedLocaleID, michael@0: minimizedLocaleIDCapacity, michael@0: err); michael@0: } michael@0: } michael@0: michael@0: /** michael@0: * Next, try the language and region. michael@0: **/ michael@0: if (regionLength > 0) { michael@0: michael@0: char tagBuffer[ULOC_FULLNAME_CAPACITY]; michael@0: michael@0: const int32_t tagBufferLength = michael@0: createLikelySubtagsString( michael@0: lang, michael@0: langLength, michael@0: NULL, michael@0: 0, michael@0: region, michael@0: regionLength, michael@0: NULL, michael@0: 0, michael@0: tagBuffer, michael@0: sizeof(tagBuffer), michael@0: err); michael@0: michael@0: if(U_FAILURE(*err)) { michael@0: goto error; michael@0: } michael@0: else if (uprv_strnicmp( michael@0: maximizedTagBuffer, michael@0: tagBuffer, michael@0: tagBufferLength) == 0) { michael@0: michael@0: return createTagString( michael@0: lang, michael@0: langLength, michael@0: NULL, michael@0: 0, michael@0: region, michael@0: regionLength, michael@0: trailing, michael@0: trailingLength, michael@0: minimizedLocaleID, michael@0: minimizedLocaleIDCapacity, michael@0: err); michael@0: } michael@0: } michael@0: michael@0: /** michael@0: * Finally, try the language and script. This is our last chance, michael@0: * since trying with all three subtags would only yield the michael@0: * maximal version that we already have. michael@0: **/ michael@0: if (scriptLength > 0 && regionLength > 0) { michael@0: char tagBuffer[ULOC_FULLNAME_CAPACITY]; michael@0: michael@0: const int32_t tagBufferLength = michael@0: createLikelySubtagsString( michael@0: lang, michael@0: langLength, michael@0: script, michael@0: scriptLength, michael@0: NULL, michael@0: 0, michael@0: NULL, michael@0: 0, michael@0: tagBuffer, michael@0: sizeof(tagBuffer), michael@0: err); michael@0: michael@0: if(U_FAILURE(*err)) { michael@0: goto error; michael@0: } michael@0: else if (uprv_strnicmp( michael@0: maximizedTagBuffer, michael@0: tagBuffer, michael@0: tagBufferLength) == 0) { michael@0: michael@0: return createTagString( michael@0: lang, michael@0: langLength, michael@0: script, michael@0: scriptLength, michael@0: NULL, michael@0: 0, michael@0: trailing, michael@0: trailingLength, michael@0: minimizedLocaleID, michael@0: minimizedLocaleIDCapacity, michael@0: err); michael@0: } michael@0: } michael@0: michael@0: { michael@0: /** michael@0: * If we got here, return the locale ID parameter. michael@0: **/ michael@0: const int32_t localeIDLength = (int32_t)uprv_strlen(localeID); michael@0: michael@0: uprv_memcpy( michael@0: minimizedLocaleID, michael@0: localeID, michael@0: localeIDLength <= minimizedLocaleIDCapacity ? michael@0: localeIDLength : minimizedLocaleIDCapacity); michael@0: michael@0: return u_terminateChars( michael@0: minimizedLocaleID, michael@0: minimizedLocaleIDCapacity, michael@0: localeIDLength, michael@0: err); michael@0: } michael@0: michael@0: error: michael@0: michael@0: if (!U_FAILURE(*err)) { michael@0: *err = U_ILLEGAL_ARGUMENT_ERROR; michael@0: } michael@0: michael@0: return -1; michael@0: michael@0: michael@0: } michael@0: michael@0: static UBool michael@0: do_canonicalize(const char* localeID, michael@0: char* buffer, michael@0: int32_t bufferCapacity, michael@0: UErrorCode* err) michael@0: { michael@0: uloc_canonicalize( michael@0: localeID, michael@0: buffer, michael@0: bufferCapacity, michael@0: err); michael@0: michael@0: if (*err == U_STRING_NOT_TERMINATED_WARNING || michael@0: *err == U_BUFFER_OVERFLOW_ERROR) { michael@0: *err = U_ILLEGAL_ARGUMENT_ERROR; michael@0: michael@0: return FALSE; michael@0: } michael@0: else if (U_FAILURE(*err)) { michael@0: michael@0: return FALSE; michael@0: } michael@0: else { michael@0: return TRUE; michael@0: } michael@0: } michael@0: michael@0: U_CAPI int32_t U_EXPORT2 michael@0: uloc_addLikelySubtags(const char* localeID, michael@0: char* maximizedLocaleID, michael@0: int32_t maximizedLocaleIDCapacity, michael@0: UErrorCode* err) michael@0: { michael@0: char localeBuffer[ULOC_FULLNAME_CAPACITY]; michael@0: michael@0: if (!do_canonicalize( michael@0: localeID, michael@0: localeBuffer, michael@0: sizeof(localeBuffer), michael@0: err)) { michael@0: return -1; michael@0: } michael@0: else { michael@0: return _uloc_addLikelySubtags( michael@0: localeBuffer, michael@0: maximizedLocaleID, michael@0: maximizedLocaleIDCapacity, michael@0: err); michael@0: } michael@0: } michael@0: michael@0: U_CAPI int32_t U_EXPORT2 michael@0: uloc_minimizeSubtags(const char* localeID, michael@0: char* minimizedLocaleID, michael@0: int32_t minimizedLocaleIDCapacity, michael@0: UErrorCode* err) michael@0: { michael@0: char localeBuffer[ULOC_FULLNAME_CAPACITY]; michael@0: michael@0: if (!do_canonicalize( michael@0: localeID, michael@0: localeBuffer, michael@0: sizeof(localeBuffer), michael@0: err)) { michael@0: return -1; michael@0: } michael@0: else { michael@0: return _uloc_minimizeSubtags( michael@0: localeBuffer, michael@0: minimizedLocaleID, michael@0: minimizedLocaleIDCapacity, michael@0: err); michael@0: } michael@0: }