michael@0: /*
michael@0: *******************************************************************************
michael@0: *
michael@0: *   Copyright (C) 1997-2012, International Business Machines
michael@0: *   Corporation and others.  All Rights Reserved.
michael@0: *
michael@0: *******************************************************************************
michael@0: *   file name:  loclikely.cpp
michael@0: *   encoding:   US-ASCII
michael@0: *   tab size:   8 (not used)
michael@0: *   indentation:4
michael@0: *
michael@0: *   created on: 2010feb25
michael@0: *   created by: Markus W. Scherer
michael@0: *
michael@0: *   Code for likely and minimized locale subtags, separated out from other .cpp files
michael@0: *   that then do not depend on resource bundle code and likely-subtags data.
michael@0: */
michael@0: 
michael@0: #include "unicode/utypes.h"
michael@0: #include "unicode/putil.h"
michael@0: #include "unicode/uloc.h"
michael@0: #include "unicode/ures.h"
michael@0: #include "cmemory.h"
michael@0: #include "cstring.h"
michael@0: #include "ulocimp.h"
michael@0: #include "ustr_imp.h"
michael@0: 
michael@0: /**
michael@0:  * Look up a locale ID in the "likelySubtags" resource bundle and, if an
michael@0:  * entry exists, convert it to chars in the supplied buffer.
michael@0:  *
michael@0:  * @param localeID The key to search for.
michael@0:  * @param buffer Receives the matching entry.
michael@0:  * @param bufferLength The capacity of buffer.
michael@0:  * @param err In/out error code.  Nothing is done if it already holds a
michael@0:  *            failure on entry.  A missing entry is not reported as an
michael@0:  *            error; an entry that does not fit in buffer is reported as
michael@0:  *            U_INTERNAL_PROGRAM_ERROR; any other failure from the
michael@0:  *            resource calls is passed through.
michael@0:  * @return A pointer to "buffer" if an entry was found and copied, or a
michael@0:  *         null pointer otherwise.
michael@0:  */
michael@0: static const char*  U_CALLCONV
michael@0: findLikelySubtags(const char* localeID,
michael@0:                   char* buffer,
michael@0:                   int32_t bufferLength,
michael@0:                   UErrorCode* err) {
michael@0:     UErrorCode status = U_ZERO_ERROR;
michael@0:     UResourceBundle* bundle = NULL;
michael@0:     const UChar* entry = NULL;
michael@0:     int32_t entryLength = 0;
michael@0:     const char* found = NULL;
michael@0: 
michael@0:     if (U_FAILURE(*err)) {
michael@0:         return NULL;
michael@0:     }
michael@0: 
michael@0:     bundle = ures_openDirect(NULL, "likelySubtags", &status);
michael@0:     if (U_FAILURE(status)) {
michael@0:         *err = status;
michael@0:         return NULL;
michael@0:     }
michael@0: 
michael@0:     entry = ures_getStringByKey(bundle, localeID, &entryLength, &status);
michael@0: 
michael@0:     if (U_SUCCESS(status)) {
michael@0:         if (entryLength < bufferLength) {
michael@0:             /*
michael@0:              * Convert one unit more than the reported length, so that the
michael@0:              * terminator following the string is carried over as well
michael@0:              * (assumes the resource string is NUL-terminated).
michael@0:              */
michael@0:             u_UCharsToChars(entry, buffer, entryLength + 1);
michael@0:             found = buffer;
michael@0:         }
michael@0:         else {
michael@0:             /* The buffer should never overflow. */
michael@0:             *err = U_INTERNAL_PROGRAM_ERROR;
michael@0:         }
michael@0:     }
michael@0:     else if (status != U_MISSING_RESOURCE_ERROR) {
michael@0:         /*
michael@0:          * A missing resource only means that there is no data for this
michael@0:          * particular locale ID; everything else is a real error.
michael@0:          */
michael@0:         *err = status;
michael@0:     }
michael@0: 
michael@0:     ures_close(bundle);
michael@0: 
michael@0:     return found;
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Append a subtag to the partially built tag held in buffer.  A '_'
michael@0:  * separator is written first, unless the buffer is still empty.
michael@0:  *
michael@0:  * No bounds checking is done here: the caller must guarantee that the
michael@0:  * buffer has room for the separator and the subtag.  The subtag must not
michael@0:  * be a zero-length string.  No terminator is written.
michael@0:  *
michael@0:  * @param tag The subtag to append; exactly tagLength bytes are copied.
michael@0:  * @param tagLength The length of the subtag.
michael@0:  * @param buffer The output buffer.
michael@0:  * @param bufferLength The number of bytes already used in the output
michael@0:  *                     buffer.  This is an input/output parameter; on
michael@0:  *                     return it includes the separator and the subtag.
michael@0:  **/
michael@0: static void U_CALLCONV
michael@0: appendTag(
michael@0:     const char* tag,
michael@0:     int32_t tagLength,
michael@0:     char* buffer,
michael@0:     int32_t* bufferLength) {
michael@0: 
michael@0:     int32_t used = *bufferLength;
michael@0: 
michael@0:     if (used > 0) {
michael@0:         buffer[used++] = '_';
michael@0:     }
michael@0: 
michael@0:     uprv_memmove(buffer + used, tag, tagLength);
michael@0: 
michael@0:     *bufferLength = used + tagLength;
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * The canonical subtag strings that stand for an unknown language ("und"),
michael@0:  * an unknown script ("Zzzz") and an unknown region ("ZZ").
michael@0:  **/
michael@0: static const char* const unknownLanguage = "und";
michael@0: static const char* const unknownScript = "Zzzz";
michael@0: static const char* const unknownRegion = "ZZ";
michael@0: 
michael@0: /**
michael@0:  * Create a tag string of the form lang[_script][_region][trailing] from the
michael@0:  * supplied parameters.  The lang, script and region parameters may be NULL
michael@0:  * pointers. If they are, their corresponding length parameters must be less
michael@0:  * than or equal to 0.
michael@0:  *
michael@0:  * Each of the language, script and region parameters that is empty is taken
michael@0:  * from the alternateTags parameter instead, if that is not NULL.  If no
michael@0:  * language is available from either source, the default tag for the unknown
michael@0:  * language is used.  A script or region that is available from neither
michael@0:  * source is left out.
michael@0:  *
michael@0:  * Trailing data that does not start with '@' is joined to the subtags with
michael@0:  * a '_' separator; a second '_' is inserted if the tag has no region.
michael@0:  * Trailing data that starts with '@' is appended directly.
michael@0:  *
michael@0:  * If the length of the new string exceeds the capacity of the output buffer,
michael@0:  * the function copies as many bytes to the output buffer as it can, and returns
michael@0:  * the error U_BUFFER_OVERFLOW_ERROR.  The length returned in that case is the
michael@0:  * full length required, including any separators that did not fit.
michael@0:  *
michael@0:  * If an illegal argument is provided (a NULL tag, a tagCapacity that is not
michael@0:  * positive, or a subtag length at or above the matching ULOC_*_CAPACITY),
michael@0:  * the function returns the error U_ILLEGAL_ARGUMENT_ERROR.  An overflow
michael@0:  * reported while parsing alternateTags is reported the same way.
michael@0:  *
michael@0:  * Note that this function can return the warning U_STRING_NOT_TERMINATED_WARNING if
michael@0:  * the tag string fits in the output buffer, but the null terminator doesn't.
michael@0:  *
michael@0:  * @param lang The language tag to use.
michael@0:  * @param langLength The length of the language tag.
michael@0:  * @param script The script tag to use.
michael@0:  * @param scriptLength The length of the script tag.
michael@0:  * @param region The region tag to use.
michael@0:  * @param regionLength The length of the region tag.
michael@0:  * @param trailing Any trailing data to append to the new tag.
michael@0:  * @param trailingLength The length of the trailing data.
michael@0:  * @param alternateTags A string containing any alternate tags.
michael@0:  * @param tag The output buffer.
michael@0:  * @param tagCapacity The capacity of the output buffer.
michael@0:  * @param err A pointer to a UErrorCode for error reporting.
michael@0:  * @return The length of the tag string, which may be greater than tagCapacity, or -1 on error.
michael@0:  **/
michael@0: static int32_t U_CALLCONV
michael@0: createTagStringWithAlternates(
michael@0:     const char* lang,
michael@0:     int32_t langLength,
michael@0:     const char* script,
michael@0:     int32_t scriptLength,
michael@0:     const char* region,
michael@0:     int32_t regionLength,
michael@0:     const char* trailing,
michael@0:     int32_t trailingLength,
michael@0:     const char* alternateTags,
michael@0:     char* tag,
michael@0:     int32_t tagCapacity,
michael@0:     UErrorCode* err) {
michael@0: 
michael@0:     if (U_FAILURE(*err)) {
michael@0:         goto error;
michael@0:     }
michael@0:     else if (tag == NULL ||
michael@0:              tagCapacity <= 0 ||
michael@0:              langLength >= ULOC_LANG_CAPACITY ||
michael@0:              scriptLength >= ULOC_SCRIPT_CAPACITY ||
michael@0:              regionLength >= ULOC_COUNTRY_CAPACITY) {
michael@0:         goto error;
michael@0:     }
michael@0:     else {
michael@0:         /**
michael@0:          * ULOC_FULLNAME_CAPACITY will provide enough capacity
michael@0:          * that we can build a string that contains the language,
michael@0:          * script and region code without worrying about overrunning
michael@0:          * the user-supplied buffer.
michael@0:          **/
michael@0:         char tagBuffer[ULOC_FULLNAME_CAPACITY];
michael@0:         int32_t tagLength = 0;
michael@0:         int32_t capacityRemaining = tagCapacity;
michael@0:         UBool regionAppended = FALSE;
michael@0: 
michael@0:         /*
michael@0:          * Language: the explicit value, else the one from alternateTags,
michael@0:          * else the unknown language.
michael@0:          */
michael@0:         if (langLength > 0) {
michael@0:             appendTag(
michael@0:                 lang,
michael@0:                 langLength,
michael@0:                 tagBuffer,
michael@0:                 &tagLength);
michael@0:         }
michael@0:         else if (alternateTags == NULL) {
michael@0:             /*
michael@0:              * Append the value for an unknown language, if
michael@0:              * we found no language.
michael@0:              */
michael@0:             appendTag(
michael@0:                 unknownLanguage,
michael@0:                 (int32_t)uprv_strlen(unknownLanguage),
michael@0:                 tagBuffer,
michael@0:                 &tagLength);
michael@0:         }
michael@0:         else {
michael@0:             /*
michael@0:              * Parse the alternateTags string for the language.
michael@0:              */
michael@0:             char alternateLang[ULOC_LANG_CAPACITY];
michael@0: 
michael@0:             const int32_t alternateLangLength =
michael@0:                 uloc_getLanguage(
michael@0:                     alternateTags,
michael@0:                     alternateLang,
michael@0:                     sizeof(alternateLang),
michael@0:                     err);
michael@0: 
michael@0:             if (U_FAILURE(*err) ||
michael@0:                 alternateLangLength >= ULOC_LANG_CAPACITY) {
michael@0:                 goto error;
michael@0:             }
michael@0:             else if (alternateLangLength == 0) {
michael@0:                 /*
michael@0:                  * Append the value for an unknown language, if
michael@0:                  * we found no language.
michael@0:                  */
michael@0:                 appendTag(
michael@0:                     unknownLanguage,
michael@0:                     (int32_t)uprv_strlen(unknownLanguage),
michael@0:                     tagBuffer,
michael@0:                     &tagLength);
michael@0:             }
michael@0:             else {
michael@0:                 appendTag(
michael@0:                     alternateLang,
michael@0:                     alternateLangLength,
michael@0:                     tagBuffer,
michael@0:                     &tagLength);
michael@0:             }
michael@0:         }
michael@0: 
michael@0:         /*
michael@0:          * Script: the explicit value, else the one from alternateTags,
michael@0:          * else nothing.
michael@0:          */
michael@0:         if (scriptLength > 0) {
michael@0:             appendTag(
michael@0:                 script,
michael@0:                 scriptLength,
michael@0:                 tagBuffer,
michael@0:                 &tagLength);
michael@0:         }
michael@0:         else if (alternateTags != NULL) {
michael@0:             /*
michael@0:              * Parse the alternateTags string for the script.
michael@0:              */
michael@0:             char alternateScript[ULOC_SCRIPT_CAPACITY];
michael@0: 
michael@0:             const int32_t alternateScriptLength =
michael@0:                 uloc_getScript(
michael@0:                     alternateTags,
michael@0:                     alternateScript,
michael@0:                     sizeof(alternateScript),
michael@0:                     err);
michael@0: 
michael@0:             if (U_FAILURE(*err) ||
michael@0:                 alternateScriptLength >= ULOC_SCRIPT_CAPACITY) {
michael@0:                 goto error;
michael@0:             }
michael@0:             else if (alternateScriptLength > 0) {
michael@0:                 appendTag(
michael@0:                     alternateScript,
michael@0:                     alternateScriptLength,
michael@0:                     tagBuffer,
michael@0:                     &tagLength);
michael@0:             }
michael@0:         }
michael@0: 
michael@0:         /*
michael@0:          * Region: the explicit value, else the one from alternateTags,
michael@0:          * else nothing.  Remember whether one was written, because that
michael@0:          * decides how many separators precede the trailing data.
michael@0:          */
michael@0:         if (regionLength > 0) {
michael@0:             appendTag(
michael@0:                 region,
michael@0:                 regionLength,
michael@0:                 tagBuffer,
michael@0:                 &tagLength);
michael@0: 
michael@0:             regionAppended = TRUE;
michael@0:         }
michael@0:         else if (alternateTags != NULL) {
michael@0:             /*
michael@0:              * Parse the alternateTags string for the region.
michael@0:              */
michael@0:             char alternateRegion[ULOC_COUNTRY_CAPACITY];
michael@0: 
michael@0:             const int32_t alternateRegionLength =
michael@0:                 uloc_getCountry(
michael@0:                     alternateTags,
michael@0:                     alternateRegion,
michael@0:                     sizeof(alternateRegion),
michael@0:                     err);
michael@0: 
michael@0:             if (U_FAILURE(*err) ||
michael@0:                 alternateRegionLength >= ULOC_COUNTRY_CAPACITY) {
michael@0:                 goto error;
michael@0:             }
michael@0:             else if (alternateRegionLength > 0) {
michael@0:                 appendTag(
michael@0:                     alternateRegion,
michael@0:                     alternateRegionLength,
michael@0:                     tagBuffer,
michael@0:                     &tagLength);
michael@0: 
michael@0:                 regionAppended = TRUE;
michael@0:             }
michael@0:         }
michael@0: 
michael@0:         {
michael@0:             const int32_t toCopy =
michael@0:                 tagLength >= tagCapacity ? tagCapacity : tagLength;
michael@0: 
michael@0:             /**
michael@0:              * Copy the partial tag from our internal buffer to the supplied
michael@0:              * target.
michael@0:              **/
michael@0:             uprv_memcpy(
michael@0:                 tag,
michael@0:                 tagBuffer,
michael@0:                 toCopy);
michael@0: 
michael@0:             capacityRemaining -= toCopy;
michael@0:         }
michael@0: 
michael@0:         if (trailingLength > 0) {
michael@0:             if (*trailing != '@') {
michael@0:                 /*
michael@0:                  * One separator goes between the subtags and the trailing
michael@0:                  * data; an extra one is required if there is no region.
michael@0:                  * Each separator is counted in tagLength even when it no
michael@0:                  * longer fits, so that the length returned on overflow is
michael@0:                  * the length actually required.  While capacityRemaining is
michael@0:                  * positive, tagLength is less than tagCapacity, so the
michael@0:                  * write below stays inside the caller's buffer.
michael@0:                  */
michael@0:                 const int32_t separators = regionAppended ? 1 : 2;
michael@0:                 int32_t i;
michael@0: 
michael@0:                 for (i = 0; i < separators; ++i) {
michael@0:                     if (capacityRemaining > 0) {
michael@0:                         tag[tagLength] = '_';
michael@0:                         --capacityRemaining;
michael@0:                     }
michael@0:                     ++tagLength;
michael@0:                 }
michael@0:             }
michael@0: 
michael@0:             if (capacityRemaining > 0) {
michael@0:                 /*
michael@0:                  * Copy the trailing data into the supplied buffer.  Use uprv_memmove, since we
michael@0:                  * don't know if the user-supplied buffers overlap.
michael@0:                  */
michael@0:                 const int32_t toCopy =
michael@0:                     trailingLength >= capacityRemaining ? capacityRemaining : trailingLength;
michael@0: 
michael@0:                 uprv_memmove(
michael@0:                     &tag[tagLength],
michael@0:                     trailing,
michael@0:                     toCopy);
michael@0:             }
michael@0:         }
michael@0: 
michael@0:         tagLength += trailingLength;
michael@0: 
michael@0:         return u_terminateChars(
michael@0:                     tag,
michael@0:                     tagCapacity,
michael@0:                     tagLength,
michael@0:                     err);
michael@0:     }
michael@0: 
michael@0: error:
michael@0: 
michael@0:     /**
michael@0:      * An overflow indicates the locale ID passed in
michael@0:      * is ill-formed.  If we got here, and there was
michael@0:      * no previous error, it's an implicit overflow.
michael@0:      **/
michael@0:     if (*err ==  U_BUFFER_OVERFLOW_ERROR ||
michael@0:         U_SUCCESS(*err)) {
michael@0:         *err = U_ILLEGAL_ARGUMENT_ERROR;
michael@0:     }
michael@0: 
michael@0:     return -1;
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Create a tag string from the supplied parameters, without any alternate
michael@0:  * tags to fall back on.  This is createTagStringWithAlternates() with a NULL
michael@0:  * alternateTags argument.
michael@0:  *
michael@0:  * The lang, script and region parameters may be NULL pointers. If they are,
michael@0:  * their corresponding length parameters must be less than or equal to 0.
michael@0:  * If the lang parameter is an empty string, the default value for an unknown
michael@0:  * language is written to the output buffer.
michael@0:  *
michael@0:  * If the length of the new string exceeds the capacity of the output buffer,
michael@0:  * the function copies as many bytes to the output buffer as it can, and returns
michael@0:  * the error U_BUFFER_OVERFLOW_ERROR.
michael@0:  *
michael@0:  * If an illegal argument is provided, the function returns the error
michael@0:  * U_ILLEGAL_ARGUMENT_ERROR.
michael@0:  *
michael@0:  * @param lang The language tag to use.
michael@0:  * @param langLength The length of the language tag.
michael@0:  * @param script The script tag to use.
michael@0:  * @param scriptLength The length of the script tag.
michael@0:  * @param region The region tag to use.
michael@0:  * @param regionLength The length of the region tag.
michael@0:  * @param trailing Any trailing data to append to the new tag.
michael@0:  * @param trailingLength The length of the trailing data.
michael@0:  * @param tag The output buffer.
michael@0:  * @param tagCapacity The capacity of the output buffer.
michael@0:  * @param err A pointer to a UErrorCode for error reporting.
michael@0:  * @return The length of the tag string, which may be greater than tagCapacity,
michael@0:  *         or -1 on error.
michael@0:  **/
michael@0: static int32_t U_CALLCONV
michael@0: createTagString(
michael@0:     const char* lang,
michael@0:     int32_t langLength,
michael@0:     const char* script,
michael@0:     int32_t scriptLength,
michael@0:     const char* region,
michael@0:     int32_t regionLength,
michael@0:     const char* trailing,
michael@0:     int32_t trailingLength,
michael@0:     char* tag,
michael@0:     int32_t tagCapacity,
michael@0:     UErrorCode* err)
michael@0: {
michael@0:     /* No alternate tags: empty subtags are not filled in from elsewhere. */
michael@0:     const char* const noAlternateTags = NULL;
michael@0: 
michael@0:     const int32_t length =
michael@0:         createTagStringWithAlternates(
michael@0:             lang, langLength,
michael@0:             script, scriptLength,
michael@0:             region, regionLength,
michael@0:             trailing, trailingLength,
michael@0:             noAlternateTags,
michael@0:             tag, tagCapacity,
michael@0:             err);
michael@0: 
michael@0:     return length;
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Parse the language, script, and region subtags from a tag string, and copy the
michael@0:  * results into the corresponding output parameters. The buffers are null-terminated,
michael@0:  * unless overflow occurs.
michael@0:  *
michael@0:  * The langLength, scriptLength, and regionLength parameters are input/output
michael@0:  * parameters, and must contain the capacity of their corresponding buffers on
michael@0:  * input.  On output, they will contain the actual length of the buffers, not
michael@0:  * including the null terminator.
michael@0:  *
michael@0:  * If no language is present, the unknown language ("und") is copied into the
michael@0:  * lang buffer, which therefore must have room for it.  If the script or the
michael@0:  * region is the "unknown" script or region, its length is reported as 0.
michael@0:  *
michael@0:  * If the length of any of the output subtags exceeds the capacity of the corresponding
michael@0:  * buffer, the function copies as many bytes to the output buffer as it can, and returns
michael@0:  * the error U_BUFFER_OVERFLOW_ERROR.  It will not parse any more subtags once overflow
michael@0:  * occurs.
michael@0:  *
michael@0:  * If an illegal argument is provided, the function returns the error
michael@0:  * U_ILLEGAL_ARGUMENT_ERROR.
michael@0:  *
michael@0:  * @param localeID The locale ID to parse.
michael@0:  * @param lang The language tag buffer.
michael@0:  * @param langLength The length of the language tag.
michael@0:  * @param script The script tag buffer.
michael@0:  * @param scriptLength The length of the script tag.
michael@0:  * @param region The region tag buffer.
michael@0:  * @param regionLength The length of the region tag.
michael@0:  * @param err A pointer to a UErrorCode for error reporting.
michael@0:  * @return The number of chars of the localeID parameter consumed.  This is
michael@0:  *         never negative.  On error it is the number consumed before the
michael@0:  *         error was detected.
michael@0:  **/
michael@0: static int32_t U_CALLCONV
michael@0: parseTagString(
michael@0:     const char* localeID,
michael@0:     char* lang,
michael@0:     int32_t* langLength,
michael@0:     char* script,
michael@0:     int32_t* scriptLength,
michael@0:     char* region,
michael@0:     int32_t* regionLength,
michael@0:     UErrorCode* err)
michael@0: {
michael@0:     const char* position = localeID;
michael@0:     int32_t subtagLength = 0;
michael@0: 
michael@0:     if(U_FAILURE(*err) ||
michael@0:        localeID == NULL ||
michael@0:        lang == NULL ||
michael@0:        langLength == NULL ||
michael@0:        script == NULL ||
michael@0:        scriptLength == NULL ||
michael@0:        region == NULL ||
michael@0:        regionLength == NULL) {
michael@0:         goto error;
michael@0:     }
michael@0: 
michael@0:     subtagLength = ulocimp_getLanguage(position, lang, *langLength, &position);
michael@0:     u_terminateChars(lang, *langLength, subtagLength, err);
michael@0: 
michael@0:     /*
michael@0:      * NOTE(review): this comment used to say that
michael@0:      * U_STRING_NOT_TERMINATED_WARNING is explicitly considered an error
michael@0:      * here, because it indicates that the user-supplied tag is not
michael@0:      * well-formed.  The test below is U_FAILURE(), though, which by ICU's
michael@0:      * definition does not fire for warning codes -- confirm the intent.
michael@0:      * The behavior is left as it was.
michael@0:      */
michael@0:     if(U_FAILURE(*err)) {
michael@0:         goto error;
michael@0:     }
michael@0: 
michael@0:     *langLength = subtagLength;
michael@0: 
michael@0:     /*
michael@0:      * If no language was present, use the value of unknownLanguage
michael@0:      * instead.  Otherwise, move past any separator.
michael@0:      */
michael@0:     if (*langLength == 0) {
michael@0:         uprv_strcpy(
michael@0:             lang,
michael@0:             unknownLanguage);
michael@0:         *langLength = (int32_t)uprv_strlen(lang);
michael@0:     }
michael@0:     else if (_isIDSeparator(*position)) {
michael@0:         ++position;
michael@0:     }
michael@0: 
michael@0:     subtagLength = ulocimp_getScript(position, script, *scriptLength, &position);
michael@0:     u_terminateChars(script, *scriptLength, subtagLength, err);
michael@0: 
michael@0:     if(U_FAILURE(*err)) {
michael@0:         goto error;
michael@0:     }
michael@0: 
michael@0:     *scriptLength = subtagLength;
michael@0: 
michael@0:     if (*scriptLength > 0) {
michael@0:         if (uprv_strnicmp(script, unknownScript, *scriptLength) == 0) {
michael@0:             /**
michael@0:              * If the script part is the "unknown" script, then don't return it.
michael@0:              **/
michael@0:             *scriptLength = 0;
michael@0:         }
michael@0: 
michael@0:         /*
michael@0:          * Move past any separator.
michael@0:          */
michael@0:         if (_isIDSeparator(*position)) {
michael@0:             ++position;
michael@0:         }
michael@0:     }
michael@0: 
michael@0:     subtagLength = ulocimp_getCountry(position, region, *regionLength, &position);
michael@0:     u_terminateChars(region, *regionLength, subtagLength, err);
michael@0: 
michael@0:     if(U_FAILURE(*err)) {
michael@0:         goto error;
michael@0:     }
michael@0: 
michael@0:     *regionLength = subtagLength;
michael@0: 
michael@0:     if (*regionLength > 0) {
michael@0:         if (uprv_strnicmp(region, unknownRegion, *regionLength) == 0) {
michael@0:             /**
michael@0:              * If the region part is the "unknown" region, then don't return it.
michael@0:              **/
michael@0:             *regionLength = 0;
michael@0:         }
michael@0:     } else if (position > localeID && *position != 0 && *position != '@') {
michael@0:         /*
michael@0:          * Back up over consumed trailing separator.  Never step back past
michael@0:          * the start of localeID: if nothing at all was consumed (presumably
michael@0:          * an ID that starts with a separator -- TODO confirm against the
michael@0:          * ulocimp_get*() helpers), backing up would yield a negative count
michael@0:          * and make the caller index before the start of its string.
michael@0:          */
michael@0:         --position;
michael@0:     }
michael@0: 
michael@0: exit:
michael@0: 
michael@0:     return (int32_t)(position - localeID);
michael@0: 
michael@0: error:
michael@0: 
michael@0:     /**
michael@0:      * If we get here, we have no explicit error, it's the result of an
michael@0:      * illegal argument.
michael@0:      **/
michael@0:     if (!U_FAILURE(*err)) {
michael@0:         *err = U_ILLEGAL_ARGUMENT_ERROR;
michael@0:     }
michael@0: 
michael@0:     goto exit;
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Create the tag with likely subtags filled in for the given language,
michael@0:  * script and region.
michael@0:  *
michael@0:  * The likelySubtags data is searched with up to four keys, from the most to
michael@0:  * the least specific, and the first key that has an entry wins:
michael@0:  *
michael@0:  *   1. language, script and region (only if script and region are non-empty)
michael@0:  *   2. language and script         (only if the script is non-empty)
michael@0:  *   3. language and region         (only if the region is non-empty)
michael@0:  *   4. language alone
michael@0:  *
michael@0:  * The language of the result always comes from the matching entry.  A script
michael@0:  * or region that was part of the key comes from the entry as well; one that
michael@0:  * was not part of the key is passed through from the caller, and the entry
michael@0:  * only fills it in if the caller's value is empty.  The variants are appended
michael@0:  * as trailing data.
michael@0:  *
michael@0:  * @param lang The language tag to use.
michael@0:  * @param langLength The length of the language tag.
michael@0:  * @param script The script tag to use.
michael@0:  * @param scriptLength The length of the script tag.
michael@0:  * @param region The region tag to use.
michael@0:  * @param regionLength The length of the region tag.
michael@0:  * @param variants Any trailing data to append to the new tag.
michael@0:  * @param variantsLength The length of the trailing data.
michael@0:  * @param tag The output buffer.
michael@0:  * @param tagCapacity The capacity of the output buffer.
michael@0:  * @param err A pointer to a UErrorCode for error reporting.
michael@0:  * @return The result of createTagStringWithAlternates() for the first key
michael@0:  *         that matched; the result of terminating tag at length 0 if no key
michael@0:  *         matched; or -1 on error.
michael@0:  **/
michael@0: static int32_t U_CALLCONV
michael@0: createLikelySubtagsString(
michael@0:     const char* lang,
michael@0:     int32_t langLength,
michael@0:     const char* script,
michael@0:     int32_t scriptLength,
michael@0:     const char* region,
michael@0:     int32_t regionLength,
michael@0:     const char* variants,
michael@0:     int32_t variantsLength,
michael@0:     char* tag,
michael@0:     int32_t tagCapacity,
michael@0:     UErrorCode* err)
michael@0: {
michael@0:     /* Which optional subtags go into the lookup key on each attempt. */
michael@0:     static const UBool keyUsesScript[] = { TRUE, TRUE,  FALSE, FALSE };
michael@0:     static const UBool keyUsesRegion[] = { TRUE, FALSE, TRUE,  FALSE };
michael@0: 
michael@0:     /**
michael@0:      * ULOC_FULLNAME_CAPACITY will provide enough capacity
michael@0:      * that we can build a string that contains the language,
michael@0:      * script and region code without worrying about overrunning
michael@0:      * the user-supplied buffer.
michael@0:      **/
michael@0:     char tagBuffer[ULOC_FULLNAME_CAPACITY];
michael@0:     char likelySubtagsBuffer[ULOC_FULLNAME_CAPACITY];
michael@0:     const char* likelySubtags = NULL;
michael@0:     int32_t attempt = 0;
michael@0: 
michael@0:     if(U_FAILURE(*err)) {
michael@0:         goto error;
michael@0:     }
michael@0: 
michael@0:     for (attempt = 0; attempt < 4; ++attempt) {
michael@0:         const UBool withScript = keyUsesScript[attempt];
michael@0:         const UBool withRegion = keyUsesRegion[attempt];
michael@0: 
michael@0:         /* A key cannot be built from a subtag the caller did not supply. */
michael@0:         if ((withScript && scriptLength <= 0) ||
michael@0:             (withRegion && regionLength <= 0)) {
michael@0:             continue;
michael@0:         }
michael@0: 
michael@0:         /* Build the lookup key for this attempt. */
michael@0:         createTagString(
michael@0:             lang,
michael@0:             langLength,
michael@0:             withScript ? script : NULL,
michael@0:             withScript ? scriptLength : 0,
michael@0:             withRegion ? region : NULL,
michael@0:             withRegion ? regionLength : 0,
michael@0:             NULL,
michael@0:             0,
michael@0:             tagBuffer,
michael@0:             sizeof(tagBuffer),
michael@0:             err);
michael@0:         if(U_FAILURE(*err)) {
michael@0:             goto error;
michael@0:         }
michael@0: 
michael@0:         likelySubtags =
michael@0:             findLikelySubtags(
michael@0:                 tagBuffer,
michael@0:                 likelySubtagsBuffer,
michael@0:                 sizeof(likelySubtagsBuffer),
michael@0:                 err);
michael@0:         if(U_FAILURE(*err)) {
michael@0:             goto error;
michael@0:         }
michael@0: 
michael@0:         if (likelySubtags == NULL) {
michael@0:             /* No entry for this key; fall back to a less specific one. */
michael@0:             continue;
michael@0:         }
michael@0: 
michael@0:         /*
michael@0:          * Always use the language tag from the maximal string, since it
michael@0:          * may be more specific than the one provided.  Subtags that were
michael@0:          * part of the key are taken from the maximal string too; the
michael@0:          * others are handed over as supplied by the caller.
michael@0:          */
michael@0:         return createTagStringWithAlternates(
michael@0:                     NULL,
michael@0:                     0,
michael@0:                     withScript ? NULL : script,
michael@0:                     withScript ? 0 : scriptLength,
michael@0:                     withRegion ? NULL : region,
michael@0:                     withRegion ? 0 : regionLength,
michael@0:                     variants,
michael@0:                     variantsLength,
michael@0:                     likelySubtags,
michael@0:                     tag,
michael@0:                     tagCapacity,
michael@0:                     err);
michael@0:     }
michael@0: 
michael@0:     /* Nothing matched: hand back an empty tag. */
michael@0:     return u_terminateChars(
michael@0:                 tag,
michael@0:                 tagCapacity,
michael@0:                 0,
michael@0:                 err);
michael@0: 
michael@0: error:
michael@0: 
michael@0:     if (!U_FAILURE(*err)) {
michael@0:         *err = U_ILLEGAL_ARGUMENT_ERROR;
michael@0:     }
michael@0: 
michael@0:     return -1;
michael@0: }
michael@0: 
michael@0: #define CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength) \
michael@0:     {   int32_t count = 0; \
michael@0:         int32_t i; \
michael@0:         for (i = 0; i < trailingLength; i++) { \
michael@0:             if (trailing[i] == '-' || trailing[i] == '_') { \
michael@0:                 count = 0; \
michael@0:                 if (count > 8) { \
michael@0:                     goto error; \
michael@0:                 } \
michael@0:             } else if (trailing[i] == '@') { \
michael@0:                 break; \
michael@0:             } else if (count > 8) { \
michael@0:                 goto error; \
michael@0:             } else { \
michael@0:                 count++; \
michael@0:             } \
michael@0:         } \
michael@0:     }
michael@0: 
michael@0: /**
michael@0:  * Builds the maximized form of a locale ID by filling in likely subtags.
michael@0:  *
michael@0:  * The ID is split into language, script and region; whatever follows them
michael@0:  * (after any ID separators) is carried over as the trailing part.  If no
michael@0:  * maximized string is produced (length 0), localeID itself is copied to the
michael@0:  * output instead.
michael@0:  *
michael@0:  * @param localeID The locale ID to maximize; must not be NULL.
michael@0:  * @param maximizedLocaleID Output buffer; must not be NULL.
michael@0:  * @param maximizedLocaleIDCapacity Size of the output buffer; must be positive.
michael@0:  * @param err In/out error code.  Set to U_ILLEGAL_ARGUMENT_ERROR for invalid
michael@0:  *            arguments, for subtags that overflow their parse buffers, and
michael@0:  *            for trailing segments rejected by CHECK_TRAILING_VARIANT_SIZE.
michael@0:  * @return The length reported for the result, or -1 on error.
michael@0:  */
michael@0: static int32_t
michael@0: _uloc_addLikelySubtags(const char*    localeID,
michael@0:          char* maximizedLocaleID,
michael@0:          int32_t maximizedLocaleIDCapacity,
michael@0:          UErrorCode* err)
michael@0: {
michael@0:     char lang[ULOC_LANG_CAPACITY];
michael@0:     char script[ULOC_SCRIPT_CAPACITY];
michael@0:     char region[ULOC_COUNTRY_CAPACITY];
michael@0:     int32_t langLength = sizeof(lang);
michael@0:     int32_t scriptLength = sizeof(script);
michael@0:     int32_t regionLength = sizeof(region);
michael@0:     const char* rest = "";
michael@0:     int32_t restLength = 0;
michael@0:     int32_t restStart = 0;
michael@0:     int32_t written = 0;
michael@0: 
michael@0:     /* Reject a pending failure and unusable arguments alike. */
michael@0:     if (U_FAILURE(*err) ||
michael@0:         localeID == NULL ||
michael@0:         maximizedLocaleID == NULL ||
michael@0:         maximizedLocaleIDCapacity <= 0) {
michael@0:         goto error;
michael@0:     }
michael@0: 
michael@0:     restStart = parseTagString(
michael@0:         localeID,
michael@0:         lang, &langLength,
michael@0:         script, &scriptLength,
michael@0:         region, &regionLength,
michael@0:         err);
michael@0:     if (U_FAILURE(*err)) {
michael@0:         /* A subtag too long for its buffer means the ID itself is bad. */
michael@0:         if (*err == U_BUFFER_OVERFLOW_ERROR) {
michael@0:             *err = U_ILLEGAL_ARGUMENT_ERROR;
michael@0:         }
michael@0:         goto error;
michael@0:     }
michael@0: 
michael@0:     /* Step over separators to reach the start of the trailing part. */
michael@0:     for (; _isIDSeparator(localeID[restStart]); restStart++) {
michael@0:     }
michael@0:     rest = &localeID[restStart];
michael@0:     restLength = (int32_t)uprv_strlen(rest);
michael@0: 
michael@0:     CHECK_TRAILING_VARIANT_SIZE(rest, restLength);
michael@0: 
michael@0:     written = createLikelySubtagsString(
michael@0:         lang, langLength,
michael@0:         script, scriptLength,
michael@0:         region, regionLength,
michael@0:         rest, restLength,
michael@0:         maximizedLocaleID, maximizedLocaleIDCapacity,
michael@0:         err);
michael@0: 
michael@0:     if (written != 0) {
michael@0:         return written;
michael@0:     }
michael@0: 
michael@0:     {
michael@0:         /* Nothing was produced: hand back localeID, truncated to fit. */
michael@0:         const int32_t idLength = (int32_t)uprv_strlen(localeID);
michael@0:         int32_t copyLength = maximizedLocaleIDCapacity;
michael@0: 
michael@0:         if (idLength <= maximizedLocaleIDCapacity) {
michael@0:             copyLength = idLength;
michael@0:         }
michael@0: 
michael@0:         uprv_memcpy(maximizedLocaleID, localeID, copyLength);
michael@0: 
michael@0:         return u_terminateChars(
michael@0:                     maximizedLocaleID,
michael@0:                     maximizedLocaleIDCapacity,
michael@0:                     idLength,
michael@0:                     err);
michael@0:     }
michael@0: 
michael@0: error:
michael@0: 
michael@0:     /* Make sure a failure is always reported alongside the -1 result. */
michael@0:     if (!U_FAILURE(*err)) {
michael@0:         *err = U_ILLEGAL_ARGUMENT_ERROR;
michael@0:     }
michael@0: 
michael@0:     return -1;
michael@0: }
michael@0: 
michael@0: static int32_t
michael@0: _uloc_minimizeSubtags(const char*    localeID,
michael@0:          char* minimizedLocaleID,
michael@0:          int32_t minimizedLocaleIDCapacity,
michael@0:          UErrorCode* err)
michael@0: {
michael@0:     /**
michael@0:      * ULOC_FULLNAME_CAPACITY will provide enough capacity
michael@0:      * that we can build a string that contains the language,
michael@0:      * script and region code without worrying about overrunning
michael@0:      * the user-supplied buffer.
michael@0:      **/
michael@0:     char maximizedTagBuffer[ULOC_FULLNAME_CAPACITY];
michael@0:     int32_t maximizedTagBufferLength = sizeof(maximizedTagBuffer);
michael@0: 
michael@0:     char lang[ULOC_LANG_CAPACITY];
michael@0:     int32_t langLength = sizeof(lang);
michael@0:     char script[ULOC_SCRIPT_CAPACITY];
michael@0:     int32_t scriptLength = sizeof(script);
michael@0:     char region[ULOC_COUNTRY_CAPACITY];
michael@0:     int32_t regionLength = sizeof(region);
michael@0:     const char* trailing = "";
michael@0:     int32_t trailingLength = 0;
michael@0:     int32_t trailingIndex = 0;
michael@0: 
michael@0:     if(U_FAILURE(*err)) {
michael@0:         goto error;
michael@0:     }
michael@0:     else if (localeID == NULL ||
michael@0:              minimizedLocaleID == NULL ||
michael@0:              minimizedLocaleIDCapacity <= 0) {
michael@0:         goto error;
michael@0:     }
michael@0: 
michael@0:     trailingIndex =
michael@0:         parseTagString(
michael@0:             localeID,
michael@0:             lang,
michael@0:             &langLength,
michael@0:             script,
michael@0:             &scriptLength,
michael@0:             region,
michael@0:             &regionLength,
michael@0:             err);
michael@0:     if(U_FAILURE(*err)) {
michael@0: 
michael@0:         /* Overflow indicates an illegal argument error */
michael@0:         if (*err == U_BUFFER_OVERFLOW_ERROR) {
michael@0:             *err = U_ILLEGAL_ARGUMENT_ERROR;
michael@0:         }
michael@0: 
michael@0:         goto error;
michael@0:     }
michael@0: 
michael@0:     /* Find the spot where the variants or the keywords begin, if any. */
michael@0:     while (_isIDSeparator(localeID[trailingIndex])) {
michael@0:         trailingIndex++;
michael@0:     }
michael@0:     trailing = &localeID[trailingIndex];
michael@0:     trailingLength = (int32_t)uprv_strlen(trailing);
michael@0: 
michael@0:     CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
michael@0: 
michael@0:     createTagString(
michael@0:         lang,
michael@0:         langLength,
michael@0:         script,
michael@0:         scriptLength,
michael@0:         region,
michael@0:         regionLength,
michael@0:         NULL,
michael@0:         0,
michael@0:         maximizedTagBuffer,
michael@0:         maximizedTagBufferLength,
michael@0:         err);
michael@0:     if(U_FAILURE(*err)) {
michael@0:         goto error;
michael@0:     }
michael@0: 
michael@0:     /**
michael@0:      * First, we need to first get the maximization
michael@0:      * from AddLikelySubtags.
michael@0:      **/
michael@0:     maximizedTagBufferLength =
michael@0:         uloc_addLikelySubtags(
michael@0:             maximizedTagBuffer,
michael@0:             maximizedTagBuffer,
michael@0:             maximizedTagBufferLength,
michael@0:             err);
michael@0: 
michael@0:     if(U_FAILURE(*err)) {
michael@0:         goto error;
michael@0:     }
michael@0: 
michael@0:     /**
michael@0:      * Start first with just the language.
michael@0:      **/
michael@0:     {
michael@0:         char tagBuffer[ULOC_FULLNAME_CAPACITY];
michael@0: 
michael@0:         const int32_t tagBufferLength =
michael@0:             createLikelySubtagsString(
michael@0:                 lang,
michael@0:                 langLength,
michael@0:                 NULL,
michael@0:                 0,
michael@0:                 NULL,
michael@0:                 0,
michael@0:                 NULL,
michael@0:                 0,
michael@0:                 tagBuffer,
michael@0:                 sizeof(tagBuffer),
michael@0:                 err);
michael@0: 
michael@0:         if(U_FAILURE(*err)) {
michael@0:             goto error;
michael@0:         }
michael@0:         else if (uprv_strnicmp(
michael@0:                     maximizedTagBuffer,
michael@0:                     tagBuffer,
michael@0:                     tagBufferLength) == 0) {
michael@0: 
michael@0:             return createTagString(
michael@0:                         lang,
michael@0:                         langLength,
michael@0:                         NULL,
michael@0:                         0,
michael@0:                         NULL,
michael@0:                         0,
michael@0:                         trailing,
michael@0:                         trailingLength,
michael@0:                         minimizedLocaleID,
michael@0:                         minimizedLocaleIDCapacity,
michael@0:                         err);
michael@0:         }
michael@0:     }
michael@0: 
michael@0:     /**
michael@0:      * Next, try the language and region.
michael@0:      **/
michael@0:     if (regionLength > 0) {
michael@0: 
michael@0:         char tagBuffer[ULOC_FULLNAME_CAPACITY];
michael@0: 
michael@0:         const int32_t tagBufferLength =
michael@0:             createLikelySubtagsString(
michael@0:                 lang,
michael@0:                 langLength,
michael@0:                 NULL,
michael@0:                 0,
michael@0:                 region,
michael@0:                 regionLength,
michael@0:                 NULL,
michael@0:                 0,
michael@0:                 tagBuffer,
michael@0:                 sizeof(tagBuffer),
michael@0:                 err);
michael@0: 
michael@0:         if(U_FAILURE(*err)) {
michael@0:             goto error;
michael@0:         }
michael@0:         else if (uprv_strnicmp(
michael@0:                     maximizedTagBuffer,
michael@0:                     tagBuffer,
michael@0:                     tagBufferLength) == 0) {
michael@0: 
michael@0:             return createTagString(
michael@0:                         lang,
michael@0:                         langLength,
michael@0:                         NULL,
michael@0:                         0,
michael@0:                         region,
michael@0:                         regionLength,
michael@0:                         trailing,
michael@0:                         trailingLength,
michael@0:                         minimizedLocaleID,
michael@0:                         minimizedLocaleIDCapacity,
michael@0:                         err);
michael@0:         }
michael@0:     }
michael@0: 
michael@0:     /**
michael@0:      * Finally, try the language and script.  This is our last chance,
michael@0:      * since trying with all three subtags would only yield the
michael@0:      * maximal version that we already have.
michael@0:      **/
michael@0:     if (scriptLength > 0 && regionLength > 0) {
michael@0:         char tagBuffer[ULOC_FULLNAME_CAPACITY];
michael@0: 
michael@0:         const int32_t tagBufferLength =
michael@0:             createLikelySubtagsString(
michael@0:                 lang,
michael@0:                 langLength,
michael@0:                 script,
michael@0:                 scriptLength,
michael@0:                 NULL,
michael@0:                 0,
michael@0:                 NULL,
michael@0:                 0,
michael@0:                 tagBuffer,
michael@0:                 sizeof(tagBuffer),
michael@0:                 err);
michael@0: 
michael@0:         if(U_FAILURE(*err)) {
michael@0:             goto error;
michael@0:         }
michael@0:         else if (uprv_strnicmp(
michael@0:                     maximizedTagBuffer,
michael@0:                     tagBuffer,
michael@0:                     tagBufferLength) == 0) {
michael@0: 
michael@0:             return createTagString(
michael@0:                         lang,
michael@0:                         langLength,
michael@0:                         script,
michael@0:                         scriptLength,
michael@0:                         NULL,
michael@0:                         0,
michael@0:                         trailing,
michael@0:                         trailingLength,
michael@0:                         minimizedLocaleID,
michael@0:                         minimizedLocaleIDCapacity,
michael@0:                         err);
michael@0:         }
michael@0:     }
michael@0: 
michael@0:     {
michael@0:         /**
michael@0:          * If we got here, return the locale ID parameter.
michael@0:          **/
michael@0:         const int32_t localeIDLength = (int32_t)uprv_strlen(localeID);
michael@0: 
michael@0:         uprv_memcpy(
michael@0:             minimizedLocaleID,
michael@0:             localeID,
michael@0:             localeIDLength <= minimizedLocaleIDCapacity ? 
michael@0:                 localeIDLength : minimizedLocaleIDCapacity);
michael@0: 
michael@0:         return u_terminateChars(
michael@0:                     minimizedLocaleID,
michael@0:                     minimizedLocaleIDCapacity,
michael@0:                     localeIDLength,
michael@0:                     err);
michael@0:     }
michael@0: 
michael@0: error:
michael@0: 
michael@0:     if (!U_FAILURE(*err)) {
michael@0:         *err = U_ILLEGAL_ARGUMENT_ERROR;
michael@0:     }
michael@0: 
michael@0:     return -1;
michael@0: 
michael@0: 
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Canonicalizes localeID into buffer via uloc_canonicalize() and reports
michael@0:  * whether the result is usable.
michael@0:  *
michael@0:  * @param localeID The locale ID to canonicalize.
michael@0:  * @param buffer Receives the canonicalized ID.
michael@0:  * @param bufferCapacity Size of buffer.
michael@0:  * @param err In/out error code.  A result that overflows the buffer or is
michael@0:  *            left unterminated is turned into U_ILLEGAL_ARGUMENT_ERROR.
michael@0:  * @return TRUE if *err is not a failure afterwards, FALSE otherwise.
michael@0:  */
michael@0: static UBool
michael@0: do_canonicalize(const char*    localeID,
michael@0:          char* buffer,
michael@0:          int32_t bufferCapacity,
michael@0:          UErrorCode* err)
michael@0: {
michael@0:     uloc_canonicalize(localeID, buffer, bufferCapacity, err);
michael@0: 
michael@0:     switch (*err) {
michael@0:     case U_STRING_NOT_TERMINATED_WARNING:
michael@0:     case U_BUFFER_OVERFLOW_ERROR:
michael@0:         /* An ID that does not fit the buffer is treated as a bad argument. */
michael@0:         *err = U_ILLEGAL_ARGUMENT_ERROR;
michael@0:         return FALSE;
michael@0:     default:
michael@0:         return U_FAILURE(*err) ? FALSE : TRUE;
michael@0:     }
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Canonicalizes localeID and then maximizes it with _uloc_addLikelySubtags().
michael@0:  *
michael@0:  * @param localeID The locale ID to maximize.
michael@0:  * @param maximizedLocaleID Receives the maximized locale ID.
michael@0:  * @param maximizedLocaleIDCapacity Size of maximizedLocaleID.
michael@0:  * @param err In/out error code.
michael@0:  * @return The value returned by _uloc_addLikelySubtags(), or -1 if the
michael@0:  *         ID could not be canonicalized.
michael@0:  */
michael@0: U_CAPI int32_t U_EXPORT2
michael@0: uloc_addLikelySubtags(const char*    localeID,
michael@0:          char* maximizedLocaleID,
michael@0:          int32_t maximizedLocaleIDCapacity,
michael@0:          UErrorCode* err)
michael@0: {
michael@0:     char canonicalID[ULOC_FULLNAME_CAPACITY];
michael@0: 
michael@0:     if (do_canonicalize(localeID, canonicalID, sizeof(canonicalID), err)) {
michael@0:         return _uloc_addLikelySubtags(
michael@0:                     canonicalID,
michael@0:                     maximizedLocaleID,
michael@0:                     maximizedLocaleIDCapacity,
michael@0:                     err);
michael@0:     }
michael@0: 
michael@0:     /* Canonicalization failed; do_canonicalize() has already set *err. */
michael@0:     return -1;
michael@0: }
michael@0: 
michael@0: /**
michael@0:  * Canonicalizes localeID and then minimizes it with _uloc_minimizeSubtags().
michael@0:  *
michael@0:  * @param localeID The locale ID to minimize.
michael@0:  * @param minimizedLocaleID Receives the minimized locale ID.
michael@0:  * @param minimizedLocaleIDCapacity Size of minimizedLocaleID.
michael@0:  * @param err In/out error code.
michael@0:  * @return The value returned by _uloc_minimizeSubtags(), or -1 if the
michael@0:  *         ID could not be canonicalized.
michael@0:  */
michael@0: U_CAPI int32_t U_EXPORT2
michael@0: uloc_minimizeSubtags(const char*    localeID,
michael@0:          char* minimizedLocaleID,
michael@0:          int32_t minimizedLocaleIDCapacity,
michael@0:          UErrorCode* err)
michael@0: {
michael@0:     char canonicalID[ULOC_FULLNAME_CAPACITY];
michael@0: 
michael@0:     if (do_canonicalize(localeID, canonicalID, sizeof(canonicalID), err)) {
michael@0:         return _uloc_minimizeSubtags(
michael@0:                     canonicalID,
michael@0:                     minimizedLocaleID,
michael@0:                     minimizedLocaleIDCapacity,
michael@0:                     err);
michael@0:     }
michael@0: 
michael@0:     /* Canonicalization failed; do_canonicalize() has already set *err. */
michael@0:     return -1;
michael@0: }