intl/icu/source/common/loclikely.cpp

Sat, 03 Jan 2015 20:18:00 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Sat, 03 Jan 2015 20:18:00 +0100
branch
TOR_BUG_3246
changeset 7
129ffea94266
permissions
-rw-r--r--

Conditionally enable double key logic according to:
private browsing mode or privacy.thirdparty.isolate preference and
implement in GetCookieStringCommon and FindCookie where it counts...
With some reservations of how to convince FindCookie users to test
condition and pass a nullptr when disabling double key logic.

     1 /*
     2 *******************************************************************************
     3 *
     4 *   Copyright (C) 1997-2012, International Business Machines
     5 *   Corporation and others.  All Rights Reserved.
     6 *
     7 *******************************************************************************
     8 *   file name:  loclikely.cpp
     9 *   encoding:   US-ASCII
    10 *   tab size:   8 (not used)
    11 *   indentation:4
    12 *
    13 *   created on: 2010feb25
    14 *   created by: Markus W. Scherer
    15 *
    16 *   Code for likely and minimized locale subtags, separated out from other .cpp files
    17 *   that then do not depend on resource bundle code and likely-subtags data.
    18 */
    20 #include "unicode/utypes.h"
    21 #include "unicode/putil.h"
    22 #include "unicode/uloc.h"
    23 #include "unicode/ures.h"
    24 #include "cmemory.h"
    25 #include "cstring.h"
    26 #include "ulocimp.h"
    27 #include "ustr_imp.h"
    29 /**
    30  * This function looks for the localeID in the likelySubtags resource.
    31  *
    32  * @param localeID The tag to find.
    33  * @param buffer A buffer to hold the matching entry
    34  * @param bufferLength The length of the output buffer
    35  * @return A pointer to "buffer" if found, or a null pointer if not.
    36  */
    37 static const char*  U_CALLCONV
    38 findLikelySubtags(const char* localeID,
    39                   char* buffer,
    40                   int32_t bufferLength,
    41                   UErrorCode* err) {
    42     const char* result = NULL;
    44     if (!U_FAILURE(*err)) {
    45         int32_t resLen = 0;
    46         const UChar* s = NULL;
    47         UErrorCode tmpErr = U_ZERO_ERROR;
    48         UResourceBundle* subtags = ures_openDirect(NULL, "likelySubtags", &tmpErr);
    49         if (U_SUCCESS(tmpErr)) {
    50             s = ures_getStringByKey(subtags, localeID, &resLen, &tmpErr);
    52             if (U_FAILURE(tmpErr)) {
    53                 /*
    54                  * If a resource is missing, it's not really an error, it's
    55                  * just that we don't have any data for that particular locale ID.
    56                  */
    57                 if (tmpErr != U_MISSING_RESOURCE_ERROR) {
    58                     *err = tmpErr;
    59                 }
    60             }
    61             else if (resLen >= bufferLength) {
    62                 /* The buffer should never overflow. */
    63                 *err = U_INTERNAL_PROGRAM_ERROR;
    64             }
    65             else {
    66                 u_UCharsToChars(s, buffer, resLen + 1);
    67                 result = buffer;
    68             }
    70             ures_close(subtags);
    71         } else {
    72             *err = tmpErr;
    73         }
    74     }
    76     return result;
    77 }
    79 /**
    80  * Append a tag to a buffer, adding the separator if necessary.  The buffer
    81  * must be large enough to contain the resulting tag plus any separator
    82  * necessary. The tag must not be a zero-length string.
    83  *
    84  * @param tag The tag to add.
    85  * @param tagLength The length of the tag.
    86  * @param buffer The output buffer.
    87  * @param bufferLength The length of the output buffer.  This is an input/ouput parameter.
    88  **/
    89 static void U_CALLCONV
    90 appendTag(
    91     const char* tag,
    92     int32_t tagLength,
    93     char* buffer,
    94     int32_t* bufferLength) {
    96     if (*bufferLength > 0) {
    97         buffer[*bufferLength] = '_';
    98         ++(*bufferLength);
    99     }
   101     uprv_memmove(
   102         &buffer[*bufferLength],
   103         tag,
   104         tagLength);
   106     *bufferLength += tagLength;
   107 }
   109 /**
   110  * These are the canonical strings for unknown languages, scripts and regions.
   111  **/
   112 static const char* const unknownLanguage = "und";
   113 static const char* const unknownScript = "Zzzz";
   114 static const char* const unknownRegion = "ZZ";
   116 /**
   117  * Create a tag string from the supplied parameters.  The lang, script and region
   118  * parameters may be NULL pointers. If they are, their corresponding length parameters
   119  * must be less than or equal to 0.
   120  *
   121  * If any of the language, script or region parameters are empty, and the alternateTags
   122  * parameter is not NULL, it will be parsed for potential language, script and region tags
   123  * to be used when constructing the new tag.  If the alternateTags parameter is NULL, or
   124  * it contains no language tag, the default tag for the unknown language is used.
   125  *
   126  * If the length of the new string exceeds the capacity of the output buffer, 
   127  * the function copies as many bytes to the output buffer as it can, and returns
   128  * the error U_BUFFER_OVERFLOW_ERROR.
   129  *
   130  * If an illegal argument is provided, the function returns the error
   131  * U_ILLEGAL_ARGUMENT_ERROR.
   132  *
   133  * Note that this function can return the warning U_STRING_NOT_TERMINATED_WARNING if
   134  * the tag string fits in the output buffer, but the null terminator doesn't.
   135  *
   136  * @param lang The language tag to use.
   137  * @param langLength The length of the language tag.
   138  * @param script The script tag to use.
   139  * @param scriptLength The length of the script tag.
   140  * @param region The region tag to use.
   141  * @param regionLength The length of the region tag.
   142  * @param trailing Any trailing data to append to the new tag.
   143  * @param trailingLength The length of the trailing data.
   144  * @param alternateTags A string containing any alternate tags.
   145  * @param tag The output buffer.
   146  * @param tagCapacity The capacity of the output buffer.
   147  * @param err A pointer to a UErrorCode for error reporting.
   148  * @return The length of the tag string, which may be greater than tagCapacity, or -1 on error.
   149  **/
   150 static int32_t U_CALLCONV
   151 createTagStringWithAlternates(
   152     const char* lang,
   153     int32_t langLength,
   154     const char* script,
   155     int32_t scriptLength,
   156     const char* region,
   157     int32_t regionLength,
   158     const char* trailing,
   159     int32_t trailingLength,
   160     const char* alternateTags,
   161     char* tag,
   162     int32_t tagCapacity,
   163     UErrorCode* err) {
   165     if (U_FAILURE(*err)) {
   166         goto error;
   167     }
   168     else if (tag == NULL ||
   169              tagCapacity <= 0 ||
   170              langLength >= ULOC_LANG_CAPACITY ||
   171              scriptLength >= ULOC_SCRIPT_CAPACITY ||
   172              regionLength >= ULOC_COUNTRY_CAPACITY) {
   173         goto error;
   174     }
   175     else {
   176         /**
   177          * ULOC_FULLNAME_CAPACITY will provide enough capacity
   178          * that we can build a string that contains the language,
   179          * script and region code without worrying about overrunning
   180          * the user-supplied buffer.
   181          **/
   182         char tagBuffer[ULOC_FULLNAME_CAPACITY];
   183         int32_t tagLength = 0;
   184         int32_t capacityRemaining = tagCapacity;
   185         UBool regionAppended = FALSE;
   187         if (langLength > 0) {
   188             appendTag(
   189                 lang,
   190                 langLength,
   191                 tagBuffer,
   192                 &tagLength);
   193         }
   194         else if (alternateTags == NULL) {
   195             /*
   196              * Append the value for an unknown language, if
   197              * we found no language.
   198              */
   199             appendTag(
   200                 unknownLanguage,
   201                 (int32_t)uprv_strlen(unknownLanguage),
   202                 tagBuffer,
   203                 &tagLength);
   204         }
   205         else {
   206             /*
   207              * Parse the alternateTags string for the language.
   208              */
   209             char alternateLang[ULOC_LANG_CAPACITY];
   210             int32_t alternateLangLength = sizeof(alternateLang);
   212             alternateLangLength =
   213                 uloc_getLanguage(
   214                     alternateTags,
   215                     alternateLang,
   216                     alternateLangLength,
   217                     err);
   218             if(U_FAILURE(*err) ||
   219                 alternateLangLength >= ULOC_LANG_CAPACITY) {
   220                 goto error;
   221             }
   222             else if (alternateLangLength == 0) {
   223                 /*
   224                  * Append the value for an unknown language, if
   225                  * we found no language.
   226                  */
   227                 appendTag(
   228                     unknownLanguage,
   229                     (int32_t)uprv_strlen(unknownLanguage),
   230                     tagBuffer,
   231                     &tagLength);
   232             }
   233             else {
   234                 appendTag(
   235                     alternateLang,
   236                     alternateLangLength,
   237                     tagBuffer,
   238                     &tagLength);
   239             }
   240         }
   242         if (scriptLength > 0) {
   243             appendTag(
   244                 script,
   245                 scriptLength,
   246                 tagBuffer,
   247                 &tagLength);
   248         }
   249         else if (alternateTags != NULL) {
   250             /*
   251              * Parse the alternateTags string for the script.
   252              */
   253             char alternateScript[ULOC_SCRIPT_CAPACITY];
   255             const int32_t alternateScriptLength =
   256                 uloc_getScript(
   257                     alternateTags,
   258                     alternateScript,
   259                     sizeof(alternateScript),
   260                     err);
   262             if (U_FAILURE(*err) ||
   263                 alternateScriptLength >= ULOC_SCRIPT_CAPACITY) {
   264                 goto error;
   265             }
   266             else if (alternateScriptLength > 0) {
   267                 appendTag(
   268                     alternateScript,
   269                     alternateScriptLength,
   270                     tagBuffer,
   271                     &tagLength);
   272             }
   273         }
   275         if (regionLength > 0) {
   276             appendTag(
   277                 region,
   278                 regionLength,
   279                 tagBuffer,
   280                 &tagLength);
   282             regionAppended = TRUE;
   283         }
   284         else if (alternateTags != NULL) {
   285             /*
   286              * Parse the alternateTags string for the region.
   287              */
   288             char alternateRegion[ULOC_COUNTRY_CAPACITY];
   290             const int32_t alternateRegionLength =
   291                 uloc_getCountry(
   292                     alternateTags,
   293                     alternateRegion,
   294                     sizeof(alternateRegion),
   295                     err);
   296             if (U_FAILURE(*err) ||
   297                 alternateRegionLength >= ULOC_COUNTRY_CAPACITY) {
   298                 goto error;
   299             }
   300             else if (alternateRegionLength > 0) {
   301                 appendTag(
   302                     alternateRegion,
   303                     alternateRegionLength,
   304                     tagBuffer,
   305                     &tagLength);
   307                 regionAppended = TRUE;
   308             }
   309         }
   311         {
   312             const int32_t toCopy =
   313                 tagLength >= tagCapacity ? tagCapacity : tagLength;
   315             /**
   316              * Copy the partial tag from our internal buffer to the supplied
   317              * target.
   318              **/
   319             uprv_memcpy(
   320                 tag,
   321                 tagBuffer,
   322                 toCopy);
   324             capacityRemaining -= toCopy;
   325         }
   327         if (trailingLength > 0) {
   328             if (*trailing != '@' && capacityRemaining > 0) {
   329                 tag[tagLength++] = '_';
   330                 --capacityRemaining;
   331                 if (capacityRemaining > 0 && !regionAppended) {
   332                     /* extra separator is required */
   333                     tag[tagLength++] = '_';
   334                     --capacityRemaining;
   335                 }
   336             }
   338             if (capacityRemaining > 0) {
   339                 /*
   340                  * Copy the trailing data into the supplied buffer.  Use uprv_memmove, since we
   341                  * don't know if the user-supplied buffers overlap.
   342                  */
   343                 const int32_t toCopy =
   344                     trailingLength >= capacityRemaining ? capacityRemaining : trailingLength;
   346                 uprv_memmove(
   347                     &tag[tagLength],
   348                     trailing,
   349                     toCopy);
   350             }
   351         }
   353         tagLength += trailingLength;
   355         return u_terminateChars(
   356                     tag,
   357                     tagCapacity,
   358                     tagLength,
   359                     err);
   360     }
   362 error:
   364     /**
   365      * An overflow indicates the locale ID passed in
   366      * is ill-formed.  If we got here, and there was
   367      * no previous error, it's an implicit overflow.
   368      **/
   369     if (*err ==  U_BUFFER_OVERFLOW_ERROR ||
   370         U_SUCCESS(*err)) {
   371         *err = U_ILLEGAL_ARGUMENT_ERROR;
   372     }
   374     return -1;
   375 }
   377 /**
   378  * Create a tag string from the supplied parameters.  The lang, script and region
   379  * parameters may be NULL pointers. If they are, their corresponding length parameters
   380  * must be less than or equal to 0.  If the lang parameter is an empty string, the
   381  * default value for an unknown language is written to the output buffer.
   382  *
   383  * If the length of the new string exceeds the capacity of the output buffer, 
   384  * the function copies as many bytes to the output buffer as it can, and returns
   385  * the error U_BUFFER_OVERFLOW_ERROR.
   386  *
   387  * If an illegal argument is provided, the function returns the error
   388  * U_ILLEGAL_ARGUMENT_ERROR.
   389  *
   390  * @param lang The language tag to use.
   391  * @param langLength The length of the language tag.
   392  * @param script The script tag to use.
   393  * @param scriptLength The length of the script tag.
   394  * @param region The region tag to use.
   395  * @param regionLength The length of the region tag.
   396  * @param trailing Any trailing data to append to the new tag.
   397  * @param trailingLength The length of the trailing data.
   398  * @param tag The output buffer.
   399  * @param tagCapacity The capacity of the output buffer.
   400  * @param err A pointer to a UErrorCode for error reporting.
   401  * @return The length of the tag string, which may be greater than tagCapacity.
   402  **/
   403 static int32_t U_CALLCONV
   404 createTagString(
   405     const char* lang,
   406     int32_t langLength,
   407     const char* script,
   408     int32_t scriptLength,
   409     const char* region,
   410     int32_t regionLength,
   411     const char* trailing,
   412     int32_t trailingLength,
   413     char* tag,
   414     int32_t tagCapacity,
   415     UErrorCode* err)
   416 {
   417     return createTagStringWithAlternates(
   418                 lang,
   419                 langLength,
   420                 script,
   421                 scriptLength,
   422                 region,
   423                 regionLength,
   424                 trailing,
   425                 trailingLength,
   426                 NULL,
   427                 tag,
   428                 tagCapacity,
   429                 err);
   430 }
   432 /**
   433  * Parse the language, script, and region subtags from a tag string, and copy the
   434  * results into the corresponding output parameters. The buffers are null-terminated,
   435  * unless overflow occurs.
   436  *
   437  * The langLength, scriptLength, and regionLength parameters are input/output
   438  * parameters, and must contain the capacity of their corresponding buffers on
   439  * input.  On output, they will contain the actual length of the buffers, not
   440  * including the null terminator.
   441  *
   442  * If the length of any of the output subtags exceeds the capacity of the corresponding
   443  * buffer, the function copies as many bytes to the output buffer as it can, and returns
   444  * the error U_BUFFER_OVERFLOW_ERROR.  It will not parse any more subtags once overflow
   445  * occurs.
   446  *
   447  * If an illegal argument is provided, the function returns the error
   448  * U_ILLEGAL_ARGUMENT_ERROR.
   449  *
   450  * @param localeID The locale ID to parse.
   451  * @param lang The language tag buffer.
   452  * @param langLength The length of the language tag.
   453  * @param script The script tag buffer.
   454  * @param scriptLength The length of the script tag.
   455  * @param region The region tag buffer.
   456  * @param regionLength The length of the region tag.
   457  * @param err A pointer to a UErrorCode for error reporting.
   458  * @return The number of chars of the localeID parameter consumed.
   459  **/
   460 static int32_t U_CALLCONV
   461 parseTagString(
   462     const char* localeID,
   463     char* lang,
   464     int32_t* langLength,
   465     char* script,
   466     int32_t* scriptLength,
   467     char* region,
   468     int32_t* regionLength,
   469     UErrorCode* err)
   470 {
   471     const char* position = localeID;
   472     int32_t subtagLength = 0;
   474     if(U_FAILURE(*err) ||
   475        localeID == NULL ||
   476        lang == NULL ||
   477        langLength == NULL ||
   478        script == NULL ||
   479        scriptLength == NULL ||
   480        region == NULL ||
   481        regionLength == NULL) {
   482         goto error;
   483     }
   485     subtagLength = ulocimp_getLanguage(position, lang, *langLength, &position);
   486     u_terminateChars(lang, *langLength, subtagLength, err);
   488     /*
   489      * Note that we explicit consider U_STRING_NOT_TERMINATED_WARNING
   490      * to be an error, because it indicates the user-supplied tag is
   491      * not well-formed.
   492      */
   493     if(U_FAILURE(*err)) {
   494         goto error;
   495     }
   497     *langLength = subtagLength;
   499     /*
   500      * If no language was present, use the value of unknownLanguage
   501      * instead.  Otherwise, move past any separator.
   502      */
   503     if (*langLength == 0) {
   504         uprv_strcpy(
   505             lang,
   506             unknownLanguage);
   507         *langLength = (int32_t)uprv_strlen(lang);
   508     }
   509     else if (_isIDSeparator(*position)) {
   510         ++position;
   511     }
   513     subtagLength = ulocimp_getScript(position, script, *scriptLength, &position);
   514     u_terminateChars(script, *scriptLength, subtagLength, err);
   516     if(U_FAILURE(*err)) {
   517         goto error;
   518     }
   520     *scriptLength = subtagLength;
   522     if (*scriptLength > 0) {
   523         if (uprv_strnicmp(script, unknownScript, *scriptLength) == 0) {
   524             /**
   525              * If the script part is the "unknown" script, then don't return it.
   526              **/
   527             *scriptLength = 0;
   528         }
   530         /*
   531          * Move past any separator.
   532          */
   533         if (_isIDSeparator(*position)) {
   534             ++position;
   535         }    
   536     }
   538     subtagLength = ulocimp_getCountry(position, region, *regionLength, &position);
   539     u_terminateChars(region, *regionLength, subtagLength, err);
   541     if(U_FAILURE(*err)) {
   542         goto error;
   543     }
   545     *regionLength = subtagLength;
   547     if (*regionLength > 0) {
   548         if (uprv_strnicmp(region, unknownRegion, *regionLength) == 0) {
   549             /**
   550              * If the region part is the "unknown" region, then don't return it.
   551              **/
   552             *regionLength = 0;
   553         }
   554     } else if (*position != 0 && *position != '@') {
   555         /* back up over consumed trailing separator */
   556         --position;
   557     }
   559 exit:
   561     return (int32_t)(position - localeID);
   563 error:
   565     /**
   566      * If we get here, we have no explicit error, it's the result of an
   567      * illegal argument.
   568      **/
   569     if (!U_FAILURE(*err)) {
   570         *err = U_ILLEGAL_ARGUMENT_ERROR;
   571     }
   573     goto exit;
   574 }
   576 static int32_t U_CALLCONV
   577 createLikelySubtagsString(
   578     const char* lang,
   579     int32_t langLength,
   580     const char* script,
   581     int32_t scriptLength,
   582     const char* region,
   583     int32_t regionLength,
   584     const char* variants,
   585     int32_t variantsLength,
   586     char* tag,
   587     int32_t tagCapacity,
   588     UErrorCode* err)
   589 {
   590     /**
   591      * ULOC_FULLNAME_CAPACITY will provide enough capacity
   592      * that we can build a string that contains the language,
   593      * script and region code without worrying about overrunning
   594      * the user-supplied buffer.
   595      **/
   596     char tagBuffer[ULOC_FULLNAME_CAPACITY];
   597     char likelySubtagsBuffer[ULOC_FULLNAME_CAPACITY];
   599     if(U_FAILURE(*err)) {
   600         goto error;
   601     }
   603     /**
   604      * Try the language with the script and region first.
   605      **/
   606     if (scriptLength > 0 && regionLength > 0) {
   608         const char* likelySubtags = NULL;
   610         createTagString(
   611             lang,
   612             langLength,
   613             script,
   614             scriptLength,
   615             region,
   616             regionLength,
   617             NULL,
   618             0,
   619             tagBuffer,
   620             sizeof(tagBuffer),
   621             err);
   622         if(U_FAILURE(*err)) {
   623             goto error;
   624         }
   626         likelySubtags =
   627             findLikelySubtags(
   628                 tagBuffer,
   629                 likelySubtagsBuffer,
   630                 sizeof(likelySubtagsBuffer),
   631                 err);
   632         if(U_FAILURE(*err)) {
   633             goto error;
   634         }
   636         if (likelySubtags != NULL) {
   637             /* Always use the language tag from the
   638                maximal string, since it may be more
   639                specific than the one provided. */
   640             return createTagStringWithAlternates(
   641                         NULL,
   642                         0,
   643                         NULL,
   644                         0,
   645                         NULL,
   646                         0,
   647                         variants,
   648                         variantsLength,
   649                         likelySubtags,
   650                         tag,
   651                         tagCapacity,
   652                         err);
   653         }
   654     }
   656     /**
   657      * Try the language with just the script.
   658      **/
   659     if (scriptLength > 0) {
   661         const char* likelySubtags = NULL;
   663         createTagString(
   664             lang,
   665             langLength,
   666             script,
   667             scriptLength,
   668             NULL,
   669             0,
   670             NULL,
   671             0,
   672             tagBuffer,
   673             sizeof(tagBuffer),
   674             err);
   675         if(U_FAILURE(*err)) {
   676             goto error;
   677         }
   679         likelySubtags =
   680             findLikelySubtags(
   681                 tagBuffer,
   682                 likelySubtagsBuffer,
   683                 sizeof(likelySubtagsBuffer),
   684                 err);
   685         if(U_FAILURE(*err)) {
   686             goto error;
   687         }
   689         if (likelySubtags != NULL) {
   690             /* Always use the language tag from the
   691                maximal string, since it may be more
   692                specific than the one provided. */
   693             return createTagStringWithAlternates(
   694                         NULL,
   695                         0,
   696                         NULL,
   697                         0,
   698                         region,
   699                         regionLength,
   700                         variants,
   701                         variantsLength,
   702                         likelySubtags,
   703                         tag,
   704                         tagCapacity,
   705                         err);
   706         }
   707     }
   709     /**
   710      * Try the language with just the region.
   711      **/
   712     if (regionLength > 0) {
   714         const char* likelySubtags = NULL;
   716         createTagString(
   717             lang,
   718             langLength,
   719             NULL,
   720             0,
   721             region,
   722             regionLength,
   723             NULL,
   724             0,
   725             tagBuffer,
   726             sizeof(tagBuffer),
   727             err);
   728         if(U_FAILURE(*err)) {
   729             goto error;
   730         }
   732         likelySubtags =
   733             findLikelySubtags(
   734                 tagBuffer,
   735                 likelySubtagsBuffer,
   736                 sizeof(likelySubtagsBuffer),
   737                 err);
   738         if(U_FAILURE(*err)) {
   739             goto error;
   740         }
   742         if (likelySubtags != NULL) {
   743             /* Always use the language tag from the
   744                maximal string, since it may be more
   745                specific than the one provided. */
   746             return createTagStringWithAlternates(
   747                         NULL,
   748                         0,
   749                         script,
   750                         scriptLength,
   751                         NULL,
   752                         0,
   753                         variants,
   754                         variantsLength,
   755                         likelySubtags,
   756                         tag,
   757                         tagCapacity,
   758                         err);
   759         }
   760     }
   762     /**
   763      * Finally, try just the language.
   764      **/
   765     {
   766         const char* likelySubtags = NULL;
   768         createTagString(
   769             lang,
   770             langLength,
   771             NULL,
   772             0,
   773             NULL,
   774             0,
   775             NULL,
   776             0,
   777             tagBuffer,
   778             sizeof(tagBuffer),
   779             err);
   780         if(U_FAILURE(*err)) {
   781             goto error;
   782         }
   784         likelySubtags =
   785             findLikelySubtags(
   786                 tagBuffer,
   787                 likelySubtagsBuffer,
   788                 sizeof(likelySubtagsBuffer),
   789                 err);
   790         if(U_FAILURE(*err)) {
   791             goto error;
   792         }
   794         if (likelySubtags != NULL) {
   795             /* Always use the language tag from the
   796                maximal string, since it may be more
   797                specific than the one provided. */
   798             return createTagStringWithAlternates(
   799                         NULL,
   800                         0,
   801                         script,
   802                         scriptLength,
   803                         region,
   804                         regionLength,
   805                         variants,
   806                         variantsLength,
   807                         likelySubtags,
   808                         tag,
   809                         tagCapacity,
   810                         err);
   811         }
   812     }
   814     return u_terminateChars(
   815                 tag,
   816                 tagCapacity,
   817                 0,
   818                 err);
   820 error:
   822     if (!U_FAILURE(*err)) {
   823         *err = U_ILLEGAL_ARGUMENT_ERROR;
   824     }
   826     return -1;
   827 }
   829 #define CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength) \
   830     {   int32_t count = 0; \
   831         int32_t i; \
   832         for (i = 0; i < trailingLength; i++) { \
   833             if (trailing[i] == '-' || trailing[i] == '_') { \
   834                 count = 0; \
   835                 if (count > 8) { \
   836                     goto error; \
   837                 } \
   838             } else if (trailing[i] == '@') { \
   839                 break; \
   840             } else if (count > 8) { \
   841                 goto error; \
   842             } else { \
   843                 count++; \
   844             } \
   845         } \
   846     }
   848 static int32_t
   849 _uloc_addLikelySubtags(const char*    localeID,
   850          char* maximizedLocaleID,
   851          int32_t maximizedLocaleIDCapacity,
   852          UErrorCode* err)
   853 {
   854     char lang[ULOC_LANG_CAPACITY];
   855     int32_t langLength = sizeof(lang);
   856     char script[ULOC_SCRIPT_CAPACITY];
   857     int32_t scriptLength = sizeof(script);
   858     char region[ULOC_COUNTRY_CAPACITY];
   859     int32_t regionLength = sizeof(region);
   860     const char* trailing = "";
   861     int32_t trailingLength = 0;
   862     int32_t trailingIndex = 0;
   863     int32_t resultLength = 0;
   865     if(U_FAILURE(*err)) {
   866         goto error;
   867     }
   868     else if (localeID == NULL ||
   869              maximizedLocaleID == NULL ||
   870              maximizedLocaleIDCapacity <= 0) {
   871         goto error;
   872     }
   874     trailingIndex = parseTagString(
   875         localeID,
   876         lang,
   877         &langLength,
   878         script,
   879         &scriptLength,
   880         region,
   881         &regionLength,
   882         err);
   883     if(U_FAILURE(*err)) {
   884         /* Overflow indicates an illegal argument error */
   885         if (*err == U_BUFFER_OVERFLOW_ERROR) {
   886             *err = U_ILLEGAL_ARGUMENT_ERROR;
   887         }
   889         goto error;
   890     }
   892     /* Find the length of the trailing portion. */
   893     while (_isIDSeparator(localeID[trailingIndex])) {
   894         trailingIndex++;
   895     }
   896     trailing = &localeID[trailingIndex];
   897     trailingLength = (int32_t)uprv_strlen(trailing);
   899     CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
   901     resultLength =
   902         createLikelySubtagsString(
   903             lang,
   904             langLength,
   905             script,
   906             scriptLength,
   907             region,
   908             regionLength,
   909             trailing,
   910             trailingLength,
   911             maximizedLocaleID,
   912             maximizedLocaleIDCapacity,
   913             err);
   915     if (resultLength == 0) {
   916         const int32_t localIDLength = (int32_t)uprv_strlen(localeID);
   918         /*
   919          * If we get here, we need to return localeID.
   920          */
   921         uprv_memcpy(
   922             maximizedLocaleID,
   923             localeID,
   924             localIDLength <= maximizedLocaleIDCapacity ? 
   925                 localIDLength : maximizedLocaleIDCapacity);
   927         resultLength =
   928             u_terminateChars(
   929                 maximizedLocaleID,
   930                 maximizedLocaleIDCapacity,
   931                 localIDLength,
   932                 err);
   933     }
   935     return resultLength;
   937 error:
   939     if (!U_FAILURE(*err)) {
   940         *err = U_ILLEGAL_ARGUMENT_ERROR;
   941     }
   943     return -1;
   944 }
   946 static int32_t
   947 _uloc_minimizeSubtags(const char*    localeID,
   948          char* minimizedLocaleID,
   949          int32_t minimizedLocaleIDCapacity,
   950          UErrorCode* err)
   951 {
   952     /**
   953      * ULOC_FULLNAME_CAPACITY will provide enough capacity
   954      * that we can build a string that contains the language,
   955      * script and region code without worrying about overrunning
   956      * the user-supplied buffer.
   957      **/
   958     char maximizedTagBuffer[ULOC_FULLNAME_CAPACITY];
   959     int32_t maximizedTagBufferLength = sizeof(maximizedTagBuffer);
   961     char lang[ULOC_LANG_CAPACITY];
   962     int32_t langLength = sizeof(lang);
   963     char script[ULOC_SCRIPT_CAPACITY];
   964     int32_t scriptLength = sizeof(script);
   965     char region[ULOC_COUNTRY_CAPACITY];
   966     int32_t regionLength = sizeof(region);
   967     const char* trailing = "";
   968     int32_t trailingLength = 0;
   969     int32_t trailingIndex = 0;
   971     if(U_FAILURE(*err)) {
   972         goto error;
   973     }
   974     else if (localeID == NULL ||
   975              minimizedLocaleID == NULL ||
   976              minimizedLocaleIDCapacity <= 0) {
   977         goto error;
   978     }
   980     trailingIndex =
   981         parseTagString(
   982             localeID,
   983             lang,
   984             &langLength,
   985             script,
   986             &scriptLength,
   987             region,
   988             &regionLength,
   989             err);
   990     if(U_FAILURE(*err)) {
   992         /* Overflow indicates an illegal argument error */
   993         if (*err == U_BUFFER_OVERFLOW_ERROR) {
   994             *err = U_ILLEGAL_ARGUMENT_ERROR;
   995         }
   997         goto error;
   998     }
  1000     /* Find the spot where the variants or the keywords begin, if any. */
  1001     while (_isIDSeparator(localeID[trailingIndex])) {
  1002         trailingIndex++;
  1004     trailing = &localeID[trailingIndex];
  1005     trailingLength = (int32_t)uprv_strlen(trailing);
  1007     CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
  1009     createTagString(
  1010         lang,
  1011         langLength,
  1012         script,
  1013         scriptLength,
  1014         region,
  1015         regionLength,
  1016         NULL,
  1017         0,
  1018         maximizedTagBuffer,
  1019         maximizedTagBufferLength,
  1020         err);
  1021     if(U_FAILURE(*err)) {
  1022         goto error;
  1025     /**
  1026      * First, we need to first get the maximization
  1027      * from AddLikelySubtags.
  1028      **/
  1029     maximizedTagBufferLength =
  1030         uloc_addLikelySubtags(
  1031             maximizedTagBuffer,
  1032             maximizedTagBuffer,
  1033             maximizedTagBufferLength,
  1034             err);
  1036     if(U_FAILURE(*err)) {
  1037         goto error;
  1040     /**
  1041      * Start first with just the language.
  1042      **/
  1044         char tagBuffer[ULOC_FULLNAME_CAPACITY];
  1046         const int32_t tagBufferLength =
  1047             createLikelySubtagsString(
  1048                 lang,
  1049                 langLength,
  1050                 NULL,
  1051                 0,
  1052                 NULL,
  1053                 0,
  1054                 NULL,
  1055                 0,
  1056                 tagBuffer,
  1057                 sizeof(tagBuffer),
  1058                 err);
  1060         if(U_FAILURE(*err)) {
  1061             goto error;
  1063         else if (uprv_strnicmp(
  1064                     maximizedTagBuffer,
  1065                     tagBuffer,
  1066                     tagBufferLength) == 0) {
  1068             return createTagString(
  1069                         lang,
  1070                         langLength,
  1071                         NULL,
  1072                         0,
  1073                         NULL,
  1074                         0,
  1075                         trailing,
  1076                         trailingLength,
  1077                         minimizedLocaleID,
  1078                         minimizedLocaleIDCapacity,
  1079                         err);
  1083     /**
  1084      * Next, try the language and region.
  1085      **/
  1086     if (regionLength > 0) {
  1088         char tagBuffer[ULOC_FULLNAME_CAPACITY];
  1090         const int32_t tagBufferLength =
  1091             createLikelySubtagsString(
  1092                 lang,
  1093                 langLength,
  1094                 NULL,
  1095                 0,
  1096                 region,
  1097                 regionLength,
  1098                 NULL,
  1099                 0,
  1100                 tagBuffer,
  1101                 sizeof(tagBuffer),
  1102                 err);
  1104         if(U_FAILURE(*err)) {
  1105             goto error;
  1107         else if (uprv_strnicmp(
  1108                     maximizedTagBuffer,
  1109                     tagBuffer,
  1110                     tagBufferLength) == 0) {
  1112             return createTagString(
  1113                         lang,
  1114                         langLength,
  1115                         NULL,
  1116                         0,
  1117                         region,
  1118                         regionLength,
  1119                         trailing,
  1120                         trailingLength,
  1121                         minimizedLocaleID,
  1122                         minimizedLocaleIDCapacity,
  1123                         err);
  1127     /**
  1128      * Finally, try the language and script.  This is our last chance,
  1129      * since trying with all three subtags would only yield the
  1130      * maximal version that we already have.
  1131      **/
  1132     if (scriptLength > 0 && regionLength > 0) {
  1133         char tagBuffer[ULOC_FULLNAME_CAPACITY];
  1135         const int32_t tagBufferLength =
  1136             createLikelySubtagsString(
  1137                 lang,
  1138                 langLength,
  1139                 script,
  1140                 scriptLength,
  1141                 NULL,
  1142                 0,
  1143                 NULL,
  1144                 0,
  1145                 tagBuffer,
  1146                 sizeof(tagBuffer),
  1147                 err);
  1149         if(U_FAILURE(*err)) {
  1150             goto error;
  1152         else if (uprv_strnicmp(
  1153                     maximizedTagBuffer,
  1154                     tagBuffer,
  1155                     tagBufferLength) == 0) {
  1157             return createTagString(
  1158                         lang,
  1159                         langLength,
  1160                         script,
  1161                         scriptLength,
  1162                         NULL,
  1163                         0,
  1164                         trailing,
  1165                         trailingLength,
  1166                         minimizedLocaleID,
  1167                         minimizedLocaleIDCapacity,
  1168                         err);
  1173         /**
  1174          * If we got here, return the locale ID parameter.
  1175          **/
  1176         const int32_t localeIDLength = (int32_t)uprv_strlen(localeID);
  1178         uprv_memcpy(
  1179             minimizedLocaleID,
  1180             localeID,
  1181             localeIDLength <= minimizedLocaleIDCapacity ? 
  1182                 localeIDLength : minimizedLocaleIDCapacity);
  1184         return u_terminateChars(
  1185                     minimizedLocaleID,
  1186                     minimizedLocaleIDCapacity,
  1187                     localeIDLength,
  1188                     err);
  1191 error:
  1193     if (!U_FAILURE(*err)) {
  1194         *err = U_ILLEGAL_ARGUMENT_ERROR;
  1197     return -1;
  1202 static UBool
  1203 do_canonicalize(const char*    localeID,
  1204          char* buffer,
  1205          int32_t bufferCapacity,
  1206          UErrorCode* err)
  1208     uloc_canonicalize(
  1209         localeID,
  1210         buffer,
  1211         bufferCapacity,
  1212         err);
  1214     if (*err == U_STRING_NOT_TERMINATED_WARNING ||
  1215         *err == U_BUFFER_OVERFLOW_ERROR) {
  1216         *err = U_ILLEGAL_ARGUMENT_ERROR;
  1218         return FALSE;
  1220     else if (U_FAILURE(*err)) {
  1222         return FALSE;
  1224     else {
  1225         return TRUE;
  1229 U_CAPI int32_t U_EXPORT2
  1230 uloc_addLikelySubtags(const char*    localeID,
  1231          char* maximizedLocaleID,
  1232          int32_t maximizedLocaleIDCapacity,
  1233          UErrorCode* err)
  1235     char localeBuffer[ULOC_FULLNAME_CAPACITY];
  1237     if (!do_canonicalize(
  1238         localeID,
  1239         localeBuffer,
  1240         sizeof(localeBuffer),
  1241         err)) {
  1242         return -1;
  1244     else {
  1245         return _uloc_addLikelySubtags(
  1246                     localeBuffer,
  1247                     maximizedLocaleID,
  1248                     maximizedLocaleIDCapacity,
  1249                     err);
  1253 U_CAPI int32_t U_EXPORT2
  1254 uloc_minimizeSubtags(const char*    localeID,
  1255          char* minimizedLocaleID,
  1256          int32_t minimizedLocaleIDCapacity,
  1257          UErrorCode* err)
  1259     char localeBuffer[ULOC_FULLNAME_CAPACITY];
  1261     if (!do_canonicalize(
  1262         localeID,
  1263         localeBuffer,
  1264         sizeof(localeBuffer),
  1265         err)) {
  1266         return -1;
  1268     else {
  1269         return _uloc_minimizeSubtags(
  1270                     localeBuffer,
  1271                     minimizedLocaleID,
  1272                     minimizedLocaleIDCapacity,
  1273                     err);

mercurial