|
1 /* |
|
2 ******************************************************************************* |
|
3 * |
|
4 * Copyright (C) 1997-2012, International Business Machines |
|
5 * Corporation and others. All Rights Reserved. |
|
6 * |
|
7 ******************************************************************************* |
|
8 * file name: loclikely.cpp |
|
9 * encoding: US-ASCII |
|
10 * tab size: 8 (not used) |
|
11 * indentation:4 |
|
12 * |
|
13 * created on: 2010feb25 |
|
14 * created by: Markus W. Scherer |
|
15 * |
|
16 * Code for likely and minimized locale subtags, separated out from other .cpp files |
|
17 * that then do not depend on resource bundle code and likely-subtags data. |
|
18 */ |
|
19 |
|
20 #include "unicode/utypes.h" |
|
21 #include "unicode/putil.h" |
|
22 #include "unicode/uloc.h" |
|
23 #include "unicode/ures.h" |
|
24 #include "cmemory.h" |
|
25 #include "cstring.h" |
|
26 #include "ulocimp.h" |
|
27 #include "ustr_imp.h" |
|
28 |
|
29 /** |
|
30 * This function looks for the localeID in the likelySubtags resource. |
|
31 * |
|
32 * @param localeID The tag to find. |
|
33 * @param buffer A buffer to hold the matching entry |
|
34 * @param bufferLength The length of the output buffer |
|
35 * @return A pointer to "buffer" if found, or a null pointer if not. |
|
36 */ |
|
37 static const char* U_CALLCONV |
|
38 findLikelySubtags(const char* localeID, |
|
39 char* buffer, |
|
40 int32_t bufferLength, |
|
41 UErrorCode* err) { |
|
42 const char* result = NULL; |
|
43 |
|
44 if (!U_FAILURE(*err)) { |
|
45 int32_t resLen = 0; |
|
46 const UChar* s = NULL; |
|
47 UErrorCode tmpErr = U_ZERO_ERROR; |
|
48 UResourceBundle* subtags = ures_openDirect(NULL, "likelySubtags", &tmpErr); |
|
49 if (U_SUCCESS(tmpErr)) { |
|
50 s = ures_getStringByKey(subtags, localeID, &resLen, &tmpErr); |
|
51 |
|
52 if (U_FAILURE(tmpErr)) { |
|
53 /* |
|
54 * If a resource is missing, it's not really an error, it's |
|
55 * just that we don't have any data for that particular locale ID. |
|
56 */ |
|
57 if (tmpErr != U_MISSING_RESOURCE_ERROR) { |
|
58 *err = tmpErr; |
|
59 } |
|
60 } |
|
61 else if (resLen >= bufferLength) { |
|
62 /* The buffer should never overflow. */ |
|
63 *err = U_INTERNAL_PROGRAM_ERROR; |
|
64 } |
|
65 else { |
|
66 u_UCharsToChars(s, buffer, resLen + 1); |
|
67 result = buffer; |
|
68 } |
|
69 |
|
70 ures_close(subtags); |
|
71 } else { |
|
72 *err = tmpErr; |
|
73 } |
|
74 } |
|
75 |
|
76 return result; |
|
77 } |
|
78 |
|
79 /** |
|
80 * Append a tag to a buffer, adding the separator if necessary. The buffer |
|
81 * must be large enough to contain the resulting tag plus any separator |
|
82 * necessary. The tag must not be a zero-length string. |
|
83 * |
|
84 * @param tag The tag to add. |
|
85 * @param tagLength The length of the tag. |
|
86 * @param buffer The output buffer. |
|
87 * @param bufferLength The length of the output buffer. This is an input/ouput parameter. |
|
88 **/ |
|
89 static void U_CALLCONV |
|
90 appendTag( |
|
91 const char* tag, |
|
92 int32_t tagLength, |
|
93 char* buffer, |
|
94 int32_t* bufferLength) { |
|
95 |
|
96 if (*bufferLength > 0) { |
|
97 buffer[*bufferLength] = '_'; |
|
98 ++(*bufferLength); |
|
99 } |
|
100 |
|
101 uprv_memmove( |
|
102 &buffer[*bufferLength], |
|
103 tag, |
|
104 tagLength); |
|
105 |
|
106 *bufferLength += tagLength; |
|
107 } |
|
108 |
|
/**
 * Canonical placeholder subtags used when a language, script or region
 * is unknown.
 **/
static const char* const unknownLanguage = "und";   /* undetermined language */
static const char* const unknownScript   = "Zzzz";  /* unknown script */
static const char* const unknownRegion   = "ZZ";    /* unknown region */
|
115 |
|
116 /** |
|
117 * Create a tag string from the supplied parameters. The lang, script and region |
|
118 * parameters may be NULL pointers. If they are, their corresponding length parameters |
|
119 * must be less than or equal to 0. |
|
120 * |
|
121 * If any of the language, script or region parameters are empty, and the alternateTags |
|
122 * parameter is not NULL, it will be parsed for potential language, script and region tags |
|
123 * to be used when constructing the new tag. If the alternateTags parameter is NULL, or |
|
124 * it contains no language tag, the default tag for the unknown language is used. |
|
125 * |
|
126 * If the length of the new string exceeds the capacity of the output buffer, |
|
127 * the function copies as many bytes to the output buffer as it can, and returns |
|
128 * the error U_BUFFER_OVERFLOW_ERROR. |
|
129 * |
|
130 * If an illegal argument is provided, the function returns the error |
|
131 * U_ILLEGAL_ARGUMENT_ERROR. |
|
132 * |
|
133 * Note that this function can return the warning U_STRING_NOT_TERMINATED_WARNING if |
|
134 * the tag string fits in the output buffer, but the null terminator doesn't. |
|
135 * |
|
136 * @param lang The language tag to use. |
|
137 * @param langLength The length of the language tag. |
|
138 * @param script The script tag to use. |
|
139 * @param scriptLength The length of the script tag. |
|
140 * @param region The region tag to use. |
|
141 * @param regionLength The length of the region tag. |
|
142 * @param trailing Any trailing data to append to the new tag. |
|
143 * @param trailingLength The length of the trailing data. |
|
144 * @param alternateTags A string containing any alternate tags. |
|
145 * @param tag The output buffer. |
|
146 * @param tagCapacity The capacity of the output buffer. |
|
147 * @param err A pointer to a UErrorCode for error reporting. |
|
148 * @return The length of the tag string, which may be greater than tagCapacity, or -1 on error. |
|
149 **/ |
|
150 static int32_t U_CALLCONV |
|
151 createTagStringWithAlternates( |
|
152 const char* lang, |
|
153 int32_t langLength, |
|
154 const char* script, |
|
155 int32_t scriptLength, |
|
156 const char* region, |
|
157 int32_t regionLength, |
|
158 const char* trailing, |
|
159 int32_t trailingLength, |
|
160 const char* alternateTags, |
|
161 char* tag, |
|
162 int32_t tagCapacity, |
|
163 UErrorCode* err) { |
|
164 |
|
165 if (U_FAILURE(*err)) { |
|
166 goto error; |
|
167 } |
|
168 else if (tag == NULL || |
|
169 tagCapacity <= 0 || |
|
170 langLength >= ULOC_LANG_CAPACITY || |
|
171 scriptLength >= ULOC_SCRIPT_CAPACITY || |
|
172 regionLength >= ULOC_COUNTRY_CAPACITY) { |
|
173 goto error; |
|
174 } |
|
175 else { |
|
176 /** |
|
177 * ULOC_FULLNAME_CAPACITY will provide enough capacity |
|
178 * that we can build a string that contains the language, |
|
179 * script and region code without worrying about overrunning |
|
180 * the user-supplied buffer. |
|
181 **/ |
|
182 char tagBuffer[ULOC_FULLNAME_CAPACITY]; |
|
183 int32_t tagLength = 0; |
|
184 int32_t capacityRemaining = tagCapacity; |
|
185 UBool regionAppended = FALSE; |
|
186 |
|
187 if (langLength > 0) { |
|
188 appendTag( |
|
189 lang, |
|
190 langLength, |
|
191 tagBuffer, |
|
192 &tagLength); |
|
193 } |
|
194 else if (alternateTags == NULL) { |
|
195 /* |
|
196 * Append the value for an unknown language, if |
|
197 * we found no language. |
|
198 */ |
|
199 appendTag( |
|
200 unknownLanguage, |
|
201 (int32_t)uprv_strlen(unknownLanguage), |
|
202 tagBuffer, |
|
203 &tagLength); |
|
204 } |
|
205 else { |
|
206 /* |
|
207 * Parse the alternateTags string for the language. |
|
208 */ |
|
209 char alternateLang[ULOC_LANG_CAPACITY]; |
|
210 int32_t alternateLangLength = sizeof(alternateLang); |
|
211 |
|
212 alternateLangLength = |
|
213 uloc_getLanguage( |
|
214 alternateTags, |
|
215 alternateLang, |
|
216 alternateLangLength, |
|
217 err); |
|
218 if(U_FAILURE(*err) || |
|
219 alternateLangLength >= ULOC_LANG_CAPACITY) { |
|
220 goto error; |
|
221 } |
|
222 else if (alternateLangLength == 0) { |
|
223 /* |
|
224 * Append the value for an unknown language, if |
|
225 * we found no language. |
|
226 */ |
|
227 appendTag( |
|
228 unknownLanguage, |
|
229 (int32_t)uprv_strlen(unknownLanguage), |
|
230 tagBuffer, |
|
231 &tagLength); |
|
232 } |
|
233 else { |
|
234 appendTag( |
|
235 alternateLang, |
|
236 alternateLangLength, |
|
237 tagBuffer, |
|
238 &tagLength); |
|
239 } |
|
240 } |
|
241 |
|
242 if (scriptLength > 0) { |
|
243 appendTag( |
|
244 script, |
|
245 scriptLength, |
|
246 tagBuffer, |
|
247 &tagLength); |
|
248 } |
|
249 else if (alternateTags != NULL) { |
|
250 /* |
|
251 * Parse the alternateTags string for the script. |
|
252 */ |
|
253 char alternateScript[ULOC_SCRIPT_CAPACITY]; |
|
254 |
|
255 const int32_t alternateScriptLength = |
|
256 uloc_getScript( |
|
257 alternateTags, |
|
258 alternateScript, |
|
259 sizeof(alternateScript), |
|
260 err); |
|
261 |
|
262 if (U_FAILURE(*err) || |
|
263 alternateScriptLength >= ULOC_SCRIPT_CAPACITY) { |
|
264 goto error; |
|
265 } |
|
266 else if (alternateScriptLength > 0) { |
|
267 appendTag( |
|
268 alternateScript, |
|
269 alternateScriptLength, |
|
270 tagBuffer, |
|
271 &tagLength); |
|
272 } |
|
273 } |
|
274 |
|
275 if (regionLength > 0) { |
|
276 appendTag( |
|
277 region, |
|
278 regionLength, |
|
279 tagBuffer, |
|
280 &tagLength); |
|
281 |
|
282 regionAppended = TRUE; |
|
283 } |
|
284 else if (alternateTags != NULL) { |
|
285 /* |
|
286 * Parse the alternateTags string for the region. |
|
287 */ |
|
288 char alternateRegion[ULOC_COUNTRY_CAPACITY]; |
|
289 |
|
290 const int32_t alternateRegionLength = |
|
291 uloc_getCountry( |
|
292 alternateTags, |
|
293 alternateRegion, |
|
294 sizeof(alternateRegion), |
|
295 err); |
|
296 if (U_FAILURE(*err) || |
|
297 alternateRegionLength >= ULOC_COUNTRY_CAPACITY) { |
|
298 goto error; |
|
299 } |
|
300 else if (alternateRegionLength > 0) { |
|
301 appendTag( |
|
302 alternateRegion, |
|
303 alternateRegionLength, |
|
304 tagBuffer, |
|
305 &tagLength); |
|
306 |
|
307 regionAppended = TRUE; |
|
308 } |
|
309 } |
|
310 |
|
311 { |
|
312 const int32_t toCopy = |
|
313 tagLength >= tagCapacity ? tagCapacity : tagLength; |
|
314 |
|
315 /** |
|
316 * Copy the partial tag from our internal buffer to the supplied |
|
317 * target. |
|
318 **/ |
|
319 uprv_memcpy( |
|
320 tag, |
|
321 tagBuffer, |
|
322 toCopy); |
|
323 |
|
324 capacityRemaining -= toCopy; |
|
325 } |
|
326 |
|
327 if (trailingLength > 0) { |
|
328 if (*trailing != '@' && capacityRemaining > 0) { |
|
329 tag[tagLength++] = '_'; |
|
330 --capacityRemaining; |
|
331 if (capacityRemaining > 0 && !regionAppended) { |
|
332 /* extra separator is required */ |
|
333 tag[tagLength++] = '_'; |
|
334 --capacityRemaining; |
|
335 } |
|
336 } |
|
337 |
|
338 if (capacityRemaining > 0) { |
|
339 /* |
|
340 * Copy the trailing data into the supplied buffer. Use uprv_memmove, since we |
|
341 * don't know if the user-supplied buffers overlap. |
|
342 */ |
|
343 const int32_t toCopy = |
|
344 trailingLength >= capacityRemaining ? capacityRemaining : trailingLength; |
|
345 |
|
346 uprv_memmove( |
|
347 &tag[tagLength], |
|
348 trailing, |
|
349 toCopy); |
|
350 } |
|
351 } |
|
352 |
|
353 tagLength += trailingLength; |
|
354 |
|
355 return u_terminateChars( |
|
356 tag, |
|
357 tagCapacity, |
|
358 tagLength, |
|
359 err); |
|
360 } |
|
361 |
|
362 error: |
|
363 |
|
364 /** |
|
365 * An overflow indicates the locale ID passed in |
|
366 * is ill-formed. If we got here, and there was |
|
367 * no previous error, it's an implicit overflow. |
|
368 **/ |
|
369 if (*err == U_BUFFER_OVERFLOW_ERROR || |
|
370 U_SUCCESS(*err)) { |
|
371 *err = U_ILLEGAL_ARGUMENT_ERROR; |
|
372 } |
|
373 |
|
374 return -1; |
|
375 } |
|
376 |
|
377 /** |
|
378 * Create a tag string from the supplied parameters. The lang, script and region |
|
379 * parameters may be NULL pointers. If they are, their corresponding length parameters |
|
380 * must be less than or equal to 0. If the lang parameter is an empty string, the |
|
381 * default value for an unknown language is written to the output buffer. |
|
382 * |
|
383 * If the length of the new string exceeds the capacity of the output buffer, |
|
384 * the function copies as many bytes to the output buffer as it can, and returns |
|
385 * the error U_BUFFER_OVERFLOW_ERROR. |
|
386 * |
|
387 * If an illegal argument is provided, the function returns the error |
|
388 * U_ILLEGAL_ARGUMENT_ERROR. |
|
389 * |
|
390 * @param lang The language tag to use. |
|
391 * @param langLength The length of the language tag. |
|
392 * @param script The script tag to use. |
|
393 * @param scriptLength The length of the script tag. |
|
394 * @param region The region tag to use. |
|
395 * @param regionLength The length of the region tag. |
|
396 * @param trailing Any trailing data to append to the new tag. |
|
397 * @param trailingLength The length of the trailing data. |
|
398 * @param tag The output buffer. |
|
399 * @param tagCapacity The capacity of the output buffer. |
|
400 * @param err A pointer to a UErrorCode for error reporting. |
|
401 * @return The length of the tag string, which may be greater than tagCapacity. |
|
402 **/ |
|
403 static int32_t U_CALLCONV |
|
404 createTagString( |
|
405 const char* lang, |
|
406 int32_t langLength, |
|
407 const char* script, |
|
408 int32_t scriptLength, |
|
409 const char* region, |
|
410 int32_t regionLength, |
|
411 const char* trailing, |
|
412 int32_t trailingLength, |
|
413 char* tag, |
|
414 int32_t tagCapacity, |
|
415 UErrorCode* err) |
|
416 { |
|
417 return createTagStringWithAlternates( |
|
418 lang, |
|
419 langLength, |
|
420 script, |
|
421 scriptLength, |
|
422 region, |
|
423 regionLength, |
|
424 trailing, |
|
425 trailingLength, |
|
426 NULL, |
|
427 tag, |
|
428 tagCapacity, |
|
429 err); |
|
430 } |
|
431 |
|
432 /** |
|
433 * Parse the language, script, and region subtags from a tag string, and copy the |
|
434 * results into the corresponding output parameters. The buffers are null-terminated, |
|
435 * unless overflow occurs. |
|
436 * |
|
437 * The langLength, scriptLength, and regionLength parameters are input/output |
|
438 * parameters, and must contain the capacity of their corresponding buffers on |
|
439 * input. On output, they will contain the actual length of the buffers, not |
|
440 * including the null terminator. |
|
441 * |
|
442 * If the length of any of the output subtags exceeds the capacity of the corresponding |
|
443 * buffer, the function copies as many bytes to the output buffer as it can, and returns |
|
444 * the error U_BUFFER_OVERFLOW_ERROR. It will not parse any more subtags once overflow |
|
445 * occurs. |
|
446 * |
|
447 * If an illegal argument is provided, the function returns the error |
|
448 * U_ILLEGAL_ARGUMENT_ERROR. |
|
449 * |
|
450 * @param localeID The locale ID to parse. |
|
451 * @param lang The language tag buffer. |
|
452 * @param langLength The length of the language tag. |
|
453 * @param script The script tag buffer. |
|
454 * @param scriptLength The length of the script tag. |
|
455 * @param region The region tag buffer. |
|
456 * @param regionLength The length of the region tag. |
|
457 * @param err A pointer to a UErrorCode for error reporting. |
|
458 * @return The number of chars of the localeID parameter consumed. |
|
459 **/ |
|
460 static int32_t U_CALLCONV |
|
461 parseTagString( |
|
462 const char* localeID, |
|
463 char* lang, |
|
464 int32_t* langLength, |
|
465 char* script, |
|
466 int32_t* scriptLength, |
|
467 char* region, |
|
468 int32_t* regionLength, |
|
469 UErrorCode* err) |
|
470 { |
|
471 const char* position = localeID; |
|
472 int32_t subtagLength = 0; |
|
473 |
|
474 if(U_FAILURE(*err) || |
|
475 localeID == NULL || |
|
476 lang == NULL || |
|
477 langLength == NULL || |
|
478 script == NULL || |
|
479 scriptLength == NULL || |
|
480 region == NULL || |
|
481 regionLength == NULL) { |
|
482 goto error; |
|
483 } |
|
484 |
|
485 subtagLength = ulocimp_getLanguage(position, lang, *langLength, &position); |
|
486 u_terminateChars(lang, *langLength, subtagLength, err); |
|
487 |
|
488 /* |
|
489 * Note that we explicit consider U_STRING_NOT_TERMINATED_WARNING |
|
490 * to be an error, because it indicates the user-supplied tag is |
|
491 * not well-formed. |
|
492 */ |
|
493 if(U_FAILURE(*err)) { |
|
494 goto error; |
|
495 } |
|
496 |
|
497 *langLength = subtagLength; |
|
498 |
|
499 /* |
|
500 * If no language was present, use the value of unknownLanguage |
|
501 * instead. Otherwise, move past any separator. |
|
502 */ |
|
503 if (*langLength == 0) { |
|
504 uprv_strcpy( |
|
505 lang, |
|
506 unknownLanguage); |
|
507 *langLength = (int32_t)uprv_strlen(lang); |
|
508 } |
|
509 else if (_isIDSeparator(*position)) { |
|
510 ++position; |
|
511 } |
|
512 |
|
513 subtagLength = ulocimp_getScript(position, script, *scriptLength, &position); |
|
514 u_terminateChars(script, *scriptLength, subtagLength, err); |
|
515 |
|
516 if(U_FAILURE(*err)) { |
|
517 goto error; |
|
518 } |
|
519 |
|
520 *scriptLength = subtagLength; |
|
521 |
|
522 if (*scriptLength > 0) { |
|
523 if (uprv_strnicmp(script, unknownScript, *scriptLength) == 0) { |
|
524 /** |
|
525 * If the script part is the "unknown" script, then don't return it. |
|
526 **/ |
|
527 *scriptLength = 0; |
|
528 } |
|
529 |
|
530 /* |
|
531 * Move past any separator. |
|
532 */ |
|
533 if (_isIDSeparator(*position)) { |
|
534 ++position; |
|
535 } |
|
536 } |
|
537 |
|
538 subtagLength = ulocimp_getCountry(position, region, *regionLength, &position); |
|
539 u_terminateChars(region, *regionLength, subtagLength, err); |
|
540 |
|
541 if(U_FAILURE(*err)) { |
|
542 goto error; |
|
543 } |
|
544 |
|
545 *regionLength = subtagLength; |
|
546 |
|
547 if (*regionLength > 0) { |
|
548 if (uprv_strnicmp(region, unknownRegion, *regionLength) == 0) { |
|
549 /** |
|
550 * If the region part is the "unknown" region, then don't return it. |
|
551 **/ |
|
552 *regionLength = 0; |
|
553 } |
|
554 } else if (*position != 0 && *position != '@') { |
|
555 /* back up over consumed trailing separator */ |
|
556 --position; |
|
557 } |
|
558 |
|
559 exit: |
|
560 |
|
561 return (int32_t)(position - localeID); |
|
562 |
|
563 error: |
|
564 |
|
565 /** |
|
566 * If we get here, we have no explicit error, it's the result of an |
|
567 * illegal argument. |
|
568 **/ |
|
569 if (!U_FAILURE(*err)) { |
|
570 *err = U_ILLEGAL_ARGUMENT_ERROR; |
|
571 } |
|
572 |
|
573 goto exit; |
|
574 } |
|
575 |
|
576 static int32_t U_CALLCONV |
|
577 createLikelySubtagsString( |
|
578 const char* lang, |
|
579 int32_t langLength, |
|
580 const char* script, |
|
581 int32_t scriptLength, |
|
582 const char* region, |
|
583 int32_t regionLength, |
|
584 const char* variants, |
|
585 int32_t variantsLength, |
|
586 char* tag, |
|
587 int32_t tagCapacity, |
|
588 UErrorCode* err) |
|
589 { |
|
590 /** |
|
591 * ULOC_FULLNAME_CAPACITY will provide enough capacity |
|
592 * that we can build a string that contains the language, |
|
593 * script and region code without worrying about overrunning |
|
594 * the user-supplied buffer. |
|
595 **/ |
|
596 char tagBuffer[ULOC_FULLNAME_CAPACITY]; |
|
597 char likelySubtagsBuffer[ULOC_FULLNAME_CAPACITY]; |
|
598 |
|
599 if(U_FAILURE(*err)) { |
|
600 goto error; |
|
601 } |
|
602 |
|
603 /** |
|
604 * Try the language with the script and region first. |
|
605 **/ |
|
606 if (scriptLength > 0 && regionLength > 0) { |
|
607 |
|
608 const char* likelySubtags = NULL; |
|
609 |
|
610 createTagString( |
|
611 lang, |
|
612 langLength, |
|
613 script, |
|
614 scriptLength, |
|
615 region, |
|
616 regionLength, |
|
617 NULL, |
|
618 0, |
|
619 tagBuffer, |
|
620 sizeof(tagBuffer), |
|
621 err); |
|
622 if(U_FAILURE(*err)) { |
|
623 goto error; |
|
624 } |
|
625 |
|
626 likelySubtags = |
|
627 findLikelySubtags( |
|
628 tagBuffer, |
|
629 likelySubtagsBuffer, |
|
630 sizeof(likelySubtagsBuffer), |
|
631 err); |
|
632 if(U_FAILURE(*err)) { |
|
633 goto error; |
|
634 } |
|
635 |
|
636 if (likelySubtags != NULL) { |
|
637 /* Always use the language tag from the |
|
638 maximal string, since it may be more |
|
639 specific than the one provided. */ |
|
640 return createTagStringWithAlternates( |
|
641 NULL, |
|
642 0, |
|
643 NULL, |
|
644 0, |
|
645 NULL, |
|
646 0, |
|
647 variants, |
|
648 variantsLength, |
|
649 likelySubtags, |
|
650 tag, |
|
651 tagCapacity, |
|
652 err); |
|
653 } |
|
654 } |
|
655 |
|
656 /** |
|
657 * Try the language with just the script. |
|
658 **/ |
|
659 if (scriptLength > 0) { |
|
660 |
|
661 const char* likelySubtags = NULL; |
|
662 |
|
663 createTagString( |
|
664 lang, |
|
665 langLength, |
|
666 script, |
|
667 scriptLength, |
|
668 NULL, |
|
669 0, |
|
670 NULL, |
|
671 0, |
|
672 tagBuffer, |
|
673 sizeof(tagBuffer), |
|
674 err); |
|
675 if(U_FAILURE(*err)) { |
|
676 goto error; |
|
677 } |
|
678 |
|
679 likelySubtags = |
|
680 findLikelySubtags( |
|
681 tagBuffer, |
|
682 likelySubtagsBuffer, |
|
683 sizeof(likelySubtagsBuffer), |
|
684 err); |
|
685 if(U_FAILURE(*err)) { |
|
686 goto error; |
|
687 } |
|
688 |
|
689 if (likelySubtags != NULL) { |
|
690 /* Always use the language tag from the |
|
691 maximal string, since it may be more |
|
692 specific than the one provided. */ |
|
693 return createTagStringWithAlternates( |
|
694 NULL, |
|
695 0, |
|
696 NULL, |
|
697 0, |
|
698 region, |
|
699 regionLength, |
|
700 variants, |
|
701 variantsLength, |
|
702 likelySubtags, |
|
703 tag, |
|
704 tagCapacity, |
|
705 err); |
|
706 } |
|
707 } |
|
708 |
|
709 /** |
|
710 * Try the language with just the region. |
|
711 **/ |
|
712 if (regionLength > 0) { |
|
713 |
|
714 const char* likelySubtags = NULL; |
|
715 |
|
716 createTagString( |
|
717 lang, |
|
718 langLength, |
|
719 NULL, |
|
720 0, |
|
721 region, |
|
722 regionLength, |
|
723 NULL, |
|
724 0, |
|
725 tagBuffer, |
|
726 sizeof(tagBuffer), |
|
727 err); |
|
728 if(U_FAILURE(*err)) { |
|
729 goto error; |
|
730 } |
|
731 |
|
732 likelySubtags = |
|
733 findLikelySubtags( |
|
734 tagBuffer, |
|
735 likelySubtagsBuffer, |
|
736 sizeof(likelySubtagsBuffer), |
|
737 err); |
|
738 if(U_FAILURE(*err)) { |
|
739 goto error; |
|
740 } |
|
741 |
|
742 if (likelySubtags != NULL) { |
|
743 /* Always use the language tag from the |
|
744 maximal string, since it may be more |
|
745 specific than the one provided. */ |
|
746 return createTagStringWithAlternates( |
|
747 NULL, |
|
748 0, |
|
749 script, |
|
750 scriptLength, |
|
751 NULL, |
|
752 0, |
|
753 variants, |
|
754 variantsLength, |
|
755 likelySubtags, |
|
756 tag, |
|
757 tagCapacity, |
|
758 err); |
|
759 } |
|
760 } |
|
761 |
|
762 /** |
|
763 * Finally, try just the language. |
|
764 **/ |
|
765 { |
|
766 const char* likelySubtags = NULL; |
|
767 |
|
768 createTagString( |
|
769 lang, |
|
770 langLength, |
|
771 NULL, |
|
772 0, |
|
773 NULL, |
|
774 0, |
|
775 NULL, |
|
776 0, |
|
777 tagBuffer, |
|
778 sizeof(tagBuffer), |
|
779 err); |
|
780 if(U_FAILURE(*err)) { |
|
781 goto error; |
|
782 } |
|
783 |
|
784 likelySubtags = |
|
785 findLikelySubtags( |
|
786 tagBuffer, |
|
787 likelySubtagsBuffer, |
|
788 sizeof(likelySubtagsBuffer), |
|
789 err); |
|
790 if(U_FAILURE(*err)) { |
|
791 goto error; |
|
792 } |
|
793 |
|
794 if (likelySubtags != NULL) { |
|
795 /* Always use the language tag from the |
|
796 maximal string, since it may be more |
|
797 specific than the one provided. */ |
|
798 return createTagStringWithAlternates( |
|
799 NULL, |
|
800 0, |
|
801 script, |
|
802 scriptLength, |
|
803 region, |
|
804 regionLength, |
|
805 variants, |
|
806 variantsLength, |
|
807 likelySubtags, |
|
808 tag, |
|
809 tagCapacity, |
|
810 err); |
|
811 } |
|
812 } |
|
813 |
|
814 return u_terminateChars( |
|
815 tag, |
|
816 tagCapacity, |
|
817 0, |
|
818 err); |
|
819 |
|
820 error: |
|
821 |
|
822 if (!U_FAILURE(*err)) { |
|
823 *err = U_ILLEGAL_ARGUMENT_ERROR; |
|
824 } |
|
825 |
|
826 return -1; |
|
827 } |
|
828 |
|
/**
 * Reject trailing data that contains an over-long segment.
 *
 * Scans the first trailingLength chars of trailing. '-' and '_' start a new
 * segment, '@' ends the scan (whatever follows it is not examined), and any
 * other char extends the current segment. Control jumps to the enclosing
 * function's "error" label as soon as a segment grows past 9 chars.
 *
 * NOTE(review): the limit was presumably meant to be 8 chars; the check
 * runs before the increment, so 9 are accepted. That long-standing
 * behavior is kept as is.
 *
 * The invoking function must define an "error" label. Both arguments are
 * evaluated more than once.
 */
#define CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength) \
    do { \
        int32_t count = 0; \
        int32_t i; \
        for (i = 0; i < (trailingLength); i++) { \
            if ((trailing)[i] == '-' || (trailing)[i] == '_') { \
                count = 0; \
            } else if ((trailing)[i] == '@') { \
                break; \
            } else if (count > 8) { \
                goto error; \
            } else { \
                count++; \
            } \
        } \
    } while (0)
|
847 |
|
848 static int32_t |
|
849 _uloc_addLikelySubtags(const char* localeID, |
|
850 char* maximizedLocaleID, |
|
851 int32_t maximizedLocaleIDCapacity, |
|
852 UErrorCode* err) |
|
853 { |
|
854 char lang[ULOC_LANG_CAPACITY]; |
|
855 int32_t langLength = sizeof(lang); |
|
856 char script[ULOC_SCRIPT_CAPACITY]; |
|
857 int32_t scriptLength = sizeof(script); |
|
858 char region[ULOC_COUNTRY_CAPACITY]; |
|
859 int32_t regionLength = sizeof(region); |
|
860 const char* trailing = ""; |
|
861 int32_t trailingLength = 0; |
|
862 int32_t trailingIndex = 0; |
|
863 int32_t resultLength = 0; |
|
864 |
|
865 if(U_FAILURE(*err)) { |
|
866 goto error; |
|
867 } |
|
868 else if (localeID == NULL || |
|
869 maximizedLocaleID == NULL || |
|
870 maximizedLocaleIDCapacity <= 0) { |
|
871 goto error; |
|
872 } |
|
873 |
|
874 trailingIndex = parseTagString( |
|
875 localeID, |
|
876 lang, |
|
877 &langLength, |
|
878 script, |
|
879 &scriptLength, |
|
880 region, |
|
881 ®ionLength, |
|
882 err); |
|
883 if(U_FAILURE(*err)) { |
|
884 /* Overflow indicates an illegal argument error */ |
|
885 if (*err == U_BUFFER_OVERFLOW_ERROR) { |
|
886 *err = U_ILLEGAL_ARGUMENT_ERROR; |
|
887 } |
|
888 |
|
889 goto error; |
|
890 } |
|
891 |
|
892 /* Find the length of the trailing portion. */ |
|
893 while (_isIDSeparator(localeID[trailingIndex])) { |
|
894 trailingIndex++; |
|
895 } |
|
896 trailing = &localeID[trailingIndex]; |
|
897 trailingLength = (int32_t)uprv_strlen(trailing); |
|
898 |
|
899 CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength); |
|
900 |
|
901 resultLength = |
|
902 createLikelySubtagsString( |
|
903 lang, |
|
904 langLength, |
|
905 script, |
|
906 scriptLength, |
|
907 region, |
|
908 regionLength, |
|
909 trailing, |
|
910 trailingLength, |
|
911 maximizedLocaleID, |
|
912 maximizedLocaleIDCapacity, |
|
913 err); |
|
914 |
|
915 if (resultLength == 0) { |
|
916 const int32_t localIDLength = (int32_t)uprv_strlen(localeID); |
|
917 |
|
918 /* |
|
919 * If we get here, we need to return localeID. |
|
920 */ |
|
921 uprv_memcpy( |
|
922 maximizedLocaleID, |
|
923 localeID, |
|
924 localIDLength <= maximizedLocaleIDCapacity ? |
|
925 localIDLength : maximizedLocaleIDCapacity); |
|
926 |
|
927 resultLength = |
|
928 u_terminateChars( |
|
929 maximizedLocaleID, |
|
930 maximizedLocaleIDCapacity, |
|
931 localIDLength, |
|
932 err); |
|
933 } |
|
934 |
|
935 return resultLength; |
|
936 |
|
937 error: |
|
938 |
|
939 if (!U_FAILURE(*err)) { |
|
940 *err = U_ILLEGAL_ARGUMENT_ERROR; |
|
941 } |
|
942 |
|
943 return -1; |
|
944 } |
|
945 |
|
946 static int32_t |
|
947 _uloc_minimizeSubtags(const char* localeID, |
|
948 char* minimizedLocaleID, |
|
949 int32_t minimizedLocaleIDCapacity, |
|
950 UErrorCode* err) |
|
951 { |
|
952 /** |
|
953 * ULOC_FULLNAME_CAPACITY will provide enough capacity |
|
954 * that we can build a string that contains the language, |
|
955 * script and region code without worrying about overrunning |
|
956 * the user-supplied buffer. |
|
957 **/ |
|
958 char maximizedTagBuffer[ULOC_FULLNAME_CAPACITY]; |
|
959 int32_t maximizedTagBufferLength = sizeof(maximizedTagBuffer); |
|
960 |
|
961 char lang[ULOC_LANG_CAPACITY]; |
|
962 int32_t langLength = sizeof(lang); |
|
963 char script[ULOC_SCRIPT_CAPACITY]; |
|
964 int32_t scriptLength = sizeof(script); |
|
965 char region[ULOC_COUNTRY_CAPACITY]; |
|
966 int32_t regionLength = sizeof(region); |
|
967 const char* trailing = ""; |
|
968 int32_t trailingLength = 0; |
|
969 int32_t trailingIndex = 0; |
|
970 |
|
971 if(U_FAILURE(*err)) { |
|
972 goto error; |
|
973 } |
|
974 else if (localeID == NULL || |
|
975 minimizedLocaleID == NULL || |
|
976 minimizedLocaleIDCapacity <= 0) { |
|
977 goto error; |
|
978 } |
|
979 |
|
980 trailingIndex = |
|
981 parseTagString( |
|
982 localeID, |
|
983 lang, |
|
984 &langLength, |
|
985 script, |
|
986 &scriptLength, |
|
987 region, |
|
988 ®ionLength, |
|
989 err); |
|
990 if(U_FAILURE(*err)) { |
|
991 |
|
992 /* Overflow indicates an illegal argument error */ |
|
993 if (*err == U_BUFFER_OVERFLOW_ERROR) { |
|
994 *err = U_ILLEGAL_ARGUMENT_ERROR; |
|
995 } |
|
996 |
|
997 goto error; |
|
998 } |
|
999 |
|
1000 /* Find the spot where the variants or the keywords begin, if any. */ |
|
1001 while (_isIDSeparator(localeID[trailingIndex])) { |
|
1002 trailingIndex++; |
|
1003 } |
|
1004 trailing = &localeID[trailingIndex]; |
|
1005 trailingLength = (int32_t)uprv_strlen(trailing); |
|
1006 |
|
1007 CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength); |
|
1008 |
|
1009 createTagString( |
|
1010 lang, |
|
1011 langLength, |
|
1012 script, |
|
1013 scriptLength, |
|
1014 region, |
|
1015 regionLength, |
|
1016 NULL, |
|
1017 0, |
|
1018 maximizedTagBuffer, |
|
1019 maximizedTagBufferLength, |
|
1020 err); |
|
1021 if(U_FAILURE(*err)) { |
|
1022 goto error; |
|
1023 } |
|
1024 |
|
1025 /** |
|
1026 * First, we need to first get the maximization |
|
1027 * from AddLikelySubtags. |
|
1028 **/ |
|
1029 maximizedTagBufferLength = |
|
1030 uloc_addLikelySubtags( |
|
1031 maximizedTagBuffer, |
|
1032 maximizedTagBuffer, |
|
1033 maximizedTagBufferLength, |
|
1034 err); |
|
1035 |
|
1036 if(U_FAILURE(*err)) { |
|
1037 goto error; |
|
1038 } |
|
1039 |
|
1040 /** |
|
1041 * Start first with just the language. |
|
1042 **/ |
|
1043 { |
|
1044 char tagBuffer[ULOC_FULLNAME_CAPACITY]; |
|
1045 |
|
1046 const int32_t tagBufferLength = |
|
1047 createLikelySubtagsString( |
|
1048 lang, |
|
1049 langLength, |
|
1050 NULL, |
|
1051 0, |
|
1052 NULL, |
|
1053 0, |
|
1054 NULL, |
|
1055 0, |
|
1056 tagBuffer, |
|
1057 sizeof(tagBuffer), |
|
1058 err); |
|
1059 |
|
1060 if(U_FAILURE(*err)) { |
|
1061 goto error; |
|
1062 } |
|
1063 else if (uprv_strnicmp( |
|
1064 maximizedTagBuffer, |
|
1065 tagBuffer, |
|
1066 tagBufferLength) == 0) { |
|
1067 |
|
1068 return createTagString( |
|
1069 lang, |
|
1070 langLength, |
|
1071 NULL, |
|
1072 0, |
|
1073 NULL, |
|
1074 0, |
|
1075 trailing, |
|
1076 trailingLength, |
|
1077 minimizedLocaleID, |
|
1078 minimizedLocaleIDCapacity, |
|
1079 err); |
|
1080 } |
|
1081 } |
|
1082 |
|
1083 /** |
|
1084 * Next, try the language and region. |
|
1085 **/ |
|
1086 if (regionLength > 0) { |
|
1087 |
|
1088 char tagBuffer[ULOC_FULLNAME_CAPACITY]; |
|
1089 |
|
1090 const int32_t tagBufferLength = |
|
1091 createLikelySubtagsString( |
|
1092 lang, |
|
1093 langLength, |
|
1094 NULL, |
|
1095 0, |
|
1096 region, |
|
1097 regionLength, |
|
1098 NULL, |
|
1099 0, |
|
1100 tagBuffer, |
|
1101 sizeof(tagBuffer), |
|
1102 err); |
|
1103 |
|
1104 if(U_FAILURE(*err)) { |
|
1105 goto error; |
|
1106 } |
|
1107 else if (uprv_strnicmp( |
|
1108 maximizedTagBuffer, |
|
1109 tagBuffer, |
|
1110 tagBufferLength) == 0) { |
|
1111 |
|
1112 return createTagString( |
|
1113 lang, |
|
1114 langLength, |
|
1115 NULL, |
|
1116 0, |
|
1117 region, |
|
1118 regionLength, |
|
1119 trailing, |
|
1120 trailingLength, |
|
1121 minimizedLocaleID, |
|
1122 minimizedLocaleIDCapacity, |
|
1123 err); |
|
1124 } |
|
1125 } |
|
1126 |
|
1127 /** |
|
1128 * Finally, try the language and script. This is our last chance, |
|
1129 * since trying with all three subtags would only yield the |
|
1130 * maximal version that we already have. |
|
1131 **/ |
|
1132 if (scriptLength > 0 && regionLength > 0) { |
|
1133 char tagBuffer[ULOC_FULLNAME_CAPACITY]; |
|
1134 |
|
1135 const int32_t tagBufferLength = |
|
1136 createLikelySubtagsString( |
|
1137 lang, |
|
1138 langLength, |
|
1139 script, |
|
1140 scriptLength, |
|
1141 NULL, |
|
1142 0, |
|
1143 NULL, |
|
1144 0, |
|
1145 tagBuffer, |
|
1146 sizeof(tagBuffer), |
|
1147 err); |
|
1148 |
|
1149 if(U_FAILURE(*err)) { |
|
1150 goto error; |
|
1151 } |
|
1152 else if (uprv_strnicmp( |
|
1153 maximizedTagBuffer, |
|
1154 tagBuffer, |
|
1155 tagBufferLength) == 0) { |
|
1156 |
|
1157 return createTagString( |
|
1158 lang, |
|
1159 langLength, |
|
1160 script, |
|
1161 scriptLength, |
|
1162 NULL, |
|
1163 0, |
|
1164 trailing, |
|
1165 trailingLength, |
|
1166 minimizedLocaleID, |
|
1167 minimizedLocaleIDCapacity, |
|
1168 err); |
|
1169 } |
|
1170 } |
|
1171 |
|
1172 { |
|
1173 /** |
|
1174 * If we got here, return the locale ID parameter. |
|
1175 **/ |
|
1176 const int32_t localeIDLength = (int32_t)uprv_strlen(localeID); |
|
1177 |
|
1178 uprv_memcpy( |
|
1179 minimizedLocaleID, |
|
1180 localeID, |
|
1181 localeIDLength <= minimizedLocaleIDCapacity ? |
|
1182 localeIDLength : minimizedLocaleIDCapacity); |
|
1183 |
|
1184 return u_terminateChars( |
|
1185 minimizedLocaleID, |
|
1186 minimizedLocaleIDCapacity, |
|
1187 localeIDLength, |
|
1188 err); |
|
1189 } |
|
1190 |
|
1191 error: |
|
1192 |
|
1193 if (!U_FAILURE(*err)) { |
|
1194 *err = U_ILLEGAL_ARGUMENT_ERROR; |
|
1195 } |
|
1196 |
|
1197 return -1; |
|
1198 |
|
1199 |
|
1200 } |
|
1201 |
|
1202 static UBool |
|
1203 do_canonicalize(const char* localeID, |
|
1204 char* buffer, |
|
1205 int32_t bufferCapacity, |
|
1206 UErrorCode* err) |
|
1207 { |
|
1208 uloc_canonicalize( |
|
1209 localeID, |
|
1210 buffer, |
|
1211 bufferCapacity, |
|
1212 err); |
|
1213 |
|
1214 if (*err == U_STRING_NOT_TERMINATED_WARNING || |
|
1215 *err == U_BUFFER_OVERFLOW_ERROR) { |
|
1216 *err = U_ILLEGAL_ARGUMENT_ERROR; |
|
1217 |
|
1218 return FALSE; |
|
1219 } |
|
1220 else if (U_FAILURE(*err)) { |
|
1221 |
|
1222 return FALSE; |
|
1223 } |
|
1224 else { |
|
1225 return TRUE; |
|
1226 } |
|
1227 } |
|
1228 |
|
1229 U_CAPI int32_t U_EXPORT2 |
|
1230 uloc_addLikelySubtags(const char* localeID, |
|
1231 char* maximizedLocaleID, |
|
1232 int32_t maximizedLocaleIDCapacity, |
|
1233 UErrorCode* err) |
|
1234 { |
|
1235 char localeBuffer[ULOC_FULLNAME_CAPACITY]; |
|
1236 |
|
1237 if (!do_canonicalize( |
|
1238 localeID, |
|
1239 localeBuffer, |
|
1240 sizeof(localeBuffer), |
|
1241 err)) { |
|
1242 return -1; |
|
1243 } |
|
1244 else { |
|
1245 return _uloc_addLikelySubtags( |
|
1246 localeBuffer, |
|
1247 maximizedLocaleID, |
|
1248 maximizedLocaleIDCapacity, |
|
1249 err); |
|
1250 } |
|
1251 } |
|
1252 |
|
1253 U_CAPI int32_t U_EXPORT2 |
|
1254 uloc_minimizeSubtags(const char* localeID, |
|
1255 char* minimizedLocaleID, |
|
1256 int32_t minimizedLocaleIDCapacity, |
|
1257 UErrorCode* err) |
|
1258 { |
|
1259 char localeBuffer[ULOC_FULLNAME_CAPACITY]; |
|
1260 |
|
1261 if (!do_canonicalize( |
|
1262 localeID, |
|
1263 localeBuffer, |
|
1264 sizeof(localeBuffer), |
|
1265 err)) { |
|
1266 return -1; |
|
1267 } |
|
1268 else { |
|
1269 return _uloc_minimizeSubtags( |
|
1270 localeBuffer, |
|
1271 minimizedLocaleID, |
|
1272 minimizedLocaleIDCapacity, |
|
1273 err); |
|
1274 } |
|
1275 } |