|
1 /* |
|
2 ******************************************************************************* |
|
3 * |
|
4 * Copyright (C) 2003-2009, International Business Machines |
|
5 * Corporation and others. All Rights Reserved. |
|
6 * |
|
7 ******************************************************************************* |
|
8 * file name: uidna.cpp |
|
9 * encoding: US-ASCII |
|
10 * tab size: 8 (not used) |
|
11 * indentation:4 |
|
12 * |
|
13 * created on: 2003feb1 |
|
14 * created by: Ram Viswanadha |
|
15 */ |
|
16 |
|
17 #include "unicode/utypes.h" |
|
18 |
|
19 #if !UCONFIG_NO_IDNA |
|
20 |
|
21 #include "unicode/uidna.h" |
|
22 #include "unicode/ustring.h" |
|
23 #include "unicode/usprep.h" |
|
24 #include "punycode.h" |
|
25 #include "ustr_imp.h" |
|
26 #include "cmemory.h" |
|
27 #include "uassert.h" |
|
28 #include "sprpimpl.h" |
|
29 |
|
/* The official IDNA ACE prefix "xn--", stored as lowercase code units: 'x', 'n', '-', '-'. */
static const UChar ACE_PREFIX[] ={ 0x0078,0x006E,0x002d,0x002d } ;
/* Number of code units in ACE_PREFIX. */
#define ACE_PREFIX_LENGTH 4
|
33 |
|
/* Maximum number of code units a converted label may have. */
#define MAX_LABEL_LENGTH 63
/* Size of the on-stack scratch buffers used while processing a single label;
 * larger than MAX_LABEL_LENGTH, and heap buffers are used when it is exceeded. */
#define MAX_LABEL_BUFFER_SIZE 100

/* Maximum number of code units a converted domain name may have. */
#define MAX_DOMAIN_NAME_LENGTH 255
/* Size of the on-stack buffers holding a whole converted domain name.
 * The expansion is parenthesized so the macro stays a single operand when it
 * is used inside a larger expression (e.g. MAX_IDN_BUFFER_SIZE * n). */
#define MAX_IDN_BUFFER_SIZE (MAX_DOMAIN_NAME_LENGTH+1)

/* ASCII code points used by the helpers in this file. */
#define LOWER_CASE_DELTA 0x0020  /* distance from 'A' to 'a' */
#define HYPHEN 0x002D            /* '-' */
#define FULL_STOP 0x002E         /* '.' */
#define CAPITAL_A 0x0041         /* 'A' */
#define CAPITAL_Z 0x005A         /* 'Z' */
|
47 |
|
48 inline static UChar |
|
49 toASCIILower(UChar ch){ |
|
50 if(CAPITAL_A <= ch && ch <= CAPITAL_Z){ |
|
51 return ch + LOWER_CASE_DELTA; |
|
52 } |
|
53 return ch; |
|
54 } |
|
55 |
|
56 inline static UBool |
|
57 startsWithPrefix(const UChar* src , int32_t srcLength){ |
|
58 UBool startsWithPrefix = TRUE; |
|
59 |
|
60 if(srcLength < ACE_PREFIX_LENGTH){ |
|
61 return FALSE; |
|
62 } |
|
63 |
|
64 for(int8_t i=0; i< ACE_PREFIX_LENGTH; i++){ |
|
65 if(toASCIILower(src[i]) != ACE_PREFIX[i]){ |
|
66 startsWithPrefix = FALSE; |
|
67 } |
|
68 } |
|
69 return startsWithPrefix; |
|
70 } |
|
71 |
|
72 |
|
73 inline static int32_t |
|
74 compareCaseInsensitiveASCII(const UChar* s1, int32_t s1Len, |
|
75 const UChar* s2, int32_t s2Len){ |
|
76 |
|
77 int32_t minLength; |
|
78 int32_t lengthResult; |
|
79 |
|
80 // are we comparing different lengths? |
|
81 if(s1Len != s2Len) { |
|
82 if(s1Len < s2Len) { |
|
83 minLength = s1Len; |
|
84 lengthResult = -1; |
|
85 } else { |
|
86 minLength = s2Len; |
|
87 lengthResult = 1; |
|
88 } |
|
89 } else { |
|
90 // ok the lengths are equal |
|
91 minLength = s1Len; |
|
92 lengthResult = 0; |
|
93 } |
|
94 |
|
95 UChar c1,c2; |
|
96 int32_t rc; |
|
97 |
|
98 for(int32_t i =0;/* no condition */;i++) { |
|
99 |
|
100 /* If we reach the ends of both strings then they match */ |
|
101 if(i == minLength) { |
|
102 return lengthResult; |
|
103 } |
|
104 |
|
105 c1 = s1[i]; |
|
106 c2 = s2[i]; |
|
107 |
|
108 /* Case-insensitive comparison */ |
|
109 if(c1!=c2) { |
|
110 rc=(int32_t)toASCIILower(c1)-(int32_t)toASCIILower(c2); |
|
111 if(rc!=0) { |
|
112 lengthResult=rc; |
|
113 break; |
|
114 } |
|
115 } |
|
116 } |
|
117 return lengthResult; |
|
118 } |
|
119 |
|
120 |
|
121 /** |
|
122 * Ascertain if the given code point is a label separator as |
|
123 * defined by the IDNA RFC |
|
124 * |
|
125 * @param ch The code point to be ascertained |
|
126 * @return true if the char is a label separator |
|
127 * @stable ICU 2.8 |
|
128 */ |
|
129 static inline UBool isLabelSeparator(UChar ch){ |
|
130 switch(ch){ |
|
131 case 0x002e: |
|
132 case 0x3002: |
|
133 case 0xFF0E: |
|
134 case 0xFF61: |
|
135 return TRUE; |
|
136 default: |
|
137 return FALSE; |
|
138 } |
|
139 } |
|
140 |
|
141 // returns the length of the label excluding the separator |
|
142 // if *limit == separator then the length returned does not include |
|
143 // the separtor. |
|
144 static inline int32_t |
|
145 getNextSeparator(UChar *src, int32_t srcLength, |
|
146 UChar **limit, UBool *done){ |
|
147 if(srcLength == -1){ |
|
148 int32_t i; |
|
149 for(i=0 ; ;i++){ |
|
150 if(src[i] == 0){ |
|
151 *limit = src + i; // point to null |
|
152 *done = TRUE; |
|
153 return i; |
|
154 } |
|
155 if(isLabelSeparator(src[i])){ |
|
156 *limit = src + (i+1); // go past the delimiter |
|
157 return i; |
|
158 |
|
159 } |
|
160 } |
|
161 }else{ |
|
162 int32_t i; |
|
163 for(i=0;i<srcLength;i++){ |
|
164 if(isLabelSeparator(src[i])){ |
|
165 *limit = src + (i+1); // go past the delimiter |
|
166 return i; |
|
167 } |
|
168 } |
|
169 // we have not found the delimiter |
|
170 // if(i==srcLength) |
|
171 *limit = src+srcLength; |
|
172 *done = TRUE; |
|
173 |
|
174 return i; |
|
175 } |
|
176 } |
|
177 static inline UBool isLDHChar(UChar ch){ |
|
178 // high runner case |
|
179 if(ch>0x007A){ |
|
180 return FALSE; |
|
181 } |
|
182 //[\\u002D \\u0030-\\u0039 \\u0041-\\u005A \\u0061-\\u007A] |
|
183 if( (ch==0x002D) || |
|
184 (0x0030 <= ch && ch <= 0x0039) || |
|
185 (0x0041 <= ch && ch <= 0x005A) || |
|
186 (0x0061 <= ch && ch <= 0x007A) |
|
187 ){ |
|
188 return TRUE; |
|
189 } |
|
190 return FALSE; |
|
191 } |
|
192 |
|
/**
 * Converts a single label to its ASCII form using an already opened
 * StringPrep profile. The "step N" comments below presumably follow the
 * numbering of the ToASCII operation in the IDNA RFC.
 *
 * @param src          The label to convert
 * @param srcLength    Number of code units in src, or -1 if src is NUL-terminated
 * @param dest         Output buffer
 * @param destCapacity Capacity of dest in code units
 * @param options      Bit set; UIDNA_ALLOW_UNASSIGNED and UIDNA_USE_STD3_RULES
 *                     are examined here
 * @param nameprep     Profile handed to usprep_prepare(); not closed here
 * @param parseError   Filled in through uprv_syntaxError() for STD3 and ACE
 *                     prefix violations, and passed on to usprep_prepare()
 * @param status       In/out error code. Set by this function to
 *                     U_MEMORY_ALLOCATION_ERROR, U_IDNA_ZERO_LENGTH_LABEL_ERROR,
 *                     U_IDNA_STD3_ASCII_RULES_ERROR, U_IDNA_ACE_PREFIX_ERROR,
 *                     U_BUFFER_OVERFLOW_ERROR or U_IDNA_LABEL_TOO_LONG_ERROR;
 *                     errors from usprep_prepare()/u_strToPunycode() are kept
 * @return the value of u_terminateUChars(dest, destCapacity, reqLength, status),
 *         where reqLength is the length of the converted label when it was computed
 */
static int32_t
_internal_toASCII(const UChar* src, int32_t srcLength,
                  UChar* dest, int32_t destCapacity,
                  int32_t options,
                  UStringPrepProfile* nameprep,
                  UParseError* parseError,
                  UErrorCode* status)
{

    // TODO Revisit buffer handling. The label should not be over 63 ASCII characters. ICU4J may need to be updated too.
    UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE];
    // b1 holds the (prepared) label, b2 its Punycode form; both start out on
    // the stack and are replaced by heap buffers only when too small
    UChar  *b1 = b1Stack, *b2 = b2Stack;
    int32_t b1Len=0, b2Len,
            b1Capacity = MAX_LABEL_BUFFER_SIZE,
            b2Capacity = MAX_LABEL_BUFFER_SIZE ,
            reqLength=0;

    int32_t namePrepOptions = ((options & UIDNA_ALLOW_UNASSIGNED) != 0) ? USPREP_ALLOW_UNASSIGNED: 0;
    // never allocated in this function: case flags are not preserved (see step 6)
    UBool* caseFlags = NULL;

    // assume the source contains only ASCII code points until proven otherwise
    UBool srcIsASCII  = TRUE;
    // assume the source contains only LDH code points until proven otherwise
    UBool srcIsLDH = TRUE;

    int32_t j=0;

    //get the options
    UBool useSTD3ASCIIRules = (UBool)((options & UIDNA_USE_STD3_RULES) != 0);

    // index of the last non-LDH ASCII code unit seen, for error reporting
    int32_t failPos = -1;

    if(srcLength == -1){
        srcLength = u_strlen(src);
    }

    // make sure b1 can take a verbatim copy of the source
    if(srcLength > b1Capacity){
        b1 = (UChar*) uprv_malloc(srcLength * U_SIZEOF_UCHAR);
        if(b1==NULL){
            *status = U_MEMORY_ALLOCATION_ERROR;
            goto CLEANUP;
        }
        b1Capacity = srcLength;
    }

    // step 1: copy the source into b1 and note whether it is all ASCII
    for( j=0;j<srcLength;j++){
        if(src[j] > 0x7F){
            srcIsASCII = FALSE;
        }
        b1[b1Len++] = src[j];
    }

    // step 2 is performed only if the source contains non ASCII
    if(srcIsASCII == FALSE){

        // step 2: run the source through the profile, overwriting the copy in b1
        b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Capacity, namePrepOptions, parseError, status);

        if(*status == U_BUFFER_OVERFLOW_ERROR){
            // redo processing of string
            // we do not have enough room so grow the buffer
            // to the length reported by the first attempt
            if(b1 != b1Stack){
                uprv_free(b1);
            }
            b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
            if(b1==NULL){
                *status = U_MEMORY_ALLOCATION_ERROR;
                goto CLEANUP;
            }

            *status = U_ZERO_ERROR; // reset error

            b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Len, namePrepOptions, parseError, status);
        }
    }
    // error bail out
    if(U_FAILURE(*status)){
        goto CLEANUP;
    }
    if(b1Len == 0){
        *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR;
        goto CLEANUP;
    }

    // for step 3 & 4: re-examine the label now held in b1
    srcIsASCII = TRUE;
    for( j=0;j<b1Len;j++){
        // check if output of usprep_prepare is all ASCII
        if(b1[j] > 0x7F){
            srcIsASCII = FALSE;
        }else if(isLDHChar(b1[j])==FALSE){  // if the char is in ASCII range verify that it is an LDH character
            srcIsLDH = FALSE;
            failPos = j;
        }
    }
    if(useSTD3ASCIIRules == TRUE){
        // verify 3a and 3b
        // 3(a) Verify the absence of non-LDH ASCII code points; that is, the
        //  absence of 0..2C, 2E..2F, 3A..40, 5B..60, and 7B..7F.
        // 3(b) Verify the absence of leading and trailing hyphen-minus; that
        //  is, the absence of U+002D at the beginning and end of the
        //  sequence.
        if( srcIsLDH == FALSE /* a non-LDH ASCII code point was found above */
            || b1[0] ==  HYPHEN || b1[b1Len-1] == HYPHEN){
            *status = U_IDNA_STD3_ASCII_RULES_ERROR;

            /* populate the parseError struct */
            if(srcIsLDH==FALSE){
                // failPos was set to the index of the offending code unit
                uprv_syntaxError(b1,failPos, b1Len,parseError);
            }else if(b1[0] == HYPHEN){
                // fail position is 0
                uprv_syntaxError(b1,0,b1Len,parseError);
            }else{
                // the last index in the source is always length-1
                uprv_syntaxError(b1, (b1Len>0) ? b1Len-1 : b1Len, b1Len,parseError);
            }

            goto CLEANUP;
        }
    }
    // Step 4: if the source is ASCII then proceed to step 8
    if(srcIsASCII){
        if(b1Len <= destCapacity){
            uprv_memmove(dest, b1, b1Len * U_SIZEOF_UCHAR);
            reqLength = b1Len;
        }else{
            // does not fit: only report the needed length; the overflow status
            // is left to u_terminateUChars() at CLEANUP
            reqLength = b1Len;
            goto CLEANUP;
        }
    }else{
        // step 5 : verify the sequence does not begin with ACE prefix
        if(!startsWithPrefix(b1,b1Len)){

            //step 6: encode the sequence with punycode

            // do not preserve the case flags for now!
            // TODO: Preserve the case while implementing the RFE
            // caseFlags = (UBool*) uprv_malloc(b1Len * sizeof(UBool));
            // uprv_memset(caseFlags,TRUE,b1Len);

            b2Len = u_strToPunycode(b1,b1Len,b2,b2Capacity,caseFlags, status);

            if(*status == U_BUFFER_OVERFLOW_ERROR){
                // redo processing of string
                /* we do not have enough room so grow the buffer*/
                b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
                if(b2 == NULL){
                    *status = U_MEMORY_ALLOCATION_ERROR;
                    goto CLEANUP;
                }

                *status = U_ZERO_ERROR; // reset error

                b2Len = u_strToPunycode(b1,b1Len,b2,b2Len,caseFlags, status);
            }
            //error bail out
            if(U_FAILURE(*status)){
                goto CLEANUP;
            }
            // TODO : Reconsider while implementing the case preserve RFE
            // convert all codepoints to lower case ASCII
            // toASCIILower(b2,b2Len);
            reqLength = b2Len+ACE_PREFIX_LENGTH;

            if(reqLength > destCapacity){
                *status = U_BUFFER_OVERFLOW_ERROR;
                goto CLEANUP;
            }
            //Step 7: prepend the ACE prefix
            uprv_memcpy(dest,ACE_PREFIX,ACE_PREFIX_LENGTH * U_SIZEOF_UCHAR);
            //Step 6: copy the contents in b2 into dest
            uprv_memcpy(dest+ACE_PREFIX_LENGTH, b2, b2Len * U_SIZEOF_UCHAR);

        }else{
            *status = U_IDNA_ACE_PREFIX_ERROR;
            //position of failure is 0
            uprv_syntaxError(b1,0,b1Len,parseError);
            goto CLEANUP;
        }
    }
    // step 8: verify the length of label
    // (not reached when the output did not fit into dest, see the gotos above)
    if(reqLength > MAX_LABEL_LENGTH){
        *status = U_IDNA_LABEL_TOO_LONG_ERROR;
    }

CLEANUP:
    // release the work buffers only if they were moved to the heap
    if(b1 != b1Stack){
        uprv_free(b1);
    }
    if(b2 != b2Stack){
        uprv_free(b2);
    }
    uprv_free(caseFlags);

    return u_terminateUChars(dest, destCapacity, reqLength, status);
}
|
392 |
|
393 static int32_t |
|
394 _internal_toUnicode(const UChar* src, int32_t srcLength, |
|
395 UChar* dest, int32_t destCapacity, |
|
396 int32_t options, |
|
397 UStringPrepProfile* nameprep, |
|
398 UParseError* parseError, |
|
399 UErrorCode* status) |
|
400 { |
|
401 |
|
402 //get the options |
|
403 //UBool useSTD3ASCIIRules = (UBool)((options & UIDNA_USE_STD3_RULES) != 0); |
|
404 int32_t namePrepOptions = ((options & UIDNA_ALLOW_UNASSIGNED) != 0) ? USPREP_ALLOW_UNASSIGNED: 0; |
|
405 |
|
406 // TODO Revisit buffer handling. The label should not be over 63 ASCII characters. ICU4J may need to be updated too. |
|
407 UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE], b3Stack[MAX_LABEL_BUFFER_SIZE]; |
|
408 |
|
409 //initialize pointers to stack buffers |
|
410 UChar *b1 = b1Stack, *b2 = b2Stack, *b1Prime=NULL, *b3=b3Stack; |
|
411 int32_t b1Len, b2Len, b1PrimeLen, b3Len, |
|
412 b1Capacity = MAX_LABEL_BUFFER_SIZE, |
|
413 b2Capacity = MAX_LABEL_BUFFER_SIZE, |
|
414 b3Capacity = MAX_LABEL_BUFFER_SIZE, |
|
415 reqLength=0; |
|
416 |
|
417 b1Len = 0; |
|
418 UBool* caseFlags = NULL; |
|
419 |
|
420 UBool srcIsASCII = TRUE; |
|
421 /*UBool srcIsLDH = TRUE; |
|
422 int32_t failPos =0;*/ |
|
423 |
|
424 // step 1: find out if all the codepoints in src are ASCII |
|
425 if(srcLength==-1){ |
|
426 srcLength = 0; |
|
427 for(;src[srcLength]!=0;){ |
|
428 if(src[srcLength]> 0x7f){ |
|
429 srcIsASCII = FALSE; |
|
430 }/*else if(isLDHChar(src[srcLength])==FALSE){ |
|
431 // here we do not assemble surrogates |
|
432 // since we know that LDH code points |
|
433 // are in the ASCII range only |
|
434 srcIsLDH = FALSE; |
|
435 failPos = srcLength; |
|
436 }*/ |
|
437 srcLength++; |
|
438 } |
|
439 }else if(srcLength > 0){ |
|
440 for(int32_t j=0; j<srcLength; j++){ |
|
441 if(src[j]> 0x7f){ |
|
442 srcIsASCII = FALSE; |
|
443 }/*else if(isLDHChar(src[j])==FALSE){ |
|
444 // here we do not assemble surrogates |
|
445 // since we know that LDH code points |
|
446 // are in the ASCII range only |
|
447 srcIsLDH = FALSE; |
|
448 failPos = j; |
|
449 }*/ |
|
450 } |
|
451 }else{ |
|
452 return 0; |
|
453 } |
|
454 |
|
455 if(srcIsASCII == FALSE){ |
|
456 // step 2: process the string |
|
457 b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Capacity, namePrepOptions, parseError, status); |
|
458 if(*status == U_BUFFER_OVERFLOW_ERROR){ |
|
459 // redo processing of string |
|
460 /* we do not have enough room so grow the buffer*/ |
|
461 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); |
|
462 if(b1==NULL){ |
|
463 *status = U_MEMORY_ALLOCATION_ERROR; |
|
464 goto CLEANUP; |
|
465 } |
|
466 |
|
467 *status = U_ZERO_ERROR; // reset error |
|
468 |
|
469 b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Len, namePrepOptions, parseError, status); |
|
470 } |
|
471 //bail out on error |
|
472 if(U_FAILURE(*status)){ |
|
473 goto CLEANUP; |
|
474 } |
|
475 }else{ |
|
476 |
|
477 //just point src to b1 |
|
478 b1 = (UChar*) src; |
|
479 b1Len = srcLength; |
|
480 } |
|
481 |
|
482 // The RFC states that |
|
483 // <quote> |
|
484 // ToUnicode never fails. If any step fails, then the original input |
|
485 // is returned immediately in that step. |
|
486 // </quote> |
|
487 |
|
488 //step 3: verify ACE Prefix |
|
489 if(startsWithPrefix(b1,b1Len)){ |
|
490 |
|
491 //step 4: Remove the ACE Prefix |
|
492 b1Prime = b1 + ACE_PREFIX_LENGTH; |
|
493 b1PrimeLen = b1Len - ACE_PREFIX_LENGTH; |
|
494 |
|
495 //step 5: Decode using punycode |
|
496 b2Len = u_strFromPunycode(b1Prime, b1PrimeLen, b2, b2Capacity, caseFlags,status); |
|
497 |
|
498 if(*status == U_BUFFER_OVERFLOW_ERROR){ |
|
499 // redo processing of string |
|
500 /* we do not have enough room so grow the buffer*/ |
|
501 b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR); |
|
502 if(b2==NULL){ |
|
503 *status = U_MEMORY_ALLOCATION_ERROR; |
|
504 goto CLEANUP; |
|
505 } |
|
506 |
|
507 *status = U_ZERO_ERROR; // reset error |
|
508 |
|
509 b2Len = u_strFromPunycode(b1Prime, b1PrimeLen, b2, b2Len, caseFlags, status); |
|
510 } |
|
511 |
|
512 |
|
513 //step 6:Apply toASCII |
|
514 b3Len = uidna_toASCII(b2, b2Len, b3, b3Capacity, options, parseError, status); |
|
515 |
|
516 if(*status == U_BUFFER_OVERFLOW_ERROR){ |
|
517 // redo processing of string |
|
518 /* we do not have enough room so grow the buffer*/ |
|
519 b3 = (UChar*) uprv_malloc(b3Len * U_SIZEOF_UCHAR); |
|
520 if(b3==NULL){ |
|
521 *status = U_MEMORY_ALLOCATION_ERROR; |
|
522 goto CLEANUP; |
|
523 } |
|
524 |
|
525 *status = U_ZERO_ERROR; // reset error |
|
526 |
|
527 b3Len = uidna_toASCII(b2,b2Len,b3,b3Len,options,parseError, status); |
|
528 |
|
529 } |
|
530 //bail out on error |
|
531 if(U_FAILURE(*status)){ |
|
532 goto CLEANUP; |
|
533 } |
|
534 |
|
535 //step 7: verify |
|
536 if(compareCaseInsensitiveASCII(b1, b1Len, b3, b3Len) !=0){ |
|
537 // Cause the original to be returned. |
|
538 *status = U_IDNA_VERIFICATION_ERROR; |
|
539 goto CLEANUP; |
|
540 } |
|
541 |
|
542 //step 8: return output of step 5 |
|
543 reqLength = b2Len; |
|
544 if(b2Len <= destCapacity) { |
|
545 uprv_memmove(dest, b2, b2Len * U_SIZEOF_UCHAR); |
|
546 } |
|
547 } |
|
548 else{ |
|
549 // See the start of this if statement for why this is commented out. |
|
550 // verify that STD3 ASCII rules are satisfied |
|
551 /*if(useSTD3ASCIIRules == TRUE){ |
|
552 if( srcIsLDH == FALSE // source contains some non-LDH characters |
|
553 || src[0] == HYPHEN || src[srcLength-1] == HYPHEN){ |
|
554 *status = U_IDNA_STD3_ASCII_RULES_ERROR; |
|
555 |
|
556 // populate the parseError struct |
|
557 if(srcIsLDH==FALSE){ |
|
558 // failPos is always set the index of failure |
|
559 uprv_syntaxError(src,failPos, srcLength,parseError); |
|
560 }else if(src[0] == HYPHEN){ |
|
561 // fail position is 0 |
|
562 uprv_syntaxError(src,0,srcLength,parseError); |
|
563 }else{ |
|
564 // the last index in the source is always length-1 |
|
565 uprv_syntaxError(src, (srcLength>0) ? srcLength-1 : srcLength, srcLength,parseError); |
|
566 } |
|
567 |
|
568 goto CLEANUP; |
|
569 } |
|
570 }*/ |
|
571 // just return the source |
|
572 //copy the source to destination |
|
573 if(srcLength <= destCapacity){ |
|
574 uprv_memmove(dest,src,srcLength * U_SIZEOF_UCHAR); |
|
575 } |
|
576 reqLength = srcLength; |
|
577 } |
|
578 |
|
579 |
|
580 CLEANUP: |
|
581 |
|
582 if(b1 != b1Stack && b1!=src){ |
|
583 uprv_free(b1); |
|
584 } |
|
585 if(b2 != b2Stack){ |
|
586 uprv_free(b2); |
|
587 } |
|
588 uprv_free(caseFlags); |
|
589 |
|
590 // The RFC states that |
|
591 // <quote> |
|
592 // ToUnicode never fails. If any step fails, then the original input |
|
593 // is returned immediately in that step. |
|
594 // </quote> |
|
595 // So if any step fails lets copy source to destination |
|
596 if(U_FAILURE(*status)){ |
|
597 //copy the source to destination |
|
598 if(dest && srcLength <= destCapacity){ |
|
599 // srcLength should have already been set earlier. |
|
600 U_ASSERT(srcLength >= 0); |
|
601 uprv_memmove(dest,src,srcLength * U_SIZEOF_UCHAR); |
|
602 } |
|
603 reqLength = srcLength; |
|
604 *status = U_ZERO_ERROR; |
|
605 } |
|
606 |
|
607 return u_terminateUChars(dest, destCapacity, reqLength, status); |
|
608 } |
|
609 |
|
610 U_CAPI int32_t U_EXPORT2 |
|
611 uidna_toASCII(const UChar* src, int32_t srcLength, |
|
612 UChar* dest, int32_t destCapacity, |
|
613 int32_t options, |
|
614 UParseError* parseError, |
|
615 UErrorCode* status){ |
|
616 |
|
617 if(status == NULL || U_FAILURE(*status)){ |
|
618 return 0; |
|
619 } |
|
620 if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){ |
|
621 *status = U_ILLEGAL_ARGUMENT_ERROR; |
|
622 return 0; |
|
623 } |
|
624 |
|
625 UStringPrepProfile* nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, status); |
|
626 |
|
627 if(U_FAILURE(*status)){ |
|
628 return -1; |
|
629 } |
|
630 |
|
631 int32_t retLen = _internal_toASCII(src, srcLength, dest, destCapacity, options, nameprep, parseError, status); |
|
632 |
|
633 /* close the profile*/ |
|
634 usprep_close(nameprep); |
|
635 |
|
636 return retLen; |
|
637 } |
|
638 |
|
639 U_CAPI int32_t U_EXPORT2 |
|
640 uidna_toUnicode(const UChar* src, int32_t srcLength, |
|
641 UChar* dest, int32_t destCapacity, |
|
642 int32_t options, |
|
643 UParseError* parseError, |
|
644 UErrorCode* status){ |
|
645 |
|
646 if(status == NULL || U_FAILURE(*status)){ |
|
647 return 0; |
|
648 } |
|
649 if( (src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){ |
|
650 *status = U_ILLEGAL_ARGUMENT_ERROR; |
|
651 return 0; |
|
652 } |
|
653 |
|
654 UStringPrepProfile* nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, status); |
|
655 |
|
656 if(U_FAILURE(*status)){ |
|
657 return -1; |
|
658 } |
|
659 |
|
660 int32_t retLen = _internal_toUnicode(src, srcLength, dest, destCapacity, options, nameprep, parseError, status); |
|
661 |
|
662 usprep_close(nameprep); |
|
663 |
|
664 return retLen; |
|
665 } |
|
666 |
|
667 |
|
668 U_CAPI int32_t U_EXPORT2 |
|
669 uidna_IDNToASCII( const UChar *src, int32_t srcLength, |
|
670 UChar* dest, int32_t destCapacity, |
|
671 int32_t options, |
|
672 UParseError *parseError, |
|
673 UErrorCode *status){ |
|
674 |
|
675 if(status == NULL || U_FAILURE(*status)){ |
|
676 return 0; |
|
677 } |
|
678 if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){ |
|
679 *status = U_ILLEGAL_ARGUMENT_ERROR; |
|
680 return 0; |
|
681 } |
|
682 |
|
683 int32_t reqLength = 0; |
|
684 |
|
685 UStringPrepProfile* nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, status); |
|
686 |
|
687 if(U_FAILURE(*status)){ |
|
688 return 0; |
|
689 } |
|
690 |
|
691 //initialize pointers |
|
692 UChar *delimiter = (UChar*)src; |
|
693 UChar *labelStart = (UChar*)src; |
|
694 UChar *currentDest = (UChar*) dest; |
|
695 int32_t remainingLen = srcLength; |
|
696 int32_t remainingDestCapacity = destCapacity; |
|
697 int32_t labelLen = 0, labelReqLength = 0; |
|
698 UBool done = FALSE; |
|
699 |
|
700 |
|
701 for(;;){ |
|
702 |
|
703 labelLen = getNextSeparator(labelStart,remainingLen, &delimiter,&done); |
|
704 labelReqLength = 0; |
|
705 if(!(labelLen==0 && done)){// make sure this is not a root label separator. |
|
706 |
|
707 labelReqLength = _internal_toASCII( labelStart, labelLen, |
|
708 currentDest, remainingDestCapacity, |
|
709 options, nameprep, |
|
710 parseError, status); |
|
711 |
|
712 if(*status == U_BUFFER_OVERFLOW_ERROR){ |
|
713 |
|
714 *status = U_ZERO_ERROR; // reset error |
|
715 remainingDestCapacity = 0; |
|
716 } |
|
717 } |
|
718 |
|
719 |
|
720 if(U_FAILURE(*status)){ |
|
721 break; |
|
722 } |
|
723 |
|
724 reqLength +=labelReqLength; |
|
725 // adjust the destination pointer |
|
726 if(labelReqLength < remainingDestCapacity){ |
|
727 currentDest = currentDest + labelReqLength; |
|
728 remainingDestCapacity -= labelReqLength; |
|
729 }else{ |
|
730 // should never occur |
|
731 remainingDestCapacity = 0; |
|
732 } |
|
733 |
|
734 if(done == TRUE){ |
|
735 break; |
|
736 } |
|
737 |
|
738 // add the label separator |
|
739 if(remainingDestCapacity > 0){ |
|
740 *currentDest++ = FULL_STOP; |
|
741 remainingDestCapacity--; |
|
742 } |
|
743 reqLength++; |
|
744 |
|
745 labelStart = delimiter; |
|
746 if(remainingLen >0 ){ |
|
747 remainingLen = (int32_t)(srcLength - (delimiter - src)); |
|
748 } |
|
749 |
|
750 } |
|
751 |
|
752 if(reqLength > MAX_DOMAIN_NAME_LENGTH){ |
|
753 *status = U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR; |
|
754 } |
|
755 |
|
756 usprep_close(nameprep); |
|
757 |
|
758 return u_terminateUChars(dest, destCapacity, reqLength, status); |
|
759 } |
|
760 |
|
761 U_CAPI int32_t U_EXPORT2 |
|
762 uidna_IDNToUnicode( const UChar* src, int32_t srcLength, |
|
763 UChar* dest, int32_t destCapacity, |
|
764 int32_t options, |
|
765 UParseError* parseError, |
|
766 UErrorCode* status){ |
|
767 |
|
768 if(status == NULL || U_FAILURE(*status)){ |
|
769 return 0; |
|
770 } |
|
771 if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){ |
|
772 *status = U_ILLEGAL_ARGUMENT_ERROR; |
|
773 return 0; |
|
774 } |
|
775 |
|
776 int32_t reqLength = 0; |
|
777 |
|
778 UStringPrepProfile* nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, status); |
|
779 |
|
780 if(U_FAILURE(*status)){ |
|
781 return 0; |
|
782 } |
|
783 |
|
784 //initialize pointers |
|
785 UChar *delimiter = (UChar*)src; |
|
786 UChar *labelStart = (UChar*)src; |
|
787 UChar *currentDest = (UChar*) dest; |
|
788 int32_t remainingLen = srcLength; |
|
789 int32_t remainingDestCapacity = destCapacity; |
|
790 int32_t labelLen = 0, labelReqLength = 0; |
|
791 UBool done = FALSE; |
|
792 |
|
793 for(;;){ |
|
794 |
|
795 labelLen = getNextSeparator(labelStart,remainingLen, &delimiter,&done); |
|
796 |
|
797 // The RFC states that |
|
798 // <quote> |
|
799 // ToUnicode never fails. If any step fails, then the original input |
|
800 // is returned immediately in that step. |
|
801 // </quote> |
|
802 // _internal_toUnicode will copy the label. |
|
803 /*if(labelLen==0 && done==FALSE){ |
|
804 *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR; |
|
805 break; |
|
806 }*/ |
|
807 |
|
808 labelReqLength = _internal_toUnicode(labelStart, labelLen, |
|
809 currentDest, remainingDestCapacity, |
|
810 options, nameprep, |
|
811 parseError, status); |
|
812 |
|
813 if(*status == U_BUFFER_OVERFLOW_ERROR){ |
|
814 *status = U_ZERO_ERROR; // reset error |
|
815 remainingDestCapacity = 0; |
|
816 } |
|
817 |
|
818 if(U_FAILURE(*status)){ |
|
819 break; |
|
820 } |
|
821 |
|
822 reqLength +=labelReqLength; |
|
823 // adjust the destination pointer |
|
824 if(labelReqLength < remainingDestCapacity){ |
|
825 currentDest = currentDest + labelReqLength; |
|
826 remainingDestCapacity -= labelReqLength; |
|
827 }else{ |
|
828 // should never occur |
|
829 remainingDestCapacity = 0; |
|
830 } |
|
831 |
|
832 if(done == TRUE){ |
|
833 break; |
|
834 } |
|
835 |
|
836 // add the label separator |
|
837 // Unlike the ToASCII operation we don't normalize the label separators |
|
838 if(remainingDestCapacity > 0){ |
|
839 *currentDest++ = *(labelStart + labelLen); |
|
840 remainingDestCapacity--; |
|
841 } |
|
842 reqLength++; |
|
843 |
|
844 labelStart = delimiter; |
|
845 if(remainingLen >0 ){ |
|
846 remainingLen = (int32_t)(srcLength - (delimiter - src)); |
|
847 } |
|
848 |
|
849 } |
|
850 |
|
851 if(reqLength > MAX_DOMAIN_NAME_LENGTH){ |
|
852 *status = U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR; |
|
853 } |
|
854 |
|
855 usprep_close(nameprep); |
|
856 |
|
857 return u_terminateUChars(dest, destCapacity, reqLength, status); |
|
858 } |
|
859 |
|
860 U_CAPI int32_t U_EXPORT2 |
|
861 uidna_compare( const UChar *s1, int32_t length1, |
|
862 const UChar *s2, int32_t length2, |
|
863 int32_t options, |
|
864 UErrorCode* status){ |
|
865 |
|
866 if(status == NULL || U_FAILURE(*status)){ |
|
867 return -1; |
|
868 } |
|
869 |
|
870 UChar b1Stack[MAX_IDN_BUFFER_SIZE], b2Stack[MAX_IDN_BUFFER_SIZE]; |
|
871 UChar *b1 = b1Stack, *b2 = b2Stack; |
|
872 int32_t b1Len, b2Len, b1Capacity = MAX_IDN_BUFFER_SIZE, b2Capacity = MAX_IDN_BUFFER_SIZE; |
|
873 int32_t result=-1; |
|
874 |
|
875 UParseError parseError; |
|
876 |
|
877 b1Len = uidna_IDNToASCII(s1, length1, b1, b1Capacity, options, &parseError, status); |
|
878 if(*status == U_BUFFER_OVERFLOW_ERROR){ |
|
879 // redo processing of string |
|
880 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); |
|
881 if(b1==NULL){ |
|
882 *status = U_MEMORY_ALLOCATION_ERROR; |
|
883 goto CLEANUP; |
|
884 } |
|
885 |
|
886 *status = U_ZERO_ERROR; // reset error |
|
887 |
|
888 b1Len = uidna_IDNToASCII(s1,length1,b1,b1Len, options, &parseError, status); |
|
889 |
|
890 } |
|
891 |
|
892 b2Len = uidna_IDNToASCII(s2,length2, b2,b2Capacity, options, &parseError, status); |
|
893 if(*status == U_BUFFER_OVERFLOW_ERROR){ |
|
894 // redo processing of string |
|
895 b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR); |
|
896 if(b2==NULL){ |
|
897 *status = U_MEMORY_ALLOCATION_ERROR; |
|
898 goto CLEANUP; |
|
899 } |
|
900 |
|
901 *status = U_ZERO_ERROR; // reset error |
|
902 |
|
903 b2Len = uidna_IDNToASCII(s2, length2, b2, b2Len, options, &parseError, status); |
|
904 |
|
905 } |
|
906 // when toASCII is applied all label separators are replaced with FULL_STOP |
|
907 result = compareCaseInsensitiveASCII(b1,b1Len,b2,b2Len); |
|
908 |
|
909 CLEANUP: |
|
910 if(b1 != b1Stack){ |
|
911 uprv_free(b1); |
|
912 } |
|
913 |
|
914 if(b2 != b2Stack){ |
|
915 uprv_free(b2); |
|
916 } |
|
917 |
|
918 return result; |
|
919 } |
|
920 |
|
921 #endif /* #if !UCONFIG_NO_IDNA */ |