|
1 /* |
|
2 ******************************************************************************* |
|
3 * |
|
4 * Copyright (C) 2001-2012, International Business Machines |
|
5 * Corporation and others. All Rights Reserved. |
|
6 * |
|
7 ******************************************************************************* |
|
8 * file name: ustr_wcs.cpp |
|
9 * encoding: US-ASCII |
|
10 * tab size: 8 (not used) |
|
11 * indentation:4 |
|
12 * |
|
13 * created on: 2004sep07 |
|
14 * created by: Markus W. Scherer |
|
15 * |
|
16 * u_strToWCS() and u_strFromWCS() functions |
|
17 * moved here from ustrtrns.c for better modularization. |
|
18 */ |
|
19 |
|
20 #include "unicode/utypes.h" |
|
21 #include "unicode/ustring.h" |
|
22 #include "cstring.h" |
|
23 #include "cwchar.h" |
|
24 #include "cmemory.h" |
|
25 #include "ustr_imp.h" |
|
26 #include "ustr_cnv.h" |
|
27 |
|
28 #if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION |
|
29 |
|
30 #define _STACK_BUFFER_CAPACITY 1000 |
|
31 #define _BUFFER_CAPACITY_MULTIPLIER 2 |
|
32 |
|
33 #if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32) |
|
34 // TODO: We should use CharString for char buffers and UnicodeString for UChar buffers. |
|
35 // Then we could change this to work only with wchar_t buffers. |
|
36 static inline UBool |
|
37 u_growAnyBufferFromStatic(void *context, |
|
38 void **pBuffer, int32_t *pCapacity, int32_t reqCapacity, |
|
39 int32_t length, int32_t size) { |
|
40 // Use char* not void* to avoid the compiler's strict-aliasing assumptions |
|
41 // and related warnings. |
|
42 char *newBuffer=(char *)uprv_malloc(reqCapacity*size); |
|
43 if(newBuffer!=NULL) { |
|
44 if(length>0) { |
|
45 uprv_memcpy(newBuffer, *pBuffer, length*size); |
|
46 } |
|
47 *pCapacity=reqCapacity; |
|
48 } else { |
|
49 *pCapacity=0; |
|
50 } |
|
51 |
|
52 /* release the old pBuffer if it was not statically allocated */ |
|
53 if(*pBuffer!=(char *)context) { |
|
54 uprv_free(*pBuffer); |
|
55 } |
|
56 |
|
57 *pBuffer=newBuffer; |
|
58 return (UBool)(newBuffer!=NULL); |
|
59 } |
|
60 |
|
61 /* helper function */ |
|
62 static wchar_t* |
|
63 _strToWCS(wchar_t *dest, |
|
64 int32_t destCapacity, |
|
65 int32_t *pDestLength, |
|
66 const UChar *src, |
|
67 int32_t srcLength, |
|
68 UErrorCode *pErrorCode){ |
|
69 |
|
70 char stackBuffer [_STACK_BUFFER_CAPACITY]; |
|
71 char* tempBuf = stackBuffer; |
|
72 int32_t tempBufCapacity = _STACK_BUFFER_CAPACITY; |
|
73 char* tempBufLimit = stackBuffer + tempBufCapacity; |
|
74 UConverter* conv = NULL; |
|
75 char* saveBuf = tempBuf; |
|
76 wchar_t* intTarget=NULL; |
|
77 int32_t intTargetCapacity=0; |
|
78 int count=0,retVal=0; |
|
79 |
|
80 const UChar *pSrcLimit =NULL; |
|
81 const UChar *pSrc = src; |
|
82 |
|
83 conv = u_getDefaultConverter(pErrorCode); |
|
84 |
|
85 if(U_FAILURE(*pErrorCode)){ |
|
86 return NULL; |
|
87 } |
|
88 |
|
89 if(srcLength == -1){ |
|
90 srcLength = u_strlen(pSrc); |
|
91 } |
|
92 |
|
93 pSrcLimit = pSrc + srcLength; |
|
94 |
|
95 for(;;) { |
|
96 /* reset the error state */ |
|
97 *pErrorCode = U_ZERO_ERROR; |
|
98 |
|
99 /* convert to chars using default converter */ |
|
100 ucnv_fromUnicode(conv,&tempBuf,tempBufLimit,&pSrc,pSrcLimit,NULL,(UBool)(pSrc==pSrcLimit),pErrorCode); |
|
101 count =(tempBuf - saveBuf); |
|
102 |
|
103 /* This should rarely occur */ |
|
104 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR){ |
|
105 tempBuf = saveBuf; |
|
106 |
|
107 /* we dont have enough room on the stack grow the buffer */ |
|
108 int32_t newCapacity = 2 * srcLength; |
|
109 if(newCapacity <= tempBufCapacity) { |
|
110 newCapacity = _BUFFER_CAPACITY_MULTIPLIER * tempBufCapacity; |
|
111 } |
|
112 if(!u_growAnyBufferFromStatic(stackBuffer,(void**) &tempBuf, &tempBufCapacity, |
|
113 newCapacity, count, 1)) { |
|
114 goto cleanup; |
|
115 } |
|
116 |
|
117 saveBuf = tempBuf; |
|
118 tempBufLimit = tempBuf + tempBufCapacity; |
|
119 tempBuf = tempBuf + count; |
|
120 |
|
121 } else { |
|
122 break; |
|
123 } |
|
124 } |
|
125 |
|
126 if(U_FAILURE(*pErrorCode)){ |
|
127 goto cleanup; |
|
128 } |
|
129 |
|
130 /* done with conversion null terminate the char buffer */ |
|
131 if(count>=tempBufCapacity){ |
|
132 tempBuf = saveBuf; |
|
133 /* we dont have enough room on the stack grow the buffer */ |
|
134 if(!u_growAnyBufferFromStatic(stackBuffer,(void**) &tempBuf, &tempBufCapacity, |
|
135 count+1, count, 1)) { |
|
136 goto cleanup; |
|
137 } |
|
138 saveBuf = tempBuf; |
|
139 } |
|
140 |
|
141 saveBuf[count]=0; |
|
142 |
|
143 |
|
144 /* allocate more space than required |
|
145 * here we assume that every char requires |
|
146 * no more than 2 wchar_ts |
|
147 */ |
|
148 intTargetCapacity = (count * _BUFFER_CAPACITY_MULTIPLIER + 1) /*for null termination */; |
|
149 intTarget = (wchar_t*)uprv_malloc( intTargetCapacity * sizeof(wchar_t) ); |
|
150 |
|
151 if(intTarget){ |
|
152 |
|
153 int32_t nulLen = 0; |
|
154 int32_t remaining = intTargetCapacity; |
|
155 wchar_t* pIntTarget=intTarget; |
|
156 tempBuf = saveBuf; |
|
157 |
|
158 /* now convert the mbs to wcs */ |
|
159 for(;;){ |
|
160 |
|
161 /* we can call the system API since we are sure that |
|
162 * there is atleast 1 null in the input |
|
163 */ |
|
164 retVal = uprv_mbstowcs(pIntTarget,(tempBuf+nulLen),remaining); |
|
165 |
|
166 if(retVal==-1){ |
|
167 *pErrorCode = U_INVALID_CHAR_FOUND; |
|
168 break; |
|
169 }else if(retVal== remaining){/* should never occur */ |
|
170 int numWritten = (pIntTarget-intTarget); |
|
171 u_growAnyBufferFromStatic(NULL,(void**) &intTarget, |
|
172 &intTargetCapacity, |
|
173 intTargetCapacity * _BUFFER_CAPACITY_MULTIPLIER, |
|
174 numWritten, |
|
175 sizeof(wchar_t)); |
|
176 pIntTarget = intTarget; |
|
177 remaining=intTargetCapacity; |
|
178 |
|
179 if(nulLen!=count){ /*there are embedded nulls*/ |
|
180 pIntTarget+=numWritten; |
|
181 remaining-=numWritten; |
|
182 } |
|
183 |
|
184 }else{ |
|
185 int32_t nulVal; |
|
186 /*scan for nulls */ |
|
187 /* we donot check for limit since tempBuf is null terminated */ |
|
188 while(tempBuf[nulLen++] != 0){ |
|
189 } |
|
190 nulVal = (nulLen < srcLength) ? 1 : 0; |
|
191 pIntTarget = pIntTarget + retVal+nulVal; |
|
192 remaining -=(retVal+nulVal); |
|
193 |
|
194 /* check if we have reached the source limit*/ |
|
195 if(nulLen>=(count)){ |
|
196 break; |
|
197 } |
|
198 } |
|
199 } |
|
200 count = (int32_t)(pIntTarget-intTarget); |
|
201 |
|
202 if(0 < count && count <= destCapacity){ |
|
203 uprv_memcpy(dest,intTarget,count*sizeof(wchar_t)); |
|
204 } |
|
205 |
|
206 if(pDestLength){ |
|
207 *pDestLength = count; |
|
208 } |
|
209 |
|
210 /* free the allocated memory */ |
|
211 uprv_free(intTarget); |
|
212 |
|
213 }else{ |
|
214 *pErrorCode = U_MEMORY_ALLOCATION_ERROR; |
|
215 } |
|
216 cleanup: |
|
217 /* are we still using stack buffer */ |
|
218 if(stackBuffer != saveBuf){ |
|
219 uprv_free(saveBuf); |
|
220 } |
|
221 u_terminateWChars(dest,destCapacity,count,pErrorCode); |
|
222 |
|
223 u_releaseDefaultConverter(conv); |
|
224 |
|
225 return dest; |
|
226 } |
|
227 #endif |
|
228 |
|
229 U_CAPI wchar_t* U_EXPORT2 |
|
230 u_strToWCS(wchar_t *dest, |
|
231 int32_t destCapacity, |
|
232 int32_t *pDestLength, |
|
233 const UChar *src, |
|
234 int32_t srcLength, |
|
235 UErrorCode *pErrorCode){ |
|
236 |
|
237 /* args check */ |
|
238 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){ |
|
239 return NULL; |
|
240 } |
|
241 |
|
242 if( (src==NULL && srcLength!=0) || srcLength < -1 || |
|
243 (destCapacity<0) || (dest == NULL && destCapacity > 0) |
|
244 ) { |
|
245 *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; |
|
246 return NULL; |
|
247 } |
|
248 |
|
249 #ifdef U_WCHAR_IS_UTF16 |
|
250 /* wchar_t is UTF-16 just do a memcpy */ |
|
251 if(srcLength == -1){ |
|
252 srcLength = u_strlen(src); |
|
253 } |
|
254 if(0 < srcLength && srcLength <= destCapacity){ |
|
255 uprv_memcpy(dest,src,srcLength*U_SIZEOF_UCHAR); |
|
256 } |
|
257 if(pDestLength){ |
|
258 *pDestLength = srcLength; |
|
259 } |
|
260 |
|
261 u_terminateUChars(dest,destCapacity,srcLength,pErrorCode); |
|
262 |
|
263 return dest; |
|
264 |
|
265 #elif defined U_WCHAR_IS_UTF32 |
|
266 |
|
267 return (wchar_t*)u_strToUTF32((UChar32*)dest, destCapacity, pDestLength, |
|
268 src, srcLength, pErrorCode); |
|
269 |
|
270 #else |
|
271 |
|
272 return _strToWCS(dest,destCapacity,pDestLength,src,srcLength, pErrorCode); |
|
273 |
|
274 #endif |
|
275 |
|
276 } |
|
277 |
|
278 #if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32) |
|
279 /* helper function */ |
|
280 static UChar* |
|
281 _strFromWCS( UChar *dest, |
|
282 int32_t destCapacity, |
|
283 int32_t *pDestLength, |
|
284 const wchar_t *src, |
|
285 int32_t srcLength, |
|
286 UErrorCode *pErrorCode) |
|
287 { |
|
288 int32_t retVal =0, count =0 ; |
|
289 UConverter* conv = NULL; |
|
290 UChar* pTarget = NULL; |
|
291 UChar* pTargetLimit = NULL; |
|
292 UChar* target = NULL; |
|
293 |
|
294 UChar uStack [_STACK_BUFFER_CAPACITY]; |
|
295 |
|
296 wchar_t wStack[_STACK_BUFFER_CAPACITY]; |
|
297 wchar_t* pWStack = wStack; |
|
298 |
|
299 |
|
300 char cStack[_STACK_BUFFER_CAPACITY]; |
|
301 int32_t cStackCap = _STACK_BUFFER_CAPACITY; |
|
302 char* pCSrc=cStack; |
|
303 char* pCSave=pCSrc; |
|
304 char* pCSrcLimit=NULL; |
|
305 |
|
306 const wchar_t* pSrc = src; |
|
307 const wchar_t* pSrcLimit = NULL; |
|
308 |
|
309 if(srcLength ==-1){ |
|
310 /* if the wchar_t source is null terminated we can safely |
|
311 * assume that there are no embedded nulls, this is a fast |
|
312 * path for null terminated strings. |
|
313 */ |
|
314 for(;;){ |
|
315 /* convert wchars to chars */ |
|
316 retVal = uprv_wcstombs(pCSrc,src, cStackCap); |
|
317 |
|
318 if(retVal == -1){ |
|
319 *pErrorCode = U_ILLEGAL_CHAR_FOUND; |
|
320 goto cleanup; |
|
321 }else if(retVal >= (cStackCap-1)){ |
|
322 /* Should rarely occur */ |
|
323 u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap, |
|
324 cStackCap * _BUFFER_CAPACITY_MULTIPLIER, 0, sizeof(char)); |
|
325 pCSave = pCSrc; |
|
326 }else{ |
|
327 /* converted every thing */ |
|
328 pCSrc = pCSrc+retVal; |
|
329 break; |
|
330 } |
|
331 } |
|
332 |
|
333 }else{ |
|
334 /* here the source is not null terminated |
|
335 * so it may have nulls embeded and we need to |
|
336 * do some extra processing |
|
337 */ |
|
338 int32_t remaining =cStackCap; |
|
339 |
|
340 pSrcLimit = src + srcLength; |
|
341 |
|
342 for(;;){ |
|
343 register int32_t nulLen = 0; |
|
344 |
|
345 /* find nulls in the string */ |
|
346 while(nulLen<srcLength && pSrc[nulLen++]!=0){ |
|
347 } |
|
348 |
|
349 if((pSrc+nulLen) < pSrcLimit){ |
|
350 /* check if we have enough room in pCSrc */ |
|
351 if(remaining < (nulLen * MB_CUR_MAX)){ |
|
352 /* should rarely occur */ |
|
353 int32_t len = (pCSrc-pCSave); |
|
354 pCSrc = pCSave; |
|
355 /* we do not have enough room so grow the buffer*/ |
|
356 u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap, |
|
357 _BUFFER_CAPACITY_MULTIPLIER*cStackCap+(nulLen*MB_CUR_MAX),len,sizeof(char)); |
|
358 |
|
359 pCSave = pCSrc; |
|
360 pCSrc = pCSave+len; |
|
361 remaining = cStackCap-(pCSrc - pCSave); |
|
362 } |
|
363 |
|
364 /* we have found a null so convert the |
|
365 * chunk from begining of non-null char to null |
|
366 */ |
|
367 retVal = uprv_wcstombs(pCSrc,pSrc,remaining); |
|
368 |
|
369 if(retVal==-1){ |
|
370 /* an error occurred bail out */ |
|
371 *pErrorCode = U_ILLEGAL_CHAR_FOUND; |
|
372 goto cleanup; |
|
373 } |
|
374 |
|
375 pCSrc += retVal+1 /* already null terminated */; |
|
376 |
|
377 pSrc += nulLen; /* skip past the null */ |
|
378 srcLength-=nulLen; /* decrement the srcLength */ |
|
379 remaining -= (pCSrc-pCSave); |
|
380 |
|
381 |
|
382 }else{ |
|
383 /* the source is not null terminated and we are |
|
384 * end of source so we copy the source to a temp buffer |
|
385 * null terminate it and convert wchar_ts to chars |
|
386 */ |
|
387 if(nulLen >= _STACK_BUFFER_CAPACITY){ |
|
388 /* Should rarely occcur */ |
|
389 /* allocate new buffer buffer */ |
|
390 pWStack =(wchar_t*) uprv_malloc(sizeof(wchar_t) * (nulLen + 1)); |
|
391 if(pWStack==NULL){ |
|
392 *pErrorCode = U_MEMORY_ALLOCATION_ERROR; |
|
393 goto cleanup; |
|
394 } |
|
395 } |
|
396 if(nulLen>0){ |
|
397 /* copy the contents to tempStack */ |
|
398 uprv_memcpy(pWStack,pSrc,nulLen*sizeof(wchar_t)); |
|
399 } |
|
400 |
|
401 /* null terminate the tempBuffer */ |
|
402 pWStack[nulLen] =0 ; |
|
403 |
|
404 if(remaining < (nulLen * MB_CUR_MAX)){ |
|
405 /* Should rarely occur */ |
|
406 int32_t len = (pCSrc-pCSave); |
|
407 pCSrc = pCSave; |
|
408 /* we do not have enough room so grow the buffer*/ |
|
409 u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap, |
|
410 cStackCap+(nulLen*MB_CUR_MAX),len,sizeof(char)); |
|
411 |
|
412 pCSave = pCSrc; |
|
413 pCSrc = pCSave+len; |
|
414 remaining = cStackCap-(pCSrc - pCSave); |
|
415 } |
|
416 /* convert to chars */ |
|
417 retVal = uprv_wcstombs(pCSrc,pWStack,remaining); |
|
418 |
|
419 pCSrc += retVal; |
|
420 pSrc += nulLen; |
|
421 srcLength-=nulLen; /* decrement the srcLength */ |
|
422 break; |
|
423 } |
|
424 } |
|
425 } |
|
426 |
|
427 /* OK..now we have converted from wchar_ts to chars now |
|
428 * convert chars to UChars |
|
429 */ |
|
430 pCSrcLimit = pCSrc; |
|
431 pCSrc = pCSave; |
|
432 pTarget = target= dest; |
|
433 pTargetLimit = dest + destCapacity; |
|
434 |
|
435 conv= u_getDefaultConverter(pErrorCode); |
|
436 |
|
437 if(U_FAILURE(*pErrorCode)|| conv==NULL){ |
|
438 goto cleanup; |
|
439 } |
|
440 |
|
441 for(;;) { |
|
442 |
|
443 *pErrorCode = U_ZERO_ERROR; |
|
444 |
|
445 /* convert to stack buffer*/ |
|
446 ucnv_toUnicode(conv,&pTarget,pTargetLimit,(const char**)&pCSrc,pCSrcLimit,NULL,(UBool)(pCSrc==pCSrcLimit),pErrorCode); |
|
447 |
|
448 /* increment count to number written to stack */ |
|
449 count+= pTarget - target; |
|
450 |
|
451 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR){ |
|
452 target = uStack; |
|
453 pTarget = uStack; |
|
454 pTargetLimit = uStack + _STACK_BUFFER_CAPACITY; |
|
455 } else { |
|
456 break; |
|
457 } |
|
458 |
|
459 } |
|
460 |
|
461 if(pDestLength){ |
|
462 *pDestLength =count; |
|
463 } |
|
464 |
|
465 u_terminateUChars(dest,destCapacity,count,pErrorCode); |
|
466 |
|
467 cleanup: |
|
468 |
|
469 if(cStack != pCSave){ |
|
470 uprv_free(pCSave); |
|
471 } |
|
472 |
|
473 if(wStack != pWStack){ |
|
474 uprv_free(pWStack); |
|
475 } |
|
476 |
|
477 u_releaseDefaultConverter(conv); |
|
478 |
|
479 return dest; |
|
480 } |
|
481 #endif |
|
482 |
|
483 U_CAPI UChar* U_EXPORT2 |
|
484 u_strFromWCS(UChar *dest, |
|
485 int32_t destCapacity, |
|
486 int32_t *pDestLength, |
|
487 const wchar_t *src, |
|
488 int32_t srcLength, |
|
489 UErrorCode *pErrorCode) |
|
490 { |
|
491 |
|
492 /* args check */ |
|
493 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){ |
|
494 return NULL; |
|
495 } |
|
496 |
|
497 if( (src==NULL && srcLength!=0) || srcLength < -1 || |
|
498 (destCapacity<0) || (dest == NULL && destCapacity > 0) |
|
499 ) { |
|
500 *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; |
|
501 return NULL; |
|
502 } |
|
503 |
|
504 #ifdef U_WCHAR_IS_UTF16 |
|
505 /* wchar_t is UTF-16 just do a memcpy */ |
|
506 if(srcLength == -1){ |
|
507 srcLength = u_strlen(src); |
|
508 } |
|
509 if(0 < srcLength && srcLength <= destCapacity){ |
|
510 uprv_memcpy(dest,src,srcLength*U_SIZEOF_UCHAR); |
|
511 } |
|
512 if(pDestLength){ |
|
513 *pDestLength = srcLength; |
|
514 } |
|
515 |
|
516 u_terminateUChars(dest,destCapacity,srcLength,pErrorCode); |
|
517 |
|
518 return dest; |
|
519 |
|
520 #elif defined U_WCHAR_IS_UTF32 |
|
521 |
|
522 return u_strFromUTF32(dest, destCapacity, pDestLength, |
|
523 (UChar32*)src, srcLength, pErrorCode); |
|
524 |
|
525 #else |
|
526 |
|
527 return _strFromWCS(dest,destCapacity,pDestLength,src,srcLength,pErrorCode); |
|
528 |
|
529 #endif |
|
530 |
|
531 } |
|
532 |
|
#endif /* #if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION */