1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/common/ustr_wcs.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,533 @@ 1.4 +/* 1.5 +******************************************************************************* 1.6 +* 1.7 +* Copyright (C) 2001-2012, International Business Machines 1.8 +* Corporation and others. All Rights Reserved. 1.9 +* 1.10 +******************************************************************************* 1.11 +* file name: ustr_wcs.cpp 1.12 +* encoding: US-ASCII 1.13 +* tab size: 8 (not used) 1.14 +* indentation:4 1.15 +* 1.16 +* created on: 2004sep07 1.17 +* created by: Markus W. Scherer 1.18 +* 1.19 +* u_strToWCS() and u_strFromWCS() functions 1.20 +* moved here from ustrtrns.c for better modularization. 1.21 +*/ 1.22 + 1.23 +#include "unicode/utypes.h" 1.24 +#include "unicode/ustring.h" 1.25 +#include "cstring.h" 1.26 +#include "cwchar.h" 1.27 +#include "cmemory.h" 1.28 +#include "ustr_imp.h" 1.29 +#include "ustr_cnv.h" 1.30 + 1.31 +#if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION 1.32 + 1.33 +#define _STACK_BUFFER_CAPACITY 1000 1.34 +#define _BUFFER_CAPACITY_MULTIPLIER 2 1.35 + 1.36 +#if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32) 1.37 +// TODO: We should use CharString for char buffers and UnicodeString for UChar buffers. 1.38 +// Then we could change this to work only with wchar_t buffers. 1.39 +static inline UBool 1.40 +u_growAnyBufferFromStatic(void *context, 1.41 + void **pBuffer, int32_t *pCapacity, int32_t reqCapacity, 1.42 + int32_t length, int32_t size) { 1.43 + // Use char* not void* to avoid the compiler's strict-aliasing assumptions 1.44 + // and related warnings. 1.45 + char *newBuffer=(char *)uprv_malloc(reqCapacity*size); 1.46 + if(newBuffer!=NULL) { 1.47 + if(length>0) { 1.48 + uprv_memcpy(newBuffer, *pBuffer, length*size); 1.49 + } 1.50 + *pCapacity=reqCapacity; 1.51 + } else { 1.52 + *pCapacity=0; 1.53 + } 1.54 + 1.55 + /* release the old pBuffer if it was not statically allocated */ 1.56 + if(*pBuffer!=(char *)context) { 1.57 + uprv_free(*pBuffer); 1.58 + } 1.59 + 1.60 + *pBuffer=newBuffer; 1.61 + return (UBool)(newBuffer!=NULL); 1.62 +} 1.63 + 1.64 +/* helper function */ 1.65 +static wchar_t* 1.66 +_strToWCS(wchar_t *dest, 1.67 + int32_t destCapacity, 1.68 + int32_t *pDestLength, 1.69 + const UChar *src, 1.70 + int32_t srcLength, 1.71 + UErrorCode *pErrorCode){ 1.72 + 1.73 + char stackBuffer [_STACK_BUFFER_CAPACITY]; 1.74 + char* tempBuf = stackBuffer; 1.75 + int32_t tempBufCapacity = _STACK_BUFFER_CAPACITY; 1.76 + char* tempBufLimit = stackBuffer + tempBufCapacity; 1.77 + UConverter* conv = NULL; 1.78 + char* saveBuf = tempBuf; 1.79 + wchar_t* intTarget=NULL; 1.80 + int32_t intTargetCapacity=0; 1.81 + int count=0,retVal=0; 1.82 + 1.83 + const UChar *pSrcLimit =NULL; 1.84 + const UChar *pSrc = src; 1.85 + 1.86 + conv = u_getDefaultConverter(pErrorCode); 1.87 + 1.88 + if(U_FAILURE(*pErrorCode)){ 1.89 + return NULL; 1.90 + } 1.91 + 1.92 + if(srcLength == -1){ 1.93 + srcLength = u_strlen(pSrc); 1.94 + } 1.95 + 1.96 + pSrcLimit = pSrc + srcLength; 1.97 + 1.98 + for(;;) { 1.99 + /* reset the error state */ 1.100 + *pErrorCode = U_ZERO_ERROR; 1.101 + 1.102 + /* convert to chars using default converter */ 1.103 + ucnv_fromUnicode(conv,&tempBuf,tempBufLimit,&pSrc,pSrcLimit,NULL,(UBool)(pSrc==pSrcLimit),pErrorCode); 1.104 + count =(tempBuf - saveBuf); 1.105 + 1.106 + /* This should rarely occur */ 1.107 + if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR){ 1.108 + tempBuf = saveBuf; 1.109 + 1.110 + /* we dont have enough room on the stack grow the buffer */ 1.111 + int32_t newCapacity = 2 * srcLength; 1.112 + if(newCapacity <= tempBufCapacity) { 1.113 + newCapacity = _BUFFER_CAPACITY_MULTIPLIER * tempBufCapacity; 1.114 + } 1.115 + if(!u_growAnyBufferFromStatic(stackBuffer,(void**) &tempBuf, &tempBufCapacity, 1.116 + newCapacity, count, 1)) { 1.117 + goto cleanup; 1.118 + } 1.119 + 1.120 + saveBuf = tempBuf; 1.121 + tempBufLimit = tempBuf + tempBufCapacity; 1.122 + tempBuf = tempBuf + count; 1.123 + 1.124 + } else { 1.125 + break; 1.126 + } 1.127 + } 1.128 + 1.129 + if(U_FAILURE(*pErrorCode)){ 1.130 + goto cleanup; 1.131 + } 1.132 + 1.133 + /* done with conversion null terminate the char buffer */ 1.134 + if(count>=tempBufCapacity){ 1.135 + tempBuf = saveBuf; 1.136 + /* we dont have enough room on the stack grow the buffer */ 1.137 + if(!u_growAnyBufferFromStatic(stackBuffer,(void**) &tempBuf, &tempBufCapacity, 1.138 + count+1, count, 1)) { 1.139 + goto cleanup; 1.140 + } 1.141 + saveBuf = tempBuf; 1.142 + } 1.143 + 1.144 + saveBuf[count]=0; 1.145 + 1.146 + 1.147 + /* allocate more space than required 1.148 + * here we assume that every char requires 1.149 + * no more than 2 wchar_ts 1.150 + */ 1.151 + intTargetCapacity = (count * _BUFFER_CAPACITY_MULTIPLIER + 1) /*for null termination */; 1.152 + intTarget = (wchar_t*)uprv_malloc( intTargetCapacity * sizeof(wchar_t) ); 1.153 + 1.154 + if(intTarget){ 1.155 + 1.156 + int32_t nulLen = 0; 1.157 + int32_t remaining = intTargetCapacity; 1.158 + wchar_t* pIntTarget=intTarget; 1.159 + tempBuf = saveBuf; 1.160 + 1.161 + /* now convert the mbs to wcs */ 1.162 + for(;;){ 1.163 + 1.164 + /* we can call the system API since we are sure that 1.165 + * there is atleast 1 null in the input 1.166 + */ 1.167 + retVal = uprv_mbstowcs(pIntTarget,(tempBuf+nulLen),remaining); 1.168 + 1.169 + if(retVal==-1){ 1.170 + *pErrorCode = U_INVALID_CHAR_FOUND; 1.171 + break; 1.172 + }else if(retVal== remaining){/* should never occur */ 1.173 + int numWritten = (pIntTarget-intTarget); 1.174 + u_growAnyBufferFromStatic(NULL,(void**) &intTarget, 1.175 + &intTargetCapacity, 1.176 + intTargetCapacity * _BUFFER_CAPACITY_MULTIPLIER, 1.177 + numWritten, 1.178 + sizeof(wchar_t)); 1.179 + pIntTarget = intTarget; 1.180 + remaining=intTargetCapacity; 1.181 + 1.182 + if(nulLen!=count){ /*there are embedded nulls*/ 1.183 + pIntTarget+=numWritten; 1.184 + remaining-=numWritten; 1.185 + } 1.186 + 1.187 + }else{ 1.188 + int32_t nulVal; 1.189 + /*scan for nulls */ 1.190 + /* we donot check for limit since tempBuf is null terminated */ 1.191 + while(tempBuf[nulLen++] != 0){ 1.192 + } 1.193 + nulVal = (nulLen < srcLength) ? 1 : 0; 1.194 + pIntTarget = pIntTarget + retVal+nulVal; 1.195 + remaining -=(retVal+nulVal); 1.196 + 1.197 + /* check if we have reached the source limit*/ 1.198 + if(nulLen>=(count)){ 1.199 + break; 1.200 + } 1.201 + } 1.202 + } 1.203 + count = (int32_t)(pIntTarget-intTarget); 1.204 + 1.205 + if(0 < count && count <= destCapacity){ 1.206 + uprv_memcpy(dest,intTarget,count*sizeof(wchar_t)); 1.207 + } 1.208 + 1.209 + if(pDestLength){ 1.210 + *pDestLength = count; 1.211 + } 1.212 + 1.213 + /* free the allocated memory */ 1.214 + uprv_free(intTarget); 1.215 + 1.216 + }else{ 1.217 + *pErrorCode = U_MEMORY_ALLOCATION_ERROR; 1.218 + } 1.219 +cleanup: 1.220 + /* are we still using stack buffer */ 1.221 + if(stackBuffer != saveBuf){ 1.222 + uprv_free(saveBuf); 1.223 + } 1.224 + u_terminateWChars(dest,destCapacity,count,pErrorCode); 1.225 + 1.226 + u_releaseDefaultConverter(conv); 1.227 + 1.228 + return dest; 1.229 +} 1.230 +#endif 1.231 + 1.232 +U_CAPI wchar_t* U_EXPORT2 1.233 +u_strToWCS(wchar_t *dest, 1.234 + int32_t destCapacity, 1.235 + int32_t *pDestLength, 1.236 + const UChar *src, 1.237 + int32_t srcLength, 1.238 + UErrorCode *pErrorCode){ 1.239 + 1.240 + /* args check */ 1.241 + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){ 1.242 + return NULL; 1.243 + } 1.244 + 1.245 + if( (src==NULL && srcLength!=0) || srcLength < -1 || 1.246 + (destCapacity<0) || (dest == NULL && destCapacity > 0) 1.247 + ) { 1.248 + *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; 1.249 + return NULL; 1.250 + } 1.251 + 1.252 +#ifdef U_WCHAR_IS_UTF16 1.253 + /* wchar_t is UTF-16 just do a memcpy */ 1.254 + if(srcLength == -1){ 1.255 + srcLength = u_strlen(src); 1.256 + } 1.257 + if(0 < srcLength && srcLength <= destCapacity){ 1.258 + uprv_memcpy(dest,src,srcLength*U_SIZEOF_UCHAR); 1.259 + } 1.260 + if(pDestLength){ 1.261 + *pDestLength = srcLength; 1.262 + } 1.263 + 1.264 + u_terminateUChars(dest,destCapacity,srcLength,pErrorCode); 1.265 + 1.266 + return dest; 1.267 + 1.268 +#elif defined U_WCHAR_IS_UTF32 1.269 + 1.270 + return (wchar_t*)u_strToUTF32((UChar32*)dest, destCapacity, pDestLength, 1.271 + src, srcLength, pErrorCode); 1.272 + 1.273 +#else 1.274 + 1.275 + return _strToWCS(dest,destCapacity,pDestLength,src,srcLength, pErrorCode); 1.276 + 1.277 +#endif 1.278 + 1.279 +} 1.280 + 1.281 +#if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32) 1.282 +/* helper function */ 1.283 +static UChar* 1.284 +_strFromWCS( UChar *dest, 1.285 + int32_t destCapacity, 1.286 + int32_t *pDestLength, 1.287 + const wchar_t *src, 1.288 + int32_t srcLength, 1.289 + UErrorCode *pErrorCode) 1.290 +{ 1.291 + int32_t retVal =0, count =0 ; 1.292 + UConverter* conv = NULL; 1.293 + UChar* pTarget = NULL; 1.294 + UChar* pTargetLimit = NULL; 1.295 + UChar* target = NULL; 1.296 + 1.297 + UChar uStack [_STACK_BUFFER_CAPACITY]; 1.298 + 1.299 + wchar_t wStack[_STACK_BUFFER_CAPACITY]; 1.300 + wchar_t* pWStack = wStack; 1.301 + 1.302 + 1.303 + char cStack[_STACK_BUFFER_CAPACITY]; 1.304 + int32_t cStackCap = _STACK_BUFFER_CAPACITY; 1.305 + char* pCSrc=cStack; 1.306 + char* pCSave=pCSrc; 1.307 + char* pCSrcLimit=NULL; 1.308 + 1.309 + const wchar_t* pSrc = src; 1.310 + const wchar_t* pSrcLimit = NULL; 1.311 + 1.312 + if(srcLength ==-1){ 1.313 + /* if the wchar_t source is null terminated we can safely 1.314 + * assume that there are no embedded nulls, this is a fast 1.315 + * path for null terminated strings. 1.316 + */ 1.317 + for(;;){ 1.318 + /* convert wchars to chars */ 1.319 + retVal = uprv_wcstombs(pCSrc,src, cStackCap); 1.320 + 1.321 + if(retVal == -1){ 1.322 + *pErrorCode = U_ILLEGAL_CHAR_FOUND; 1.323 + goto cleanup; 1.324 + }else if(retVal >= (cStackCap-1)){ 1.325 + /* Should rarely occur */ 1.326 + u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap, 1.327 + cStackCap * _BUFFER_CAPACITY_MULTIPLIER, 0, sizeof(char)); 1.328 + pCSave = pCSrc; 1.329 + }else{ 1.330 + /* converted every thing */ 1.331 + pCSrc = pCSrc+retVal; 1.332 + break; 1.333 + } 1.334 + } 1.335 + 1.336 + }else{ 1.337 + /* here the source is not null terminated 1.338 + * so it may have nulls embeded and we need to 1.339 + * do some extra processing 1.340 + */ 1.341 + int32_t remaining =cStackCap; 1.342 + 1.343 + pSrcLimit = src + srcLength; 1.344 + 1.345 + for(;;){ 1.346 + register int32_t nulLen = 0; 1.347 + 1.348 + /* find nulls in the string */ 1.349 + while(nulLen<srcLength && pSrc[nulLen++]!=0){ 1.350 + } 1.351 + 1.352 + if((pSrc+nulLen) < pSrcLimit){ 1.353 + /* check if we have enough room in pCSrc */ 1.354 + if(remaining < (nulLen * MB_CUR_MAX)){ 1.355 + /* should rarely occur */ 1.356 + int32_t len = (pCSrc-pCSave); 1.357 + pCSrc = pCSave; 1.358 + /* we do not have enough room so grow the buffer*/ 1.359 + u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap, 1.360 + _BUFFER_CAPACITY_MULTIPLIER*cStackCap+(nulLen*MB_CUR_MAX),len,sizeof(char)); 1.361 + 1.362 + pCSave = pCSrc; 1.363 + pCSrc = pCSave+len; 1.364 + remaining = cStackCap-(pCSrc - pCSave); 1.365 + } 1.366 + 1.367 + /* we have found a null so convert the 1.368 + * chunk from begining of non-null char to null 1.369 + */ 1.370 + retVal = uprv_wcstombs(pCSrc,pSrc,remaining); 1.371 + 1.372 + if(retVal==-1){ 1.373 + /* an error occurred bail out */ 1.374 + *pErrorCode = U_ILLEGAL_CHAR_FOUND; 1.375 + goto cleanup; 1.376 + } 1.377 + 1.378 + pCSrc += retVal+1 /* already null terminated */; 1.379 + 1.380 + pSrc += nulLen; /* skip past the null */ 1.381 + srcLength-=nulLen; /* decrement the srcLength */ 1.382 + remaining -= (pCSrc-pCSave); 1.383 + 1.384 + 1.385 + }else{ 1.386 + /* the source is not null terminated and we are 1.387 + * end of source so we copy the source to a temp buffer 1.388 + * null terminate it and convert wchar_ts to chars 1.389 + */ 1.390 + if(nulLen >= _STACK_BUFFER_CAPACITY){ 1.391 + /* Should rarely occcur */ 1.392 + /* allocate new buffer buffer */ 1.393 + pWStack =(wchar_t*) uprv_malloc(sizeof(wchar_t) * (nulLen + 1)); 1.394 + if(pWStack==NULL){ 1.395 + *pErrorCode = U_MEMORY_ALLOCATION_ERROR; 1.396 + goto cleanup; 1.397 + } 1.398 + } 1.399 + if(nulLen>0){ 1.400 + /* copy the contents to tempStack */ 1.401 + uprv_memcpy(pWStack,pSrc,nulLen*sizeof(wchar_t)); 1.402 + } 1.403 + 1.404 + /* null terminate the tempBuffer */ 1.405 + pWStack[nulLen] =0 ; 1.406 + 1.407 + if(remaining < (nulLen * MB_CUR_MAX)){ 1.408 + /* Should rarely occur */ 1.409 + int32_t len = (pCSrc-pCSave); 1.410 + pCSrc = pCSave; 1.411 + /* we do not have enough room so grow the buffer*/ 1.412 + u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap, 1.413 + cStackCap+(nulLen*MB_CUR_MAX),len,sizeof(char)); 1.414 + 1.415 + pCSave = pCSrc; 1.416 + pCSrc = pCSave+len; 1.417 + remaining = cStackCap-(pCSrc - pCSave); 1.418 + } 1.419 + /* convert to chars */ 1.420 + retVal = uprv_wcstombs(pCSrc,pWStack,remaining); 1.421 + 1.422 + pCSrc += retVal; 1.423 + pSrc += nulLen; 1.424 + srcLength-=nulLen; /* decrement the srcLength */ 1.425 + break; 1.426 + } 1.427 + } 1.428 + } 1.429 + 1.430 + /* OK..now we have converted from wchar_ts to chars now 1.431 + * convert chars to UChars 1.432 + */ 1.433 + pCSrcLimit = pCSrc; 1.434 + pCSrc = pCSave; 1.435 + pTarget = target= dest; 1.436 + pTargetLimit = dest + destCapacity; 1.437 + 1.438 + conv= u_getDefaultConverter(pErrorCode); 1.439 + 1.440 + if(U_FAILURE(*pErrorCode)|| conv==NULL){ 1.441 + goto cleanup; 1.442 + } 1.443 + 1.444 + for(;;) { 1.445 + 1.446 + *pErrorCode = U_ZERO_ERROR; 1.447 + 1.448 + /* convert to stack buffer*/ 1.449 + ucnv_toUnicode(conv,&pTarget,pTargetLimit,(const char**)&pCSrc,pCSrcLimit,NULL,(UBool)(pCSrc==pCSrcLimit),pErrorCode); 1.450 + 1.451 + /* increment count to number written to stack */ 1.452 + count+= pTarget - target; 1.453 + 1.454 + if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR){ 1.455 + target = uStack; 1.456 + pTarget = uStack; 1.457 + pTargetLimit = uStack + _STACK_BUFFER_CAPACITY; 1.458 + } else { 1.459 + break; 1.460 + } 1.461 + 1.462 + } 1.463 + 1.464 + if(pDestLength){ 1.465 + *pDestLength =count; 1.466 + } 1.467 + 1.468 + u_terminateUChars(dest,destCapacity,count,pErrorCode); 1.469 + 1.470 +cleanup: 1.471 + 1.472 + if(cStack != pCSave){ 1.473 + uprv_free(pCSave); 1.474 + } 1.475 + 1.476 + if(wStack != pWStack){ 1.477 + uprv_free(pWStack); 1.478 + } 1.479 + 1.480 + u_releaseDefaultConverter(conv); 1.481 + 1.482 + return dest; 1.483 +} 1.484 +#endif 1.485 + 1.486 +U_CAPI UChar* U_EXPORT2 1.487 +u_strFromWCS(UChar *dest, 1.488 + int32_t destCapacity, 1.489 + int32_t *pDestLength, 1.490 + const wchar_t *src, 1.491 + int32_t srcLength, 1.492 + UErrorCode *pErrorCode) 1.493 +{ 1.494 + 1.495 + /* args check */ 1.496 + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){ 1.497 + return NULL; 1.498 + } 1.499 + 1.500 + if( (src==NULL && srcLength!=0) || srcLength < -1 || 1.501 + (destCapacity<0) || (dest == NULL && destCapacity > 0) 1.502 + ) { 1.503 + *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; 1.504 + return NULL; 1.505 + } 1.506 + 1.507 +#ifdef U_WCHAR_IS_UTF16 1.508 + /* wchar_t is UTF-16 just do a memcpy */ 1.509 + if(srcLength == -1){ 1.510 + srcLength = u_strlen(src); 1.511 + } 1.512 + if(0 < srcLength && srcLength <= destCapacity){ 1.513 + uprv_memcpy(dest,src,srcLength*U_SIZEOF_UCHAR); 1.514 + } 1.515 + if(pDestLength){ 1.516 + *pDestLength = srcLength; 1.517 + } 1.518 + 1.519 + u_terminateUChars(dest,destCapacity,srcLength,pErrorCode); 1.520 + 1.521 + return dest; 1.522 + 1.523 +#elif defined U_WCHAR_IS_UTF32 1.524 + 1.525 + return u_strFromUTF32(dest, destCapacity, pDestLength, 1.526 + (UChar32*)src, srcLength, pErrorCode); 1.527 + 1.528 +#else 1.529 + 1.530 + return _strFromWCS(dest,destCapacity,pDestLength,src,srcLength,pErrorCode); 1.531 + 1.532 +#endif 1.533 + 1.534 +} 1.535 + 1.536 +#endif /* #if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32) && !UCONFIG_NO_CONVERSION */