michael@0: /* michael@0: ******************************************************************************* michael@0: * michael@0: * Copyright (C) 2001-2012, International Business Machines michael@0: * Corporation and others. All Rights Reserved. michael@0: * michael@0: ******************************************************************************* michael@0: * file name: ustr_wcs.cpp michael@0: * encoding: US-ASCII michael@0: * tab size: 8 (not used) michael@0: * indentation:4 michael@0: * michael@0: * created on: 2004sep07 michael@0: * created by: Markus W. Scherer michael@0: * michael@0: * u_strToWCS() and u_strFromWCS() functions michael@0: * moved here from ustrtrns.c for better modularization. michael@0: */ michael@0: michael@0: #include "unicode/utypes.h" michael@0: #include "unicode/ustring.h" michael@0: #include "cstring.h" michael@0: #include "cwchar.h" michael@0: #include "cmemory.h" michael@0: #include "ustr_imp.h" michael@0: #include "ustr_cnv.h" michael@0: michael@0: #if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION michael@0: michael@0: #define _STACK_BUFFER_CAPACITY 1000 michael@0: #define _BUFFER_CAPACITY_MULTIPLIER 2 michael@0: michael@0: #if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32) michael@0: // TODO: We should use CharString for char buffers and UnicodeString for UChar buffers. michael@0: // Then we could change this to work only with wchar_t buffers. michael@0: static inline UBool michael@0: u_growAnyBufferFromStatic(void *context, michael@0: void **pBuffer, int32_t *pCapacity, int32_t reqCapacity, michael@0: int32_t length, int32_t size) { michael@0: // Use char* not void* to avoid the compiler's strict-aliasing assumptions michael@0: // and related warnings. michael@0: char *newBuffer=(char *)uprv_malloc(reqCapacity*size); michael@0: if(newBuffer!=NULL) { michael@0: if(length>0) { michael@0: uprv_memcpy(newBuffer, *pBuffer, length*size); michael@0: } michael@0: *pCapacity=reqCapacity; michael@0: } else { michael@0: *pCapacity=0; michael@0: } michael@0: michael@0: /* release the old pBuffer if it was not statically allocated */ michael@0: if(*pBuffer!=(char *)context) { michael@0: uprv_free(*pBuffer); michael@0: } michael@0: michael@0: *pBuffer=newBuffer; michael@0: return (UBool)(newBuffer!=NULL); michael@0: } michael@0: michael@0: /* helper function */ michael@0: static wchar_t* michael@0: _strToWCS(wchar_t *dest, michael@0: int32_t destCapacity, michael@0: int32_t *pDestLength, michael@0: const UChar *src, michael@0: int32_t srcLength, michael@0: UErrorCode *pErrorCode){ michael@0: michael@0: char stackBuffer [_STACK_BUFFER_CAPACITY]; michael@0: char* tempBuf = stackBuffer; michael@0: int32_t tempBufCapacity = _STACK_BUFFER_CAPACITY; michael@0: char* tempBufLimit = stackBuffer + tempBufCapacity; michael@0: UConverter* conv = NULL; michael@0: char* saveBuf = tempBuf; michael@0: wchar_t* intTarget=NULL; michael@0: int32_t intTargetCapacity=0; michael@0: int count=0,retVal=0; michael@0: michael@0: const UChar *pSrcLimit =NULL; michael@0: const UChar *pSrc = src; michael@0: michael@0: conv = u_getDefaultConverter(pErrorCode); michael@0: michael@0: if(U_FAILURE(*pErrorCode)){ michael@0: return NULL; michael@0: } michael@0: michael@0: if(srcLength == -1){ michael@0: srcLength = u_strlen(pSrc); michael@0: } michael@0: michael@0: pSrcLimit = pSrc + srcLength; michael@0: michael@0: for(;;) { michael@0: /* reset the error state */ michael@0: *pErrorCode = U_ZERO_ERROR; michael@0: michael@0: /* convert to chars using default converter */ michael@0: ucnv_fromUnicode(conv,&tempBuf,tempBufLimit,&pSrc,pSrcLimit,NULL,(UBool)(pSrc==pSrcLimit),pErrorCode); michael@0: count =(tempBuf - saveBuf); michael@0: michael@0: /* This should rarely occur */ michael@0: if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR){ michael@0: tempBuf = saveBuf; michael@0: michael@0: /* we dont have enough room on the stack grow the buffer */ michael@0: int32_t newCapacity = 2 * srcLength; michael@0: if(newCapacity <= tempBufCapacity) { michael@0: newCapacity = _BUFFER_CAPACITY_MULTIPLIER * tempBufCapacity; michael@0: } michael@0: if(!u_growAnyBufferFromStatic(stackBuffer,(void**) &tempBuf, &tempBufCapacity, michael@0: newCapacity, count, 1)) { michael@0: goto cleanup; michael@0: } michael@0: michael@0: saveBuf = tempBuf; michael@0: tempBufLimit = tempBuf + tempBufCapacity; michael@0: tempBuf = tempBuf + count; michael@0: michael@0: } else { michael@0: break; michael@0: } michael@0: } michael@0: michael@0: if(U_FAILURE(*pErrorCode)){ michael@0: goto cleanup; michael@0: } michael@0: michael@0: /* done with conversion null terminate the char buffer */ michael@0: if(count>=tempBufCapacity){ michael@0: tempBuf = saveBuf; michael@0: /* we dont have enough room on the stack grow the buffer */ michael@0: if(!u_growAnyBufferFromStatic(stackBuffer,(void**) &tempBuf, &tempBufCapacity, michael@0: count+1, count, 1)) { michael@0: goto cleanup; michael@0: } michael@0: saveBuf = tempBuf; michael@0: } michael@0: michael@0: saveBuf[count]=0; michael@0: michael@0: michael@0: /* allocate more space than required michael@0: * here we assume that every char requires michael@0: * no more than 2 wchar_ts michael@0: */ michael@0: intTargetCapacity = (count * _BUFFER_CAPACITY_MULTIPLIER + 1) /*for null termination */; michael@0: intTarget = (wchar_t*)uprv_malloc( intTargetCapacity * sizeof(wchar_t) ); michael@0: michael@0: if(intTarget){ michael@0: michael@0: int32_t nulLen = 0; michael@0: int32_t remaining = intTargetCapacity; michael@0: wchar_t* pIntTarget=intTarget; michael@0: tempBuf = saveBuf; michael@0: michael@0: /* now convert the mbs to wcs */ michael@0: for(;;){ michael@0: michael@0: /* we can call the system API since we are sure that michael@0: * there is atleast 1 null in the input michael@0: */ michael@0: retVal = uprv_mbstowcs(pIntTarget,(tempBuf+nulLen),remaining); michael@0: michael@0: if(retVal==-1){ michael@0: *pErrorCode = U_INVALID_CHAR_FOUND; michael@0: break; michael@0: }else if(retVal== remaining){/* should never occur */ michael@0: int numWritten = (pIntTarget-intTarget); michael@0: u_growAnyBufferFromStatic(NULL,(void**) &intTarget, michael@0: &intTargetCapacity, michael@0: intTargetCapacity * _BUFFER_CAPACITY_MULTIPLIER, michael@0: numWritten, michael@0: sizeof(wchar_t)); michael@0: pIntTarget = intTarget; michael@0: remaining=intTargetCapacity; michael@0: michael@0: if(nulLen!=count){ /*there are embedded nulls*/ michael@0: pIntTarget+=numWritten; michael@0: remaining-=numWritten; michael@0: } michael@0: michael@0: }else{ michael@0: int32_t nulVal; michael@0: /*scan for nulls */ michael@0: /* we donot check for limit since tempBuf is null terminated */ michael@0: while(tempBuf[nulLen++] != 0){ michael@0: } michael@0: nulVal = (nulLen < srcLength) ? 1 : 0; michael@0: pIntTarget = pIntTarget + retVal+nulVal; michael@0: remaining -=(retVal+nulVal); michael@0: michael@0: /* check if we have reached the source limit*/ michael@0: if(nulLen>=(count)){ michael@0: break; michael@0: } michael@0: } michael@0: } michael@0: count = (int32_t)(pIntTarget-intTarget); michael@0: michael@0: if(0 < count && count <= destCapacity){ michael@0: uprv_memcpy(dest,intTarget,count*sizeof(wchar_t)); michael@0: } michael@0: michael@0: if(pDestLength){ michael@0: *pDestLength = count; michael@0: } michael@0: michael@0: /* free the allocated memory */ michael@0: uprv_free(intTarget); michael@0: michael@0: }else{ michael@0: *pErrorCode = U_MEMORY_ALLOCATION_ERROR; michael@0: } michael@0: cleanup: michael@0: /* are we still using stack buffer */ michael@0: if(stackBuffer != saveBuf){ michael@0: uprv_free(saveBuf); michael@0: } michael@0: u_terminateWChars(dest,destCapacity,count,pErrorCode); michael@0: michael@0: u_releaseDefaultConverter(conv); michael@0: michael@0: return dest; michael@0: } michael@0: #endif michael@0: michael@0: U_CAPI wchar_t* U_EXPORT2 michael@0: u_strToWCS(wchar_t *dest, michael@0: int32_t destCapacity, michael@0: int32_t *pDestLength, michael@0: const UChar *src, michael@0: int32_t srcLength, michael@0: UErrorCode *pErrorCode){ michael@0: michael@0: /* args check */ michael@0: if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){ michael@0: return NULL; michael@0: } michael@0: michael@0: if( (src==NULL && srcLength!=0) || srcLength < -1 || michael@0: (destCapacity<0) || (dest == NULL && destCapacity > 0) michael@0: ) { michael@0: *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; michael@0: return NULL; michael@0: } michael@0: michael@0: #ifdef U_WCHAR_IS_UTF16 michael@0: /* wchar_t is UTF-16 just do a memcpy */ michael@0: if(srcLength == -1){ michael@0: srcLength = u_strlen(src); michael@0: } michael@0: if(0 < srcLength && srcLength <= destCapacity){ michael@0: uprv_memcpy(dest,src,srcLength*U_SIZEOF_UCHAR); michael@0: } michael@0: if(pDestLength){ michael@0: *pDestLength = srcLength; michael@0: } michael@0: michael@0: u_terminateUChars(dest,destCapacity,srcLength,pErrorCode); michael@0: michael@0: return dest; michael@0: michael@0: #elif defined U_WCHAR_IS_UTF32 michael@0: michael@0: return (wchar_t*)u_strToUTF32((UChar32*)dest, destCapacity, pDestLength, michael@0: src, srcLength, pErrorCode); michael@0: michael@0: #else michael@0: michael@0: return _strToWCS(dest,destCapacity,pDestLength,src,srcLength, pErrorCode); michael@0: michael@0: #endif michael@0: michael@0: } michael@0: michael@0: #if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32) michael@0: /* helper function */ michael@0: static UChar* michael@0: _strFromWCS( UChar *dest, michael@0: int32_t destCapacity, michael@0: int32_t *pDestLength, michael@0: const wchar_t *src, michael@0: int32_t srcLength, michael@0: UErrorCode *pErrorCode) michael@0: { michael@0: int32_t retVal =0, count =0 ; michael@0: UConverter* conv = NULL; michael@0: UChar* pTarget = NULL; michael@0: UChar* pTargetLimit = NULL; michael@0: UChar* target = NULL; michael@0: michael@0: UChar uStack [_STACK_BUFFER_CAPACITY]; michael@0: michael@0: wchar_t wStack[_STACK_BUFFER_CAPACITY]; michael@0: wchar_t* pWStack = wStack; michael@0: michael@0: michael@0: char cStack[_STACK_BUFFER_CAPACITY]; michael@0: int32_t cStackCap = _STACK_BUFFER_CAPACITY; michael@0: char* pCSrc=cStack; michael@0: char* pCSave=pCSrc; michael@0: char* pCSrcLimit=NULL; michael@0: michael@0: const wchar_t* pSrc = src; michael@0: const wchar_t* pSrcLimit = NULL; michael@0: michael@0: if(srcLength ==-1){ michael@0: /* if the wchar_t source is null terminated we can safely michael@0: * assume that there are no embedded nulls, this is a fast michael@0: * path for null terminated strings. michael@0: */ michael@0: for(;;){ michael@0: /* convert wchars to chars */ michael@0: retVal = uprv_wcstombs(pCSrc,src, cStackCap); michael@0: michael@0: if(retVal == -1){ michael@0: *pErrorCode = U_ILLEGAL_CHAR_FOUND; michael@0: goto cleanup; michael@0: }else if(retVal >= (cStackCap-1)){ michael@0: /* Should rarely occur */ michael@0: u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap, michael@0: cStackCap * _BUFFER_CAPACITY_MULTIPLIER, 0, sizeof(char)); michael@0: pCSave = pCSrc; michael@0: }else{ michael@0: /* converted every thing */ michael@0: pCSrc = pCSrc+retVal; michael@0: break; michael@0: } michael@0: } michael@0: michael@0: }else{ michael@0: /* here the source is not null terminated michael@0: * so it may have nulls embeded and we need to michael@0: * do some extra processing michael@0: */ michael@0: int32_t remaining =cStackCap; michael@0: michael@0: pSrcLimit = src + srcLength; michael@0: michael@0: for(;;){ michael@0: register int32_t nulLen = 0; michael@0: michael@0: /* find nulls in the string */ michael@0: while(nulLen= _STACK_BUFFER_CAPACITY){ michael@0: /* Should rarely occcur */ michael@0: /* allocate new buffer buffer */ michael@0: pWStack =(wchar_t*) uprv_malloc(sizeof(wchar_t) * (nulLen + 1)); michael@0: if(pWStack==NULL){ michael@0: *pErrorCode = U_MEMORY_ALLOCATION_ERROR; michael@0: goto cleanup; michael@0: } michael@0: } michael@0: if(nulLen>0){ michael@0: /* copy the contents to tempStack */ michael@0: uprv_memcpy(pWStack,pSrc,nulLen*sizeof(wchar_t)); michael@0: } michael@0: michael@0: /* null terminate the tempBuffer */ michael@0: pWStack[nulLen] =0 ; michael@0: michael@0: if(remaining < (nulLen * MB_CUR_MAX)){ michael@0: /* Should rarely occur */ michael@0: int32_t len = (pCSrc-pCSave); michael@0: pCSrc = pCSave; michael@0: /* we do not have enough room so grow the buffer*/ michael@0: u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap, michael@0: cStackCap+(nulLen*MB_CUR_MAX),len,sizeof(char)); michael@0: michael@0: pCSave = pCSrc; michael@0: pCSrc = pCSave+len; michael@0: remaining = cStackCap-(pCSrc - pCSave); michael@0: } michael@0: /* convert to chars */ michael@0: retVal = uprv_wcstombs(pCSrc,pWStack,remaining); michael@0: michael@0: pCSrc += retVal; michael@0: pSrc += nulLen; michael@0: srcLength-=nulLen; /* decrement the srcLength */ michael@0: break; michael@0: } michael@0: } michael@0: } michael@0: michael@0: /* OK..now we have converted from wchar_ts to chars now michael@0: * convert chars to UChars michael@0: */ michael@0: pCSrcLimit = pCSrc; michael@0: pCSrc = pCSave; michael@0: pTarget = target= dest; michael@0: pTargetLimit = dest + destCapacity; michael@0: michael@0: conv= u_getDefaultConverter(pErrorCode); michael@0: michael@0: if(U_FAILURE(*pErrorCode)|| conv==NULL){ michael@0: goto cleanup; michael@0: } michael@0: michael@0: for(;;) { michael@0: michael@0: *pErrorCode = U_ZERO_ERROR; michael@0: michael@0: /* convert to stack buffer*/ michael@0: ucnv_toUnicode(conv,&pTarget,pTargetLimit,(const char**)&pCSrc,pCSrcLimit,NULL,(UBool)(pCSrc==pCSrcLimit),pErrorCode); michael@0: michael@0: /* increment count to number written to stack */ michael@0: count+= pTarget - target; michael@0: michael@0: if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR){ michael@0: target = uStack; michael@0: pTarget = uStack; michael@0: pTargetLimit = uStack + _STACK_BUFFER_CAPACITY; michael@0: } else { michael@0: break; michael@0: } michael@0: michael@0: } michael@0: michael@0: if(pDestLength){ michael@0: *pDestLength =count; michael@0: } michael@0: michael@0: u_terminateUChars(dest,destCapacity,count,pErrorCode); michael@0: michael@0: cleanup: michael@0: michael@0: if(cStack != pCSave){ michael@0: uprv_free(pCSave); michael@0: } michael@0: michael@0: if(wStack != pWStack){ michael@0: uprv_free(pWStack); michael@0: } michael@0: michael@0: u_releaseDefaultConverter(conv); michael@0: michael@0: return dest; michael@0: } michael@0: #endif michael@0: michael@0: U_CAPI UChar* U_EXPORT2 michael@0: u_strFromWCS(UChar *dest, michael@0: int32_t destCapacity, michael@0: int32_t *pDestLength, michael@0: const wchar_t *src, michael@0: int32_t srcLength, michael@0: UErrorCode *pErrorCode) michael@0: { michael@0: michael@0: /* args check */ michael@0: if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){ michael@0: return NULL; michael@0: } michael@0: michael@0: if( (src==NULL && srcLength!=0) || srcLength < -1 || michael@0: (destCapacity<0) || (dest == NULL && destCapacity > 0) michael@0: ) { michael@0: *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; michael@0: return NULL; michael@0: } michael@0: michael@0: #ifdef U_WCHAR_IS_UTF16 michael@0: /* wchar_t is UTF-16 just do a memcpy */ michael@0: if(srcLength == -1){ michael@0: srcLength = u_strlen(src); michael@0: } michael@0: if(0 < srcLength && srcLength <= destCapacity){ michael@0: uprv_memcpy(dest,src,srcLength*U_SIZEOF_UCHAR); michael@0: } michael@0: if(pDestLength){ michael@0: *pDestLength = srcLength; michael@0: } michael@0: michael@0: u_terminateUChars(dest,destCapacity,srcLength,pErrorCode); michael@0: michael@0: return dest; michael@0: michael@0: #elif defined U_WCHAR_IS_UTF32 michael@0: michael@0: return u_strFromUTF32(dest, destCapacity, pDestLength, michael@0: (UChar32*)src, srcLength, pErrorCode); michael@0: michael@0: #else michael@0: michael@0: return _strFromWCS(dest,destCapacity,pDestLength,src,srcLength,pErrorCode); michael@0: michael@0: #endif michael@0: michael@0: } michael@0: michael@0: #endif /* #if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32) && !UCONFIG_NO_CONVERSION */