intl/icu/source/common/ustr_wcs.cpp

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

     1 /*
     2 *******************************************************************************
     3 *
     4 *   Copyright (C) 2001-2012, International Business Machines
     5 *   Corporation and others.  All Rights Reserved.
     6 *
     7 *******************************************************************************
     8 *   file name:  ustr_wcs.cpp
     9 *   encoding:   US-ASCII
    10 *   tab size:   8 (not used)
    11 *   indentation:4
    12 *
    13 *   created on: 2004sep07
    14 *   created by: Markus W. Scherer
    15 *
    16 *   u_strToWCS() and u_strFromWCS() functions
    17 *   moved here from ustrtrns.c for better modularization.
    18 */
    20 #include "unicode/utypes.h"
    21 #include "unicode/ustring.h"
    22 #include "cstring.h"
    23 #include "cwchar.h"
    24 #include "cmemory.h"
    25 #include "ustr_imp.h"
    26 #include "ustr_cnv.h"
    28 #if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION
    30 #define _STACK_BUFFER_CAPACITY 1000
    31 #define _BUFFER_CAPACITY_MULTIPLIER 2
    33 #if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32)
    34 // TODO: We should use CharString for char buffers and UnicodeString for UChar buffers.
    35 // Then we could change this to work only with wchar_t buffers.
    36 static inline UBool 
    37 u_growAnyBufferFromStatic(void *context,
    38                        void **pBuffer, int32_t *pCapacity, int32_t reqCapacity,
    39                        int32_t length, int32_t size) {
    40     // Use char* not void* to avoid the compiler's strict-aliasing assumptions
    41     // and related warnings.
    42     char *newBuffer=(char *)uprv_malloc(reqCapacity*size);
    43     if(newBuffer!=NULL) {
    44         if(length>0) {
    45             uprv_memcpy(newBuffer, *pBuffer, length*size);
    46         }
    47         *pCapacity=reqCapacity;
    48     } else {
    49         *pCapacity=0;
    50     }
    52     /* release the old pBuffer if it was not statically allocated */
    53     if(*pBuffer!=(char *)context) {
    54         uprv_free(*pBuffer);
    55     }
    57     *pBuffer=newBuffer;
    58     return (UBool)(newBuffer!=NULL);
    59 }
    61 /* helper function */
    62 static wchar_t* 
    63 _strToWCS(wchar_t *dest, 
    64            int32_t destCapacity,
    65            int32_t *pDestLength,
    66            const UChar *src, 
    67            int32_t srcLength,
    68            UErrorCode *pErrorCode){
    70     char stackBuffer [_STACK_BUFFER_CAPACITY];
    71     char* tempBuf = stackBuffer;
    72     int32_t tempBufCapacity = _STACK_BUFFER_CAPACITY;
    73     char* tempBufLimit = stackBuffer + tempBufCapacity;
    74     UConverter* conv = NULL;
    75     char* saveBuf = tempBuf;
    76     wchar_t* intTarget=NULL;
    77     int32_t intTargetCapacity=0;
    78     int count=0,retVal=0;
    80     const UChar *pSrcLimit =NULL;
    81     const UChar *pSrc = src;
    83     conv = u_getDefaultConverter(pErrorCode);
    85     if(U_FAILURE(*pErrorCode)){
    86         return NULL;
    87     }
    89     if(srcLength == -1){
    90         srcLength = u_strlen(pSrc);
    91     }
    93     pSrcLimit = pSrc + srcLength;
    95     for(;;) {
    96         /* reset the error state */
    97         *pErrorCode = U_ZERO_ERROR;
    99         /* convert to chars using default converter */
   100         ucnv_fromUnicode(conv,&tempBuf,tempBufLimit,&pSrc,pSrcLimit,NULL,(UBool)(pSrc==pSrcLimit),pErrorCode);
   101         count =(tempBuf - saveBuf);
   103         /* This should rarely occur */
   104         if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR){
   105             tempBuf = saveBuf;
   107             /* we dont have enough room on the stack grow the buffer */
   108             int32_t newCapacity = 2 * srcLength;
   109             if(newCapacity <= tempBufCapacity) {
   110                 newCapacity = _BUFFER_CAPACITY_MULTIPLIER * tempBufCapacity;
   111             }
   112             if(!u_growAnyBufferFromStatic(stackBuffer,(void**) &tempBuf, &tempBufCapacity,
   113                     newCapacity, count, 1)) {
   114                 goto cleanup;
   115             }
   117            saveBuf = tempBuf;
   118            tempBufLimit = tempBuf + tempBufCapacity;
   119            tempBuf = tempBuf + count;
   121         } else {
   122             break;
   123         }
   124     }
   126     if(U_FAILURE(*pErrorCode)){
   127         goto cleanup;
   128     }
   130     /* done with conversion null terminate the char buffer */
   131     if(count>=tempBufCapacity){
   132         tempBuf = saveBuf;
   133         /* we dont have enough room on the stack grow the buffer */
   134         if(!u_growAnyBufferFromStatic(stackBuffer,(void**) &tempBuf, &tempBufCapacity, 
   135                 count+1, count, 1)) {
   136             goto cleanup;
   137         }              
   138        saveBuf = tempBuf;
   139     }
   141     saveBuf[count]=0;
   144     /* allocate more space than required 
   145      * here we assume that every char requires 
   146      * no more than 2 wchar_ts
   147      */
   148     intTargetCapacity =  (count * _BUFFER_CAPACITY_MULTIPLIER + 1) /*for null termination */;
   149     intTarget = (wchar_t*)uprv_malloc( intTargetCapacity * sizeof(wchar_t) );
   151     if(intTarget){
   153         int32_t nulLen = 0;
   154         int32_t remaining = intTargetCapacity;
   155         wchar_t* pIntTarget=intTarget;
   156         tempBuf = saveBuf;
   158         /* now convert the mbs to wcs */
   159         for(;;){
   161             /* we can call the system API since we are sure that
   162              * there is atleast 1 null in the input
   163              */
   164             retVal = uprv_mbstowcs(pIntTarget,(tempBuf+nulLen),remaining);
   166             if(retVal==-1){
   167                 *pErrorCode = U_INVALID_CHAR_FOUND;
   168                 break;
   169             }else if(retVal== remaining){/* should never occur */
   170                 int numWritten = (pIntTarget-intTarget);
   171                 u_growAnyBufferFromStatic(NULL,(void**) &intTarget,
   172                                           &intTargetCapacity,
   173                                           intTargetCapacity * _BUFFER_CAPACITY_MULTIPLIER,
   174                                           numWritten,
   175                                           sizeof(wchar_t));
   176                 pIntTarget = intTarget;
   177                 remaining=intTargetCapacity;
   179                 if(nulLen!=count){ /*there are embedded nulls*/
   180                     pIntTarget+=numWritten;
   181                     remaining-=numWritten;
   182                 }
   184             }else{
   185                 int32_t nulVal;
   186                 /*scan for nulls */
   187                 /* we donot check for limit since tempBuf is null terminated */
   188                 while(tempBuf[nulLen++] != 0){
   189                 }
   190                 nulVal = (nulLen < srcLength) ? 1 : 0; 
   191                 pIntTarget = pIntTarget + retVal+nulVal;
   192                 remaining -=(retVal+nulVal);
   194                 /* check if we have reached the source limit*/
   195                 if(nulLen>=(count)){
   196                     break;
   197                 }
   198             }
   199         }
   200         count = (int32_t)(pIntTarget-intTarget);
   202         if(0 < count && count <= destCapacity){
   203             uprv_memcpy(dest,intTarget,count*sizeof(wchar_t));
   204         }  
   206         if(pDestLength){
   207             *pDestLength = count;
   208         }
   210         /* free the allocated memory */
   211         uprv_free(intTarget);
   213     }else{
   214         *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
   215     }
   216 cleanup:
   217     /* are we still using stack buffer */
   218     if(stackBuffer != saveBuf){
   219         uprv_free(saveBuf);
   220     }
   221     u_terminateWChars(dest,destCapacity,count,pErrorCode);
   223     u_releaseDefaultConverter(conv);
   225     return dest;
   226 }
   227 #endif
   229 U_CAPI wchar_t* U_EXPORT2
   230 u_strToWCS(wchar_t *dest, 
   231            int32_t destCapacity,
   232            int32_t *pDestLength,
   233            const UChar *src, 
   234            int32_t srcLength,
   235            UErrorCode *pErrorCode){
   237     /* args check */
   238     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){
   239         return NULL;
   240     }
   242     if( (src==NULL && srcLength!=0) || srcLength < -1 ||
   243         (destCapacity<0) || (dest == NULL && destCapacity > 0)
   244     ) {
   245         *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
   246         return NULL;
   247     }
   249 #ifdef U_WCHAR_IS_UTF16
   250     /* wchar_t is UTF-16 just do a memcpy */
   251     if(srcLength == -1){
   252         srcLength = u_strlen(src);
   253     }
   254     if(0 < srcLength && srcLength <= destCapacity){
   255         uprv_memcpy(dest,src,srcLength*U_SIZEOF_UCHAR);
   256     }
   257     if(pDestLength){
   258        *pDestLength = srcLength;
   259     }
   261     u_terminateUChars(dest,destCapacity,srcLength,pErrorCode);
   263     return dest;
   265 #elif defined U_WCHAR_IS_UTF32
   267     return (wchar_t*)u_strToUTF32((UChar32*)dest, destCapacity, pDestLength,
   268                                   src, srcLength, pErrorCode);
   270 #else
   272     return _strToWCS(dest,destCapacity,pDestLength,src,srcLength, pErrorCode);
   274 #endif
   276 }
   278 #if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32)
   279 /* helper function */
   280 static UChar* 
   281 _strFromWCS( UChar   *dest,
   282              int32_t destCapacity, 
   283              int32_t *pDestLength,
   284              const wchar_t *src,
   285              int32_t srcLength,
   286              UErrorCode *pErrorCode)
   287 {
   288     int32_t retVal =0, count =0 ;
   289     UConverter* conv = NULL;
   290     UChar* pTarget = NULL;
   291     UChar* pTargetLimit = NULL;
   292     UChar* target = NULL;
   294     UChar uStack [_STACK_BUFFER_CAPACITY];
   296     wchar_t wStack[_STACK_BUFFER_CAPACITY];
   297     wchar_t* pWStack = wStack;
   300     char cStack[_STACK_BUFFER_CAPACITY];
   301     int32_t cStackCap = _STACK_BUFFER_CAPACITY;
   302     char* pCSrc=cStack;
   303     char* pCSave=pCSrc;
   304     char* pCSrcLimit=NULL;
   306     const wchar_t* pSrc = src;
   307     const wchar_t* pSrcLimit = NULL;
   309     if(srcLength ==-1){
   310         /* if the wchar_t source is null terminated we can safely
   311          * assume that there are no embedded nulls, this is a fast
   312          * path for null terminated strings.
   313          */
   314         for(;;){
   315             /* convert wchars  to chars */
   316             retVal = uprv_wcstombs(pCSrc,src, cStackCap);
   318             if(retVal == -1){
   319                 *pErrorCode = U_ILLEGAL_CHAR_FOUND;
   320                 goto cleanup;
   321             }else if(retVal >= (cStackCap-1)){
   322                 /* Should rarely occur */
   323                 u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap,
   324                     cStackCap * _BUFFER_CAPACITY_MULTIPLIER, 0, sizeof(char));
   325                 pCSave = pCSrc;
   326             }else{
   327                 /* converted every thing */
   328                 pCSrc = pCSrc+retVal;
   329                 break;
   330             }
   331         }
   333     }else{
   334         /* here the source is not null terminated 
   335          * so it may have nulls embeded and we need to
   336          * do some extra processing 
   337          */
   338         int32_t remaining =cStackCap;
   340         pSrcLimit = src + srcLength;
   342         for(;;){
   343             register int32_t nulLen = 0;
   345             /* find nulls in the string */
   346             while(nulLen<srcLength && pSrc[nulLen++]!=0){
   347             }
   349             if((pSrc+nulLen) < pSrcLimit){
   350                 /* check if we have enough room in pCSrc */
   351                 if(remaining < (nulLen * MB_CUR_MAX)){
   352                     /* should rarely occur */
   353                     int32_t len = (pCSrc-pCSave);
   354                     pCSrc = pCSave;
   355                     /* we do not have enough room so grow the buffer*/
   356                     u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap,
   357                            _BUFFER_CAPACITY_MULTIPLIER*cStackCap+(nulLen*MB_CUR_MAX),len,sizeof(char));
   359                     pCSave = pCSrc;
   360                     pCSrc = pCSave+len;
   361                     remaining = cStackCap-(pCSrc - pCSave);
   362                 }
   364                 /* we have found a null  so convert the 
   365                  * chunk from begining of non-null char to null
   366                  */
   367                 retVal = uprv_wcstombs(pCSrc,pSrc,remaining);
   369                 if(retVal==-1){
   370                     /* an error occurred bail out */
   371                     *pErrorCode = U_ILLEGAL_CHAR_FOUND;
   372                     goto cleanup;
   373                 }
   375                 pCSrc += retVal+1 /* already null terminated */;
   377                 pSrc += nulLen; /* skip past the null */
   378                 srcLength-=nulLen; /* decrement the srcLength */
   379                 remaining -= (pCSrc-pCSave);
   382             }else{
   383                 /* the source is not null terminated and we are 
   384                  * end of source so we copy the source to a temp buffer
   385                  * null terminate it and convert wchar_ts to chars
   386                  */
   387                 if(nulLen >= _STACK_BUFFER_CAPACITY){
   388                     /* Should rarely occcur */
   389                     /* allocate new buffer buffer */
   390                     pWStack =(wchar_t*) uprv_malloc(sizeof(wchar_t) * (nulLen + 1));
   391                     if(pWStack==NULL){
   392                         *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
   393                         goto cleanup;
   394                     }
   395                 }
   396                 if(nulLen>0){
   397                     /* copy the contents to tempStack */
   398                     uprv_memcpy(pWStack,pSrc,nulLen*sizeof(wchar_t));
   399                 }
   401                 /* null terminate the tempBuffer */
   402                 pWStack[nulLen] =0 ;
   404                 if(remaining < (nulLen * MB_CUR_MAX)){
   405                     /* Should rarely occur */
   406                     int32_t len = (pCSrc-pCSave);
   407                     pCSrc = pCSave;
   408                     /* we do not have enough room so grow the buffer*/
   409                     u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap,
   410                            cStackCap+(nulLen*MB_CUR_MAX),len,sizeof(char));
   412                     pCSave = pCSrc;
   413                     pCSrc = pCSave+len;
   414                     remaining = cStackCap-(pCSrc - pCSave);
   415                 }
   416                 /* convert to chars */
   417                 retVal = uprv_wcstombs(pCSrc,pWStack,remaining);
   419                 pCSrc += retVal;
   420                 pSrc  += nulLen;
   421                 srcLength-=nulLen; /* decrement the srcLength */
   422                 break;
   423             }
   424         }
   425     }
   427     /* OK..now we have converted from wchar_ts to chars now 
   428      * convert chars to UChars 
   429      */
   430     pCSrcLimit = pCSrc;
   431     pCSrc = pCSave;
   432     pTarget = target= dest;
   433     pTargetLimit = dest + destCapacity;    
   435     conv= u_getDefaultConverter(pErrorCode);
   437     if(U_FAILURE(*pErrorCode)|| conv==NULL){
   438         goto cleanup;
   439     }
   441     for(;;) {
   443         *pErrorCode = U_ZERO_ERROR;
   445         /* convert to stack buffer*/
   446         ucnv_toUnicode(conv,&pTarget,pTargetLimit,(const char**)&pCSrc,pCSrcLimit,NULL,(UBool)(pCSrc==pCSrcLimit),pErrorCode);
   448         /* increment count to number written to stack */
   449         count+= pTarget - target;
   451         if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR){
   452             target = uStack;
   453             pTarget = uStack;
   454             pTargetLimit = uStack + _STACK_BUFFER_CAPACITY;
   455         } else {
   456             break;
   457         }
   459     }
   461     if(pDestLength){
   462         *pDestLength =count;
   463     }
   465     u_terminateUChars(dest,destCapacity,count,pErrorCode);
   467 cleanup:
   469     if(cStack != pCSave){
   470         uprv_free(pCSave);
   471     }
   473     if(wStack != pWStack){
   474         uprv_free(pWStack);
   475     }
   477     u_releaseDefaultConverter(conv);
   479     return dest;
   480 }
   481 #endif
   483 U_CAPI UChar* U_EXPORT2
   484 u_strFromWCS(UChar   *dest,
   485              int32_t destCapacity, 
   486              int32_t *pDestLength,
   487              const wchar_t *src,
   488              int32_t srcLength,
   489              UErrorCode *pErrorCode)
   490 {
   492     /* args check */
   493     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){
   494         return NULL;
   495     }
   497     if( (src==NULL && srcLength!=0) || srcLength < -1 ||
   498         (destCapacity<0) || (dest == NULL && destCapacity > 0)
   499     ) {
   500         *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
   501         return NULL;
   502     }
   504 #ifdef U_WCHAR_IS_UTF16
   505     /* wchar_t is UTF-16 just do a memcpy */
   506     if(srcLength == -1){
   507         srcLength = u_strlen(src);
   508     }
   509     if(0 < srcLength && srcLength <= destCapacity){
   510         uprv_memcpy(dest,src,srcLength*U_SIZEOF_UCHAR);
   511     }
   512     if(pDestLength){
   513        *pDestLength = srcLength;
   514     }
   516     u_terminateUChars(dest,destCapacity,srcLength,pErrorCode);
   518     return dest;
   520 #elif defined U_WCHAR_IS_UTF32
   522     return u_strFromUTF32(dest, destCapacity, pDestLength,
   523                           (UChar32*)src, srcLength, pErrorCode);
   525 #else
   527     return _strFromWCS(dest,destCapacity,pDestLength,src,srcLength,pErrorCode);  
   529 #endif
   531 }
   533 #endif /* #if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION */

mercurial