intl/icu/source/tools/toolutil/ucbuf.c

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/tools/toolutil/ucbuf.c	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,787 @@
     1.4 +/*
     1.5 +*******************************************************************************
     1.6 +*
     1.7 +*   Copyright (C) 1998-2011, International Business Machines
     1.8 +*   Corporation and others.  All Rights Reserved.
     1.9 +*
    1.10 +*******************************************************************************
    1.11 +*
    1.12 +* File ucbuf.c
    1.13 +*
    1.14 +* Modification History:
    1.15 +*
    1.16 +*   Date        Name        Description
    1.17 +*   05/10/01    Ram         Creation.
    1.18 +*******************************************************************************
    1.19 +*/
    1.20 +
    1.21 +#include "unicode/utypes.h"
    1.22 +#include "unicode/putil.h"
    1.23 +#include "unicode/uchar.h"
    1.24 +#include "unicode/ucnv.h"
    1.25 +#include "unicode/ucnv_err.h"
    1.26 +#include "unicode/ustring.h"
    1.27 +#include "unicode/utf16.h"
    1.28 +#include "filestrm.h"
    1.29 +#include "cstring.h"
    1.30 +#include "cmemory.h"
    1.31 +#include "ustrfmt.h"
    1.32 +#include "ucbuf.h"
    1.33 +#include <stdio.h>
    1.34 +
    1.35 +#if !UCONFIG_NO_CONVERSION
    1.36 +
    1.37 +
    1.38 +#define MAX_IN_BUF 1000
    1.39 +#define MAX_U_BUF 1500
    1.40 +#define CONTEXT_LEN 20
    1.41 +
    1.42 +struct UCHARBUF {
    1.43 +    UChar* buffer;
    1.44 +    UChar* currentPos;
    1.45 +    UChar* bufLimit;
    1.46 +    int32_t bufCapacity;
    1.47 +    int32_t remaining;
    1.48 +    int32_t signatureLength;
    1.49 +    FileStream* in;
    1.50 +    UConverter* conv;
    1.51 +    UBool showWarning; /* makes this API not produce any errors */
    1.52 +    UBool isBuffered;
    1.53 +};
    1.54 +
    1.55 +U_CAPI UBool U_EXPORT2
    1.56 +ucbuf_autodetect_fs(FileStream* in, const char** cp, UConverter** conv, int32_t* signatureLength, UErrorCode* error){
    1.57 +    char start[8];
    1.58 +    int32_t numRead;
    1.59 +
    1.60 +    UChar target[1]={ 0 };
    1.61 +    UChar* pTarget;
    1.62 +    const char* pStart;
    1.63 +
    1.64 +    /* read a few bytes */
    1.65 +    numRead=T_FileStream_read(in, start, sizeof(start));
    1.66 +
    1.67 +    *cp = ucnv_detectUnicodeSignature(start, numRead, signatureLength, error);
    1.68 +    
    1.69 +    /* unread the bytes beyond what was consumed for U+FEFF */
    1.70 +    T_FileStream_rewind(in);
    1.71 +    if (*signatureLength > 0) {
    1.72 +        T_FileStream_read(in, start, *signatureLength);
    1.73 +    }
    1.74 +
    1.75 +    if(*cp==NULL){
    1.76 +        *conv =NULL;
    1.77 +        return FALSE;
    1.78 +    }
    1.79 +
    1.80 +    /* open the converter for the detected Unicode charset */
    1.81 +    *conv = ucnv_open(*cp,error);
    1.82 +
    1.83 +    /* convert and ignore initial U+FEFF, and the buffer overflow */
    1.84 +    pTarget = target;
    1.85 +    pStart = start;
    1.86 +    ucnv_toUnicode(*conv, &pTarget, target+1, &pStart, start+*signatureLength, NULL, FALSE, error);
    1.87 +    *signatureLength = (int32_t)(pStart - start);
    1.88 +    if(*error==U_BUFFER_OVERFLOW_ERROR) {
    1.89 +        *error=U_ZERO_ERROR;
    1.90 +    }
    1.91 +
    1.92 +    /* verify that we successfully read exactly U+FEFF */
    1.93 +    if(U_SUCCESS(*error) && (pTarget!=(target+1) || target[0]!=0xfeff)) {
    1.94 +        *error=U_INTERNAL_PROGRAM_ERROR;
    1.95 +    }
    1.96 +
    1.97 +
    1.98 +    return TRUE; 
    1.99 +}
   1.100 +static UBool ucbuf_isCPKnown(const char* cp){
   1.101 +    if(ucnv_compareNames("UTF-8",cp)==0){
   1.102 +        return TRUE;
   1.103 +    }
   1.104 +    if(ucnv_compareNames("UTF-16BE",cp)==0){
   1.105 +        return TRUE;
   1.106 +    }
   1.107 +    if(ucnv_compareNames("UTF-16LE",cp)==0){
   1.108 +        return TRUE;
   1.109 +    }
   1.110 +    if(ucnv_compareNames("UTF-16",cp)==0){
   1.111 +        return TRUE;
   1.112 +    }
   1.113 +    if(ucnv_compareNames("UTF-32",cp)==0){
   1.114 +        return TRUE;
   1.115 +    }
   1.116 +    if(ucnv_compareNames("UTF-32BE",cp)==0){
   1.117 +        return TRUE;
   1.118 +    }
   1.119 +    if(ucnv_compareNames("UTF-32LE",cp)==0){
   1.120 +        return TRUE;
   1.121 +    }
   1.122 +    if(ucnv_compareNames("SCSU",cp)==0){
   1.123 +        return TRUE;
   1.124 +    }
   1.125 +    if(ucnv_compareNames("BOCU-1",cp)==0){
   1.126 +        return TRUE;
   1.127 +    }
   1.128 +    if(ucnv_compareNames("UTF-7",cp)==0){
   1.129 +        return TRUE;
   1.130 +    }
   1.131 +    return FALSE;
   1.132 +}
   1.133 +
   1.134 +U_CAPI FileStream * U_EXPORT2
   1.135 +ucbuf_autodetect(const char* fileName, const char** cp,UConverter** conv, int32_t* signatureLength,UErrorCode* error){
   1.136 +    FileStream* in=NULL;
   1.137 +    if(error==NULL || U_FAILURE(*error)){
   1.138 +        return NULL;
   1.139 +    }
   1.140 +    if(conv==NULL || cp==NULL || fileName==NULL){
   1.141 +        *error = U_ILLEGAL_ARGUMENT_ERROR;
   1.142 +        return NULL;
   1.143 +    }
   1.144 +    /* open the file */
   1.145 +    in= T_FileStream_open(fileName,"rb");
   1.146 +    
   1.147 +    if(in == NULL){
   1.148 +        *error=U_FILE_ACCESS_ERROR;
   1.149 +        return NULL;
   1.150 +    }
   1.151 +
   1.152 +    if(ucbuf_autodetect_fs(in,cp,conv,signatureLength,error)) {
   1.153 +        return in;
   1.154 +    } else {
   1.155 +        ucnv_close(*conv);
   1.156 +        *conv=NULL;
   1.157 +        T_FileStream_close(in);
   1.158 +        return NULL;
   1.159 +    }
   1.160 +}
   1.161 +
   1.162 +/* fill the uchar buffer */
   1.163 +static UCHARBUF*
   1.164 +ucbuf_fillucbuf( UCHARBUF* buf,UErrorCode* error){
   1.165 +    UChar* pTarget=NULL;
   1.166 +    UChar* target=NULL;
   1.167 +    const char* source=NULL;
   1.168 +    char  carr[MAX_IN_BUF] = {'\0'};
   1.169 +    char* cbuf =  carr;
   1.170 +    int32_t inputRead=0;
   1.171 +    int32_t outputWritten=0;
   1.172 +    int32_t offset=0;
   1.173 +    const char* sourceLimit =NULL;
   1.174 +    int32_t cbufSize=0;
   1.175 +    pTarget = buf->buffer;
   1.176 +    /* check if we arrived here without exhausting the buffer*/
   1.177 +    if(buf->currentPos<buf->bufLimit){
   1.178 +        offset = (int32_t)(buf->bufLimit-buf->currentPos);
   1.179 +        memmove(buf->buffer,buf->currentPos,offset* sizeof(UChar));
   1.180 +    }
   1.181 +
   1.182 +#if DEBUG
   1.183 +    memset(pTarget+offset,0xff,sizeof(UChar)*(MAX_IN_BUF-offset));
   1.184 +#endif
   1.185 +    if(buf->isBuffered){
   1.186 +        cbufSize = MAX_IN_BUF;
   1.187 +        /* read the file */
   1.188 +        inputRead=T_FileStream_read(buf->in,cbuf,cbufSize-offset);
   1.189 +        buf->remaining-=inputRead;
   1.190 +        
   1.191 +    }else{
   1.192 +        cbufSize = T_FileStream_size(buf->in);
   1.193 +        cbuf = (char*)uprv_malloc(cbufSize);
   1.194 +        if (cbuf == NULL) {
   1.195 +        	*error = U_MEMORY_ALLOCATION_ERROR;
   1.196 +        	return NULL;
   1.197 +        }
   1.198 +        inputRead= T_FileStream_read(buf->in,cbuf,cbufSize);
   1.199 +        buf->remaining-=inputRead;
   1.200 +    }
   1.201 +
   1.202 +    /* just to be sure...*/
   1.203 +    if ( 0 == inputRead )
   1.204 +       buf->remaining = 0;
   1.205 +
   1.206 +    target=pTarget;
   1.207 +    /* convert the bytes */
   1.208 +    if(buf->conv){
   1.209 +        /* set the callback to stop */
   1.210 +        UConverterToUCallback toUOldAction ;
   1.211 +        void* toUOldContext;
   1.212 +        void* toUNewContext=NULL;
   1.213 +        ucnv_setToUCallBack(buf->conv,
   1.214 +           UCNV_TO_U_CALLBACK_STOP,
   1.215 +           toUNewContext,
   1.216 +           &toUOldAction,
   1.217 +           (const void**)&toUOldContext,
   1.218 +           error);
   1.219 +        /* since state is saved in the converter we add offset to source*/
   1.220 +        target = pTarget+offset;
   1.221 +        source = cbuf;
   1.222 +        sourceLimit = source + inputRead;
   1.223 +        ucnv_toUnicode(buf->conv,&target,target+(buf->bufCapacity-offset),
   1.224 +                        &source,sourceLimit,NULL,
   1.225 +                        (UBool)(buf->remaining==0),error);
   1.226 +
   1.227 +        if(U_FAILURE(*error)){
   1.228 +            char context[CONTEXT_LEN+1];
   1.229 +            char preContext[CONTEXT_LEN+1];
   1.230 +            char postContext[CONTEXT_LEN+1];
   1.231 +            int8_t len = CONTEXT_LEN;
   1.232 +            int32_t start=0;
   1.233 +            int32_t stop =0;
   1.234 +            int32_t pos =0;
   1.235 +            /* use erro1 to preserve the error code */
   1.236 +            UErrorCode error1 =U_ZERO_ERROR;
   1.237 +            
   1.238 +            if( buf->showWarning==TRUE){
   1.239 +                fprintf(stderr,"\n###WARNING: Encountered abnormal bytes while"
   1.240 +                               " converting input stream to target encoding: %s\n",
   1.241 +                               u_errorName(*error));
   1.242 +            }
   1.243 +
   1.244 +
   1.245 +            /* now get the context chars */
   1.246 +            ucnv_getInvalidChars(buf->conv,context,&len,&error1);
   1.247 +            context[len]= 0 ; /* null terminate the buffer */
   1.248 +
   1.249 +            pos = (int32_t)(source - cbuf - len);
   1.250 +
   1.251 +            /* for pre-context */
   1.252 +            start = (pos <=CONTEXT_LEN)? 0 : (pos - (CONTEXT_LEN-1));
   1.253 +            stop  = pos-len;
   1.254 +
   1.255 +            memcpy(preContext,cbuf+start,stop-start);
   1.256 +            /* null terminate the buffer */
   1.257 +            preContext[stop-start] = 0;
   1.258 +
   1.259 +            /* for post-context */
   1.260 +            start = pos+len;
   1.261 +            stop  = (int32_t)(((pos+CONTEXT_LEN)<= (sourceLimit-cbuf) )? (pos+(CONTEXT_LEN-1)) : (sourceLimit-cbuf));
   1.262 +
   1.263 +            memcpy(postContext,source,stop-start);
   1.264 +            /* null terminate the buffer */
   1.265 +            postContext[stop-start] = 0;
   1.266 +
   1.267 +            if(buf->showWarning ==TRUE){
   1.268 +                /* print out the context */
   1.269 +                fprintf(stderr,"\tPre-context: %s\n",preContext);
   1.270 +                fprintf(stderr,"\tContext: %s\n",context);
   1.271 +                fprintf(stderr,"\tPost-context: %s\n", postContext);
   1.272 +            }
   1.273 +
   1.274 +            /* reset the converter */
   1.275 +            ucnv_reset(buf->conv);
   1.276 +
   1.277 +            /* set the call back to substitute
   1.278 +             * and restart conversion
   1.279 +             */
   1.280 +            ucnv_setToUCallBack(buf->conv,
   1.281 +               UCNV_TO_U_CALLBACK_SUBSTITUTE,
   1.282 +               toUNewContext,
   1.283 +               &toUOldAction,
   1.284 +               (const void**)&toUOldContext,
   1.285 +               &error1);
   1.286 +
   1.287 +            /* reset source and target start positions */
   1.288 +            target = pTarget+offset;
   1.289 +            source = cbuf;
   1.290 +
   1.291 +            /* re convert */
   1.292 +            ucnv_toUnicode(buf->conv,&target,target+(buf->bufCapacity-offset),
   1.293 +                            &source,sourceLimit,NULL,
   1.294 +                            (UBool)(buf->remaining==0),&error1);
   1.295 +
   1.296 +        }
   1.297 +        outputWritten = (int32_t)(target - pTarget);
   1.298 +
   1.299 +
   1.300 +#if DEBUG
   1.301 +        {
   1.302 +            int i;
   1.303 +            target = pTarget;
   1.304 +            for(i=0;i<numRead;i++){
   1.305 +              /*  printf("%c", (char)(*target++));*/
   1.306 +            }
   1.307 +        }
   1.308 +#endif
   1.309 +
   1.310 +    }else{
   1.311 +        u_charsToUChars(cbuf,target+offset,inputRead);
   1.312 +        outputWritten=((buf->remaining>cbufSize)? cbufSize:inputRead+offset);
   1.313 +    }
   1.314 +    buf->currentPos = pTarget;
   1.315 +    buf->bufLimit=pTarget+outputWritten;
   1.316 +    *buf->bufLimit=0; /*NUL terminate*/
   1.317 +    if(cbuf!=carr){
   1.318 +        uprv_free(cbuf);
   1.319 +    }
   1.320 +    return buf;
   1.321 +}
   1.322 +
   1.323 +
   1.324 +
   1.325 +/* get a UChar from the stream*/
   1.326 +U_CAPI int32_t U_EXPORT2
   1.327 +ucbuf_getc(UCHARBUF* buf,UErrorCode* error){
   1.328 +    if(error==NULL || U_FAILURE(*error)){
   1.329 +        return FALSE;
   1.330 +    }
   1.331 +    if(buf->currentPos>=buf->bufLimit){
   1.332 +        if(buf->remaining==0){
   1.333 +            return U_EOF;
   1.334 +        }
   1.335 +        buf=ucbuf_fillucbuf(buf,error);
   1.336 +        if(U_FAILURE(*error)){
   1.337 +            return U_EOF;
   1.338 +        }
   1.339 +    }
   1.340 +
   1.341 +    return *(buf->currentPos++);
   1.342 +}
   1.343 +
   1.344 +/* get a UChar32 from the stream*/
   1.345 +U_CAPI int32_t U_EXPORT2
   1.346 +ucbuf_getc32(UCHARBUF* buf,UErrorCode* error){
   1.347 +    int32_t retVal = (int32_t)U_EOF;
   1.348 +    if(error==NULL || U_FAILURE(*error)){
   1.349 +        return FALSE;
   1.350 +    }
   1.351 +    if(buf->currentPos+1>=buf->bufLimit){
   1.352 +        if(buf->remaining==0){
   1.353 +            return U_EOF;
   1.354 +        }
   1.355 +        buf=ucbuf_fillucbuf(buf,error);
   1.356 +        if(U_FAILURE(*error)){
   1.357 +            return U_EOF;
   1.358 +        }
   1.359 +    }
   1.360 +    if(U16_IS_LEAD(*(buf->currentPos))){
   1.361 +        retVal=U16_GET_SUPPLEMENTARY(buf->currentPos[0],buf->currentPos[1]);
   1.362 +        buf->currentPos+=2;
   1.363 +    }else{
   1.364 +        retVal = *(buf->currentPos++);
   1.365 +    }
   1.366 +    return retVal;
   1.367 +}
   1.368 +
   1.369 +/* u_unescapeAt() callback to return a UChar*/
   1.370 +static UChar U_CALLCONV
   1.371 +_charAt(int32_t offset, void *context) {
   1.372 +    return ((UCHARBUF*) context)->currentPos[offset];
   1.373 +}
   1.374 +
   1.375 +/* getc and escape it */
   1.376 +U_CAPI int32_t U_EXPORT2
   1.377 +ucbuf_getcx32(UCHARBUF* buf,UErrorCode* error) {
   1.378 +    int32_t length;
   1.379 +    int32_t offset;
   1.380 +    UChar32 c32,c1,c2;
   1.381 +    if(error==NULL || U_FAILURE(*error)){
   1.382 +        return FALSE;
   1.383 +    }
   1.384 +    /* Fill the buffer if it is empty */
   1.385 +    if (buf->currentPos >=buf->bufLimit-2) {
   1.386 +        ucbuf_fillucbuf(buf,error);
   1.387 +    }
   1.388 +
   1.389 +    /* Get the next character in the buffer */
   1.390 +    if (buf->currentPos < buf->bufLimit) {
   1.391 +        c1 = *(buf->currentPos)++;
   1.392 +    } else {
   1.393 +        c1 = U_EOF;
   1.394 +    }
   1.395 +
   1.396 +    c2 = *(buf->currentPos);
   1.397 +
   1.398 +    /* If it isn't a backslash, return it */
   1.399 +    if (c1 != 0x005C) {
   1.400 +        return c1;
   1.401 +    }
   1.402 +
   1.403 +    /* Determine the amount of data in the buffer */
   1.404 +    length = (int32_t)(buf->bufLimit - buf->currentPos);
   1.405 +
   1.406 +    /* The longest escape sequence is \Uhhhhhhhh; make sure
   1.407 +       we have at least that many characters */
   1.408 +    if (length < 10) {
   1.409 +
   1.410 +        /* fill the buffer */
   1.411 +        ucbuf_fillucbuf(buf,error);
   1.412 +        length = (int32_t)(buf->bufLimit - buf->buffer);
   1.413 +    }
   1.414 +
   1.415 +    /* Process the escape */
   1.416 +    offset = 0;
   1.417 +    c32 = u_unescapeAt(_charAt, &offset, length, (void*)buf);
   1.418 +
   1.419 +    /* check if u_unescapeAt unescaped and converted
   1.420 +     * to c32 or not
   1.421 +     */
   1.422 +    if(c32==0xFFFFFFFF){
   1.423 +        if(buf->showWarning) {
   1.424 +            char context[CONTEXT_LEN+1];
   1.425 +            int32_t len = CONTEXT_LEN;
   1.426 +            if(length < len) {
   1.427 +                len = length; 
   1.428 +            }
   1.429 +            context[len]= 0 ; /* null terminate the buffer */
   1.430 +            u_UCharsToChars( buf->currentPos, context, len);
   1.431 +            fprintf(stderr,"Bad escape: [%c%s]...\n", (int)c1, context);
   1.432 +        }
   1.433 +        *error= U_ILLEGAL_ESCAPE_SEQUENCE;
   1.434 +        return c1;
   1.435 +    }else if(c32!=c2 || (c32==0x0075 && c2==0x0075 && c1==0x005C) /* for \u0075 c2=0x0075 and c32==0x0075*/){
   1.436 +        /* Update the current buffer position */
   1.437 +        buf->currentPos += offset;
   1.438 +    }else{
   1.439 +        /* unescaping failed so we just return
   1.440 +         * c1 and not consume the buffer
   1.441 +         * this is useful for rules with escapes
   1.442 +         * in resouce bundles
   1.443 +         * eg: \' \\ \"
   1.444 +         */
   1.445 +        return c1;
   1.446 +    }
   1.447 +
   1.448 +    return c32;
   1.449 +}
   1.450 +
   1.451 +U_CAPI UCHARBUF* U_EXPORT2
   1.452 +ucbuf_open(const char* fileName,const char** cp,UBool showWarning, UBool buffered, UErrorCode* error){
   1.453 +
   1.454 +    FileStream* in = NULL; 
   1.455 +    int32_t fileSize=0;
   1.456 +    const char* knownCp;
   1.457 +    if(error==NULL || U_FAILURE(*error)){
   1.458 +        return NULL;
   1.459 +    }
   1.460 +    if(cp==NULL || fileName==NULL){
   1.461 +        *error = U_ILLEGAL_ARGUMENT_ERROR;
   1.462 +        return FALSE;
   1.463 +    }
   1.464 +    if (!uprv_strcmp(fileName, "-")) {
   1.465 +        in = T_FileStream_stdin();
   1.466 +    }else{ 
   1.467 +        in = T_FileStream_open(fileName, "rb");
   1.468 +    }
   1.469 +    
   1.470 +    if(in!=NULL){
   1.471 +        UCHARBUF* buf =(UCHARBUF*) uprv_malloc(sizeof(UCHARBUF));
   1.472 +        fileSize = T_FileStream_size(in);
   1.473 +        if(buf == NULL){
   1.474 +            *error = U_MEMORY_ALLOCATION_ERROR;
   1.475 +            T_FileStream_close(in);
   1.476 +            return NULL;
   1.477 +        }
   1.478 +        buf->in=in;
   1.479 +        buf->conv=NULL;
   1.480 +        buf->showWarning = showWarning;
   1.481 +        buf->isBuffered = buffered;
   1.482 +        buf->signatureLength=0;
   1.483 +        if(*cp==NULL || **cp=='\0'){
   1.484 +            /* don't have code page name... try to autodetect */
   1.485 +            ucbuf_autodetect_fs(in,cp,&buf->conv,&buf->signatureLength,error);
   1.486 +        }else if(ucbuf_isCPKnown(*cp)){
   1.487 +            /* discard BOM */
   1.488 +            ucbuf_autodetect_fs(in,&knownCp,&buf->conv,&buf->signatureLength,error);
   1.489 +        }
   1.490 +        if(U_SUCCESS(*error) && buf->conv==NULL) {
   1.491 +            buf->conv=ucnv_open(*cp,error);
   1.492 +        }
   1.493 +        if(U_FAILURE(*error)){
   1.494 +            ucnv_close(buf->conv);
   1.495 +            uprv_free(buf);
   1.496 +            T_FileStream_close(in);
   1.497 +            return NULL;
   1.498 +        }
   1.499 +        
   1.500 +        if((buf->conv==NULL) && (buf->showWarning==TRUE)){
   1.501 +            fprintf(stderr,"###WARNING: No converter defined. Using codepage of system.\n");
   1.502 +        }
   1.503 +        buf->remaining=fileSize-buf->signatureLength;
   1.504 +        if(buf->isBuffered){
   1.505 +            buf->bufCapacity=MAX_U_BUF;
   1.506 +        }else{
   1.507 +            buf->bufCapacity=buf->remaining+buf->signatureLength+1/*for terminating nul*/;               
   1.508 +        }
   1.509 +        buf->buffer=(UChar*) uprv_malloc(U_SIZEOF_UCHAR * buf->bufCapacity );
   1.510 +        if (buf->buffer == NULL) {
   1.511 +            *error = U_MEMORY_ALLOCATION_ERROR;
   1.512 +            ucbuf_close(buf);
   1.513 +            return NULL;
   1.514 +        }
   1.515 +        buf->currentPos=buf->buffer;
   1.516 +        buf->bufLimit=buf->buffer;
   1.517 +        if(U_FAILURE(*error)){
   1.518 +            fprintf(stderr, "Could not open codepage [%s]: %s\n", *cp, u_errorName(*error));
   1.519 +            ucbuf_close(buf);
   1.520 +            return NULL;
   1.521 +        }
   1.522 +        ucbuf_fillucbuf(buf,error);
   1.523 +        if(U_FAILURE(*error)){
   1.524 +            ucbuf_close(buf);
   1.525 +            return NULL;
   1.526 +        }
   1.527 +        return buf;
   1.528 +    }
   1.529 +    *error =U_FILE_ACCESS_ERROR;
   1.530 +    return NULL;
   1.531 +}
   1.532 +
   1.533 +
   1.534 +
   1.535 +/* TODO: this method will fail if at the
   1.536 + * begining of buffer and the uchar to unget
   1.537 + * is from the previous buffer. Need to implement
   1.538 + * system to take care of that situation.
   1.539 + */
   1.540 +U_CAPI void U_EXPORT2
   1.541 +ucbuf_ungetc(int32_t c,UCHARBUF* buf){
   1.542 +    /* decrement currentPos pointer
   1.543 +     * if not at the begining of buffer
   1.544 +     */
   1.545 +    if(buf->currentPos!=buf->buffer){
   1.546 +        if(*(buf->currentPos-1)==c){
   1.547 +            buf->currentPos--;
   1.548 +        } else {
   1.549 +            /* ungetc failed - did not match. */
   1.550 +        }
   1.551 +    } else {
   1.552 +       /* ungetc failed - beginning of buffer. */
   1.553 +    }
   1.554 +}
   1.555 +
   1.556 +/* frees the resources of UChar* buffer */
   1.557 +static void
   1.558 +ucbuf_closebuf(UCHARBUF* buf){
   1.559 +    uprv_free(buf->buffer);
   1.560 +    buf->buffer = NULL;
   1.561 +}
   1.562 +
   1.563 +/* close the buf and release resources*/
   1.564 +U_CAPI void U_EXPORT2
   1.565 +ucbuf_close(UCHARBUF* buf){
   1.566 +    if(buf!=NULL){
   1.567 +        if(buf->conv){
   1.568 +            ucnv_close(buf->conv);
   1.569 +        }
   1.570 +        T_FileStream_close(buf->in);
   1.571 +        ucbuf_closebuf(buf);
   1.572 +        uprv_free(buf);
   1.573 +    }
   1.574 +}
   1.575 +
   1.576 +/* rewind the buf and file stream */
   1.577 +U_CAPI void U_EXPORT2
   1.578 +ucbuf_rewind(UCHARBUF* buf,UErrorCode* error){
   1.579 +    if(error==NULL || U_FAILURE(*error)){
   1.580 +        return;
   1.581 +    }
   1.582 +    if(buf){
   1.583 +        buf->currentPos=buf->buffer;
   1.584 +        buf->bufLimit=buf->buffer;
   1.585 +        T_FileStream_rewind(buf->in);
   1.586 +        buf->remaining=T_FileStream_size(buf->in)-buf->signatureLength;
   1.587 +
   1.588 +        ucnv_resetToUnicode(buf->conv);
   1.589 +        if(buf->signatureLength>0) {
   1.590 +            UChar target[1]={ 0 };
   1.591 +            UChar* pTarget;
   1.592 +            char start[8];
   1.593 +            const char* pStart;
   1.594 +            int32_t numRead;
   1.595 +
   1.596 +            /* read the signature bytes */
   1.597 +            numRead=T_FileStream_read(buf->in, start, buf->signatureLength);
   1.598 +
   1.599 +            /* convert and ignore initial U+FEFF, and the buffer overflow */
   1.600 +            pTarget = target;
   1.601 +            pStart = start;
   1.602 +            ucnv_toUnicode(buf->conv, &pTarget, target+1, &pStart, start+numRead, NULL, FALSE, error);
   1.603 +            if(*error==U_BUFFER_OVERFLOW_ERROR) {
   1.604 +                *error=U_ZERO_ERROR;
   1.605 +            }
   1.606 +
   1.607 +            /* verify that we successfully read exactly U+FEFF */
   1.608 +            if(U_SUCCESS(*error) && (numRead!=buf->signatureLength || pTarget!=(target+1) || target[0]!=0xfeff)) {
   1.609 +                *error=U_INTERNAL_PROGRAM_ERROR;
   1.610 +            }
   1.611 +        }
   1.612 +    }
   1.613 +}
   1.614 +
   1.615 +
   1.616 +U_CAPI int32_t U_EXPORT2
   1.617 +ucbuf_size(UCHARBUF* buf){
   1.618 +    if(buf){
   1.619 +        if(buf->isBuffered){
   1.620 +            return (T_FileStream_size(buf->in)-buf->signatureLength)/ucnv_getMinCharSize(buf->conv);
   1.621 +        }else{
   1.622 +            return (int32_t)(buf->bufLimit - buf->buffer);
   1.623 +        }
   1.624 +    }
   1.625 +    return 0;
   1.626 +}
   1.627 +
   1.628 +U_CAPI const UChar* U_EXPORT2
   1.629 +ucbuf_getBuffer(UCHARBUF* buf,int32_t* len,UErrorCode* error){
   1.630 +    if(error==NULL || U_FAILURE(*error)){
   1.631 +        return NULL;
   1.632 +    }
   1.633 +    if(buf==NULL || len==NULL){
   1.634 +        *error = U_ILLEGAL_ARGUMENT_ERROR;
   1.635 +        return NULL;
   1.636 +    }
   1.637 +    *len = (int32_t)(buf->bufLimit - buf->buffer);
   1.638 +    return buf->buffer;
   1.639 +}
   1.640 +
   1.641 +U_CAPI const char* U_EXPORT2
   1.642 +ucbuf_resolveFileName(const char* inputDir, const char* fileName, char* target, int32_t* len, UErrorCode* status){
   1.643 +    int32_t requiredLen = 0;
   1.644 +    int32_t dirlen =  0;
   1.645 +    int32_t filelen = 0;
   1.646 +    if(status==NULL || U_FAILURE(*status)){
   1.647 +        return NULL;
   1.648 +    }
   1.649 +
   1.650 +    if(inputDir == NULL || fileName == NULL || len==NULL || (target==NULL && *len>0)){
   1.651 +        *status = U_ILLEGAL_ARGUMENT_ERROR;
   1.652 +        return NULL;
   1.653 +    }
   1.654 +
   1.655 +
   1.656 +    dirlen  = (int32_t)uprv_strlen(inputDir);
   1.657 +    filelen = (int32_t)uprv_strlen(fileName);
   1.658 +    if(inputDir[dirlen-1] != U_FILE_SEP_CHAR) {
   1.659 +        requiredLen = dirlen + filelen + 2;
   1.660 +        if((*len < requiredLen) || target==NULL){
   1.661 +            *len = requiredLen;
   1.662 +            *status = U_BUFFER_OVERFLOW_ERROR;
   1.663 +            return NULL;
   1.664 +        }
   1.665 +
   1.666 +        target[0] = '\0';
   1.667 +        /*
   1.668 +         * append the input dir to openFileName if the first char in 
   1.669 +         * filename is not file seperation char and the last char input directory is  not '.'.
   1.670 +         * This is to support :
   1.671 +         * genrb -s. /home/icu/data
   1.672 +         * genrb -s. icu/data
   1.673 +         * The user cannot mix notations like
   1.674 +         * genrb -s. /icu/data --- the absolute path specified. -s redundant
   1.675 +         * user should use
   1.676 +         * genrb -s. icu/data  --- start from CWD and look in icu/data dir
   1.677 +         */
   1.678 +        if( (fileName[0] != U_FILE_SEP_CHAR) && (inputDir[dirlen-1] !='.')){
   1.679 +            uprv_strcpy(target, inputDir);
   1.680 +            target[dirlen]     = U_FILE_SEP_CHAR;
   1.681 +        }
   1.682 +        target[dirlen + 1] = '\0';
   1.683 +    } else {
   1.684 +        requiredLen = dirlen + filelen + 1;
   1.685 +        if((*len < requiredLen) || target==NULL){
   1.686 +            *len = requiredLen;
   1.687 +            *status = U_BUFFER_OVERFLOW_ERROR;
   1.688 +            return NULL;
   1.689 +        }
   1.690 +        
   1.691 +        uprv_strcpy(target, inputDir);
   1.692 +    }
   1.693 +
   1.694 +    uprv_strcat(target, fileName);
   1.695 +    return target;
   1.696 +}
   1.697 +/*
   1.698 + * Unicode TR 13 says any of the below chars is
   1.699 + * a new line char in a readline function in addition
   1.700 + * to CR+LF combination which needs to be 
   1.701 + * handled seperately
   1.702 + */
   1.703 +static UBool ucbuf_isCharNewLine(UChar c){
   1.704 +    switch(c){
   1.705 +    case 0x000A: /* LF  */
   1.706 +    case 0x000D: /* CR  */
   1.707 +    case 0x000C: /* FF  */
   1.708 +    case 0x0085: /* NEL */
   1.709 +    case 0x2028: /* LS  */
   1.710 +    case 0x2029: /* PS  */
   1.711 +        return TRUE;
   1.712 +    default:
   1.713 +        return FALSE;
   1.714 +    }
   1.715 +}
   1.716 +
   1.717 +U_CAPI const UChar* U_EXPORT2
   1.718 +ucbuf_readline(UCHARBUF* buf,int32_t* len,UErrorCode* err){
   1.719 +    UChar* temp = buf->currentPos;
   1.720 +    UChar* savePos =NULL;
   1.721 +    UChar c=0x0000;
   1.722 +    if(buf->isBuffered){
   1.723 +        /* The input is buffered we have to do more
   1.724 +        * for returning a pointer U_TRUNCATED_CHAR_FOUND
   1.725 +        */
   1.726 +        for(;;){
   1.727 +            c = *temp++;
   1.728 +            if(buf->remaining==0){
   1.729 +                return NULL; /* end of file is reached return NULL */
   1.730 +            }
   1.731 +            if(temp>=buf->bufLimit && buf->currentPos == buf->buffer){
   1.732 +                *err= U_TRUNCATED_CHAR_FOUND;
   1.733 +                return NULL;
   1.734 +            }else{
   1.735 +                ucbuf_fillucbuf(buf,err);
   1.736 +                if(U_FAILURE(*err)){
   1.737 +                    return NULL; 
   1.738 +                }
   1.739 +            }
   1.740 +            /*
   1.741 +             * Accoding to TR 13 readLine functions must interpret
   1.742 +             * CR, CR+LF, LF, NEL, PS, LS or FF as line seperators
   1.743 +             */
   1.744 +            /* Windows CR LF */
   1.745 +            if(c ==0x0d && temp+1<=buf->bufLimit && *(temp+1) == 0x0a ){
   1.746 +                *len = (int32_t)(temp++ - buf->currentPos);
   1.747 +                savePos = buf->currentPos;
   1.748 +                buf->currentPos = temp;
   1.749 +                return savePos;
   1.750 +            }
   1.751 +            /* else */
   1.752 +
   1.753 +            if (temp>=buf->bufLimit|| ucbuf_isCharNewLine(c)){  /* Unipad inserts 2028 line separators! */
   1.754 +                *len = (int32_t)(temp - buf->currentPos);
   1.755 +                savePos = buf->currentPos;
   1.756 +                buf->currentPos = temp;
   1.757 +                return savePos;
   1.758 +            }
   1.759 +        }
   1.760 +    }else{
   1.761 +    /* we know that all input is read into the internal
   1.762 +    * buffer so we can safely return pointers
   1.763 +        */
   1.764 +        for(;;){
   1.765 +            c = *temp++;
   1.766 +            
   1.767 +            if(buf->currentPos==buf->bufLimit){
   1.768 +                return NULL; /* end of file is reached return NULL */
   1.769 +            }
   1.770 +            /* Windows CR LF */
   1.771 +            if(c ==0x0d && temp+1<=buf->bufLimit && *(temp+1) == 0x0a ){
   1.772 +                *len = (int32_t)(temp++ - buf->currentPos);
   1.773 +                savePos = buf->currentPos;
   1.774 +                buf->currentPos = temp;
   1.775 +                return savePos;
   1.776 +            }
   1.777 +            /* else */
   1.778 +            if (temp>=buf->bufLimit|| ucbuf_isCharNewLine(c)) {  /* Unipad inserts 2028 line separators! */
   1.779 +                *len = (int32_t)(temp - buf->currentPos);
   1.780 +                savePos = buf->currentPos;
   1.781 +                buf->currentPos = temp;
   1.782 +                return savePos;
   1.783 +            }
   1.784 +        }
   1.785 +    }
   1.786 +    /* not reached */
   1.787 +    /* A compiler warning will appear if all paths don't contain a return statement. */
   1.788 +/*    return NULL;*/
   1.789 +}
   1.790 +#endif

mercurial