1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/tools/toolutil/ucbuf.c Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,787 @@ 1.4 +/* 1.5 +******************************************************************************* 1.6 +* 1.7 +* Copyright (C) 1998-2011, International Business Machines 1.8 +* Corporation and others. All Rights Reserved. 1.9 +* 1.10 +******************************************************************************* 1.11 +* 1.12 +* File ucbuf.c 1.13 +* 1.14 +* Modification History: 1.15 +* 1.16 +* Date Name Description 1.17 +* 05/10/01 Ram Creation. 1.18 +******************************************************************************* 1.19 +*/ 1.20 + 1.21 +#include "unicode/utypes.h" 1.22 +#include "unicode/putil.h" 1.23 +#include "unicode/uchar.h" 1.24 +#include "unicode/ucnv.h" 1.25 +#include "unicode/ucnv_err.h" 1.26 +#include "unicode/ustring.h" 1.27 +#include "unicode/utf16.h" 1.28 +#include "filestrm.h" 1.29 +#include "cstring.h" 1.30 +#include "cmemory.h" 1.31 +#include "ustrfmt.h" 1.32 +#include "ucbuf.h" 1.33 +#include <stdio.h> 1.34 + 1.35 +#if !UCONFIG_NO_CONVERSION 1.36 + 1.37 + 1.38 +#define MAX_IN_BUF 1000 1.39 +#define MAX_U_BUF 1500 1.40 +#define CONTEXT_LEN 20 1.41 + 1.42 +struct UCHARBUF { 1.43 + UChar* buffer; 1.44 + UChar* currentPos; 1.45 + UChar* bufLimit; 1.46 + int32_t bufCapacity; 1.47 + int32_t remaining; 1.48 + int32_t signatureLength; 1.49 + FileStream* in; 1.50 + UConverter* conv; 1.51 + UBool showWarning; /* makes this API not produce any errors */ 1.52 + UBool isBuffered; 1.53 +}; 1.54 + 1.55 +U_CAPI UBool U_EXPORT2 1.56 +ucbuf_autodetect_fs(FileStream* in, const char** cp, UConverter** conv, int32_t* signatureLength, UErrorCode* error){ 1.57 + char start[8]; 1.58 + int32_t numRead; 1.59 + 1.60 + UChar target[1]={ 0 }; 1.61 + UChar* pTarget; 1.62 + const char* pStart; 1.63 + 1.64 + /* read a few bytes */ 1.65 + numRead=T_FileStream_read(in, start, sizeof(start)); 1.66 + 1.67 + *cp = ucnv_detectUnicodeSignature(start, numRead, signatureLength, error); 1.68 + 1.69 + /* unread the bytes beyond what was consumed for U+FEFF */ 1.70 + T_FileStream_rewind(in); 1.71 + if (*signatureLength > 0) { 1.72 + T_FileStream_read(in, start, *signatureLength); 1.73 + } 1.74 + 1.75 + if(*cp==NULL){ 1.76 + *conv =NULL; 1.77 + return FALSE; 1.78 + } 1.79 + 1.80 + /* open the converter for the detected Unicode charset */ 1.81 + *conv = ucnv_open(*cp,error); 1.82 + 1.83 + /* convert and ignore initial U+FEFF, and the buffer overflow */ 1.84 + pTarget = target; 1.85 + pStart = start; 1.86 + ucnv_toUnicode(*conv, &pTarget, target+1, &pStart, start+*signatureLength, NULL, FALSE, error); 1.87 + *signatureLength = (int32_t)(pStart - start); 1.88 + if(*error==U_BUFFER_OVERFLOW_ERROR) { 1.89 + *error=U_ZERO_ERROR; 1.90 + } 1.91 + 1.92 + /* verify that we successfully read exactly U+FEFF */ 1.93 + if(U_SUCCESS(*error) && (pTarget!=(target+1) || target[0]!=0xfeff)) { 1.94 + *error=U_INTERNAL_PROGRAM_ERROR; 1.95 + } 1.96 + 1.97 + 1.98 + return TRUE; 1.99 +} 1.100 +static UBool ucbuf_isCPKnown(const char* cp){ 1.101 + if(ucnv_compareNames("UTF-8",cp)==0){ 1.102 + return TRUE; 1.103 + } 1.104 + if(ucnv_compareNames("UTF-16BE",cp)==0){ 1.105 + return TRUE; 1.106 + } 1.107 + if(ucnv_compareNames("UTF-16LE",cp)==0){ 1.108 + return TRUE; 1.109 + } 1.110 + if(ucnv_compareNames("UTF-16",cp)==0){ 1.111 + return TRUE; 1.112 + } 1.113 + if(ucnv_compareNames("UTF-32",cp)==0){ 1.114 + return TRUE; 1.115 + } 1.116 + if(ucnv_compareNames("UTF-32BE",cp)==0){ 1.117 + return TRUE; 1.118 + } 1.119 + if(ucnv_compareNames("UTF-32LE",cp)==0){ 1.120 + return TRUE; 1.121 + } 1.122 + if(ucnv_compareNames("SCSU",cp)==0){ 1.123 + return TRUE; 1.124 + } 1.125 + if(ucnv_compareNames("BOCU-1",cp)==0){ 1.126 + return TRUE; 1.127 + } 1.128 + if(ucnv_compareNames("UTF-7",cp)==0){ 1.129 + return TRUE; 1.130 + } 1.131 + return FALSE; 1.132 +} 1.133 + 1.134 +U_CAPI FileStream * U_EXPORT2 1.135 +ucbuf_autodetect(const char* fileName, const char** cp,UConverter** conv, int32_t* signatureLength,UErrorCode* error){ 1.136 + FileStream* in=NULL; 1.137 + if(error==NULL || U_FAILURE(*error)){ 1.138 + return NULL; 1.139 + } 1.140 + if(conv==NULL || cp==NULL || fileName==NULL){ 1.141 + *error = U_ILLEGAL_ARGUMENT_ERROR; 1.142 + return NULL; 1.143 + } 1.144 + /* open the file */ 1.145 + in= T_FileStream_open(fileName,"rb"); 1.146 + 1.147 + if(in == NULL){ 1.148 + *error=U_FILE_ACCESS_ERROR; 1.149 + return NULL; 1.150 + } 1.151 + 1.152 + if(ucbuf_autodetect_fs(in,cp,conv,signatureLength,error)) { 1.153 + return in; 1.154 + } else { 1.155 + ucnv_close(*conv); 1.156 + *conv=NULL; 1.157 + T_FileStream_close(in); 1.158 + return NULL; 1.159 + } 1.160 +} 1.161 + 1.162 +/* fill the uchar buffer */ 1.163 +static UCHARBUF* 1.164 +ucbuf_fillucbuf( UCHARBUF* buf,UErrorCode* error){ 1.165 + UChar* pTarget=NULL; 1.166 + UChar* target=NULL; 1.167 + const char* source=NULL; 1.168 + char carr[MAX_IN_BUF] = {'\0'}; 1.169 + char* cbuf = carr; 1.170 + int32_t inputRead=0; 1.171 + int32_t outputWritten=0; 1.172 + int32_t offset=0; 1.173 + const char* sourceLimit =NULL; 1.174 + int32_t cbufSize=0; 1.175 + pTarget = buf->buffer; 1.176 + /* check if we arrived here without exhausting the buffer*/ 1.177 + if(buf->currentPos<buf->bufLimit){ 1.178 + offset = (int32_t)(buf->bufLimit-buf->currentPos); 1.179 + memmove(buf->buffer,buf->currentPos,offset* sizeof(UChar)); 1.180 + } 1.181 + 1.182 +#if DEBUG 1.183 + memset(pTarget+offset,0xff,sizeof(UChar)*(MAX_IN_BUF-offset)); 1.184 +#endif 1.185 + if(buf->isBuffered){ 1.186 + cbufSize = MAX_IN_BUF; 1.187 + /* read the file */ 1.188 + inputRead=T_FileStream_read(buf->in,cbuf,cbufSize-offset); 1.189 + buf->remaining-=inputRead; 1.190 + 1.191 + }else{ 1.192 + cbufSize = T_FileStream_size(buf->in); 1.193 + cbuf = (char*)uprv_malloc(cbufSize); 1.194 + if (cbuf == NULL) { 1.195 + *error = U_MEMORY_ALLOCATION_ERROR; 1.196 + return NULL; 1.197 + } 1.198 + inputRead= T_FileStream_read(buf->in,cbuf,cbufSize); 1.199 + buf->remaining-=inputRead; 1.200 + } 1.201 + 1.202 + /* just to be sure...*/ 1.203 + if ( 0 == inputRead ) 1.204 + buf->remaining = 0; 1.205 + 1.206 + target=pTarget; 1.207 + /* convert the bytes */ 1.208 + if(buf->conv){ 1.209 + /* set the callback to stop */ 1.210 + UConverterToUCallback toUOldAction ; 1.211 + void* toUOldContext; 1.212 + void* toUNewContext=NULL; 1.213 + ucnv_setToUCallBack(buf->conv, 1.214 + UCNV_TO_U_CALLBACK_STOP, 1.215 + toUNewContext, 1.216 + &toUOldAction, 1.217 + (const void**)&toUOldContext, 1.218 + error); 1.219 + /* since state is saved in the converter we add offset to source*/ 1.220 + target = pTarget+offset; 1.221 + source = cbuf; 1.222 + sourceLimit = source + inputRead; 1.223 + ucnv_toUnicode(buf->conv,&target,target+(buf->bufCapacity-offset), 1.224 + &source,sourceLimit,NULL, 1.225 + (UBool)(buf->remaining==0),error); 1.226 + 1.227 + if(U_FAILURE(*error)){ 1.228 + char context[CONTEXT_LEN+1]; 1.229 + char preContext[CONTEXT_LEN+1]; 1.230 + char postContext[CONTEXT_LEN+1]; 1.231 + int8_t len = CONTEXT_LEN; 1.232 + int32_t start=0; 1.233 + int32_t stop =0; 1.234 + int32_t pos =0; 1.235 + /* use erro1 to preserve the error code */ 1.236 + UErrorCode error1 =U_ZERO_ERROR; 1.237 + 1.238 + if( buf->showWarning==TRUE){ 1.239 + fprintf(stderr,"\n###WARNING: Encountered abnormal bytes while" 1.240 + " converting input stream to target encoding: %s\n", 1.241 + u_errorName(*error)); 1.242 + } 1.243 + 1.244 + 1.245 + /* now get the context chars */ 1.246 + ucnv_getInvalidChars(buf->conv,context,&len,&error1); 1.247 + context[len]= 0 ; /* null terminate the buffer */ 1.248 + 1.249 + pos = (int32_t)(source - cbuf - len); 1.250 + 1.251 + /* for pre-context */ 1.252 + start = (pos <=CONTEXT_LEN)? 0 : (pos - (CONTEXT_LEN-1)); 1.253 + stop = pos-len; 1.254 + 1.255 + memcpy(preContext,cbuf+start,stop-start); 1.256 + /* null terminate the buffer */ 1.257 + preContext[stop-start] = 0; 1.258 + 1.259 + /* for post-context */ 1.260 + start = pos+len; 1.261 + stop = (int32_t)(((pos+CONTEXT_LEN)<= (sourceLimit-cbuf) )? (pos+(CONTEXT_LEN-1)) : (sourceLimit-cbuf)); 1.262 + 1.263 + memcpy(postContext,source,stop-start); 1.264 + /* null terminate the buffer */ 1.265 + postContext[stop-start] = 0; 1.266 + 1.267 + if(buf->showWarning ==TRUE){ 1.268 + /* print out the context */ 1.269 + fprintf(stderr,"\tPre-context: %s\n",preContext); 1.270 + fprintf(stderr,"\tContext: %s\n",context); 1.271 + fprintf(stderr,"\tPost-context: %s\n", postContext); 1.272 + } 1.273 + 1.274 + /* reset the converter */ 1.275 + ucnv_reset(buf->conv); 1.276 + 1.277 + /* set the call back to substitute 1.278 + * and restart conversion 1.279 + */ 1.280 + ucnv_setToUCallBack(buf->conv, 1.281 + UCNV_TO_U_CALLBACK_SUBSTITUTE, 1.282 + toUNewContext, 1.283 + &toUOldAction, 1.284 + (const void**)&toUOldContext, 1.285 + &error1); 1.286 + 1.287 + /* reset source and target start positions */ 1.288 + target = pTarget+offset; 1.289 + source = cbuf; 1.290 + 1.291 + /* re convert */ 1.292 + ucnv_toUnicode(buf->conv,&target,target+(buf->bufCapacity-offset), 1.293 + &source,sourceLimit,NULL, 1.294 + (UBool)(buf->remaining==0),&error1); 1.295 + 1.296 + } 1.297 + outputWritten = (int32_t)(target - pTarget); 1.298 + 1.299 + 1.300 +#if DEBUG 1.301 + { 1.302 + int i; 1.303 + target = pTarget; 1.304 + for(i=0;i<numRead;i++){ 1.305 + /* printf("%c", (char)(*target++));*/ 1.306 + } 1.307 + } 1.308 +#endif 1.309 + 1.310 + }else{ 1.311 + u_charsToUChars(cbuf,target+offset,inputRead); 1.312 + outputWritten=((buf->remaining>cbufSize)? cbufSize:inputRead+offset); 1.313 + } 1.314 + buf->currentPos = pTarget; 1.315 + buf->bufLimit=pTarget+outputWritten; 1.316 + *buf->bufLimit=0; /*NUL terminate*/ 1.317 + if(cbuf!=carr){ 1.318 + uprv_free(cbuf); 1.319 + } 1.320 + return buf; 1.321 +} 1.322 + 1.323 + 1.324 + 1.325 +/* get a UChar from the stream*/ 1.326 +U_CAPI int32_t U_EXPORT2 1.327 +ucbuf_getc(UCHARBUF* buf,UErrorCode* error){ 1.328 + if(error==NULL || U_FAILURE(*error)){ 1.329 + return FALSE; 1.330 + } 1.331 + if(buf->currentPos>=buf->bufLimit){ 1.332 + if(buf->remaining==0){ 1.333 + return U_EOF; 1.334 + } 1.335 + buf=ucbuf_fillucbuf(buf,error); 1.336 + if(U_FAILURE(*error)){ 1.337 + return U_EOF; 1.338 + } 1.339 + } 1.340 + 1.341 + return *(buf->currentPos++); 1.342 +} 1.343 + 1.344 +/* get a UChar32 from the stream*/ 1.345 +U_CAPI int32_t U_EXPORT2 1.346 +ucbuf_getc32(UCHARBUF* buf,UErrorCode* error){ 1.347 + int32_t retVal = (int32_t)U_EOF; 1.348 + if(error==NULL || U_FAILURE(*error)){ 1.349 + return FALSE; 1.350 + } 1.351 + if(buf->currentPos+1>=buf->bufLimit){ 1.352 + if(buf->remaining==0){ 1.353 + return U_EOF; 1.354 + } 1.355 + buf=ucbuf_fillucbuf(buf,error); 1.356 + if(U_FAILURE(*error)){ 1.357 + return U_EOF; 1.358 + } 1.359 + } 1.360 + if(U16_IS_LEAD(*(buf->currentPos))){ 1.361 + retVal=U16_GET_SUPPLEMENTARY(buf->currentPos[0],buf->currentPos[1]); 1.362 + buf->currentPos+=2; 1.363 + }else{ 1.364 + retVal = *(buf->currentPos++); 1.365 + } 1.366 + return retVal; 1.367 +} 1.368 + 1.369 +/* u_unescapeAt() callback to return a UChar*/ 1.370 +static UChar U_CALLCONV 1.371 +_charAt(int32_t offset, void *context) { 1.372 + return ((UCHARBUF*) context)->currentPos[offset]; 1.373 +} 1.374 + 1.375 +/* getc and escape it */ 1.376 +U_CAPI int32_t U_EXPORT2 1.377 +ucbuf_getcx32(UCHARBUF* buf,UErrorCode* error) { 1.378 + int32_t length; 1.379 + int32_t offset; 1.380 + UChar32 c32,c1,c2; 1.381 + if(error==NULL || U_FAILURE(*error)){ 1.382 + return FALSE; 1.383 + } 1.384 + /* Fill the buffer if it is empty */ 1.385 + if (buf->currentPos >=buf->bufLimit-2) { 1.386 + ucbuf_fillucbuf(buf,error); 1.387 + } 1.388 + 1.389 + /* Get the next character in the buffer */ 1.390 + if (buf->currentPos < buf->bufLimit) { 1.391 + c1 = *(buf->currentPos)++; 1.392 + } else { 1.393 + c1 = U_EOF; 1.394 + } 1.395 + 1.396 + c2 = *(buf->currentPos); 1.397 + 1.398 + /* If it isn't a backslash, return it */ 1.399 + if (c1 != 0x005C) { 1.400 + return c1; 1.401 + } 1.402 + 1.403 + /* Determine the amount of data in the buffer */ 1.404 + length = (int32_t)(buf->bufLimit - buf->currentPos); 1.405 + 1.406 + /* The longest escape sequence is \Uhhhhhhhh; make sure 1.407 + we have at least that many characters */ 1.408 + if (length < 10) { 1.409 + 1.410 + /* fill the buffer */ 1.411 + ucbuf_fillucbuf(buf,error); 1.412 + length = (int32_t)(buf->bufLimit - buf->buffer); 1.413 + } 1.414 + 1.415 + /* Process the escape */ 1.416 + offset = 0; 1.417 + c32 = u_unescapeAt(_charAt, &offset, length, (void*)buf); 1.418 + 1.419 + /* check if u_unescapeAt unescaped and converted 1.420 + * to c32 or not 1.421 + */ 1.422 + if(c32==0xFFFFFFFF){ 1.423 + if(buf->showWarning) { 1.424 + char context[CONTEXT_LEN+1]; 1.425 + int32_t len = CONTEXT_LEN; 1.426 + if(length < len) { 1.427 + len = length; 1.428 + } 1.429 + context[len]= 0 ; /* null terminate the buffer */ 1.430 + u_UCharsToChars( buf->currentPos, context, len); 1.431 + fprintf(stderr,"Bad escape: [%c%s]...\n", (int)c1, context); 1.432 + } 1.433 + *error= U_ILLEGAL_ESCAPE_SEQUENCE; 1.434 + return c1; 1.435 + }else if(c32!=c2 || (c32==0x0075 && c2==0x0075 && c1==0x005C) /* for \u0075 c2=0x0075 and c32==0x0075*/){ 1.436 + /* Update the current buffer position */ 1.437 + buf->currentPos += offset; 1.438 + }else{ 1.439 + /* unescaping failed so we just return 1.440 + * c1 and not consume the buffer 1.441 + * this is useful for rules with escapes 1.442 + * in resouce bundles 1.443 + * eg: \' \\ \" 1.444 + */ 1.445 + return c1; 1.446 + } 1.447 + 1.448 + return c32; 1.449 +} 1.450 + 1.451 +U_CAPI UCHARBUF* U_EXPORT2 1.452 +ucbuf_open(const char* fileName,const char** cp,UBool showWarning, UBool buffered, UErrorCode* error){ 1.453 + 1.454 + FileStream* in = NULL; 1.455 + int32_t fileSize=0; 1.456 + const char* knownCp; 1.457 + if(error==NULL || U_FAILURE(*error)){ 1.458 + return NULL; 1.459 + } 1.460 + if(cp==NULL || fileName==NULL){ 1.461 + *error = U_ILLEGAL_ARGUMENT_ERROR; 1.462 + return FALSE; 1.463 + } 1.464 + if (!uprv_strcmp(fileName, "-")) { 1.465 + in = T_FileStream_stdin(); 1.466 + }else{ 1.467 + in = T_FileStream_open(fileName, "rb"); 1.468 + } 1.469 + 1.470 + if(in!=NULL){ 1.471 + UCHARBUF* buf =(UCHARBUF*) uprv_malloc(sizeof(UCHARBUF)); 1.472 + fileSize = T_FileStream_size(in); 1.473 + if(buf == NULL){ 1.474 + *error = U_MEMORY_ALLOCATION_ERROR; 1.475 + T_FileStream_close(in); 1.476 + return NULL; 1.477 + } 1.478 + buf->in=in; 1.479 + buf->conv=NULL; 1.480 + buf->showWarning = showWarning; 1.481 + buf->isBuffered = buffered; 1.482 + buf->signatureLength=0; 1.483 + if(*cp==NULL || **cp=='\0'){ 1.484 + /* don't have code page name... try to autodetect */ 1.485 + ucbuf_autodetect_fs(in,cp,&buf->conv,&buf->signatureLength,error); 1.486 + }else if(ucbuf_isCPKnown(*cp)){ 1.487 + /* discard BOM */ 1.488 + ucbuf_autodetect_fs(in,&knownCp,&buf->conv,&buf->signatureLength,error); 1.489 + } 1.490 + if(U_SUCCESS(*error) && buf->conv==NULL) { 1.491 + buf->conv=ucnv_open(*cp,error); 1.492 + } 1.493 + if(U_FAILURE(*error)){ 1.494 + ucnv_close(buf->conv); 1.495 + uprv_free(buf); 1.496 + T_FileStream_close(in); 1.497 + return NULL; 1.498 + } 1.499 + 1.500 + if((buf->conv==NULL) && (buf->showWarning==TRUE)){ 1.501 + fprintf(stderr,"###WARNING: No converter defined. Using codepage of system.\n"); 1.502 + } 1.503 + buf->remaining=fileSize-buf->signatureLength; 1.504 + if(buf->isBuffered){ 1.505 + buf->bufCapacity=MAX_U_BUF; 1.506 + }else{ 1.507 + buf->bufCapacity=buf->remaining+buf->signatureLength+1/*for terminating nul*/; 1.508 + } 1.509 + buf->buffer=(UChar*) uprv_malloc(U_SIZEOF_UCHAR * buf->bufCapacity ); 1.510 + if (buf->buffer == NULL) { 1.511 + *error = U_MEMORY_ALLOCATION_ERROR; 1.512 + ucbuf_close(buf); 1.513 + return NULL; 1.514 + } 1.515 + buf->currentPos=buf->buffer; 1.516 + buf->bufLimit=buf->buffer; 1.517 + if(U_FAILURE(*error)){ 1.518 + fprintf(stderr, "Could not open codepage [%s]: %s\n", *cp, u_errorName(*error)); 1.519 + ucbuf_close(buf); 1.520 + return NULL; 1.521 + } 1.522 + ucbuf_fillucbuf(buf,error); 1.523 + if(U_FAILURE(*error)){ 1.524 + ucbuf_close(buf); 1.525 + return NULL; 1.526 + } 1.527 + return buf; 1.528 + } 1.529 + *error =U_FILE_ACCESS_ERROR; 1.530 + return NULL; 1.531 +} 1.532 + 1.533 + 1.534 + 1.535 +/* TODO: this method will fail if at the 1.536 + * begining of buffer and the uchar to unget 1.537 + * is from the previous buffer. Need to implement 1.538 + * system to take care of that situation. 1.539 + */ 1.540 +U_CAPI void U_EXPORT2 1.541 +ucbuf_ungetc(int32_t c,UCHARBUF* buf){ 1.542 + /* decrement currentPos pointer 1.543 + * if not at the begining of buffer 1.544 + */ 1.545 + if(buf->currentPos!=buf->buffer){ 1.546 + if(*(buf->currentPos-1)==c){ 1.547 + buf->currentPos--; 1.548 + } else { 1.549 + /* ungetc failed - did not match. */ 1.550 + } 1.551 + } else { 1.552 + /* ungetc failed - beginning of buffer. */ 1.553 + } 1.554 +} 1.555 + 1.556 +/* frees the resources of UChar* buffer */ 1.557 +static void 1.558 +ucbuf_closebuf(UCHARBUF* buf){ 1.559 + uprv_free(buf->buffer); 1.560 + buf->buffer = NULL; 1.561 +} 1.562 + 1.563 +/* close the buf and release resources*/ 1.564 +U_CAPI void U_EXPORT2 1.565 +ucbuf_close(UCHARBUF* buf){ 1.566 + if(buf!=NULL){ 1.567 + if(buf->conv){ 1.568 + ucnv_close(buf->conv); 1.569 + } 1.570 + T_FileStream_close(buf->in); 1.571 + ucbuf_closebuf(buf); 1.572 + uprv_free(buf); 1.573 + } 1.574 +} 1.575 + 1.576 +/* rewind the buf and file stream */ 1.577 +U_CAPI void U_EXPORT2 1.578 +ucbuf_rewind(UCHARBUF* buf,UErrorCode* error){ 1.579 + if(error==NULL || U_FAILURE(*error)){ 1.580 + return; 1.581 + } 1.582 + if(buf){ 1.583 + buf->currentPos=buf->buffer; 1.584 + buf->bufLimit=buf->buffer; 1.585 + T_FileStream_rewind(buf->in); 1.586 + buf->remaining=T_FileStream_size(buf->in)-buf->signatureLength; 1.587 + 1.588 + ucnv_resetToUnicode(buf->conv); 1.589 + if(buf->signatureLength>0) { 1.590 + UChar target[1]={ 0 }; 1.591 + UChar* pTarget; 1.592 + char start[8]; 1.593 + const char* pStart; 1.594 + int32_t numRead; 1.595 + 1.596 + /* read the signature bytes */ 1.597 + numRead=T_FileStream_read(buf->in, start, buf->signatureLength); 1.598 + 1.599 + /* convert and ignore initial U+FEFF, and the buffer overflow */ 1.600 + pTarget = target; 1.601 + pStart = start; 1.602 + ucnv_toUnicode(buf->conv, &pTarget, target+1, &pStart, start+numRead, NULL, FALSE, error); 1.603 + if(*error==U_BUFFER_OVERFLOW_ERROR) { 1.604 + *error=U_ZERO_ERROR; 1.605 + } 1.606 + 1.607 + /* verify that we successfully read exactly U+FEFF */ 1.608 + if(U_SUCCESS(*error) && (numRead!=buf->signatureLength || pTarget!=(target+1) || target[0]!=0xfeff)) { 1.609 + *error=U_INTERNAL_PROGRAM_ERROR; 1.610 + } 1.611 + } 1.612 + } 1.613 +} 1.614 + 1.615 + 1.616 +U_CAPI int32_t U_EXPORT2 1.617 +ucbuf_size(UCHARBUF* buf){ 1.618 + if(buf){ 1.619 + if(buf->isBuffered){ 1.620 + return (T_FileStream_size(buf->in)-buf->signatureLength)/ucnv_getMinCharSize(buf->conv); 1.621 + }else{ 1.622 + return (int32_t)(buf->bufLimit - buf->buffer); 1.623 + } 1.624 + } 1.625 + return 0; 1.626 +} 1.627 + 1.628 +U_CAPI const UChar* U_EXPORT2 1.629 +ucbuf_getBuffer(UCHARBUF* buf,int32_t* len,UErrorCode* error){ 1.630 + if(error==NULL || U_FAILURE(*error)){ 1.631 + return NULL; 1.632 + } 1.633 + if(buf==NULL || len==NULL){ 1.634 + *error = U_ILLEGAL_ARGUMENT_ERROR; 1.635 + return NULL; 1.636 + } 1.637 + *len = (int32_t)(buf->bufLimit - buf->buffer); 1.638 + return buf->buffer; 1.639 +} 1.640 + 1.641 +U_CAPI const char* U_EXPORT2 1.642 +ucbuf_resolveFileName(const char* inputDir, const char* fileName, char* target, int32_t* len, UErrorCode* status){ 1.643 + int32_t requiredLen = 0; 1.644 + int32_t dirlen = 0; 1.645 + int32_t filelen = 0; 1.646 + if(status==NULL || U_FAILURE(*status)){ 1.647 + return NULL; 1.648 + } 1.649 + 1.650 + if(inputDir == NULL || fileName == NULL || len==NULL || (target==NULL && *len>0)){ 1.651 + *status = U_ILLEGAL_ARGUMENT_ERROR; 1.652 + return NULL; 1.653 + } 1.654 + 1.655 + 1.656 + dirlen = (int32_t)uprv_strlen(inputDir); 1.657 + filelen = (int32_t)uprv_strlen(fileName); 1.658 + if(inputDir[dirlen-1] != U_FILE_SEP_CHAR) { 1.659 + requiredLen = dirlen + filelen + 2; 1.660 + if((*len < requiredLen) || target==NULL){ 1.661 + *len = requiredLen; 1.662 + *status = U_BUFFER_OVERFLOW_ERROR; 1.663 + return NULL; 1.664 + } 1.665 + 1.666 + target[0] = '\0'; 1.667 + /* 1.668 + * append the input dir to openFileName if the first char in 1.669 + * filename is not file seperation char and the last char input directory is not '.'. 1.670 + * This is to support : 1.671 + * genrb -s. /home/icu/data 1.672 + * genrb -s. icu/data 1.673 + * The user cannot mix notations like 1.674 + * genrb -s. /icu/data --- the absolute path specified. -s redundant 1.675 + * user should use 1.676 + * genrb -s. icu/data --- start from CWD and look in icu/data dir 1.677 + */ 1.678 + if( (fileName[0] != U_FILE_SEP_CHAR) && (inputDir[dirlen-1] !='.')){ 1.679 + uprv_strcpy(target, inputDir); 1.680 + target[dirlen] = U_FILE_SEP_CHAR; 1.681 + } 1.682 + target[dirlen + 1] = '\0'; 1.683 + } else { 1.684 + requiredLen = dirlen + filelen + 1; 1.685 + if((*len < requiredLen) || target==NULL){ 1.686 + *len = requiredLen; 1.687 + *status = U_BUFFER_OVERFLOW_ERROR; 1.688 + return NULL; 1.689 + } 1.690 + 1.691 + uprv_strcpy(target, inputDir); 1.692 + } 1.693 + 1.694 + uprv_strcat(target, fileName); 1.695 + return target; 1.696 +} 1.697 +/* 1.698 + * Unicode TR 13 says any of the below chars is 1.699 + * a new line char in a readline function in addition 1.700 + * to CR+LF combination which needs to be 1.701 + * handled seperately 1.702 + */ 1.703 +static UBool ucbuf_isCharNewLine(UChar c){ 1.704 + switch(c){ 1.705 + case 0x000A: /* LF */ 1.706 + case 0x000D: /* CR */ 1.707 + case 0x000C: /* FF */ 1.708 + case 0x0085: /* NEL */ 1.709 + case 0x2028: /* LS */ 1.710 + case 0x2029: /* PS */ 1.711 + return TRUE; 1.712 + default: 1.713 + return FALSE; 1.714 + } 1.715 +} 1.716 + 1.717 +U_CAPI const UChar* U_EXPORT2 1.718 +ucbuf_readline(UCHARBUF* buf,int32_t* len,UErrorCode* err){ 1.719 + UChar* temp = buf->currentPos; 1.720 + UChar* savePos =NULL; 1.721 + UChar c=0x0000; 1.722 + if(buf->isBuffered){ 1.723 + /* The input is buffered we have to do more 1.724 + * for returning a pointer U_TRUNCATED_CHAR_FOUND 1.725 + */ 1.726 + for(;;){ 1.727 + c = *temp++; 1.728 + if(buf->remaining==0){ 1.729 + return NULL; /* end of file is reached return NULL */ 1.730 + } 1.731 + if(temp>=buf->bufLimit && buf->currentPos == buf->buffer){ 1.732 + *err= U_TRUNCATED_CHAR_FOUND; 1.733 + return NULL; 1.734 + }else{ 1.735 + ucbuf_fillucbuf(buf,err); 1.736 + if(U_FAILURE(*err)){ 1.737 + return NULL; 1.738 + } 1.739 + } 1.740 + /* 1.741 + * Accoding to TR 13 readLine functions must interpret 1.742 + * CR, CR+LF, LF, NEL, PS, LS or FF as line seperators 1.743 + */ 1.744 + /* Windows CR LF */ 1.745 + if(c ==0x0d && temp+1<=buf->bufLimit && *(temp+1) == 0x0a ){ 1.746 + *len = (int32_t)(temp++ - buf->currentPos); 1.747 + savePos = buf->currentPos; 1.748 + buf->currentPos = temp; 1.749 + return savePos; 1.750 + } 1.751 + /* else */ 1.752 + 1.753 + if (temp>=buf->bufLimit|| ucbuf_isCharNewLine(c)){ /* Unipad inserts 2028 line separators! */ 1.754 + *len = (int32_t)(temp - buf->currentPos); 1.755 + savePos = buf->currentPos; 1.756 + buf->currentPos = temp; 1.757 + return savePos; 1.758 + } 1.759 + } 1.760 + }else{ 1.761 + /* we know that all input is read into the internal 1.762 + * buffer so we can safely return pointers 1.763 + */ 1.764 + for(;;){ 1.765 + c = *temp++; 1.766 + 1.767 + if(buf->currentPos==buf->bufLimit){ 1.768 + return NULL; /* end of file is reached return NULL */ 1.769 + } 1.770 + /* Windows CR LF */ 1.771 + if(c ==0x0d && temp+1<=buf->bufLimit && *(temp+1) == 0x0a ){ 1.772 + *len = (int32_t)(temp++ - buf->currentPos); 1.773 + savePos = buf->currentPos; 1.774 + buf->currentPos = temp; 1.775 + return savePos; 1.776 + } 1.777 + /* else */ 1.778 + if (temp>=buf->bufLimit|| ucbuf_isCharNewLine(c)) { /* Unipad inserts 2028 line separators! */ 1.779 + *len = (int32_t)(temp - buf->currentPos); 1.780 + savePos = buf->currentPos; 1.781 + buf->currentPos = temp; 1.782 + return savePos; 1.783 + } 1.784 + } 1.785 + } 1.786 + /* not reached */ 1.787 + /* A compiler warning will appear if all paths don't contain a return statement. */ 1.788 +/* return NULL;*/ 1.789 +} 1.790 +#endif