intl/icu/source/tools/toolutil/ucbuf.c

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1,
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f, for hacking purposes.

michael@0 1 /*
michael@0 2 *******************************************************************************
michael@0 3 *
michael@0 4 * Copyright (C) 1998-2011, International Business Machines
michael@0 5 * Corporation and others. All Rights Reserved.
michael@0 6 *
michael@0 7 *******************************************************************************
michael@0 8 *
michael@0 9 * File ucbuf.c
michael@0 10 *
michael@0 11 * Modification History:
michael@0 12 *
michael@0 13 * Date Name Description
michael@0 14 * 05/10/01 Ram Creation.
michael@0 15 *******************************************************************************
michael@0 16 */
michael@0 17
michael@0 18 #include "unicode/utypes.h"
michael@0 19 #include "unicode/putil.h"
michael@0 20 #include "unicode/uchar.h"
michael@0 21 #include "unicode/ucnv.h"
michael@0 22 #include "unicode/ucnv_err.h"
michael@0 23 #include "unicode/ustring.h"
michael@0 24 #include "unicode/utf16.h"
michael@0 25 #include "filestrm.h"
michael@0 26 #include "cstring.h"
michael@0 27 #include "cmemory.h"
michael@0 28 #include "ustrfmt.h"
michael@0 29 #include "ucbuf.h"
michael@0 30 #include <stdio.h>
michael@0 31
michael@0 32 #if !UCONFIG_NO_CONVERSION
michael@0 33
michael@0 34
michael@0 35 #define MAX_IN_BUF 1000
michael@0 36 #define MAX_U_BUF 1500
michael@0 37 #define CONTEXT_LEN 20
michael@0 38
michael@0 39 struct UCHARBUF {
michael@0 40 UChar* buffer;
michael@0 41 UChar* currentPos;
michael@0 42 UChar* bufLimit;
michael@0 43 int32_t bufCapacity;
michael@0 44 int32_t remaining;
michael@0 45 int32_t signatureLength;
michael@0 46 FileStream* in;
michael@0 47 UConverter* conv;
michael@0 48 UBool showWarning; /* makes this API not produce any errors */
michael@0 49 UBool isBuffered;
michael@0 50 };
michael@0 51
michael@0 52 U_CAPI UBool U_EXPORT2
michael@0 53 ucbuf_autodetect_fs(FileStream* in, const char** cp, UConverter** conv, int32_t* signatureLength, UErrorCode* error){
michael@0 54 char start[8];
michael@0 55 int32_t numRead;
michael@0 56
michael@0 57 UChar target[1]={ 0 };
michael@0 58 UChar* pTarget;
michael@0 59 const char* pStart;
michael@0 60
michael@0 61 /* read a few bytes */
michael@0 62 numRead=T_FileStream_read(in, start, sizeof(start));
michael@0 63
michael@0 64 *cp = ucnv_detectUnicodeSignature(start, numRead, signatureLength, error);
michael@0 65
michael@0 66 /* unread the bytes beyond what was consumed for U+FEFF */
michael@0 67 T_FileStream_rewind(in);
michael@0 68 if (*signatureLength > 0) {
michael@0 69 T_FileStream_read(in, start, *signatureLength);
michael@0 70 }
michael@0 71
michael@0 72 if(*cp==NULL){
michael@0 73 *conv =NULL;
michael@0 74 return FALSE;
michael@0 75 }
michael@0 76
michael@0 77 /* open the converter for the detected Unicode charset */
michael@0 78 *conv = ucnv_open(*cp,error);
michael@0 79
michael@0 80 /* convert and ignore initial U+FEFF, and the buffer overflow */
michael@0 81 pTarget = target;
michael@0 82 pStart = start;
michael@0 83 ucnv_toUnicode(*conv, &pTarget, target+1, &pStart, start+*signatureLength, NULL, FALSE, error);
michael@0 84 *signatureLength = (int32_t)(pStart - start);
michael@0 85 if(*error==U_BUFFER_OVERFLOW_ERROR) {
michael@0 86 *error=U_ZERO_ERROR;
michael@0 87 }
michael@0 88
michael@0 89 /* verify that we successfully read exactly U+FEFF */
michael@0 90 if(U_SUCCESS(*error) && (pTarget!=(target+1) || target[0]!=0xfeff)) {
michael@0 91 *error=U_INTERNAL_PROGRAM_ERROR;
michael@0 92 }
michael@0 93
michael@0 94
michael@0 95 return TRUE;
michael@0 96 }
michael@0 97 static UBool ucbuf_isCPKnown(const char* cp){
michael@0 98 if(ucnv_compareNames("UTF-8",cp)==0){
michael@0 99 return TRUE;
michael@0 100 }
michael@0 101 if(ucnv_compareNames("UTF-16BE",cp)==0){
michael@0 102 return TRUE;
michael@0 103 }
michael@0 104 if(ucnv_compareNames("UTF-16LE",cp)==0){
michael@0 105 return TRUE;
michael@0 106 }
michael@0 107 if(ucnv_compareNames("UTF-16",cp)==0){
michael@0 108 return TRUE;
michael@0 109 }
michael@0 110 if(ucnv_compareNames("UTF-32",cp)==0){
michael@0 111 return TRUE;
michael@0 112 }
michael@0 113 if(ucnv_compareNames("UTF-32BE",cp)==0){
michael@0 114 return TRUE;
michael@0 115 }
michael@0 116 if(ucnv_compareNames("UTF-32LE",cp)==0){
michael@0 117 return TRUE;
michael@0 118 }
michael@0 119 if(ucnv_compareNames("SCSU",cp)==0){
michael@0 120 return TRUE;
michael@0 121 }
michael@0 122 if(ucnv_compareNames("BOCU-1",cp)==0){
michael@0 123 return TRUE;
michael@0 124 }
michael@0 125 if(ucnv_compareNames("UTF-7",cp)==0){
michael@0 126 return TRUE;
michael@0 127 }
michael@0 128 return FALSE;
michael@0 129 }
michael@0 130
michael@0 131 U_CAPI FileStream * U_EXPORT2
michael@0 132 ucbuf_autodetect(const char* fileName, const char** cp,UConverter** conv, int32_t* signatureLength,UErrorCode* error){
michael@0 133 FileStream* in=NULL;
michael@0 134 if(error==NULL || U_FAILURE(*error)){
michael@0 135 return NULL;
michael@0 136 }
michael@0 137 if(conv==NULL || cp==NULL || fileName==NULL){
michael@0 138 *error = U_ILLEGAL_ARGUMENT_ERROR;
michael@0 139 return NULL;
michael@0 140 }
michael@0 141 /* open the file */
michael@0 142 in= T_FileStream_open(fileName,"rb");
michael@0 143
michael@0 144 if(in == NULL){
michael@0 145 *error=U_FILE_ACCESS_ERROR;
michael@0 146 return NULL;
michael@0 147 }
michael@0 148
michael@0 149 if(ucbuf_autodetect_fs(in,cp,conv,signatureLength,error)) {
michael@0 150 return in;
michael@0 151 } else {
michael@0 152 ucnv_close(*conv);
michael@0 153 *conv=NULL;
michael@0 154 T_FileStream_close(in);
michael@0 155 return NULL;
michael@0 156 }
michael@0 157 }
michael@0 158
michael@0 159 /* fill the uchar buffer */
michael@0 160 static UCHARBUF*
michael@0 161 ucbuf_fillucbuf( UCHARBUF* buf,UErrorCode* error){
michael@0 162 UChar* pTarget=NULL;
michael@0 163 UChar* target=NULL;
michael@0 164 const char* source=NULL;
michael@0 165 char carr[MAX_IN_BUF] = {'\0'};
michael@0 166 char* cbuf = carr;
michael@0 167 int32_t inputRead=0;
michael@0 168 int32_t outputWritten=0;
michael@0 169 int32_t offset=0;
michael@0 170 const char* sourceLimit =NULL;
michael@0 171 int32_t cbufSize=0;
michael@0 172 pTarget = buf->buffer;
michael@0 173 /* check if we arrived here without exhausting the buffer*/
michael@0 174 if(buf->currentPos<buf->bufLimit){
michael@0 175 offset = (int32_t)(buf->bufLimit-buf->currentPos);
michael@0 176 memmove(buf->buffer,buf->currentPos,offset* sizeof(UChar));
michael@0 177 }
michael@0 178
michael@0 179 #if DEBUG
michael@0 180 memset(pTarget+offset,0xff,sizeof(UChar)*(MAX_IN_BUF-offset));
michael@0 181 #endif
michael@0 182 if(buf->isBuffered){
michael@0 183 cbufSize = MAX_IN_BUF;
michael@0 184 /* read the file */
michael@0 185 inputRead=T_FileStream_read(buf->in,cbuf,cbufSize-offset);
michael@0 186 buf->remaining-=inputRead;
michael@0 187
michael@0 188 }else{
michael@0 189 cbufSize = T_FileStream_size(buf->in);
michael@0 190 cbuf = (char*)uprv_malloc(cbufSize);
michael@0 191 if (cbuf == NULL) {
michael@0 192 *error = U_MEMORY_ALLOCATION_ERROR;
michael@0 193 return NULL;
michael@0 194 }
michael@0 195 inputRead= T_FileStream_read(buf->in,cbuf,cbufSize);
michael@0 196 buf->remaining-=inputRead;
michael@0 197 }
michael@0 198
michael@0 199 /* just to be sure...*/
michael@0 200 if ( 0 == inputRead )
michael@0 201 buf->remaining = 0;
michael@0 202
michael@0 203 target=pTarget;
michael@0 204 /* convert the bytes */
michael@0 205 if(buf->conv){
michael@0 206 /* set the callback to stop */
michael@0 207 UConverterToUCallback toUOldAction ;
michael@0 208 void* toUOldContext;
michael@0 209 void* toUNewContext=NULL;
michael@0 210 ucnv_setToUCallBack(buf->conv,
michael@0 211 UCNV_TO_U_CALLBACK_STOP,
michael@0 212 toUNewContext,
michael@0 213 &toUOldAction,
michael@0 214 (const void**)&toUOldContext,
michael@0 215 error);
michael@0 216 /* since state is saved in the converter we add offset to source*/
michael@0 217 target = pTarget+offset;
michael@0 218 source = cbuf;
michael@0 219 sourceLimit = source + inputRead;
michael@0 220 ucnv_toUnicode(buf->conv,&target,target+(buf->bufCapacity-offset),
michael@0 221 &source,sourceLimit,NULL,
michael@0 222 (UBool)(buf->remaining==0),error);
michael@0 223
michael@0 224 if(U_FAILURE(*error)){
michael@0 225 char context[CONTEXT_LEN+1];
michael@0 226 char preContext[CONTEXT_LEN+1];
michael@0 227 char postContext[CONTEXT_LEN+1];
michael@0 228 int8_t len = CONTEXT_LEN;
michael@0 229 int32_t start=0;
michael@0 230 int32_t stop =0;
michael@0 231 int32_t pos =0;
michael@0 232 /* use erro1 to preserve the error code */
michael@0 233 UErrorCode error1 =U_ZERO_ERROR;
michael@0 234
michael@0 235 if( buf->showWarning==TRUE){
michael@0 236 fprintf(stderr,"\n###WARNING: Encountered abnormal bytes while"
michael@0 237 " converting input stream to target encoding: %s\n",
michael@0 238 u_errorName(*error));
michael@0 239 }
michael@0 240
michael@0 241
michael@0 242 /* now get the context chars */
michael@0 243 ucnv_getInvalidChars(buf->conv,context,&len,&error1);
michael@0 244 context[len]= 0 ; /* null terminate the buffer */
michael@0 245
michael@0 246 pos = (int32_t)(source - cbuf - len);
michael@0 247
michael@0 248 /* for pre-context */
michael@0 249 start = (pos <=CONTEXT_LEN)? 0 : (pos - (CONTEXT_LEN-1));
michael@0 250 stop = pos-len;
michael@0 251
michael@0 252 memcpy(preContext,cbuf+start,stop-start);
michael@0 253 /* null terminate the buffer */
michael@0 254 preContext[stop-start] = 0;
michael@0 255
michael@0 256 /* for post-context */
michael@0 257 start = pos+len;
michael@0 258 stop = (int32_t)(((pos+CONTEXT_LEN)<= (sourceLimit-cbuf) )? (pos+(CONTEXT_LEN-1)) : (sourceLimit-cbuf));
michael@0 259
michael@0 260 memcpy(postContext,source,stop-start);
michael@0 261 /* null terminate the buffer */
michael@0 262 postContext[stop-start] = 0;
michael@0 263
michael@0 264 if(buf->showWarning ==TRUE){
michael@0 265 /* print out the context */
michael@0 266 fprintf(stderr,"\tPre-context: %s\n",preContext);
michael@0 267 fprintf(stderr,"\tContext: %s\n",context);
michael@0 268 fprintf(stderr,"\tPost-context: %s\n", postContext);
michael@0 269 }
michael@0 270
michael@0 271 /* reset the converter */
michael@0 272 ucnv_reset(buf->conv);
michael@0 273
michael@0 274 /* set the call back to substitute
michael@0 275 * and restart conversion
michael@0 276 */
michael@0 277 ucnv_setToUCallBack(buf->conv,
michael@0 278 UCNV_TO_U_CALLBACK_SUBSTITUTE,
michael@0 279 toUNewContext,
michael@0 280 &toUOldAction,
michael@0 281 (const void**)&toUOldContext,
michael@0 282 &error1);
michael@0 283
michael@0 284 /* reset source and target start positions */
michael@0 285 target = pTarget+offset;
michael@0 286 source = cbuf;
michael@0 287
michael@0 288 /* re convert */
michael@0 289 ucnv_toUnicode(buf->conv,&target,target+(buf->bufCapacity-offset),
michael@0 290 &source,sourceLimit,NULL,
michael@0 291 (UBool)(buf->remaining==0),&error1);
michael@0 292
michael@0 293 }
michael@0 294 outputWritten = (int32_t)(target - pTarget);
michael@0 295
michael@0 296
michael@0 297 #if DEBUG
michael@0 298 {
michael@0 299 int i;
michael@0 300 target = pTarget;
michael@0 301 for(i=0;i<numRead;i++){
michael@0 302 /* printf("%c", (char)(*target++));*/
michael@0 303 }
michael@0 304 }
michael@0 305 #endif
michael@0 306
michael@0 307 }else{
michael@0 308 u_charsToUChars(cbuf,target+offset,inputRead);
michael@0 309 outputWritten=((buf->remaining>cbufSize)? cbufSize:inputRead+offset);
michael@0 310 }
michael@0 311 buf->currentPos = pTarget;
michael@0 312 buf->bufLimit=pTarget+outputWritten;
michael@0 313 *buf->bufLimit=0; /*NUL terminate*/
michael@0 314 if(cbuf!=carr){
michael@0 315 uprv_free(cbuf);
michael@0 316 }
michael@0 317 return buf;
michael@0 318 }
michael@0 319
michael@0 320
michael@0 321
michael@0 322 /* get a UChar from the stream*/
michael@0 323 U_CAPI int32_t U_EXPORT2
michael@0 324 ucbuf_getc(UCHARBUF* buf,UErrorCode* error){
michael@0 325 if(error==NULL || U_FAILURE(*error)){
michael@0 326 return FALSE;
michael@0 327 }
michael@0 328 if(buf->currentPos>=buf->bufLimit){
michael@0 329 if(buf->remaining==0){
michael@0 330 return U_EOF;
michael@0 331 }
michael@0 332 buf=ucbuf_fillucbuf(buf,error);
michael@0 333 if(U_FAILURE(*error)){
michael@0 334 return U_EOF;
michael@0 335 }
michael@0 336 }
michael@0 337
michael@0 338 return *(buf->currentPos++);
michael@0 339 }
michael@0 340
michael@0 341 /* get a UChar32 from the stream*/
michael@0 342 U_CAPI int32_t U_EXPORT2
michael@0 343 ucbuf_getc32(UCHARBUF* buf,UErrorCode* error){
michael@0 344 int32_t retVal = (int32_t)U_EOF;
michael@0 345 if(error==NULL || U_FAILURE(*error)){
michael@0 346 return FALSE;
michael@0 347 }
michael@0 348 if(buf->currentPos+1>=buf->bufLimit){
michael@0 349 if(buf->remaining==0){
michael@0 350 return U_EOF;
michael@0 351 }
michael@0 352 buf=ucbuf_fillucbuf(buf,error);
michael@0 353 if(U_FAILURE(*error)){
michael@0 354 return U_EOF;
michael@0 355 }
michael@0 356 }
michael@0 357 if(U16_IS_LEAD(*(buf->currentPos))){
michael@0 358 retVal=U16_GET_SUPPLEMENTARY(buf->currentPos[0],buf->currentPos[1]);
michael@0 359 buf->currentPos+=2;
michael@0 360 }else{
michael@0 361 retVal = *(buf->currentPos++);
michael@0 362 }
michael@0 363 return retVal;
michael@0 364 }
michael@0 365
michael@0 366 /* u_unescapeAt() callback to return a UChar*/
michael@0 367 static UChar U_CALLCONV
michael@0 368 _charAt(int32_t offset, void *context) {
michael@0 369 return ((UCHARBUF*) context)->currentPos[offset];
michael@0 370 }
michael@0 371
michael@0 372 /* getc and escape it */
michael@0 373 U_CAPI int32_t U_EXPORT2
michael@0 374 ucbuf_getcx32(UCHARBUF* buf,UErrorCode* error) {
michael@0 375 int32_t length;
michael@0 376 int32_t offset;
michael@0 377 UChar32 c32,c1,c2;
michael@0 378 if(error==NULL || U_FAILURE(*error)){
michael@0 379 return FALSE;
michael@0 380 }
michael@0 381 /* Fill the buffer if it is empty */
michael@0 382 if (buf->currentPos >=buf->bufLimit-2) {
michael@0 383 ucbuf_fillucbuf(buf,error);
michael@0 384 }
michael@0 385
michael@0 386 /* Get the next character in the buffer */
michael@0 387 if (buf->currentPos < buf->bufLimit) {
michael@0 388 c1 = *(buf->currentPos)++;
michael@0 389 } else {
michael@0 390 c1 = U_EOF;
michael@0 391 }
michael@0 392
michael@0 393 c2 = *(buf->currentPos);
michael@0 394
michael@0 395 /* If it isn't a backslash, return it */
michael@0 396 if (c1 != 0x005C) {
michael@0 397 return c1;
michael@0 398 }
michael@0 399
michael@0 400 /* Determine the amount of data in the buffer */
michael@0 401 length = (int32_t)(buf->bufLimit - buf->currentPos);
michael@0 402
michael@0 403 /* The longest escape sequence is \Uhhhhhhhh; make sure
michael@0 404 we have at least that many characters */
michael@0 405 if (length < 10) {
michael@0 406
michael@0 407 /* fill the buffer */
michael@0 408 ucbuf_fillucbuf(buf,error);
michael@0 409 length = (int32_t)(buf->bufLimit - buf->buffer);
michael@0 410 }
michael@0 411
michael@0 412 /* Process the escape */
michael@0 413 offset = 0;
michael@0 414 c32 = u_unescapeAt(_charAt, &offset, length, (void*)buf);
michael@0 415
michael@0 416 /* check if u_unescapeAt unescaped and converted
michael@0 417 * to c32 or not
michael@0 418 */
michael@0 419 if(c32==0xFFFFFFFF){
michael@0 420 if(buf->showWarning) {
michael@0 421 char context[CONTEXT_LEN+1];
michael@0 422 int32_t len = CONTEXT_LEN;
michael@0 423 if(length < len) {
michael@0 424 len = length;
michael@0 425 }
michael@0 426 context[len]= 0 ; /* null terminate the buffer */
michael@0 427 u_UCharsToChars( buf->currentPos, context, len);
michael@0 428 fprintf(stderr,"Bad escape: [%c%s]...\n", (int)c1, context);
michael@0 429 }
michael@0 430 *error= U_ILLEGAL_ESCAPE_SEQUENCE;
michael@0 431 return c1;
michael@0 432 }else if(c32!=c2 || (c32==0x0075 && c2==0x0075 && c1==0x005C) /* for \u0075 c2=0x0075 and c32==0x0075*/){
michael@0 433 /* Update the current buffer position */
michael@0 434 buf->currentPos += offset;
michael@0 435 }else{
michael@0 436 /* unescaping failed so we just return
michael@0 437 * c1 and not consume the buffer
michael@0 438 * this is useful for rules with escapes
michael@0 439 * in resouce bundles
michael@0 440 * eg: \' \\ \"
michael@0 441 */
michael@0 442 return c1;
michael@0 443 }
michael@0 444
michael@0 445 return c32;
michael@0 446 }
michael@0 447
michael@0 448 U_CAPI UCHARBUF* U_EXPORT2
michael@0 449 ucbuf_open(const char* fileName,const char** cp,UBool showWarning, UBool buffered, UErrorCode* error){
michael@0 450
michael@0 451 FileStream* in = NULL;
michael@0 452 int32_t fileSize=0;
michael@0 453 const char* knownCp;
michael@0 454 if(error==NULL || U_FAILURE(*error)){
michael@0 455 return NULL;
michael@0 456 }
michael@0 457 if(cp==NULL || fileName==NULL){
michael@0 458 *error = U_ILLEGAL_ARGUMENT_ERROR;
michael@0 459 return FALSE;
michael@0 460 }
michael@0 461 if (!uprv_strcmp(fileName, "-")) {
michael@0 462 in = T_FileStream_stdin();
michael@0 463 }else{
michael@0 464 in = T_FileStream_open(fileName, "rb");
michael@0 465 }
michael@0 466
michael@0 467 if(in!=NULL){
michael@0 468 UCHARBUF* buf =(UCHARBUF*) uprv_malloc(sizeof(UCHARBUF));
michael@0 469 fileSize = T_FileStream_size(in);
michael@0 470 if(buf == NULL){
michael@0 471 *error = U_MEMORY_ALLOCATION_ERROR;
michael@0 472 T_FileStream_close(in);
michael@0 473 return NULL;
michael@0 474 }
michael@0 475 buf->in=in;
michael@0 476 buf->conv=NULL;
michael@0 477 buf->showWarning = showWarning;
michael@0 478 buf->isBuffered = buffered;
michael@0 479 buf->signatureLength=0;
michael@0 480 if(*cp==NULL || **cp=='\0'){
michael@0 481 /* don't have code page name... try to autodetect */
michael@0 482 ucbuf_autodetect_fs(in,cp,&buf->conv,&buf->signatureLength,error);
michael@0 483 }else if(ucbuf_isCPKnown(*cp)){
michael@0 484 /* discard BOM */
michael@0 485 ucbuf_autodetect_fs(in,&knownCp,&buf->conv,&buf->signatureLength,error);
michael@0 486 }
michael@0 487 if(U_SUCCESS(*error) && buf->conv==NULL) {
michael@0 488 buf->conv=ucnv_open(*cp,error);
michael@0 489 }
michael@0 490 if(U_FAILURE(*error)){
michael@0 491 ucnv_close(buf->conv);
michael@0 492 uprv_free(buf);
michael@0 493 T_FileStream_close(in);
michael@0 494 return NULL;
michael@0 495 }
michael@0 496
michael@0 497 if((buf->conv==NULL) && (buf->showWarning==TRUE)){
michael@0 498 fprintf(stderr,"###WARNING: No converter defined. Using codepage of system.\n");
michael@0 499 }
michael@0 500 buf->remaining=fileSize-buf->signatureLength;
michael@0 501 if(buf->isBuffered){
michael@0 502 buf->bufCapacity=MAX_U_BUF;
michael@0 503 }else{
michael@0 504 buf->bufCapacity=buf->remaining+buf->signatureLength+1/*for terminating nul*/;
michael@0 505 }
michael@0 506 buf->buffer=(UChar*) uprv_malloc(U_SIZEOF_UCHAR * buf->bufCapacity );
michael@0 507 if (buf->buffer == NULL) {
michael@0 508 *error = U_MEMORY_ALLOCATION_ERROR;
michael@0 509 ucbuf_close(buf);
michael@0 510 return NULL;
michael@0 511 }
michael@0 512 buf->currentPos=buf->buffer;
michael@0 513 buf->bufLimit=buf->buffer;
michael@0 514 if(U_FAILURE(*error)){
michael@0 515 fprintf(stderr, "Could not open codepage [%s]: %s\n", *cp, u_errorName(*error));
michael@0 516 ucbuf_close(buf);
michael@0 517 return NULL;
michael@0 518 }
michael@0 519 ucbuf_fillucbuf(buf,error);
michael@0 520 if(U_FAILURE(*error)){
michael@0 521 ucbuf_close(buf);
michael@0 522 return NULL;
michael@0 523 }
michael@0 524 return buf;
michael@0 525 }
michael@0 526 *error =U_FILE_ACCESS_ERROR;
michael@0 527 return NULL;
michael@0 528 }
michael@0 529
michael@0 530
michael@0 531
michael@0 532 /* TODO: this method will fail if at the
michael@0 533 * begining of buffer and the uchar to unget
michael@0 534 * is from the previous buffer. Need to implement
michael@0 535 * system to take care of that situation.
michael@0 536 */
michael@0 537 U_CAPI void U_EXPORT2
michael@0 538 ucbuf_ungetc(int32_t c,UCHARBUF* buf){
michael@0 539 /* decrement currentPos pointer
michael@0 540 * if not at the begining of buffer
michael@0 541 */
michael@0 542 if(buf->currentPos!=buf->buffer){
michael@0 543 if(*(buf->currentPos-1)==c){
michael@0 544 buf->currentPos--;
michael@0 545 } else {
michael@0 546 /* ungetc failed - did not match. */
michael@0 547 }
michael@0 548 } else {
michael@0 549 /* ungetc failed - beginning of buffer. */
michael@0 550 }
michael@0 551 }
michael@0 552
michael@0 553 /* frees the resources of UChar* buffer */
michael@0 554 static void
michael@0 555 ucbuf_closebuf(UCHARBUF* buf){
michael@0 556 uprv_free(buf->buffer);
michael@0 557 buf->buffer = NULL;
michael@0 558 }
michael@0 559
michael@0 560 /* close the buf and release resources*/
michael@0 561 U_CAPI void U_EXPORT2
michael@0 562 ucbuf_close(UCHARBUF* buf){
michael@0 563 if(buf!=NULL){
michael@0 564 if(buf->conv){
michael@0 565 ucnv_close(buf->conv);
michael@0 566 }
michael@0 567 T_FileStream_close(buf->in);
michael@0 568 ucbuf_closebuf(buf);
michael@0 569 uprv_free(buf);
michael@0 570 }
michael@0 571 }
michael@0 572
michael@0 573 /* rewind the buf and file stream */
michael@0 574 U_CAPI void U_EXPORT2
michael@0 575 ucbuf_rewind(UCHARBUF* buf,UErrorCode* error){
michael@0 576 if(error==NULL || U_FAILURE(*error)){
michael@0 577 return;
michael@0 578 }
michael@0 579 if(buf){
michael@0 580 buf->currentPos=buf->buffer;
michael@0 581 buf->bufLimit=buf->buffer;
michael@0 582 T_FileStream_rewind(buf->in);
michael@0 583 buf->remaining=T_FileStream_size(buf->in)-buf->signatureLength;
michael@0 584
michael@0 585 ucnv_resetToUnicode(buf->conv);
michael@0 586 if(buf->signatureLength>0) {
michael@0 587 UChar target[1]={ 0 };
michael@0 588 UChar* pTarget;
michael@0 589 char start[8];
michael@0 590 const char* pStart;
michael@0 591 int32_t numRead;
michael@0 592
michael@0 593 /* read the signature bytes */
michael@0 594 numRead=T_FileStream_read(buf->in, start, buf->signatureLength);
michael@0 595
michael@0 596 /* convert and ignore initial U+FEFF, and the buffer overflow */
michael@0 597 pTarget = target;
michael@0 598 pStart = start;
michael@0 599 ucnv_toUnicode(buf->conv, &pTarget, target+1, &pStart, start+numRead, NULL, FALSE, error);
michael@0 600 if(*error==U_BUFFER_OVERFLOW_ERROR) {
michael@0 601 *error=U_ZERO_ERROR;
michael@0 602 }
michael@0 603
michael@0 604 /* verify that we successfully read exactly U+FEFF */
michael@0 605 if(U_SUCCESS(*error) && (numRead!=buf->signatureLength || pTarget!=(target+1) || target[0]!=0xfeff)) {
michael@0 606 *error=U_INTERNAL_PROGRAM_ERROR;
michael@0 607 }
michael@0 608 }
michael@0 609 }
michael@0 610 }
michael@0 611
michael@0 612
michael@0 613 U_CAPI int32_t U_EXPORT2
michael@0 614 ucbuf_size(UCHARBUF* buf){
michael@0 615 if(buf){
michael@0 616 if(buf->isBuffered){
michael@0 617 return (T_FileStream_size(buf->in)-buf->signatureLength)/ucnv_getMinCharSize(buf->conv);
michael@0 618 }else{
michael@0 619 return (int32_t)(buf->bufLimit - buf->buffer);
michael@0 620 }
michael@0 621 }
michael@0 622 return 0;
michael@0 623 }
michael@0 624
michael@0 625 U_CAPI const UChar* U_EXPORT2
michael@0 626 ucbuf_getBuffer(UCHARBUF* buf,int32_t* len,UErrorCode* error){
michael@0 627 if(error==NULL || U_FAILURE(*error)){
michael@0 628 return NULL;
michael@0 629 }
michael@0 630 if(buf==NULL || len==NULL){
michael@0 631 *error = U_ILLEGAL_ARGUMENT_ERROR;
michael@0 632 return NULL;
michael@0 633 }
michael@0 634 *len = (int32_t)(buf->bufLimit - buf->buffer);
michael@0 635 return buf->buffer;
michael@0 636 }
michael@0 637
michael@0 638 U_CAPI const char* U_EXPORT2
michael@0 639 ucbuf_resolveFileName(const char* inputDir, const char* fileName, char* target, int32_t* len, UErrorCode* status){
michael@0 640 int32_t requiredLen = 0;
michael@0 641 int32_t dirlen = 0;
michael@0 642 int32_t filelen = 0;
michael@0 643 if(status==NULL || U_FAILURE(*status)){
michael@0 644 return NULL;
michael@0 645 }
michael@0 646
michael@0 647 if(inputDir == NULL || fileName == NULL || len==NULL || (target==NULL && *len>0)){
michael@0 648 *status = U_ILLEGAL_ARGUMENT_ERROR;
michael@0 649 return NULL;
michael@0 650 }
michael@0 651
michael@0 652
michael@0 653 dirlen = (int32_t)uprv_strlen(inputDir);
michael@0 654 filelen = (int32_t)uprv_strlen(fileName);
michael@0 655 if(inputDir[dirlen-1] != U_FILE_SEP_CHAR) {
michael@0 656 requiredLen = dirlen + filelen + 2;
michael@0 657 if((*len < requiredLen) || target==NULL){
michael@0 658 *len = requiredLen;
michael@0 659 *status = U_BUFFER_OVERFLOW_ERROR;
michael@0 660 return NULL;
michael@0 661 }
michael@0 662
michael@0 663 target[0] = '\0';
michael@0 664 /*
michael@0 665 * append the input dir to openFileName if the first char in
michael@0 666 * filename is not file seperation char and the last char input directory is not '.'.
michael@0 667 * This is to support :
michael@0 668 * genrb -s. /home/icu/data
michael@0 669 * genrb -s. icu/data
michael@0 670 * The user cannot mix notations like
michael@0 671 * genrb -s. /icu/data --- the absolute path specified. -s redundant
michael@0 672 * user should use
michael@0 673 * genrb -s. icu/data --- start from CWD and look in icu/data dir
michael@0 674 */
michael@0 675 if( (fileName[0] != U_FILE_SEP_CHAR) && (inputDir[dirlen-1] !='.')){
michael@0 676 uprv_strcpy(target, inputDir);
michael@0 677 target[dirlen] = U_FILE_SEP_CHAR;
michael@0 678 }
michael@0 679 target[dirlen + 1] = '\0';
michael@0 680 } else {
michael@0 681 requiredLen = dirlen + filelen + 1;
michael@0 682 if((*len < requiredLen) || target==NULL){
michael@0 683 *len = requiredLen;
michael@0 684 *status = U_BUFFER_OVERFLOW_ERROR;
michael@0 685 return NULL;
michael@0 686 }
michael@0 687
michael@0 688 uprv_strcpy(target, inputDir);
michael@0 689 }
michael@0 690
michael@0 691 uprv_strcat(target, fileName);
michael@0 692 return target;
michael@0 693 }
michael@0 694 /*
michael@0 695 * Unicode TR 13 says any of the below chars is
michael@0 696 * a new line char in a readline function in addition
michael@0 697 * to CR+LF combination which needs to be
michael@0 698 * handled seperately
michael@0 699 */
michael@0 700 static UBool ucbuf_isCharNewLine(UChar c){
michael@0 701 switch(c){
michael@0 702 case 0x000A: /* LF */
michael@0 703 case 0x000D: /* CR */
michael@0 704 case 0x000C: /* FF */
michael@0 705 case 0x0085: /* NEL */
michael@0 706 case 0x2028: /* LS */
michael@0 707 case 0x2029: /* PS */
michael@0 708 return TRUE;
michael@0 709 default:
michael@0 710 return FALSE;
michael@0 711 }
michael@0 712 }
michael@0 713
michael@0 714 U_CAPI const UChar* U_EXPORT2
michael@0 715 ucbuf_readline(UCHARBUF* buf,int32_t* len,UErrorCode* err){
michael@0 716 UChar* temp = buf->currentPos;
michael@0 717 UChar* savePos =NULL;
michael@0 718 UChar c=0x0000;
michael@0 719 if(buf->isBuffered){
michael@0 720 /* The input is buffered we have to do more
michael@0 721 * for returning a pointer U_TRUNCATED_CHAR_FOUND
michael@0 722 */
michael@0 723 for(;;){
michael@0 724 c = *temp++;
michael@0 725 if(buf->remaining==0){
michael@0 726 return NULL; /* end of file is reached return NULL */
michael@0 727 }
michael@0 728 if(temp>=buf->bufLimit && buf->currentPos == buf->buffer){
michael@0 729 *err= U_TRUNCATED_CHAR_FOUND;
michael@0 730 return NULL;
michael@0 731 }else{
michael@0 732 ucbuf_fillucbuf(buf,err);
michael@0 733 if(U_FAILURE(*err)){
michael@0 734 return NULL;
michael@0 735 }
michael@0 736 }
michael@0 737 /*
michael@0 738 * Accoding to TR 13 readLine functions must interpret
michael@0 739 * CR, CR+LF, LF, NEL, PS, LS or FF as line seperators
michael@0 740 */
michael@0 741 /* Windows CR LF */
michael@0 742 if(c ==0x0d && temp+1<=buf->bufLimit && *(temp+1) == 0x0a ){
michael@0 743 *len = (int32_t)(temp++ - buf->currentPos);
michael@0 744 savePos = buf->currentPos;
michael@0 745 buf->currentPos = temp;
michael@0 746 return savePos;
michael@0 747 }
michael@0 748 /* else */
michael@0 749
michael@0 750 if (temp>=buf->bufLimit|| ucbuf_isCharNewLine(c)){ /* Unipad inserts 2028 line separators! */
michael@0 751 *len = (int32_t)(temp - buf->currentPos);
michael@0 752 savePos = buf->currentPos;
michael@0 753 buf->currentPos = temp;
michael@0 754 return savePos;
michael@0 755 }
michael@0 756 }
michael@0 757 }else{
michael@0 758 /* we know that all input is read into the internal
michael@0 759 * buffer so we can safely return pointers
michael@0 760 */
michael@0 761 for(;;){
michael@0 762 c = *temp++;
michael@0 763
michael@0 764 if(buf->currentPos==buf->bufLimit){
michael@0 765 return NULL; /* end of file is reached return NULL */
michael@0 766 }
michael@0 767 /* Windows CR LF */
michael@0 768 if(c ==0x0d && temp+1<=buf->bufLimit && *(temp+1) == 0x0a ){
michael@0 769 *len = (int32_t)(temp++ - buf->currentPos);
michael@0 770 savePos = buf->currentPos;
michael@0 771 buf->currentPos = temp;
michael@0 772 return savePos;
michael@0 773 }
michael@0 774 /* else */
michael@0 775 if (temp>=buf->bufLimit|| ucbuf_isCharNewLine(c)) { /* Unipad inserts 2028 line separators! */
michael@0 776 *len = (int32_t)(temp - buf->currentPos);
michael@0 777 savePos = buf->currentPos;
michael@0 778 buf->currentPos = temp;
michael@0 779 return savePos;
michael@0 780 }
michael@0 781 }
michael@0 782 }
michael@0 783 /* not reached */
michael@0 784 /* A compiler warning will appear if all paths don't contain a return statement. */
michael@0 785 /* return NULL;*/
michael@0 786 }
michael@0 787 #endif

mercurial