michael@0: /* michael@0: ******************************************************************************* michael@0: * michael@0: * Copyright (C) 1998-2011, International Business Machines michael@0: * Corporation and others. All Rights Reserved. michael@0: * michael@0: ******************************************************************************* michael@0: * michael@0: * File ucbuf.c michael@0: * michael@0: * Modification History: michael@0: * michael@0: * Date Name Description michael@0: * 05/10/01 Ram Creation. michael@0: ******************************************************************************* michael@0: */ michael@0: michael@0: #include "unicode/utypes.h" michael@0: #include "unicode/putil.h" michael@0: #include "unicode/uchar.h" michael@0: #include "unicode/ucnv.h" michael@0: #include "unicode/ucnv_err.h" michael@0: #include "unicode/ustring.h" michael@0: #include "unicode/utf16.h" michael@0: #include "filestrm.h" michael@0: #include "cstring.h" michael@0: #include "cmemory.h" michael@0: #include "ustrfmt.h" michael@0: #include "ucbuf.h" michael@0: #include michael@0: michael@0: #if !UCONFIG_NO_CONVERSION michael@0: michael@0: michael@0: #define MAX_IN_BUF 1000 michael@0: #define MAX_U_BUF 1500 michael@0: #define CONTEXT_LEN 20 michael@0: michael@0: struct UCHARBUF { michael@0: UChar* buffer; michael@0: UChar* currentPos; michael@0: UChar* bufLimit; michael@0: int32_t bufCapacity; michael@0: int32_t remaining; michael@0: int32_t signatureLength; michael@0: FileStream* in; michael@0: UConverter* conv; michael@0: UBool showWarning; /* makes this API not produce any errors */ michael@0: UBool isBuffered; michael@0: }; michael@0: michael@0: U_CAPI UBool U_EXPORT2 michael@0: ucbuf_autodetect_fs(FileStream* in, const char** cp, UConverter** conv, int32_t* signatureLength, UErrorCode* error){ michael@0: char start[8]; michael@0: int32_t numRead; michael@0: michael@0: UChar target[1]={ 0 }; michael@0: UChar* pTarget; michael@0: const char* pStart; michael@0: michael@0: /* read a few bytes */ michael@0: numRead=T_FileStream_read(in, start, sizeof(start)); michael@0: michael@0: *cp = ucnv_detectUnicodeSignature(start, numRead, signatureLength, error); michael@0: michael@0: /* unread the bytes beyond what was consumed for U+FEFF */ michael@0: T_FileStream_rewind(in); michael@0: if (*signatureLength > 0) { michael@0: T_FileStream_read(in, start, *signatureLength); michael@0: } michael@0: michael@0: if(*cp==NULL){ michael@0: *conv =NULL; michael@0: return FALSE; michael@0: } michael@0: michael@0: /* open the converter for the detected Unicode charset */ michael@0: *conv = ucnv_open(*cp,error); michael@0: michael@0: /* convert and ignore initial U+FEFF, and the buffer overflow */ michael@0: pTarget = target; michael@0: pStart = start; michael@0: ucnv_toUnicode(*conv, &pTarget, target+1, &pStart, start+*signatureLength, NULL, FALSE, error); michael@0: *signatureLength = (int32_t)(pStart - start); michael@0: if(*error==U_BUFFER_OVERFLOW_ERROR) { michael@0: *error=U_ZERO_ERROR; michael@0: } michael@0: michael@0: /* verify that we successfully read exactly U+FEFF */ michael@0: if(U_SUCCESS(*error) && (pTarget!=(target+1) || target[0]!=0xfeff)) { michael@0: *error=U_INTERNAL_PROGRAM_ERROR; michael@0: } michael@0: michael@0: michael@0: return TRUE; michael@0: } michael@0: static UBool ucbuf_isCPKnown(const char* cp){ michael@0: if(ucnv_compareNames("UTF-8",cp)==0){ michael@0: return TRUE; michael@0: } michael@0: if(ucnv_compareNames("UTF-16BE",cp)==0){ michael@0: return TRUE; michael@0: } michael@0: if(ucnv_compareNames("UTF-16LE",cp)==0){ michael@0: return TRUE; michael@0: } michael@0: if(ucnv_compareNames("UTF-16",cp)==0){ michael@0: return TRUE; michael@0: } michael@0: if(ucnv_compareNames("UTF-32",cp)==0){ michael@0: return TRUE; michael@0: } michael@0: if(ucnv_compareNames("UTF-32BE",cp)==0){ michael@0: return TRUE; michael@0: } michael@0: if(ucnv_compareNames("UTF-32LE",cp)==0){ michael@0: return TRUE; michael@0: } michael@0: if(ucnv_compareNames("SCSU",cp)==0){ michael@0: return TRUE; michael@0: } michael@0: if(ucnv_compareNames("BOCU-1",cp)==0){ michael@0: return TRUE; michael@0: } michael@0: if(ucnv_compareNames("UTF-7",cp)==0){ michael@0: return TRUE; michael@0: } michael@0: return FALSE; michael@0: } michael@0: michael@0: U_CAPI FileStream * U_EXPORT2 michael@0: ucbuf_autodetect(const char* fileName, const char** cp,UConverter** conv, int32_t* signatureLength,UErrorCode* error){ michael@0: FileStream* in=NULL; michael@0: if(error==NULL || U_FAILURE(*error)){ michael@0: return NULL; michael@0: } michael@0: if(conv==NULL || cp==NULL || fileName==NULL){ michael@0: *error = U_ILLEGAL_ARGUMENT_ERROR; michael@0: return NULL; michael@0: } michael@0: /* open the file */ michael@0: in= T_FileStream_open(fileName,"rb"); michael@0: michael@0: if(in == NULL){ michael@0: *error=U_FILE_ACCESS_ERROR; michael@0: return NULL; michael@0: } michael@0: michael@0: if(ucbuf_autodetect_fs(in,cp,conv,signatureLength,error)) { michael@0: return in; michael@0: } else { michael@0: ucnv_close(*conv); michael@0: *conv=NULL; michael@0: T_FileStream_close(in); michael@0: return NULL; michael@0: } michael@0: } michael@0: michael@0: /* fill the uchar buffer */ michael@0: static UCHARBUF* michael@0: ucbuf_fillucbuf( UCHARBUF* buf,UErrorCode* error){ michael@0: UChar* pTarget=NULL; michael@0: UChar* target=NULL; michael@0: const char* source=NULL; michael@0: char carr[MAX_IN_BUF] = {'\0'}; michael@0: char* cbuf = carr; michael@0: int32_t inputRead=0; michael@0: int32_t outputWritten=0; michael@0: int32_t offset=0; michael@0: const char* sourceLimit =NULL; michael@0: int32_t cbufSize=0; michael@0: pTarget = buf->buffer; michael@0: /* check if we arrived here without exhausting the buffer*/ michael@0: if(buf->currentPosbufLimit){ michael@0: offset = (int32_t)(buf->bufLimit-buf->currentPos); michael@0: memmove(buf->buffer,buf->currentPos,offset* sizeof(UChar)); michael@0: } michael@0: michael@0: #if DEBUG michael@0: memset(pTarget+offset,0xff,sizeof(UChar)*(MAX_IN_BUF-offset)); michael@0: #endif michael@0: if(buf->isBuffered){ michael@0: cbufSize = MAX_IN_BUF; michael@0: /* read the file */ michael@0: inputRead=T_FileStream_read(buf->in,cbuf,cbufSize-offset); michael@0: buf->remaining-=inputRead; michael@0: michael@0: }else{ michael@0: cbufSize = T_FileStream_size(buf->in); michael@0: cbuf = (char*)uprv_malloc(cbufSize); michael@0: if (cbuf == NULL) { michael@0: *error = U_MEMORY_ALLOCATION_ERROR; michael@0: return NULL; michael@0: } michael@0: inputRead= T_FileStream_read(buf->in,cbuf,cbufSize); michael@0: buf->remaining-=inputRead; michael@0: } michael@0: michael@0: /* just to be sure...*/ michael@0: if ( 0 == inputRead ) michael@0: buf->remaining = 0; michael@0: michael@0: target=pTarget; michael@0: /* convert the bytes */ michael@0: if(buf->conv){ michael@0: /* set the callback to stop */ michael@0: UConverterToUCallback toUOldAction ; michael@0: void* toUOldContext; michael@0: void* toUNewContext=NULL; michael@0: ucnv_setToUCallBack(buf->conv, michael@0: UCNV_TO_U_CALLBACK_STOP, michael@0: toUNewContext, michael@0: &toUOldAction, michael@0: (const void**)&toUOldContext, michael@0: error); michael@0: /* since state is saved in the converter we add offset to source*/ michael@0: target = pTarget+offset; michael@0: source = cbuf; michael@0: sourceLimit = source + inputRead; michael@0: ucnv_toUnicode(buf->conv,&target,target+(buf->bufCapacity-offset), michael@0: &source,sourceLimit,NULL, michael@0: (UBool)(buf->remaining==0),error); michael@0: michael@0: if(U_FAILURE(*error)){ michael@0: char context[CONTEXT_LEN+1]; michael@0: char preContext[CONTEXT_LEN+1]; michael@0: char postContext[CONTEXT_LEN+1]; michael@0: int8_t len = CONTEXT_LEN; michael@0: int32_t start=0; michael@0: int32_t stop =0; michael@0: int32_t pos =0; michael@0: /* use erro1 to preserve the error code */ michael@0: UErrorCode error1 =U_ZERO_ERROR; michael@0: michael@0: if( buf->showWarning==TRUE){ michael@0: fprintf(stderr,"\n###WARNING: Encountered abnormal bytes while" michael@0: " converting input stream to target encoding: %s\n", michael@0: u_errorName(*error)); michael@0: } michael@0: michael@0: michael@0: /* now get the context chars */ michael@0: ucnv_getInvalidChars(buf->conv,context,&len,&error1); michael@0: context[len]= 0 ; /* null terminate the buffer */ michael@0: michael@0: pos = (int32_t)(source - cbuf - len); michael@0: michael@0: /* for pre-context */ michael@0: start = (pos <=CONTEXT_LEN)? 0 : (pos - (CONTEXT_LEN-1)); michael@0: stop = pos-len; michael@0: michael@0: memcpy(preContext,cbuf+start,stop-start); michael@0: /* null terminate the buffer */ michael@0: preContext[stop-start] = 0; michael@0: michael@0: /* for post-context */ michael@0: start = pos+len; michael@0: stop = (int32_t)(((pos+CONTEXT_LEN)<= (sourceLimit-cbuf) )? (pos+(CONTEXT_LEN-1)) : (sourceLimit-cbuf)); michael@0: michael@0: memcpy(postContext,source,stop-start); michael@0: /* null terminate the buffer */ michael@0: postContext[stop-start] = 0; michael@0: michael@0: if(buf->showWarning ==TRUE){ michael@0: /* print out the context */ michael@0: fprintf(stderr,"\tPre-context: %s\n",preContext); michael@0: fprintf(stderr,"\tContext: %s\n",context); michael@0: fprintf(stderr,"\tPost-context: %s\n", postContext); michael@0: } michael@0: michael@0: /* reset the converter */ michael@0: ucnv_reset(buf->conv); michael@0: michael@0: /* set the call back to substitute michael@0: * and restart conversion michael@0: */ michael@0: ucnv_setToUCallBack(buf->conv, michael@0: UCNV_TO_U_CALLBACK_SUBSTITUTE, michael@0: toUNewContext, michael@0: &toUOldAction, michael@0: (const void**)&toUOldContext, michael@0: &error1); michael@0: michael@0: /* reset source and target start positions */ michael@0: target = pTarget+offset; michael@0: source = cbuf; michael@0: michael@0: /* re convert */ michael@0: ucnv_toUnicode(buf->conv,&target,target+(buf->bufCapacity-offset), michael@0: &source,sourceLimit,NULL, michael@0: (UBool)(buf->remaining==0),&error1); michael@0: michael@0: } michael@0: outputWritten = (int32_t)(target - pTarget); michael@0: michael@0: michael@0: #if DEBUG michael@0: { michael@0: int i; michael@0: target = pTarget; michael@0: for(i=0;iremaining>cbufSize)? cbufSize:inputRead+offset); michael@0: } michael@0: buf->currentPos = pTarget; michael@0: buf->bufLimit=pTarget+outputWritten; michael@0: *buf->bufLimit=0; /*NUL terminate*/ michael@0: if(cbuf!=carr){ michael@0: uprv_free(cbuf); michael@0: } michael@0: return buf; michael@0: } michael@0: michael@0: michael@0: michael@0: /* get a UChar from the stream*/ michael@0: U_CAPI int32_t U_EXPORT2 michael@0: ucbuf_getc(UCHARBUF* buf,UErrorCode* error){ michael@0: if(error==NULL || U_FAILURE(*error)){ michael@0: return FALSE; michael@0: } michael@0: if(buf->currentPos>=buf->bufLimit){ michael@0: if(buf->remaining==0){ michael@0: return U_EOF; michael@0: } michael@0: buf=ucbuf_fillucbuf(buf,error); michael@0: if(U_FAILURE(*error)){ michael@0: return U_EOF; michael@0: } michael@0: } michael@0: michael@0: return *(buf->currentPos++); michael@0: } michael@0: michael@0: /* get a UChar32 from the stream*/ michael@0: U_CAPI int32_t U_EXPORT2 michael@0: ucbuf_getc32(UCHARBUF* buf,UErrorCode* error){ michael@0: int32_t retVal = (int32_t)U_EOF; michael@0: if(error==NULL || U_FAILURE(*error)){ michael@0: return FALSE; michael@0: } michael@0: if(buf->currentPos+1>=buf->bufLimit){ michael@0: if(buf->remaining==0){ michael@0: return U_EOF; michael@0: } michael@0: buf=ucbuf_fillucbuf(buf,error); michael@0: if(U_FAILURE(*error)){ michael@0: return U_EOF; michael@0: } michael@0: } michael@0: if(U16_IS_LEAD(*(buf->currentPos))){ michael@0: retVal=U16_GET_SUPPLEMENTARY(buf->currentPos[0],buf->currentPos[1]); michael@0: buf->currentPos+=2; michael@0: }else{ michael@0: retVal = *(buf->currentPos++); michael@0: } michael@0: return retVal; michael@0: } michael@0: michael@0: /* u_unescapeAt() callback to return a UChar*/ michael@0: static UChar U_CALLCONV michael@0: _charAt(int32_t offset, void *context) { michael@0: return ((UCHARBUF*) context)->currentPos[offset]; michael@0: } michael@0: michael@0: /* getc and escape it */ michael@0: U_CAPI int32_t U_EXPORT2 michael@0: ucbuf_getcx32(UCHARBUF* buf,UErrorCode* error) { michael@0: int32_t length; michael@0: int32_t offset; michael@0: UChar32 c32,c1,c2; michael@0: if(error==NULL || U_FAILURE(*error)){ michael@0: return FALSE; michael@0: } michael@0: /* Fill the buffer if it is empty */ michael@0: if (buf->currentPos >=buf->bufLimit-2) { michael@0: ucbuf_fillucbuf(buf,error); michael@0: } michael@0: michael@0: /* Get the next character in the buffer */ michael@0: if (buf->currentPos < buf->bufLimit) { michael@0: c1 = *(buf->currentPos)++; michael@0: } else { michael@0: c1 = U_EOF; michael@0: } michael@0: michael@0: c2 = *(buf->currentPos); michael@0: michael@0: /* If it isn't a backslash, return it */ michael@0: if (c1 != 0x005C) { michael@0: return c1; michael@0: } michael@0: michael@0: /* Determine the amount of data in the buffer */ michael@0: length = (int32_t)(buf->bufLimit - buf->currentPos); michael@0: michael@0: /* The longest escape sequence is \Uhhhhhhhh; make sure michael@0: we have at least that many characters */ michael@0: if (length < 10) { michael@0: michael@0: /* fill the buffer */ michael@0: ucbuf_fillucbuf(buf,error); michael@0: length = (int32_t)(buf->bufLimit - buf->buffer); michael@0: } michael@0: michael@0: /* Process the escape */ michael@0: offset = 0; michael@0: c32 = u_unescapeAt(_charAt, &offset, length, (void*)buf); michael@0: michael@0: /* check if u_unescapeAt unescaped and converted michael@0: * to c32 or not michael@0: */ michael@0: if(c32==0xFFFFFFFF){ michael@0: if(buf->showWarning) { michael@0: char context[CONTEXT_LEN+1]; michael@0: int32_t len = CONTEXT_LEN; michael@0: if(length < len) { michael@0: len = length; michael@0: } michael@0: context[len]= 0 ; /* null terminate the buffer */ michael@0: u_UCharsToChars( buf->currentPos, context, len); michael@0: fprintf(stderr,"Bad escape: [%c%s]...\n", (int)c1, context); michael@0: } michael@0: *error= U_ILLEGAL_ESCAPE_SEQUENCE; michael@0: return c1; michael@0: }else if(c32!=c2 || (c32==0x0075 && c2==0x0075 && c1==0x005C) /* for \u0075 c2=0x0075 and c32==0x0075*/){ michael@0: /* Update the current buffer position */ michael@0: buf->currentPos += offset; michael@0: }else{ michael@0: /* unescaping failed so we just return michael@0: * c1 and not consume the buffer michael@0: * this is useful for rules with escapes michael@0: * in resouce bundles michael@0: * eg: \' \\ \" michael@0: */ michael@0: return c1; michael@0: } michael@0: michael@0: return c32; michael@0: } michael@0: michael@0: U_CAPI UCHARBUF* U_EXPORT2 michael@0: ucbuf_open(const char* fileName,const char** cp,UBool showWarning, UBool buffered, UErrorCode* error){ michael@0: michael@0: FileStream* in = NULL; michael@0: int32_t fileSize=0; michael@0: const char* knownCp; michael@0: if(error==NULL || U_FAILURE(*error)){ michael@0: return NULL; michael@0: } michael@0: if(cp==NULL || fileName==NULL){ michael@0: *error = U_ILLEGAL_ARGUMENT_ERROR; michael@0: return FALSE; michael@0: } michael@0: if (!uprv_strcmp(fileName, "-")) { michael@0: in = T_FileStream_stdin(); michael@0: }else{ michael@0: in = T_FileStream_open(fileName, "rb"); michael@0: } michael@0: michael@0: if(in!=NULL){ michael@0: UCHARBUF* buf =(UCHARBUF*) uprv_malloc(sizeof(UCHARBUF)); michael@0: fileSize = T_FileStream_size(in); michael@0: if(buf == NULL){ michael@0: *error = U_MEMORY_ALLOCATION_ERROR; michael@0: T_FileStream_close(in); michael@0: return NULL; michael@0: } michael@0: buf->in=in; michael@0: buf->conv=NULL; michael@0: buf->showWarning = showWarning; michael@0: buf->isBuffered = buffered; michael@0: buf->signatureLength=0; michael@0: if(*cp==NULL || **cp=='\0'){ michael@0: /* don't have code page name... try to autodetect */ michael@0: ucbuf_autodetect_fs(in,cp,&buf->conv,&buf->signatureLength,error); michael@0: }else if(ucbuf_isCPKnown(*cp)){ michael@0: /* discard BOM */ michael@0: ucbuf_autodetect_fs(in,&knownCp,&buf->conv,&buf->signatureLength,error); michael@0: } michael@0: if(U_SUCCESS(*error) && buf->conv==NULL) { michael@0: buf->conv=ucnv_open(*cp,error); michael@0: } michael@0: if(U_FAILURE(*error)){ michael@0: ucnv_close(buf->conv); michael@0: uprv_free(buf); michael@0: T_FileStream_close(in); michael@0: return NULL; michael@0: } michael@0: michael@0: if((buf->conv==NULL) && (buf->showWarning==TRUE)){ michael@0: fprintf(stderr,"###WARNING: No converter defined. Using codepage of system.\n"); michael@0: } michael@0: buf->remaining=fileSize-buf->signatureLength; michael@0: if(buf->isBuffered){ michael@0: buf->bufCapacity=MAX_U_BUF; michael@0: }else{ michael@0: buf->bufCapacity=buf->remaining+buf->signatureLength+1/*for terminating nul*/; michael@0: } michael@0: buf->buffer=(UChar*) uprv_malloc(U_SIZEOF_UCHAR * buf->bufCapacity ); michael@0: if (buf->buffer == NULL) { michael@0: *error = U_MEMORY_ALLOCATION_ERROR; michael@0: ucbuf_close(buf); michael@0: return NULL; michael@0: } michael@0: buf->currentPos=buf->buffer; michael@0: buf->bufLimit=buf->buffer; michael@0: if(U_FAILURE(*error)){ michael@0: fprintf(stderr, "Could not open codepage [%s]: %s\n", *cp, u_errorName(*error)); michael@0: ucbuf_close(buf); michael@0: return NULL; michael@0: } michael@0: ucbuf_fillucbuf(buf,error); michael@0: if(U_FAILURE(*error)){ michael@0: ucbuf_close(buf); michael@0: return NULL; michael@0: } michael@0: return buf; michael@0: } michael@0: *error =U_FILE_ACCESS_ERROR; michael@0: return NULL; michael@0: } michael@0: michael@0: michael@0: michael@0: /* TODO: this method will fail if at the michael@0: * begining of buffer and the uchar to unget michael@0: * is from the previous buffer. Need to implement michael@0: * system to take care of that situation. michael@0: */ michael@0: U_CAPI void U_EXPORT2 michael@0: ucbuf_ungetc(int32_t c,UCHARBUF* buf){ michael@0: /* decrement currentPos pointer michael@0: * if not at the begining of buffer michael@0: */ michael@0: if(buf->currentPos!=buf->buffer){ michael@0: if(*(buf->currentPos-1)==c){ michael@0: buf->currentPos--; michael@0: } else { michael@0: /* ungetc failed - did not match. */ michael@0: } michael@0: } else { michael@0: /* ungetc failed - beginning of buffer. */ michael@0: } michael@0: } michael@0: michael@0: /* frees the resources of UChar* buffer */ michael@0: static void michael@0: ucbuf_closebuf(UCHARBUF* buf){ michael@0: uprv_free(buf->buffer); michael@0: buf->buffer = NULL; michael@0: } michael@0: michael@0: /* close the buf and release resources*/ michael@0: U_CAPI void U_EXPORT2 michael@0: ucbuf_close(UCHARBUF* buf){ michael@0: if(buf!=NULL){ michael@0: if(buf->conv){ michael@0: ucnv_close(buf->conv); michael@0: } michael@0: T_FileStream_close(buf->in); michael@0: ucbuf_closebuf(buf); michael@0: uprv_free(buf); michael@0: } michael@0: } michael@0: michael@0: /* rewind the buf and file stream */ michael@0: U_CAPI void U_EXPORT2 michael@0: ucbuf_rewind(UCHARBUF* buf,UErrorCode* error){ michael@0: if(error==NULL || U_FAILURE(*error)){ michael@0: return; michael@0: } michael@0: if(buf){ michael@0: buf->currentPos=buf->buffer; michael@0: buf->bufLimit=buf->buffer; michael@0: T_FileStream_rewind(buf->in); michael@0: buf->remaining=T_FileStream_size(buf->in)-buf->signatureLength; michael@0: michael@0: ucnv_resetToUnicode(buf->conv); michael@0: if(buf->signatureLength>0) { michael@0: UChar target[1]={ 0 }; michael@0: UChar* pTarget; michael@0: char start[8]; michael@0: const char* pStart; michael@0: int32_t numRead; michael@0: michael@0: /* read the signature bytes */ michael@0: numRead=T_FileStream_read(buf->in, start, buf->signatureLength); michael@0: michael@0: /* convert and ignore initial U+FEFF, and the buffer overflow */ michael@0: pTarget = target; michael@0: pStart = start; michael@0: ucnv_toUnicode(buf->conv, &pTarget, target+1, &pStart, start+numRead, NULL, FALSE, error); michael@0: if(*error==U_BUFFER_OVERFLOW_ERROR) { michael@0: *error=U_ZERO_ERROR; michael@0: } michael@0: michael@0: /* verify that we successfully read exactly U+FEFF */ michael@0: if(U_SUCCESS(*error) && (numRead!=buf->signatureLength || pTarget!=(target+1) || target[0]!=0xfeff)) { michael@0: *error=U_INTERNAL_PROGRAM_ERROR; michael@0: } michael@0: } michael@0: } michael@0: } michael@0: michael@0: michael@0: U_CAPI int32_t U_EXPORT2 michael@0: ucbuf_size(UCHARBUF* buf){ michael@0: if(buf){ michael@0: if(buf->isBuffered){ michael@0: return (T_FileStream_size(buf->in)-buf->signatureLength)/ucnv_getMinCharSize(buf->conv); michael@0: }else{ michael@0: return (int32_t)(buf->bufLimit - buf->buffer); michael@0: } michael@0: } michael@0: return 0; michael@0: } michael@0: michael@0: U_CAPI const UChar* U_EXPORT2 michael@0: ucbuf_getBuffer(UCHARBUF* buf,int32_t* len,UErrorCode* error){ michael@0: if(error==NULL || U_FAILURE(*error)){ michael@0: return NULL; michael@0: } michael@0: if(buf==NULL || len==NULL){ michael@0: *error = U_ILLEGAL_ARGUMENT_ERROR; michael@0: return NULL; michael@0: } michael@0: *len = (int32_t)(buf->bufLimit - buf->buffer); michael@0: return buf->buffer; michael@0: } michael@0: michael@0: U_CAPI const char* U_EXPORT2 michael@0: ucbuf_resolveFileName(const char* inputDir, const char* fileName, char* target, int32_t* len, UErrorCode* status){ michael@0: int32_t requiredLen = 0; michael@0: int32_t dirlen = 0; michael@0: int32_t filelen = 0; michael@0: if(status==NULL || U_FAILURE(*status)){ michael@0: return NULL; michael@0: } michael@0: michael@0: if(inputDir == NULL || fileName == NULL || len==NULL || (target==NULL && *len>0)){ michael@0: *status = U_ILLEGAL_ARGUMENT_ERROR; michael@0: return NULL; michael@0: } michael@0: michael@0: michael@0: dirlen = (int32_t)uprv_strlen(inputDir); michael@0: filelen = (int32_t)uprv_strlen(fileName); michael@0: if(inputDir[dirlen-1] != U_FILE_SEP_CHAR) { michael@0: requiredLen = dirlen + filelen + 2; michael@0: if((*len < requiredLen) || target==NULL){ michael@0: *len = requiredLen; michael@0: *status = U_BUFFER_OVERFLOW_ERROR; michael@0: return NULL; michael@0: } michael@0: michael@0: target[0] = '\0'; michael@0: /* michael@0: * append the input dir to openFileName if the first char in michael@0: * filename is not file seperation char and the last char input directory is not '.'. michael@0: * This is to support : michael@0: * genrb -s. /home/icu/data michael@0: * genrb -s. icu/data michael@0: * The user cannot mix notations like michael@0: * genrb -s. /icu/data --- the absolute path specified. -s redundant michael@0: * user should use michael@0: * genrb -s. icu/data --- start from CWD and look in icu/data dir michael@0: */ michael@0: if( (fileName[0] != U_FILE_SEP_CHAR) && (inputDir[dirlen-1] !='.')){ michael@0: uprv_strcpy(target, inputDir); michael@0: target[dirlen] = U_FILE_SEP_CHAR; michael@0: } michael@0: target[dirlen + 1] = '\0'; michael@0: } else { michael@0: requiredLen = dirlen + filelen + 1; michael@0: if((*len < requiredLen) || target==NULL){ michael@0: *len = requiredLen; michael@0: *status = U_BUFFER_OVERFLOW_ERROR; michael@0: return NULL; michael@0: } michael@0: michael@0: uprv_strcpy(target, inputDir); michael@0: } michael@0: michael@0: uprv_strcat(target, fileName); michael@0: return target; michael@0: } michael@0: /* michael@0: * Unicode TR 13 says any of the below chars is michael@0: * a new line char in a readline function in addition michael@0: * to CR+LF combination which needs to be michael@0: * handled seperately michael@0: */ michael@0: static UBool ucbuf_isCharNewLine(UChar c){ michael@0: switch(c){ michael@0: case 0x000A: /* LF */ michael@0: case 0x000D: /* CR */ michael@0: case 0x000C: /* FF */ michael@0: case 0x0085: /* NEL */ michael@0: case 0x2028: /* LS */ michael@0: case 0x2029: /* PS */ michael@0: return TRUE; michael@0: default: michael@0: return FALSE; michael@0: } michael@0: } michael@0: michael@0: U_CAPI const UChar* U_EXPORT2 michael@0: ucbuf_readline(UCHARBUF* buf,int32_t* len,UErrorCode* err){ michael@0: UChar* temp = buf->currentPos; michael@0: UChar* savePos =NULL; michael@0: UChar c=0x0000; michael@0: if(buf->isBuffered){ michael@0: /* The input is buffered we have to do more michael@0: * for returning a pointer U_TRUNCATED_CHAR_FOUND michael@0: */ michael@0: for(;;){ michael@0: c = *temp++; michael@0: if(buf->remaining==0){ michael@0: return NULL; /* end of file is reached return NULL */ michael@0: } michael@0: if(temp>=buf->bufLimit && buf->currentPos == buf->buffer){ michael@0: *err= U_TRUNCATED_CHAR_FOUND; michael@0: return NULL; michael@0: }else{ michael@0: ucbuf_fillucbuf(buf,err); michael@0: if(U_FAILURE(*err)){ michael@0: return NULL; michael@0: } michael@0: } michael@0: /* michael@0: * Accoding to TR 13 readLine functions must interpret michael@0: * CR, CR+LF, LF, NEL, PS, LS or FF as line seperators michael@0: */ michael@0: /* Windows CR LF */ michael@0: if(c ==0x0d && temp+1<=buf->bufLimit && *(temp+1) == 0x0a ){ michael@0: *len = (int32_t)(temp++ - buf->currentPos); michael@0: savePos = buf->currentPos; michael@0: buf->currentPos = temp; michael@0: return savePos; michael@0: } michael@0: /* else */ michael@0: michael@0: if (temp>=buf->bufLimit|| ucbuf_isCharNewLine(c)){ /* Unipad inserts 2028 line separators! */ michael@0: *len = (int32_t)(temp - buf->currentPos); michael@0: savePos = buf->currentPos; michael@0: buf->currentPos = temp; michael@0: return savePos; michael@0: } michael@0: } michael@0: }else{ michael@0: /* we know that all input is read into the internal michael@0: * buffer so we can safely return pointers michael@0: */ michael@0: for(;;){ michael@0: c = *temp++; michael@0: michael@0: if(buf->currentPos==buf->bufLimit){ michael@0: return NULL; /* end of file is reached return NULL */ michael@0: } michael@0: /* Windows CR LF */ michael@0: if(c ==0x0d && temp+1<=buf->bufLimit && *(temp+1) == 0x0a ){ michael@0: *len = (int32_t)(temp++ - buf->currentPos); michael@0: savePos = buf->currentPos; michael@0: buf->currentPos = temp; michael@0: return savePos; michael@0: } michael@0: /* else */ michael@0: if (temp>=buf->bufLimit|| ucbuf_isCharNewLine(c)) { /* Unipad inserts 2028 line separators! */ michael@0: *len = (int32_t)(temp - buf->currentPos); michael@0: savePos = buf->currentPos; michael@0: buf->currentPos = temp; michael@0: return savePos; michael@0: } michael@0: } michael@0: } michael@0: /* not reached */ michael@0: /* A compiler warning will appear if all paths don't contain a return statement. */ michael@0: /* return NULL;*/ michael@0: } michael@0: #endif