michael@0: /* michael@0: ******************************************************************************* michael@0: * michael@0: * Copyright (C) 2000-2012, International Business Machines michael@0: * Corporation and others. All Rights Reserved. michael@0: * michael@0: ******************************************************************************* michael@0: * file name: uparse.c michael@0: * encoding: US-ASCII michael@0: * tab size: 8 (not used) michael@0: * indentation:4 michael@0: * michael@0: * created on: 2000apr18 michael@0: * created by: Markus W. Scherer michael@0: * michael@0: * This file provides a parser for files that are delimited by one single michael@0: * character like ';' or TAB. Example: the Unicode Character Properties files michael@0: * like UnicodeData.txt are semicolon-delimited. michael@0: */ michael@0: michael@0: #include "unicode/utypes.h" michael@0: #include "unicode/uchar.h" michael@0: #include "unicode/ustring.h" michael@0: #include "unicode/utf16.h" michael@0: #include "cstring.h" michael@0: #include "filestrm.h" michael@0: #include "uparse.h" michael@0: #include "ustr_imp.h" michael@0: michael@0: #include michael@0: michael@0: U_CAPI const char * U_EXPORT2 michael@0: u_skipWhitespace(const char *s) { michael@0: while(U_IS_INV_WHITESPACE(*s)) { michael@0: ++s; michael@0: } michael@0: return s; michael@0: } michael@0: michael@0: U_CAPI char * U_EXPORT2 michael@0: u_rtrim(char *s) { michael@0: char *end=uprv_strchr(s, 0); michael@0: while(sstart && U_IS_INV_WHITESPACE(*(limit-1))) { michael@0: --limit; michael@0: } michael@0: michael@0: /* truncate the line */ michael@0: *limit=0; michael@0: } michael@0: michael@0: /* skip lines with only whitespace */ michael@0: if(u_skipWhitespace(start)[0]==0) { michael@0: continue; michael@0: } michael@0: michael@0: /* for each field, call the corresponding field function */ michael@0: for(i=0; i0 && dest==NULL)) { michael@0: *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; michael@0: return 0; michael@0: } michael@0: michael@0: count=0; michael@0: for(;;) { michael@0: s=u_skipWhitespace(s); michael@0: if(*s==';' || *s==0) { michael@0: return count; michael@0: } michael@0: michael@0: /* read one code point */ michael@0: value=(uint32_t)uprv_strtoul(s, &end, 16); michael@0: if(end<=s || (!U_IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0) || value>=0x110000) { michael@0: *pErrorCode=U_PARSE_ERROR; michael@0: return 0; michael@0: } michael@0: michael@0: /* append it to the destination array */ michael@0: if(count0 && dest==NULL)) { michael@0: *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; michael@0: return 0; michael@0: } michael@0: michael@0: if(pFirst!=NULL) { michael@0: *pFirst=0xffffffff; michael@0: } michael@0: michael@0: destLength=0; michael@0: for(;;) { michael@0: s=u_skipWhitespace(s); michael@0: if(*s==';' || *s==0) { michael@0: if(destLength=0x110000) { michael@0: *pErrorCode=U_PARSE_ERROR; michael@0: return 0; michael@0: } michael@0: michael@0: /* store the first code point */ michael@0: if(pFirst!=NULL) { michael@0: *pFirst=value; michael@0: pFirst=NULL; michael@0: } michael@0: michael@0: /* append it to the destination array */ michael@0: if((destLength+U16_LENGTH(value))<=destCapacity) { michael@0: U16_APPEND_UNSAFE(dest, destLength, value); michael@0: } else { michael@0: destLength+=U16_LENGTH(value); michael@0: } michael@0: michael@0: /* go to the following characters */ michael@0: s=end; michael@0: } michael@0: } michael@0: michael@0: /* read a range like start or start..end */ michael@0: U_CAPI int32_t U_EXPORT2 michael@0: u_parseCodePointRangeAnyTerminator(const char *s, michael@0: uint32_t *pStart, uint32_t *pEnd, michael@0: const char **terminator, michael@0: UErrorCode *pErrorCode) { michael@0: char *end; michael@0: uint32_t value; michael@0: michael@0: if(U_FAILURE(*pErrorCode)) { michael@0: return 0; michael@0: } michael@0: if(s==NULL || pStart==NULL || pEnd==NULL) { michael@0: *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; michael@0: return 0; michael@0: } michael@0: michael@0: /* read the start code point */ michael@0: s=u_skipWhitespace(s); michael@0: value=(uint32_t)uprv_strtoul(s, &end, 16); michael@0: if(end<=s || value>=0x110000) { michael@0: *pErrorCode=U_PARSE_ERROR; michael@0: return 0; michael@0: } michael@0: *pStart=*pEnd=value; michael@0: michael@0: /* is there a "..end"? */ michael@0: s=u_skipWhitespace(end); michael@0: if(*s!='.' || s[1]!='.') { michael@0: *terminator=end; michael@0: return 1; michael@0: } michael@0: s=u_skipWhitespace(s+2); michael@0: michael@0: /* read the end code point */ michael@0: value=(uint32_t)uprv_strtoul(s, &end, 16); michael@0: if(end<=s || value>=0x110000) { michael@0: *pErrorCode=U_PARSE_ERROR; michael@0: return 0; michael@0: } michael@0: *pEnd=value; michael@0: michael@0: /* is this a valid range? */ michael@0: if(value<*pStart) { michael@0: *pErrorCode=U_PARSE_ERROR; michael@0: return 0; michael@0: } michael@0: michael@0: *terminator=end; michael@0: return value-*pStart+1; michael@0: } michael@0: michael@0: U_CAPI int32_t U_EXPORT2 michael@0: u_parseCodePointRange(const char *s, michael@0: uint32_t *pStart, uint32_t *pEnd, michael@0: UErrorCode *pErrorCode) { michael@0: const char *terminator; michael@0: int32_t rangeLength= michael@0: u_parseCodePointRangeAnyTerminator(s, pStart, pEnd, &terminator, pErrorCode); michael@0: if(U_SUCCESS(*pErrorCode)) { michael@0: terminator=u_skipWhitespace(terminator); michael@0: if(*terminator!=';' && *terminator!=0) { michael@0: *pErrorCode=U_PARSE_ERROR; michael@0: return 0; michael@0: } michael@0: } michael@0: return rangeLength; michael@0: } michael@0: michael@0: U_CAPI int32_t U_EXPORT2 michael@0: u_parseUTF8(const char *source, int32_t sLen, char *dest, int32_t destCapacity, UErrorCode *status) { michael@0: const char *read = source; michael@0: int32_t i = 0; michael@0: unsigned int value = 0; michael@0: if(sLen == -1) { michael@0: sLen = (int32_t)strlen(source); michael@0: } michael@0: michael@0: while(read < source+sLen) { michael@0: sscanf(read, "%2x", &value); michael@0: if(i < destCapacity) { michael@0: dest[i] = (char)value; michael@0: } michael@0: i++; michael@0: read += 2; michael@0: } michael@0: return u_terminateChars(dest, destCapacity, i, status); michael@0: }