michael@0: /*
michael@0: *******************************************************************************
michael@0: *
michael@0: *   Copyright (C) 2000-2010, International Business Machines
michael@0: *   Corporation and others.  All Rights Reserved.
michael@0: *
michael@0: *******************************************************************************
michael@0: *   file name:  uparse.h
michael@0: *   encoding:   US-ASCII
michael@0: *   tab size:   8 (not used)
michael@0: *   indentation:4
michael@0: *
michael@0: *   created on: 2000apr18
michael@0: *   created by: Markus W. Scherer
michael@0: *
michael@0: *   This file provides a parser for files that are delimited by one single
michael@0: *   character like ';' or TAB. Example: the Unicode Character Properties files
michael@0: *   like UnicodeData.txt are semicolon-delimited.
michael@0: */
michael@0: 
michael@0: #ifndef __UPARSE_H__
michael@0: #define __UPARSE_H__
michael@0: 
michael@0: #include "unicode/utypes.h"
michael@0: 
michael@0: /**
michael@0:  * Is c an invariant-character whitespace?
michael@0:  * The characters treated as whitespace are exactly space ' ', TAB '\t',
michael@0:  * carriage return '\r' and line feed '\n'.
michael@0:  *
michael@0:  * This is a function-like macro that mentions its argument four times,
michael@0:  * so c may be evaluated more than once. Pass only expressions
michael@0:  * without side effects (for example, not an incrementing pointer read).
michael@0:  *
michael@0:  * @param c invariant character
michael@0:  * @return True (nonzero) if c is one of the four characters above, otherwise false (0).
michael@0:  */
michael@0: #define U_IS_INV_WHITESPACE(c) ((c)==' ' || (c)=='\t' || (c)=='\r' || (c)=='\n')
michael@0: 
michael@0: U_CDECL_BEGIN
michael@0: 
michael@0: /**
michael@0:  * Skip space ' ' and TAB '\t' characters.
michael@0:  *
michael@0:  * The input is not modified; the result points into the same character
michael@0:  * sequence as s.
michael@0:  *
michael@0:  * NOTE(review): this header also defines U_IS_INV_WHITESPACE(), which
michael@0:  * additionally matches '\r' and '\n'. The implementation is not visible here;
michael@0:  * confirm whether line endings are skipped as well and update this text if so.
michael@0:  *
michael@0:  * @param s Pointer to characters (presumably NUL-terminated; confirm against callers).
michael@0:  * @return Pointer to first character at or after s that is not a space or TAB.
michael@0:  */
michael@0: U_CAPI const char * U_EXPORT2
michael@0: u_skipWhitespace(const char *s);
michael@0: 
michael@0: /**
michael@0:  * Trim whitespace (including line endings) from the end of the string.
michael@0:  *
michael@0:  * The string is passed as a non-const pointer and the function reports a
michael@0:  * new end, so the caller must supply a writable buffer.
michael@0:  * NOTE(review): presumably the string is re-terminated in place at the
michael@0:  * returned position; confirm against the implementation.
michael@0:  *
michael@0:  * @param s Pointer to the string.
michael@0:  * @return Pointer to the new end of the string.
michael@0:  */
michael@0: U_CAPI char * U_EXPORT2
michael@0: u_rtrim(char *s);
michael@0: 
michael@0: /**
michael@0:  * Function type for u_parseDelimitedFile().
michael@0:  * A function of this type is called by the parser once for each line
michael@0:  * after the line has been segmented into fields.
michael@0:  *
michael@0:  * @param context The same context pointer that was given to u_parseDelimitedFile().
michael@0:  * @param fields Array of field start and limit pointers for the current line:
michael@0:  *               fields[i][0] points to the beginning of field i and
michael@0:  *               fields[i][1] points behind it (to the delimiter or the line end).
michael@0:  *               The field contents, including the limit characters, may be modified.
michael@0:  * @param fieldCount Number of fields (presumably the value that was passed to
michael@0:  *                   u_parseDelimitedFile(); confirm against the implementation).
michael@0:  * @param pErrorCode ICU error code. Setting *pErrorCode to an error causes
michael@0:  *                   the parser to return with that error.
michael@0:  */
michael@0: typedef void U_CALLCONV
michael@0: UParseLineFn(void *context,
michael@0:               char *fields[][2],
michael@0:               int32_t fieldCount,
michael@0:               UErrorCode *pErrorCode);
michael@0: 
michael@0: /**
michael@0:  * Parser for files that are similar to UnicodeData.txt:
michael@0:  * This function opens the file and reads it line by line. It skips empty lines
michael@0:  * and comment lines that start with a '#'.
michael@0:  * All other lines are separated into fields with one delimiter character
michael@0:  * (semicolon for Unicode Properties files) between two fields. The last field in
michael@0:  * a line does not need to be terminated with a delimiter.
michael@0:  *
michael@0:  * For each line, after segmenting it, a line function is called.
michael@0:  * It gets passed the array of field start and limit pointers that is
michael@0:  * passed into this parser and filled by it for each line.
michael@0:  * For each field i of the line, the start pointer in fields[i][0]
michael@0:  * points to the beginning of the field, while the limit pointer in fields[i][1]
michael@0:  * points behind the field, i.e., to the delimiter or the line end.
michael@0:  *
michael@0:  * The context parameter of the line function is
michael@0:  * the same as the one for the parse function.
michael@0:  *
michael@0:  * The line function may modify the contents of the fields including the
michael@0:  * limit characters.
michael@0:  *
michael@0:  * If the file cannot be opened, or there is a parsing error or the line function
michael@0:  * sets *pErrorCode, then the parser returns with *pErrorCode set to an error code.
michael@0:  *
michael@0:  * @param filename Name of the file to open and parse.
michael@0:  * @param delimiter The single character that separates two fields on a line.
michael@0:  * @param fields Caller-provided array of start/limit pointer pairs; the parser
michael@0:  *               fills it for each line and hands it to the line function.
michael@0:  * @param fieldCount Number of fields (presumably the number of entries in
michael@0:  *                   fields and the number of fields expected per line;
michael@0:  *                   confirm against the implementation).
michael@0:  * @param lineFn Line function that is called for each segmented line.
michael@0:  * @param context Opaque pointer that is passed through to lineFn unchanged.
michael@0:  * @param pErrorCode ICU error code; set to an error code on failure as described above.
michael@0:  */
michael@0: U_CAPI void U_EXPORT2
michael@0: u_parseDelimitedFile(const char *filename, char delimiter,
michael@0:                      char *fields[][2], int32_t fieldCount,
michael@0:                      UParseLineFn *lineFn, void *context,
michael@0:                      UErrorCode *pErrorCode);
michael@0: 
michael@0: /**
michael@0:  * Parse a string of code points like 0061 0308 0300.
michael@0:  * s must end with either ';' or NUL.
michael@0:  *
michael@0:  * NOTE(review): the example suggests whitespace-separated hexadecimal
michael@0:  * numbers; the accepted syntax is defined by the implementation, which is
michael@0:  * not visible here.
michael@0:  *
michael@0:  * @param s Input char * string.
michael@0:  * @param dest Output buffer that receives the parsed code points.
michael@0:  * @param destCapacity Capacity of dest (presumably in numbers of uint32_t
michael@0:  *                     elements; confirm against the implementation).
michael@0:  * @param pErrorCode ICU error code.
michael@0:  * @return Number of code points.
michael@0:  */
michael@0: U_CAPI int32_t U_EXPORT2
michael@0: u_parseCodePoints(const char *s,
michael@0:                   uint32_t *dest, int32_t destCapacity,
michael@0:                   UErrorCode *pErrorCode);
michael@0: 
michael@0: /**
michael@0:  * Parse a list of code points like 0061 0308 0300
michael@0:  * into a UChar * string.
michael@0:  * s must end with either ';' or NUL.
michael@0:  *
michael@0:  * Set the first code point in *pFirst.
michael@0:  *
michael@0:  * @param s Input char * string.
michael@0:  * @param dest Output string buffer.
michael@0:  * @param destCapacity Capacity of dest in numbers of UChars.
michael@0:  * @param pFirst If pFirst!=NULL then *pFirst will be set to the first
michael@0:  *               code point in the string.
michael@0:  * @param pErrorCode ICU error code.
michael@0:  * @return The length of the string in numbers of UChars.
michael@0:  */
michael@0: U_CAPI int32_t U_EXPORT2
michael@0: u_parseString(const char *s,
michael@0:               UChar *dest, int32_t destCapacity,
michael@0:               uint32_t *pFirst,
michael@0:               UErrorCode *pErrorCode);
michael@0: 
michael@0: /**
michael@0:  * Parse a code point range like
michael@0:  * 0085 or
michael@0:  * 4E00..9FA5.
michael@0:  *
michael@0:  * s must contain such a range and end with either ';' or NUL.
michael@0:  *
michael@0:  * NOTE(review): for the single-code-point form (e.g. 0085) the start and
michael@0:  * end are presumably equal, giving a length of 1; confirm against the
michael@0:  * implementation.
michael@0:  *
michael@0:  * @param s Input char * string.
michael@0:  * @param pStart Output: receives the first code point of the range.
michael@0:  * @param pEnd Output: receives the last code point of the range.
michael@0:  * @param pErrorCode ICU error code.
michael@0:  * @return Length of code point range, end-start+1
michael@0:  */
michael@0: U_CAPI int32_t U_EXPORT2
michael@0: u_parseCodePointRange(const char *s,
michael@0:                       uint32_t *pStart, uint32_t *pEnd,
michael@0:                       UErrorCode *pErrorCode);
michael@0: 
michael@0: /**
michael@0:  * Same as u_parseCodePointRange() but the range may be terminated by
michael@0:  * any character. The position of the terminating character is returned via
michael@0:  * the *terminator output parameter.
michael@0:  *
michael@0:  * @param s Input char * string.
michael@0:  * @param pStart Output: receives the first code point of the range.
michael@0:  * @param pEnd Output: receives the last code point of the range.
michael@0:  * @param terminator Output: *terminator receives the position of the
michael@0:  *                   character that terminated the range.
michael@0:  * @param pErrorCode ICU error code.
michael@0:  * @return Length of code point range, end-start+1
michael@0:  */
michael@0: U_CAPI int32_t U_EXPORT2
michael@0: u_parseCodePointRangeAnyTerminator(const char *s,
michael@0:                                    uint32_t *pStart, uint32_t *pEnd,
michael@0:                                    const char **terminator,
michael@0:                                    UErrorCode *pErrorCode);
michael@0: 
michael@0: /**
michael@0:  * Parse the char * input in source and write char output into dest.
michael@0:  *
michael@0:  * NOTE(review): this declaration had no documentation and the implementation
michael@0:  * is not visible here. Everything below beyond the parameter types is an
michael@0:  * assumption to be confirmed against the implementation:
michael@0:  * presumably source holds a textual (hexadecimal) representation that is
michael@0:  * converted into UTF-8 bytes in dest, and the return value is the
michael@0:  * output length.
michael@0:  *
michael@0:  * @param source Input char * string.
michael@0:  * @param sLen Length of source (TODO confirm whether a negative value
michael@0:  *             means NUL-terminated).
michael@0:  * @param dest Output buffer.
michael@0:  * @param destCapacity Capacity of dest (presumably in chars).
michael@0:  * @param status ICU error code.
michael@0:  * @return An int32_t result (presumably the number of chars written or required).
michael@0:  */
michael@0: U_CAPI int32_t U_EXPORT2
michael@0: u_parseUTF8(const char *source, int32_t sLen, char *dest, int32_t destCapacity, UErrorCode *status);
michael@0: 
michael@0: U_CDECL_END
michael@0: 
michael@0: #endif