intl/icu/source/tools/toolutil/uparse.h

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 /*
michael@0 2 *******************************************************************************
michael@0 3 *
michael@0 4 * Copyright (C) 2000-2010, International Business Machines
michael@0 5 * Corporation and others. All Rights Reserved.
michael@0 6 *
michael@0 7 *******************************************************************************
michael@0 8 * file name: uparse.h
michael@0 9 * encoding: US-ASCII
michael@0 10 * tab size: 8 (not used)
michael@0 11 * indentation:4
michael@0 12 *
michael@0 13 * created on: 2000apr18
michael@0 14 * created by: Markus W. Scherer
michael@0 15 *
michael@0 16 * This file provides a parser for files that are delimited by one single
michael@0 17 * character like ';' or TAB. Example: the Unicode Character Properties files
michael@0 18 * like UnicodeData.txt are semicolon-delimited.
michael@0 19 */
michael@0 20
michael@0 21 #ifndef __UPARSE_H__
michael@0 22 #define __UPARSE_H__
michael@0 23
michael@0 24 #include "unicode/utypes.h"
michael@0 25
michael@0 26 /**
michael@0 27 * Is c an invariant-character whitespace (space, TAB, CR or LF)?
michael@0 28 * @param c invariant character; evaluated up to four times, so it must not have side effects
michael@0 29 */
michael@0 30 #define U_IS_INV_WHITESPACE(c) ((c)==' ' || (c)=='\t' || (c)=='\r' || (c)=='\n')
michael@0 31
michael@0 32 U_CDECL_BEGIN
michael@0 33
michael@0 34 /**
michael@0 35 * Skip space ' ' and TAB '\t' characters.
michael@0 36 * NOTE(review): confirm whether CR and LF (see U_IS_INV_WHITESPACE) are skipped too.
michael@0 37 * @param s Pointer to characters.
michael@0 38 * @return Pointer to first character at or after s that is not a space or TAB.
michael@0 39 */
michael@0 40 U_CAPI const char * U_EXPORT2
michael@0 41 u_skipWhitespace(const char *s);
michael@0 42
michael@0 43 /**
michael@0 44 * Trim whitespace (including line endings) from the end of the string.
michael@0 45 * NOTE(review): s is non-const, so the string is presumably cut in place; confirm.
michael@0 46 * @param s Pointer to the string.
michael@0 47 * @return Pointer to the new end of the string.
michael@0 48 */
michael@0 49 U_CAPI char * U_EXPORT2
michael@0 50 u_rtrim(char *s);
michael@0 51
michael@0 52 /** Line function type for u_parseDelimitedFile(); see that function for how fields is laid out. */
michael@0 53 typedef void U_CALLCONV
michael@0 54 UParseLineFn(void *context,
michael@0 55 char *fields[][2],
michael@0 56 int32_t fieldCount,
michael@0 57 UErrorCode *pErrorCode);
michael@0 58
michael@0 59 /**
michael@0 60 * Parser for files that are similar to UnicodeData.txt:
michael@0 61 * This function opens the file and reads it line by line. It skips empty lines
michael@0 62 * and comment lines that start with a '#'.
michael@0 63 * All other lines are separated into fields with one delimiter character
michael@0 64 * (semicolon for Unicode Properties files) between two fields. The last field in
michael@0 65 * a line does not need to be terminated with a delimiter.
michael@0 66 *
michael@0 67 * For each line, after segmenting it, a line function is called.
michael@0 68 * It gets passed the array of field start and limit pointers that is
michael@0 69 * passed into this parser and filled by it for each line.
michael@0 70 * For each field i of the line, the start pointer in fields[i][0]
michael@0 71 * points to the beginning of the field, while the limit pointer in fields[i][1]
michael@0 72 * points behind the field, i.e., to the delimiter or the line end.
michael@0 73 *
michael@0 74 * The context parameter of the line function is
michael@0 75 * the same as the one for the parse function.
michael@0 76 *
michael@0 77 * The line function may modify the contents of the fields including the
michael@0 78 * limit characters.
michael@0 79 *
michael@0 80 * If the file cannot be opened, or there is a parsing error, or the line function
michael@0 81 * sets *pErrorCode, then the parser returns with *pErrorCode set to an error code.
michael@0 82 */
michael@0 83 U_CAPI void U_EXPORT2
michael@0 84 u_parseDelimitedFile(const char *filename, char delimiter,
michael@0 85 char *fields[][2], int32_t fieldCount,
michael@0 86 UParseLineFn *lineFn, void *context,
michael@0 87 UErrorCode *pErrorCode);
michael@0 88
michael@0 89 /**
michael@0 90 * Parse a string of code points like 0061 0308 0300.
michael@0 91 * s must end with either ';' or NUL.
michael@0 92 * NOTE(review): dest/destCapacity presumably are the output array and its capacity in code points; confirm.
michael@0 93 * @return Number of code points.
michael@0 94 */
michael@0 95 U_CAPI int32_t U_EXPORT2
michael@0 96 u_parseCodePoints(const char *s,
michael@0 97 uint32_t *dest, int32_t destCapacity,
michael@0 98 UErrorCode *pErrorCode);
michael@0 99
michael@0 100 /**
michael@0 101 * Parse a list of code points like 0061 0308 0300
michael@0 102 * into a UChar * string.
michael@0 103 * s must end with either ';' or NUL.
michael@0 104 *
michael@0 105 * Set the first code point in *pFirst.
michael@0 106 *
michael@0 107 * @param s Input char * string.
michael@0 108 * @param dest Output string buffer.
michael@0 109 * @param destCapacity Capacity of dest in numbers of UChars.
michael@0 110 * @param pFirst If pFirst!=NULL then *pFirst will be set to the first
michael@0 111 * code point in the string.
michael@0 112 * @param pErrorCode ICU error code.
michael@0 113 * @return The length of the string in numbers of UChars.
michael@0 114 */
michael@0 115 U_CAPI int32_t U_EXPORT2
michael@0 116 u_parseString(const char *s,
michael@0 117 UChar *dest, int32_t destCapacity,
michael@0 118 uint32_t *pFirst,
michael@0 119 UErrorCode *pErrorCode);
michael@0 120
michael@0 121 /**
michael@0 122 * Parse a code point range like
michael@0 123 * 0085 or
michael@0 124 * 4E00..9FA5.
michael@0 125 *
michael@0 126 * s must contain such a range and end with either ';' or NUL.
michael@0 127 * NOTE(review): pStart/pEnd presumably receive the first and last code point; confirm.
michael@0 128 * @return Length of code point range, end-start+1
michael@0 129 */
michael@0 130 U_CAPI int32_t U_EXPORT2
michael@0 131 u_parseCodePointRange(const char *s,
michael@0 132 uint32_t *pStart, uint32_t *pEnd,
michael@0 133 UErrorCode *pErrorCode);
michael@0 134
michael@0 135 /**
michael@0 136 * Variant of u_parseCodePointRange() in which the range may be terminated by
michael@0 137 * any character, not only ';' or NUL. A pointer to the terminating character
michael@0 138 * is stored in *terminator.
michael@0 139 */
michael@0 140 U_CAPI int32_t U_EXPORT2
michael@0 141 u_parseCodePointRangeAnyTerminator(const char *s,
michael@0 142 uint32_t *pStart, uint32_t *pEnd,
michael@0 143 const char **terminator,
michael@0 144 UErrorCode *pErrorCode);
michael@0 145 /** NOTE(review): contract is not documented in this header; confirm input format and return value against the implementation. */
michael@0 146 U_CAPI int32_t U_EXPORT2
michael@0 147 u_parseUTF8(const char *source, int32_t sLen, char *dest, int32_t destCapacity, UErrorCode *status);
michael@0 148
michael@0 149 U_CDECL_END
michael@0 150
michael@0 151 #endif

mercurial