intl/icu/source/tools/toolutil/ucbuf.h

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 /*
michael@0 2 *******************************************************************************
michael@0 3 *
michael@0 4 * Copyright (C) 1998-2008, International Business Machines
michael@0 5 * Corporation and others. All Rights Reserved.
michael@0 6 *
michael@0 7 *******************************************************************************
michael@0 8 *
michael@0 9 * File ucbuf.h
michael@0 10 *
michael@0 11 * Modification History:
michael@0 12 *
michael@0 13 * Date Name Description
michael@0 14 * 05/10/01 Ram Creation.
michael@0 15 *
michael@0 16 * This API reads in files and returns UChars
michael@0 17 *******************************************************************************
michael@0 18 */
michael@0 19
michael@0 20 #include "unicode/ucnv.h"
michael@0 21 #include "filestrm.h"
michael@0 22
michael@0 23 #if !UCONFIG_NO_CONVERSION
michael@0 24
michael@0 25 #ifndef UCBUF_H
michael@0 26 #define UCBUF_H 1
michael@0 27
michael@0 28 typedef struct UCHARBUF UCHARBUF;
michael@0 29 /**
michael@0 30 * End of file value
michael@0 31 */
michael@0 32 #define U_EOF 0xFFFFFFFF
michael@0 33 /**
michael@0 34 * Error value if a sequence cannot be unescaped
michael@0 35 */
michael@0 36 #define U_ERR 0xFFFFFFFE
michael@0 37
michael@0 38 typedef struct ULine ULine;
michael@0 39
michael@0 40 struct ULine {
michael@0 41 UChar *name;
michael@0 42 int32_t len;
michael@0 43 };
michael@0 44
michael@0 45 /**
michael@0 46 * Opens the UCHARBUF with the given file stream and code page for conversion
michael@0 47 * @param fileName Name of the file to open.
michael@0 48 * @param codepage The encoding of the file stream to convert to Unicode.
michael@0 49 * If *codepage is NULL on input the API will try to autodetect
michael@0 50 * popular Unicode encodings
michael@0 51 * @param showWarning Flag to print out warnings to STDOUT
michael@0 52 * @param buffered If TRUE performs a buffered read of the input file. If FALSE reads
michael@0 53 * the whole file into memory and converts it.
michael@0 54 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
michael@0 55 * indicates a failure on entry, the function will immediately return.
michael@0 56 * On exit the value will indicate the success of the operation.
michael@0 57 * @return pointer to the newly opened UCHARBUF
michael@0 58 */
michael@0 59 U_CAPI UCHARBUF* U_EXPORT2
michael@0 60 ucbuf_open(const char* fileName,const char** codepage,UBool showWarning, UBool buffered, UErrorCode* err);
michael@0 61
michael@0 62 /**
michael@0 63 * Gets a UTF-16 code unit at the current position from the converted buffer
michael@0 64 * and increments the current position
michael@0 65 * @param buf Pointer to UCHARBUF structure
michael@0 66 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
michael@0 67 * indicates a failure on entry, the function will immediately return.
michael@0 68 * On exit the value will indicate the success of the operation.
michael@0 69 */
michael@0 70 U_CAPI int32_t U_EXPORT2
michael@0 71 ucbuf_getc(UCHARBUF* buf,UErrorCode* err);
michael@0 72
michael@0 73 /**
michael@0 74 * Gets a UTF-32 code point at the current position from the converted buffer
michael@0 75 * and increments the current position
michael@0 76 * @param buf Pointer to UCHARBUF structure
michael@0 77 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
michael@0 78 * indicates a failure on entry, the function will immediately return.
michael@0 79 * On exit the value will indicate the success of the operation.
michael@0 80 */
michael@0 81 U_CAPI int32_t U_EXPORT2
michael@0 82 ucbuf_getc32(UCHARBUF* buf,UErrorCode* err);
michael@0 83
michael@0 84 /**
michael@0 85 * Gets a UTF-16 code unit at the current position from the converted buffer after
michael@0 86 * unescaping and increments the current position. If the escape sequence is for UTF-32
michael@0 87 * code point (\\Uxxxxxxxx) then a UTF-32 codepoint is returned
michael@0 88 * @param buf Pointer to UCHARBUF structure
michael@0 89 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
michael@0 90 * indicates a failure on entry, the function will immediately return.
michael@0 91 * On exit the value will indicate the success of the operation.
michael@0 92 */
michael@0 93 U_CAPI int32_t U_EXPORT2
michael@0 94 ucbuf_getcx32(UCHARBUF* buf,UErrorCode* err);
michael@0 95
michael@0 96 /**
michael@0 97 * Gets a pointer to the current position in the internal buffer and length of the line.
michael@0 98 * It is imperative to make a copy of the returned buffer before performing operations on it.
michael@0 99 * @param buf Pointer to UCHARBUF structure
michael@0 100 * @param len Output param to receive the len of the buffer returned till end of the line
michael@0 101 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
michael@0 102 * indicates a failure on entry, the function will immediately return.
michael@0 103 * On exit the value will indicate the success of the operation.
michael@0 104 * Error: U_TRUNCATED_CHAR_FOUND
michael@0 105 * @return Pointer to the internal buffer, NULL if EOF
michael@0 106 */
michael@0 107 U_CAPI const UChar* U_EXPORT2
michael@0 108 ucbuf_readline(UCHARBUF* buf,int32_t* len, UErrorCode* err);
michael@0 109
michael@0 110
michael@0 111 /**
michael@0 112 * Resets the buffers and the underlying file stream.
michael@0 113 * @param buf Pointer to UCHARBUF structure
michael@0 114 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
michael@0 115 * indicates a failure on entry, the function will immediately return.
michael@0 116 * On exit the value will indicate the success of the operation.
michael@0 117 */
michael@0 118 U_CAPI void U_EXPORT2
michael@0 119 ucbuf_rewind(UCHARBUF* buf,UErrorCode* err);
michael@0 120
michael@0 121 /**
michael@0 122 * Returns a pointer to the internal converted buffer
michael@0 123 * @param buf Pointer to UCHARBUF structure
michael@0 124 * @param len Pointer to int32_t to receive the length of the buffer
michael@0 125 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
michael@0 126 * indicates a failure on entry, the function will immediately return.
michael@0 127 * On exit the value will indicate the success of the operation.
michael@0 128 * @return Pointer to internal UChar buffer
michael@0 129 */
michael@0 130 U_CAPI const UChar* U_EXPORT2
michael@0 131 ucbuf_getBuffer(UCHARBUF* buf,int32_t* len,UErrorCode* err);
michael@0 132
michael@0 133 /**
michael@0 134 * Closes the UCHARBUF structure members and cleans up the malloc'ed memory
michael@0 135 * @param buf Pointer to UCHARBUF structure
michael@0 136 */
michael@0 137 U_CAPI void U_EXPORT2
michael@0 138 ucbuf_close(UCHARBUF* buf);
michael@0 139
michael@0 140 /**
michael@0 141 * Rewinds the buffer by one codepoint. Does not rewind over escaped characters.
michael@0 142 */
michael@0 143 U_CAPI void U_EXPORT2
michael@0 144 ucbuf_ungetc(int32_t ungetChar,UCHARBUF* buf);
michael@0 145
michael@0 146
michael@0 147 /**
michael@0 148 * Autodetects the encoding of the file stream. Only Unicode charsets are autodetected.
michael@0 149 * Some Unicode charsets are stateful and need byte identifiers to be converted also to bring
michael@0 150 * the converter to correct state for converting the rest of the stream. So the UConverter parameter
michael@0 151 * is necessary.
michael@0 152 * If the charset was autodetected, the caller must close both the input FileStream
michael@0 153 * and the converter.
michael@0 154 *
michael@0 155 * @param fileName The file name to be opened and encoding autodetected
michael@0 156 * @param conv Output param to receive the opened converter if autodetected; NULL otherwise.
michael@0 157 * @param cp Output param to receive the detected encoding
michael@0 158 * @param status is a pointer to a valid <code>UErrorCode</code> value. If this value
michael@0 159 * indicates a failure on entry, the function will immediately return.
michael@0 160 * On exit the value will indicate the success of the operation.
michael@0 161 * @return The input FileStream if its charset was autodetected; NULL otherwise.
michael@0 162 */
michael@0 163 U_CAPI FileStream * U_EXPORT2
michael@0 164 ucbuf_autodetect(const char* fileName, const char** cp,UConverter** conv,
michael@0 165 int32_t* signatureLength, UErrorCode* status);
michael@0 166
michael@0 167 /**
michael@0 168 * Autodetects the encoding of the file stream. Only Unicode charsets are autodetected.
michael@0 169 * Some Unicode charsets are stateful and need byte identifiers to be converted also to bring
michael@0 170 * the converter to correct state for converting the rest of the stream. So the UConverter parameter
michael@0 171 * is necessary.
michael@0 172 * If the charset was autodetected, the caller must close the converter.
michael@0 173 *
michael@0 174 * @param in The file stream whose encoding is to be detected
michael@0 175 * @param conv Output param to receive the opened converter if autodetected; NULL otherwise.
michael@0 176 * @param cp Output param to receive the detected encoding
michael@0 177 * @param status is a pointer to a valid <code>UErrorCode</code> value. If this value
michael@0 178 * indicates a failure on entry, the function will immediately return.
michael@0 179 * On exit the value will indicate the success of the operation.
michael@0 180 * @return Boolean whether the Unicode charset was autodetected.
michael@0 181 */
michael@0 182
michael@0 183 U_CAPI UBool U_EXPORT2
michael@0 184 ucbuf_autodetect_fs(FileStream* in, const char** cp, UConverter** conv, int32_t* signatureLength, UErrorCode* status);
michael@0 185
michael@0 186 /**
michael@0 187 * Returns the approximate size in UChars required for converting the file to UChars
michael@0 188 */
michael@0 189 U_CAPI int32_t U_EXPORT2
michael@0 190 ucbuf_size(UCHARBUF* buf);
michael@0 191
michael@0 192 U_CAPI const char* U_EXPORT2
michael@0 193 ucbuf_resolveFileName(const char* inputDir, const char* fileName, char* target, int32_t* len, UErrorCode* status);
michael@0 194
michael@0 195 #endif
michael@0 196 #endif
michael@0 197

mercurial