1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/tools/toolutil/ucbuf.h Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,197 @@ 1.4 +/* 1.5 +******************************************************************************* 1.6 +* 1.7 +* Copyright (C) 1998-2008, International Business Machines 1.8 +* Corporation and others. All Rights Reserved. 1.9 +* 1.10 +******************************************************************************* 1.11 +* 1.12 +* File ucbuf.h 1.13 +* 1.14 +* Modification History: 1.15 +* 1.16 +* Date Name Description 1.17 +* 05/10/01 Ram Creation. 1.18 +* 1.19 +* This API reads in files and returns UChars 1.20 +******************************************************************************* 1.21 +*/ 1.22 + 1.23 +#include "unicode/ucnv.h" 1.24 +#include "filestrm.h" 1.25 + 1.26 +#if !UCONFIG_NO_CONVERSION 1.27 + 1.28 +#ifndef UCBUF_H 1.29 +#define UCBUF_H 1 1.30 + 1.31 +typedef struct UCHARBUF UCHARBUF; 1.32 +/** 1.33 + * End of file value 1.34 + */ 1.35 +#define U_EOF 0xFFFFFFFF 1.36 +/** 1.37 + * Error value if a sequence cannot be unescaped 1.38 + */ 1.39 +#define U_ERR 0xFFFFFFFE 1.40 + 1.41 +typedef struct ULine ULine; 1.42 +/** A UChar pointer (name) paired with an int32_t length (len). No prototype in this header takes or returns a ULine. NOTE(review): presumably len counts the UChars at name - confirm against the users of this struct. */ 1.43 +struct ULine { 1.44 + UChar *name; 1.45 + int32_t len; 1.46 +}; 1.47 + 1.48 +/** 1.49 + * Opens the UCHARBUF for the given file name and code page for conversion 1.50 + * @param fileName Name of the file to open. 1.51 + * @param codepage The encoding of the file stream to convert to Unicode. 1.52 + * If *codepage is NULL on input the API will try to autodetect 1.53 + * popular Unicode encodings 1.54 + * @param showWarning Flag to print out warnings to STDOUT 1.55 + * @param buffered If TRUE performs a buffered read of the input file. If FALSE reads 1.56 + * the whole file into memory and converts it. 1.57 + * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value 1.58 + * indicates a failure on entry, the function will immediately return. 
1.59 + * On exit the value will indicate the success of the operation. 1.60 + * @return pointer to the newly opened UCHARBUF 1.61 + */ 1.62 +U_CAPI UCHARBUF* U_EXPORT2 1.63 +ucbuf_open(const char* fileName,const char** codepage,UBool showWarning, UBool buffered, UErrorCode* err); 1.64 + 1.65 +/** 1.66 + * Gets a UTF-16 code unit at the current position from the converted buffer 1.67 + * and increments the current position 1.68 + * @param buf Pointer to UCHARBUF structure 1.69 + * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value 1.70 + * indicates a failure on entry, the function will immediately return. 1.71 + * On exit the value will indicate the success of the operation. 1.72 + */ 1.73 +U_CAPI int32_t U_EXPORT2 1.74 +ucbuf_getc(UCHARBUF* buf,UErrorCode* err); 1.75 + 1.76 +/** 1.77 + * Gets a UTF-32 code point at the current position from the converted buffer 1.78 + * and increments the current position 1.79 + * @param buf Pointer to UCHARBUF structure 1.80 + * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value 1.81 + * indicates a failure on entry, the function will immediately return. 1.82 + * On exit the value will indicate the success of the operation. 1.83 + */ 1.84 +U_CAPI int32_t U_EXPORT2 1.85 +ucbuf_getc32(UCHARBUF* buf,UErrorCode* err); 1.86 + 1.87 +/** 1.88 + * Gets a UTF-16 code unit at the current position from the converted buffer after 1.89 + * unescaping and increments the current position. If the escape sequence is for a UTF-32 1.90 + * code point (\\Uxxxxxxxx) then a UTF-32 code point is returned 1.91 + * @param buf Pointer to UCHARBUF structure 1.92 + * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value 1.93 + * indicates a failure on entry, the function will immediately return. 1.94 + * On exit the value will indicate the success of the operation. 
1.95 + */ 1.96 +U_CAPI int32_t U_EXPORT2 1.97 +ucbuf_getcx32(UCHARBUF* buf,UErrorCode* err); 1.98 + 1.99 +/** 1.100 + * Gets a pointer to the current position in the internal buffer and length of the line. 1.101 + * It is imperative to make a copy of the returned buffer before performing operations on it. 1.102 + * @param buf Pointer to UCHARBUF structure 1.103 + * @param len Output param to receive the length of the buffer returned till end of the line 1.104 + * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value 1.105 + * indicates a failure on entry, the function will immediately return. 1.106 + * On exit the value will indicate the success of the operation. 1.107 + * Error: U_TRUNCATED_CHAR_FOUND 1.108 + * @return Pointer to the internal buffer, NULL if EOF 1.109 + */ 1.110 +U_CAPI const UChar* U_EXPORT2 1.111 +ucbuf_readline(UCHARBUF* buf,int32_t* len, UErrorCode* err); 1.112 + 1.113 + 1.114 +/** 1.115 + * Resets the buffers and the underlying file stream. 1.116 + * @param buf Pointer to UCHARBUF structure 1.117 + * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value 1.118 + * indicates a failure on entry, the function will immediately return. 1.119 + * On exit the value will indicate the success of the operation. 1.120 + */ 1.121 +U_CAPI void U_EXPORT2 1.122 +ucbuf_rewind(UCHARBUF* buf,UErrorCode* err); 1.123 + 1.124 +/** 1.125 + * Returns a pointer to the internal converted buffer 1.126 + * @param buf Pointer to UCHARBUF structure 1.127 + * @param len Pointer to int32_t to receive the length of the buffer 1.128 + * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value 1.129 + * indicates a failure on entry, the function will immediately return. 1.130 + * On exit the value will indicate the success of the operation. 
1.131 + * @return Pointer to internal UChar buffer 1.132 + */ 1.133 +U_CAPI const UChar* U_EXPORT2 1.134 +ucbuf_getBuffer(UCHARBUF* buf,int32_t* len,UErrorCode* err); 1.135 + 1.136 +/** 1.137 + * Closes the UCHARBUF structure members and cleans up the malloc'ed memory 1.138 + * @param buf Pointer to UCHARBUF structure 1.139 + */ 1.140 +U_CAPI void U_EXPORT2 1.141 +ucbuf_close(UCHARBUF* buf); 1.142 + 1.143 +/** 1.144 + * Rewinds the buffer by one codepoint. Does not rewind over escaped characters. @param ungetChar NOTE(review): presumably the code point most recently read from buf - confirm against the implementation. @param buf Pointer to UCHARBUF structure 1.145 + */ 1.146 +U_CAPI void U_EXPORT2 1.147 +ucbuf_ungetc(int32_t ungetChar,UCHARBUF* buf); 1.148 + 1.149 + 1.150 +/** 1.151 + * Autodetects the encoding of the file stream. Only Unicode charsets are autodetected. 1.152 + * Some Unicode charsets are stateful and need byte identifiers to be converted also to bring 1.153 + * the converter to correct state for converting the rest of the stream. So the UConverter parameter 1.154 + * is necessary. 1.155 + * If the charset was autodetected, the caller must close both the input FileStream 1.156 + * and the converter. 1.157 + * 1.158 + * @param fileName The file name to be opened and encoding autodetected 1.159 + * @param cp Output param to receive the detected encoding 1.160 + * @param conv Output param to receive the opened converter if autodetected; NULL otherwise. @param signatureLength Output param. NOTE(review): presumably receives the length of the detected encoding signature - confirm against the implementation. 1.161 + * @param status is a pointer to a valid <code>UErrorCode</code> value. If this value 1.162 + * indicates a failure on entry, the function will immediately return. 1.163 + * On exit the value will indicate the success of the operation. 1.164 + * @return The input FileStream if its charset was autodetected; NULL otherwise. 1.165 + */ 1.166 +U_CAPI FileStream * U_EXPORT2 1.167 +ucbuf_autodetect(const char* fileName, const char** cp,UConverter** conv, 1.168 +int32_t* signatureLength, UErrorCode* status); 1.169 + 1.170 +/** 1.171 + * Autodetects the encoding of the file stream. Only Unicode charsets are autodetected. 
1.172 + * Some Unicode charsets are stateful and need byte identifiers to be converted also to bring 1.173 + * the converter to correct state for converting the rest of the stream. So the UConverter parameter 1.174 + * is necessary. 1.175 + * If the charset was autodetected, the caller must close the converter. 1.176 + * 1.177 + * @param in The file stream whose encoding is to be detected 1.178 + * @param cp Output param to receive the detected encoding 1.179 + * @param conv Output param to receive the opened converter if autodetected; NULL otherwise. @param signatureLength Output param. NOTE(review): presumably receives the length of the detected encoding signature - confirm against the implementation. 1.180 + * @param status is a pointer to a valid <code>UErrorCode</code> value. If this value 1.181 + * indicates a failure on entry, the function will immediately return. 1.182 + * On exit the value will indicate the success of the operation. 1.183 + * @return Boolean whether the Unicode charset was autodetected. 1.184 + */ 1.185 + 1.186 +U_CAPI UBool U_EXPORT2 1.187 +ucbuf_autodetect_fs(FileStream* in, const char** cp, UConverter** conv, int32_t* signatureLength, UErrorCode* status); 1.188 + 1.189 +/** 1.190 + * Returns the approximate size in UChars required for converting the file to UChars 1.191 + * @param buf Pointer to UCHARBUF structure */ 1.192 +U_CAPI int32_t U_EXPORT2 1.193 +ucbuf_size(UCHARBUF* buf); 1.194 +/** Undocumented in this header. NOTE(review): presumably resolves fileName relative to inputDir into the caller-supplied target buffer, with *len carrying the buffer capacity in and the required length out - confirm against the implementation before relying on this. */ 1.195 +U_CAPI const char* U_EXPORT2 1.196 +ucbuf_resolveFileName(const char* inputDir, const char* fileName, char* target, int32_t* len, UErrorCode* status); 1.197 + 1.198 +#endif /* UCBUF_H */ 1.199 +#endif /* !UCONFIG_NO_CONVERSION */ 1.200 +