|
1 /* |
|
2 ******************************************************************************* |
|
3 * |
|
4 * Copyright (C) 1998-2008, International Business Machines |
|
5 * Corporation and others. All Rights Reserved. |
|
6 * |
|
7 ******************************************************************************* |
|
8 * |
|
9 * File ucbuf.h |
|
10 * |
|
11 * Modification History: |
|
12 * |
|
13 * Date Name Description |
|
14 * 05/10/01 Ram Creation. |
|
15 * |
|
16 * This API reads in files and returns UChars |
|
17 ******************************************************************************* |
|
18 */ |
|
19 |
|
20 #include "unicode/ucnv.h" |
|
21 #include "filestrm.h" |
|
22 |
|
23 #if !UCONFIG_NO_CONVERSION |
|
24 |
|
25 #ifndef UCBUF_H |
|
26 #define UCBUF_H 1 |
|
27 |
|
28 typedef struct UCHARBUF UCHARBUF; /* Forward declaration only: struct UCHARBUF is not defined in this header, and every function here takes it by pointer. */ |
|
29 /** |
|
30 * End of file value. NOTE(review): presumably returned by the ucbuf_getc family at end of input (they return int32_t, whereas this literal is unsigned when int is 32 bits) -- confirm in ucbuf.c |
|
31 */ |
|
32 #define U_EOF 0xFFFFFFFF |
|
33 /** |
|
34 * Error value if a sequence cannot be unescaped. NOTE(review): presumably returned by ucbuf_getcx32, the only unescaping reader declared in this header -- confirm in ucbuf.c |
|
35 */ |
|
36 #define U_ERR 0xFFFFFFFE |
|
37 |
|
38 typedef struct ULine ULine; |
|
39 /* Pairs a UChar pointer with an int32_t length. NOTE(review): no declaration in this header uses ULine; presumably len counts the UChars at name -- confirm against callers. */ |
|
40 struct ULine { |
|
41 UChar *name; |
|
42 int32_t len; |
|
43 }; |
|
44 |
|
45 /** |
|
46 * Opens a UCHARBUF on the named file, using the given code page for conversion |
|
47 * @param fileName Name of the file to open. |
|
48 * @param codepage The encoding of the file stream to convert to Unicode. |
|
49 * If *codepage is NULL on input the API will try to autodetect |
|
50 * popular Unicode encodings |
|
51 * @param showWarning Flag to print out warnings to STDOUT |
|
52 * @param buffered If TRUE performs a buffered read of the input file. If FALSE reads |
|
53 * the whole file into memory and converts it. |
|
54 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value |
|
55 * indicates a failure on entry, the function will immediately return. |
|
56 * On exit the value will indicate the success of the operation. |
|
57 * @return pointer to the newly opened UCHARBUF. NOTE(review): presumably released with ucbuf_close -- confirm in ucbuf.c |
|
58 */ |
|
59 U_CAPI UCHARBUF* U_EXPORT2 |
|
60 ucbuf_open(const char* fileName,const char** codepage,UBool showWarning, UBool buffered, UErrorCode* err); |
|
61 |
|
62 /** |
|
63 * Gets a UTF-16 code unit at the current position from the converted buffer |
|
64 * and increments the current position |
|
65 * @param buf Pointer to UCHARBUF structure |
|
66 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value |
|
67 * indicates a failure on entry, the function will immediately return. |
|
68 * On exit the value will indicate the success of the operation. |
|
69 * @return The UTF-16 code unit read, as an int32_t. NOTE(review): presumably U_EOF at end of input -- confirm in ucbuf.c */ |
|
70 U_CAPI int32_t U_EXPORT2 |
|
71 ucbuf_getc(UCHARBUF* buf,UErrorCode* err); |
|
72 |
|
73 /** |
|
74 * Gets a UTF-32 code point at the current position from the converted buffer |
|
75 * and increments the current position |
|
76 * @param buf Pointer to UCHARBUF structure |
|
77 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value |
|
78 * indicates a failure on entry, the function will immediately return. |
|
79 * On exit the value will indicate the success of the operation. |
|
80 * @return The UTF-32 code point read, as an int32_t. NOTE(review): presumably U_EOF at end of input -- confirm in ucbuf.c */ |
|
81 U_CAPI int32_t U_EXPORT2 |
|
82 ucbuf_getc32(UCHARBUF* buf,UErrorCode* err); |
|
83 |
|
84 /** |
|
85 * Gets a UTF-16 code unit at the current position from the converted buffer after |
|
86 * unescaping and increments the current position. If the escape sequence is for a UTF-32 |
|
87 * code point (\\Uxxxxxxxx) then a UTF-32 codepoint is returned |
|
88 * @param buf Pointer to UCHARBUF structure |
|
89 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value |
|
90 * indicates a failure on entry, the function will immediately return. |
|
91 * On exit the value will indicate the success of the operation. |
|
92 * @return The unescaped code unit or code point, as an int32_t. NOTE(review): presumably U_ERR when a sequence cannot be unescaped -- confirm in ucbuf.c */ |
|
93 U_CAPI int32_t U_EXPORT2 |
|
94 ucbuf_getcx32(UCHARBUF* buf,UErrorCode* err); |
|
95 |
|
96 /** |
|
97 * Gets a pointer to the current position in the internal buffer and the length of the line. |
|
98 * It is imperative to make a copy of the returned buffer before performing operations on it. |
|
99 * @param buf Pointer to UCHARBUF structure |
|
100 * @param len Output param to receive the length of the buffer returned, up to the end of the line |
|
101 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value |
|
102 * indicates a failure on entry, the function will immediately return. |
|
103 * On exit the value will indicate the success of the operation. |
|
104 * Error: U_TRUNCATED_CHAR_FOUND |
|
105 * @return Pointer to the internal buffer, NULL if EOF |
|
106 */ |
|
107 U_CAPI const UChar* U_EXPORT2 |
|
108 ucbuf_readline(UCHARBUF* buf,int32_t* len, UErrorCode* err); |
|
109 |
|
110 |
|
111 /** |
|
112 * Resets the buffers and the underlying file stream. |
|
113 * @param buf Pointer to UCHARBUF structure |
|
114 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value |
|
115 * indicates a failure on entry, the function will immediately return. |
|
116 * On exit the value will indicate the success of the operation; there is no return value. |
|
117 */ |
|
118 U_CAPI void U_EXPORT2 |
|
119 ucbuf_rewind(UCHARBUF* buf,UErrorCode* err); |
|
120 |
|
121 /** |
|
122 * Returns a pointer to the internal converted buffer |
|
123 * @param buf Pointer to UCHARBUF structure |
|
124 * @param len Pointer to int32_t to receive the length of the buffer |
|
125 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value |
|
126 * indicates a failure on entry, the function will immediately return. |
|
127 * On exit the value will indicate the success of the operation. |
|
128 * @return Pointer to internal UChar buffer |
|
129 */ |
|
130 U_CAPI const UChar* U_EXPORT2 |
|
131 ucbuf_getBuffer(UCHARBUF* buf,int32_t* len,UErrorCode* err); |
|
132 |
|
133 /** |
|
134 * Closes the UCHARBUF structure members and cleans up the malloc'ed memory. Takes no UErrorCode and returns void, so failures are not reported. |
|
135 * @param buf Pointer to UCHARBUF structure. NOTE(review): whether NULL is accepted is not stated here -- confirm in ucbuf.c |
|
136 */ |
|
137 U_CAPI void U_EXPORT2 |
|
138 ucbuf_close(UCHARBUF* buf); |
|
139 |
|
140 /** |
|
141 * Rewinds the buffer by one codepoint. Does not rewind over escaped characters. |
|
142 * NOTE(review): presumably ungetChar is the code point last read from buf -- confirm in ucbuf.c. Unlike the readers, buf is the second argument and there is no UErrorCode. */ |
|
143 U_CAPI void U_EXPORT2 |
|
144 ucbuf_ungetc(int32_t ungetChar,UCHARBUF* buf); |
|
145 |
|
146 |
|
147 /** |
|
148 * Autodetects the encoding of the file stream. Only Unicode charsets are autodetected. |
|
149 * Some Unicode charsets are stateful and need byte identifiers to be converted also to bring |
|
150 * the converter to correct state for converting the rest of the stream. So the UConverter parameter |
|
151 * is necessary. |
|
152 * If the charset was autodetected, the caller must close both the input FileStream |
|
153 * and the converter. |
|
154 * |
|
155 * @param fileName The name of the file to be opened and have its encoding autodetected |
|
156 * @param conv Output param to receive the opened converter if autodetected; NULL otherwise. |
|
157 * @param cp Output param to receive the detected encoding |
|
158 * @param status is a pointer to a valid <code>UErrorCode</code> value. If this value |
|
159 * indicates a failure on entry, the function will immediately return. |
|
160 * On exit the value will indicate the success of the operation. |
|
161 * @return The input FileStream if its charset was autodetected; NULL otherwise. |
|
162 * NOTE(review): signatureLength is undocumented; presumably it receives the byte length of the detected Unicode signature -- confirm in ucbuf.c */ |
|
163 U_CAPI FileStream * U_EXPORT2 |
|
164 ucbuf_autodetect(const char* fileName, const char** cp,UConverter** conv, |
|
165 int32_t* signatureLength, UErrorCode* status); |
|
166 |
|
167 /** |
|
168 * Autodetects the encoding of the file stream. Only Unicode charsets are autodetected. |
|
169 * Some Unicode charsets are stateful and need byte identifiers to be converted also to bring |
|
170 * the converter to correct state for converting the rest of the stream. So the UConverter parameter |
|
171 * is necessary. |
|
172 * If the charset was autodetected, the caller must close the converter. |
|
173 * |
|
174 * @param in The file stream whose encoding is to be detected |
|
175 * @param conv Output param to receive the opened converter if autodetected; NULL otherwise. |
|
176 * @param cp Output param to receive the detected encoding |
|
177 * @param status is a pointer to a valid <code>UErrorCode</code> value. If this value |
|
178 * indicates a failure on entry, the function will immediately return. |
|
179 * On exit the value will indicate the success of the operation. |
|
180 * @return Boolean whether the Unicode charset was autodetected. |
|
181 * NOTE(review): signatureLength is undocumented; presumably it receives the byte length of the detected Unicode signature -- confirm in ucbuf.c */ |
|
182 |
|
183 U_CAPI UBool U_EXPORT2 |
|
184 ucbuf_autodetect_fs(FileStream* in, const char** cp, UConverter** conv, int32_t* signatureLength, UErrorCode* status); |
|
185 |
|
186 /** |
|
187 * Returns the approximate size in UChars required for converting the file to UChars |
|
188 * @param buf Pointer to UCHARBUF structure */ |
|
189 U_CAPI int32_t U_EXPORT2 |
|
190 ucbuf_size(UCHARBUF* buf); |
|
191 /* NOTE(review): undocumented. Presumably combines inputDir and fileName into the caller-supplied target, with len as its in/out size and status reporting failure -- confirm in ucbuf.c. */ |
|
192 U_CAPI const char* U_EXPORT2 |
|
193 ucbuf_resolveFileName(const char* inputDir, const char* fileName, char* target, int32_t* len, UErrorCode* status); |
|
194 |
|
195 #endif |
|
196 #endif |
|
197 |