intl/icu/source/tools/toolutil/ucbuf.h

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/tools/toolutil/ucbuf.h	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,197 @@
     1.4 +/*
     1.5 +*******************************************************************************
     1.6 +*
     1.7 +*   Copyright (C) 1998-2008, International Business Machines
     1.8 +*   Corporation and others.  All Rights Reserved.
     1.9 +*
    1.10 +*******************************************************************************
    1.11 +*
    1.12 +* File ucbuf.h
    1.13 +*
    1.14 +* Modification History:
    1.15 +*
    1.16 +*   Date        Name        Description
    1.17 +*   05/10/01    Ram         Creation.
    1.18 +*
    1.19 +* This API reads in files and returns UChars
    1.20 +*******************************************************************************
    1.21 +*/
    1.22 +
    1.23 +#include "unicode/ucnv.h"
    1.24 +#include "filestrm.h"
    1.25 +
    1.26 +#if !UCONFIG_NO_CONVERSION
    1.27 +
    1.28 +#ifndef UCBUF_H
    1.29 +#define UCBUF_H 1
    1.30 +
    1.31 +typedef struct UCHARBUF UCHARBUF; /* only forward-declared in this header */
    1.32 +/**
    1.33 + * End of file value. NOTE(review): 0xFFFFFFFF does not fit in a signed 32-bit int, while the getc-style functions in this header are declared to return int32_t.
    1.34 + */
    1.35 +#define U_EOF 0xFFFFFFFF
    1.36 +/**
    1.37 + * Error value if a sequence cannot be unescaped (numerically one less than U_EOF)
    1.38 + */
    1.39 +#define U_ERR 0xFFFFFFFE
    1.40 +
    1.41 +typedef struct ULine ULine;
    1.42 +
    1.43 +struct  ULine { /* pairs a UChar pointer with an int32_t length */
    1.44 +    UChar     *name;
    1.45 +    int32_t   len; /* presumably the number of UChars at name -- TODO confirm */
    1.46 +};
    1.47 +
    1.48 +/**
    1.49 + * Opens the UCHARBUF with the given file stream and code page for conversion
    1.50 + * @param fileName  Name of the file to open.
    1.51 + * @param codepage  The encoding of the file stream to convert to Unicode.
    1.52 + *                  If *codepage is NULL on input the API will try to autodetect
    1.53 + *                  popular Unicode encodings
    1.54 + * @param showWarning Flag to print out warnings to STDOUT
    1.55 + * @param buffered  If TRUE performs a buffered read of the input file. If FALSE reads
    1.56 + *                  the whole file into memory and converts it.
    1.57 + * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
    1.58 + *        indicates a failure on entry, the function will immediately return.
    1.59 + *        On exit the value will indicate the success of the operation.
    1.60 + * @return pointer to the newly opened UCHARBUF
    1.61 + */
    1.62 +U_CAPI UCHARBUF* U_EXPORT2
    1.63 +ucbuf_open(const char* fileName,const char** codepage,UBool showWarning, UBool buffered, UErrorCode* err);
    1.64 +
    1.65 +/**
    1.66 + * Gets a UTF-16 code unit at the current position from the converted buffer
    1.67 + * and increments the current position
    1.68 + * @param buf Pointer to UCHARBUF structure
    1.69 + * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value indicates a failure
    1.70 + *        on entry, the function will immediately return. On exit it indicates the success of the operation.
    1.71 + * @return the code unit read, as an int32_t (presumably U_EOF at end of input -- TODO confirm)
    1.72 + */
    1.73 +U_CAPI int32_t U_EXPORT2
    1.74 +ucbuf_getc(UCHARBUF* buf,UErrorCode* err);
    1.75 +
    1.76 +/**
    1.77 + * Gets a UTF-32 code point at the current position from the converted buffer
    1.78 + * and increments the current position
    1.79 + * @param buf Pointer to UCHARBUF structure
    1.80 + * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value indicates a failure
    1.81 + *        on entry, the function will immediately return. On exit it indicates the success of the operation.
    1.82 + * @return the code point read, as an int32_t (presumably U_EOF at end of input -- TODO confirm)
    1.83 + */
    1.84 +U_CAPI int32_t U_EXPORT2
    1.85 +ucbuf_getc32(UCHARBUF* buf,UErrorCode* err);
    1.86 +
    1.87 +/**
    1.88 + * Gets a UTF-16 code unit at the current position from the converted buffer after
    1.89 + * unescaping and increments the current position. If the escape sequence is for a UTF-32
    1.90 + * code point (\\Uxxxxxxxx) then a UTF-32 code point is returned
    1.91 + * @param buf Pointer to UCHARBUF structure
    1.92 + * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value indicates a failure
    1.93 + *        on entry, the function will immediately return. On exit it indicates the success of the operation.
    1.94 + * @return the unescaped value, as an int32_t (presumably U_EOF at end of input and U_ERR on a bad escape -- TODO confirm)
    1.95 + */
    1.96 +U_CAPI int32_t U_EXPORT2
    1.97 +ucbuf_getcx32(UCHARBUF* buf,UErrorCode* err);
    1.98 +
    1.99 +/**
   1.100 + * Gets a pointer to the current position in the internal buffer and length of the line.
   1.101 + * It is imperative to make a copy of the returned buffer before performing operations on it.
   1.102 + * @param buf Pointer to UCHARBUF structure
   1.103 + * @param len Output param to receive the length of the buffer returned, up to the end of the line
   1.104 + * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
   1.105 + *        indicates a failure on entry, the function will immediately return.
   1.106 + *        On exit the value will indicate the success of the operation.
   1.107 + *        Error: U_TRUNCATED_CHAR_FOUND
   1.108 + * @return Pointer to the internal buffer, NULL if EOF
   1.109 + */
   1.110 +U_CAPI const UChar* U_EXPORT2
   1.111 +ucbuf_readline(UCHARBUF* buf,int32_t* len, UErrorCode* err);
   1.112 +
   1.113 +
   1.114 +/**
   1.115 + * Resets the buffers and the underlying file stream. There is no return value; the outcome is reported through err.
   1.116 + * @param buf Pointer to UCHARBUF structure
   1.117 + * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
   1.118 + *        indicates a failure on entry, the function will immediately return.
   1.119 + *        On exit the value will indicate the success of the operation.
   1.120 + */
   1.121 +U_CAPI void U_EXPORT2
   1.122 +ucbuf_rewind(UCHARBUF* buf,UErrorCode* err);
   1.123 +
   1.124 +/**
   1.125 + * Returns a pointer to the internal converted buffer
   1.126 + * @param buf Pointer to UCHARBUF structure
   1.127 + * @param len Pointer to int32_t to receive the length of the buffer
   1.128 + * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
   1.129 + *        indicates a failure on entry, the function will immediately return.
   1.130 + *        On exit the value will indicate the success of the operation.
   1.131 + * @return Pointer to internal UChar buffer
   1.132 + */
   1.133 +U_CAPI const UChar* U_EXPORT2
   1.134 +ucbuf_getBuffer(UCHARBUF* buf,int32_t* len,UErrorCode* err);
   1.135 +
   1.136 +/**
   1.137 + * Closes the UCHARBUF structure members and cleans up the malloc'ed memory
   1.138 + * @param buf Pointer to the UCHARBUF structure to close
   1.139 + */
   1.140 +U_CAPI void U_EXPORT2
   1.141 +ucbuf_close(UCHARBUF* buf);
   1.142 +
   1.143 +/**
   1.144 + * Rewinds the buffer by one codepoint. Does not rewind over escaped characters. NOTE(review): ungetChar and buf are not documented here; presumably ungetChar is the code point being pushed back -- TODO confirm.
   1.145 + */
   1.146 +U_CAPI void U_EXPORT2
   1.147 +ucbuf_ungetc(int32_t ungetChar,UCHARBUF* buf);
   1.148 +
   1.149 +
   1.150 +/**
   1.151 + * Autodetects the encoding of the file stream. Only Unicode charsets are autodetected.
   1.152 + * Some Unicode charsets are stateful and need byte identifiers to be converted also to bring
   1.153 + * the converter to correct state for converting the rest of the stream. So the UConverter parameter
   1.154 + * is necessary.
   1.155 + * If the charset was autodetected, the caller must close both the input FileStream
   1.156 + * and the converter.
   1.157 + *
   1.158 + * @param fileName The name of the file to be opened and whose encoding is to be autodetected
   1.159 + * @param cp Output param to receive the detected encoding
   1.160 + * @param conv  Output param to receive the opened converter if autodetected; NULL otherwise.
   1.161 + * @param signatureLength Output param; presumably receives the byte length of the detected signature -- TODO confirm
   1.162 + * @param status is a pointer to a valid <code>UErrorCode</code> value. If this value indicates a failure
   1.163 + *        on entry, the function will immediately return. On exit it indicates the success of the operation.
   1.164 + * @return The input FileStream if its charset was autodetected; NULL otherwise.
   1.165 + */
   1.166 +U_CAPI FileStream * U_EXPORT2
   1.167 +ucbuf_autodetect(const char* fileName, const char** cp,UConverter** conv,
   1.168 +int32_t* signatureLength, UErrorCode* status);
   1.169 +
   1.170 +/**
   1.171 + * Autodetects the encoding of the file stream. Only Unicode charsets are autodetected.
   1.172 + * Some Unicode charsets are stateful and need byte identifiers to be converted also to bring
   1.173 + * the converter to correct state for converting the rest of the stream. So the UConverter parameter
   1.174 + * is necessary.
   1.175 + * If the charset was autodetected, the caller must close the converter.
   1.176 + *
   1.177 + * @param in The file stream whose encoding is to be detected
   1.178 + * @param cp Output param to receive the detected encoding
   1.179 + * @param conv  Output param to receive the opened converter if autodetected; NULL otherwise.
   1.180 + * @param signatureLength Output param; presumably receives the byte length of the detected signature -- TODO confirm
   1.181 + * @param status is a pointer to a valid <code>UErrorCode</code> value. If this value indicates a failure
   1.182 + *        on entry, the function will immediately return. On exit it indicates the success of the operation.
   1.183 + * @return Boolean whether the Unicode charset was autodetected.
   1.184 + */
   1.185 +
   1.186 +U_CAPI UBool U_EXPORT2
   1.187 +ucbuf_autodetect_fs(FileStream* in, const char** cp, UConverter** conv, int32_t* signatureLength, UErrorCode* status);
   1.188 +
   1.189 +/**
   1.190 + * Returns the approximate size in UChars required for converting the file to UChars. @param buf Pointer to UCHARBUF structure
   1.191 + */
   1.192 +U_CAPI int32_t U_EXPORT2
   1.193 +ucbuf_size(UCHARBUF* buf);
   1.194 +/* NOTE(review): undocumented in this header. Presumably combines inputDir and fileName into target, with len as the in/out capacity/length of target -- TODO confirm against the implementation and document the return value. */
   1.195 +U_CAPI const char* U_EXPORT2
   1.196 +ucbuf_resolveFileName(const char* inputDir, const char* fileName, char* target, int32_t* len, UErrorCode* status);
   1.197 +
   1.198 +#endif
   1.199 +#endif
   1.200 +

mercurial