intl/icu/source/tools/toolutil/ucbuf.h

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

     1 /*
     2 *******************************************************************************
     3 *
     4 *   Copyright (C) 1998-2008, International Business Machines
     5 *   Corporation and others.  All Rights Reserved.
     6 *
     7 *******************************************************************************
     8 *
     9 * File ucbuf.h
    10 *
    11 * Modification History:
    12 *
    13 *   Date        Name        Description
    14 *   05/10/01    Ram         Creation.
    15 *
    16 * This API reads in files and returns UChars
    17 *******************************************************************************
    18 */
    20 #include "unicode/ucnv.h"
    21 #include "filestrm.h"
    23 #if !UCONFIG_NO_CONVERSION
    25 #ifndef UCBUF_H
    26 #define UCBUF_H 1
    28 typedef struct UCHARBUF UCHARBUF; /* Handle type used by the ucbuf_* functions; struct UCHARBUF is only forward-declared in this header. */
    29 /**
    30  * End of file value.
        * NOTE(review): the literal has no suffix, so where int is 32 bits wide it has
        * an unsigned type, while the ucbuf_getc* functions in this header are declared
        * to return int32_t - confirm that callers compare against it consistently.
    31  */
    32 #define U_EOF 0xFFFFFFFF
    33 /**
    34  * Error value if a sequence cannot be unescaped.
        * NOTE(review): same unsigned-literal consideration as for U_EOF.
    35  */
    36 #define U_ERR 0xFFFFFFFE
    38 typedef struct ULine ULine; /* Pairs a UChar pointer with an int32_t length. */
    40 struct  ULine {
    41     UChar     *name; /* NOTE(review): ownership of the pointed-to UChars is not specified in this header - confirm */
    42     int32_t   len; /* presumably the number of UChars at name - confirm against the users of ULine */
    43 };
    45 /**
    46  * Opens a UCHARBUF on the named file, using the given code page for conversion.
    47  * @param fileName  Name of the file to open.
    48  * @param codepage  The encoding of the file stream to convert to Unicode.
    49  *                  If *codepage is NULL on input the API will try to autodetect
    50  *                  popular Unicode encodings
    51  * @param showWarning Flag to print out warnings to STDOUT
    52  * @param buffered  If TRUE performs a buffered read of the input file. If FALSE reads
    53  *                  the whole file into memory and converts it.
    54  * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
    55  *        indicates a failure on entry, the function will immediately return.
    56  *        On exit the value will indicate the success of the operation.
    57  * @return pointer to the newly opened UCHARBUF; ucbuf_close() is the function
        *         declared in this header for closing it.
    58  */
    59 U_CAPI UCHARBUF* U_EXPORT2
    60 ucbuf_open(const char* fileName,const char** codepage,UBool showWarning, UBool buffered, UErrorCode* err);
    62 /**
    63  * Gets a UTF-16 code unit at the current position from the converted buffer
    64  * and increments the current position.
    65  * @param buf Pointer to UCHARBUF structure
    66  * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
    67  *        indicates a failure on entry, the function will immediately return.
    68  *        On exit the value will indicate the success of the operation.
        * @return The code unit read, as an int32_t.
        *         NOTE(review): presumably U_EOF at end of input - confirm against ucbuf.c.
    69  */
    70 U_CAPI int32_t U_EXPORT2
    71 ucbuf_getc(UCHARBUF* buf,UErrorCode* err);
    73 /**
    74  * Gets a UTF-32 code point at the current position from the converted buffer
    75  * and increments the current position.
    76  * @param buf Pointer to UCHARBUF structure
    77  * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
    78  *        indicates a failure on entry, the function will immediately return.
    79  *        On exit the value will indicate the success of the operation.
        * @return The code point read, as an int32_t.
        *         NOTE(review): presumably U_EOF at end of input - confirm against ucbuf.c.
    80  */
    81 U_CAPI int32_t U_EXPORT2
    82 ucbuf_getc32(UCHARBUF* buf,UErrorCode* err);
    84 /**
    85  * Gets a UTF-16 code unit at the current position from the converted buffer after
    86  * unescaping and increments the current position. If the escape sequence is for a UTF-32
    87  * code point (\\Uxxxxxxxx) then a UTF-32 code point is returned.
    88  * @param buf Pointer to UCHARBUF structure
    89  * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
    90  *        indicates a failure on entry, the function will immediately return.
    91  *        On exit the value will indicate the success of the operation.
        * @return The unescaped code unit or code point, as an int32_t.
        *         NOTE(review): presumably U_ERR when a sequence cannot be unescaped and
        *         U_EOF at end of input - confirm against ucbuf.c.
    92  */
    93 U_CAPI int32_t U_EXPORT2
    94 ucbuf_getcx32(UCHARBUF* buf,UErrorCode* err);
    96 /**
    97  * Gets a pointer to the current position in the internal buffer and length of the line.
    98  * It is imperative to make a copy of the returned buffer before performing operations on it.
    99  * @param buf Pointer to UCHARBUF structure
   100  * @param len Output param to receive the length of the buffer returned till end of the line
   101  * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
   102  *        indicates a failure on entry, the function will immediately return.
   103  *        On exit the value will indicate the success of the operation.
   104  *        Error: U_TRUNCATED_CHAR_FOUND
   105  * @return Pointer to the internal buffer, NULL if EOF
   106  */
   107 U_CAPI const UChar* U_EXPORT2
   108 ucbuf_readline(UCHARBUF* buf,int32_t* len, UErrorCode* err);
   111 /**
   112  * Resets the buffers and the underlying file stream.
        * NOTE(review): presumably subsequent reads start again from the beginning of
        * the input - confirm against ucbuf.c.
   113  * @param buf Pointer to UCHARBUF structure
   114  * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
   115  *        indicates a failure on entry, the function will immediately return.
   116  *        On exit the value will indicate the success of the operation.
   117  */
   118 U_CAPI void U_EXPORT2
   119 ucbuf_rewind(UCHARBUF* buf,UErrorCode* err);
   121 /**
   122  * Returns a pointer to the internal converted buffer.
   123  * @param buf Pointer to UCHARBUF structure
   124  * @param len Pointer to int32_t to receive the length of the buffer
   125  * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
   126  *        indicates a failure on entry, the function will immediately return.
   127  *        On exit the value will indicate the success of the operation.
   128  * @return Pointer to internal UChar buffer
   129  */
   130 U_CAPI const UChar* U_EXPORT2
   131 ucbuf_getBuffer(UCHARBUF* buf,int32_t* len,UErrorCode* err);
   133 /**
   134  * Closes the UCHARBUF structure members and cleans up the malloc'ed memory.
   135  * @param buf Pointer to UCHARBUF structure
        *        NOTE(review): whether NULL is accepted is not stated in this header - confirm.
   136  */
   137 U_CAPI void U_EXPORT2
   138 ucbuf_close(UCHARBUF* buf);
   140 /**
   141  * Rewinds the buffer by one codepoint. Does not rewind over escaped characters.
        * @param ungetChar NOTE(review): presumably the code point most recently read
        *        from buf - confirm against ucbuf.c.
        * @param buf Pointer to UCHARBUF structure
   142  */
   143 U_CAPI void U_EXPORT2
   144 ucbuf_ungetc(int32_t ungetChar,UCHARBUF* buf);
   147 /**
   148  * Autodetects the encoding of the file stream. Only Unicode charsets are autodetected.
   149  * Some Unicode charsets are stateful and need byte identifiers to be converted also to bring
   150  * the converter to correct state for converting the rest of the stream. So the UConverter parameter
   151  * is necessary.
   152  * If the charset was autodetected, the caller must close both the input FileStream
   153  * and the converter.
   154  *
   155  * @param fileName The file name to be opened and encoding autodetected
   156  * @param cp Output param to receive the detected encoding
   157  * @param conv  Output param to receive the opened converter if autodetected; NULL otherwise.
        * @param signatureLength NOTE(review): not documented in the original comment;
        *        presumably receives the byte length of the detected signature - confirm
        *        against ucbuf.c.
   158  * @param status is a pointer to a valid <code>UErrorCode</code> value. If this value
   159  *        indicates a failure on entry, the function will immediately return.
   160  *        On exit the value will indicate the success of the operation.
   161  * @return The input FileStream if its charset was autodetected; NULL otherwise.
   162  */
   163 U_CAPI FileStream * U_EXPORT2
   164 ucbuf_autodetect(const char* fileName, const char** cp,UConverter** conv,
   165 int32_t* signatureLength, UErrorCode* status);
   167 /**
   168  * Autodetects the encoding of the file stream. Only Unicode charsets are autodetected.
   169  * Some Unicode charsets are stateful and need byte identifiers to be converted also to bring
   170  * the converter to correct state for converting the rest of the stream. So the UConverter parameter
   171  * is necessary.
   172  * If the charset was autodetected, the caller must close the converter.
   173  *
   174  * @param in The file stream whose encoding is to be detected
   175  * @param cp Output param to receive the detected encoding
   176  * @param conv  Output param to receive the opened converter if autodetected; NULL otherwise.
        * @param signatureLength NOTE(review): not documented in the original comment;
        *        presumably receives the byte length of the detected signature - confirm
        *        against ucbuf.c.
   177  * @param status is a pointer to a valid <code>UErrorCode</code> value. If this value
   178  *        indicates a failure on entry, the function will immediately return.
   179  *        On exit the value will indicate the success of the operation.
   180  * @return Boolean whether the Unicode charset was autodetected.
   181  */
   183 U_CAPI UBool U_EXPORT2
   184 ucbuf_autodetect_fs(FileStream* in, const char** cp, UConverter** conv, int32_t* signatureLength, UErrorCode* status);
   186 /**
   187  * Returns the approximate size in UChars required for converting the file to UChars.
        * @param buf Pointer to UCHARBUF structure
        * @return The approximate size, in UChars
   188  */
   189 U_CAPI int32_t U_EXPORT2
   190 ucbuf_size(UCHARBUF* buf);
   /**
    * NOTE(review): this declaration carries no documentation in this header; the
    * notes below are inferred from the signature only and must be confirmed
    * against ucbuf.c.
    * @param inputDir presumably a directory that fileName is resolved against
    * @param fileName presumably the file name to resolve
    * @param target   presumably a caller-supplied buffer that receives the result
    * @param len      presumably the capacity of target on input and/or the
    *                 resulting length on output
    * @param status   pointer to a UErrorCode value
    * @return a const char* (presumably the resolved file name)
    */
   192 U_CAPI const char* U_EXPORT2
   193 ucbuf_resolveFileName(const char* inputDir, const char* fileName, char* target, int32_t* len, UErrorCode* status);
   195 #endif
   196 #endif

mercurial