michael@0: /*
michael@0: *******************************************************************************
michael@0: *
michael@0: * Copyright (C) 1998-2008, International Business Machines
michael@0: * Corporation and others. All Rights Reserved.
michael@0: *
michael@0: *******************************************************************************
michael@0: *
michael@0: * File ucbuf.h
michael@0: *
michael@0: * Modification History:
michael@0: *
michael@0: * Date Name Description
michael@0: * 05/10/01 Ram Creation.
michael@0: *
michael@0: * This API reads in files and returns UChars
michael@0: *******************************************************************************
michael@0: */
michael@0:
michael@0: #include "unicode/ucnv.h"
michael@0: #include "filestrm.h"
michael@0:
michael@0: #if !UCONFIG_NO_CONVERSION
michael@0:
michael@0: #ifndef UCBUF_H
michael@0: #define UCBUF_H 1
michael@0:
michael@0: typedef struct UCHARBUF UCHARBUF; /* forward declaration only; struct UCHARBUF is not defined in this header */
michael@0: /**
michael@0: * Sentinel value signalling end of file.
michael@0: * NOTE(review): presumably returned by the ucbuf_getc* functions declared in
michael@0: * this header when the input is exhausted -- confirm against ucbuf.c.
michael@0: */
michael@0: #define U_EOF 0xFFFFFFFF
michael@0: /**
michael@0: * Sentinel value signalling that an escape sequence could not be unescaped.
michael@0: * NOTE(review): presumably returned by ucbuf_getcx32() -- confirm against ucbuf.c.
michael@0: */
michael@0: #define U_ERR 0xFFFFFFFE
michael@0:
michael@0: typedef struct ULine ULine;
michael@0:
michael@0: struct ULine { /* a UChar pointer paired with an int32_t length */
michael@0: UChar *name; /* presumably the UTF-16 text of the line -- TODO confirm against users of ULine */
michael@0: int32_t len; /* presumably the number of UChars at name -- TODO confirm */
michael@0: };
michael@0:
michael@0: /**
michael@0: * Opens a UCHARBUF for the given file name and code page for conversion
michael@0: * @param fileName Name of the file to open.
michael@0: * @param codepage The encoding of the file stream to convert to Unicode.
michael@0: * If *codepage is NULL on input the API will try to autodetect
michael@0: * popular Unicode encodings
michael@0: * @param showWarning Flag to print out warnings to STDOUT
michael@0: * @param buffered If TRUE performs a buffered read of the input file. If FALSE reads
michael@0: * the whole file into memory and converts it.
michael@0: * @param err is a pointer to a valid UErrorCode value. If this value
michael@0: * indicates a failure on entry, the function will immediately return.
michael@0: * On exit the value will indicate the success of the operation.
michael@0: * @return pointer to the newly opened UCHARBUF
michael@0: */
michael@0: U_CAPI UCHARBUF* U_EXPORT2
michael@0: ucbuf_open(const char* fileName,const char** codepage,UBool showWarning, UBool buffered, UErrorCode* err);
michael@0:
michael@0: /**
michael@0: * Gets a UTF-16 code unit at the current position from the converted buffer
michael@0: * and increments the current position
michael@0: * @param buf Pointer to UCHARBUF structure
michael@0: * @param err is a pointer to a valid UErrorCode value. If this value
michael@0: * indicates a failure on entry, the function will immediately return.
michael@0: * On exit the value will indicate the success of the operation.
michael@0: * @return The code unit read, as an int32_t.
michael@0: * NOTE(review): presumably U_EOF once the input is exhausted -- confirm in ucbuf.c.
michael@0: */
michael@0: U_CAPI int32_t U_EXPORT2
michael@0: ucbuf_getc(UCHARBUF* buf,UErrorCode* err);
michael@0:
michael@0: /**
michael@0: * Gets a UTF-32 code point at the current position from the converted buffer
michael@0: * and increments the current position
michael@0: * @param buf Pointer to UCHARBUF structure
michael@0: * @param err is a pointer to a valid UErrorCode value. If this value
michael@0: * indicates a failure on entry, the function will immediately return.
michael@0: * On exit the value will indicate the success of the operation.
michael@0: * @return The code point read, as an int32_t.
michael@0: * NOTE(review): presumably U_EOF once the input is exhausted -- confirm in ucbuf.c.
michael@0: */
michael@0: U_CAPI int32_t U_EXPORT2
michael@0: ucbuf_getc32(UCHARBUF* buf,UErrorCode* err);
michael@0:
michael@0: /**
michael@0: * Gets a UTF-16 code unit at the current position from the converted buffer after
michael@0: * unescaping and increments the current position. If the escape sequence is for a UTF-32
michael@0: * code point (\\Uxxxxxxxx) then a UTF-32 code point is returned
michael@0: * @param buf Pointer to UCHARBUF structure
michael@0: * @param err is a pointer to a valid UErrorCode value. If this value
michael@0: * indicates a failure on entry, the function will immediately return.
michael@0: * On exit the value will indicate the success of the operation.
michael@0: * @return The unescaped code unit or code point, as an int32_t.
michael@0: * NOTE(review): presumably U_ERR when a sequence cannot be unescaped and
michael@0: * U_EOF at end of input -- confirm in ucbuf.c.
michael@0: */
michael@0: U_CAPI int32_t U_EXPORT2
michael@0: ucbuf_getcx32(UCHARBUF* buf,UErrorCode* err);
michael@0:
michael@0: /**
michael@0: * Gets a pointer to the current position in the internal buffer and the length of the line.
michael@0: * It is imperative to make a copy of the returned buffer before performing operations on it.
michael@0: * @param buf Pointer to UCHARBUF structure
michael@0: * @param len Output param to receive the length of the returned buffer up to the end of the line
michael@0: * @param err is a pointer to a valid UErrorCode value. If this value
michael@0: * indicates a failure on entry, the function will immediately return.
michael@0: * On exit the value will indicate the success of the operation.
michael@0: * Error: U_TRUNCATED_CHAR_FOUND
michael@0: * @return Pointer to the internal buffer, NULL if EOF
michael@0: */
michael@0: U_CAPI const UChar* U_EXPORT2
michael@0: ucbuf_readline(UCHARBUF* buf,int32_t* len, UErrorCode* err);
michael@0:
michael@0:
michael@0: /**
michael@0: * Resets the buffers and the underlying file stream.
michael@0: * @param buf Pointer to UCHARBUF structure
michael@0: * @param err is a pointer to a valid UErrorCode value. If this value
michael@0: * indicates a failure on entry, the function will immediately return.
michael@0: * On exit the value will indicate the success of the operation.
michael@0: */
michael@0: U_CAPI void U_EXPORT2
michael@0: ucbuf_rewind(UCHARBUF* buf,UErrorCode* err);
michael@0:
michael@0: /**
michael@0: * Returns a pointer to the internal converted buffer
michael@0: * @param buf Pointer to UCHARBUF structure
michael@0: * @param len Pointer to int32_t to receive the length of the buffer
michael@0: * @param err is a pointer to a valid UErrorCode value. If this value
michael@0: * indicates a failure on entry, the function will immediately return.
michael@0: * On exit the value will indicate the success of the operation.
michael@0: * @return Pointer to internal UChar buffer
michael@0: */
michael@0: U_CAPI const UChar* U_EXPORT2
michael@0: ucbuf_getBuffer(UCHARBUF* buf,int32_t* len,UErrorCode* err);
michael@0:
michael@0: /**
michael@0: * Closes the members of the UCHARBUF structure and cleans up the malloc'ed memory.
michael@0: * @param buf Pointer to UCHARBUF structure
michael@0: * NOTE(review): whether a NULL buf is accepted is not visible from this header -- confirm in ucbuf.c.
michael@0: */
michael@0: U_CAPI void U_EXPORT2
michael@0: ucbuf_close(UCHARBUF* buf);
michael@0:
michael@0: /**
michael@0: * Rewinds the buffer by one codepoint. Does not rewind over escaped characters.
michael@0: * @param ungetChar The character to un-get.
michael@0: * NOTE(review): presumably the value most recently read from buf -- confirm in ucbuf.c.
michael@0: * @param buf Pointer to UCHARBUF structure
michael@0: */
michael@0: U_CAPI void U_EXPORT2
michael@0: ucbuf_ungetc(int32_t ungetChar,UCHARBUF* buf);
michael@0:
michael@0:
michael@0: /**
michael@0: * Autodetects the encoding of the file stream. Only Unicode charsets are autodetected.
michael@0: * Some Unicode charsets are stateful and need byte identifiers to be converted also to bring
michael@0: * the converter to correct state for converting the rest of the stream. So the UConverter parameter
michael@0: * is necessary.
michael@0: * If the charset was autodetected, the caller must close both the input FileStream
michael@0: * and the converter.
michael@0: *
michael@0: * @param fileName The file name to be opened and encoding autodetected
michael@0: * @param cp Output param to receive the detected encoding
michael@0: * @param conv Output param to receive the opened converter if autodetected; NULL otherwise.
michael@0: * @param signatureLength Output param.
michael@0: * NOTE(review): presumably receives the byte length of the detected
michael@0: * signature -- confirm in ucbuf.c.
michael@0: * @param status is a pointer to a valid UErrorCode value. If this value
michael@0: * indicates a failure on entry, the function will immediately return.
michael@0: * On exit the value will indicate the success of the operation.
michael@0: * @return The input FileStream if its charset was autodetected; NULL otherwise.
michael@0: */
michael@0: U_CAPI FileStream * U_EXPORT2
michael@0: ucbuf_autodetect(const char* fileName, const char** cp,UConverter** conv,
michael@0: int32_t* signatureLength, UErrorCode* status);
michael@0:
michael@0: /**
michael@0: * Autodetects the encoding of the file stream. Only Unicode charsets are autodetected.
michael@0: * Some Unicode charsets are stateful and need byte identifiers to be converted also to bring
michael@0: * the converter to correct state for converting the rest of the stream. So the UConverter parameter
michael@0: * is necessary.
michael@0: * If the charset was autodetected, the caller must close the converter.
michael@0: *
michael@0: * @param in The file stream whose encoding is to be detected
michael@0: * @param cp Output param to receive the detected encoding
michael@0: * @param conv Output param to receive the opened converter if autodetected; NULL otherwise.
michael@0: * @param signatureLength Output param.
michael@0: * NOTE(review): presumably receives the byte length of the detected
michael@0: * signature -- confirm in ucbuf.c.
michael@0: * @param status is a pointer to a valid UErrorCode value. If this value
michael@0: * indicates a failure on entry, the function will immediately return.
michael@0: * On exit the value will indicate the success of the operation.
michael@0: * @return Boolean whether the Unicode charset was autodetected.
michael@0: */
michael@0:
michael@0: U_CAPI UBool U_EXPORT2
michael@0: ucbuf_autodetect_fs(FileStream* in, const char** cp, UConverter** conv, int32_t* signatureLength, UErrorCode* status);
michael@0:
michael@0: /**
michael@0: * Returns the approximate size in UChars required for converting the file to UChars
michael@0: * @param buf Pointer to UCHARBUF structure
michael@0: * @return Approximate number of UChars needed to hold the converted file
michael@0: */
michael@0: U_CAPI int32_t U_EXPORT2
michael@0: ucbuf_size(UCHARBUF* buf);
michael@0:
michael@0: U_CAPI const char* U_EXPORT2
michael@0: ucbuf_resolveFileName(const char* inputDir, const char* fileName, char* target, int32_t* len, UErrorCode* status); /* NOTE(review): undocumented here; presumably combines inputDir and fileName into target, with len as the buffer size in/out -- confirm in ucbuf.c */
michael@0:
michael@0: #endif
michael@0: #endif
michael@0: