michael@0: /* michael@0: ********************************************************************** michael@0: * Copyright (C) 1999-2011, International Business Machines michael@0: * Corporation and others. All Rights Reserved. michael@0: ********************************************************************** michael@0: * michael@0: * File USC_IMPL.H michael@0: * michael@0: * Modification History: michael@0: * michael@0: * Date Name Description michael@0: * 07/08/2002 Eric Mader Creation. michael@0: ****************************************************************************** michael@0: */ michael@0: michael@0: #ifndef USC_IMPL_H michael@0: #define USC_IMPL_H michael@0: #include "unicode/utypes.h" michael@0: #include "unicode/uscript.h" michael@0: michael@0: /** michael@0: * UScriptRun is used to find runs of characters in michael@0: * the same script. It implements a simple iterator over an array michael@0: * of characters. The iterator will resolve script-neutral characters michael@0: * like punctuation into the script of the surrounding characters. michael@0: * michael@0: * The iterator will try to match paired punctuation. If it sees an michael@0: * opening punctuation character, it will remember the script that michael@0: * was assigned to that character, and assign the same script to the michael@0: * matching closing punctuation. michael@0: * michael@0: * Scripts are chosen based on the UScriptCode enumeration. michael@0: * No attempt is made to combine related scripts into a single run. In michael@0: * particular, Hiragana, Katakana, and Han characters will appear in separate michael@0: * runs. michael@0: michael@0: * Here is an example of how to iterate over script runs: michael@0: *
michael@0:  * \code
michael@0:  * void printScriptRuns(const UChar *text, int32_t length)
michael@0:  * {
michael@0:  *     UErrorCode error = U_ZERO_ERROR;
michael@0:  *     UScriptRun *scriptRun = uscript_openRun(text, length, &error);
michael@0:  *     int32_t start = 0, limit = 0;
michael@0:  *     UScriptCode code = USCRIPT_INVALID_CODE;
michael@0:  *
michael@0:  *     while (uscript_nextRun(scriptRun, &start, &limit, &code)) {
michael@0:  *         printf("Script '%s' from %d to %d.\n", uscript_getName(code), start, limit);
michael@0:  *     }
michael@0:  *
michael@0:  *     uscript_closeRun(scriptRun);
michael@0:  *  }
michael@0:  * \endcode
michael@0: */ michael@0: struct UScriptRun; michael@0: michael@0: typedef struct UScriptRun UScriptRun; michael@0: michael@0: /** michael@0: * Create a UScriptRun object for iterating over the given text. This object must michael@0: * be freed using uscript_closeRun(). Note that this object does not copy the source text, michael@0: * only the pointer to it. You must make sure that the pointer remains valid until you call michael@0: * uscript_closeRun() or uscript_setRunText(). michael@0: * michael@0: * @param src is the address of the array of characters over which to iterate. michael@0: * If src == NULL and length == 0, michael@0: * an empty UScriptRun object will be returned. michael@0: * michael@0: * @param length is the number of characters over which to iterate. michael@0: * michael@0: * @param pErrorCode is a pointer to a valid UErrorCode value. If this value michael@0: * indicates a failure on entry, the function will immediately return. michael@0: * On exit the value will indicate the success of the operation. michael@0: * michael@0: * @return the address of the UScriptRun object which will iterate over the text, michael@0: * or NULL if the operation failed. michael@0: */ michael@0: U_CAPI UScriptRun * U_EXPORT2 michael@0: uscript_openRun(const UChar *src, int32_t length, UErrorCode *pErrorCode); michael@0: michael@0: /** michael@0: * Frees the given UScriptRun object and any storage associated with it. michael@0: * On return, scriptRun no longer points to a valid UScriptRun object. michael@0: * michael@0: * @param scriptRun is the UScriptRun object which will be freed. michael@0: */ michael@0: U_CAPI void U_EXPORT2 michael@0: uscript_closeRun(UScriptRun *scriptRun); michael@0: michael@0: /** michael@0: * Reset the UScriptRun object so that it will start iterating from michael@0: * the beginning. michael@0: * michael@0: * @param scriptRun is the address of the UScriptRun object to be reset. 
michael@0: */ michael@0: U_CAPI void U_EXPORT2 michael@0: uscript_resetRun(UScriptRun *scriptRun); michael@0: michael@0: /** michael@0: * Change the text over which the given UScriptRun object iterates. michael@0: * michael@0: * @param scriptRun is the UScriptRun object which will be changed. michael@0: * michael@0: * @param src is the address of the new array of characters over which to iterate. michael@0: * If src == NULL and length == 0, michael@0: * the UScriptRun object will become empty. michael@0: * michael@0: * @param length is the new number of characters over which to iterate. michael@0: * michael@0: * @param pErrorCode is a pointer to a valid UErrorCode value. If this value michael@0: * indicates a failure on entry, the function will immediately return. michael@0: * On exit the value will indicate the success of the operation. michael@0: */ michael@0: U_CAPI void U_EXPORT2 michael@0: uscript_setRunText(UScriptRun *scriptRun, const UChar *src, int32_t length, UErrorCode *pErrorCode); michael@0: michael@0: /** michael@0: * Advance the UScriptRun object to the next script run, return the start and limit michael@0: * offsets, and the script of the run. michael@0: * michael@0: * @param scriptRun is the address of the UScriptRun object. michael@0: * michael@0: * @param pRunStart is a pointer to the variable to receive the starting offset of the next run. michael@0: * This pointer can be NULL if the value is not needed. michael@0: * michael@0: * @param pRunLimit is a pointer to the variable to receive the limit offset of the next run. michael@0: * This pointer can be NULL if the value is not needed. michael@0: * michael@0: * @param pRunScript is a pointer to the variable to receive the UScriptCode for the michael@0: * script of the current run. This pointer can be NULL if the value is not needed. michael@0: * michael@0: * @return true if there was another script run. 
michael@0: */ michael@0: U_CAPI UBool U_EXPORT2 michael@0: uscript_nextRun(UScriptRun *scriptRun, int32_t *pRunStart, int32_t *pRunLimit, UScriptCode *pRunScript); michael@0: michael@0: #endif /* USC_IMPL_H */