intl/icu/source/i18n/unicode/uregex.h

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/i18n/unicode/uregex.h	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,1591 @@
     1.4 +/*
     1.5 +**********************************************************************
     1.6 +*   Copyright (C) 2004-2013, International Business Machines
     1.7 +*   Corporation and others.  All Rights Reserved.
     1.8 +**********************************************************************
     1.9 +*   file name:  uregex.h
    1.10 +*   encoding:   US-ASCII
    1.11 +*   indentation:4
    1.12 +*
    1.13 +*   created on: 2004mar09
    1.14 +*   created by: Andy Heninger
    1.15 +*
    1.16 +*   ICU Regular Expressions, API for C
    1.17 +*/
    1.18 +
    1.19 +/**
    1.20 + * \file
    1.21 + * \brief C API: Regular Expressions
    1.22 + *
    1.23 + * <p>This is a C wrapper around the C++ RegexPattern and RegexMatcher classes.</p>
    1.24 + */
    1.25 +
    1.26 +#ifndef UREGEX_H
    1.27 +#define UREGEX_H
    1.28 +
    1.29 +#include "unicode/utext.h"
    1.30 +#include "unicode/utypes.h"
    1.31 +
    1.32 +#if !UCONFIG_NO_REGULAR_EXPRESSIONS
    1.33 +
    1.34 +#include "unicode/localpointer.h"
    1.35 +#include "unicode/parseerr.h"
    1.36 +
    1.37 +struct URegularExpression;
    1.38 +/**
    1.39 +  * Structure representing a compiled regular expression, plus the results
    1.40 +  *    of a match operation.
    1.41 +  * @stable ICU 3.0
    1.42 +  */
    1.43 +typedef struct URegularExpression URegularExpression;
    1.44 +
    1.45 +
    1.46 +/**
    1.47 + * Constants for Regular Expression Match Modes.  Each constant is a distinct single-bit value, so flags can be bitwise-ORed together.
    1.48 + * @stable ICU 2.4
    1.49 + */
    1.50 +typedef enum URegexpFlag{
    1.51 +
    1.52 +#ifndef U_HIDE_DRAFT_API 
    1.53 +    /** Forces normalization of pattern and strings.
    1.54 +    Not implemented yet; this is only a placeholder, hence draft.
    1.55 +    @draft ICU 2.4 */
    1.56 +    UREGEX_CANON_EQ         = 128,
    1.57 +#endif /* U_HIDE_DRAFT_API */
    1.58 +    /**  Enable case-insensitive matching.  @stable ICU 2.4 */
    1.59 +    UREGEX_CASE_INSENSITIVE = 2,
    1.60 +
    1.61 +    /**  Allow white space and comments within patterns.  @stable ICU 2.4 */
    1.62 +    UREGEX_COMMENTS         = 4,
    1.63 +
    1.64 +    /**  If set, '.' matches line terminators; otherwise '.' matching stops at line end.
    1.65 +      *  @stable ICU 2.4 */
    1.66 +    UREGEX_DOTALL           = 32,
    1.67 +    
    1.68 +    /**  If set, treat the entire pattern as a literal string.
    1.69 +      *  Metacharacters or escape sequences in the input sequence will be given
    1.70 +      *  no special meaning.
    1.71 +      *
    1.72 +      *  The flag UREGEX_CASE_INSENSITIVE retains its impact
    1.73 +      *  on matching when used in conjunction with this flag.
    1.74 +      *  The other flags become superfluous.
    1.75 +      *
    1.76 +      * @stable ICU 4.0
    1.77 +      */
    1.78 +    UREGEX_LITERAL = 16,
    1.79 +
    1.80 +    /**   Control behavior of "$" and "^".
    1.81 +      *    If set, recognize line terminators within string;
    1.82 +      *    otherwise, match only at start and end of input string.
    1.83 +      *   @stable ICU 2.4 */
    1.84 +    UREGEX_MULTILINE        = 8,
    1.85 +    
    1.86 +    /**   Unix-only line endings.
    1.87 +      *   When this mode is enabled, only \\u000a is recognized as a line ending
    1.88 +      *    in the behavior of ., ^, and $.
    1.89 +      *   @stable ICU 4.0
    1.90 +      */
    1.91 +    UREGEX_UNIX_LINES = 1,
    1.92 +
    1.93 +    /**  Unicode word boundaries.
    1.94 +      *     If set, \b uses the Unicode TR 29 definition of word boundaries.
    1.95 +      *     Warning: Unicode word boundaries are quite different from
    1.96 +      *     traditional regular expression word boundaries.  See
    1.97 +      *     http://unicode.org/reports/tr29/#Word_Boundaries
    1.98 +      *     @stable ICU 2.8
    1.99 +      */
   1.100 +    UREGEX_UWORD            = 256,
   1.101 +
   1.102 +     /**  Error on unrecognized backslash escapes.
   1.103 +       *     If set, fail with an error on patterns that contain
   1.104 +       *     backslash-escaped ASCII letters without a known special
   1.105 +       *     meaning.  If this flag is not set, these
   1.106 +       *     escaped letters represent themselves.
   1.107 +       *     @stable ICU 4.0
   1.108 +       */
   1.109 +     UREGEX_ERROR_ON_UNKNOWN_ESCAPES = 512
   1.110 +
   1.111 +}  URegexpFlag;
   1.112 +
   1.113 +/**
   1.114 +  *  Open (compile) an ICU regular expression.  Compiles the regular expression in
   1.115 +  *  string form into an internal representation using the specified match mode flags.
   1.116 +  *  The resulting regular expression handle can then be used to perform various
   1.117 +  *   matching operations.
   1.118 +  * 
   1.119 +  *
   1.120 +  * @param pattern        The Regular Expression pattern to be compiled. 
   1.121 +  * @param patternLength  The length of the pattern, or -1 if the pattern is
   1.122 +  *                       NUL terminated.
   1.123 +  * @param flags          Flags that alter the default matching behavior for
   1.124 +  *                       the regular expression, UREGEX_CASE_INSENSITIVE, for
   1.125 +  *                       example.  For default behavior, set this parameter to zero.
   1.126 +  *                       See <code>enum URegexpFlag</code>.  All desired flags
   1.127 +  *                       are bitwise-ORed together.
   1.128 +  * @param pe             Receives the position (line and column numbers) of any syntax
   1.129 +  *                       error within the source regular expression string.  If this
   1.130 +  *                       information is not wanted, pass NULL for this parameter.
   1.131 +  * @param status         Receives errors detected by this function.
   1.132 +  * @stable ICU 3.0
   1.133 +  *
   1.134 +  */
   1.135 +U_STABLE URegularExpression * U_EXPORT2
   1.136 +uregex_open( const  UChar          *pattern,
   1.137 +                    int32_t         patternLength,
   1.138 +                    uint32_t        flags,
   1.139 +                    UParseError    *pe,
   1.140 +                    UErrorCode     *status);
   1.141 +
   1.142 +/**
   1.143 +  *  Open (compile) an ICU regular expression.  Compiles the regular expression in
   1.144 +  *  string form into an internal representation using the specified match mode flags.
   1.145 +  *  The resulting regular expression handle can then be used to perform various
   1.146 +  *   matching operations.
   1.147 +  *  <p>
   1.148 +  *  The contents of the pattern UText will be extracted and saved. Ownership of the
   1.149 +  *   UText struct itself remains with the caller. This is to match the behavior of
   1.150 +  *   uregex_open().
   1.151 +  *
   1.152 +  * @param pattern        The Regular Expression pattern to be compiled. 
   1.153 +  * @param flags          Flags that alter the default matching behavior for
   1.154 +  *                       the regular expression, UREGEX_CASE_INSENSITIVE, for
   1.155 +  *                       example.  For default behavior, set this parameter to zero.
   1.156 +  *                       See <code>enum URegexpFlag</code>.  All desired flags
   1.157 +  *                       are bitwise-ORed together.
   1.158 +  * @param pe             Receives the position (line and column numbers) of any syntax
   1.159 +  *                       error within the source regular expression string.  If this
   1.160 +  *                       information is not wanted, pass NULL for this parameter.
   1.161 +  * @param status         Receives errors detected by this function.
   1.162 +  *
   1.163 +  * @stable ICU 4.6
   1.164 +  */
   1.165 +U_STABLE URegularExpression *  U_EXPORT2
   1.166 +uregex_openUText(UText          *pattern,
   1.167 +                 uint32_t        flags,
   1.168 +                 UParseError    *pe,
   1.169 +                 UErrorCode     *status);
   1.170 +
   1.171 +/**
   1.172 +  *  Open (compile) an ICU regular expression.  The resulting regular expression
   1.173 +  *   handle can then be used to perform various matching operations.
   1.174 +  *  <p>
   1.175 +  *   This function is the same as uregex_open, except that the pattern
   1.176 +  *   is supplied as an 8 bit char * string in the default code page.
   1.177 +  *
   1.178 +  * @param pattern        The Regular Expression pattern to be compiled, 
   1.179 +  *                       NUL terminated.  
   1.180 +  * @param flags          Flags that alter the default matching behavior for
   1.181 +  *                       the regular expression, UREGEX_CASE_INSENSITIVE, for
   1.182 +  *                       example.  For default behavior, set this parameter to zero.
   1.183 +  *                       See <code>enum URegexpFlag</code>.  All desired flags
   1.184 +  *                       are bitwise-ORed together.
   1.185 +  * @param pe             Receives the position (line and column numbers) of any syntax
   1.186 +  *                       error within the source regular expression string.  If this
   1.187 +  *                       information is not wanted, pass NULL for this parameter.
   1.188 +  * @param status         Receives errors detected by this function.
   1.189 +  * @return               The URegularExpression object representing the compiled
   1.190 +  *                       pattern.
   1.191 +  *
   1.192 +  * @stable ICU 3.0
   1.193 +  */
   1.194 +#if !UCONFIG_NO_CONVERSION
   1.195 +U_STABLE URegularExpression * U_EXPORT2
   1.196 +uregex_openC( const char           *pattern,
   1.197 +                    uint32_t        flags,
   1.198 +                    UParseError    *pe,
   1.199 +                    UErrorCode     *status);
   1.200 +#endif
   1.201 +
   1.202 +
   1.203 +
   1.204 +/**
   1.205 +  *  Close the regular expression, recovering all resources (memory) it
   1.206 +  *   was holding.
   1.207 +  *
   1.208 +  * @param regexp   The regular expression to be closed.
   1.209 +  * @stable ICU 3.0
   1.210 +  */
   1.211 +U_STABLE void U_EXPORT2 
   1.212 +uregex_close(URegularExpression *regexp);
   1.213 +
   1.214 +#if U_SHOW_CPLUSPLUS_API
   1.215 +
   1.216 +U_NAMESPACE_BEGIN
   1.217 +
   1.218 +/**
   1.219 + * \class LocalURegularExpressionPointer
   1.220 + * "Smart pointer" class, closes a URegularExpression via uregex_close().
   1.221 + * For most methods see the LocalPointerBase base class.
   1.222 + *
   1.223 + * @see LocalPointerBase
   1.224 + * @see LocalPointer
   1.225 + * @stable ICU 4.4
   1.226 + */
   1.227 +U_DEFINE_LOCAL_OPEN_POINTER(LocalURegularExpressionPointer, URegularExpression, uregex_close);
   1.228 +
   1.229 +U_NAMESPACE_END
   1.230 +
   1.231 +#endif
   1.232 +
   1.233 +/**
   1.234 + * Make a copy of a compiled regular expression.  Cloning a regular
   1.235 + * expression is faster than opening a second instance from the source
   1.236 + * form of the expression, and requires less memory.
   1.237 + * <p>
   1.238 + * Note that the current input string and the position of any matched text
   1.239 + *  within it are not cloned; only the pattern itself and the
   1.240 + *  match mode flags are copied.
   1.241 + * <p>
   1.242 + * Cloning can be particularly useful to threaded applications that perform
   1.243 + * multiple match operations in parallel.  Each concurrent RE
   1.244 + * operation requires its own instance of a URegularExpression.
   1.245 + *
   1.246 + * @param regexp   The compiled regular expression to be cloned.
   1.247 + * @param status   Receives indication of any errors encountered.
   1.248 + * @return the cloned copy of the compiled regular expression.
   1.249 + * @stable ICU 3.0
   1.250 + */
   1.251 +U_STABLE URegularExpression * U_EXPORT2 
   1.252 +uregex_clone(const URegularExpression *regexp, UErrorCode *status);
   1.253 +
   1.254 +/**
   1.255 + *  Returns a pointer to the source form of the pattern for this regular expression.
   1.256 + *  This function will work even if the pattern was originally specified as a UText.
   1.257 + *
   1.258 + * @param regexp     The compiled regular expression.
   1.259 + * @param patLength  This output parameter will be set to the length of the
   1.260 + *                   pattern string.  A NULL pointer may be used here if the
   1.261 + *                   pattern length is not needed, as would be the case if
   1.262 + *                   the pattern is known in advance to be a NUL terminated
   1.263 + *                   string.
   1.264 + * @param status     Receives errors detected by this function.
   1.265 + * @return a pointer to the pattern string.  The storage for the string is
   1.266 + *                   owned by the regular expression object, and must not be
   1.267 + *                   altered or deleted by the application.  The returned string
   1.268 + *                   will remain valid until the regular expression is closed.
   1.269 + * @stable ICU 3.0
   1.270 + */
   1.271 +U_STABLE const UChar * U_EXPORT2 
   1.272 +uregex_pattern(const URegularExpression *regexp,
   1.273 +                     int32_t            *patLength,
   1.274 +                     UErrorCode         *status);
   1.275 +
   1.276 +/**
   1.277 + *  Returns the source text of the pattern for this regular expression.
   1.278 + *  This function will work even if the pattern was originally specified as a UChar string.
   1.279 + *
   1.280 + * @param regexp     The compiled regular expression.
   1.281 + * @param status     Receives errors detected by this function.
   1.282 + * @return the pattern text.  The storage for the text is owned by the regular expression
   1.283 + *                   object, and must not be altered or deleted.
   1.284 + *
   1.285 + * @stable ICU 4.6
   1.286 + */
   1.287 +U_STABLE UText * U_EXPORT2 
   1.288 +uregex_patternUText(const URegularExpression *regexp,
   1.289 +                          UErrorCode         *status);
   1.290 +
   1.291 +/**
   1.292 +  * Get the match mode flags that were specified when compiling this regular expression.
   1.293 +  * @param regexp   The compiled regular expression.
   1.294 +  * @param status   Receives errors detected by this function.
   1.295 +  * @return         The match mode flags
   1.296 +  * @see URegexpFlag
   1.297 +  * @stable ICU 3.0
   1.298 +  */
   1.299 +U_STABLE int32_t U_EXPORT2 
   1.300 +uregex_flags(const  URegularExpression   *regexp,
   1.301 +                    UErrorCode           *status);
   1.302 +
   1.303 +
   1.304 +/**
   1.305 +  *  Set the subject text string upon which the regular expression will look for matches.
   1.306 +  *  This function may be called any number of times, allowing the regular
   1.307 +  *  expression pattern to be applied to different strings.
   1.308 +  *  <p>
   1.309 +  *  Regular expression matching operations work directly on the application's
   1.310 +  *  string data.  No copy is made.  The subject string data must not be
   1.311 +  *  altered after calling this function until after all regular expression
   1.312 +  *  operations involving this string data are completed.  
   1.313 +  *  <p>
   1.314 +  *  Zero length strings are permitted.  In this case, no subsequent match
   1.315 +  *  operation will dereference the text string pointer.
   1.316 +  *
   1.317 +  * @param regexp     The compiled regular expression.
   1.318 +  * @param text       The subject text string.
   1.319 +  * @param textLength The length of the subject text, or -1 if the string
   1.320 +  *                   is NUL terminated.
   1.321 +  * @param status     Receives errors detected by this function.
   1.322 +  * @stable ICU 3.0
   1.323 +  */
   1.324 +U_STABLE void U_EXPORT2 
   1.325 +uregex_setText(URegularExpression *regexp,
   1.326 +               const UChar        *text,
   1.327 +               int32_t             textLength,
   1.328 +               UErrorCode         *status);
   1.329 +
   1.330 +
   1.331 +/**
   1.332 +  *  Set the subject text string upon which the regular expression will look for matches.
   1.333 +  *  This function may be called any number of times, allowing the regular
   1.334 +  *  expression pattern to be applied to different strings.
   1.335 +  *  <p>
   1.336 +  *  Regular expression matching operations work directly on the application's
   1.337 +  *  string data; only a shallow clone is made.  The subject string data must not be
   1.338 +  *  altered after calling this function until after all regular expression
   1.339 +  *  operations involving this string data are completed.  
   1.340 +  *
   1.341 +  * @param regexp     The compiled regular expression.
   1.342 +  * @param text       The subject text string.
   1.343 +  * @param status     Receives errors detected by this function.
   1.344 +  *
   1.345 +  * @stable ICU 4.6
   1.346 +  */
   1.347 +U_STABLE void U_EXPORT2 
   1.348 +uregex_setUText(URegularExpression *regexp,
   1.349 +                UText              *text,
   1.350 +                UErrorCode         *status);
   1.351 +
   1.352 +/**
   1.353 +  *  Get the subject text that is currently associated with this 
   1.354 +  *   regular expression object.  If the input was supplied using uregex_setText(),
   1.355 +  *   that pointer will be returned.  Otherwise, the characters in the input will
   1.356 +  *   be extracted to a buffer and returned.  In either case, ownership remains
   1.357 +  *   with the regular expression object.
   1.358 +  *
   1.359 +  *  This function will work even if the input was originally specified as a UText.
   1.360 +  *
   1.361 +  * @param regexp      The compiled regular expression.
   1.362 +  * @param textLength  The length of the string is returned in this output parameter. 
   1.363 +  *                    A NULL pointer may be used here if the
   1.364 +  *                    text length is not needed, as would be the case if
   1.365 +  *                    the text is known in advance to be a NUL terminated
   1.366 +  *                    string.
   1.367 +  * @param status      Receives errors detected by this function.
   1.368 +  * @return            Pointer to the subject text string currently associated with
   1.369 +  *                    this regular expression.
   1.370 +  * @stable ICU 3.0
   1.371 +  */
   1.372 +U_STABLE const UChar * U_EXPORT2 
   1.373 +uregex_getText(URegularExpression *regexp,
   1.374 +               int32_t            *textLength,
   1.375 +               UErrorCode         *status);
   1.376 +
   1.377 +/**
   1.378 +  *  Get the subject text that is currently associated with this 
   1.379 +  *   regular expression object.
   1.380 +  *
   1.381 +  *  This function will work even if the input was originally specified as a UChar string.
   1.382 +  *
   1.383 +  * @param regexp      The compiled regular expression.
   1.384 +  * @param dest        A mutable UText in which to store the current input.
   1.385 +  *                    If NULL, a new UText will be created as an immutable shallow clone
   1.386 +  *                    of the actual input string.
   1.387 +  * @param status      Receives errors detected by this function.
   1.388 +  * @return            The subject text currently associated with this regular expression.
   1.389 +  *                    If a pre-allocated UText was provided, it will always be used and returned.
   1.390 +  *
   1.391 +  * @stable ICU 4.6
   1.392 +  */
   1.393 +U_STABLE UText * U_EXPORT2 
   1.394 +uregex_getUText(URegularExpression *regexp,
   1.395 +                UText              *dest,
   1.396 +                UErrorCode         *status);
   1.397 +
   1.398 +/**
   1.399 +  *  Set the subject text string upon which the regular expression is looking for matches
   1.400 +  *  without changing any other aspect of the matching state.
   1.401 +  *  The new and previous text strings must have the same content.
   1.402 +  *
   1.403 +  *  This function is intended for use in environments where ICU is operating on 
   1.404 +  *  strings that may move around in memory.  It provides a mechanism for notifying
   1.405 +  *  ICU that the string has been relocated, and providing a new UText to access the
   1.406 +  *  string in its new position.
   1.407 +  *
   1.408 +  *  Note that the regular expression implementation never copies the underlying text
   1.409 +  *  of a string being matched, but always operates directly on the original text 
   1.410 +  *  provided by the user. Refreshing simply drops the references to the old text 
   1.411 +  *  and replaces them with references to the new.
   1.412 +  *
   1.413 +  *  Caution:  this function is normally used only by very specialized
   1.414 +  *            system-level code.   One example use case is with garbage collection 
   1.415 +  *            that moves the text in memory. 
   1.416 +  *
   1.417 +  * @param regexp     The compiled regular expression.
   1.418 +  * @param text       The new (moved) text string.
   1.419 +  * @param status     Receives errors detected by this function.
   1.420 +  *
   1.421 +  * @stable ICU 4.8
   1.422 +  */
   1.423 +U_STABLE void U_EXPORT2 
   1.424 +uregex_refreshUText(URegularExpression *regexp,
   1.425 +                    UText              *text,
   1.426 +                    UErrorCode         *status);
   1.427 +
   1.428 +/**
   1.429 +  *   Attempts to match the input string against the pattern.
   1.430 +  *   To succeed, the match must extend to the end of the string,
   1.431 +  *   or cover the complete match region.
   1.432 +  *
   1.433 +  *   If startIndex >= zero the match operation starts at the specified
   1.434 +  *   index and must extend to the end of the input string.  Any region
   1.435 +  *   that has been specified is reset.
   1.436 +  *
   1.437 +  *   If startIndex == -1 the match must cover the input region, or the entire
   1.438 +  *   input string if no region has been set.  This directly corresponds to
   1.439 +  *   Matcher.matches() in Java.
   1.440 +  *
   1.441 +  *    @param  regexp      The compiled regular expression.
   1.442 +  *    @param  startIndex  The input string (native) index at which to begin matching, or -1
   1.443 +  *                        to match the input Region.
   1.444 +  *    @param  status      Receives errors detected by this function.
   1.445 +  *    @return             TRUE if there is a match
   1.446 +  *    @stable ICU 3.0
   1.447 +  */
   1.448 +U_STABLE UBool U_EXPORT2 
   1.449 +uregex_matches(URegularExpression *regexp,
   1.450 +                int32_t            startIndex,
   1.451 +                UErrorCode        *status);
   1.452 +
   1.453 +/**
   1.454 +  *   64bit version of uregex_matches.
   1.455 +  *   Attempts to match the input string against the pattern.
   1.456 +  *   To succeed, the match must extend to the end of the string,
   1.457 +  *   or cover the complete match region.
   1.458 +  *
   1.459 +  *   If startIndex >= zero the match operation starts at the specified
   1.460 +  *   index and must extend to the end of the input string.  Any region
   1.461 +  *   that has been specified is reset.
   1.462 +  *
   1.463 +  *   If startIndex == -1 the match must cover the input region, or the entire
   1.464 +  *   input string if no region has been set.  This directly corresponds to
   1.465 +  *   Matcher.matches() in Java.
   1.466 +  *
   1.467 +  *    @param  regexp      The compiled regular expression.
   1.468 +  *    @param  startIndex  The input string (native) index at which to begin matching, or -1
   1.469 +  *                        to match the input Region.
   1.470 +  *    @param  status      Receives errors detected by this function.
   1.471 +  *    @return             TRUE if there is a match
   1.472 +  *   @stable ICU 4.6
   1.473 +  */
   1.474 +U_STABLE UBool U_EXPORT2 
   1.475 +uregex_matches64(URegularExpression *regexp,
   1.476 +                 int64_t            startIndex,
   1.477 +                 UErrorCode        *status);
   1.478 +
   1.479 +/**
   1.480 +  *   Attempts to match the input string, starting from the specified index, against the pattern.
   1.481 +  *   The match may be of any length, and is not required to extend to the end
   1.482 +  *   of the input string.  Contrast with uregex_matches().
   1.483 +  *
   1.484 +  *   <p>If startIndex is >= 0 any input region that was set for this
   1.485 +  *   URegularExpression is reset before the operation begins.
   1.486 +  *
   1.487 +  *   <p>If the specified starting index == -1 the match begins at the start of the input 
   1.488 +  *   region, or at the start of the full string if no region has been specified.
   1.489 +  *   This corresponds directly with Matcher.lookingAt() in Java.
   1.490 +  *
   1.491 +  *   <p>If the match succeeds then more information can be obtained via the
   1.492 +  *    <code>uregex_start()</code>, <code>uregex_end()</code>,
   1.493 +  *    and <code>uregex_group()</code> functions.</p>
   1.494 +  *
   1.495 +  *    @param   regexp      The compiled regular expression.
   1.496 +  *    @param   startIndex  The input string (native) index at which to begin matching, or
   1.497 +  *                         -1 to match the Input Region
   1.498 +  *    @param   status      A reference to a UErrorCode to receive any errors.
   1.499 +  *    @return  TRUE if there is a match.
   1.500 +  *    @stable ICU 3.0
   1.501 +  */
   1.502 +U_STABLE UBool U_EXPORT2 
   1.503 +uregex_lookingAt(URegularExpression *regexp,
   1.504 +                 int32_t             startIndex,
   1.505 +                 UErrorCode         *status);
   1.506 +
   1.507 +/**
   1.508 +  *   64bit version of uregex_lookingAt.
   1.509 +  *   Attempts to match the input string, starting from the specified index, against the pattern.
   1.510 +  *   The match may be of any length, and is not required to extend to the end
   1.511 +  *   of the input string.  Contrast with uregex_matches().
   1.512 +  *
   1.513 +  *   <p>If startIndex is >= 0 any input region that was set for this
   1.514 +  *   URegularExpression is reset before the operation begins.
   1.515 +  *
   1.516 +  *   <p>If the specified starting index == -1 the match begins at the start of the input 
   1.517 +  *   region, or at the start of the full string if no region has been specified.
   1.518 +  *   This corresponds directly with Matcher.lookingAt() in Java.
   1.519 +  *
   1.520 +  *   <p>If the match succeeds then more information can be obtained via the
   1.521 +  *    <code>uregex_start()</code>, <code>uregex_end()</code>,
   1.522 +  *    and <code>uregex_group()</code> functions.</p>
   1.523 +  *
   1.524 +  *    @param   regexp      The compiled regular expression.
   1.525 +  *    @param   startIndex  The input string (native) index at which to begin matching, or
   1.526 +  *                         -1 to match the Input Region
   1.527 +  *    @param   status      A reference to a UErrorCode to receive any errors.
   1.528 +  *    @return  TRUE if there is a match.
   1.529 +  *    @stable ICU 4.6
   1.530 +  */
   1.531 +U_STABLE UBool U_EXPORT2 
   1.532 +uregex_lookingAt64(URegularExpression *regexp,
   1.533 +                   int64_t             startIndex,
   1.534 +                   UErrorCode         *status);
   1.535 +
   1.536 +/**
   1.537 +  *   Find the first matching substring of the input string that matches the pattern.
   1.538 +  *   If startIndex is >= zero the search for a match begins at the specified index,
   1.539 +  *          and any match region is reset.  This corresponds directly with
   1.540 +  *          Matcher.find(startIndex) in Java.
   1.541 +  *
   1.542 +  *   If startIndex == -1 the search begins at the start of the input region,
   1.543 +  *           or at the start of the full string if no region has been specified.
   1.544 +  *
   1.545 +  *   If a match is found, <code>uregex_start(), uregex_end()</code>, and
   1.546 +  *   <code>uregex_group()</code> will provide more information regarding the match.
   1.547 +  *
   1.548 +  *   @param   regexp      The compiled regular expression.
   1.549 +  *   @param   startIndex  The position (native) in the input string to begin the search, or
   1.550 +  *                        -1 to search within the Input Region.
   1.551 +  *   @param   status      A reference to a UErrorCode to receive any errors.
   1.552 +  *   @return              TRUE if a match is found.
   1.553 +  *   @stable ICU 3.0
   1.554 +  */
   1.555 +U_STABLE UBool U_EXPORT2 
   1.556 +uregex_find(URegularExpression *regexp,
   1.557 +            int32_t             startIndex, 
   1.558 +            UErrorCode         *status);
   1.559 +
   1.560 +/**
   1.561 +  *   64bit version of uregex_find.
   1.562 +  *   Find the first matching substring of the input string that matches the pattern.
   1.563 +  *   If startIndex is >= zero the search for a match begins at the specified index,
   1.564 +  *          and any match region is reset.  This corresponds directly with
   1.565 +  *          Matcher.find(startIndex) in Java.
   1.566 +  *
   1.567 +  *   If startIndex == -1 the search begins at the start of the input region,
   1.568 +  *           or at the start of the full string if no region has been specified.
   1.569 +  *
   1.570 +  *   If a match is found, <code>uregex_start(), uregex_end()</code>, and
   1.571 +  *   <code>uregex_group()</code> will provide more information regarding the match.
   1.572 +  *
   1.573 +  *   @param   regexp      The compiled regular expression.
   1.574 +  *   @param   startIndex  The position (native) in the input string to begin the search, or
   1.575 +  *                        -1 to search within the Input Region.
   1.576 +  *   @param   status      A reference to a UErrorCode to receive any errors.
   1.577 +  *   @return              TRUE if a match is found.
   1.578 +  *   @stable ICU 4.6
   1.579 +  */
   1.580 +U_STABLE UBool U_EXPORT2 
   1.581 +uregex_find64(URegularExpression *regexp,
   1.582 +              int64_t             startIndex, 
   1.583 +              UErrorCode         *status);
   1.584 +
   1.585 +/**
   1.586 +  *  Find the next pattern match in the input string.  Begin searching 
   1.587 +  *  the input at the location following the end of the previous match, 
   1.588 +  *  or at the start of the string (or region) if there is no 
   1.589 +  *  previous match.  If a match is found, <code>uregex_start(), uregex_end()</code>, and
   1.590 +  *  <code>uregex_group()</code> will provide more information regarding the match.
   1.591 +  *
   1.592 +  *  @param   regexp      The compiled regular expression.
   1.593 +  *  @param   status      A reference to a UErrorCode to receive any errors.
   1.594 +  *  @return              TRUE if a match is found.
   1.595 +  *  @see uregex_reset
   1.596 +  *  @stable ICU 3.0
   1.597 +  */
   1.598 +U_STABLE UBool U_EXPORT2 
   1.599 +uregex_findNext(URegularExpression *regexp,
   1.600 +                UErrorCode         *status);
   1.601 +
   1.602 +/**
   1.603 +  *   Get the number of capturing groups in this regular expression's pattern.
   1.604 +  *   @param   regexp      The compiled regular expression.
   1.605 +  *   @param   status      A reference to a UErrorCode to receive any errors.
   1.606 +  *   @return the number of capture groups
   1.607 +  *   @stable ICU 3.0
   1.608 +  */
   1.609 +U_STABLE int32_t U_EXPORT2 
   1.610 +uregex_groupCount(URegularExpression *regexp,
   1.611 +                  UErrorCode         *status);
   1.612 +
   1.613 +/** Extract the string for the specified matching expression or subexpression.
   1.614 +  * Group #0 is the complete string of matched text.
   1.615 +  * Group #1 is the text matched by the first set of capturing parentheses.
   1.616 +  *
   1.617 +  *   @param   regexp       The compiled regular expression.
   1.618 +  *   @param   groupNum     The capture group to extract.  Group 0 is the complete
   1.619 +  *                         match.  The value of this parameter must be
   1.620 +  *                         less than or equal to the number of capture groups in
   1.621 +  *                         the pattern.
   1.622 +  *   @param   dest         Buffer to receive the matching string data
   1.623 +  *   @param   destCapacity Capacity of the dest buffer.
   1.624 +  *   @param   status       A reference to a UErrorCode to receive any errors.
   1.625 +  *   @return               Length of matching data,
   1.626 +  *                         or -1 if no applicable match.
   1.627 +  *   @stable ICU 3.0
   1.628 +  */
   1.629 +U_STABLE int32_t U_EXPORT2 
   1.630 +uregex_group(URegularExpression *regexp,
   1.631 +             int32_t             groupNum,
   1.632 +             UChar              *dest,
   1.633 +             int32_t             destCapacity,
   1.634 +             UErrorCode          *status);
   1.635 +
   1.636 +/** Returns a shallow immutable clone of the entire input string.  The returned UText current native index
   1.637 +  *   is set to the beginning of the requested capture group.  The capture group length is also
   1.638 +  *   returned via groupLength.
   1.639 +  * Group #0 is the complete string of matched text.
   1.640 +  * Group #1 is the text matched by the first set of capturing parentheses.
   1.641 +  *
   1.642 +  *   @param   regexp       The compiled regular expression.
   1.643 +  *   @param   groupNum     The capture group to extract.  Group 0 is the complete
   1.644 +  *                         match.  The value of this parameter must be
   1.645 +  *                         less than or equal to the number of capture groups in
   1.646 +  *                         the pattern.
   1.647 +  *   @param   dest         A mutable UText in which to store the current input.
   1.648 +  *                         If NULL, a new UText will be created as an immutable shallow clone
   1.649 +  *                         of the entire input string.
   1.650 +  *   @param   groupLength  The group length of the desired capture group.
   1.651 +  *   @param   status       A reference to a UErrorCode to receive any errors.
   1.652 +  *   @return               The subject text currently associated with this regular expression.
   1.653 +  *                         If a pre-allocated UText was provided, it will always be used and returned.
   1.654 +
   1.655 +  *
   1.656 +  *   @stable ICU 4.6
   1.657 +  */
   1.658 +U_STABLE UText * U_EXPORT2 
   1.659 +uregex_groupUText(URegularExpression *regexp,
   1.660 +                  int32_t             groupNum,
   1.661 +                  UText              *dest,
   1.662 +                  int64_t            *groupLength,
   1.663 +                  UErrorCode         *status);
   1.664 +
   1.665 +#ifndef U_HIDE_INTERNAL_API
   1.666 +/** Extract the string for the specified matching expression or subexpression.
   1.667 +  * Group #0 is the complete string of matched text.
   1.668 +  * Group #1 is the text matched by the first set of capturing parentheses.
   1.669 +  *
   1.670 +  *   @param   regexp       The compiled regular expression.
   1.671 +  *   @param   groupNum     The capture group to extract.  Group 0 is the complete
   1.672 +  *                         match.  The value of this parameter must be
   1.673 +  *                         less than or equal to the number of capture groups in
   1.674 +  *                         the pattern.
   1.675 +  *   @param   dest         Mutable UText to receive the matching string data.
   1.676 +  *                         If NULL, a new UText will be created (which may not be mutable).
   1.677 +  *   @param   status       A reference to a UErrorCode to receive any errors.
   1.678 +  *   @return               The matching string data. If a pre-allocated UText was provided,
   1.679 +  *                          it will always be used and returned.
   1.680 +  *
   1.681 +  *   @internal ICU 4.4 technology preview
   1.682 +  */
   1.683 +U_INTERNAL UText * U_EXPORT2 
   1.684 +uregex_groupUTextDeep(URegularExpression *regexp,
   1.685 +                  int32_t             groupNum,
   1.686 +                  UText              *dest,
   1.687 +                  UErrorCode         *status);
   1.688 +#endif  /* U_HIDE_INTERNAL_API */
   1.689 +
   1.690 +/**
   1.691 +  *   Returns the index in the input string of the start of the text matched by the
   1.692 +  *   specified capture group during the previous match operation.  Return -1 if
   1.693 +  *   the capture group was not part of the last match.
   1.694 +  *   Group #0 refers to the complete range of matched text.
   1.695 +  *   Group #1 refers to the text matched by the first set of capturing parentheses.
   1.696 +  *
   1.697 +  *    @param   regexp      The compiled regular expression.
   1.698 +  *    @param   groupNum    The capture group number
   1.699 +  *    @param   status      A reference to a UErrorCode to receive any errors.
   1.700 +  *    @return              the starting (native) position in the input of the text matched 
   1.701 +  *                         by the specified group.
   1.702 +  *    @stable ICU 3.0
   1.703 +  */
   1.704 +U_STABLE int32_t U_EXPORT2 
   1.705 +uregex_start(URegularExpression *regexp,
   1.706 +             int32_t             groupNum,
   1.707 +             UErrorCode          *status);
   1.708 +
   1.709 +/**
   1.710 +  *   64bit version of uregex_start.
   1.711 +  *   Returns the index in the input string of the start of the text matched by the
   1.712 +  *   specified capture group during the previous match operation.  Return -1 if
   1.713 +  *   the capture group was not part of the last match.
   1.714 +  *   Group #0 refers to the complete range of matched text.
   1.715 +  *   Group #1 refers to the text matched by the first set of capturing parentheses.
   1.716 +  *
   1.717 +  *    @param   regexp      The compiled regular expression.
   1.718 +  *    @param   groupNum    The capture group number
   1.719 +  *    @param   status      A reference to a UErrorCode to receive any errors.
   1.720 +  *    @return              the starting (native) position in the input of the text matched 
   1.721 +  *                         by the specified group.
   1.722 +  *   @stable ICU 4.6
   1.723 +  */
   1.724 +U_STABLE int64_t U_EXPORT2 
   1.725 +uregex_start64(URegularExpression *regexp,
   1.726 +               int32_t             groupNum,
   1.727 +               UErrorCode          *status);
   1.728 +
   1.729 +/**
   1.730 +  *   Returns the index in the input string of the position following the end
   1.731 +  *   of the text matched by the specified capture group.
   1.732 +  *   Return -1 if the capture group was not part of the last match.
   1.733 +  *   Group #0 refers to the complete range of matched text.
   1.734 +  *   Group #1 refers to the text matched by the first set of capturing parentheses.
   1.735 +  *
   1.736 +  *    @param   regexp      The compiled regular expression.
   1.737 +  *    @param   groupNum    The capture group number
   1.738 +  *    @param   status      A reference to a UErrorCode to receive any errors.
   1.739 +  *    @return              the (native) index of the position following the last matched character.
   1.740 +  *    @stable ICU 3.0
   1.741 +  */
   1.742 +U_STABLE int32_t U_EXPORT2 
   1.743 +uregex_end(URegularExpression   *regexp,
   1.744 +           int32_t               groupNum,
   1.745 +           UErrorCode           *status);
   1.746 +
   1.747 +/**
   1.748 +  *   64bit version of uregex_end.
   1.749 +  *   Returns the index in the input string of the position following the end
   1.750 +  *   of the text matched by the specified capture group.
   1.751 +  *   Return -1 if the capture group was not part of the last match.
   1.752 +  *   Group #0 refers to the complete range of matched text.
   1.753 +  *   Group #1 refers to the text matched by the first set of capturing parentheses.
   1.754 +  *
   1.755 +  *    @param   regexp      The compiled regular expression.
   1.756 +  *    @param   groupNum    The capture group number
   1.757 +  *    @param   status      A reference to a UErrorCode to receive any errors.
   1.758 +  *    @return              the (native) index of the position following the last matched character.
   1.759 +  *   @stable ICU 4.6
   1.760 +  */
   1.761 +U_STABLE int64_t U_EXPORT2 
   1.762 +uregex_end64(URegularExpression *regexp,
   1.763 +             int32_t               groupNum,
   1.764 +             UErrorCode           *status);
   1.765 +
   1.766 +/**
   1.767 +  *  Reset any saved state from the previous match.  Has the effect of
   1.768 +  *  causing uregex_findNext to begin at the specified index, and causing
   1.769 +  *  uregex_start(), uregex_end() and uregex_group() to return an error 
   1.770 +  *  indicating that there is no match information available.  Clears any
   1.771 +  *  match region that may have been set.
   1.772 +  *
   1.773 +  *    @param   regexp      The compiled regular expression.
   1.774 +  *    @param   index       The position (native) in the text at which a
   1.775 +  *                         uregex_findNext() should begin searching.
   1.776 +  *    @param   status      A reference to a UErrorCode to receive any errors.
   1.777 +  *    @stable ICU 3.0
   1.778 +  */
   1.779 +U_STABLE void U_EXPORT2 
   1.780 +uregex_reset(URegularExpression    *regexp,
   1.781 +             int32_t               index,
   1.782 +             UErrorCode            *status);
   1.783 +
   1.784 +/**
   1.785 +  *  64bit version of uregex_reset.
   1.786 +  *  Reset any saved state from the previous match.  Has the effect of
   1.787 +  *  causing uregex_findNext to begin at the specified index, and causing
   1.788 +  *  uregex_start(), uregex_end() and uregex_group() to return an error 
   1.789 +  *  indicating that there is no match information available.  Clears any
   1.790 +  *  match region that may have been set.
   1.791 +  *
   1.792 +  *    @param   regexp      The compiled regular expression.
   1.793 +  *    @param   index       The position (native) in the text at which a
   1.794 +  *                         uregex_findNext() should begin searching.
   1.795 +  *    @param   status      A reference to a UErrorCode to receive any errors.
   1.796 +  *    @stable ICU 4.6
   1.797 +  */
   1.798 +U_STABLE void U_EXPORT2 
   1.799 +uregex_reset64(URegularExpression  *regexp,
   1.800 +               int64_t               index,
   1.801 +               UErrorCode            *status);
   1.802 +
   1.803 +/**
   1.804 +  * Sets the limits of the matching region for this URegularExpression.
   1.805 +  * The region is the part of the input string that will be considered when matching.
   1.806 +  * Invoking this method resets any saved state from the previous match, 
   1.807 +  * then sets the region to start at the index specified by the start parameter
   1.808 +  * and end at the index specified by the end parameter.
   1.809 +  *
   1.810 +  * Depending on the transparency and anchoring being used (see uregex_useTransparentBounds
   1.811 +  * and uregex_useAnchoringBounds), certain constructs such as anchors may behave differently
   1.812 +  * at or around the boundaries of the region.
   1.813 +  *
   1.814 +  * The function will fail if start is greater than limit, or if either index
   1.815 +  *  is less than zero or greater than the length of the string being matched.
   1.816 +  *
   1.817 +  * @param regexp The compiled regular expression.
   1.818 +  * @param regionStart  The (native) index to begin searches at.
   1.819 +  * @param regionLimit  The (native) index to end searches at (exclusive).
   1.820 +  * @param status A pointer to a UErrorCode to receive any errors.
   1.821 +  * @stable ICU 4.0
   1.822 +  */
   1.823 +U_STABLE void U_EXPORT2
   1.824 +uregex_setRegion(URegularExpression   *regexp,
   1.825 +                 int32_t               regionStart,
   1.826 +                 int32_t               regionLimit,
   1.827 +                 UErrorCode           *status);
   1.828 +
   1.829 +/**
   1.830 +  * 64bit version of uregex_setRegion.
   1.831 +  * Sets the limits of the matching region for this URegularExpression.
   1.832 +  * The region is the part of the input string that will be considered when matching.
   1.833 +  * Invoking this method resets any saved state from the previous match, 
   1.834 +  * then sets the region to start at the index specified by the start parameter
   1.835 +  * and end at the index specified by the end parameter.
   1.836 +  *
   1.837 +  * Depending on the transparency and anchoring being used (see uregex_useTransparentBounds
   1.838 +  * and uregex_useAnchoringBounds), certain constructs such as anchors may behave differently
   1.839 +  * at or around the boundaries of the region.
   1.840 +  *
   1.841 +  * The function will fail if start is greater than limit, or if either index
   1.842 +  *  is less than zero or greater than the length of the string being matched.
   1.843 +  *
   1.844 +  * @param regexp The compiled regular expression.
   1.845 +  * @param regionStart  The (native) index to begin searches at.
   1.846 +  * @param regionLimit  The (native) index to end searches at (exclusive).
   1.847 +  * @param status A pointer to a UErrorCode to receive any errors.
   1.848 +  * @stable ICU 4.6
   1.849 +  */
   1.850 +U_STABLE void U_EXPORT2 
   1.851 +uregex_setRegion64(URegularExpression *regexp,
   1.852 +                 int64_t               regionStart,
   1.853 +                 int64_t               regionLimit,
   1.854 +                 UErrorCode           *status);
   1.855 +
   1.856 +/**
   1.857 +  *  Set the matching region and the starting index for subsequent matches
   1.858 +  *  in a single operation.
   1.859 +  *  This is useful because the usual function for setting the starting
   1.860 +  *  index, uregex_reset(), also resets any region limits.
   1.861 +  *
   1.862 +  * @param regexp The compiled regular expression.
   1.863 +  * @param regionStart  The (native) index to begin searches at.
   1.864 +  * @param regionLimit  The (native) index to end searches at (exclusive).
   1.865 +  * @param startIndex   The index in the input text at which the next 
   1.866 +  *                     match operation should begin.
   1.867 +  * @param status A pointer to a UErrorCode to receive any errors.
   1.868 +  * @stable ICU 4.6
   1.869 +  */
   1.870 +U_STABLE void U_EXPORT2 
   1.871 +uregex_setRegionAndStart(URegularExpression *regexp,
   1.872 +                 int64_t               regionStart,
   1.873 +                 int64_t               regionLimit,
   1.874 +                 int64_t               startIndex,
   1.875 +                 UErrorCode           *status);
   1.876 +
   1.877 +/**
   1.878 +  * Reports the start index of the matching region. Any matches found are limited to
   1.879 +  * the region bounded by regionStart (inclusive) and regionEnd (exclusive).
   1.880 +  *
   1.881 +  * @param regexp The compiled regular expression.
   1.882 +  * @param status A pointer to a UErrorCode to receive any errors.
   1.883 +  * @return The starting (native) index of this matcher's region.
   1.884 +  * @stable ICU 4.0
   1.885 +  */
   1.886 +U_STABLE int32_t U_EXPORT2
   1.887 +uregex_regionStart(const  URegularExpression   *regexp,
   1.888 +                          UErrorCode           *status);
   1.889 +
   1.890 +/**
   1.891 +  * 64bit version of uregex_regionStart.
   1.892 +  * Reports the start index of the matching region. Any matches found are limited to
   1.893 +  * the region bounded by regionStart (inclusive) and regionEnd (exclusive).
   1.894 +  *
   1.895 +  * @param regexp The compiled regular expression.
   1.896 +  * @param status A pointer to a UErrorCode to receive any errors.
   1.897 +  * @return The starting (native) index of this matcher's region.
   1.898 +  * @stable ICU 4.6
   1.899 +  */
   1.900 +U_STABLE int64_t U_EXPORT2 
   1.901 +uregex_regionStart64(const  URegularExpression   *regexp,
   1.902 +                            UErrorCode           *status);
   1.903 +
   1.904 +/**
   1.905 +  * Reports the end index (exclusive) of the matching region for this URegularExpression.
   1.906 +  * Any matches found are limited to the region bounded by regionStart (inclusive)
   1.907 +  * and regionEnd (exclusive).
   1.908 +  *
   1.909 +  * @param regexp The compiled regular expression.
   1.910 +  * @param status A pointer to a UErrorCode to receive any errors.
   1.911 +  * @return The ending point (native) of this matcher's region.
   1.912 +  * @stable ICU 4.0
   1.913 +  */
   1.914 +U_STABLE int32_t U_EXPORT2
   1.915 +uregex_regionEnd(const  URegularExpression   *regexp,
   1.916 +                        UErrorCode           *status);
   1.917 +
   1.918 +/**
   1.919 +  * 64bit version of uregex_regionEnd.
   1.920 +  * Reports the end index (exclusive) of the matching region for this URegularExpression.
   1.921 +  * Any matches found are limited to the region bounded by regionStart (inclusive)
   1.922 +  * and regionEnd (exclusive).
   1.923 +  *
   1.924 +  * @param regexp The compiled regular expression.
   1.925 +  * @param status A pointer to a UErrorCode to receive any errors.
   1.926 +  * @return The ending point (native) of this matcher's region.
   1.927 +  * @stable ICU 4.6
   1.928 +  */
   1.929 +U_STABLE int64_t U_EXPORT2 
   1.930 +uregex_regionEnd64(const  URegularExpression   *regexp,
   1.931 +                          UErrorCode           *status);
   1.932 +
   1.933 +/**
   1.934 +  * Queries the transparency of region bounds for this URegularExpression.
   1.935 +  * See uregex_useTransparentBounds for a description of transparent and opaque bounds.
   1.936 +  * By default, matching boundaries are opaque.
   1.937 +  *
   1.938 +  * @param regexp The compiled regular expression.
   1.939 +  * @param status A pointer to a UErrorCode to receive any errors.
   1.940 +  * @return TRUE if this matcher is using transparent bounds, FALSE if it is using opaque bounds.
   1.941 +  * @stable ICU 4.0
   1.942 +  */
   1.943 +U_STABLE UBool U_EXPORT2
   1.944 +uregex_hasTransparentBounds(const  URegularExpression   *regexp,
   1.945 +                                   UErrorCode           *status);
   1.946 +
   1.947 +
   1.948 +/**
   1.949 +  * Sets the transparency of region bounds for this URegularExpression.
   1.950 +  * Invoking this function with an argument of TRUE will set matches to use transparent bounds.
   1.951 +  * If the boolean argument is FALSE, then opaque bounds will be used.
   1.952 +  *
   1.953 +  * Using transparent bounds, the boundaries of the matching region are transparent
   1.954 +  * to lookahead, lookbehind, and boundary matching constructs. Those constructs can
   1.955 +  * see text beyond the boundaries of the region while checking for a match.
   1.956 +  *
   1.957 +  * With opaque bounds, no text outside of the matching region is visible to lookahead,
   1.958 +  * lookbehind, and boundary matching constructs.
   1.959 +  *
   1.960 +  * By default, opaque bounds are used.
   1.961 +  *
   1.962 +  * @param   regexp The compiled regular expression.
   1.963 +  * @param   b      TRUE for transparent bounds; FALSE for opaque bounds
   1.964 +  * @param   status A pointer to a UErrorCode to receive any errors.
   1.965 +  * @stable ICU 4.0
   1.966 +  **/
   1.967 +U_STABLE void U_EXPORT2  
   1.968 +uregex_useTransparentBounds(URegularExpression   *regexp, 
   1.969 +                            UBool                b,
   1.970 +                            UErrorCode           *status);
   1.971 +
   1.972 +
   1.973 +/**
   1.974 +  * Return true if this URegularExpression is using anchoring bounds.
   1.975 +  * By default, anchoring region bounds are used.
   1.976 +  *
   1.977 +  * @param  regexp The compiled regular expression.
   1.978 +  * @param  status A pointer to a UErrorCode to receive any errors.
   1.979 +  * @return TRUE if this matcher is using anchoring bounds.
   1.980 +  * @stable ICU 4.0
   1.981 +  */
   1.982 +U_STABLE UBool U_EXPORT2
   1.983 +uregex_hasAnchoringBounds(const  URegularExpression   *regexp,
   1.984 +                                 UErrorCode           *status);
   1.985 +
   1.986 +
   1.987 +/**
   1.988 +  * Set whether this URegularExpression is using Anchoring Bounds for its region.
   1.989 +  * With anchoring bounds, pattern anchors such as ^ and $ will match at the start
   1.990 +  * and end of the region.  Without Anchoring Bounds, anchors will only match at
   1.991 +  * the positions they would in the complete text.
   1.992 +  *
   1.993 +  * Anchoring Bounds are the default for regions.
   1.994 +  *
   1.995 +  * @param regexp The compiled regular expression.
   1.996 +  * @param b      TRUE to enable anchoring bounds; FALSE to disable them.
   1.997 +  * @param status A pointer to a UErrorCode to receive any errors.
   1.998 +  * @stable ICU 4.0
   1.999 +  */
  1.1000 +U_STABLE void U_EXPORT2
  1.1001 +uregex_useAnchoringBounds(URegularExpression   *regexp,
  1.1002 +                          UBool                 b,
  1.1003 +                          UErrorCode           *status);
  1.1004 +
  1.1005 +/**
  1.1006 +  * Return TRUE if the most recent matching operation touched the
  1.1007 +  *  end of the text being processed.  In this case, additional input text could
  1.1008 +  *  change the results of that match.
  1.1009 +  *
  1.1010 +  *  @param regexp The compiled regular expression.
  1.1011 +  *  @param status A pointer to a UErrorCode to receive any errors.
  1.1012 +  *  @return  TRUE if the most recent match hit the end of input
  1.1013 +  *  @stable ICU 4.0
  1.1014 +  */
  1.1015 +U_STABLE UBool U_EXPORT2
  1.1016 +uregex_hitEnd(const  URegularExpression   *regexp,
  1.1017 +                     UErrorCode           *status);
  1.1018 +
  1.1019 +/**
  1.1020 +  * Return TRUE if the most recent match succeeded and additional input could cause
  1.1021 +  * it to fail. If this function returns FALSE and a match was found, then more input
  1.1022 +  * might change the match but the match won't be lost. If a match was not found,
  1.1023 +  * then uregex_requireEnd has no meaning.
  1.1024 +  *
  1.1025 +  * @param regexp The compiled regular expression.
  1.1026 +  * @param status A pointer to a UErrorCode to receive any errors.
  1.1027 +  * @return TRUE  if more input could cause the most recent match to no longer match.
  1.1028 +  * @stable ICU 4.0
  1.1029 +  */
  1.1030 +U_STABLE UBool U_EXPORT2   
  1.1031 +uregex_requireEnd(const  URegularExpression   *regexp,
  1.1032 +                         UErrorCode           *status);
  1.1033 +
  1.1034 +
  1.1035 +
  1.1036 +
  1.1037 +
  1.1038 +/**
  1.1039 +  *    Replaces every substring of the input that matches the pattern
  1.1040 +  *    with the given replacement string.  This is a convenience function that
  1.1041 +  *    provides a complete find-and-replace-all operation.
  1.1042 +  *
  1.1043 +  *    This method scans the input string looking for matches of the pattern. 
  1.1044 +  *    Input that is not part of any match is copied unchanged to the
  1.1045 +  *    destination buffer.  Matched regions are replaced in the output
  1.1046 +  *    buffer by the replacement string.   The replacement string may contain
  1.1047 +  *    references to capture groups; these take the form of $1, $2, etc.
  1.1048 +  *
  1.1049 +  *    @param   regexp             The compiled regular expression.
  1.1050 +  *    @param   replacementText    A string containing the replacement text.
  1.1051 +  *    @param   replacementLength  The length of the replacement string, or
  1.1052 +  *                                -1 if it is NUL terminated.
  1.1053 +  *    @param   destBuf            A (UChar *) buffer that will receive the result.
  1.1054 +  *    @param   destCapacity       The capacity of the destination buffer.
  1.1055 +  *    @param   status             A reference to a UErrorCode to receive any errors.
  1.1056 +  *    @return                     The length of the string resulting from the find
  1.1057 +  *                                and replace operation.  In the event that the
  1.1058 +  *                                destination capacity is inadequate, the return value
  1.1059 +  *                                is still the full length of the untruncated string.
  1.1060 +  *    @stable ICU 3.0
  1.1061 +  */
  1.1062 +U_STABLE int32_t U_EXPORT2 
  1.1063 +uregex_replaceAll(URegularExpression    *regexp,
  1.1064 +                  const UChar           *replacementText,
  1.1065 +                  int32_t                replacementLength,
  1.1066 +                  UChar                 *destBuf,
  1.1067 +                  int32_t                destCapacity,
  1.1068 +                  UErrorCode            *status);
  1.1069 +
  1.1070 +/**
  1.1071 +  *    Replaces every substring of the input that matches the pattern
  1.1072 +  *    with the given replacement string.  This is a convenience function that
  1.1073 +  *    provides a complete find-and-replace-all operation.
  1.1074 +  *
  1.1075 +  *    This method scans the input string looking for matches of the pattern. 
  1.1076 +  *    Input that is not part of any match is copied unchanged to the
  1.1077 +  *    destination buffer.  Matched regions are replaced in the output
  1.1078 +  *    buffer by the replacement string.   The replacement string may contain
  1.1079 +  *    references to capture groups; these take the form of $1, $2, etc.
  1.1080 +  *
  1.1081 +  *    @param   regexp         The compiled regular expression.
  1.1082 +  *    @param   replacement    A string containing the replacement text.
  1.1083 +  *    @param   dest           A mutable UText that will receive the result.
  1.1084 +  *                             If NULL, a new UText will be created (which may not be mutable).
  1.1085 +  *    @param   status         A reference to a UErrorCode to receive any errors.
  1.1086 +  *    @return                 A UText containing the results of the find and replace.
  1.1087 +  *                             If a pre-allocated UText was provided, it will always be used and returned.
  1.1088 +  *
  1.1089 +  *    @stable ICU 4.6
  1.1090 +  */
  1.1091 +U_STABLE UText * U_EXPORT2 
  1.1092 +uregex_replaceAllUText(URegularExpression *regexp,
  1.1093 +                       UText              *replacement,
  1.1094 +                       UText              *dest,
  1.1095 +                       UErrorCode         *status);
  1.1096 +
  1.1097 +/**
  1.1098 +  *    Replaces the first substring of the input that matches the pattern
  1.1099 +  *    with the given replacement string.  This is a convenience function that
  1.1100 +  *    provides a complete find-and-replace operation.
  1.1101 +  *
  1.1102 +  *    This method scans the input string looking for a match of the pattern. 
  1.1103 +  *    All input that is not part of the match is copied unchanged to the
  1.1104 +  *    destination buffer.  The matched region is replaced in the output
  1.1105 +  *    buffer by the replacement string.   The replacement string may contain
  1.1106 +  *    references to capture groups; these take the form of $1, $2, etc.
  1.1107 +  *
  1.1108 +  *    @param   regexp             The compiled regular expression.
  1.1109 +  *    @param   replacementText    A string containing the replacement text.
  1.1110 +  *    @param   replacementLength  The length of the replacement string, or
  1.1111 +  *                                -1 if it is NUL terminated.
  1.1112 +  *    @param   destBuf            A (UChar *) buffer that will receive the result.
  1.1113 +  *    @param   destCapacity       The capacity of the destination buffer.
  1.1114 +  *    @param   status             A reference to a UErrorCode to receive any errors.
  1.1115 +  *    @return                     The length of the string resulting from the find
  1.1116 +  *                                and replace operation.  In the event that the
  1.1117 +  *                                destination capacity is inadequate, the return value
  1.1118 +  *                                is still the full length of the untruncated string.
  1.1119 +  *    @stable ICU 3.0
  1.1120 +  */
  1.1121 +U_STABLE int32_t U_EXPORT2 
  1.1122 +uregex_replaceFirst(URegularExpression  *regexp,
  1.1123 +                    const UChar         *replacementText,
  1.1124 +                    int32_t              replacementLength,
  1.1125 +                    UChar               *destBuf,
  1.1126 +                    int32_t              destCapacity,
  1.1127 +                    UErrorCode          *status);
  1.1128 +
  1.1129 +/**
  1.1130 +  *    Replaces the first substring of the input that matches the pattern
  1.1131 +  *    with the given replacement string.  This is a convenience function that
  1.1132 +  *    provides a complete find-and-replace operation.
  1.1133 +  *
  1.1134 +  *    This method scans the input string looking for a match of the pattern. 
  1.1135 +  *    All input that is not part of the match is copied unchanged to the
  1.1136 +  *    destination buffer.  The matched region is replaced in the output
  1.1137 +  *    buffer by the replacement string.   The replacement string may contain
  1.1138 +  *    references to capture groups; these take the form of $1, $2, etc.
  1.1139 +  *
  1.1140 +  *    @param   regexp         The compiled regular expression.
  1.1141 +  *    @param   replacement    A string containing the replacement text.
  1.1142 +  *    @param   dest           A mutable UText that will receive the result.
  1.1143 +  *                             If NULL, a new UText will be created (which may not be mutable).
  1.1144 +  *    @param   status         A reference to a UErrorCode to receive any errors.
  1.1145 +  *    @return                 A UText containing the results of the find and replace.
  1.1146 +  *                             If a pre-allocated UText was provided, it will always be used and returned.
  1.1147 +  *
  1.1148 +  *    @stable ICU 4.6
  1.1149 +  */
  1.1150 +U_STABLE UText * U_EXPORT2 
  1.1151 +uregex_replaceFirstUText(URegularExpression *regexp,
  1.1152 +                         UText              *replacement,
  1.1153 +                         UText              *dest,
  1.1154 +                         UErrorCode         *status);
  1.1155 +
  1.1156 +/**
  1.1157 +  *   Implements a replace operation intended to be used as part of an
  1.1158 +  *   incremental find-and-replace.
  1.1159 +  *
  1.1160 +  *   <p>The input string, starting from the end of the previous match and ending at
  1.1161 +  *   the start of the current match, is appended to the destination string.  Then the
  1.1162 +  *   replacement string is appended to the output string,
  1.1163 +  *   including handling any substitutions of captured text.</p>
  1.1164 +  *
  1.1165 +  *   <p>A note on preflight computation of buffersize and error handling:
  1.1166 +  *   Calls to uregex_appendReplacement() and uregex_appendTail() are
  1.1167 +  *   designed to be chained, one after another, with the destination
  1.1168 +  *   buffer pointer and buffer capacity updated after each in preparation
  1.1169 +  *   to for the next.  If the destination buffer is exhausted partway through such a
  1.1170 +  *   sequence, a U_BUFFER_OVERFLOW_ERROR status will be returned.  Normal
  1.1171 +  *   ICU conventions are for a function to perform no action if it is
  1.1172 +  *   called with an error status, but for this one case, uregex_appendReplacement()
  1.1173 +  *   will operate normally so that buffer size computations will complete
  1.1174 +  *   correctly.</p>
  1.1175 +  *
  1.1176 +  *   <p>For simple, prepackaged, non-incremental find-and-replace
  1.1177 +  *      operations, see uregex_replaceFirst() or uregex_replaceAll().</p>
  1.1178 +  *
  1.1179 +  *   @param   regexp      The regular expression object.  
  1.1180 +  *   @param   replacementText The string that will replace the matched portion of the
  1.1181 +  *                        input string as it is copied to the destination buffer.
  1.1182 +  *                        The replacement text may contain references ($1, for
  1.1183 +  *                        example) to capture groups from the match.
  1.1184 +  *   @param   replacementLength  The length of the replacement text string,
  1.1185 +  *                        or -1 if the string is NUL terminated.
  1.1186 +  *   @param   destBuf     The buffer into which the results of the
  1.1187 +  *                        find-and-replace are placed.  On return, this pointer
  1.1188 +  *                        will be updated to refer to the beginning of the
  1.1189 +  *                        unused portion of buffer, leaving it in position for
  1.1190 +  *                        a subsequent call to this function.
  1.1191 +  *   @param   destCapacity The size of the output buffer.  On return, this
  1.1192 +  *                        parameter will be updated to reflect the space remaining
  1.1193 +  *                        unused in the output buffer.
  1.1194 +  *   @param   status      A reference to a UErrorCode to receive any errors. 
  1.1195 +  *   @return              The length of the result string.  In the event that
  1.1196 +  *                        destCapacity is inadequate, the full length of the
  1.1197 +  *                        untruncated output string is returned.
  1.1198 +  *
  1.1199 +  *   @stable ICU 3.0
  1.1200 +  *
  1.1201 +  */
  1.1202 +U_STABLE int32_t U_EXPORT2 
  1.1203 +uregex_appendReplacement(URegularExpression    *regexp,
  1.1204 +                         const UChar           *replacementText,
  1.1205 +                         int32_t                replacementLength,
  1.1206 +                         UChar                **destBuf,
  1.1207 +                         int32_t               *destCapacity,
  1.1208 +                         UErrorCode            *status);
  1.1209 +
  1.1210 +/**
  1.1211 +  *   Implements a replace operation intended to be used as part of an
  1.1212 +  *   incremental find-and-replace.
  1.1213 +  *
  1.1214 +  *   <p>The input string, starting from the end of the previous match and ending at
  1.1215 +  *   the start of the current match, is appended to the destination string.  Then the
  1.1216 +  *   replacement string is appended to the output string,
  1.1217 +  *   including handling any substitutions of captured text.</p>
  1.1218 +  *
  1.1219 +  *   <p>For simple, prepackaged, non-incremental find-and-replace
  1.1220 +  *      operations, see uregex_replaceFirstUText() or uregex_replaceAllUText().</p>
  1.1221 +  *
  1.1222 +  *   @param   regexp      The regular expression object.  
  1.1223 +  *   @param   replacementText The string that will replace the matched portion of the
  1.1224 +  *                        input string as it is copied to the destination buffer.
  1.1225 +  *                        The replacement text may contain references ($1, for
  1.1226 +  *                        example) to capture groups from the match.
  1.1227 +  *   @param   dest        A mutable UText that will receive the result. Must not be NULL.
  1.1228 +  *   @param   status      A reference to a UErrorCode to receive any errors. 
  1.1229 +  *
  1.1230 +  *   @stable ICU 4.6
  1.1231 +  */
  1.1232 +U_STABLE void U_EXPORT2 
  1.1233 +uregex_appendReplacementUText(URegularExpression    *regexp,
  1.1234 +                              UText                 *replacementText,
  1.1235 +                              UText                 *dest,
  1.1236 +                              UErrorCode            *status);
  1.1237 +
  1.1238 +/**
  1.1239 +  * As the final step in a find-and-replace operation, append the remainder
  1.1240 +  * of the input string, starting at the position following the last match,
  1.1241 +  * to the destination string. <code>uregex_appendTail()</code> is intended 
  1.1242 +  *  to be invoked after one or more invocations of the
  1.1243 +  *  <code>uregex_appendReplacement()</code> function.
  1.1244 +  *
  1.1245 +  *   @param   regexp      The regular expression object.  This is needed to 
  1.1246 +  *                        obtain the input string along with the position
  1.1247 +  *                        of the last match within it.
  1.1248 +  *   @param   destBuf     The buffer in which the results of the
  1.1249 +  *                        find-and-replace are placed.  On return, the pointer
  1.1250 +  *                        will be updated to refer to the beginning of the
  1.1251 +  *                        unused portion of buffer.
  1.1252 +  *   @param   destCapacity The size of the output buffer.  On return, this
  1.1253 +  *                        value will be updated to reflect the space remaining
  1.1254 +  *                        unused in the output buffer.
  1.1255 +  *   @param   status      A reference to a UErrorCode to receive any errors. 
  1.1256 +  *   @return              The length of the result string.  In the event that
  1.1257 +  *                        destCapacity is inadequate, the full length of the
  1.1258 +  *                        untruncated output string is returned.
  1.1259 +  *
  1.1260 +  *   @stable ICU 3.0
  1.1261 +  */
  1.1262 +U_STABLE int32_t U_EXPORT2 
  1.1263 +uregex_appendTail(URegularExpression    *regexp,
  1.1264 +                  UChar                **destBuf,
  1.1265 +                  int32_t               *destCapacity,
  1.1266 +                  UErrorCode            *status);
  1.1267 +
  1.1268 +/**
  1.1269 +  * As the final step in a find-and-replace operation, append the remainder
  1.1270 +  * of the input string, starting at the position following the last match,
  1.1271 +  * to the destination string. <code>uregex_appendTailUText()</code> is intended 
  1.1272 +  *  to be invoked after one or more invocations of the
  1.1273 +  *  <code>uregex_appendReplacementUText()</code> function.
  1.1274 +  *
  1.1275 +  *   @param   regexp      The regular expression object.  This is needed to 
  1.1276 +  *                        obtain the input string along with the position
  1.1277 +  *                        of the last match within it.
  1.1278 +  *   @param   dest        A mutable UText that will receive the result. Must not be NULL.
  1.1279 +  *
  1.1280 +  *   @param status        Error code
  1.1281 +  *
  1.1282 +  *   @return              The destination UText.
  1.1283 +  *
  1.1284 +  *   @stable ICU 4.6
  1.1285 +  */
  1.1286 +U_STABLE UText * U_EXPORT2 
  1.1287 +uregex_appendTailUText(URegularExpression    *regexp,
  1.1288 +                       UText                 *dest,
  1.1289 +                       UErrorCode            *status);
  1.1290 +
  1.1291 + /**
  1.1292 +   * Split a string into fields.  Somewhat like split() from Perl.
  1.1293 +   *  The pattern matches identify delimiters that separate the input
  1.1294 +   *  into fields.  The input data between the matches becomes the
  1.1295 +   *  fields themselves.
  1.1296 +   *
  1.1297 +   *  Each of the fields is copied from the input string to the destination
  1.1298 +   *  buffer, and NUL terminated.  The position of each field within
  1.1299 +   *  the destination buffer is returned in the destFields array.
  1.1300 +   *
  1.1301 +   *  If the delimiter pattern includes capture groups, the captured text will
  1.1302 +   *  also appear in the destination array of output strings, interspersed
  1.1303 +   *  with the fields.  This is similar to Perl, but differs from Java, 
  1.1304 +   *  which ignores the presence of capture groups in the pattern.
  1.1305 +   * 
  1.1306 +   *  Trailing empty fields will always be returned, assuming sufficient
  1.1307 +   *  destination capacity.  This differs from the default behavior for Java
  1.1308 +   *  and Perl where trailing empty fields are not returned.
  1.1309 +   *
  1.1310 +   *  The number of strings produced by the split operation is returned.
  1.1311 +   *  This count includes the strings from capture groups in the delimiter pattern.
  1.1312 +   *  This behavior differs from Java, which ignores capture groups.
  1.1313 +   * 
  1.1314 +   *    @param   regexp      The compiled regular expression.
  1.1315 +   *    @param   destBuf     A (UChar *) buffer to receive the fields that
  1.1316 +   *                         are extracted from the input string. These
  1.1317 +   *                         field pointers will refer to positions within the
  1.1318 +   *                         destination buffer supplied by the caller.  Any
  1.1319 +   *                         extra positions within the destFields array will be
  1.1320 +   *                         set to NULL.
  1.1321 +   *    @param   destCapacity The capacity of the destBuf.
  1.1322 +   *    @param   requiredCapacity  The actual capacity required of the destBuf.
  1.1323 +   *                         If destCapacity is too small, requiredCapacity will return 
  1.1324 +   *                         the total capacity required to hold all of the output, and
  1.1325 +   *                         a U_BUFFER_OVERFLOW_ERROR will be returned.
  1.1326 +   *    @param   destFields  An array to be filled with the position of each
  1.1327 +   *                         of the extracted fields within destBuf.
  1.1328 +   *    @param   destFieldsCapacity  The number of elements in the destFields array.
  1.1329 +   *                If the number of fields found is less than destFieldsCapacity,
  1.1330 +   *                the extra destFields elements are set to zero.
  1.1331 +   *                If destFieldsCapacity is too small, the trailing part of the
  1.1332 +   *                input, including any field delimiters, is treated as if it
  1.1333 +   *                were the last field - it is copied to the destBuf, and
  1.1334 +   *                its position in the destBuf is stored in the last element
  1.1335 +   *                of destFields.  This behavior mimics that of Perl.  It is not
  1.1336 +   *                an error condition, and no error status is returned when all destField
  1.1337 +   *                positions are used.
  1.1338 +   * @param status  A reference to a UErrorCode to receive any errors.
  1.1339 +   * @return        The number of fields into which the input string was split.
  1.1340 +   * @stable ICU 3.0
  1.1341 +   */
  1.1342 +U_STABLE int32_t U_EXPORT2 
  1.1343 +uregex_split(   URegularExpression      *regexp,
  1.1344 +                  UChar                 *destBuf,
  1.1345 +                  int32_t                destCapacity,
  1.1346 +                  int32_t               *requiredCapacity,
  1.1347 +                  UChar                 *destFields[],
  1.1348 +                  int32_t                destFieldsCapacity,
  1.1349 +                  UErrorCode            *status);
  1.1350 +
  1.1351 +  /**
  1.1352 +   * Split a string into fields.  Somewhat like split() from Perl.
  1.1353 +   * The pattern matches identify delimiters that separate the input
  1.1354 +   *  into fields.  The input data between the matches becomes the
  1.1355 +   *  fields themselves.
  1.1356 +   * <p>
  1.1357 +   * The behavior of this function is not very closely aligned with uregex_split();
  1.1358 +   * instead, it is based on (and implemented directly on top of) the C++ split method.
  1.1359 +   *
  1.1360 +   * @param regexp  The compiled regular expression.
  1.1361 +   * @param destFields    An array of mutable UText structs to receive the results of the split.
  1.1362 +   *                If a field is NULL, a new UText is allocated to contain the results for
  1.1363 +   *                that field. This new UText is not guaranteed to be mutable.
  1.1364 +   * @param destFieldsCapacity  The number of elements in the destination array.
  1.1365 +   *                If the number of fields found is less than destFieldsCapacity, the
  1.1366 +   *                extra strings in the destination array are not altered.
  1.1367 +   *                If the number of destination strings is less than the number
  1.1368 +   *                of fields, the trailing part of the input string, including any
  1.1369 +   *                field delimiters, is placed in the last destination string.
  1.1370 +   *                This behavior mimics that of Perl.  It is not  an error condition, and no
  1.1371 +   *                error status is returned when all destField positions are used.
  1.1372 +   * @param status  A reference to a UErrorCode to receive any errors.
  1.1373 +   * @return        The number of fields into which the input string was split.
  1.1374 +   *
  1.1375 +   * @stable ICU 4.6
  1.1376 +   */
  1.1377 +U_STABLE int32_t U_EXPORT2 
  1.1378 +uregex_splitUText(URegularExpression    *regexp,
  1.1379 +                  UText                 *destFields[],
  1.1380 +                  int32_t                destFieldsCapacity,
  1.1381 +                  UErrorCode            *status);
  1.1382 +
  1.1383 +/**
  1.1384 + * Set a processing time limit for match operations with this URegularExpression.
  1.1385 + *
  1.1386 + * Some patterns, when matching certain strings, can run in exponential time.
  1.1387 + * For practical purposes, the match operation may appear to be in an
  1.1388 + * infinite loop.
  1.1389 + * When a limit is set a match operation will fail with an error if the
  1.1390 + * limit is exceeded.
  1.1391 + * <p>
  1.1392 + * The units of the limit are steps of the match engine.
  1.1393 + * Correspondence with actual processor time will depend on the speed
  1.1394 + * of the processor and the details of the specific pattern, but will
  1.1395 + * typically be on the order of milliseconds.
  1.1396 + * <p>
  1.1397 + * By default, the matching time is not limited.
  1.1398 + * <p>
  1.1399 + *
  1.1400 + * @param   regexp      The compiled regular expression.
  1.1401 + * @param   limit       The limit value, or 0 for no limit.
  1.1402 + * @param   status      A reference to a UErrorCode to receive any errors.
  1.1403 + * @stable ICU 4.0
  1.1404 + */
  1.1405 +U_STABLE void U_EXPORT2
  1.1406 +uregex_setTimeLimit(URegularExpression      *regexp,
  1.1407 +                    int32_t                  limit,
  1.1408 +                    UErrorCode              *status);
  1.1409 +
  1.1410 +/**
  1.1411 + * Get the time limit for matches with this URegularExpression.
  1.1412 + * A return value of zero indicates that there is no limit.
  1.1413 + *
  1.1414 + * @param   regexp      The compiled regular expression.
  1.1415 + * @param   status      A reference to a UErrorCode to receive any errors.
  1.1416 + * @return the maximum allowed time for a match, in units of processing steps.
  1.1417 + * @stable ICU 4.0
  1.1418 + */
  1.1419 +U_STABLE int32_t U_EXPORT2
  1.1420 +uregex_getTimeLimit(const URegularExpression      *regexp,
  1.1421 +                          UErrorCode              *status);
  1.1422 +
  1.1423 +/**
  1.1424 + * Set the amount of heap storage available for use by the match backtracking stack.
  1.1425 + * <p>
  1.1426 + * ICU uses a backtracking regular expression engine, with the backtrack stack
  1.1427 + * maintained on the heap.  This function sets the limit to the amount of memory
  1.1428 + * that can be used  for this purpose.  A backtracking stack overflow will
  1.1429 + * result in an error from the match operation that caused it.
  1.1430 + * <p>
  1.1431 + * A limit is desirable because a malicious or poorly designed pattern can use
  1.1432 + * excessive memory, potentially crashing the process.  A limit is enabled
  1.1433 + * by default.
  1.1434 + * <p>
  1.1435 + * @param   regexp      The compiled regular expression.
  1.1436 + * @param   limit       The maximum size, in bytes, of the matching backtrack stack.
  1.1437 + *                      A value of zero means no limit.
  1.1438 + *                      The limit must be greater than or equal to zero.
  1.1439 + * @param   status      A reference to a UErrorCode to receive any errors.
  1.1440 + *
  1.1441 + * @stable ICU 4.0
  1.1442 + */
  1.1443 +U_STABLE void U_EXPORT2
  1.1444 +uregex_setStackLimit(URegularExpression      *regexp,
  1.1445 +                     int32_t                  limit,
  1.1446 +                     UErrorCode              *status);
  1.1447 +
  1.1448 +/**
  1.1449 + * Get the size of the heap storage available for use by the backtracking stack.
  1.1450 + *
  1.1451 + * @return  the maximum backtracking stack size, in bytes, or zero if the
  1.1452 + *          stack size is unlimited.
  1.1453 + * @stable ICU 4.0
  1.1454 + */
  1.1455 +U_STABLE int32_t U_EXPORT2
  1.1456 +uregex_getStackLimit(const URegularExpression      *regexp,
  1.1457 +                           UErrorCode              *status);
  1.1458 +
  1.1459 +
  1.1460 +/**
  1.1461 + * Function pointer for a regular expression matching callback function.
  1.1462 + * When set, a callback function will be called periodically during matching
  1.1463 + * operations.  If the callback function returns FALSE, the matching
  1.1464 + * operation will be terminated early.
  1.1465 + *
  1.1466 + * Note:  the callback function must not call other functions on this
  1.1467 + *        URegularExpression.
  1.1468 + *
  1.1469 + * @param context  context pointer.  The callback function will be invoked
  1.1470 + *                 with the context specified at the time that
  1.1471 + *                 uregex_setMatchCallback() is called.
  1.1472 + * @param steps    the accumulated processing time, in match steps, 
  1.1473 + *                 for this matching operation.
  1.1474 + * @return         TRUE to continue the matching operation.
  1.1475 + *                 FALSE to terminate the matching operation.
  1.1476 + * @stable ICU 4.0
  1.1477 + */
  1.1478 +U_CDECL_BEGIN
  1.1479 +typedef UBool U_CALLCONV URegexMatchCallback (
  1.1480 +                   const void *context,
  1.1481 +                   int32_t     steps);
  1.1482 +U_CDECL_END
  1.1483 +
  1.1484 +/**
  1.1485 + * Set a callback function for this URegularExpression.
  1.1486 + * During matching operations the function will be called periodically,
  1.1487 + * giving the application the opportunity to terminate a long-running
  1.1488 + * match.
  1.1489 + *
  1.1490 + * @param   regexp      The compiled regular expression.
  1.1491 + * @param   callback    A pointer to the user-supplied callback function.
  1.1492 + * @param   context     User context pointer.  The value supplied at the
  1.1493 + *                      time the callback function is set will be saved
  1.1494 + *                      and passed to the callback each time that it is called.
  1.1495 + * @param   status      A reference to a UErrorCode to receive any errors.
  1.1496 + * @stable ICU 4.0
  1.1497 + */
  1.1498 +U_STABLE void U_EXPORT2
  1.1499 +uregex_setMatchCallback(URegularExpression      *regexp,
  1.1500 +                        URegexMatchCallback     *callback,
  1.1501 +                        const void              *context,
  1.1502 +                        UErrorCode              *status);
  1.1503 +
  1.1504 +
  1.1505 +/**
  1.1506 + *  Get the callback function for this URegularExpression.
  1.1507 + *
  1.1508 + * @param   regexp      The compiled regular expression.
  1.1509 + * @param   callback    Out parameter, receives a pointer to the user-supplied 
  1.1510 + *                      callback function.
  1.1511 + * @param   context     Out parameter, receives the user context pointer that
  1.1512 + *                      was set when uregex_setMatchCallback() was called.
  1.1513 + * @param   status      A reference to a UErrorCode to receive any errors.
  1.1514 + * @stable ICU 4.0
  1.1515 + */
  1.1516 +U_STABLE void U_EXPORT2
  1.1517 +uregex_getMatchCallback(const URegularExpression    *regexp,
  1.1518 +                        URegexMatchCallback        **callback,
  1.1519 +                        const void                 **context,
  1.1520 +                        UErrorCode                  *status);
  1.1521 +
  1.1522 +/**
  1.1523 + * Function pointer for a regular expression find callback function.
  1.1524 + * 
  1.1525 + * When set, a callback function will be called during a find operation
  1.1526 + * and for operations that depend on find, such as findNext, split and some replace
  1.1527 + * operations like replaceFirst.
  1.1528 + * The callback will usually be called after each attempt at a match, but this is not a
  1.1529 + * guarantee that the callback will be invoked at each character.  For finds where the
  1.1530 + * match engine is invoked at each character, this may be close to true, but less likely
  1.1531 + * for more optimized loops where the pattern is known to only start, and the match
  1.1532 + * engine invoked, at certain characters.
  1.1533 + * When invoked, this callback will specify the index at which a match operation is about
  1.1534 + * to be attempted, giving the application the opportunity to terminate a long-running
  1.1535 + * find operation.
  1.1536 + * 
  1.1537 + * If the callback function returns FALSE, the find operation will be terminated early.
  1.1538 + *
  1.1539 + * Note:  the callback function must not call other functions on this
  1.1540 + *        URegularExpression.
  1.1541 + *
  1.1542 + * @param context  context pointer.  The callback function will be invoked
  1.1543 + *                 with the context specified at the time that
  1.1544 + *                 uregex_setFindProgressCallback() is called.
  1.1545 + * @param matchIndex  the next index at which a match will be attempted for this
  1.1546 + *                 find operation.  If this callback interrupts the search, this is the
  1.1547 + *                 index at which a find/findNext operation may be re-initiated.
  1.1548 + * @return         TRUE to continue the matching operation.
  1.1549 + *                 FALSE to terminate the matching operation.
  1.1550 + * @stable ICU 4.6
  1.1551 + */
  1.1552 +U_CDECL_BEGIN
  1.1553 +typedef UBool U_CALLCONV URegexFindProgressCallback (
  1.1554 +                   const void *context,
  1.1555 +                   int64_t     matchIndex);
  1.1556 +U_CDECL_END
  1.1557 +
  1.1558 +
  1.1559 +/**
  1.1560 + *  Set the find progress callback function for this URegularExpression.
  1.1561 + *
  1.1562 + * @param   regexp      The compiled regular expression.
  1.1563 + * @param   callback    A pointer to the user-supplied callback function.
  1.1564 + * @param   context     User context pointer.  The value supplied at the
  1.1565 + *                      time the callback function is set will be saved
  1.1566 + *                      and passed to the callback each time that it is called.
  1.1567 + * @param   status      A reference to a UErrorCode to receive any errors.
  1.1568 + * @stable ICU 4.6
  1.1569 + */
  1.1570 +U_STABLE void U_EXPORT2
  1.1571 +uregex_setFindProgressCallback(URegularExpression              *regexp,
  1.1572 +                                URegexFindProgressCallback      *callback,
  1.1573 +                                const void                      *context,
  1.1574 +                                UErrorCode                      *status);
  1.1575 +
  1.1576 +/**
  1.1577 + *  Get the find progress callback function for this URegularExpression.
  1.1578 + *
  1.1579 + * @param   regexp      The compiled regular expression.
  1.1580 + * @param   callback    Out parameter, receives a pointer to the user-supplied 
  1.1581 + *                      callback function.
  1.1582 + * @param   context     Out parameter, receives the user context pointer that
  1.1583 + *                      was set when uregex_setFindProgressCallback() was called.
  1.1584 + * @param   status      A reference to a UErrorCode to receive any errors.
  1.1585 + * @stable ICU 4.6
  1.1586 + */
  1.1587 +U_STABLE void U_EXPORT2
  1.1588 +uregex_getFindProgressCallback(const URegularExpression          *regexp,
  1.1589 +                                URegexFindProgressCallback        **callback,
  1.1590 +                                const void                        **context,
  1.1591 +                                UErrorCode                        *status);
  1.1592 +
  1.1593 +#endif   /*  !UCONFIG_NO_REGULAR_EXPRESSIONS  */
  1.1594 +#endif   /*  UREGEX_H  */

mercurial