diff -r 000000000000 -r 6474c204b198 intl/icu/source/i18n/regexcst.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/intl/icu/source/i18n/regexcst.txt Wed Dec 31 06:09:35 2014 +0100 @@ -0,0 +1,467 @@ + +#***************************************************************************** +# +# Copyright (C) 2002-2007, International Business Machines Corporation and others. +# All Rights Reserved. +# +#***************************************************************************** +# +# file: regexcst.txt +# ICU Regular Expression Parser State Table +# +# This state table is used when reading and parsing a regular expression pattern +# The pattern parser uses a state machine; the data in this file define the +# state transitions that occur for each input character. +# +# *** This file defines the regex pattern grammar. This is it. +# *** The determination of what is accepted is here. +# +# This file is processed by a perl script "regexcst.pl" to produce initialized C arrays +# that are then built with the pattern parser. +# + +# +# Here is the syntax of the state definitions in this file: +# +# +#StateName: +# input-char n next-state ^push-state action +# input-char n next-state ^push-state action +# | | | | | +# | | | | |--- action to be performed by state machine +# | | | | See function RegexCompile::doParseActions() +# | | | | +# | | | |--- Push this named state onto the state stack. +# | | | Later, when next state is specified as "pop", +# | | | the pushed state will become the current state. +# | | | +# | | |--- Transition to this state if the current input character matches the input +# | | character or char class in the left hand column. "pop" causes the next +# | | state to be popped from the state stack. +# | | +# | |--- When making the state transition specified on this line, advance to the next +# | character from the input only if 'n' appears here. +# | +# |--- Character or named character classes to test for. 
If the current character being scanned +# matches, perform the actions and go to the state specified on this line. +# The input character is tested sequentially, in the order written. The characters and +# character classes tested for do not need to be mutually exclusive. The first match wins. +# + + + + +# +# start state, scan position is at the beginning of the pattern. +# +start: + default term doPatStart + + + + +# +# term. At a position where we can accept the start of most items in a pattern. +# +term: + quoted n expr-quant doLiteralChar + rule_char n expr-quant doLiteralChar + '[' n set-open ^set-finish doSetBegin + '(' n open-paren + '.' n expr-quant doDotAny + '^' n expr-quant doCaret + '$' n expr-quant doDollar + '\' n backslash + '|' n term doOrOperator + ')' n pop doCloseParen + eof term doPatFinish + default errorDeath doRuleError + + + +# +# expr-quant We've just finished scanning a term, now look for the optional +# trailing quantifier - *, +, ?, *?, etc. +# +expr-quant: + '*' n quant-star + '+' n quant-plus + '?' n quant-opt + '{' n interval-open doIntervalInit + '(' n open-paren-quant + default expr-cont + + +# +# expr-cont Expression, continuation. At a point where additional terms are +# allowed, but not required. No Quantifiers +# +expr-cont: + '|' n term doOrOperator + ')' n pop doCloseParen + default term + + +# +# open-paren-quant Special case handling for comments appearing before a quantifier, +# e.g. x(?#comment )* +# Open parens from expr-quant come here; anything but a (?# comment +# branches into the normal parenthesis sequence as quickly as possible. +# +open-paren-quant: + '?' n open-paren-quant2 doSuppressComments + default open-paren + +open-paren-quant2: + '#' n paren-comment ^expr-quant + default open-paren-extended + + +# +# open-paren We've got an open paren. We need to scan further to +# determine what kind of paren it is - plain (, (?:, (?>, or whatever. +# +open-paren: + '?' 
n open-paren-extended doSuppressComments + default term ^expr-quant doOpenCaptureParen + +open-paren-extended: + ':' n term ^expr-quant doOpenNonCaptureParen # (?: + '>' n term ^expr-quant doOpenAtomicParen # (?> + '=' n term ^expr-cont doOpenLookAhead # (?= + '!' n term ^expr-cont doOpenLookAheadNeg # (?! + '<' n open-paren-lookbehind + '#' n paren-comment ^term + 'i' paren-flag doBeginMatchMode + 'd' paren-flag doBeginMatchMode + 'm' paren-flag doBeginMatchMode + 's' paren-flag doBeginMatchMode + 'u' paren-flag doBeginMatchMode + 'w' paren-flag doBeginMatchMode + 'x' paren-flag doBeginMatchMode + '-' paren-flag doBeginMatchMode + '(' n errorDeath doConditionalExpr + '{' n errorDeath doPerlInline + default errorDeath doBadOpenParenType + +open-paren-lookbehind: + '=' n term ^expr-cont doOpenLookBehind # (?<= + '!' n term ^expr-cont doOpenLookBehindNeg # (?