# addon-sdk/source/python-lib/simplejson/scanner.py
"""
Iterator based sre token scanner
"""
import re
from re import VERBOSE, MULTILINE, DOTALL
import sre_parse
import sre_compile
import sre_constants
from sre_constants import BRANCH, SUBPATTERN

__all__ = ['Scanner', 'pattern']

FLAGS = (VERBOSE | MULTILINE | DOTALL)


class Scanner(object):
    """
    Tokenizer that merges every phrase of a lexicon into one compiled
    alternation and hands each match to the lexicon entry that produced it.

    Each lexicon entry must expose a ``pattern`` attribute (the regex source
    for that token) and be callable as ``entry(match, context)``, returning a
    ``(rval, next_pos)`` pair.  The ``pattern`` decorator in this module
    attaches the required attribute to a plain function.
    """

    def __init__(self, lexicon, flags=FLAGS):
        """
        Build the compound pattern for ``lexicon``.

        ``lexicon`` is an iterable of token objects as described on the
        class; ``flags`` are the regex flags used to parse every phrase.
        An invalid phrase raises ``sre_constants.error`` from the parser.
        """
        # Slot 0 is a placeholder so that group number N maps to actions[N].
        self.actions = [None]
        # Combine phrases into a compound pattern
        # NOTE(review): sre_parse.Pattern and the two-element SUBPATTERN
        # argument are private interpreter details that differ between
        # Python versions -- confirm against the target runtime.
        s = sre_parse.Pattern()
        s.flags = flags
        p = []
        for idx, token in enumerate(lexicon):
            # Wrap the parsed phrase in its own numbered group (idx + 1) so
            # that match.lastindex identifies which token matched.
            p.append(sre_parse.SubPattern(s, [
                (SUBPATTERN, (idx + 1, sre_parse.parse(token.pattern, flags))),
            ]))
            self.actions.append(token)

        s.groups = len(p) + 1  # NOTE(guido): Added to make SRE validation work
        p = sre_parse.SubPattern(s, [(BRANCH, (None, p))])
        self.scanner = sre_compile.compile(p)

    def iterscan(self, string, idx=0, context=None):
        """
        Yield match, end_idx for each match

        Scanning starts at ``idx`` and proceeds match by match.  For every
        match the corresponding action is called as ``action(m, context)``
        and must return ``(rval, next_pos)``; ``(rval, end_idx)`` is then
        yielded.  If ``next_pos`` is not None and differs from the end of the
        match, scanning resumes from ``next_pos`` and that value is reported
        as ``end_idx``.  Matches whose action is None are consumed without
        yielding anything.

        Iteration stops at the first position where nothing matches, or when
        a match ends exactly where the previous one ended (no progress).
        """
        match = self.scanner.scanner(string, idx).match
        actions = self.actions
        lastend = idx
        while True:
            m = match()
            if m is None:
                break
            matchend = m.end()
            if lastend == matchend:
                # Zero-width progress: stop instead of looping forever.
                break
            action = actions[m.lastindex]
            if action is not None:
                rval, next_pos = action(m, context)
                if next_pos is not None and next_pos != matchend:
                    # "fast forward" the scanner
                    matchend = next_pos
                    match = self.scanner.scanner(string, matchend).match
                yield rval, matchend
            lastend = matchend


def pattern(pattern, flags=FLAGS):
    """
    Return a decorator that tags a function as a scanner token.

    The decorated function gains two attributes: ``pattern`` (the regex
    source string passed here) and ``regex`` (that source compiled with
    ``flags``).  The function object itself is returned unchanged.
    """
    def decorator(fn):
        fn.pattern = pattern
        fn.regex = re.compile(pattern, flags)
        return fn
    return decorator