#!/usr/bin/env python
#
# Copyright 2008, Google Inc.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
#     * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""pump v0.2.0 - Pretty Useful for Meta Programming.

A tool for preprocessor meta programming.  Useful for generating
repetitive boilerplate code.  Especially useful for writing C++
classes, functions, macros, and templates that need to work with
a variable number of arguments.

USAGE:
       pump.py SOURCE_FILE

EXAMPLES:
       pump.py foo.cc.pump
         Converts foo.cc.pump to foo.cc.

GRAMMAR:
       CODE ::= ATOMIC_CODE*
       ATOMIC_CODE ::= $var ID = EXPRESSION
           | $var ID = [[ CODE ]]
           | $range ID EXPRESSION..EXPRESSION
           | $for ID SEPARATOR [[ CODE ]]
           | $($)
           | $ID
           | $(EXPRESSION)
           | $if EXPRESSION [[ CODE ]] ELSE_BRANCH
           | [[ CODE ]]
           | RAW_CODE
       SEPARATOR ::= RAW_CODE | EMPTY
       ELSE_BRANCH ::= $else [[ CODE ]]
           | $elif EXPRESSION [[ CODE ]] ELSE_BRANCH
           | EMPTY
       EXPRESSION has Python syntax.
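
ILLUSTRATION:
       A minimal sketch of the grammar in action ('Foo' is only a
       placeholder identifier chosen for this example).  The fragment

           $range i 1..3
           $for i , [[Foo($i)]]

       expands to

           Foo(1), Foo(2), Foo(3)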
michael@0: """ michael@0: michael@0: __author__ = 'wan@google.com (Zhanyong Wan)' michael@0: michael@0: import os michael@0: import re michael@0: import sys michael@0: michael@0: michael@0: TOKEN_TABLE = [ michael@0: (re.compile(r'\$var\s+'), '$var'), michael@0: (re.compile(r'\$elif\s+'), '$elif'), michael@0: (re.compile(r'\$else\s+'), '$else'), michael@0: (re.compile(r'\$for\s+'), '$for'), michael@0: (re.compile(r'\$if\s+'), '$if'), michael@0: (re.compile(r'\$range\s+'), '$range'), michael@0: (re.compile(r'\$[_A-Za-z]\w*'), '$id'), michael@0: (re.compile(r'\$\(\$\)'), '$($)'), michael@0: (re.compile(r'\$'), '$'), michael@0: (re.compile(r'\[\[\n?'), '[['), michael@0: (re.compile(r'\]\]\n?'), ']]'), michael@0: ] michael@0: michael@0: michael@0: class Cursor: michael@0: """Represents a position (line and column) in a text file.""" michael@0: michael@0: def __init__(self, line=-1, column=-1): michael@0: self.line = line michael@0: self.column = column michael@0: michael@0: def __eq__(self, rhs): michael@0: return self.line == rhs.line and self.column == rhs.column michael@0: michael@0: def __ne__(self, rhs): michael@0: return not self == rhs michael@0: michael@0: def __lt__(self, rhs): michael@0: return self.line < rhs.line or ( michael@0: self.line == rhs.line and self.column < rhs.column) michael@0: michael@0: def __le__(self, rhs): michael@0: return self < rhs or self == rhs michael@0: michael@0: def __gt__(self, rhs): michael@0: return rhs < self michael@0: michael@0: def __ge__(self, rhs): michael@0: return rhs <= self michael@0: michael@0: def __str__(self): michael@0: if self == Eof(): michael@0: return 'EOF' michael@0: else: michael@0: return '%s(%s)' % (self.line + 1, self.column) michael@0: michael@0: def __add__(self, offset): michael@0: return Cursor(self.line, self.column + offset) michael@0: michael@0: def __sub__(self, offset): michael@0: return Cursor(self.line, self.column - offset) michael@0: michael@0: def Clone(self): michael@0: """Returns a copy of self.""" michael@0: michael@0: return Cursor(self.line, self.column) michael@0: michael@0: michael@0: # Special cursor to indicate the end-of-file. 
def Eof():
  """Returns the special cursor to denote the end-of-file."""
  return Cursor(-1, -1)


class Token:
  """Represents a token in a Pump source file."""

  def __init__(self, start=None, end=None, value=None, token_type=None):
    if start is None:
      self.start = Eof()
    else:
      self.start = start
    if end is None:
      self.end = Eof()
    else:
      self.end = end
    self.value = value
    self.token_type = token_type

  def __str__(self):
    return 'Token @%s: \'%s\' type=%s' % (
        self.start, self.value, self.token_type)

  def Clone(self):
    """Returns a copy of self."""

    return Token(self.start.Clone(), self.end.Clone(), self.value,
                 self.token_type)


def StartsWith(lines, pos, string):
  """Returns True iff the given position in lines starts with 'string'."""

  return lines[pos.line][pos.column:].startswith(string)


def FindFirstInLine(line, token_table):
  best_match_start = -1
  for (regex, token_type) in token_table:
    m = regex.search(line)
    if m:
      # We found regex in lines
      if best_match_start < 0 or m.start() < best_match_start:
        best_match_start = m.start()
        best_match_length = m.end() - m.start()
        best_match_token_type = token_type

  if best_match_start < 0:
    return None

  return (best_match_start, best_match_length, best_match_token_type)


def FindFirst(lines, token_table, cursor):
  """Finds the first occurrence of any string in strings in lines."""

  start = cursor.Clone()
  cur_line_number = cursor.line
  for line in lines[start.line:]:
    if cur_line_number == start.line:
      line = line[start.column:]
    m = FindFirstInLine(line, token_table)
    if m:
      # We found a regex in line.
      (start_column, length, token_type) = m
      if cur_line_number == start.line:
        start_column += start.column
      found_start = Cursor(cur_line_number, start_column)
      found_end = found_start + length
      return MakeToken(lines, found_start, found_end, token_type)
    cur_line_number += 1
  # We failed to find str in lines
  return None


def SubString(lines, start, end):
  """Returns a substring in lines."""

  if end == Eof():
    end = Cursor(len(lines) - 1, len(lines[-1]))

  if start >= end:
    return ''

  if start.line == end.line:
    return lines[start.line][start.column:end.column]

  result_lines = ([lines[start.line][start.column:]] +
                  lines[start.line + 1:end.line] +
                  [lines[end.line][:end.column]])
  return ''.join(result_lines)


def StripMetaComments(str):
  """Strip meta comments from each line in the given string."""

  # First, completely remove lines containing nothing but a meta
  # comment, including the trailing \n.
  str = re.sub(r'^\s*\$\$.*\n', '', str)

  # Then, remove meta comments from contentful lines.
  return re.sub(r'\s*\$\$.*', '', str)


def MakeToken(lines, start, end, token_type):
  """Creates a new instance of Token."""

  return Token(start, end, SubString(lines, start, end), token_type)


def ParseToken(lines, pos, regex, token_type):
  line = lines[pos.line][pos.column:]
  m = regex.search(line)
  if m and not m.start():
    return MakeToken(lines, pos, pos + m.end(), token_type)
  else:
    print 'ERROR: %s expected at %s.' % (token_type, pos)
    sys.exit(1)


ID_REGEX = re.compile(r'[_A-Za-z]\w*')
EQ_REGEX = re.compile(r'=')
REST_OF_LINE_REGEX = re.compile(r'.*?(?=$|\$\$)')
OPTIONAL_WHITE_SPACES_REGEX = re.compile(r'\s*')
WHITE_SPACE_REGEX = re.compile(r'\s')
DOT_DOT_REGEX = re.compile(r'\.\.')


def Skip(lines, pos, regex):
  line = lines[pos.line][pos.column:]
  m = re.search(regex, line)
  if m and not m.start():
    return pos + m.end()
  else:
    return pos


def SkipUntil(lines, pos, regex, token_type):
  line = lines[pos.line][pos.column:]
  m = re.search(regex, line)
  if m:
    return pos + m.start()
  else:
    print ('ERROR: %s expected on line %s after column %s.' %
           (token_type, pos.line + 1, pos.column))
    sys.exit(1)


def ParseExpTokenInParens(lines, pos):
  def ParseInParens(pos):
    pos = Skip(lines, pos, OPTIONAL_WHITE_SPACES_REGEX)
    pos = Skip(lines, pos, r'\(')
    pos = Parse(pos)
    pos = Skip(lines, pos, r'\)')
    return pos

  def Parse(pos):
    pos = SkipUntil(lines, pos, r'\(|\)', ')')
    if SubString(lines, pos, pos + 1) == '(':
      pos = Parse(pos + 1)
      pos = Skip(lines, pos, r'\)')
      return Parse(pos)
    else:
      return pos

  start = pos.Clone()
  pos = ParseInParens(pos)
  return MakeToken(lines, start, pos, 'exp')


def RStripNewLineFromToken(token):
  if token.value.endswith('\n'):
    return Token(token.start, token.end, token.value[:-1], token.token_type)
  else:
    return token


def TokenizeLines(lines, pos):
  while True:
    found = FindFirst(lines, TOKEN_TABLE, pos)
    if not found:
      yield MakeToken(lines, pos, Eof(), 'code')
      return

    if found.start == pos:
      prev_token = None
      prev_token_rstripped = None
    else:
      prev_token = MakeToken(lines, pos, found.start, 'code')
      prev_token_rstripped = RStripNewLineFromToken(prev_token)

    if found.token_type == '$var':
      if prev_token_rstripped:
        yield prev_token_rstripped
      yield found
      id_token = ParseToken(lines, found.end, ID_REGEX, 'id')
      yield id_token
      pos = Skip(lines, id_token.end, OPTIONAL_WHITE_SPACES_REGEX)

      eq_token = ParseToken(lines, pos, EQ_REGEX, '=')
      yield eq_token
      pos = Skip(lines, eq_token.end, r'\s*')

      if SubString(lines, pos, pos + 2) != '[[':
        exp_token = ParseToken(lines, pos, REST_OF_LINE_REGEX, 'exp')
        yield exp_token
        pos = Cursor(exp_token.end.line + 1, 0)
    elif found.token_type == '$for':
      if prev_token_rstripped:
        yield prev_token_rstripped
      yield found
      id_token = ParseToken(lines, found.end, ID_REGEX, 'id')
      yield id_token
      pos = Skip(lines, id_token.end, WHITE_SPACE_REGEX)
    elif found.token_type == '$range':
      if prev_token_rstripped:
        yield prev_token_rstripped
      yield found
      id_token = ParseToken(lines, found.end, ID_REGEX, 'id')
      yield id_token
      pos = Skip(lines, id_token.end, OPTIONAL_WHITE_SPACES_REGEX)

      dots_pos = SkipUntil(lines, pos, DOT_DOT_REGEX, '..')
      yield MakeToken(lines, pos, dots_pos, 'exp')
      yield MakeToken(lines, dots_pos, dots_pos + 2, '..')
      pos = dots_pos + 2
      new_pos = Cursor(pos.line + 1, 0)
      yield MakeToken(lines, pos, new_pos, 'exp')
      pos = new_pos
    elif found.token_type == '$':
      if prev_token:
        yield prev_token
      yield found
      exp_token = ParseExpTokenInParens(lines, found.end)
      yield exp_token
      pos = exp_token.end
    elif (found.token_type == ']]' or found.token_type == '$if' or
          found.token_type == '$elif' or found.token_type == '$else'):
      if prev_token_rstripped:
        yield prev_token_rstripped
      yield found
      pos = found.end
    else:
      if prev_token:
        yield prev_token
      yield found
      pos = found.end


def Tokenize(s):
  """A generator that yields the tokens in the given string."""
  if s != '':
    lines = s.splitlines(True)
    for token in TokenizeLines(lines, Cursor(0, 0)):
      yield token


class CodeNode:
  def __init__(self, atomic_code_list=None):
    self.atomic_code = atomic_code_list


class VarNode:
  def __init__(self, identifier=None, atomic_code=None):
    self.identifier = identifier
    self.atomic_code = atomic_code


class RangeNode:
  def __init__(self, identifier=None, exp1=None, exp2=None):
    self.identifier = identifier
    self.exp1 = exp1
    self.exp2 = exp2


class ForNode:
  def __init__(self, identifier=None, sep=None, code=None):
    self.identifier = identifier
    self.sep = sep
    self.code = code


class ElseNode:
  def __init__(self, else_branch=None):
    self.else_branch = else_branch


class IfNode:
  def __init__(self, exp=None, then_branch=None, else_branch=None):
    self.exp = exp
    self.then_branch = then_branch
    self.else_branch = else_branch


class RawCodeNode:
  def __init__(self, token=None):
    self.raw_code = token


class LiteralDollarNode:
  def __init__(self, token):
    self.token = token


class ExpNode:
  def __init__(self, token, python_exp):
    self.token = token
    self.python_exp = python_exp


def PopFront(a_list):
  head = a_list[0]
  a_list[:1] = []
  return head


def PushFront(a_list, elem):
  a_list[:0] = [elem]


def PopToken(a_list, token_type=None):
  token = PopFront(a_list)
  if token_type is not None and token.token_type != token_type:
    print 'ERROR: %s expected at %s' % (token_type, token.start)
    print 'ERROR: %s found instead' % (token,)
    sys.exit(1)

  return token


def PeekToken(a_list):
  if not a_list:
    return None

  return a_list[0]


def ParseExpNode(token):
  python_exp = re.sub(r'([_A-Za-z]\w*)', r'self.GetValue("\1")', token.value)
  return ExpNode(token, python_exp)


def ParseElseNode(tokens):
  def Pop(token_type=None):
    return PopToken(tokens, token_type)

  next = PeekToken(tokens)
  if not next:
    return None
  if next.token_type == '$else':
    Pop('$else')
    Pop('[[')
    code_node = ParseCodeNode(tokens)
    Pop(']]')
    return code_node
  elif next.token_type == '$elif':
    Pop('$elif')
    exp = Pop('code')
    Pop('[[')
    code_node = ParseCodeNode(tokens)
    Pop(']]')
    inner_else_node = ParseElseNode(tokens)
    return CodeNode([IfNode(ParseExpNode(exp), code_node, inner_else_node)])
  elif not next.value.strip():
    Pop('code')
    return ParseElseNode(tokens)
  else:
    return None


def ParseAtomicCodeNode(tokens):
  def Pop(token_type=None):
    return PopToken(tokens, token_type)

  head = PopFront(tokens)
  t = head.token_type
  if t == 'code':
    return RawCodeNode(head)
  elif t == '$var':
    id_token = Pop('id')
    Pop('=')
    next = PeekToken(tokens)
    if next.token_type == 'exp':
      exp_token = Pop()
      return VarNode(id_token, ParseExpNode(exp_token))
    Pop('[[')
    code_node = ParseCodeNode(tokens)
    Pop(']]')
    return VarNode(id_token, code_node)
  elif t == '$for':
    id_token = Pop('id')
    next_token = PeekToken(tokens)
    if next_token.token_type == 'code':
      sep_token = next_token
      Pop('code')
    else:
      sep_token = None
    Pop('[[')
    code_node = ParseCodeNode(tokens)
    Pop(']]')
    return ForNode(id_token, sep_token, code_node)
  elif t == '$if':
    exp_token = Pop('code')
    Pop('[[')
    code_node = ParseCodeNode(tokens)
    Pop(']]')
    else_node = ParseElseNode(tokens)
    return IfNode(ParseExpNode(exp_token), code_node, else_node)
  elif t == '$range':
    id_token = Pop('id')
    exp1_token = Pop('exp')
    Pop('..')
    exp2_token = Pop('exp')
    return RangeNode(id_token, ParseExpNode(exp1_token),
                     ParseExpNode(exp2_token))
  elif t == '$id':
    return ParseExpNode(Token(head.start + 1, head.end, head.value[1:], 'id'))
  elif t == '$($)':
    return LiteralDollarNode(head)
  elif t == '$':
    exp_token = Pop('exp')
    return ParseExpNode(exp_token)
  elif t == '[[':
    code_node = ParseCodeNode(tokens)
    Pop(']]')
    return code_node
  else:
    PushFront(tokens, head)
    return None


def ParseCodeNode(tokens):
  atomic_code_list = []
  while True:
    if not tokens:
      break
    atomic_code_node = ParseAtomicCodeNode(tokens)
    if atomic_code_node:
      atomic_code_list.append(atomic_code_node)
    else:
      break
  return CodeNode(atomic_code_list)


def ParseToAST(pump_src_text):
  """Convert the given Pump source text into an AST."""
  tokens = list(Tokenize(pump_src_text))
  code_node = ParseCodeNode(tokens)
  return code_node


class Env:
  def __init__(self):
    self.variables = []
    self.ranges = []

  def Clone(self):
    clone = Env()
    clone.variables = self.variables[:]
    clone.ranges = self.ranges[:]
    return clone

  def PushVariable(self, var, value):
    # If value looks like an int, store it as an int.
    try:
      int_value = int(value)
      if ('%s' % int_value) == value:
        value = int_value
    except Exception:
      pass
    self.variables[:0] = [(var, value)]

  def PopVariable(self):
    self.variables[:1] = []

  def PushRange(self, var, lower, upper):
    self.ranges[:0] = [(var, lower, upper)]

  def PopRange(self):
    self.ranges[:1] = []

  def GetValue(self, identifier):
    for (var, value) in self.variables:
      if identifier == var:
        return value

    print 'ERROR: meta variable %s is undefined.' % (identifier,)
    sys.exit(1)

  def EvalExp(self, exp):
    try:
      result = eval(exp.python_exp)
    except Exception, e:
      print 'ERROR: caught exception %s: %s' % (e.__class__.__name__, e)
      print ('ERROR: failed to evaluate meta expression %s at %s' %
             (exp.python_exp, exp.token.start))
      sys.exit(1)
    return result

  def GetRange(self, identifier):
    for (var, lower, upper) in self.ranges:
      if identifier == var:
        return (lower, upper)

    print 'ERROR: range %s is undefined.' % (identifier,)
    sys.exit(1)


class Output:
  def __init__(self):
    self.string = ''

  def GetLastLine(self):
    index = self.string.rfind('\n')
    if index < 0:
      return ''

    return self.string[index + 1:]

  def Append(self, s):
    self.string += s


def RunAtomicCode(env, node, output):
  if isinstance(node, VarNode):
    identifier = node.identifier.value.strip()
    result = Output()
    RunAtomicCode(env.Clone(), node.atomic_code, result)
    value = result.string
    env.PushVariable(identifier, value)
  elif isinstance(node, RangeNode):
    identifier = node.identifier.value.strip()
    lower = int(env.EvalExp(node.exp1))
    upper = int(env.EvalExp(node.exp2))
    env.PushRange(identifier, lower, upper)
  elif isinstance(node, ForNode):
    identifier = node.identifier.value.strip()
    if node.sep is None:
      sep = ''
    else:
      sep = node.sep.value
    (lower, upper) = env.GetRange(identifier)
    for i in range(lower, upper + 1):
      new_env = env.Clone()
      new_env.PushVariable(identifier, i)
      RunCode(new_env, node.code, output)
      if i != upper:
        output.Append(sep)
  elif isinstance(node, RawCodeNode):
    output.Append(node.raw_code.value)
  elif isinstance(node, IfNode):
    cond = env.EvalExp(node.exp)
    if cond:
      RunCode(env.Clone(), node.then_branch, output)
    elif node.else_branch is not None:
      RunCode(env.Clone(), node.else_branch, output)
  elif isinstance(node, ExpNode):
    value = env.EvalExp(node)
    output.Append('%s' % (value,))
  elif isinstance(node, LiteralDollarNode):
    output.Append('$')
  elif isinstance(node, CodeNode):
    RunCode(env.Clone(), node, output)
  else:
    print 'BAD'
    print node
    sys.exit(1)


def RunCode(env, code_node, output):
  for atomic_code in code_node.atomic_code:
    RunAtomicCode(env, atomic_code, output)


def IsSingleLineComment(cur_line):
  return '//' in cur_line


def IsInPreprocessorDirective(prev_lines, cur_line):
  if cur_line.lstrip().startswith('#'):
    return True
  return prev_lines and prev_lines[-1].endswith('\\')


def WrapComment(line, output):
  loc = line.find('//')
  before_comment = line[:loc].rstrip()
  if before_comment == '':
    indent = loc
  else:
    output.append(before_comment)
    indent = len(before_comment) - len(before_comment.lstrip())
  prefix = indent*' ' + '// '
  max_len = 80 - len(prefix)
  comment = line[loc + 2:].strip()
  segs = [seg for seg in re.split(r'(\w+\W*)', comment) if seg != '']
  cur_line = ''
  for seg in segs:
    if len((cur_line + seg).rstrip()) < max_len:
      cur_line += seg
    else:
      if cur_line.strip() != '':
        output.append(prefix + cur_line.rstrip())
      cur_line = seg.lstrip()
  if cur_line.strip() != '':
    output.append(prefix + cur_line.strip())


def WrapCode(line, line_concat, output):
  indent = len(line) - len(line.lstrip())
  prefix = indent*' '    # Prefix of the current line
  max_len = 80 - indent - len(line_concat)    # Maximum length of the current line
  new_prefix = prefix + 4*' '    # Prefix of a continuation line
  new_max_len = max_len - 4    # Maximum length of a continuation line
  # Prefers to wrap a line after a ',' or ';'.
  segs = [seg for seg in re.split(r'([^,;]+[,;]?)', line.strip()) if seg != '']
  cur_line = ''  # The current line without leading spaces.
  for seg in segs:
    # If the line is still too long, wrap at a space.
    while cur_line == '' and len(seg.strip()) > max_len:
      seg = seg.lstrip()
      split_at = seg.rfind(' ', 0, max_len)
      output.append(prefix + seg[:split_at].strip() + line_concat)
      seg = seg[split_at + 1:]
      prefix = new_prefix
      max_len = new_max_len

    if len((cur_line + seg).rstrip()) < max_len:
      cur_line = (cur_line + seg).lstrip()
    else:
      output.append(prefix + cur_line.rstrip() + line_concat)
      prefix = new_prefix
      max_len = new_max_len
      cur_line = seg.lstrip()
  if cur_line.strip() != '':
    output.append(prefix + cur_line.strip())


def WrapPreprocessorDirective(line, output):
  WrapCode(line, ' \\', output)


def WrapPlainCode(line, output):
  WrapCode(line, '', output)


def IsMultiLineIWYUPragma(line):
  return re.search(r'/\* IWYU pragma: ', line)


def IsHeaderGuardIncludeOrOneLineIWYUPragma(line):
  return (re.match(r'^#(ifndef|define|endif\s*//)\s*[\w_]+\s*$', line) or
          re.match(r'^#include\s', line) or
          # Don't break IWYU pragmas, either; that causes iwyu.py problems.
          re.search(r'// IWYU pragma: ', line))


def WrapLongLine(line, output):
  line = line.rstrip()
  if len(line) <= 80:
    output.append(line)
  elif IsSingleLineComment(line):
    if IsHeaderGuardIncludeOrOneLineIWYUPragma(line):
      # The style guide made an exception to allow long header guard lines,
      # includes and IWYU pragmas.
      output.append(line)
    else:
      WrapComment(line, output)
  elif IsInPreprocessorDirective(output, line):
    if IsHeaderGuardIncludeOrOneLineIWYUPragma(line):
      # The style guide made an exception to allow long header guard lines,
      # includes and IWYU pragmas.
      output.append(line)
    else:
      WrapPreprocessorDirective(line, output)
  elif IsMultiLineIWYUPragma(line):
    output.append(line)
  else:
    WrapPlainCode(line, output)


def BeautifyCode(string):
  lines = string.splitlines()
  output = []
  for line in lines:
    WrapLongLine(line, output)
  output2 = [line.rstrip() for line in output]
  return '\n'.join(output2) + '\n'


def ConvertFromPumpSource(src_text):
  """Return the text generated from the given Pump source text."""
  ast = ParseToAST(StripMetaComments(src_text))
  output = Output()
  RunCode(Env(), ast, output)
  return BeautifyCode(output.string)


def main(argv):
  if len(argv) == 1:
    print __doc__
    sys.exit(1)

  file_path = argv[-1]
  output_str = ConvertFromPumpSource(file(file_path, 'r').read())
  if file_path.endswith('.pump'):
    output_file_path = file_path[:-5]
  else:
    output_file_path = '-'
  if output_file_path == '-':
    print output_str,
  else:
    output_file = file(output_file_path, 'w')
    output_file.write('// This file was GENERATED by command:\n')
    output_file.write('// %s %s\n' %
                      (os.path.basename(__file__), os.path.basename(file_path)))
    output_file.write('// DO NOT EDIT BY HAND!!!\n\n')
    output_file.write(output_str)
    output_file.close()


if __name__ == '__main__':
  main(sys.argv)
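

# Illustrative programmatic use (a sketch, separate from the command-line flow
# in main() above; 'Foo' is only a placeholder name chosen for the example):
#
#   src = '$range i 1..3\n$for i , [[Foo($i)]]\n'
#   print ConvertFromPumpSource(src),   # prints: Foo(1), Foo(2), Foo(3)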