michael@0: # Copyright 2011, Google Inc.
michael@0: # All rights reserved.
michael@0: #
michael@0: # Redistribution and use in source and binary forms, with or without
michael@0: # modification, are permitted provided that the following conditions are
michael@0: # met:
michael@0: #
michael@0: #     * Redistributions of source code must retain the above copyright
michael@0: # notice, this list of conditions and the following disclaimer.
michael@0: #     * Redistributions in binary form must reproduce the above
michael@0: # copyright notice, this list of conditions and the following disclaimer
michael@0: # in the documentation and/or other materials provided with the
michael@0: # distribution.
michael@0: #     * Neither the name of Google Inc. nor the names of its
michael@0: # contributors may be used to endorse or promote products derived from
michael@0: # this software without specific prior written permission.
michael@0: #
michael@0: # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
michael@0: # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
michael@0: # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
michael@0: # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
michael@0: # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
michael@0: # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
michael@0: # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
michael@0: # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
michael@0: # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
michael@0: # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
michael@0: # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
michael@0: 
michael@0: 
michael@0: """Utilities for parsing and formatting headers that follow the grammar defined
michael@0: in HTTP RFC http://www.ietf.org/rfc/rfc2616.txt.
michael@0: """
michael@0: 
michael@0: 
michael@0: import urlparse
michael@0: 
michael@0: 
# Separator characters of the HTTP RFC grammar. consume_token() stops at any
# of them, and quote_if_necessary() quotes a string that contains one.
_SEPARATORS = (
    '()<>@,;:\\"/[]?={}'  # visible separator characters
    ' \t'  # SP and HT
)
michael@0: 
michael@0: 
def _is_char(c):
    """Tells whether c belongs to CHAR as specified in HTTP RFC, i.e.
    whether its character code is in the range 0-127.
    """

    return ord(c) < 128
michael@0: 
michael@0: 
def _is_ctl(c):
    """Tells whether c belongs to CTL as specified in HTTP RFC, i.e.
    whether its character code is in the range 0-31 or is 127.
    """

    code = ord(c)
    return code < 32 or code == 127
michael@0: 
michael@0: 
class ParsingState(object):
    """Keeps the data being parsed together with the current read position.

    data is stored exactly as passed in. head is the index of the next
    unread position in data and starts at 0.
    """

    def __init__(self, data):
        self.head = 0
        self.data = data
michael@0: 
michael@0: 
def peek(state, pos=0):
    """Returns the character located pos positions after the head of data
    without moving the head. Returns None if that position is at or beyond
    the end of data.
    """

    index = state.head + pos
    if index < len(state.data):
        return state.data[index]

    return None
michael@0: 
michael@0: 
def consume(state, amount=1):
    """Removes the given amount of bytes from the head and returns them.
    The head advances by amount. If fewer than amount bytes remain, nothing
    is consumed and None is returned.
    """

    end = state.head + amount
    if end > len(state.data):
        return None

    chunk = state.data[state.head:end]
    state.head = end
    return chunk
michael@0: 
michael@0: 
def consume_string(state, expected):
    """Consumes the expected string from the head of the parsing state.

    Returns True and advances the head past the string if the data at the
    head matches expected. Otherwise, returns False and leaves the head
    where it was.
    """

    matched = 0

    for offset, wanted in enumerate(expected):
        if peek(state, offset) != wanted:
            return False
        matched = offset + 1

    consume(state, matched)
    return True
michael@0: 
michael@0: 
def consume_lws(state):
    """Consumes one LWS from the head. Returns True if a LWS was consumed.
    Otherwise, returns False and leaves the head where it was.

    LWS = [CRLF] 1*( SP | HT )
    """

    saved_head = state.head

    # The leading CRLF is optional, so the result is deliberately ignored.
    consume_string(state, '\r\n')

    # Count the run of SP / HT that follows.
    count = 0
    while peek(state, count) in (' ', '\t'):
        count += 1

    if count == 0:
        # No SP / HT found; give back the CRLF that may have been consumed.
        state.head = saved_head
        return False

    consume(state, count)
    return True
michael@0: 
michael@0: 
def consume_lwses(state):
    """Consumes *LWS from the head, i.e. keeps consuming LWS until none is
    left at the head.
    """

    while True:
        if not consume_lws(state):
            return
michael@0: 
michael@0: 
def consume_token(state):
    """Consumes a token from the head and returns it. Returns None, without
    consuming anything, if there is no token at the head.

    The token ends at the end of data or at the first character that is a
    separator, a CTL or not a CHAR.
    """

    length = 0

    while True:
        c = peek(state, length)
        is_token_char = not (
            c is None or c in _SEPARATORS or _is_ctl(c) or not _is_char(c))
        if not is_token_char:
            break
        length += 1

    if length == 0:
        return None

    return consume(state, length)
michael@0: 
michael@0: 
def consume_token_or_quoted_string(state):
    """Consumes either a token or a quoted-string from the head.

    Returns the token, or the contents of the quoted-string with the
    enclosing quotes removed and quoted-pairs resolved. Returns None if
    neither was found; a malformed quoted-string leaves the head where it
    was.
    """

    start = state.head

    if not consume_string(state, '"'):
        # No opening double quote, so only a plain token is acceptable.
        return consume_token(state)

    pieces = []
    in_quoted_pair = False

    while True:
        if not in_quoted_pair and consume_lws(state):
            # Each LWS consumed contributes a single space to the result.
            pieces.append(' ')
            continue

        c = consume(state)

        if c is None:
            # quoted-string is not enclosed with double quotation
            state.head = start
            return None

        if in_quoted_pair:
            in_quoted_pair = False
            if not _is_char(c):
                # Non CHAR character found in quoted-pair
                state.head = start
                return None
            pieces.append(c)
            continue

        if c == '\\':
            # The next character is taken literally as a quoted-pair.
            in_quoted_pair = True
            continue

        if c == '"':
            return ''.join(pieces)

        if _is_ctl(c):
            # Invalid (control) character found in qdtext
            state.head = start
            return None

        pieces.append(c)
michael@0: 
michael@0: 
def quote_if_necessary(s):
    """Quotes arbitrary string into quoted-string if it cannot be written
    as a bare token.

    The empty string is returned as '""'. A string containing a double
    quote, a separator, a CTL or a non CHAR character is returned enclosed
    in double quotes. Any other string is returned unchanged.

    Inside the quotes, double quotes, backslashes and CTLs are written as
    quoted-pairs (preceded by a backslash). The backslash must be escaped
    too: it starts a quoted-pair, so a bare one would swallow the character
    following it when the result is parsed back by
    consume_token_or_quoted_string.
    """

    if s == '':
        return '""'

    needs_quote = False
    result = []
    for c in s:
        if c == '"' or c in _SEPARATORS or _is_ctl(c) or not _is_char(c):
            needs_quote = True

        # A backslash is one of _SEPARATORS, so whenever one gets escaped
        # here the result is also enclosed in double quotes.
        if c == '"' or c == '\\' or _is_ctl(c):
            result.append('\\' + c)
        else:
            result.append(c)

    escaped = ''.join(result)
    if needs_quote:
        return '"' + escaped + '"'

    return escaped
michael@0: 
michael@0: 
def parse_uri(uri):
    """Parse absolute URI then return host, port and resource.

    Returns a (host, port, resource) tuple:
    - For a ws or wss URI, host is the parsed hostname, port is the port
      given in the URI or the default (80 for ws, 443 for wss), and resource
      is the path followed by the query and the fragment if present. An
      empty path is replaced by '/'.
    - For any other scheme, including no scheme at all, uri is treated as a
      relative URI and (None, None, uri) is returned.
    - If a ws or wss URI has no hostname or its port cannot be read,
      (None, None, None) is returned.
    """

    parsed = urlparse.urlsplit(uri)
    if parsed.scheme not in ('ws', 'wss'):
        # |uri| must be a relative URI.
        # TODO(toyoshim): Should validate |uri|.
        return None, None, uri

    if parsed.hostname is None:
        return None, None, None

    try:
        port = parsed.port
    except ValueError:
        # The port property raises ValueError on an invalid port description
        # such as the null port in 'ws://host:/path'.
        return None, None, None

    if port is None:
        # No explicit port; fall back to the default for the scheme.
        if parsed.scheme == 'ws':
            port = 80
        else:
            port = 443

    path = parsed.path
    if not path:
        path = '/'
    if parsed.query:
        path += '?' + parsed.query
    if parsed.fragment:
        path += '#' + parsed.fragment

    return parsed.hostname, port, path
michael@0: 
michael@0: 
# Make sure urlparse treats 'ws' and 'wss' as schemes that carry a network
# location; urlparse in Python2.5.1 doesn't have 'ws' and 'wss' entries.
# Each scheme is checked on its own so that a list holding only one of them
# still ends up with both, and neither is ever added twice.
for _scheme in ('ws', 'wss'):
    if _scheme not in urlparse.uses_netloc:
        urlparse.uses_netloc.append(_scheme)
del _scheme
michael@0: 
michael@0: 
michael@0: # vi:sts=4 sw=4 et