# Copyright 2011, Google Inc.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
#     * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


"""Utilities for parsing and formatting headers that follow the grammar defined
in HTTP RFC http://www.ietf.org/rfc/rfc2616.txt.
"""


try:
    import urlparse
except ImportError:
    # Python 3 moved the urlparse module to urllib.parse; bind it to the same
    # name so the rest of this module works on both major versions.
    from urllib import parse as urlparse


# The "separators" characters of the HTTP RFC; none of them may appear in a
# token.
_SEPARATORS = '()<>@,;:\\"/[]?={} \t'


def _is_char(c):
    """Returns true iff c is in CHAR as specified in HTTP RFC."""

    return ord(c) <= 127


def _is_ctl(c):
    """Returns true iff c is in CTL as specified in HTTP RFC."""

    return ord(c) <= 31 or ord(c) == 127


class ParsingState(object):
    """Cursor over the string being parsed.

    Attributes:
        data: the complete input string.
        head: index into data of the next character to be read.
    """

    def __init__(self, data):
        self.data = data
        self.head = 0


def peek(state, pos=0):
    """Peeks the character at pos from the head of data.

    Returns None when head + pos is at or beyond the end of the data. The
    head is not moved.
    """

    if state.head + pos >= len(state.data):
        return None

    return state.data[state.head + pos]


def consume(state, amount=1):
    """Consumes specified amount of bytes from the head and returns the
    consumed bytes. If there's not enough bytes to consume, returns None
    and leaves the head where it was.
    """

    if state.head + amount > len(state.data):
        return None

    result = state.data[state.head:state.head + amount]
    state.head = state.head + amount
    return result


def consume_string(state, expected):
    """Given a parsing state and a expected string, consumes the string from
    the head. Returns True if consumed successfully. Otherwise, returns
    False and leaves the head where it was.
    """

    pos = 0

    # Compare everything first so that nothing is consumed on a mismatch.
    for c in expected:
        if c != peek(state, pos):
            return False
        pos += 1

    consume(state, pos)
    return True


def consume_lws(state):
    """Consumes a LWS from the head. Returns True if any LWS is consumed.
    Otherwise, returns False and leaves the head where it was.

    LWS = [CRLF] 1*( SP | HT )
    """

    original_head = state.head

    # The leading CRLF is optional, so the result is deliberately ignored.
    consume_string(state, '\r\n')

    pos = 0

    while True:
        c = peek(state, pos)
        if c == ' ' or c == '\t':
            pos += 1
        else:
            if pos == 0:
                # No SP/HT found. Rewind so that a CRLF consumed above is
                # given back to the caller.
                state.head = original_head
                return False
            else:
                consume(state, pos)
                return True


def consume_lwses(state):
    """Consumes *LWS from the head."""

    while consume_lws(state):
        pass


def consume_token(state):
    """Consumes a token from the head. Returns the token or None if no token
    was found.
    """

    pos = 0

    while True:
        c = peek(state, pos)
        # A token ends at the end of data or at the first character that is
        # a separator, a CTL or not a CHAR.
        if c is None or c in _SEPARATORS or _is_ctl(c) or not _is_char(c):
            if pos == 0:
                return None

            return consume(state, pos)
        else:
            pos += 1


def consume_token_or_quoted_string(state):
    """Consumes a token or a quoted-string, and returns the token or unquoted
    string. If no token or quoted-string was found, returns None.

    Inside a quoted-string each LWS is replaced by a single space and each
    quoted-pair is replaced by the character it escapes. When the
    quoted-string is malformed, the head is restored to where it was on
    entry.
    """

    original_head = state.head

    if not consume_string(state, '"'):
        return consume_token(state)

    result = []

    # True right after a backslash, i.e. the next character is the second
    # half of a quoted-pair and must be taken literally.
    expect_quoted_pair = False

    while True:
        if not expect_quoted_pair and consume_lws(state):
            result.append(' ')
            continue

        c = consume(state)
        if c is None:
            # quoted-string is not enclosed with double quotation
            state.head = original_head
            return None
        elif expect_quoted_pair:
            expect_quoted_pair = False
            if _is_char(c):
                result.append(c)
            else:
                # Non CHAR character found in quoted-pair
                state.head = original_head
                return None
        elif c == '\\':
            expect_quoted_pair = True
        elif c == '"':
            return ''.join(result)
        elif _is_ctl(c):
            # Invalid (CTL) character found in qdtext
            state.head = original_head
            return None
        else:
            result.append(c)


def quote_if_necessary(s):
    """Quotes arbitrary string into quoted-string.

    Returns s unchanged when it is a valid token. Otherwise returns s
    enclosed in double quotes, with every double quote, backslash and CTL
    character escaped as a quoted-pair. The empty string is returned as '""'.
    """

    quote = False
    if s == '':
        return '""'

    result = []
    for c in s:
        # '"' and '\\' are both members of _SEPARATORS, so they force quoting
        # as well.
        if c in _SEPARATORS or _is_ctl(c) or not _is_char(c):
            quote = True

        # The backslash must be escaped too. Left bare it would start a
        # quoted-pair, so the following character (possibly the closing
        # quote) would be swallowed when the value is parsed back.
        if c == '"' or c == '\\' or _is_ctl(c):
            result.append('\\' + c)
        else:
            result.append(c)

    if quote:
        return '"' + ''.join(result) + '"'
    else:
        return ''.join(result)


def parse_uri(uri):
    """Parse absolute URI then return host, port and resource.

    Returns:
        A (host, port, resource) tuple.
        - For a 'ws' or 'wss' URI: the hostname, the explicit port or the
          scheme default (80 for ws, 443 for wss), and the path (defaulting
          to '/') followed by '?query' and '#fragment' when present.
        - For any other input: (None, None, uri); the input is taken to be
          a relative URI and is returned as is.
        - For a 'ws'/'wss' URI without a hostname, or whose port cannot be
          parsed: (None, None, None).
    """

    parsed = urlparse.urlsplit(uri)
    if parsed.scheme != 'wss' and parsed.scheme != 'ws':
        # |uri| must be a relative URI.
        # TODO(toyoshim): Should validate |uri|.
        return None, None, uri

    if parsed.hostname is None:
        return None, None, None

    port = None
    try:
        port = parsed.port
    except ValueError:
        # The port property raises ValueError when the port description is
        # invalid. The original author observed this for a null port such as
        # 'ws://host:/path'.
        return None, None, None

    if port is None:
        if parsed.scheme == 'ws':
            port = 80
        else:
            port = 443

    path = parsed.path
    if not path:
        path += '/'
    if parsed.query:
        path += '?' + parsed.query
    if parsed.fragment:
        path += '#' + parsed.fragment

    return parsed.hostname, port, path


# urlparse in Python2.5.1 doesn't have 'ws' and 'wss' entries. Register each
# scheme independently so that one being present never hides the absence of
# the other.
for _scheme in ('ws', 'wss'):
    if _scheme not in urlparse.uses_netloc:
        urlparse.uses_netloc.append(_scheme)


# vi:sts=4 sw=4 et