Thu, 22 Jan 2015 13:21:57 +0100
Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6
michael@0 | 1 | # Copyright 2011, Google Inc. |
michael@0 | 2 | # All rights reserved. |
michael@0 | 3 | # |
michael@0 | 4 | # Redistribution and use in source and binary forms, with or without |
michael@0 | 5 | # modification, are permitted provided that the following conditions are |
michael@0 | 6 | # met: |
michael@0 | 7 | # |
michael@0 | 8 | # * Redistributions of source code must retain the above copyright |
michael@0 | 9 | # notice, this list of conditions and the following disclaimer. |
michael@0 | 10 | # * Redistributions in binary form must reproduce the above |
michael@0 | 11 | # copyright notice, this list of conditions and the following disclaimer |
michael@0 | 12 | # in the documentation and/or other materials provided with the |
michael@0 | 13 | # distribution. |
michael@0 | 14 | # * Neither the name of Google Inc. nor the names of its |
michael@0 | 15 | # contributors may be used to endorse or promote products derived from |
michael@0 | 16 | # this software without specific prior written permission. |
michael@0 | 17 | # |
michael@0 | 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
michael@0 | 19 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
michael@0 | 20 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
michael@0 | 21 | # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
michael@0 | 22 | # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
michael@0 | 23 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
michael@0 | 24 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
michael@0 | 25 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
michael@0 | 26 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
michael@0 | 27 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
michael@0 | 28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
michael@0 | 29 | |
michael@0 | 30 | |
michael@0 | 31 | """Utilities for parsing and formatting headers that follow the grammar defined |
michael@0 | 32 | in HTTP RFC http://www.ietf.org/rfc/rfc2616.txt. |
michael@0 | 33 | """ |
michael@0 | 34 | |
michael@0 | 35 | |
michael@0 | 36 | import urlparse |
michael@0 | 37 | |
michael@0 | 38 | |
# Characters that cannot appear inside a token: consume_token() stops at
# the first of them and quote_if_necessary() quotes any string containing
# one. The last two entries are the space and the horizontal tab.
_SEPARATORS = '()<>@,;:\\"/[]?={} \t'
michael@0 | 40 | |
michael@0 | 41 | |
def _is_char(c):
    """Tells whether the single character c belongs to CHAR of the HTTP RFC.

    CHAR covers the code points 0 through 127.
    """

    code_point = ord(c)
    return code_point < 128
michael@0 | 46 | |
michael@0 | 47 | |
def _is_ctl(c):
    """Tells whether the single character c belongs to CTL of the HTTP RFC.

    CTL covers the code points 0 through 31 plus 127 (DEL).
    """

    code_point = ord(c)
    return code_point < 32 or code_point == 127
michael@0 | 52 | |
michael@0 | 53 | |
class ParsingState(object):
    """Cursor over the data being parsed.

    Attributes:
        data: the complete input handed to the constructor.
        head: index into data of the next unread position; starts at 0.
    """

    def __init__(self, data):
        # Parsing always begins at the first position of the input.
        self.head = 0
        self.data = data
michael@0 | 59 | |
michael@0 | 60 | |
def peek(state, pos=0):
    """Returns the character located pos positions after the head of the
    data without consuming anything, or None when that position lies at
    or beyond the end of the data.
    """

    index = state.head + pos
    if index < len(state.data):
        return state.data[index]

    return None
michael@0 | 68 | |
michael@0 | 69 | |
def consume(state, amount=1):
    """Removes amount bytes from the head and returns them.

    When fewer than amount bytes remain, nothing is consumed and None is
    returned.
    """

    start = state.head
    end = start + amount
    if end > len(state.data):
        return None

    chunk = state.data[start:end]
    # Advance the head past the bytes that are handed back.
    state.head = end
    return chunk
michael@0 | 81 | |
michael@0 | 82 | |
def consume_string(state, expected):
    """Consumes the expected string from the head of the parsing state.

    Returns True when the data at the head starts with expected, in which
    case the head is moved past it. Otherwise returns False and leaves the
    state untouched.
    """

    # Check every character first so that nothing is consumed on a
    # partial match.
    for offset, wanted in enumerate(expected):
        if peek(state, offset) != wanted:
            return False

    consume(state, len(expected))
    return True
michael@0 | 98 | |
michael@0 | 99 | |
def consume_lws(state):
    """Consumes one LWS from the head.

    Returns True when a LWS was consumed. Otherwise returns False and
    restores the head to where it was on entry.

    LWS = [CRLF] 1*( SP | HT )
    """

    saved_head = state.head

    # The leading CRLF is optional, so the outcome is deliberately ignored.
    consume_string(state, '\r\n')

    # Count the run of spaces and horizontal tabs that follows.
    run_length = 0
    while peek(state, run_length) in (' ', '\t'):
        run_length += 1

    if run_length == 0:
        # A bare CRLF is not LWS: undo the optional CRLF consumption.
        state.head = saved_head
        return False

    consume(state, run_length)
    return True
michael@0 | 124 | |
michael@0 | 125 | |
def consume_lwses(state):
    """Consumes *LWS, i.e. as many consecutive LWS as are present at the
    head (possibly none).
    """

    while True:
        if not consume_lws(state):
            return
michael@0 | 131 | |
michael@0 | 132 | |
def consume_token(state):
    """Consumes a token from the head and returns it.

    A token is the longest run of characters at the head that are CHAR but
    neither CTL nor separators. Returns None, consuming nothing, when that
    run is empty.
    """

    length = 0

    while True:
        ch = peek(state, length)
        if ch is None:
            # Reached the end of the data.
            break
        if ch in _SEPARATORS or _is_ctl(ch) or not _is_char(ch):
            # First character that cannot be part of a token.
            break
        length += 1

    if length == 0:
        return None

    return consume(state, length)
michael@0 | 149 | |
michael@0 | 150 | |
def consume_token_or_quoted_string(state):
    """Consumes a token or a quoted-string from the head.

    Returns the token, or the contents of the quoted-string with the
    surrounding double quotes removed, quoted-pairs resolved and every LWS
    replaced by a single space. Returns None when neither a token nor a
    well-formed quoted-string is found; a malformed quoted-string leaves
    the head where it was on entry.
    """

    start = state.head

    # Anything that does not open with a double quote must be a token.
    if not consume_string(state, '"'):
        return consume_token(state)

    pieces = []

    # True right after a backslash: the next character is taken literally.
    escaped = False

    while True:
        # LWS is only recognized outside of a quoted-pair.
        if not escaped and consume_lws(state):
            pieces.append(' ')
            continue

        ch = consume(state)
        if ch is None:
            # quoted-string is not enclosed with double quotation
            state.head = start
            return None

        if escaped:
            escaped = False
            if not _is_char(ch):
                # Non CHAR character found in quoted-pair
                state.head = start
                return None
            pieces.append(ch)
            continue

        if ch == '"':
            # Closing quote: the quoted-string is complete.
            return ''.join(pieces)

        if ch == '\\':
            escaped = True
            continue

        if _is_ctl(ch):
            # Invalid character found in qdtext
            state.head = start
            return None

        pieces.append(ch)
michael@0 | 193 | |
michael@0 | 194 | |
def quote_if_necessary(s):
    """Formats an arbitrary string as a token or, when needed, as a
    quoted-string.

    Args:
        s: the string to format.

    Returns:
        s unchanged when it is a non-empty run of token characters.
        Otherwise s wrapped in double quotes, with every double quote,
        backslash and CTL character preceded by a backslash so that
        consume_token_or_quoted_string() gives those characters back
        unchanged. The empty string is returned as an empty quoted-string.
    """

    if s == '':
        return '""'

    needs_quotes = False
    result = []
    for c in s:
        # The double quote and the backslash are both members of
        # _SEPARATORS, so every character escaped below also forces
        # quoting.
        if c in _SEPARATORS or _is_ctl(c) or not _is_char(c):
            needs_quotes = True

        # The backslash has to be escaped as well: left bare it would be
        # read back as the start of a quoted-pair and be dropped, and as
        # the last character it would swallow the closing double quote.
        if c == '"' or c == '\\' or _is_ctl(c):
            result.append('\\' + c)
        else:
            result.append(c)

    body = ''.join(result)
    if needs_quotes:
        return '"' + body + '"'

    return body
michael@0 | 216 | |
michael@0 | 217 | |
def parse_uri(uri):
    """Parses a URI and returns a (host, port, resource) tuple.

    Args:
        uri: URI string. Only absolute URIs whose scheme is 'ws' or 'wss'
            are decomposed.

    Returns:
        (None, None, uri) when the scheme is neither 'ws' nor 'wss'; the
        value is then taken to be a relative URI and is handed back as is,
        without validation.
        (None, None, None) when the URI is malformed: urlsplit rejects it,
        the host is missing or the port cannot be interpreted.
        (host, port, resource) otherwise. port defaults to 80 for 'ws' and
        to 443 for 'wss'. resource is the path ('/' when the path is
        empty) followed by '?query' and '#fragment' when those parts are
        present.
    """

    try:
        parsed = urlparse.urlsplit(uri)
    except ValueError:
        # urlsplit itself rejects some malformed authorities, e.g. the
        # unbalanced bracket in 'ws://[::1/path'. Report them like any
        # other malformed absolute URI instead of letting the exception
        # escape.
        return None, None, None

    if parsed.scheme != 'wss' and parsed.scheme != 'ws':
        # |uri| must be a relative URI.
        # TODO(toyoshim): Should validate |uri|.
        return None, None, uri

    if parsed.hostname is None:
        return None, None, None

    try:
        port = parsed.port
    except ValueError:
        # The port property raises ValueError on a port description that
        # cannot be converted to a number, such as the null port in
        # 'ws://host:/path' on old Python versions.
        return None, None, None

    if port is None:
        # No explicit port: fall back to the default of the scheme.
        if parsed.scheme == 'ws':
            port = 80
        else:
            port = 443

    path = parsed.path
    if not path:
        path += '/'
    if parsed.query:
        path += '?' + parsed.query
    if parsed.fragment:
        path += '#' + parsed.fragment

    return parsed.hostname, port, path
michael@0 | 253 | |
michael@0 | 254 | |
# urlparse in Python2.5.1 doesn't have 'ws' and 'wss' entries in
# uses_netloc, so register them here. Each scheme is checked on its own so
# that 'wss' still gets registered when only 'ws' is already present.
for _scheme in ('ws', 'wss'):
    if _scheme not in urlparse.uses_netloc:
        urlparse.uses_netloc.append(_scheme)
# Do not leave the loop variable behind in the module namespace.
del _scheme
michael@0 | 261 | |
michael@0 | 262 | |
michael@0 | 263 | # vi:sts=4 sw=4 et |