|
1 # Copyright 2011, Google Inc. |
|
2 # All rights reserved. |
|
3 # |
|
4 # Redistribution and use in source and binary forms, with or without |
|
5 # modification, are permitted provided that the following conditions are |
|
6 # met: |
|
7 # |
|
8 # * Redistributions of source code must retain the above copyright |
|
9 # notice, this list of conditions and the following disclaimer. |
|
10 # * Redistributions in binary form must reproduce the above |
|
11 # copyright notice, this list of conditions and the following disclaimer |
|
12 # in the documentation and/or other materials provided with the |
|
13 # distribution. |
|
14 # * Neither the name of Google Inc. nor the names of its |
|
15 # contributors may be used to endorse or promote products derived from |
|
16 # this software without specific prior written permission. |
|
17 # |
|
18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
|
19 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
|
20 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
|
21 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
|
22 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
|
23 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
|
24 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
|
25 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
|
26 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
|
27 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
|
28 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|
29 |
|
30 |
|
31 """Utilities for parsing and formatting headers that follow the grammar defined |
|
32 in HTTP RFC http://www.ietf.org/rfc/rfc2616.txt. |
|
33 """ |
|
34 |
|
35 |
|
36 import urlparse |
|
37 |
|
38 |
|
# Separator characters of the HTTP grammar, including SP and HT. A token
# ends at the first of these (see consume_token), and a value containing any
# of them is wrapped in double quotes (see quote_if_necessary).
_SEPARATORS = '()<>@,;:\\"/[]?={} \t'
|
40 |
|
41 |
|
def _is_char(c):
    """Tells whether the single character c belongs to CHAR of the HTTP RFC,
    i.e. whether its code is within the 7-bit range 0-127.
    """

    return ord(c) < 128
|
46 |
|
47 |
|
def _is_ctl(c):
    """Tells whether the single character c belongs to CTL of the HTTP RFC,
    i.e. whether its code is in 0-31 or is 127 (DEL).
    """

    code = ord(c)
    return code < 32 or code == 127
|
52 |
|
53 |
|
class ParsingState(object):
    """Holds the data being parsed together with the current read position.

    The module-level peek/consume functions read 'data' starting at index
    'head' and advance 'head' as input is consumed.
    """

    def __init__(self, data):
        # Index of the next unconsumed element of data.
        self.head = 0
        # The sequence being parsed; never modified by this class.
        self.data = data
|
59 |
|
60 |
|
def peek(state, pos=0):
    """Returns the character located pos positions after the head of
    state.data without consuming anything. Returns None when that position
    is at or beyond the end of the data.
    """

    index = state.head + pos
    if index < len(state.data):
        return state.data[index]

    return None
|
68 |
|
69 |
|
def consume(state, amount=1):
    """Removes amount items from the head of state.data and returns them.

    When fewer than amount items remain, nothing is consumed, state.head is
    left untouched and None is returned.
    """

    begin = state.head
    end = begin + amount
    if end > len(state.data):
        return None

    state.head = end
    return state.data[begin:end]
|
81 |
|
82 |
|
def consume_string(state, expected):
    """Consumes the string expected from the head of state.data.

    Returns True and advances the head past the string when the data at the
    head starts with expected. Otherwise returns False and leaves the head
    where it was.
    """

    end = state.head + len(expected)

    # A slice that runs past the end of the data is simply shorter than
    # expected, so a truncated match compares unequal.
    if state.data[state.head:end] != expected:
        return False

    state.head = end
    return True
|
98 |
|
99 |
|
def consume_lws(state):
    """Consumes one LWS from the head. Returns True when an LWS was consumed
    and False, with the head restored, when there was none.

    LWS = [CRLF] 1*( SP | HT )
    """

    start = state.head

    # The leading CRLF is optional, so the result is deliberately ignored.
    consume_string(state, '\r\n')

    # Count the SP / HT characters that follow.
    count = 0
    while peek(state, count) in (' ', '\t'):
        count += 1

    if count == 0:
        # A bare CRLF is not an LWS; undo the CRLF consumption, if any.
        state.head = start
        return False

    consume(state, count)
    return True
|
124 |
|
125 |
|
def consume_lwses(state):
    """Consumes *LWS, i.e. zero or more consecutive LWS, from the head."""

    consumed = consume_lws(state)
    while consumed:
        consumed = consume_lws(state)
|
131 |
|
132 |
|
def consume_token(state):
    """Consumes a token from the head and returns it. Returns None, without
    consuming anything, when the head does not start with a token character.

    A token runs until the end of the data or the first character that is a
    separator, a CTL or not a CHAR.
    """

    length = 0
    c = peek(state, length)

    while not (c is None or c in _SEPARATORS or _is_ctl(c) or
               not _is_char(c)):
        length += 1
        c = peek(state, length)

    if length == 0:
        return None

    return consume(state, length)
|
149 |
|
150 |
|
def consume_token_or_quoted_string(state):
    """Consumes either a token or a quoted-string from the head.

    Returns the token, or the content of the quoted-string with the
    surrounding quotes removed, quoted-pairs unescaped and each LWS replaced
    by a single space. Returns None when neither is found; for a malformed
    quoted-string the head is restored to where it was on entry.
    """

    start = state.head

    if not consume_string(state, '"'):
        # No opening quote, so the only remaining option is a token.
        return consume_token(state)

    pieces = []

    while True:
        # LWS inside the quotes folds into one space.
        if consume_lws(state):
            pieces.append(' ')
            continue

        c = consume(state)

        if c == '"':
            # Closing quote.
            return ''.join(pieces)

        if c == '\\':
            # quoted-pair: the next character is taken literally (no LWS
            # folding) and must be a CHAR.
            c = consume(state)
            if c is not None and _is_char(c):
                pieces.append(c)
                continue
        elif c is not None and not _is_ctl(c):
            # Ordinary qdtext character.
            pieces.append(c)
            continue

        # Reaching here means the data ended before the closing quote, a
        # non-CHAR followed a backslash, or a CTL appeared in qdtext.
        state.head = start
        return None
|
193 |
|
194 |
|
def quote_if_necessary(s):
    """Formats an arbitrary string as a token or, when needed, as a
    quoted-string.

    The empty string is returned as '""'. A string containing a separator, a
    CTL or a non-CHAR character is wrapped in double quotes; anything else is
    returned unchanged.

    Inside the quotes, double quotes, backslashes and CTLs are each preceded
    by a backslash (quoted-pair). The backslash must be escaped because a
    bare one would be read as the start of a quoted-pair by
    consume_token_or_quoted_string and would swallow the character after it.
    """

    if s == '':
        return '""'

    needs_quotes = False
    escaped = []

    for c in s:
        if c == '"' or c in _SEPARATORS or _is_ctl(c) or not _is_char(c):
            needs_quotes = True

        # All of these characters also set needs_quotes above, so an escape
        # never appears in unquoted output.
        if c == '"' or c == '\\' or _is_ctl(c):
            escaped.append('\\' + c)
        else:
            escaped.append(c)

    body = ''.join(escaped)
    if needs_quotes:
        return '"' + body + '"'

    return body
|
216 |
|
217 |
|
def parse_uri(uri):
    """Parses a URI and returns a (host, port, resource) tuple.

    For an absolute URI whose scheme is ws or wss, host is the hostname,
    port is the explicit port or the scheme default (80 for ws, 443 for
    wss), and resource is the path (or '/' when the path is empty) followed
    by '?query' and '#fragment' when present.

    For any other scheme, or none, the URI is treated as relative and
    (None, None, uri) is returned.

    (None, None, None) is returned when the URI is malformed: it cannot be
    split, it has no hostname, or its port is invalid.
    """

    try:
        parsed = urlparse.urlsplit(uri)
    except ValueError:
        # urlsplit itself rejects some malformed authorities, e.g. an
        # unbalanced '[' of an IPv6 literal.
        return None, None, None

    if parsed.scheme != 'wss' and parsed.scheme != 'ws':
        # |uri| must be a relative URI.
        # TODO(toyoshim): Should validate |uri|.
        return None, None, uri

    if parsed.hostname is None:
        return None, None, None

    try:
        port = parsed.port
    except ValueError:
        # The port property raises ValueError for an invalid port
        # description such as 'ws://host:/path' (on some Python versions) or
        # a non-numeric port.
        return None, None, None

    if port is None:
        port = 80 if parsed.scheme == 'ws' else 443

    path = parsed.path
    if not path:
        path += '/'
    if parsed.query:
        path += '?' + parsed.query
    if parsed.fragment:
        path += '#' + parsed.fragment

    return parsed.hostname, port, path
|
253 |
|
254 |
|
# urlparse in Python2.5.1 doesn't have 'ws' and 'wss' entries in uses_netloc.
# Register whichever of the two is missing so that urlsplit extracts the
# network location for these schemes; each scheme is checked on its own so
# that neither is skipped or added twice.
for _scheme in ('ws', 'wss'):
    if _scheme not in urlparse.uses_netloc:
        urlparse.uses_netloc.append(_scheme)
del _scheme
|
261 |
|
262 |
|
263 # vi:sts=4 sw=4 et |