michael@0: # Copyright 2011, Google Inc. michael@0: # All rights reserved. michael@0: # michael@0: # Redistribution and use in source and binary forms, with or without michael@0: # modification, are permitted provided that the following conditions are michael@0: # met: michael@0: # michael@0: # * Redistributions of source code must retain the above copyright michael@0: # notice, this list of conditions and the following disclaimer. michael@0: # * Redistributions in binary form must reproduce the above michael@0: # copyright notice, this list of conditions and the following disclaimer michael@0: # in the documentation and/or other materials provided with the michael@0: # distribution. michael@0: # * Neither the name of Google Inc. nor the names of its michael@0: # contributors may be used to endorse or promote products derived from michael@0: # this software without specific prior written permission. michael@0: # michael@0: # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS michael@0: # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT michael@0: # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR michael@0: # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT michael@0: # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, michael@0: # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT michael@0: # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, michael@0: # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY michael@0: # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT michael@0: # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE michael@0: # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. michael@0: michael@0: michael@0: """WebSocket utilities. michael@0: """ michael@0: michael@0: michael@0: import array michael@0: import errno michael@0: michael@0: # Import hash classes from a module available and recommended for each Python michael@0: # version and re-export those symbol. Use sha and md5 module in Python 2.4, and michael@0: # hashlib module in Python 2.6. michael@0: try: michael@0: import hashlib michael@0: md5_hash = hashlib.md5 michael@0: sha1_hash = hashlib.sha1 michael@0: except ImportError: michael@0: import md5 michael@0: import sha michael@0: md5_hash = md5.md5 michael@0: sha1_hash = sha.sha michael@0: michael@0: import StringIO michael@0: import logging michael@0: import os michael@0: import re michael@0: import socket michael@0: import traceback michael@0: import zlib michael@0: michael@0: michael@0: def get_stack_trace(): michael@0: """Get the current stack trace as string. michael@0: michael@0: This is needed to support Python 2.3. michael@0: TODO: Remove this when we only support Python 2.4 and above. michael@0: Use traceback.format_exc instead. michael@0: """ michael@0: michael@0: out = StringIO.StringIO() michael@0: traceback.print_exc(file=out) michael@0: return out.getvalue() michael@0: michael@0: michael@0: def prepend_message_to_exception(message, exc): michael@0: """Prepend message to the exception.""" michael@0: michael@0: exc.args = (message + str(exc),) michael@0: return michael@0: michael@0: michael@0: def __translate_interp(interp, cygwin_path): michael@0: """Translate interp program path for Win32 python to run cygwin program michael@0: (e.g. perl). Note that it doesn't support path that contains space, michael@0: which is typically true for Unix, where #!-script is written. michael@0: For Win32 python, cygwin_path is a directory of cygwin binaries. michael@0: michael@0: Args: michael@0: interp: interp command line michael@0: cygwin_path: directory name of cygwin binary, or None michael@0: Returns: michael@0: translated interp command line. michael@0: """ michael@0: if not cygwin_path: michael@0: return interp michael@0: m = re.match('^[^ ]*/([^ ]+)( .*)?', interp) michael@0: if m: michael@0: cmd = os.path.join(cygwin_path, m.group(1)) michael@0: return cmd + m.group(2) michael@0: return interp michael@0: michael@0: michael@0: def get_script_interp(script_path, cygwin_path=None): michael@0: """Gets #!-interpreter command line from the script. michael@0: michael@0: It also fixes command path. When Cygwin Python is used, e.g. in WebKit, michael@0: it could run "/usr/bin/perl -wT hello.pl". michael@0: When Win32 Python is used, e.g. in Chromium, it couldn't. So, fix michael@0: "/usr/bin/perl" to "\perl.exe". michael@0: michael@0: Args: michael@0: script_path: pathname of the script michael@0: cygwin_path: directory name of cygwin binary, or None michael@0: Returns: michael@0: #!-interpreter command line, or None if it is not #!-script. michael@0: """ michael@0: fp = open(script_path) michael@0: line = fp.readline() michael@0: fp.close() michael@0: m = re.match('^#!(.*)', line) michael@0: if m: michael@0: return __translate_interp(m.group(1), cygwin_path) michael@0: return None michael@0: michael@0: michael@0: def wrap_popen3_for_win(cygwin_path): michael@0: """Wrap popen3 to support #!-script on Windows. michael@0: michael@0: Args: michael@0: cygwin_path: path for cygwin binary if command path is needed to be michael@0: translated. None if no translation required. michael@0: """ michael@0: michael@0: __orig_popen3 = os.popen3 michael@0: michael@0: def __wrap_popen3(cmd, mode='t', bufsize=-1): michael@0: cmdline = cmd.split(' ') michael@0: interp = get_script_interp(cmdline[0], cygwin_path) michael@0: if interp: michael@0: cmd = interp + ' ' + cmd michael@0: return __orig_popen3(cmd, mode, bufsize) michael@0: michael@0: os.popen3 = __wrap_popen3 michael@0: michael@0: michael@0: def hexify(s): michael@0: return ' '.join(map(lambda x: '%02x' % ord(x), s)) michael@0: michael@0: michael@0: def get_class_logger(o): michael@0: return logging.getLogger( michael@0: '%s.%s' % (o.__class__.__module__, o.__class__.__name__)) michael@0: michael@0: michael@0: class NoopMasker(object): michael@0: """A masking object that has the same interface as RepeatedXorMasker but michael@0: just returns the string passed in without making any change. michael@0: """ michael@0: michael@0: def __init__(self): michael@0: pass michael@0: michael@0: def mask(self, s): michael@0: return s michael@0: michael@0: michael@0: class RepeatedXorMasker(object): michael@0: """A masking object that applies XOR on the string given to mask method michael@0: with the masking bytes given to the constructor repeatedly. This object michael@0: remembers the position in the masking bytes the last mask method call michael@0: ended and resumes from that point on the next mask method call. michael@0: """ michael@0: michael@0: def __init__(self, mask): michael@0: self._mask = map(ord, mask) michael@0: self._mask_size = len(self._mask) michael@0: self._count = 0 michael@0: michael@0: def mask(self, s): michael@0: result = array.array('B') michael@0: result.fromstring(s) michael@0: # Use temporary local variables to eliminate the cost to access michael@0: # attributes michael@0: count = self._count michael@0: mask = self._mask michael@0: mask_size = self._mask_size michael@0: for i in xrange(len(result)): michael@0: result[i] ^= mask[count] michael@0: count = (count + 1) % mask_size michael@0: self._count = count michael@0: michael@0: return result.tostring() michael@0: michael@0: michael@0: class DeflateRequest(object): michael@0: """A wrapper class for request object to intercept send and recv to perform michael@0: deflate compression and decompression transparently. michael@0: """ michael@0: michael@0: def __init__(self, request): michael@0: self._request = request michael@0: self.connection = DeflateConnection(request.connection) michael@0: michael@0: def __getattribute__(self, name): michael@0: if name in ('_request', 'connection'): michael@0: return object.__getattribute__(self, name) michael@0: return self._request.__getattribute__(name) michael@0: michael@0: def __setattr__(self, name, value): michael@0: if name in ('_request', 'connection'): michael@0: return object.__setattr__(self, name, value) michael@0: return self._request.__setattr__(name, value) michael@0: michael@0: michael@0: # By making wbits option negative, we can suppress CMF/FLG (2 octet) and michael@0: # ADLER32 (4 octet) fields of zlib so that we can use zlib module just as michael@0: # deflate library. DICTID won't be added as far as we don't set dictionary. michael@0: # LZ77 window of 32K will be used for both compression and decompression. michael@0: # For decompression, we can just use 32K to cover any windows size. For michael@0: # compression, we use 32K so receivers must use 32K. michael@0: # michael@0: # Compression level is Z_DEFAULT_COMPRESSION. We don't have to match level michael@0: # to decode. michael@0: # michael@0: # See zconf.h, deflate.cc, inflate.cc of zlib library, and zlibmodule.c of michael@0: # Python. See also RFC1950 (ZLIB 3.3). michael@0: michael@0: michael@0: class _Deflater(object): michael@0: michael@0: def __init__(self, window_bits): michael@0: self._logger = get_class_logger(self) michael@0: michael@0: self._compress = zlib.compressobj( michael@0: zlib.Z_DEFAULT_COMPRESSION, zlib.DEFLATED, -window_bits) michael@0: michael@0: def compress_and_flush(self, bytes): michael@0: compressed_bytes = self._compress.compress(bytes) michael@0: compressed_bytes += self._compress.flush(zlib.Z_SYNC_FLUSH) michael@0: self._logger.debug('Compress input %r', bytes) michael@0: self._logger.debug('Compress result %r', compressed_bytes) michael@0: return compressed_bytes michael@0: michael@0: michael@0: class _Inflater(object): michael@0: michael@0: def __init__(self): michael@0: self._logger = get_class_logger(self) michael@0: michael@0: self._unconsumed = '' michael@0: michael@0: self.reset() michael@0: michael@0: def decompress(self, size): michael@0: if not (size == -1 or size > 0): michael@0: raise Exception('size must be -1 or positive') michael@0: michael@0: data = '' michael@0: michael@0: while True: michael@0: if size == -1: michael@0: data += self._decompress.decompress(self._unconsumed) michael@0: # See Python bug http://bugs.python.org/issue12050 to michael@0: # understand why the same code cannot be used for updating michael@0: # self._unconsumed for here and else block. michael@0: self._unconsumed = '' michael@0: else: michael@0: data += self._decompress.decompress( michael@0: self._unconsumed, size - len(data)) michael@0: self._unconsumed = self._decompress.unconsumed_tail michael@0: if self._decompress.unused_data: michael@0: # Encountered a last block (i.e. a block with BFINAL = 1) and michael@0: # found a new stream (unused_data). We cannot use the same michael@0: # zlib.Decompress object for the new stream. Create a new michael@0: # Decompress object to decompress the new one. michael@0: # michael@0: # It's fine to ignore unconsumed_tail if unused_data is not michael@0: # empty. michael@0: self._unconsumed = self._decompress.unused_data michael@0: self.reset() michael@0: if size >= 0 and len(data) == size: michael@0: # data is filled. Don't call decompress again. michael@0: break michael@0: else: michael@0: # Re-invoke Decompress.decompress to try to decompress all michael@0: # available bytes before invoking read which blocks until michael@0: # any new byte is available. michael@0: continue michael@0: else: michael@0: # Here, since unused_data is empty, even if unconsumed_tail is michael@0: # not empty, bytes of requested length are already in data. We michael@0: # don't have to "continue" here. michael@0: break michael@0: michael@0: if data: michael@0: self._logger.debug('Decompressed %r', data) michael@0: return data michael@0: michael@0: def append(self, data): michael@0: self._logger.debug('Appended %r', data) michael@0: self._unconsumed += data michael@0: michael@0: def reset(self): michael@0: self._logger.debug('Reset') michael@0: self._decompress = zlib.decompressobj(-zlib.MAX_WBITS) michael@0: michael@0: michael@0: # Compresses/decompresses given octets using the method introduced in RFC1979. michael@0: michael@0: michael@0: class _RFC1979Deflater(object): michael@0: """A compressor class that applies DEFLATE to given byte sequence and michael@0: flushes using the algorithm described in the RFC1979 section 2.1. michael@0: """ michael@0: michael@0: def __init__(self, window_bits, no_context_takeover): michael@0: self._deflater = None michael@0: if window_bits is None: michael@0: window_bits = zlib.MAX_WBITS michael@0: self._window_bits = window_bits michael@0: self._no_context_takeover = no_context_takeover michael@0: michael@0: def filter(self, bytes): michael@0: if self._deflater is None or self._no_context_takeover: michael@0: self._deflater = _Deflater(self._window_bits) michael@0: michael@0: # Strip last 4 octets which is LEN and NLEN field of a non-compressed michael@0: # block added for Z_SYNC_FLUSH. michael@0: return self._deflater.compress_and_flush(bytes)[:-4] michael@0: michael@0: michael@0: class _RFC1979Inflater(object): michael@0: """A decompressor class for byte sequence compressed and flushed following michael@0: the algorithm described in the RFC1979 section 2.1. michael@0: """ michael@0: michael@0: def __init__(self): michael@0: self._inflater = _Inflater() michael@0: michael@0: def filter(self, bytes): michael@0: # Restore stripped LEN and NLEN field of a non-compressed block added michael@0: # for Z_SYNC_FLUSH. michael@0: self._inflater.append(bytes + '\x00\x00\xff\xff') michael@0: return self._inflater.decompress(-1) michael@0: michael@0: michael@0: class DeflateSocket(object): michael@0: """A wrapper class for socket object to intercept send and recv to perform michael@0: deflate compression and decompression transparently. michael@0: """ michael@0: michael@0: # Size of the buffer passed to recv to receive compressed data. michael@0: _RECV_SIZE = 4096 michael@0: michael@0: def __init__(self, socket): michael@0: self._socket = socket michael@0: michael@0: self._logger = get_class_logger(self) michael@0: michael@0: self._deflater = _Deflater(zlib.MAX_WBITS) michael@0: self._inflater = _Inflater() michael@0: michael@0: def recv(self, size): michael@0: """Receives data from the socket specified on the construction up michael@0: to the specified size. Once any data is available, returns it even michael@0: if it's smaller than the specified size. michael@0: """ michael@0: michael@0: # TODO(tyoshino): Allow call with size=0. It should block until any michael@0: # decompressed data is available. michael@0: if size <= 0: michael@0: raise Exception('Non-positive size passed') michael@0: while True: michael@0: data = self._inflater.decompress(size) michael@0: if len(data) != 0: michael@0: return data michael@0: michael@0: read_data = self._socket.recv(DeflateSocket._RECV_SIZE) michael@0: if not read_data: michael@0: return '' michael@0: self._inflater.append(read_data) michael@0: michael@0: def sendall(self, bytes): michael@0: self.send(bytes) michael@0: michael@0: def send(self, bytes): michael@0: self._socket.sendall(self._deflater.compress_and_flush(bytes)) michael@0: return len(bytes) michael@0: michael@0: michael@0: class DeflateConnection(object): michael@0: """A wrapper class for request object to intercept write and read to michael@0: perform deflate compression and decompression transparently. michael@0: """ michael@0: michael@0: def __init__(self, connection): michael@0: self._connection = connection michael@0: michael@0: self._logger = get_class_logger(self) michael@0: michael@0: self._deflater = _Deflater(zlib.MAX_WBITS) michael@0: self._inflater = _Inflater() michael@0: michael@0: def get_remote_addr(self): michael@0: return self._connection.remote_addr michael@0: remote_addr = property(get_remote_addr) michael@0: michael@0: def put_bytes(self, bytes): michael@0: self.write(bytes) michael@0: michael@0: def read(self, size=-1): michael@0: """Reads at most size bytes. Blocks until there's at least one byte michael@0: available. michael@0: """ michael@0: michael@0: # TODO(tyoshino): Allow call with size=0. michael@0: if not (size == -1 or size > 0): michael@0: raise Exception('size must be -1 or positive') michael@0: michael@0: data = '' michael@0: while True: michael@0: if size == -1: michael@0: data += self._inflater.decompress(-1) michael@0: else: michael@0: data += self._inflater.decompress(size - len(data)) michael@0: michael@0: if size >= 0 and len(data) != 0: michael@0: break michael@0: michael@0: # TODO(tyoshino): Make this read efficient by some workaround. michael@0: # michael@0: # In 3.0.3 and prior of mod_python, read blocks until length bytes michael@0: # was read. We don't know the exact size to read while using michael@0: # deflate, so read byte-by-byte. michael@0: # michael@0: # _StandaloneRequest.read that ultimately performs michael@0: # socket._fileobject.read also blocks until length bytes was read michael@0: read_data = self._connection.read(1) michael@0: if not read_data: michael@0: break michael@0: self._inflater.append(read_data) michael@0: return data michael@0: michael@0: def write(self, bytes): michael@0: self._connection.write(self._deflater.compress_and_flush(bytes)) michael@0: michael@0: michael@0: def _is_ewouldblock_errno(error_number): michael@0: """Returns True iff error_number indicates that receive operation would michael@0: block. To make this portable, we check availability of errno and then michael@0: compare them. michael@0: """ michael@0: michael@0: for error_name in ['WSAEWOULDBLOCK', 'EWOULDBLOCK', 'EAGAIN']: michael@0: if (error_name in dir(errno) and michael@0: error_number == getattr(errno, error_name)): michael@0: return True michael@0: return False michael@0: michael@0: michael@0: def drain_received_data(raw_socket): michael@0: # Set the socket non-blocking. michael@0: original_timeout = raw_socket.gettimeout() michael@0: raw_socket.settimeout(0.0) michael@0: michael@0: drained_data = [] michael@0: michael@0: # Drain until the socket is closed or no data is immediately michael@0: # available for read. michael@0: while True: michael@0: try: michael@0: data = raw_socket.recv(1) michael@0: if not data: michael@0: break michael@0: drained_data.append(data) michael@0: except socket.error, e: michael@0: # e can be either a pair (errno, string) or just a string (or michael@0: # something else) telling what went wrong. We suppress only michael@0: # the errors that indicates that the socket blocks. Those michael@0: # exceptions can be parsed as a pair (errno, string). michael@0: try: michael@0: error_number, message = e michael@0: except: michael@0: # Failed to parse socket.error. michael@0: raise e michael@0: michael@0: if _is_ewouldblock_errno(error_number): michael@0: break michael@0: else: michael@0: raise e michael@0: michael@0: # Rollback timeout value. michael@0: raw_socket.settimeout(original_timeout) michael@0: michael@0: return ''.join(drained_data) michael@0: michael@0: michael@0: # vi:sts=4 sw=4 et