# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

"""
Caching HTTP Proxy for use with the Talos pageload tests
Author: Rob Arnold

This file implements a multithreaded caching http 1.1 proxy. HEAD and GET
methods are supported; POST is not yet.

Each incoming request is put onto a new thread; python does not have a thread
pool library, so a new thread is spawned for each request. I have tried to use
the python 2.4 standard library wherever possible.

Caching:
The cache is implemented in the Cache class. Items can only be added to the
cache. The only way to remove items from the cache is to blow it all away,
either by deleting the file (default: proxy_cache.db) or passing the -c or
--clear flags on the command line. It is technically possible to remove
items individually from the cache, but there has been no need to do so so far.

The cache is implemented with the shelve module. The key is the combination of
host, port and request (path + query + fragment) and the values stored are the
http status code, headers and content that were received from the remote server.

Access to the cache is guarded by a semaphore which allows concurrent read
access. The semaphore is guarded by a simple mutex which prevents a deadlock
from occurring when two threads try to add an item to the cache at the same
time.

Memory usage is kept to a minimum by the shelve module; only items in the cache
that are currently being served stay in memory.

Proxy:
The BaseHTTPServer.BaseHTTPRequestHandler takes care of parsing incoming
requests and managing the socket connection. See the documentation of the
BaseHTTPServer module for more information. When do_HEAD or do_GET is called,
the url that we are supposed to fetch is in self.path.

TODO:
* Implement POST requests. This requires implementing the do_POST method and
  passing the post data along.
* Implement different cache policies
* Add an interface to allow administrators to probe the cache and remove
  items from the database and such.
"""

__version__ = "0.1"

import os
import sys
import time
import threading
import shelve
from optparse import OptionParser, OptionValueError

import socket
try:
    # Python 2 module names.
    import SocketServer
    import BaseHTTPServer
    import httplib
    from urlparse import urlsplit, urlunsplit
except ImportError:
    # Python 3 renamed these modules; alias them to the names used below.
    import socketserver as SocketServer
    import http.server as BaseHTTPServer
    import http.client as httplib
    from urllib.parse import urlsplit, urlunsplit


class HTTPRequestHandler(BaseHTTPServer.BaseHTTPRequestHandler):
    """Serve proxied GET/HEAD requests, from the cache when possible.

    Reads the module-level ``options`` (configuration) and ``cache`` (a
    ``Cache`` instance created by ``_run``).
    """

    server_version = "TalosProxy/" + __version__
    protocol_version = "HTTP/1.1"

    def do_GET(self):
        """Send the response headers followed by the response body."""
        content = self.send_head()
        if content:
            try:
                self.wfile.write(content)
            except socket.error:
                # The client went away mid-response; nothing left to do.
                if options.verbose:
                    print("Got socket error %s" % sys.exc_info()[1])

    def do_HEAD(self):
        """Send the response headers only."""
        self.send_head()

    def getHeaders(self):
        """Return the request headers as a plain ``{name: value}`` dict."""
        h = {}
        for name in self.headers.keys():
            h[name] = self.headers[name]

        return h

    def _fetch_from_origin(self, netloc, reqstring, headers):
        """GET ``reqstring`` from ``netloc``; return (status, headers, body).

        The connection is always closed, even if the request or the read
        fails part-way through.
        """
        conn = httplib.HTTPConnection(netloc)
        try:
            conn.request("GET", reqstring, headers=headers)
            res = conn.getresponse()
            content = res.read()
            return res.status, res.getheaders(), content
        finally:
            conn.close()

    def send_head(self, method="GET"):
        """Resolve the requested URL and send the status line and headers.

        The object is looked up in the cache first (unless caching is
        disabled) and fetched from the origin server on a miss. The origin
        is always queried with GET, so that a HEAD request still populates
        the cache with a complete entry; ``method`` is accepted for
        interface compatibility and is otherwise unused.

        Returns the response body, or None if an error response was sent or
        the client connection failed.
        """
        o = urlsplit(self.path)

        headers = self.getHeaders()
        # Header names are case-insensitive, and the Python 2 header parser
        # hands them back lower-cased, so compare on the lower-cased name.
        for name in list(headers.keys()):
            lowered = name.lower()
            if lowered in ("proxy-connection", "connection"):
                headers[name] = "Close"
            elif lowered == "keep-alive":
                del headers[name]

        # An absolute URL without a path ("http://host") must still produce
        # a valid request line.
        reqstring = urlunsplit(('', '', o.path or '/', o.query, o.fragment))

        if options.no_cache:
            cache_result = None
        else:
            cache_result = cache.get(o.hostname, o.port, reqstring)

        if not cache_result:
            if options.localonly:
                self.send_error(404, "Object not in cache")
                return None

            if options.verbose:
                print("Object %s was not in the cache" % self.path)
            try:
                status, headers, content = self._fetch_from_origin(
                    o.netloc, reqstring, headers)
            except (socket.error, httplib.HTTPException):
                # Report upstream failures to the client instead of letting
                # the handler thread die with a traceback.
                if options.verbose:
                    print("Failed to fetch %s: %s"
                          % (self.path, sys.exc_info()[1]))
                self.send_error(502, "Could not fetch object from origin")
                return None

            if not options.no_cache:
                cache.add(o.hostname, o.port, reqstring, status, headers,
                          content)
        else:
            status, headers, content = cache_result

        try:
            self.send_response(status)
            for name, value in headers:
                # Drop transfer-encoding because the body is sent to the
                # client as one un-chunked block, and drop the stored
                # content-length so that exactly one, computed from the body
                # actually being sent, goes out below.
                if name.lower() in ('transfer-encoding', 'content-length'):
                    continue
                self.send_header(name, value)
            self.send_header("Content-Length", str(len(content)))
            self.end_headers()
        except socket.error:
            if options.verbose:
                print("Got socket error %s" % sys.exc_info()[1])
            return None
        return content

    def log_message(self, format, *args):
        """Log requests only when running verbosely."""
        if options.verbose:
            BaseHTTPServer.BaseHTTPRequestHandler.log_message(self, format,
                                                              *args)


class HTTPServer(SocketServer.ThreadingMixIn, BaseHTTPServer.HTTPServer):
    """HTTP server that handles every request on its own thread."""

    def __init__(self, address, handler):
        BaseHTTPServer.HTTPServer.__init__(self, address, handler)


class Cache(object):
    """Multithreaded cache uses the shelve module to store pages"""

    # 20 concurrent threads ought to be enough for one browser
    max_concurrency = 20

    def __init__(self, name='', max_concurrency=20):
        """Open (or create) the cache database.

        name -- database filename; falls back to ``options.cache`` and then
                to "proxy_cache.db".
        max_concurrency -- number of readers allowed in the cache at once.
        """
        name = name or options.cache or "proxy_cache.db"
        self.name = name
        self.max_concurrency = max_concurrency
        self.entries = {}
        # Readers take one permit; a writer takes all of them.
        self.sem = threading.Semaphore(self.max_concurrency)
        # Serialises permit acquisition so two writers cannot each grab part
        # of the permits and deadlock.
        self.semlock = threading.Lock()
        if options.clear_cache:
            flag = 'n'  # always create a new, empty database
        else:
            flag = 'c'  # open, creating the database if it does not exist
        self.db = shelve.DbfilenameShelf(name, flag)

    def __del__(self):
        # __init__ may have failed before self.db was assigned.
        if hasattr(self, 'db'):
            self.db.close()

    def get_key(self, host, port, resource):
        """Return the database key for a host, port and resource."""
        return '%s:%s/%s' % (host, port, resource)

    def get(self, host, port, resource):
        """Return the cached (status, headers, content) tuple, or None."""
        key = self.get_key(host, port, resource)
        self.semlock.acquire()
        try:
            self.sem.acquire()
        finally:
            self.semlock.release()
        try:
            try:
                # returns status, headers, content
                return self.db[key]
            except KeyError:
                return None
        finally:
            self.sem.release()

    def add(self, host, port, resource, status, headers, content):
        """Store a response in the cache and flush it to disk."""
        key = self.get_key(host, port, resource)
        acquired = 0
        try:
            self.semlock.acquire()
            try:
                # Take every permit so no reader is active during the write.
                for i in range(self.max_concurrency):
                    self.sem.acquire()
                    acquired += 1
            finally:
                self.semlock.release()
            self.db[key] = (status, headers, content)
            self.db.sync()
        finally:
            # Hand back exactly the permits that were taken.
            for i in range(acquired):
                self.sem.release()


class Options(object):
    """Default configuration values for the proxy."""
    port = 8000
    localonly = False
    clear_cache = False
    no_cache = False
    cache = 'proxy_cache.db'
    verbose = False


# Defaults, so run_proxy() works even when configure_proxy() was never
# called. Replaced by _parseOptions() or configure_proxy().
options = Options()
# The Cache shared by all request handlers; created by _run().
cache = None


def _parseOptions():
    """Parse the command line into the module-level ``options``."""
    def port_callback(option, opt, value, parser):
        # Valid TCP ports are 1 through 65535 inclusive.
        if 0 < value < 2 ** 16:
            setattr(parser.values, option.dest, value)
        else:
            raise OptionValueError("Port number is out of range")

    global options
    parser = OptionParser(version="Talos Proxy " + __version__)
    parser.add_option("-p", "--port", dest="port",
        help="The port to run the proxy server on", metavar="PORT", type="int",
        action="callback", callback=port_callback)
    parser.add_option("-v", "--verbose", action="store_true", dest="verbose",
        help="Include additional debugging information")
    parser.add_option("-l", "--localonly", action="store_true", dest="localonly",
        help="Only serve pages from the local database")
    parser.add_option("-c", "--clear", action="store_true", dest="clear_cache",
        help="Clear the cache on startup")
    parser.add_option("-n", "--no-cache", action="store_true", dest="no_cache",
        help="Do not use a cache")
    parser.add_option("-u", "--use-cache", dest="cache",
        help="The filename of the cache to use", metavar="NAME.db")
    parser.set_defaults(verbose=Options.verbose,
                        port=Options.port,
                        localonly=Options.localonly,
                        clear_cache=Options.clear_cache,
                        no_cache=Options.no_cache,
                        cache=Options.cache)
    options, args = parser.parse_args()


def configure_proxy(**kwargs):
    """Configures the proxy server. This should be called before run_proxy.

    It can be called afterwards, but note that it is not threadsafe and some
    options (namely port) will not take effect. Keyword arguments are the
    attribute names of the Options class.
    """
    global options
    options = Options()
    for key in kwargs:
        setattr(options, key, kwargs[key])


def _run():
    """Create the cache and serve requests until the process exits."""
    global cache
    cache = Cache()
    server_address = ('', options.port)
    httpd = HTTPServer(server_address, HTTPRequestHandler)
    httpd.serve_forever()


def run_proxy():
    """Starts the proxy; it runs on a separate daemon thread"""
    thr = threading.Thread(target=_run)
    # now when we die, the daemon thread will die too
    thr.setDaemon(1)
    thr.start()


if __name__ == '__main__':
    _parseOptions()
    try:
        run_proxy()
        # thr.join() doesn't terminate on keyboard interrupt
        while 1:
            time.sleep(1)
    except KeyboardInterrupt:
        if options.verbose:
            print("Quittin' time...")

__all__ = ['run_proxy', 'configure_proxy']