testing/tools/proxyserver/proxyserver.py

Wed, 31 Dec 2014 06:55:50 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:55:50 +0100
changeset 2
7e26c7da4463
permissions
-rw-r--r--

Added tag UPSTREAM_283F7C6 for changeset ca08bd8f51b2

     1 # This Source Code Form is subject to the terms of the Mozilla Public
     2 # License, v. 2.0. If a copy of the MPL was not distributed with this
     3 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
     5 """
     6 Caching HTTP Proxy for use with the Talos pageload tests
     7 Author: Rob Arnold
     9 This file implements a multithreaded caching http 1.1 proxy. HEAD and GET
    10 methods are supported; POST is not yet.
    12 Each incoming request is put onto a new thread; python does not have a thread
    13 pool library, so a new thread is spawned for each request. I have tried to use
    14 the python 2.4 standard library wherever possible.
    16 Caching:
    17 The cache is implemented in the Cache class. Items can only be added to the
    18 cache. The only way to remove items from the cache is to blow it all away,
    19 either by deleting the file (default: proxy_cache.db) or passing the -c or
    20 --clear-cache flags on the command line. It is technically possible to remove
    21 items individually from the cache, but there has been no need to do so so far.
    23 The cache is implemented with the shelve module. The key is the combination of
    24 host, port and request (path + params + fragment) and the values stored are the
    25 http status code, headers and content that were received from the remote server.
    27 Access to the cache is guarded by a semaphore which allows concurrent read
    28 access. The semaphore is guarded by a simple mutex which prevents a deadlock
    29 from occurring when two threads try to add an item to the cache at the same time.
    31 Memory usage is kept to a minimum by the shelve module; only items in the cache
    32 that are currently being served stay in memory.
    34 Proxy:
    35 The BaseHTTPServer.BaseHTTPRequestHandler takes care of parsing incoming
    36 requests and managing the socket connection. See the documentation of the
    37 BaseHTTPServer module for more information. When do_HEAD or do_GET is called,
    38 the url that we are supposed to fetch is in self.path.
    40 TODO:
    41 * Implement POST requests. This requires implementing the do_POST method and
    42   passing the post data along.
    43 * Implement different cache policies
    44 * Add an interface to allow administrators to probe the cache and remove
    45   items from the database and such.
    46 """
    48 __version__ = "0.1"
    50 import os
    51 import sys
    52 import time
    53 import threading
    54 import shelve
    55 from optparse import OptionParser, OptionValueError
    57 import SocketServer
    58 import BaseHTTPServer
    59 import socket
    60 import httplib
    61 from urlparse import urlsplit, urlunsplit
    63 class HTTPRequestHandler(BaseHTTPServer.BaseHTTPRequestHandler):
    64   server_version = "TalosProxy/" + __version__
    65   protocol_version = "HTTP/1.1"
    67   def do_GET(self):
    68     content = self.send_head()
    69     if content:
    70       try:
    71         self.wfile.write(content)
    72       except socket.error, e:
    73         if options.verbose:
    74           print "Got socket error %s" % e
    75     #self.close_connection = 1
    76   def do_HEAD(self):
    77     self.send_head()
    79   def getHeaders(self):
    80     h = {}
    81     for name in self.headers.keys():
    82       h[name] = self.headers[name]
    84     return h
    86   def send_head(self, method="GET"): 
    87     o = urlsplit(self.path)
    89     #sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    91     headers = self.getHeaders()
    92     for k in "Proxy-Connection", "Connection":
    93       if k in headers:
    94         headers[k] = "Close"
    95     if "Keep-Alive" in headers:
    96       del headers["Keep-Alive"]
    98     reqstring = urlunsplit(('','',o.path, o.query, o.fragment))
   100     if options.no_cache:
   101       cache_result = None
   102     else:
   103       cache_result = cache.get(o.hostname, o.port, reqstring)
   105     if not cache_result:
   106       if options.localonly:
   107         self.send_error(404, "Object not in cache")
   108         return None
   109       else:
   110         if options.verbose:
   111           print "Object %s was not in the cache" % self.path
   112         conn = httplib.HTTPConnection(o.netloc)
   113         conn.request("GET", reqstring, headers=headers)
   114         res = conn.getresponse()
   116         content = res.read()
   117         conn.close()
   119         status, headers = res.status, res.getheaders()
   121         if not options.no_cache:
   122           cache.add(o.hostname, o.port, reqstring, status, headers, content)
   123     else:
   124       status, headers, content = cache_result
   126     try:
   127       self.send_response(status)
   128       for name, value in headers:
   129         # kill the transfer-encoding header because we don't support it when
   130         # we send data to the client
   131         if name not in ('transfer-encoding',):
   132           self.send_header(name, value)
   133       if "Content-Length" not in headers:
   134         self.send_header("Content-Length", str(len(content)))
   135       self.end_headers()
   136     except socket.error, e:
   137       if options.verbose:
   138         print "Got socket error %s" % e
   139       return None
   140     return content
   141   def log_message(self, format, *args):
   142     if options.verbose:
   143       BaseHTTPServer.BaseHTTPRequestHandler.log_message(self, format, *args)
   145 class HTTPServer(SocketServer.ThreadingMixIn, BaseHTTPServer.HTTPServer):
   146   def __init__(self, address, handler):
   147     BaseHTTPServer.HTTPServer.__init__(self, address, handler)
   149 class Cache(object):
   150   """Multithreaded cache uses the shelve module to store pages"""
   151   # 20 concurrent threads ought to be enough for one browser
   152   max_concurrency = 20
   153   def __init__(self, name='', max_concurrency=20):
   154     name = name or options.cache or "proxy_cache.db"
   155     self.name = name
   156     self.max_concurrency = max_concurrency
   157     self.entries = {}
   158     self.sem = threading.Semaphore(self.max_concurrency)
   159     self.semlock = threading.Lock()
   160     if options.clear_cache:
   161       flag = 'n'
   162     else:
   163       flag = 'c'
   164     self.db = shelve.DbfilenameShelf(name, flag)
   166   def __del__(self):
   167     if hasattr(self, 'db'):
   168       self.db.close()
   170   def get_key(self, host, port, resource):
   171     return '%s:%s/%s' % (host, port, resource)
   173   def get(self, host, port, resource):
   174     key = self.get_key(host, port, resource)
   175     self.semlock.acquire()
   176     self.sem.acquire()
   177     self.semlock.release()
   178     try:
   179       if not self.db.has_key(key):
   180         return None
   181       # returns status, headers, content
   182       return self.db[key]
   183     finally:
   184       self.sem.release()
   185   def add(self, host, port, resource, status, headers, content):
   186     key = self.get_key(host, port, resource)
   187     self.semlock.acquire()
   188     for i in range(self.max_concurrency):
   189       self.sem.acquire()
   190     self.semlock.release()
   191     try:
   192       self.db[key] = (status, headers, content)
   193       self.db.sync()
   194     finally:
   195       for i in range(self.max_concurrency):
   196         self.sem.release()
   198 class Options(object):
   199   port = 8000
   200   localonly = False
   201   clear_cache = False
   202   no_cache = False
   203   cache = 'proxy_cache.db'
   204   verbose = False
   206 def _parseOptions():
   207   def port_callback(option, opt, value, parser):
   208     if value > 0 and value < (2 ** 16 - 1):
   209       setattr(parser.values, option.dest, value)
   210     else:
   211       raise OptionValueError("Port number is out of range")
   213   global options
   214   parser = OptionParser(version="Talos Proxy " + __version__)
   215   parser.add_option("-p", "--port", dest="port",
   216     help="The port to run the proxy server on", metavar="PORT", type="int",
   217     action="callback", callback=port_callback)
   218   parser.add_option("-v", "--verbose", action="store_true", dest="verbose",
   219     help="Include additional debugging information")
   220   parser.add_option("-l", "--localonly", action="store_true", dest="localonly",
   221     help="Only serve pages from the local database")
   222   parser.add_option("-c", "--clear", action="store_true", dest="clear_cache",
   223     help="Clear the cache on startup")
   224   parser.add_option("-n", "--no-cache", action="store_true", dest="no_cache",
   225     help="Do not use a cache")
   226   parser.add_option("-u", "--use-cache", dest="cache",
   227     help="The filename of the cache to use", metavar="NAME.db")
   228   parser.set_defaults(verbose=Options.verbose,
   229                       port=Options.port,
   230                       localonly=Options.localonly,
   231                       clear_cache=Options.clear_cache,
   232                       no_cache=Options.no_cache,
   233                       cache=Options.cache)
   234   options, args = parser.parse_args()
   236 """Configures the proxy server. This should be called before run_proxy. It can be
   237 called afterwards, but note that it is not threadsafe and some options (namely
   238 port) will not take effect"""
   239 def configure_proxy(**kwargs):
   240   global options
   241   options = Options()
   242   for key in kwargs:
   243     setattr(options, key, kwargs[key])
   245 def _run():
   246   global cache
   247   cache = Cache()
   248   server_address = ('', options.port)
   249   httpd = HTTPServer(server_address, HTTPRequestHandler)
   250   httpd.serve_forever()
   252 """Starts the proxy; it runs on a separate daemon thread"""
   253 def run_proxy():
   254   thr = threading.Thread(target=_run)
   255   # now when we die, the daemon thread will die too
   256   thr.setDaemon(1)
   257   thr.start()
   259 if __name__ == '__main__':
   260   _parseOptions()
   261   try:
   262     run_proxy()
   263     # thr.join() doesn't terminate on keyboard interrupt
   264     while 1: time.sleep(1)
   265   except KeyboardInterrupt:
   266     if options.verbose:
   267       print "Quittin' time..."
   269 __all__ = ['run_proxy', 'configure_proxy']

mercurial