[z3-checkins] r33676 - z3/deliverance/branches/packaged/deliverance

ianb at codespeak.net ianb at codespeak.net
Tue Oct 24 19:25:52 CEST 2006


Author: ianb
Date: Tue Oct 24 19:25:49 2006
New Revision: 33676

Added:
   z3/deliverance/branches/packaged/deliverance/fixuplinks.py   (contents, props changed)
   z3/deliverance/branches/packaged/deliverance/relocateresponse.py   (contents, props changed)
Modified:
   z3/deliverance/branches/packaged/deliverance/proxyapp.py
   z3/deliverance/branches/packaged/deliverance/proxycommand.py
Log:
Added code to log requests more verbosely in the proxy app/command.  Added some routines to fix up/translate links in a response, to help us proxy sites that don't want to be proxied

Added: z3/deliverance/branches/packaged/deliverance/fixuplinks.py
==============================================================================
--- (empty file)
+++ z3/deliverance/branches/packaged/deliverance/fixuplinks.py	Tue Oct 24 19:25:49 2006
@@ -0,0 +1,46 @@
+from htmlserialize import decodeAndParseHTML, tostring
+import re
+
+def fixup_text_links(doc, link_repl_func, remove_base_tags=True):
+    """
+    Like fixup_links(), but takes the document as text and returns the rewritten text
+    """
+    doc = decodeAndParseHTML(doc)
+    fixup_links(doc, link_repl_func, remove_base_tags=remove_base_tags)
+    return tostring(doc)
+
+def fixup_links(doc, link_repl_func,
+                remove_base_tags=True):
+    """
+    Modifies doc (already parsed by lxml) in-place: every href and src
+    attribute, and every url(...) in a head <style> element, is replaced
+    by the result of passing it through link_repl_func.
+    """
+    if remove_base_tags:
+        remove_base_tags_from_document(doc)  # drop <base href=...> elements first
+
+    for attrib in 'href', 'src':
+        els = doc.xpath('//*[@%s]' % attrib)  # every element carrying this attribute
+        for el in els:
+            el.attrib[attrib] = link_repl_func(el.attrib[attrib])
+
+    fixup_css_links(doc, link_repl_func)
+
+def remove_base_tags_from_document(doc):
+    basetags = doc.xpath('//base[@href]')  # only <base> elements that carry an href
+    for b in basetags:
+        b.getparent().remove(b)  # detach the element from its parent, modifying doc in place
+    
+CSS_URL_PAT = re.compile(r'url\((.*?)\)',re.I)  # group 1 is everything between the parentheses, quotes included
+def fixup_css_links(doc, link_repl_func):
+    """
+    Rewrites every url(...) reference inside the document's head <style>
+    elements in-place, passing each captured URL through link_repl_func.
+    """
+    def absuri(matchobj):
+        return 'url(%s)' % link_repl_func(matchobj.group(1))
+    els = doc.xpath('//head/style')
+    for el in els:
+        if el.text:  # skip empty <style> elements
+            el.text = CSS_URL_PAT.sub(absuri, el.text)  # module-level pattern; there is no self in this function
+

Modified: z3/deliverance/branches/packaged/deliverance/proxyapp.py
==============================================================================
--- z3/deliverance/branches/packaged/deliverance/proxyapp.py	(original)
+++ z3/deliverance/branches/packaged/deliverance/proxyapp.py	Tue Oct 24 19:25:49 2006
@@ -5,30 +5,71 @@
 
 from paste.proxy import TransparentProxy
 from deliverance import wsgifilter
+from deliverance.relocateresponse import RelocateMiddleware
 
 class ProxyDeliveranceApp(object):
 
     def __init__(self, theme_uri, rule_uri, proxy,
-                 transparent=False):
+                 transparent=False, debug_headers=False,
+                 relocate_content=False):
         self.theme_uri = theme_uri,
         self.rule_uri = rule_uri,
         self.proxy = proxy
         self.transparent = transparent
+        self.debug_headers = debug_headers  # read by make_app(), so it must be set before the call below
         self.subapp = self.make_app()
         self.deliverance_app = wsgifilter.DeliveranceMiddleware(
             self.subapp, theme_uri, rule_uri)
+        self.relocate_content = relocate_content  # NOTE(review): self.theme_uri/self.rule_uri above end in a trailing comma, so they hold 1-tuples
 
     def make_app(self):
         if self.transparent:
             force_host = self.proxy
         else:
             force_host = None
-        return TransparentProxy(force_host=force_host)
+        app = TransparentProxy(force_host=force_host)
+        if self.debug_headers:  # optionally wrap the proxy so request/response headers are printed
+            app = DebugHeaders(app)
+        return app
 
     def __call__(self, environ, start_response):
+        if self.relocate_content:  # rewrite Location headers and HTML links that point at the proxied host
+            reloc_app = RelocateMiddleware(self.run_subapp, old_href='http://'+self.proxy)
+            return reloc_app(environ, start_response)
+        else:
+            return self.run_subapp(environ, start_response)
+
+    def run_subapp(self, environ, start_response):
         if not self.transparent:
             # @@: Set forwarded header
             environ['HTTP_HOST'] = self.proxy
+            if ':' in self.proxy:  # proxy is either "host" or "host:port"
+                server, port = self.proxy.split(':', 1)
+            else:
+                server, port = self.proxy, '80'  # no explicit port given: use 80
+            environ['SERVER_NAME'] = server
+            environ['SERVER_PORT'] = port
         return self.deliverance_app(
             environ, start_response)
     
+
+class DebugHeaders(object):  # WSGI middleware that prints request and response headers to stdout
+
+    def __init__(self, app):
+        self.app = app
+
+    def __call__(self, environ, start_response):
+        from paste.request import construct_url
+        print 'Incoming headers: (%s %s)' % (
+            environ['REQUEST_METHOD'], construct_url(environ))
+        for name, value in sorted(environ.items()):
+            if not name.startswith('HTTP_'):  # only HTTP_* environ keys are printed
+                continue
+            name = name[5:].replace('_', '-').title()  # e.g. HTTP_USER_AGENT -> User-Agent
+            print '  %s: %s' % (name, value)
+        def repl_start_response(status, headers, exc_info=None):
+            print 'Outgoing headers: (%s)' % status
+            for name, value in headers:
+                print '  %s: %s' % (name.title(), value)
+            return start_response(status, headers, exc_info)  # WSGI: the app must get the write() callable back
+        return self.app(environ, repl_start_response)

Modified: z3/deliverance/branches/packaged/deliverance/proxycommand.py
==============================================================================
--- z3/deliverance/branches/packaged/deliverance/proxycommand.py	(original)
+++ z3/deliverance/branches/packaged/deliverance/proxycommand.py	Tue Oct 24 19:25:49 2006
@@ -18,10 +18,12 @@
 parser.add_option('-s', '--serve',
                   help="The interface to serve on (default 0.0.0.0:80)",
                   dest="serve",
+                  metavar="HOST",
                   default="0.0.0.0:80")
 parser.add_option('-p', '--proxy',
                   help="The host and port to proxy to (default localhost:8080)",
                   dest="proxy",
+                  metavar="PROXY_TO",
                   default='localhost:8080')
 parser.add_option('--theme',
                   help="The URI of the theme to use",
@@ -37,6 +39,15 @@
                   help="Show tracebacks when an error occurs (use twice for fancy/dangerous traceback)",
                   action="count",
                   dest="debug")
+parser.add_option('--request-log',
+                  help="Show an apache-style log of requests (use twice for more logging)",
+                  action="count",
+                  dest="request_log",
+                  default=0)
+parser.add_option('--rewrite',
+                  help="Rewrite all headers and links",
+                  action="store_true",
+                  dest="rewrite")
 
 def strip(prefix, string):
     if string.startswith(prefix):
@@ -61,11 +72,17 @@
             op = '--theme'
         print 'You must provide the %s option' % op
         sys.exit(2)
+    debug_headers = options.request_log > 1
     app = proxyapp.ProxyDeliveranceApp(
         theme_uri=options.theme,
         rule_uri=options.rule,
         proxy=proxy,
-        transparent=options.transparent)
+        transparent=options.transparent,
+        debug_headers=debug_headers,
+        relocate_content=options.rewrite)
+    if options.request_log:
+        from paste.translogger import TransLogger
+        app = TransLogger(app)
     if options.debug:
         if options.debug > 1:
             from paste.evalexception.middleware import EvalException

Added: z3/deliverance/branches/packaged/deliverance/relocateresponse.py
==============================================================================
--- (empty file)
+++ z3/deliverance/branches/packaged/deliverance/relocateresponse.py	Tue Oct 24 19:25:49 2006
@@ -0,0 +1,72 @@
+"""
+Takes a response (headers + content) and relocates it, changing domain
+names and paths.
+"""
+import fixuplinks
+import urlparse
+from paste.request import construct_url
+from paste.response import header_value
+
+def relocate_response(headers, content, base_href, old_href, new_href):
+    """
+    Relocates headers and content so that links under old_href (resolved
+    against base_href) point at new_href.  Returns (new_headers, new_content)
+    """
+    new_headers = relocate_headers(headers, base_href, old_href, new_href)
+    new_content = relocate_content(content, base_href, old_href, new_href)
+    return new_headers, new_content
+
+def relocate_headers(headers, base_href, old_href, new_href):  # returns a new list of (name, value) pairs
+    new_headers = []
+    for name, value in headers:
+        if name.lower() == 'location':  # only the Location header is rewritten
+            value = relocate_href(value, base_href, old_href, new_href)
+        new_headers.append((name, value))  # every other header is copied unchanged
+    return new_headers
+
+def relocate_content(content, base_href, old_href, new_href):  # content is HTML text; returns the rewritten text
+    def sub_link(href):  # per-link callback handed to fixuplinks
+        return relocate_href(href, base_href, old_href, new_href)
+    return fixuplinks.fixup_text_links(content, sub_link)
+
+def relocate_href(href, base_href, old_href, new_href):
+    real_href = urlparse.urljoin(base_href, href)  # resolve href against base_href
+    if not real_href.startswith(old_href):  # NOTE(review): plain string-prefix test, no host/path boundary check
+        return href  # not under old_href: return the link exactly as written
+    return new_href + real_href[len(old_href):]  # swap the old_href prefix for new_href
+
+class RelocateMiddleware(object):  # WSGI middleware: relocates Location headers and links in text/html bodies
+
+    def __init__(self, app, old_href):
+        self.app = app
+        if old_href.endswith(':80'):  # drop a trailing ":80" before it is used for prefix matching
+            old_href = old_href[:-3]
+        self.old_href = old_href
+
+    def __call__(self, environ, start_response):
+        new_href = construct_url(environ, path_info='')
+        base_href = construct_url(environ)
+        skipped = []  # non-empty once a non-HTML response has been passed straight through
+        written = []  # buffered body chunks of an HTML response
+        stat_headers = []  # [status, headers] held back for an HTML response
+        def repl_start_response(status, headers, exc_info=None):
+            headers = relocate_headers(headers, base_href, self.old_href, new_href)
+            content_type = header_value(headers, 'content-type')
+            if not content_type or not content_type.startswith('text/html'):  # non-HTML bodies are not buffered or rewritten
+                skipped.append(True)
+                return start_response(status, headers, exc_info)
+            stat_headers[:] = [status, headers]  # NOTE(review): exc_info is not kept for the buffered path
+            return written.append  # write() calls from the app are buffered as well
+        app_iter = self.app(environ, repl_start_response)
+        if skipped:
+            return app_iter
+        start_response(*stat_headers)  # NOTE(review): sent before the body is rewritten, so any Content-Length is not adjusted; fails if the app has not called start_response yet
+        try:
+            for chunk in app_iter:
+                written.append(chunk)
+        finally:
+            if hasattr(app_iter, 'close'):
+                app_iter.close()
+        content = ''.join(written)
+        content = relocate_content(content, base_href, self.old_href, new_href)  # rewrite links in the buffered HTML
+        return [content]


More information about the z3-checkins mailing list