[z3-checkins] r33676 - z3/deliverance/branches/packaged/deliverance
ianb at codespeak.net
ianb at codespeak.net
Tue Oct 24 19:25:52 CEST 2006
Author: ianb
Date: Tue Oct 24 19:25:49 2006
New Revision: 33676
Added:
z3/deliverance/branches/packaged/deliverance/fixuplinks.py (contents, props changed)
z3/deliverance/branches/packaged/deliverance/relocateresponse.py (contents, props changed)
Modified:
z3/deliverance/branches/packaged/deliverance/proxyapp.py
z3/deliverance/branches/packaged/deliverance/proxycommand.py
Log:
Added code to log requests more verbosely in the proxy app/command. Added some routines to fix up/translate links in a response, to help us proxy sites that don't want to be proxied
Added: z3/deliverance/branches/packaged/deliverance/fixuplinks.py
==============================================================================
--- (empty file)
+++ z3/deliverance/branches/packaged/deliverance/fixuplinks.py Tue Oct 24 19:25:49 2006
@@ -0,0 +1,46 @@
+from htmlserialize import decodeAndParseHTML, tostring
+import re
+
+def fixup_text_links(doc, link_repl_func, remove_base_tags=True):
+    """
+    Text-in/text-out variant of fixup_links(): the HTML text in doc is
+    parsed, its links are rewritten in place by fixup_links() (with the
+    same link_repl_func and remove_base_tags), and the re-serialized
+    document is returned.
+    """
+    tree = decodeAndParseHTML(doc)
+    fixup_links(tree, link_repl_func, remove_base_tags=remove_base_tags)
+    return tostring(tree)
+
+def fixup_links(doc, link_repl_func,
+                remove_base_tags=True):
+    """
+    Rewrites the links of an already-parsed (lxml) document in place.
+
+    When remove_base_tags is true, <base href=...> elements are
+    removed first.  After that the value of every href and src
+    attribute is replaced by link_repl_func(value), and finally the
+    url(...) references in the head's style elements are handled by
+    fixup_css_links().
+    """
+    if remove_base_tags:
+        remove_base_tags_from_document(doc)
+
+    for attr_name in ('href', 'src'):
+        query = '//*[@%s]' % attr_name
+        for node in doc.xpath(query):
+            current = node.attrib[attr_name]
+            node.attrib[attr_name] = link_repl_func(current)
+
+    fixup_css_links(doc, link_repl_func)
+
+def remove_base_tags_from_document(doc):
+    """
+    Removes every <base> element that has an href attribute from the
+    parsed document, modifying the document in place.
+    """
+    for base_el in doc.xpath('//base[@href]'):
+        base_el.getparent().remove(base_el)
+
+CSS_URL_PAT = re.compile(r'url\((.*?)\)',re.I)
+def fixup_css_links(doc, link_repl_func):
+    """
+    Rewrites, in place, every url(...) reference found in the text of
+    the document's //head/style elements.
+
+    The URL inside each url(...) is passed through link_repl_func and
+    replaced by that function's return value.  Whitespace around the
+    URL is dropped, and a matching pair of single or double quotes
+    around it is kept in the output but not handed to link_repl_func.
+    """
+    def absuri(matchobj):
+        target = matchobj.group(1).strip()
+        quote = ''
+        # CSS allows url('...') and url("..."); the quotes are not part
+        # of the link, so peel them off before calling link_repl_func
+        # and put them back afterwards.
+        if (len(target) >= 2 and target[0] in '\'"'
+                and target[-1] == target[0]):
+            quote = target[0]
+            target = target[1:-1]
+        return 'url(%s%s%s)' % (quote, link_repl_func(target), quote)
+    els = doc.xpath('//head/style')
+    for el in els:
+        if el.text:
+            # CSS_URL_PAT is a module-level constant; this is a plain
+            # function, so there is no self to look it up on.
+            el.text = CSS_URL_PAT.sub(absuri, el.text)
+
Modified: z3/deliverance/branches/packaged/deliverance/proxyapp.py
==============================================================================
--- z3/deliverance/branches/packaged/deliverance/proxyapp.py (original)
+++ z3/deliverance/branches/packaged/deliverance/proxyapp.py Tue Oct 24 19:25:49 2006
@@ -5,30 +5,71 @@
from paste.proxy import TransparentProxy
from deliverance import wsgifilter
+from deliverance.relocateresponse import RelocateMiddleware
class ProxyDeliveranceApp(object):
def __init__(self, theme_uri, rule_uri, proxy,
- transparent=False):
+ transparent=False, debug_headers=False,
+ relocate_content=False):
self.theme_uri = theme_uri,
self.rule_uri = rule_uri,
self.proxy = proxy
self.transparent = transparent
+ self.debug_headers = debug_headers
self.subapp = self.make_app()
self.deliverance_app = wsgifilter.DeliveranceMiddleware(
self.subapp, theme_uri, rule_uri)
+ self.relocate_content = relocate_content
def make_app(self):
if self.transparent:
force_host = self.proxy
else:
force_host = None
- return TransparentProxy(force_host=force_host)
+ app = TransparentProxy(force_host=force_host)
+ if self.debug_headers:
+ app = DebugHeaders(app)
+ return app
def __call__(self, environ, start_response):
+ if self.relocate_content:
+ reloc_app = RelocateMiddleware(self.run_subapp, old_href='http://'+self.proxy)
+ return reloc_app(environ, start_response)
+ else:
+ return self.run_subapp(environ, start_response)
+
+ def run_subapp(self, environ, start_response):
if not self.transparent:
# @@: Set forwarded header
environ['HTTP_HOST'] = self.proxy
+ if ':' in self.proxy:
+ server, port = self.proxy.split(':', 1)
+ else:
+ server, port = self.proxy, '80'
+ environ['SERVER_NAME'] = server
+ environ['SERVER_PORT'] = port
return self.deliverance_app(
environ, start_response)
+
+class DebugHeaders(object):
+    """
+    WSGI middleware that prints the request's HTTP_* headers and the
+    response's status and headers to stdout, then hands the request to
+    the wrapped application unchanged.
+    """
+
+    def __init__(self, app):
+        self.app = app
+
+    def __call__(self, environ, start_response):
+        from paste.request import construct_url
+        print 'Incoming headers: (%s %s)' % (
+            environ['REQUEST_METHOD'], construct_url(environ))
+        for name, value in sorted(environ.items()):
+            # Only environ keys that start with HTTP_ are printed
+            if not name.startswith('HTTP_'):
+                continue
+            # HTTP_USER_AGENT -> User-Agent
+            name = name[5:].replace('_', '-').title()
+            print ' %s: %s' % (name, value)
+        def repl_start_response(status, headers, exc_info=None):
+            print 'Outgoing headers: (%s)' % status
+            for name, value in headers:
+                print ' %s: %s' % (name.title(), value)
+            # start_response returns the write() callable; it has to be
+            # passed back so wrapped apps that use write() keep working.
+            return start_response(status, headers, exc_info)
+        return self.app(environ, repl_start_response)
Modified: z3/deliverance/branches/packaged/deliverance/proxycommand.py
==============================================================================
--- z3/deliverance/branches/packaged/deliverance/proxycommand.py (original)
+++ z3/deliverance/branches/packaged/deliverance/proxycommand.py Tue Oct 24 19:25:49 2006
@@ -18,10 +18,12 @@
parser.add_option('-s', '--serve',
help="The interface to serve on (default 0.0.0.0:80)",
dest="serve",
+ metavar="HOST",
default="0.0.0.0:80")
parser.add_option('-p', '--proxy',
help="The host and port to proxy to (default localhost:8080)",
dest="proxy",
+ metavar="PROXY_TO",
default='localhost:8080')
parser.add_option('--theme',
help="The URI of the theme to use",
@@ -37,6 +39,15 @@
help="Show tracebacks when an error occurs (use twice for fancy/dangerous traceback)",
action="count",
dest="debug")
+parser.add_option('--request-log',
+ help="Show an apache-style log of requests (use twice for more logging)",
+ action="count",
+ dest="request_log",
+ default=0)
+parser.add_option('--rewrite',
+ help="Rewrite all headers and links",
+ action="store_true",
+ dest="rewrite")
def strip(prefix, string):
if string.startswith(prefix):
@@ -61,11 +72,17 @@
op = '--theme'
print 'You must provide the %s option' % op
sys.exit(2)
+ debug_headers = options.request_log > 1
app = proxyapp.ProxyDeliveranceApp(
theme_uri=options.theme,
rule_uri=options.rule,
proxy=proxy,
- transparent=options.transparent)
+ transparent=options.transparent,
+ debug_headers=debug_headers,
+ relocate_content=options.rewrite)
+ if options.request_log:
+ from paste.translogger import TransLogger
+ app = TransLogger(app)
if options.debug:
if options.debug > 1:
from paste.evalexception.middleware import EvalException
Added: z3/deliverance/branches/packaged/deliverance/relocateresponse.py
==============================================================================
--- (empty file)
+++ z3/deliverance/branches/packaged/deliverance/relocateresponse.py Tue Oct 24 19:25:49 2006
@@ -0,0 +1,72 @@
+"""
+Takes a response (headers + content) and relocates it, changing domain
+names and paths.
+"""
+import fixuplinks
+import urlparse
+from paste.request import construct_url
+from paste.response import header_value
+
+def relocate_response(headers, content, base_href, old_href, new_href):
+    """
+    Relocates a whole response from old_href to new_href.
+
+    The headers go through relocate_headers() and the content through
+    relocate_content(), both with the same base_href, old_href and
+    new_href.  Returns (new_headers, new_content).
+    """
+    return (relocate_headers(headers, base_href, old_href, new_href),
+            relocate_content(content, base_href, old_href, new_href))
+
+def relocate_headers(headers, base_href, old_href, new_href):
+    """
+    Returns a new list of (name, value) header pairs in which the value
+    of every Location header (name compared case-insensitively) has
+    been passed through relocate_href().  All other headers are copied
+    unchanged; the input list is not modified.
+    """
+    def relocated_value(name, value):
+        if name.lower() == 'location':
+            return relocate_href(value, base_href, old_href, new_href)
+        return value
+    return [(name, relocated_value(name, value))
+            for name, value in headers]
+
+def relocate_content(content, base_href, old_href, new_href):
+    """
+    Returns content with its links rewritten: the text is handed to
+    fixuplinks.fixup_text_links(), and every link found there is
+    replaced by the result of relocate_href() for that link.
+    """
+    return fixuplinks.fixup_text_links(
+        content,
+        lambda href: relocate_href(href, base_href, old_href, new_href))
+
+def relocate_href(href, base_href, old_href, new_href):
+    """
+    Returns href moved from old_href to new_href.
+
+    href is first resolved against base_href.  If the resolved URL
+    lies under old_href, the old_href prefix is replaced by new_href
+    and the result (always absolute) is returned; otherwise href is
+    returned exactly as given.
+    """
+    real_href = urlparse.urljoin(base_href, href)
+    if not real_href.startswith(old_href):
+        return href
+    rest = real_href[len(old_href):]
+    # A bare prefix match is not enough: with old_href http://host it
+    # would also catch http://host:8080/... and http://host.other/...
+    # The prefix must end on a URL component boundary.
+    if (rest and old_href and not old_href.endswith('/')
+            and rest[0] not in '/?#'):
+        return href
+    return new_href + rest
+
+class RelocateMiddleware(object):
+    """
+    WSGI middleware that relocates the wrapped app's responses from
+    old_href to the URL the current request was made to.
+
+    Location headers are rewritten on every response.  The body is
+    buffered and rewritten only when the Content-Type starts with
+    text/html; any other response is passed through as the app
+    produced it.
+    """
+
+    def __init__(self, app, old_href):
+        self.app = app
+        # Drop an explicit ":80" so the prefix compares equal to URLs
+        # that leave the port out.
+        if old_href.endswith(':80'):
+            old_href = old_href[:-3]
+        self.old_href = old_href
+
+    def __call__(self, environ, start_response):
+        new_href = construct_url(environ, path_info='')
+        base_href = construct_url(environ)
+        skipped = []
+        written = []
+        stat_headers = []
+        def repl_start_response(status, headers, exc_info=None):
+            headers = relocate_headers(headers, base_href, self.old_href, new_href)
+            content_type = header_value(headers, 'content-type')
+            if not content_type or not content_type.startswith('text/html'):
+                # Not HTML: forward right away, body stays untouched
+                skipped.append(True)
+                return start_response(status, headers, exc_info)
+            # HTML: hold the response back until the body is rewritten;
+            # exc_info is kept so it can be replayed with the rest.
+            stat_headers[:] = [status, headers, exc_info]
+            return written.append
+        app_iter = self.app(environ, repl_start_response)
+        if skipped:
+            return app_iter
+        # Collect the body before replaying start_response: an app may
+        # call start_response only once its iterable is being consumed.
+        try:
+            for chunk in app_iter:
+                written.append(chunk)
+        finally:
+            if hasattr(app_iter, 'close'):
+                app_iter.close()
+        if skipped:
+            # start_response turned up during iteration and was already
+            # forwarded as non-HTML; hand the chunks on unchanged.
+            return written
+        status, headers, exc_info = stat_headers
+        content = ''.join(written)
+        if content:
+            content = relocate_content(content, base_href, self.old_href, new_href)
+            # Rewriting links can change the body size, so the app's
+            # Content-Length no longer applies.
+            # NOTE(review): assumes relocate_content returns a byte
+            # string, so that len() is the size on the wire -- confirm.
+            headers = [(name, value) for name, value in headers
+                       if name.lower() != 'content-length']
+            headers.append(('Content-Length', str(len(content))))
+        start_response(status, headers, exc_info)
+        return [content]
More information about the z3-checkins
mailing list