"""
Implements the middleware that does the Deliverance transformations.
"""
import posixpath
import mimetypes
import os
import urllib
import urlparse
import re
import simplejson
import datetime
from webob import Request, Response
from webob import exc
from wsgiproxy.exactproxy import proxy_exact_request
from pygments import highlight as pygments_highlight
from pygments.lexers import XmlLexer, HtmlLexer
from pygments.formatters import HtmlFormatter
from tempita import HTMLTemplate, html
from lxml.etree import _Element, XMLSyntaxError
from lxml.html import fromstring, document_fromstring, tostring, Element
from deliverance.log import SavingLogger
from deliverance.security import display_logging, display_local_files, edit_local_files
from deliverance.util.filetourl import url_to_filename
from deliverance.editor.editorapp import Editor
from deliverance.rules import clientside_action
from deliverance.ruleset import RuleSet
# Names exported by ``from <this module> import *``.
__all__ = ['DeliveranceMiddleware',
'SubrequestRuleGetter',
'FileRuleGetter',
'make_deliverance_middleware' ]
class DeliveranceMiddleware(object):
"""
The middleware that implements the Deliverance transformations
"""
## FIXME: is log_factory etc very useful?
def __init__(self, app, rule_getter, log_factory=SavingLogger,
             log_factory_kw=None, default_theme=None):
    """
    Set up the middleware around the WSGI application ``app``.

    ``rule_getter``:
        callable invoked as ``rule_getter(resource_fetcher, app, req)``
        to obtain the rule set for a request
    ``log_factory``:
        callable invoked as ``log_factory(req, middleware, **log_factory_kw)``
        when the request environ carries no ``deliverance.log`` yet
    ``log_factory_kw``:
        extra keyword arguments for ``log_factory``; ``None`` (the
        default) means no extra arguments
    ``default_theme``:
        value returned by ``default_theme()``, or None
    """
    # A literal ``{}`` default would be one dict shared by every
    # instance; create a fresh one per instance instead.
    if log_factory_kw is None:
        log_factory_kw = {}
    self.app = app
    self.rule_getter = rule_getter
    self.log_factory = log_factory
    self.log_factory_kw = log_factory_kw
    self._default_theme = default_theme
    ## FIXME: clearly, this should not be a dictionary:
    # URLs already seen to produce HTML, and the titles found for them.
    self.known_html = set()
    self.known_titles = {}
def default_theme(self, environ):
"""
The URI of the global default theme, if one is set, or None.
This is a method that takes the WSGI environ so that subclasses
can override the behavior (for example setting the default theme
to a URI template that is interpolated on every request)
"""
return self._default_theme
def log_description(self, log=None):
    """
    Return the label under which this context appears in the log.

    ``log`` is accepted but not used.
    """
    description = 'Deliverance'
    return description
def notheme_request(self, req):
    """
    Tell whether theming should be skipped for ``req``.

    Returns True when the query string has a ``deliv_notheme``
    variable (whatever its value), and None otherwise.
    """
    if 'deliv_notheme' not in req.GET:
        return None
    return True
def __call__(self, environ, start_response):
    """
    WSGI entry point.

    Requests flagged by ``notheme_request`` go straight to the wrapped
    application.  Requests under ``/.deliverance`` are dispatched to
    ``internal_app``.  Everything else is fetched from the wrapped
    application and, when the response is non-empty ``text/html`` that
    is not a 301/302/304, run through the rule set before being
    returned.
    """
    req = Request(environ)
    if self.notheme_request(req):
        return self.app(environ, start_response)
    req.environ['deliverance.base_url'] = req.application_url
    ## FIXME: copy_get?:
    orig_req = Request(environ.copy())
    if 'deliverance.log' not in req.environ:
        ## FIXME: should this be put in both the orig_req and this req?
        req.environ['deliverance.log'] = self.log_factory(
            req, self, **self.log_factory_kw)
    log = req.environ['deliverance.log']

    def resource_fetcher(url, retry_inner_if_not_200=False):
        """
        Return the Response object for the given URL
        """
        return self.get_resource(url, orig_req, log, retry_inner_if_not_200)

    # Internal debugging/utility URLs never reach the wrapped app.
    if req.path_info_peek() == '.deliverance':
        req.path_info_pop()
        internal_resp = self.internal_app(req, resource_fetcher)
        return internal_resp(environ, start_response)

    rule_set = self.rule_getter(resource_fetcher, self.app, orig_req)
    clientside = rule_set.check_clientside(req, log)
    if clientside and req.url in self.known_html:
        if not req.cookies.get('jsEnabled'):
            log.debug(self, 'Not doing clientside theming because jsEnabled cookie not set')
        else:
            log.debug(self, 'Responding to %s with a clientside theme' % req.url)
            themed = self.clientside_response(req, rule_set, resource_fetcher, log)
            return themed(environ, start_response)

    resp = req.get_response(self.app)
    ## FIXME: also XHTML?
    ## FIXME: remove from known_html?
    # XXX: Not clear why redirects / not-modified responses would have a
    # content type, but they sometimes do (from Zope/Plone, at least) and
    # that then breaks when trying to apply a theme.
    pass_through = (resp.content_type != 'text/html'
                    or resp.status_int in (301, 302, 304)
                    or resp.content_length == 0)
    if pass_through:
        return resp(environ, start_response)

    if clientside and req.url not in self.known_html:
        log.debug(self, '%s would have been a clientside check; in future will be since we know it is HTML'
                  % req.url)
        # Remember the title before the rules rewrite the document.
        self.known_titles[req.url] = self._get_title(resp.body)
        self.known_html.add(req.url)
    resp = rule_set.apply_rules(req, resp, resource_fetcher, log,
                                default_theme=self.default_theme(environ))
    if clientside:
        resp.decode_content()
        resp.body = self._substitute_jsenable(resp.body)
    final_resp = log.finish_request(req, resp)
    return final_resp(environ, start_response)
# Pattern that ``_get_title`` searches a page body with (group 1 is the
# title text) and that ``clientside_response`` substitutes into the theme.
# NOTE(review): this literal spans a line break and appears to have lost
# its markup (presumably the opening/closing title tags) -- confirm
# against the upstream source before relying on it.
_title_re = re.compile(r'
(.*?)', re.I|re.S)
def _get_title(self, body):
match = self._title_re.search(body)
if match:
return match.group(1)
else:
return None
# Pattern whose first match marks where ``_substitute_jsenable`` inserts
# the script snippet.
# NOTE(review): the pattern is empty here; it has presumably lost its
# markup (likely the closing head tag) -- confirm against upstream.
_end_head_re = re.compile(r'', re.I)
# Snippet inserted by ``_substitute_jsenable``; any ``__DATE__`` in it is
# replaced with ``_future_date``.
# NOTE(review): the literal is empty here, presumably for the same reason.
_jsenable_js = '''\
'''
# Timestamp ten years (10*365 days) ahead, computed once when this statement
# runs.  NOTE(review): it is labelled GMT but derived from
# ``datetime.datetime.now()``, which is local time -- confirm intent.
_future_date = (datetime.datetime.now() + datetime.timedelta(days=10*365)).strftime('%a, %d-%b-%Y %H:%M:%S GMT')
def _substitute_jsenable(self, body):
match = self._end_head_re.search(body)
if not match:
return body
js = self._jsenable_js.replace('__DATE__', self._future_date)
return body[:match.start()] + js + body[match.start():]
def clientside_response(self, req, rule_set, resource_fetcher, log):
    """
    Build the response used for clientside theming: the theme document
    itself, with the clientside Javascript inserted at the top of its
    head and the title swapped for the one remembered for ``req.url``.

    ``req``:
        the current request
    ``rule_set``:
        the rule set; its default theme is resolved and fetched
    ``resource_fetcher``:
        callable used by ``rule_set.get_theme`` to fetch the theme
    ``log``:
        the logging object

    Returns a conditional ``Response`` with an ETag set.
    """
    theme_href = rule_set.default_theme.resolve_href(req, None, log)
    theme_doc = rule_set.get_theme(theme_href, resource_fetcher, log)
    js = CLIENTSIDE_JAVASCRIPT.replace('__DELIVERANCE_URL__', req.application_url)
    ## NOTE(review): the literal below (and the title replacement further
    ## down) look like they lost their markup -- confirm against upstream.
    theme_doc.head.insert(0, fromstring('''\
''' % js))
    theme = tostring(theme_doc)
    ## FIXME: cache this, use the actual subresponse to get proper last-modified, etc
    title = self.known_titles.get(req.url)
    if title:
        replacement = '%s' % title
        # Substitute through a function: a plain replacement string is
        # treated as a template, so a title containing backslashes or
        # group references would be mangled or raise.
        theme = self._title_re.sub(lambda match: replacement, theme)
    resp = Response(theme, conditional_response=True)
    if not resp.etag:
        resp.md5_etag()
    return resp
def get_resource(self, url, orig_req, log, retry_inner_if_not_200=False):
    """
    Gets the resource at the given url, using the original request
    `orig_req` as the basis for constructing the subrequest.
    Returns a `webob.Response` object.

    ``file:`` URLs are read from disk (403 if local file display is not
    allowed or the path is a directory, 404 if the file is missing).

    If `url.startswith(orig_req.application_url + '/')`, then Deliverance
    will try to fetch the resource by making a subrequest to the app that
    is being wrapped by Deliverance, instead of an external subrequest.

    This can cause problems in some setups -- see #16.  To work around
    this, if `retry_inner_if_not_200` is True, then, in the situation
    described above, non-200 responses from the inner app will be tossed
    out, and the request will be retried as an external http request.
    Currently this is used only by RuleSet.get_theme

    Any other URL is fetched with an external subrequest built by
    ``build_external_subrequest``.
    """
    assert url is not None
    if url.lower().startswith('file:'):
        if not display_local_files(orig_req):
            ## FIXME: not sure if this applies generally; some
            ## calls to get_resource might be because of a more
            ## valid subrequest than displaying a file
            return exc.HTTPForbidden(
                "You cannot access file: URLs (like %r)" % url)
        filename = url_to_filename(url)
        if not os.path.exists(filename):
            return exc.HTTPNotFound(
                "The file %r was not found" % filename)
        if os.path.isdir(filename):
            return exc.HTTPForbidden(
                "You cannot display a directory (%r)" % filename)
        subresp = Response()
        content_type, dummy = mimetypes.guess_type(filename)
        if not content_type:
            content_type = 'application/octet-stream'
        subresp.content_type = content_type
        ## FIXME: reading the whole thing obviously ain't great:
        f = open(filename, 'rb')
        try:
            # try/finally so the handle is closed even if read() fails
            subresp.body = f.read()
        finally:
            f.close()
        return subresp

    elif url.startswith(orig_req.application_url + '/'):
        subreq = orig_req.copy_get()
        subreq.environ['deliverance.subrequest_original_environ'] = orig_req.environ
        new_path_info = url[len(orig_req.application_url):]
        query_string = ''
        if '?' in new_path_info:
            # Split on the first '?' only: a query string may itself
            # contain '?', which would otherwise break the unpacking.
            new_path_info, query_string = new_path_info.split('?', 1)
        new_path_info = urllib.unquote(new_path_info)
        assert new_path_info.startswith('/')
        subreq.path_info = new_path_info
        subreq.query_string = query_string
        subresp = subreq.get_response(self.app)
        ## FIXME: error if not HTML?
        ## FIXME: handle redirects?
        ## FIXME: handle non-200?
        log.debug(self, 'Internal request for %s: %s content-type: %s',
                  url, subresp.status, subresp.content_type)
        if not retry_inner_if_not_200:
            return subresp
        if subresp.status_int == 200:
            return subresp
        elif 'x-deliverance-theme-subrequest' in orig_req.headers:
            # This request is already an external theme subrequest (see
            # build_external_subrequest), so do not retry again.
            log.debug(self,
                      'Internal request for %s was not 200 OK; '
                      'returning it anyway.' % url)
            return subresp
        else:
            log.debug(self,
                      'Internal request for %s was not 200 OK; retrying as external request.' % url)

    ## FIXME: pluggable subrequest handler?
    subreq = self.build_external_subrequest(url, orig_req, log)
    subresp = subreq.get_response(proxy_exact_request)
    log.debug(self, 'External request for %s: %s content-type: %s',
              url, subresp.status, subresp.content_type)
    return subresp
def build_external_subrequest(self, url, orig_req, log):
    """
    Build the webob.Request used when Deliverance fetches a resource
    with an external subrequest (rather than from a file:// URL or
    from the wrapped application).

    This implementation returns a blank Request for ``url`` carrying
    only the ``x-deliverance-theme-subrequest`` header.  Subclasses
    can override it, e.g. to carry selected headers of the original
    request over to the subrequest.

    ``url``:
        The URL of the resource to be fetched
    ``orig_req``:
        The original request received by Deliverance (unused here)
    ``log``:
        The logging object (unused here)
    """
    external_req = Request.blank(url)
    # Marks the request as a theme subrequest.
    external_req.headers['x-deliverance-theme-subrequest'] = "1"
    return external_req
def link_to(self, req, url, source=False, line=None, selector=None,
            browse=False):
    """
    Build a debugging link to the ``/.deliverance/view`` page for
    ``url``, relative to ``req.environ['deliverance.base_url']``.

    ``source=True``:
        link to the highlighted source for the file.
    ``line=#``:
        link to the specific line number (adds a ``#code-<line>``
        fragment)
    ``selector="css/xpath"``:
        highlight the element that matches that css/xpath selector
        (adds a ``#deliverance-selection`` fragment)
    ``browse=True``:
        link to a display that lets you see ids and classes in the
        document

    When both ``selector`` and ``line`` are given, both fragments are
    appended, in that order.
    """
    view_url = req.environ['deliverance.base_url'] + '/.deliverance/view'
    query = {'url': url}
    if source:
        query['source'] = '1'
    if line:
        query['line'] = str(line)
    if selector:
        query['selector'] = selector
    if browse:
        query['browse'] = '1'
    fragments = []
    if selector:
        fragments.append('#deliverance-selection')
    if line:
        fragments.append('#code-%s' % line)
    link = view_url + '?' + urllib.urlencode(query)
    for fragment in fragments:
        link += fragment
    return link
def internal_app(self, req, resource_fetcher):
"""
Handles all internal (``/.deliverance``) requests.
"""
segment = req.path_info_peek()
method = 'action_%s' % segment
method = getattr(self, method, None)
if not display_logging(req) and not getattr(method, 'exposed', False):
return exc.HTTPForbidden(
"Logging is not enabled for you")
req.path_info_pop()
if not method:
return exc.HTTPNotFound('There is no %r action' % segment)
try:
return method(req, resource_fetcher)
except exc.HTTPException, e:
return e
def action_media(self, req, resource_fetcher):
    """
    Serves up media from the ``media`` directory that sits next to
    this module.

    Responses typed ``application/x-javascript`` are relabelled as
    ``application/javascript``.  ``resource_fetcher`` is unused.
    """
    ## FIXME: I'm not using this currently, because the Javascript
    ## didn't work.  Dunno why.
    from paste.urlparser import StaticURLParser
    media_dir = os.path.join(os.path.dirname(__file__), 'media')
    static_app = StaticURLParser(media_dir)
    ## FIXME: need to pop some segments from the req?
    req.path_info_pop()
    media_resp = req.get_response(static_app)
    if media_resp.content_type == 'application/x-javascript':
        media_resp.content_type = 'application/javascript'
    return media_resp
def action_view(self, req, resource_fetcher):
    """
    Views files; ``.link_to()`` creates links that go to this
    method.

    Query variables:

    ``url``:
        the resource to display (required)
    ``source``, ``browse``:
        integer flags selecting the highlighted-source or id/class
        browser view
    ``selector``:
        css/xpath selector selecting the selection view

    The views are tried in the order source, browse, selector.
    Returns 400 when ``url`` is missing, when a flag is not an
    integer, or when none of the three views was requested.
    """
    url = req.GET.get('url')
    if not url:
        # Previously a missing ``url`` surfaced as a KeyError.
        return exc.HTTPBadRequest(
            "You must have a query variable url")
    try:
        source = int(req.GET.get('source', '0'))
        browse = int(req.GET.get('browse', '0'))
    except ValueError:
        return exc.HTTPBadRequest(
            "The query variables source and browse must be integers")
    selector = req.GET.get('selector', '')
    # Validate before fetching so that a bad request costs no subrequest.
    if not (source or browse or selector):
        return exc.HTTPBadRequest(
            "You must have a query variable source, browse, or selector")
    subresp = resource_fetcher(url)
    if source:
        return self.view_source(req, subresp, url)
    elif browse:
        return self.view_browse(req, subresp, url)
    return self.view_selection(req, subresp, url)
def action_edit_rules(self, req, resource_fetcher):
    """
    Return an ``Editor`` application for the rule file of this request.

    Returns 403 when editing local files is not allowed for the
    request environ, or when the rule set's source location is not a
    ``file:`` URL.
    """
    if not edit_local_files(req.environ):
        return exc.HTTPForbidden('Editing is forbidden')
    rule_set = self.rule_getter(resource_fetcher, self.app, req)
    location = rule_set.source_location
    if not location.startswith('file:'):
        return exc.HTTPForbidden('The rule location (%s) is not a local file' % location)
    rule_path = url_to_filename(location)
    editor_title = 'rule file %s' % os.path.basename(rule_path)
    return Editor(filename=rule_path, force_syntax='delivxml', title=editor_title)
def view_source(self, req, resp, url):
    """
    View the highlighted source (from `action_view`).

    Responses whose content type starts with ``application/xml`` are
    highlighted as XML; everything else is highlighted as HTML.  The
    result is a full HTML page with line numbers and ``code`` line
    anchors.  ``req`` and ``url`` are unused.
    """
    if resp.content_type.startswith('application/xml'):
        lexer = XmlLexer()
    else:
        ## FIXME: what then?  (non-HTML types also get the HTML lexer)
        lexer = HtmlLexer()
    highlighted = pygments_highlight(
        resp.body, lexer,
        HtmlFormatter(full=True, linenos=True, lineanchors='code'))
    return Response(highlighted)
def view_browse(self, req, resp, url):
"""
View the id/class browser (from `action_view`)
"""
import re
body = resp.body
f = open(os.path.join(os.path.dirname(__file__), 'media', 'browser.js'))
content = f.read()
f.close()
extra_head = '''
''' % (
content, posixpath.dirname(req.GET['url']) + '/')
match = re.search(r'', body, re.I)
if match:
body = body[:match.end()] + extra_head + body[match.end():]
else:
body = extra_head + body
extra_body = '''