[z3-checkins] r56104 - in z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance: . tests util
ianb at codespeak.net
ianb at codespeak.net
Fri Jun 27 00:41:35 CEST 2008
Author: ianb
Date: Fri Jun 27 00:41:33 2008
New Revision: 56104
Added:
z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/pagematch.py (contents, props changed)
z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/ruleset.py (contents, props changed)
z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/tests/test_middleware.txt (contents, props changed)
z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/tests/test_pagematch.txt (contents, props changed)
Modified:
z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/log.py
z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/rules.py
z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/selector.py
z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/stringmatch.py
z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/tests/test_selection.txt
z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/tests/test_stringmatch.txt
z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/util/converters.py
Log:
Added the page/class matching and some middleware, not yet complete
Modified: z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/log.py
==============================================================================
--- z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/log.py (original)
+++ z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/log.py Fri Jun 27 00:41:33 2008
@@ -1,6 +1,17 @@
+"""
+Logging for deliverance.
+
+This does not use the standard :mod:`logging` module because that
+module is not easily applied and inspected locally. We want the log
+messages to be strictly per-request.
+"""
+
import logging
class SavingLogger(object):
+ """
+ Logger that saves all its messages locally.
+ """
def __init__(self, description=True):
self.messages = []
if description:
Added: z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/pagematch.py
==============================================================================
--- (empty file)
+++ z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/pagematch.py Fri Jun 27 00:41:33 2008
@@ -0,0 +1,165 @@
+"""
+Handles the <match> tag and matching requests and responses against these patterns.
+"""
+
+from deliverance.stringmatch import compile_matcher, compile_header_matcher
+from deliverance.util.converters import asbool, html_quote
+
+__all__ = ['MatchSyntaxError', 'Match']
+
+class MatchSyntaxError(Exception):
+ """
+ Raised if there's some error with the matching.
+ """
+
+class Match(object):
+ """
+ Represents the <match> tags.
+
+ You can call this object to apply the match
+ """
+
+ def __init__(self, classes, path=None, domain=None,
+ request_header=None, response_header=None, environ=None,
+ abort=False, last=False, source_location=None):
+ self.classes = classes
+ self.path = path
+ self.domain = domain
+ self.request_header = request_header
+ self.response_header = response_header
+ self.environ = environ
+ self.abort = abort
+ self.last = last
+ self.source_location = source_location
+
+ @classmethod
+ def parse_xml(cls, el, source_location):
+ """
+ Creates an instance of Match from the given parsed XML element.
+ """
+ assert el.tag == 'match'
+ classes = el.get('class', '').split()
+ abort = asbool(el.get('abort'))
+ if not abort and not classes:
+ ## FIXME: source location
+ raise MatchSyntaxError(
+ "You must provide some classes in the class attribute")
+ if abort and classes:
+ ## FIXME: source location
+ raise MatchSyntaxError(
+ 'You cannot provide both abort="1" and class="%s"'
+ % (' '.join(classes)))
+ path = cls._parse_attr(el, 'path', default='path')
+ domain = cls._parse_attr(el, 'domain', default='wildcard')
+ request_header = cls._parse_attr(el, 'request-header', default='exact', header=True)
+ response_header = cls._parse_attr(el, 'response-header', default='exact', header=True)
+ environ = cls._parse_attr(el, 'environ', default='exact', header=True)
+ last = asbool(el.get('last'))
+ return cls(
+ classes,
+ path=path,
+ domain=domain,
+ request_header=request_header,
+ response_header=response_header,
+ environ=environ,
+ abort=abort,
+ last=last,
+ source_location=source_location)
+
+ @staticmethod
+ def _parse_attr(el, attr, default=None, header=False):
+ """
+ Compiles a single string pattern
+ """
+ value = el.get(attr)
+ if value is None:
+ return None
+ if header:
+ return compile_header_matcher(value, default)
+ else:
+ return compile_matcher(value, default)
+
+ def __unicode__(self):
+ parts = [u'<match']
+ parts.append(u'class="%s"' % html_quote(' '.join(self.classes)))
+ for attr, value in [
+ ('path', self.path),
+ ('domain', self.domain),
+ ('request-header', self.request_header),
+ ('response-header', self.response_header),
+ ('environ', self.environ)]:
+ if value:
+ parts.append(u'%s="%s"' % (attr, html_quote(unicode(self.path))))
+ if self.abort:
+ parts.append(u'abort="1"')
+ if self.last:
+ parts.append(u'last="1"')
+ parts.append(u'/>')
+ return ' '.join(parts)
+
+ def __str__(self):
+ return unicode(self).encode('utf8')
+
+ def __call__(self, request, response_headers, log):
+ """
+ Checks this match against the given request and response_headers object.
+
+ `response_headers` should be a case-insensitive dictionary. `request` should be a
+ :class:`webob.Request` object.
+ """
+ result = True
+ if self.abort:
+ class_name = 'abort'
+ elif len(self.classes) > 1:
+ class_name = '(%s)' % ' '.join(self.classes)
+ else:
+ class_name = self.classes[0]
+ if self.path:
+ if not self.path(request.path):
+ log.debug(self, 'Skipping class %s because request URL (%s) does not match path="%s"',
+ class_name, request.path, self.path)
+ return False
+ if self.domain:
+ host = request.host.split(':', 1)[0]
+ if not self.domain(host):
+ log.debug(self, 'Skipping class %s because request domain (%s) does not match domain="%s"',
+ class_name, host, self.domain)
+ return False
+ if self.request_header:
+ result, headers = self.request_header(request.headers)
+ if not result:
+ log.debug(self, 'Skipping class %s because request headers %s do not match request-header="%s"',
+ class_name, ', '.join(headers), self.request_header)
+ return False
+ if self.response_header:
+ result, headers = self.response_header(response_headers)
+ if not result:
+ ## FIXME: maybe distinguish <meta> headers and real headers?
+ log.debug(self, 'Skipping class %s because the response headers %s do not match response-header="%s"',
+ class_name, ', '.join(headers), self.response_header)
+ return False
+ if self.environ:
+ result, keys = self.environ(request.environ)
+ if not result:
+ log.debug(self, 'Skipping class %s because the request environ (keys %s) did not match environ="%s"',
+ class_name, ', '.join(keys), self.environ)
+ return False
+ return True
+
+def run_matches(matchers, request, response_headers, log):
+ """
+ Runs all the match objects in matchers, returning the list of matched classes.
+ """
+ results = []
+ for matcher in matchers:
+ if matcher(request, response_headers, log):
+ log.debug(matcher, '<match> matched request, adding classes %s',
+ ', '.join(matcher.classes))
+ for item in matcher.classes:
+ if item not in results:
+ results.append(item)
+ if matcher.last:
+ log.debug(matcher, 'Stopping matches (skipping %i matches)',
+ len(matchers) - matchers.index(matcher) - 1)
+ return results
+ return results
Modified: z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/rules.py
==============================================================================
--- z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/rules.py (original)
+++ z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/rules.py Fri Jun 27 00:41:33 2008
@@ -1,15 +1,12 @@
"""
-Represents individual rules
+Represents individual actions (<append> etc) and the RuleSet that puts them together
"""
from deliverance.exceptions import add_exception_info
-from deliverance.util.converters import asbool
+from deliverance.util.converters import asbool, html_quote
from deliverance.selector import Selector
from lxml import etree
-## A dictionary mapping element names to their rule classes:
-rules = {}
-
class RuleSyntaxError(Exception):
"""
Exception raised when a rule itself is invalid
@@ -28,22 +25,83 @@
CONTENT_ATTRIB = 'x-a-marker-attribute-for-deliverance'
-def parse_rule(el, source_location):
- if el.tag not in rules:
+class Rule(object):
+ """
+ This represents everything in a <rule></rule> section.
+ """
+
+ def __init__(self, classes, actions, theme, source_location):
+ self.classes = classes
+ self._actions = actions
+ self.theme = theme
+ self.source_location = source_location
+
+ @classmethod
+ def parse_xml(cls, el, source_location):
+ """
+ Creates a Rule object from a parsed XML <rule> element.
+ """
+ assert el.tag == 'rule'
+ classes = el.get('class', '').split()
+ if not classes:
+ classes = ['default']
+ theme = None
+ actions = []
+ for el in el.iterchildren():
+ if el.tag == 'theme':
+ ## FIXME: error if more than one theme
+ ## FIXME: error if no href
+ theme = el.get('href')
+ continue
+ action = parse_action(el, source_location)
+ actions.append(action)
+ return cls(classes, actions, theme, source_location)
+
+ def apply(self, content_doc, theme_doc, resource_fetcher, log):
+ """
+ Applies all the actions in this rule to the theme_doc
+
+ Note that this leaves behind attributes to mark elements that
+ originated in the content. You should call
+ :func:`remove_content_attribs` after applying all rules.
+ """
+ for action in self._actions:
+ action.apply(content_doc, theme_doc, resource_fetcher, log)
+ return theme_doc
+
+## A dictionary mapping element names to their action classes:
+_actions = {}
+
+def parse_action(el, source_location):
+ """
+ Parses an element into an action object.
+ """
+ if el.tag not in _actions:
raise RuleSyntaxError(
"There is no rule with the name %s"
% el.tag)
- Class = rules[el.tag]
+ Class = _actions[el.tag]
instance = Class.from_xml(el, source_location)
return instance
-class AbstractRule(object):
+class AbstractAction(object):
+ # This is the abstract class for all other rules
+ # These values are allowed for nocontent and notheme attributes:
_no_allowed = (None, 'ignore', 'abort', 'warn')
+ # These values are allowed for manycontent and manytheme attributes:
_many_allowed = _no_allowed + ('last', 'first', 'ignore:first', 'ignore:last',
'warn:first', 'warn:last')
def convert_error(self, name, value):
+ """
+ Taking a ``name="value"`` attribute for an error type
+ (nocontent, manycontent, etc) this returns ``(error_handler,
+ position)`` (where ``position`` is None for notheme/nocontent).
+
+ This applies the default value of "warn" and the default
+ position of "first".
+ """
if value == '':
value = None
if value:
@@ -74,9 +132,16 @@
value = ('abort', None)
elif not value:
value = ('warn', None)
+ if isinstance(value, basestring):
+ value = (value, None)
+ assert isinstance(value, tuple), 'Bad value: %r' % value
return value
def format_error(self, attr, value):
+ """
+ Takes the result of :meth:`convert_error` and serializes it
+ back into ``attribute="value"``
+ """
if attr in ('manytheme', 'manycontent'):
handler, pos = value
if pos == 'last':
@@ -133,10 +198,15 @@
return False
return True
+ # Set to the tag name in subclasses (append, prepend, etc):
name = None
+ # Set to true in subclasses if the move attribute means something:
move_supported = True
def describe_self(self):
+ """
+ A text description of this rule, for use in log messages and errors
+ """
parts = ['<%s' % self.name]
if getattr(self, 'content', None):
parts.append('content="%s"' % html_quote(self.content))
@@ -158,6 +228,10 @@
return ' '.join(parts) + ' />'
def describe_content_elements(self, els, children=False):
+ """
+ A text description of a list of content elements, for use in
+ log messages and errors.
+ """
text = ', '.join(el.tag for el in els)
if children:
return 'children of %s' % text
@@ -165,11 +239,19 @@
return text
def describe_theme_element(self, el):
+ """
+ A text description of a theme element, for use in log messages
+ and errors.
+ """
return el.tag
@classmethod
- def compile_selector(cls, tag, attr, source_location):
- value = tag.get(attr)
+ def compile_selector(cls, el, attr, source_location):
+ """
+ Compiles a single selector taken from the given attribute of
+ an element.
+ """
+ value = el.get(attr)
if value is None:
return None
return Selector.parse(value)
@@ -179,7 +261,7 @@
Takes a list of elements and prepares their children as a list and text,
so that you can do::
- text, els = preparent_content_children(self, els)
+ text, els = prepare_content_children(self, els)
add_text(theme_el, text)
theme_el.extend(els)
@@ -213,10 +295,8 @@
elements.remove(el)
return type, elements, attributes
-class TransformRule(AbstractRule):
- """
- Abstract class for the rules that move from the content to the theme (replace, append, prepend)
- """
+class TransformAction(AbstractAction):
+ # Abstract class for the rules that move from the content to the theme (replace, append, prepend)
def __init__(self, source_location, content, theme, if_content=None, content_href=None,
move=True, nocontent=None, notheme=None, manytheme=None, manycontent=None):
@@ -241,6 +321,9 @@
@classmethod
def from_xml(cls, tag, source_location):
+ """
+ Creates an instance of this object from the given parsed XML element
+ """
content = cls.compile_selector(tag, 'content', source_location)
theme = cls.compile_selector(tag, 'theme', source_location)
if_content = cls.compile_selector(tag, 'if_content', source_location)
@@ -254,6 +337,9 @@
manycontent=tag.get('manycontent'))
def apply(self, content_doc, theme_doc, resource_fetcher, log):
+ """
+ Applies this action to the theme_doc.
+ """
describe = log.describe
if self.content_href:
content_doc = resource_fetcher(self.content_href)
@@ -307,6 +393,9 @@
self.apply_transformation(content_type, content_els, attributes, theme_type, theme_el, log)
def join_attributes(self, attr1, attr2):
+ """
+ Joins the sets of attribute names in attr1 and attr2, where either might be None
+ """
if not attr1 and not attr2:
return None
if attr1 and not attr2:
@@ -318,8 +407,12 @@
attr |= attr2
return list(attr)
-class Replace(TransformRule):
+ def apply_transformation(self, content_type, content_els, attributes, theme_type, theme_el, log):
+ raise NotImplementedError
+
+class Replace(TransformAction):
+ # Compatible types of child and theme selector types:
_compatible_types = [
('children', 'elements'),
('children', 'children'),
@@ -328,7 +421,7 @@
('attributes', 'attributes'),
('tag', 'tag'),
]
-
+
def apply_transformation(self, content_type, content_els, attributes, theme_type, theme_el, log):
describe = log.describe
if theme_type == 'children':
@@ -337,11 +430,11 @@
theme_el.text = ''
if content_type == 'elements':
if self.move:
- # If we are working with copies, then the tails don't/shouldn't be moved
+ # If we aren't working with copies then we have to move the tails up as we remove the elements:
for el in reversed(content_els):
move_tail_upward(el)
else:
- # If we are working with copies, then we can just throw away the tails
+ # If we are working with copies, then we can just throw away the tails:
for el in content_els:
el.tail = None
theme_el.extend(content_els)
@@ -440,10 +533,11 @@
theme_el.attrib.update(content_els[0].attrib)
# "move" in this case doesn't mean anything
-rules['replace'] = Replace
+_actions['replace'] = Replace
-class Append(TransformRule):
+class Append(TransformAction):
+ # This is set to False in Prepend:
_append = True
_compatible_types = [
@@ -472,7 +566,7 @@
theme_el.text = None
theme_el[:0] = content_els
elif content_type == 'children':
- text, els = self.preparent_content_children(content_els)
+ text, els = self.prepare_content_children(content_els)
if self._append:
if len(theme_el):
add_tail(theme_el[-1], text)
@@ -556,14 +650,14 @@
else:
content_attrib.clear()
-rules['append'] = Append
+_actions['append'] = Append
class Prepend(Append):
_append = False
-rules['prepend'] = Prepend
+_actions['prepend'] = Prepend
-class Drop(AbstractRule):
+class Drop(AbstractAction):
def __init__(self, source_location, content, theme, if_content=None,
nocontent=None, notheme=None):
@@ -635,7 +729,7 @@
nocontent=tag.get('nocontent'),
notheme=tag.get('notheme'))
-rules['drop'] = Drop
+_actions['drop'] = Drop
## Element utilities ##
@@ -675,16 +769,25 @@
add_text(parent, el.tail)
def iter_self_and_ancestors(el):
+ """
+ Iterates over an element itself and all its ancestors (parent, grandparent, etc)
+ """
yield el
for item in el.iterancestors():
yield item
def mark_content_els(els):
+ """
+ Mark an element as originating from the content (this uses a special attribute)
+ """
for el in els:
## FIXME: maybe put something that is trackable to the rule that moved the element
el.set(CONTENT_ATTRIB, '1')
def is_content_element(el):
+ """
+ Tests if the element came from the content (which includes if any of its ancestors)
+ """
## FIXME: should this check children too?
for p in iter_self_and_ancestors(el):
if p.get(CONTENT_ATTRIB):
@@ -692,11 +795,9 @@
return False
def remove_content_attribs(doc):
+ """
+ Remove the markers placed by :func:`mark_content_els`
+ """
for p in doc.getiterator():
if p.get(CONTENT_ATTRIB, None) is not None:
del p.attrib[CONTENT_ATTRIB]
-
-from cgi import escape as cgi_escape
-def html_quote(s):
- s = unicode(s)
- return cgi_escape(s, True)
Added: z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/ruleset.py
==============================================================================
--- (empty file)
+++ z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/ruleset.py Fri Jun 27 00:41:33 2008
@@ -0,0 +1,102 @@
+from deliverance.pagematch import run_matches, Match
+from deliverance.rules import Rule, remove_content_attribs
+from lxml.html import tostring, document_fromstring
+import re
+import urlparse
+
+class RuleSet(object):
+
+ def __init__(self, matchers, rules_by_class, default_theme=None):
+ self.matchers = matchers
+ self.rules_by_class = rules_by_class
+ self.default_theme = default_theme
+
+ def apply_rules(self, req, resp, resource_fetcher, log):
+ extra_headers = parse_meta_headers(resp.body)
+ if extra_headers:
+ response_headers = HeaderDict(resp.headerlist + extra_headers)
+ else:
+ response_headers = resp.headers
+ classes = run_matches(self.matchers, req, response_headers, log)
+ if not classes:
+ classes = ['default']
+ rules = []
+ theme = None
+ for class_name in classes:
+ ## FIXME: handle case of unknown classes
+ ## Or do that during compilation?
+ for rule in self.rules_by_class[class_name]:
+ if rule not in rules:
+ rules.append(rule)
+ if rule.theme:
+ theme = rule.theme
+ if theme is None:
+ theme = self.default_theme
+ ## FIXME: error if not theme still
+ assert theme is not None
+ theme_doc = self.get_theme(theme, resource_fetcher, log)
+ content_doc = self.parse_document(resp.body, req.url)
+ for rule in rules:
+ rule.apply(content_doc, theme_doc, resource_fetcher, log)
+ remove_content_attribs(theme_doc)
+ ## FIXME: handle caching?
+ resp.body = tostring(theme_doc)
+ return resp
+
+ def get_theme(self, url, resource_getter, log):
+ log.info(self, 'Fetching theme from %s' % url)
+ ## FIXME: should do caching
+ doc = self.parse_document(resource_getter(url), url)
+ doc.make_links_absolute()
+ return doc
+
+ def parse_document(self, s, url):
+ return document_fromstring(s, base_url=url)
+
+ @classmethod
+ def parse_xml(cls, doc, source_location):
+ assert doc.tag == 'ruleset'
+ matchers = []
+ rules = []
+ default_theme = None
+ for el in doc.iterchildren():
+ if el.tag == 'match':
+ matcher = Match.parse_xml(el, source_location)
+ matchers.append(matcher)
+ elif el.tag == 'rule':
+ rule = Rule.parse_xml(el, source_location)
+ rules.append(rule)
+ elif el.tag == 'theme':
+ ## FIXME: Add parse error
+ default_theme = el.get('href')
+ else:
+ ## FIXME: better error
+ assert 0
+ rules_by_class = {}
+ for rule in rules:
+ for class_name in rule.classes:
+ rules_by_class.setdefault(class_name, []).append(rule)
+ if default_theme:
+ default_theme = urlparse.urljoin(doc.base, default_theme)
+ return cls(matchers, rules_by_class, default_theme=default_theme)
+
+_meta_tag_re = re.compile(r'<meta\s+(.*?)>', re.I | re.S)
+_http_equiv_re = re.compile(r'http-equiv=(?:"([^"]*)"|([^\s>]*))', re.I|re.S)
+_content_re = re.compile(r'content=(?:"([^"]*)"|([^\s>]*))', re.I|re.S)
+
+def parse_meta_headers(body):
+ headers = []
+ for match in _meta_tag_re.finditer(body):
+ content = match.group(1)
+ http_equiv_match = _http_equiv_re.search(content)
+ content_match = _content_re.search(content)
+ if not http_equiv_match or not content_match:
+ ## FIXME: log partial matches?
+ continue
+ http_equiv = (http_equiv_match.group(1) or http_equiv_match.group(2) or '').strip()
+ content = content_match.group(1) or content_match.group(2) or ''
+ if not http_equiv or not content:
+ ## FIXME: is empty content really meaningless?
+ continue
+ headers.append((http_equiv, content))
+ return headers
Modified: z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/selector.py
==============================================================================
--- z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/selector.py (original)
+++ z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/selector.py Fri Jun 27 00:41:33 2008
@@ -1,3 +1,7 @@
+"""
+Implements the element selection; XPath, CSS, and the modifiers on
+those selections.
+"""
from lxml.etree import XPath
from lxml.cssselect import CSSSelector
import re
@@ -10,6 +14,12 @@
pass
class Selector(object):
+ """
+ Represents one selection attribute
+
+ A selector contains multiple sub-selectors; this level combines
+ those from the || (cascading) operator.
+ """
def __init__(self, major_type, attributes, selectors):
self.major_type = major_type
@@ -20,6 +30,9 @@
@classmethod
def parse(cls, expr):
+ """
+ Parses one string expression, returning an instance of this class.
+ """
major_type, attributes, expr = cls.parse_prefix(expr)
selectors = [e.strip()
for e in expr.split('||')]
@@ -49,6 +62,13 @@
@staticmethod
def types_compatible(type1, type2):
+ """
+ When multiple types appear (separated with ||) this tests if
+ they are compatible with each other.
+
+ Only ``children`` and ``elements`` are compatible with each
+ other; in all other cases you must use the same type.
+ """
if type1 in ('children', 'elements'):
return type2 in ('children', 'elements')
else:
@@ -61,9 +81,18 @@
in self.selectors])
def __str__(self):
- return str(unicode(self))
+ return unicode(self).encode('utf8')
def compile_selector(self, expr, default_type):
+ """
+ Compiles a single selector string to ``(selector_type,
+ selector_object, expression_string, attributes)`` where the
+ selector_type is a string (``"elements"``, ``"children"``,
+ etc), selector_object is a callable that returns elements,
+ expression_string is the original expression, passed in, and
+ ``attributes`` is a list of attributes in the case of
+ ``attributes(attr1, attr2):``
+ """
type, attributes, rest_expr = self.parse_prefix(expr, default_type=default_type)
if not self.types_compatible(type, self.major_type):
raise SelectorSyntaxError(
Modified: z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/stringmatch.py
==============================================================================
--- z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/stringmatch.py (original)
+++ z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/stringmatch.py Fri Jun 27 00:41:33 2008
@@ -1,11 +1,21 @@
+"""
+Represents the string and header matching that is used to determine page classes.
+"""
+
import fnmatch
import re
+from deliverance.util.converters import asbool
__all__ = ['compile_matcher', 'compile_header_matcher', 'MatchSyntaxError']
_prefix_re = re.compile(r'^([a-z_-]+):', re.I)
def compile_matcher(s, default=None):
+ """
+ Compiles the match string to a match object.
+
+ Match objects are callable objects that return a boolean.
+ """
match = _prefix_re.search(s)
if not match:
if default is None:
@@ -17,7 +27,7 @@
pattern = s
else:
type = match.group(1).lower()
- pattern = s[match.end():]
+ pattern = s[match.end():].lstrip()
if type not in _matches:
## FIXME: show possible names?
raise MatchSyntaxError(
@@ -26,6 +36,14 @@
return _matches[type](pattern)
def compile_header_matcher(s, default='exact'):
+ """
+ Compiles the match header string to a match object.
+
+ Unlike simple match objects, these match against a dictionary of headers.
+
+ This also applies to the environ dictionary. Case-sensitivity is
+ handled by the dictionary, not the matcher.
+ """
if ':' not in s:
raise MatchSyntaxError(
"A header match must be like 'Header: pattern'; you have no header in %r"
@@ -50,6 +68,7 @@
_matches[cls.name] = cls
class Matcher(object):
+ # Abstract base class for matchers
name = None
@@ -63,12 +82,15 @@
return '%s:%s' % (self.name, self.pattern)
def __str__(self):
- return str(unicode(self))
+ return unicode(self).encode('utf8')
def __repr__(self):
return '<%s %s>' % (self.__class__.__name__, str(self))
class WildcardMatcher(Matcher):
+ """
+ Matches a value against a pattern that may contain ``*`` wildcards.
+ """
name = 'wildcard'
@@ -82,6 +104,9 @@
_add_matcher(WildcardMatcher)
class WildcardInsensitiveMatcher(Matcher):
+ """
+ Matches a value, ignoring case, against a pattern with wildcards.
+ """
name = 'wildcard-insensitive'
@@ -95,6 +120,9 @@
_add_matcher(WildcardInsensitiveMatcher)
class RegexMatcher(Matcher):
+ """
+ Matches a value against a regular expression.
+ """
name = 'regex'
@@ -113,6 +141,10 @@
_add_matcher(RegexMatcher)
class PathMatcher(Matcher):
+ """
+ Matches a value against a path. This checks prefixes, but also
+ only matches /-delimited segments.
+ """
name = 'path'
@@ -128,6 +160,9 @@
_add_matcher(PathMatcher)
class ExactMatcher(Matcher):
+ """
+ Matches a string exactly.
+ """
name = 'exact'
@@ -137,6 +172,9 @@
_add_matcher(ExactMatcher)
class ExactInsensitiveMatcher(Matcher):
+ """
+ Matches a string exactly, but ignoring case.
+ """
name = 'exact-insensitive'
@@ -146,6 +184,9 @@
_add_matcher(ExactInsensitiveMatcher)
class ContainsMatcher(Matcher):
+ """
+ Matches if the value contains the pattern.
+ """
name = 'contains'
@@ -155,6 +196,9 @@
_add_matcher(ContainsMatcher)
class ContainsInsensitiveMatcher(Matcher):
+ """
+ Matches if the value contains the pattern, ignoring case.
+ """
name = 'contains-insensitive'
@@ -163,19 +207,56 @@
_add_matcher(ContainsInsensitiveMatcher)
+class BooleanMatcher(Matcher):
+ """
+ Matches according to the boolean truth or falsity of a value
+ """
+
+ name = 'boolean'
+
+ def __init__(self, pattern):
+ pattern = pattern.strip()
+ super(BooleanMatcher, self).__init__(pattern)
+ if pattern.lower() == 'not':
+ pattern = 'false'
+ if not pattern:
+ pattern = 'true'
+ self.boolean = asbool(pattern)
+
+ def __call__(self, s):
+ try:
+ value = asbool(s)
+ except ValueError:
+ value = False
+ if not self.boolean:
+ return not value
+ else:
+ return value
+
+_add_matcher(BooleanMatcher)
+
class HeaderMatcher(object):
+ """
+ Matches simple "Header: pattern". Does not match wildcard headers.
+ """
def __init__(self, header, pattern):
self.header = header
self.pattern = pattern
def __call__(self, headers):
- return self.pattern(headers.get(self.header, ''))
+ return self.pattern(headers.get(self.header, '')), [self.header]
def __unicode__(self):
return u'%s: %s' % (self.header, self.pattern)
+ def __str__(self):
+ return unicode(self).encode('utf8')
+
class HeaderWildcardMatcher(object):
+ """
+ Matches "Header*: pattern", where the header contains a wildcard.
+ """
def __init__(self, header, pattern):
self.header = header
@@ -184,11 +265,16 @@
def __call__(self, headers):
matches = self.header_re.match
+ matched = []
for key in headers:
if matches(key):
+ matched.append(key)
if self.pattern(headers[key]):
- return True
- return False
+ return True, [key]
+ return False, matched
def __unicode__(self):
return u'%s: %s' % (self.header, self.pattern)
+
+ def __str__(self):
+ return unicode(self).encode('utf8')
Added: z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/tests/test_middleware.txt
==============================================================================
--- (empty file)
+++ z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/tests/test_middleware.txt Fri Jun 27 00:41:33 2008
@@ -0,0 +1,232 @@
+This tests the middleware, using a basically static site and applying themes.
+
+First we'll set up the site, using urlmap:
+
+ >>> from paste.urlmap import URLMap
+ >>> from webob import Request, Response
+ >>> app = URLMap()
+
+A theme:
+
+ >>> app['/theme.html'] = Response('''\
+ ... <html>
+ ... <head>
+ ... <title>This is a theme title</title>
+ ... <link rel=Stylesheet type="text/css" href="style.css">
+ ... <style type="text/css">
+ ... @import "style2.css";
+ ... </style>
+ ... </head>
+ ... <body>
+ ...
+ ... <div id="header" class="title-bar">
+ ... <h1 id="title">This is the theme title</h1>
+ ... <div class="topnav"></div>
+ ... </div>
+ ... <div id="content-wrapper">
+ ... <div id="content">
+ ... This content will be replaced.
+ ... </div>
+ ... </div>
+ ...
+ ... <div id="footer">
+ ... Copyright (C) 2000 Some Corporation
+ ... </div>
+ ...
+ ... </body>
+ ... </html>''')
+
+The rule file:
+
+ >>> app['/rules.xml'] = Response('''\
+ ... <ruleset>
+ ... <match path="/blog" class="blog" />
+ ... <match path="exact:/about.html" class="breakout" />
+ ... <match request-header="X-No-Deliverate: boolean:true" abort="1" />
+ ... <match environ="wsgi.url_scheme: https" class="via-https" />
+ ... <theme href="/theme.html" />
+ ... <rule class="default">
+ ... <replace content="children:#footer" theme="children:#footer" nocontent="ignore" />
+ ... <replace content="children:body" theme="children:#content" nocontent="abort" />
+ ... </rule>
+ ... <rule class="breakout">
+ ... <replace content="children:#footer" theme="children:#footer" nocontent="ignore" />
+ ... <replace content="children:body" theme="children:#content-wrapper" nocontent="abort" />
+ ... </rule>
+ ... <rule class="blog">
+ ... <replace content="children:#content" theme="children:#content" nocontent="abort" />
+ ... </rule>
+ ... </ruleset>''', content_type="application/xml")
+
+Some pages:
+
+ >>> app['/blog/index.html'] = Response('''\
+ ... <html><head><title>A blog post</title></head>
+ ... <body>
+ ... Some junk
+ ... <div id="content">the blog post <b>with some style</b></div>
+ ... some more junk
+ ... <div id="footer">a footer that will be ignored</div>
+ ... </body></html>
+ ... ''')
+ >>> app['/about.html'] = Response('''\
+ ... <html><title>About this site</title></html>
+ ... <body>
+ ... This is all about this site.
+ ... <div id="footer">a footer that will be ignored</div>
+ ... </body></html>
+ ... ''')
+ >>> app['/magic'] = Response('''\
+ ... <html><head></head><body>A simple page</body></html>''')
+ >>> app['/magic'].headers['x-no-deliverate'] = '1'
+
+Now to deliverate:
+
+ >>> from deliverance.middleware import DeliveranceMiddleware, SubrequestRuleGetter
+ >>> deliv = DeliveranceMiddleware(app, SubrequestRuleGetter('/rules.xml'))
+
+Now let's look at some plain content and its deliverated equivalent:
+
+ >>> def compare_request(path):
+ ... resp = Request.blank(path).get_response(app)
+ ... print 'Original content:'
+ ... print resp.body.strip()
+ ... resp = Request.blank(path).get_response(deliv)
+ ... print 'Themed content:'
+ ... print resp.body.strip()
+ >>> compare_request('/blog/index.html')
+ Original content:
+ <html><head><title>A blog post</title></head>
+ <body>
+ Some junk
+ <div id="content">the blog post <b>with some style</b></div>
+ some more junk
+ <div id="footer">a footer that will be ignored</div>
+ </body></html>
+ Themed content:
+ <html><head><title>This is a theme title</title><link rel="Stylesheet" type="text/css" href="http://localhost/style.css"><style type="text/css">
+ @import "http://localhost/style2.css";
+ </style></head><body>
+ <BLANKLINE>
+ <div id="header" class="title-bar">
+ <h1 id="title">This is the theme title</h1>
+ <div class="topnav"></div>
+ </div>
+ <div id="content-wrapper">
+ <div id="content">the blog post <b>with some style</b></div>
+ </div>
+ <BLANKLINE>
+ <div id="footer">
+ Copyright (C) 2000 Some Corporation
+ </div>
+ <BLANKLINE>
+ </body></html>
+
+
+Other rule formats
+==================
+
+One could imagine the rules looking more like:
+
+ default_theme = /theme.html
+
+ [match:blog]
+ path = /blog
+
+ [match:breakout]
+ path = exact:/about.html
+
+ [match abort]
+ request-header X-No-Deliverate = boolean: true
+
+ [rule:default]
+ append content="children:body" theme="children:#content" nocontent=abort
+ replace content="children:#footer" theme="children:#footer" nocontent=ignore
+
+ [rule:breakout]
+ append content="children:body" theme="children:#content-wrapper" nocontent=abort
+ replace content="children:#footer" theme="children:#footer" nocontent=ignore
+
+ [rule:blog]
+ append content="children:#content" theme="children:#content" nocontent=abort
+
+Or something like:
+
+ theme "/theme.html";
+ match (path=/blog) {
+ class: blog;
+ }
+ match (path="exact:/about.html") {
+ class: breakout;
+ }
+ match (request X-No-Deliverate="boolean: true") {
+ abort;
+ }
+ .default {
+ append content "children:body"
+ theme "children:#content"
+ nocontent abort;
+ replace content "children:#footer" theme "children:#footer" nocontent ignore;
+ }
+
+Blech. Maybe:
+
+ theme "/theme.html"
+ match (path="/blog") blog
+ match (path="exact:/about.html") (breakout)
+ match (request X-No-Deliverate="boolean: true") abort
+
+ rule default {
+ append "children:body" "children:#content" nocontent=abort
+ replace "children:#footer" "children:#footer" nocontent=ignore
+ }
+ rule breakout {
+ append "children:body" "children:#content-wrapper" nocontent=abort
+ replace "children:#footer" "children:#footer" nocontent=ignore
+ }
+ rule blog {
+ append "children:#content" "children:#content" nocontent=abort
+ }
+
+Or YAMLish:
+
+ theme: /theme.html
+ match: blog
+ path: /blog
+ match: breakout
+ path: exact:/about.html
+ match: abort
+ request X-No-Deliverate: boolean:true
+
+ rule default:
+ append "children:body" "children:#content" nocontent=abort
+ replace "children:#footer" "children:#footer" nocontent=ignore
+ rule breakout:
+ append "children:body" "children:#content-wrapper" nocontent=abort
+ replace "children:#footer" "children:#footer" nocontent=ignore
+ rule blog:
+ append "children:#content" "children:#content" nocontent=abort
+
+
+
+
+
+
+ <ruleset>
+ <match path="/blog" class="blog" />
+ <match path="exact:/about.html" class="breakout" />
+ <match request-header="X-No-Deliverate: boolean:true" abort="1" />
+ <match environ="wsgi.url_scheme: https" class="via-https" />
+ <theme href="/theme.html" />
+ <rule class="default">
+ <append content="children:body" theme="children:#content" nocontent="abort" />
+ <replace content="children:#footer" theme="children:#footer" nocontent="ignore" />
+ </rule>
+ <rule class="breakout">
+ <append content="children:body" theme="children:#content-wrapper" nocontent="abort" />
+ <replace content="children:#footer" theme="children:#footer" nocontent="ignore" />
+ </rule>
+ <rule class="blog">
+ <append content="children:#content" theme="children:#content" nocontent="abort" />
+ </rule>
+ </ruleset>
Added: z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/tests/test_pagematch.txt
==============================================================================
--- (empty file)
+++ z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/tests/test_pagematch.txt Fri Jun 27 00:41:33 2008
@@ -0,0 +1,50 @@
+This tests pagematch, which tests whether a request/response matches a <match> tag.
+
+ >>> from deliverance.pagematch import Match
+ >>> from lxml.etree import XML
+ >>> from webob import Request
+ >>> from webob.headerdict import HeaderDict
+ >>> from deliverance.log import SavingLogger
+ >>> def make(xml):
+ ... el = XML(xml)
+ ... return Match.parse_xml(el, source_location=None)
+ >>> def match(matcher, request, response_headers, show_log=True):
+ ... if isinstance(matcher, basestring):
+ ... matcher = make(matcher)
+ ... log = SavingLogger()
+ ... if isinstance(response_headers, list):
+ ... response_headers = HeaderDict(response_headers)
+ ... result = matcher(request, response_headers, log)
+ ... if show_log:
+ ... for level, rule, message in log.messages:
+ ... print 'log:', message
+ ... return result
+
+If you don't provide a class attribute, it is an error:
+
+ >>> make('<match path="foo" />')
+ Traceback (most recent call last):
+ ...
+ MatchSyntaxError: You must provide some classes in the class attribute
+
+Matches get normalized:
+
+ >>> print make('<match path="/foo" last="0" class=" a b"/>')
+ <match class="a b" path="path:/foo/" />
+
+Now, some matches:
+
+ >>> m = make('<match path="/foo" class="a" />')
+ >>> match(m, Request.blank('/foo'), [])
+ True
+ >>> match(m, Request.blank('/foobar'), [])
+ log: Skipping class a because request URL (/foobar) does not match path="path:/foo/"
+ False
+ >>> match(m, Request.blank('/foo/bar'), [])
+ True
+ >>> m = make('<match response-header="Content-Type: contains: html" class="x" />')
+ >>> match(m, Request.blank('/'), [('content-type', 'text/plain')])
+ log: Skipping class x because the response headers Content-Type do not match response-header="Content-Type: contains:html"
+ False
+ >>> match(m, Request.blank('/'), [('content-type', 'text/html')])
+ True
Modified: z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/tests/test_selection.txt
==============================================================================
--- z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/tests/test_selection.txt (original)
+++ z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/tests/test_selection.txt Fri Jun 27 00:41:33 2008
@@ -88,11 +88,11 @@
>>> from lxml.etree import XML
>>> import copy
- >>> from deliverance.rules import parse_rule, remove_content_attribs
+ >>> from deliverance.rules import parse_action, remove_content_attribs
>>> from deliverance.log import SavingLogger
>>> def t_rule_head(rule, selector='//head', show_log=False):
... rule = XML(rule)
- ... rule = parse_rule(rule, None)
+ ... rule = parse_action(rule, None)
... theme_copy = copy.deepcopy(theme)
... theme_copy.make_links_absolute()
... logger = SavingLogger()
Modified: z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/tests/test_stringmatch.txt
==============================================================================
--- z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/tests/test_stringmatch.txt (original)
+++ z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/tests/test_stringmatch.txt Fri Jun 27 00:41:33 2008
@@ -38,13 +38,14 @@
>>> def mheader(pattern, headers):
... return compile_header_matcher(pattern)(headers)
>>> mheader('Something: foo', {'Something': 'foo'})
- True
+ (True, ['Something'])
>>> mheader('Something: foo', {'Something': 'foobar'})
- False
+ (False, ['Something'])
>>> mheader('Something: contains:foo', {'Something': 'foobar'})
- True
+ (True, ['Something'])
>>> mheader('X-*: contains:evil', {'X-Other': 'nothing', 'X-Foo-Bar': 'some evil!'})
- True
+ (True, ['X-Foo-Bar'])
>>> mheader('X-*: contains:evil', {'X-Foo-Bar': 'okay'})
- False
-
+ (False, ['X-Foo-Bar'])
+ >>> mheader('X-*: contains:other', {'X-Other': 'nothing', 'X-Foo-Bar': 'some evil!'})
+ (False, ['X-Foo-Bar', 'X-Other'])
Modified: z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/util/converters.py
==============================================================================
--- z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/util/converters.py (original)
+++ z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/util/converters.py Fri Jun 27 00:41:33 2008
@@ -9,3 +9,8 @@
raise ValueError(
"String is not true/false: %r" % obj)
return bool(obj)
+
+from cgi import escape as cgi_escape
+def html_quote(s):
+ s = unicode(s)
+ return cgi_escape(s, True)
More information about the z3-checkins
mailing list