[z3-checkins] r56104 - in z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance: . tests util

ianb at codespeak.net ianb at codespeak.net
Fri Jun 27 00:41:35 CEST 2008


Author: ianb
Date: Fri Jun 27 00:41:33 2008
New Revision: 56104

Added:
   z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/pagematch.py   (contents, props changed)
   z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/ruleset.py   (contents, props changed)
   z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/tests/test_middleware.txt   (contents, props changed)
   z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/tests/test_pagematch.txt   (contents, props changed)
Modified:
   z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/log.py
   z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/rules.py
   z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/selector.py
   z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/stringmatch.py
   z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/tests/test_selection.txt
   z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/tests/test_stringmatch.txt
   z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/util/converters.py
Log:
Added the page/class matching and some middleware, not yet complete

Modified: z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/log.py
==============================================================================
--- z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/log.py	(original)
+++ z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/log.py	Fri Jun 27 00:41:33 2008
@@ -1,6 +1,17 @@
+"""
+Logging for deliverance.
+
+This does not use the standard :mod:`logging` module because that
+module is not easily applied and inspected locally.  We want the log
+messages to be strictly per-request.
+"""
+
 import logging
 
 class SavingLogger(object):
+    """
+    Logger that saves all its messages locally.
+    """
     def __init__(self, description=True):
         self.messages = []
         if description:

Added: z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/pagematch.py
==============================================================================
--- (empty file)
+++ z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/pagematch.py	Fri Jun 27 00:41:33 2008
@@ -0,0 +1,165 @@
+"""
+Handles the <match> tag and matching requests and responses against these patterns.
+"""
+
+from deliverance.stringmatch import compile_matcher, compile_header_matcher
+from deliverance.util.converters import asbool, html_quote
+
+__all__ = ['MatchSyntaxError', 'Match']
+
+class MatchSyntaxError(Exception):
+    """
+    Raised if there's some error with the matching.
+    """
+
+class Match(object):
+    """
+    Represents the <match> tags.
+
+    You can call this object to apply the match
+    """
+
+    def __init__(self, classes, path=None, domain=None,
+                 request_header=None, response_header=None, environ=None,
+                 abort=False, last=False, source_location=None):
+        self.classes = classes
+        self.path = path
+        self.domain = domain
+        self.request_header = request_header
+        self.response_header = response_header
+        self.environ = environ
+        self.abort = abort
+        self.last = last
+        self.source_location = source_location
+    
+    @classmethod
+    def parse_xml(cls, el, source_location):
+        """
+        Creates an instance of Match from the given parsed XML element.
+        """
+        assert el.tag == 'match'
+        classes = el.get('class', '').split()
+        abort = asbool(el.get('abort'))
+        if not abort and not classes:
+            ## FIXME: source location
+            raise MatchSyntaxError(
+                "You must provide some classes in the class attribute")
+        if abort and classes:
+            ## FIXME: source location
+            raise MatchSyntaxError(
+                'You cannot provide both abort="1" and class="%s"'
+                % (' '.join(classes)))
+        path = cls._parse_attr(el, 'path', default='path')
+        domain = cls._parse_attr(el, 'domain', default='wildcard')
+        request_header = cls._parse_attr(el, 'request-header', default='exact', header=True)
+        response_header = cls._parse_attr(el, 'response-header', default='exact', header=True)
+        environ = cls._parse_attr(el, 'environ', default='exact', header=True)
+        last = asbool(el.get('last'))
+        return cls(
+            classes,
+            path=path,
+            domain=domain,
+            request_header=request_header,
+            response_header=response_header,
+            environ=environ,
+            abort=abort,
+            last=last,
+            source_location=source_location)
+
+    @staticmethod
+    def _parse_attr(el, attr, default=None, header=False):
+        """
+        Compiles a single string pattern
+        """
+        value = el.get(attr)
+        if value is None:
+            return None
+        if header:
+            return compile_header_matcher(value, default)
+        else:
+            return compile_matcher(value, default)
+
+    def __unicode__(self):
+        """Serializes this match back into its ``<match ... />`` tag form (used in log messages)."""
+        parts = [u'<match', u'class="%s"' % html_quote(' '.join(self.classes))]
+        for attr, value in [
+            ('path', self.path),
+            ('domain', self.domain),
+            ('request-header', self.request_header),
+            ('response-header', self.response_header),
+            ('environ', self.environ)]:
+            if value:
+                parts.append(u'%s="%s"' % (attr, html_quote(unicode(value))))  # each attribute shows its own pattern
+        if self.abort:
+            parts.append(u'abort="1"')
+        if self.last:
+            parts.append(u'last="1"')
+        parts.append(u'/>')
+        return ' '.join(parts)
+
+    def __str__(self):
+        return unicode(self).encode('utf8')
+
+    def __call__(self, request, response_headers, log):
+        """
+        Checks this match against the given request and response_headers object.
+
+        `response_headers` should be a case-insensitive dictionary.  `request` should be a
+        :class:`webob.Request` object.  Returns True only if every configured pattern matches.
+        """
+        result = True  # NOTE(review): this initial value is never read; result is reassigned before each use
+        if self.abort:
+            class_name = 'abort'  # class_name is only used in the log messages below
+        elif len(self.classes) > 1:
+            class_name = '(%s)' % ' '.join(self.classes)
+        else:
+            class_name = self.classes[0]
+        if self.path:
+            if not self.path(request.path):
+                log.debug(self, 'Skipping class %s because request URL (%s) does not match path="%s"',
+                          class_name, request.path, self.path)
+                return False
+        if self.domain:
+            host = request.host.split(':', 1)[0]  # drop any ":port" suffix before matching
+            if not self.domain(host):
+                log.debug(self, 'Skipping class %s because request domain (%s) does not match domain="%s"',
+                          class_name, host, self.domain)
+                return False
+        if self.request_header:
+            result, headers = self.request_header(request.headers)
+            if not result:
+                log.debug(self, 'Skipping class %s because request headers %s do not match request-header="%s"',
+                          class_name, ', '.join(headers), self.request_header)
+                return False
+        if self.response_header:
+            result, headers = self.response_header(response_headers)
+            if not result:
+                ## FIXME: maybe distinguish <meta> headers and real headers?
+                log.debug(self, 'Skipping class %s because the response headers %s do not match response-header="%s"',
+                          class_name, ', '.join(headers), self.response_header)
+                return False
+        if self.environ:
+            result, keys = self.environ(request.environ)
+            if not result:
+                log.debug(self, 'Skipping class %s because the request environ (keys %s) did not match environ="%s"',
+                          class_name, ', '.join(keys), self.environ)
+                return False
+        return True
+
+def run_matches(matchers, request, response_headers, log):
+    """
+    Runs the match objects in matchers in order, returning the de-duplicated list of matched classes.
+    """
+    results = []
+    for index, matcher in enumerate(matchers):  # track position directly; no index() rescan or equality lookup
+        if matcher(request, response_headers, log):
+            log.debug(matcher, '<match> matched request, adding classes %s',
+                      ', '.join(matcher.classes))
+            for item in matcher.classes:
+                if item not in results:
+                    results.append(item)
+            if matcher.last:
+                log.debug(matcher, 'Stopping matches (skipping %i matches)',
+                          len(matchers) - index - 1)
+                return results
+    return results

Modified: z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/rules.py
==============================================================================
--- z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/rules.py	(original)
+++ z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/rules.py	Fri Jun 27 00:41:33 2008
@@ -1,15 +1,12 @@
 """
-Represents individual rules
+Represents individual actions (<append> etc) and the RuleSet that puts them together
 """
 
 from deliverance.exceptions import add_exception_info
-from deliverance.util.converters import asbool
+from deliverance.util.converters import asbool, html_quote
 from deliverance.selector import Selector
 from lxml import etree
 
-## A dictionary mapping element names to their rule classes:
-rules = {}
-
 class RuleSyntaxError(Exception):
     """
     Exception raised when a rule itself is invalid
@@ -28,22 +25,83 @@
 
 CONTENT_ATTRIB = 'x-a-marker-attribute-for-deliverance'
 
-def parse_rule(el, source_location):
-    if el.tag not in rules:
+class Rule(object):
+    """
+    This represents everything in a <rule></rule> section.
+    """
+
+    def __init__(self, classes, actions, theme, source_location):
+        self.classes = classes
+        self._actions = actions
+        self.theme = theme
+        self.source_location = source_location
+
+    @classmethod
+    def parse_xml(cls, el, source_location):
+        """
+        Creates a Rule object from a parsed XML <rule> element.
+        """
+        assert el.tag == 'rule'
+        classes = el.get('class', '').split()
+        if not classes:
+            classes = ['default']
+        theme = None
+        actions = []
+        for el in el.iterchildren():
+            if el.tag == 'theme':
+                ## FIXME: error if more than one theme
+                ## FIXME: error if no href
+                theme = el.get('href')
+                continue
+            action = parse_action(el, source_location)
+            actions.append(action)
+        return cls(classes, actions, theme, source_location)
+
+    def apply(self, content_doc, theme_doc, resource_fetcher, log):
+        """
+        Applies all the actions in this rule to the theme_doc
+
+        Note that this leaves behind attributes to mark elements that
+        originated in the content.  You should call
+        :func:`remove_content_attribs` after applying all rules.
+        """
+        for action in self._actions:
+            action.apply(content_doc, theme_doc, resource_fetcher, log)
+        return theme_doc
+
+## A dictionary mapping element names to their rule classes:
+_actions = {}
+
+def parse_action(el, source_location):
+    """
+    Parses an element into an action object.
+    """
+    if el.tag not in _actions:
         raise RuleSyntaxError(
             "There is no rule with the name %s"
             % el.tag)
-    Class = rules[el.tag]
+    Class = _actions[el.tag]
     instance = Class.from_xml(el, source_location)
     return instance
 
-class AbstractRule(object):
+class AbstractAction(object):
+    # This is the abstract class for all other rules
 
+    # These values are allowed for nocontent and notheme attributes:
     _no_allowed = (None, 'ignore', 'abort', 'warn')
+    # These values are allowed for manycontent and manytheme attributes:
     _many_allowed = _no_allowed + ('last', 'first', 'ignore:first', 'ignore:last',
                                    'warn:first', 'warn:last')
 
     def convert_error(self, name, value):
+        """
+        Taking a ``name="value"`` attribute for an error type
+        (nocontent, manycontent, etc) this returns ``(error_handler,
+        position)`` (where ``position`` is None for notheme/nocontent).
+
+        This applies the default value of "warn" and the default
+        position of "first".
+        """
         if value == '':
             value = None
         if value:
@@ -74,9 +132,16 @@
                 value = ('abort', None)
         elif not value:
             value = ('warn', None)
+        if isinstance(value, basestring):
+            value = (value, None)
+        assert isinstance(value, tuple), 'Bad value: %r' % value
         return value
 
     def format_error(self, attr, value):
+        """
+        Takes the result of :meth:`convert_error` and serializes it
+        back into ``attribute="value"``
+        """
         if attr in ('manytheme', 'manycontent'):
             handler, pos = value
             if pos == 'last':
@@ -133,10 +198,15 @@
             return False
         return True
 
+    # Set to the tag name in subclasses (append, prepend, etc):
     name = None
+    # Set to true in subclasses if the move attribute means something:
     move_supported = True
 
     def describe_self(self):
+        """
+        A text description of this rule, for use in log messages and errors
+        """
         parts = ['<%s' % self.name]
         if getattr(self, 'content', None):
             parts.append('content="%s"' % html_quote(self.content))
@@ -158,6 +228,10 @@
         return ' '.join(parts) + ' />'
 
     def describe_content_elements(self, els, children=False):
+        """
+        A text description of a list of content elements, for use in
+        log messages and errors.
+        """
         text = ', '.join(el.tag for el in els)
         if children:
             return 'children of %s' % text
@@ -165,11 +239,19 @@
             return text
 
     def describe_theme_element(self, el):
+        """
+        A text description of a theme element, for use in log messages
+        and errors.
+        """
         return el.tag
 
     @classmethod
-    def compile_selector(cls, tag, attr, source_location):
-        value = tag.get(attr)
+    def compile_selector(cls, el, attr, source_location):
+        """
+        Compiles a single selector taken from the given attribute of
+        an element.
+        """
+        value = el.get(attr)
         if value is None:
             return None
         return Selector.parse(value)
@@ -179,7 +261,7 @@
         Takes a list of elements and prepares their children as a list and text,
         so that you can do::
 
-          text, els = preparent_content_children(self, els)
+          text, els = prepare_content_children(self, els)
           add_text(theme_el, text)
           theme_el.extend(els)
 
@@ -213,10 +295,8 @@
                 elements.remove(el)
         return type, elements, attributes
 
-class TransformRule(AbstractRule):
-    """
-    Abstract class for the rules that move from the content to the theme (replace, append, prepend)
-    """
+class TransformAction(AbstractAction):
+    # Abstract class for the rules that move from the content to the theme (replace, append, prepend)
 
     def __init__(self, source_location, content, theme, if_content=None, content_href=None,
                  move=True, nocontent=None, notheme=None, manytheme=None, manycontent=None):
@@ -241,6 +321,9 @@
 
     @classmethod
     def from_xml(cls, tag, source_location):
+        """
+        Creates an instance of this object from the given parsed XML element
+        """
         content = cls.compile_selector(tag, 'content', source_location)
         theme = cls.compile_selector(tag, 'theme', source_location)
         if_content = cls.compile_selector(tag, 'if_content', source_location)
@@ -254,6 +337,9 @@
                    manycontent=tag.get('manycontent'))
 
     def apply(self, content_doc, theme_doc, resource_fetcher, log):
+        """
+        Applies this action to the theme_doc.
+        """
         describe = log.describe
         if self.content_href:
             content_doc = resource_fetcher(self.content_href)
@@ -307,6 +393,9 @@
         self.apply_transformation(content_type, content_els, attributes, theme_type, theme_el, log)
 
     def join_attributes(self, attr1, attr2):
+        """
+        Joins the sets of attribute names in attr1 and attr2, where either might be None
+        """
         if not attr1 and not attr2:
             return None
         if attr1 and not attr2:
@@ -318,8 +407,12 @@
         attr |= attr2
         return list(attr)
 
-class Replace(TransformRule):
+    def apply_transformation(self, content_type, content_els, attributes, theme_type, theme_el, log):
+        raise NotImplementedError
+
+class Replace(TransformAction):
 
+    # Compatible types of child and theme selector types:
     _compatible_types = [
         ('children', 'elements'),
         ('children', 'children'),
@@ -328,7 +421,7 @@
         ('attributes', 'attributes'),
         ('tag', 'tag'),
         ]
-
+ 
     def apply_transformation(self, content_type, content_els, attributes, theme_type, theme_el, log):
         describe = log.describe
         if theme_type == 'children':
@@ -337,11 +430,11 @@
             theme_el.text = ''
             if content_type == 'elements':
                 if self.move:
-                    # If we are working with copies, then the tails don't/shouldn't be moved
+                    # If we aren't working with copies then we have to move the tails up as we remove the elements:
                     for el in reversed(content_els):
                         move_tail_upward(el)
                 else:
-                    # If we are working with copies, then we can just throw away the tails
+                    # If we are working with copies, then we can just throw away the tails:
                     for el in content_els:
                         el.tail = None
                 theme_el.extend(content_els)
@@ -440,10 +533,11 @@
             theme_el.attrib.update(content_els[0].attrib)
             # "move" in this case doesn't mean anything
 
-rules['replace'] = Replace
+_actions['replace'] = Replace
 
-class Append(TransformRule):
+class Append(TransformAction):
 
+    # This is set to False in Prepend:
     _append = True
 
     _compatible_types = [
@@ -472,7 +566,7 @@
                     theme_el.text = None
                     theme_el[:0] = content_els
             elif content_type == 'children':
-                text, els = self.preparent_content_children(content_els)
+                text, els = self.prepare_content_children(content_els)
                 if self._append:
                     if len(theme_el):
                         add_tail(theme_el[-1], text)
@@ -556,14 +650,14 @@
                 else:
                     content_attrib.clear()
 
-rules['append'] = Append
+_actions['append'] = Append
 
 class Prepend(Append):
     _append = False
 
-rules['prepend'] = Prepend
+_actions['prepend'] = Prepend
 
-class Drop(AbstractRule):
+class Drop(AbstractAction):
     
     def __init__(self, source_location, content, theme, if_content=None,
                  nocontent=None, notheme=None):
@@ -635,7 +729,7 @@
                    nocontent=tag.get('nocontent'),
                    notheme=tag.get('notheme'))
 
-rules['drop'] = Drop
+_actions['drop'] = Drop
             
 ## Element utilities ##
 
@@ -675,16 +769,25 @@
         add_text(parent, el.tail)
 
 def iter_self_and_ancestors(el):
+    """
+    Iterates over an element itself and all its ancestors (parent, grandparent, etc)
+    """
     yield el
     for item in el.iterancestors():
         yield item
 
 def mark_content_els(els):
+    """
+    Mark an element as originating from the content (this uses a special attribute)
+    """
     for el in els:
         ## FIXME: maybe put something that is trackable to the rule that moved the element
         el.set(CONTENT_ATTRIB, '1')
 
 def is_content_element(el):
+    """
+    Tests if the element came from the content (which includes if any of its ancestors)
+    """
     ## FIXME: should this check children too?
     for p in iter_self_and_ancestors(el):
         if p.get(CONTENT_ATTRIB):
@@ -692,11 +795,9 @@
     return False
 
 def remove_content_attribs(doc):
+    """
+    Remove the markers placed by :func:`mark_content_els`
+    """
     for p in doc.getiterator():
         if p.get(CONTENT_ATTRIB, None) is not None:
             del p.attrib[CONTENT_ATTRIB]
-
-from cgi import escape as cgi_escape
-def html_quote(s):
-    s = unicode(s)
-    return cgi_escape(s, True)

Added: z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/ruleset.py
==============================================================================
--- (empty file)
+++ z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/ruleset.py	Fri Jun 27 00:41:33 2008
@@ -0,0 +1,102 @@
+from deliverance.pagematch import run_matches, Match
+from deliverance.rules import Rule, remove_content_attribs
+from lxml.html import tostring, document_fromstring
+import re
+import urlparse
+
+class RuleSet(object):
+
+    def __init__(self, matchers, rules_by_class, default_theme=None):
+        self.matchers = matchers
+        self.rules_by_class = rules_by_class
+        self.default_theme = default_theme
+
+    def apply_rules(self, req, resp, resource_fetcher, log):
+        extra_headers = parse_meta_headers(resp.body)
+        if extra_headers:
+            response_headers = HeaderDict(resp.headerlist + extra_headers)  ## FIXME: HeaderDict is never imported or defined in this module (NameError here)
+        else:
+            response_headers = resp.headers
+        classes = run_matches(self.matchers, req, response_headers, log)
+        if not classes:
+            classes = ['default']
+        rules = []
+        theme = None
+        for class_name in classes:
+            ## FIXME: handle case of unknown classes
+            ## Or do that during compilation?
+            for rule in self.rules_by_class[class_name]:
+                if rule not in rules:
+                    rules.append(rule)
+                    if rule.theme:
+                        theme = rule.theme  # the last matching rule that names a theme wins
+        if theme is None:
+            theme = self.default_theme
+            ## FIXME: error if not theme still
+        assert theme is not None
+        theme_doc = self.get_theme(theme, resource_fetcher, log)
+        content_doc = self.parse_document(resp.body, req.url)
+        for rule in rules:
+            rule.apply(content_doc, theme_doc, resource_fetcher, log)
+        remove_content_attribs(theme_doc)
+        ## FIXME: handle caching?
+        resp.body = tostring(theme_doc)
+        return resp
+
+    def get_theme(self, url, resource_getter, log):
+        log.info(self, 'Fetching theme from %s' % url)
+        ## FIXME: should do caching
+        doc = self.parse_document(resource_getter(url), url)
+        doc.make_links_absolute()
+        return doc
+
+    def parse_document(self, s, url):
+        return document_fromstring(s, base_url=url)
+
+    @classmethod
+    def parse_xml(cls, doc, source_location):
+        assert doc.tag == 'ruleset'
+        matchers = []
+        rules = []
+        default_theme = None
+        for el in doc.iterchildren():
+            if el.tag == 'match':
+                matcher = Match.parse_xml(el, source_location)
+                matchers.append(matcher)
+            elif el.tag == 'rule':
+                rule = Rule.parse_xml(el, source_location)
+                rules.append(rule)
+            elif el.tag == 'theme':
+                ## FIXME: Add parse error
+                default_theme = el.get('href')
+            else:
+                ## FIXME: better error
+                assert 0
+        rules_by_class = {}
+        for rule in rules:
+            for class_name in rule.classes:
+                rules_by_class.setdefault(class_name, []).append(rule)
+        if default_theme:
+            default_theme = urlparse.urljoin(doc.base, default_theme)
+        return cls(matchers, rules_by_class, default_theme=default_theme)
+
+_meta_tag_re = re.compile(r'<meta\s+(.*?)>', re.I | re.S)
+_http_equiv_re = re.compile(r'http-equiv=(?:"([^"]*)"|([^\s>]*))', re.I|re.S)
+_content_re = re.compile(r'content=(?:"([^"]*)"|([^\s>]*))', re.I|re.S)
+        
+def parse_meta_headers(body):
+    headers = []
+    for match in _meta_tag_re.finditer(body):
+        content = match.group(1)
+        http_equiv_match = _http_equiv_re.search(content)
+        content_match = _content_re.search(content)
+        if not http_equiv_match or not content_match:
+            ## FIXME: log partial matches?
+            continue
+        http_equiv = (http_equiv_match.group(1) or http_equiv_match.group(2) or '').strip()
+        content = content_match.group(1) or content_match.group(2) or ''
+        if not http_equiv or not content:
+            ## FIXME: is empty content really meaningless?
+            continue
+        headers.append((http_equiv, content))
+    return headers

Modified: z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/selector.py
==============================================================================
--- z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/selector.py	(original)
+++ z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/selector.py	Fri Jun 27 00:41:33 2008
@@ -1,3 +1,7 @@
+"""
+Implements the element selection; XPath, CSS, and the modifiers on
+those selections.
+"""
 from lxml.etree import XPath
 from lxml.cssselect import CSSSelector
 import re
@@ -10,6 +14,12 @@
     pass
 
 class Selector(object):
+    """
+    Represents one selection attribute
+
+    A selector contains multiple sub-selectors; this level combines
+    those from the || (cascading) operator.
+    """
 
     def __init__(self, major_type, attributes, selectors):
         self.major_type = major_type
@@ -20,6 +30,9 @@
 
     @classmethod
     def parse(cls, expr):
+        """
+        Parses one string expression, returning an instance of this class.
+        """
         major_type, attributes, expr = cls.parse_prefix(expr)
         selectors = [e.strip()
                      for e in expr.split('||')]
@@ -49,6 +62,13 @@
 
     @staticmethod
     def types_compatible(type1, type2):
+        """
+        When multiple types appear (separated with ||) this tests if
+        they are compatible with each other.
+
+        Only ``children`` and ``elements`` are compatible with each
+        other; in all other cases you must use the same type.
+        """
         if type1 in ('children', 'elements'):
             return type2 in ('children', 'elements')
         else:
@@ -61,9 +81,18 @@
              in self.selectors])
             
     def __str__(self):
-        return str(unicode(self))
+        return unicode(self).encode('utf8')
 
     def compile_selector(self, expr, default_type):
+        """
+        Compiles a single selector string to ``(selector_type,
+        selector_object, expression_string, attributes)`` where the
+        selector_type is a string (``"elements"``, ``"children"``,
+        etc), selector_object is a callable that returns elements,
+        expression_string is the original expression, passed in, and
+        ``attributes`` is a list of attributes in the case of
+        ``attributes(attr1, attr2):``
+        """
         type, attributes, rest_expr = self.parse_prefix(expr, default_type=default_type)
         if not self.types_compatible(type, self.major_type):
             raise SelectorSyntaxError(

Modified: z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/stringmatch.py
==============================================================================
--- z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/stringmatch.py	(original)
+++ z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/stringmatch.py	Fri Jun 27 00:41:33 2008
@@ -1,11 +1,21 @@
+"""
+Represents the string and header matching that is used to determine page classes.
+"""
+
 import fnmatch
 import re
+from deliverance.util.converters import asbool
 
 __all__ = ['compile_matcher', 'compile_header_matcher', 'MatchSyntaxError']
 
 _prefix_re = re.compile(r'^([a-z_-]+):', re.I)
 
 def compile_matcher(s, default=None):
+    """
+    Compiles the match string to a match object.
+
+    Match objects are callable objects that return a boolean.
+    """
     match = _prefix_re.search(s)
     if not match:
         if default is None:
@@ -17,7 +27,7 @@
         pattern = s
     else:
         type = match.group(1).lower()
-        pattern = s[match.end():]
+        pattern = s[match.end():].lstrip()
     if type not in _matches:
         ## FIXME: show possible names?
         raise MatchSyntaxError(
@@ -26,6 +36,14 @@
     return _matches[type](pattern)
 
 def compile_header_matcher(s, default='exact'):
+    """
+    Compiles the match header string to a match object.
+
+    Unlike simple match objects, these match against a dictionary of headers.
+
+    This also applies to the environ dictionary.  Case-sensitivity is
+    handled by the dictionary, not the matcher.
+    """
     if ':' not in s:
         raise MatchSyntaxError(
             "A header match must be like 'Header: pattern'; you have no header in %r"
@@ -50,6 +68,7 @@
     _matches[cls.name] = cls
 
 class Matcher(object):
+    # Abstract base class for matchers
 
     name = None
 
@@ -63,12 +82,15 @@
         return '%s:%s' % (self.name, self.pattern)
 
     def __str__(self):
-        return str(unicode(self))
+        return unicode(self).encode('utf8')
     
     def __repr__(self):
         return '<%s %s>' % (self.__class__.__name__, str(self))
 
 class WildcardMatcher(Matcher):
+    """
+    Matches a value against a pattern that may contain ``*`` wildcards.
+    """
 
     name = 'wildcard'
 
@@ -82,6 +104,9 @@
 _add_matcher(WildcardMatcher)
 
 class WildcardInsensitiveMatcher(Matcher):
+    """
+    Matches a value, ignoring case, against a pattern with wildcards.
+    """
 
     name = 'wildcard-insensitive'
 
@@ -95,6 +120,9 @@
 _add_matcher(WildcardInsensitiveMatcher)
 
 class RegexMatcher(Matcher):
+    """
+    Matches a value against a regular expression.
+    """
 
     name = 'regex'
 
@@ -113,6 +141,10 @@
 _add_matcher(RegexMatcher)
 
 class PathMatcher(Matcher):
+    """
+    Matches a value against a path.  This checks prefixes, but also
+    only matches /-delimited segments.
+    """
 
     name = 'path'
 
@@ -128,6 +160,9 @@
 _add_matcher(PathMatcher)
 
 class ExactMatcher(Matcher):
+    """
+    Matches a string exactly.
+    """
 
     name = 'exact'
 
@@ -137,6 +172,9 @@
 _add_matcher(ExactMatcher)
         
 class ExactInsensitiveMatcher(Matcher):
+    """
+    Matches a string exactly, but ignoring case.
+    """
     
     name = 'exact-insensitive'
 
@@ -146,6 +184,9 @@
 _add_matcher(ExactInsensitiveMatcher)
 
 class ContainsMatcher(Matcher):
+    """
+    Matches if the value contains the pattern.
+    """
 
     name = 'contains'
 
@@ -155,6 +196,9 @@
 _add_matcher(ContainsMatcher)
 
 class ContainsInsensitiveMatcher(Matcher):
+    """
+    Matches if the value contains the pattern, ignoring case.
+    """
 
     name = 'contains-insensitive'
 
@@ -163,19 +207,56 @@
 
 _add_matcher(ContainsInsensitiveMatcher)
 
+class BooleanMatcher(Matcher):
+    """
+    Matches according to a boolean true/falseness of a value
+    """
+    
+    name = 'boolean'
+
+    def __init__(self, pattern):
+        pattern = pattern.strip()
+        super(BooleanMatcher, self).__init__(pattern)
+        if pattern.lower() == 'not':
+            pattern = 'false'
+        if not pattern:
+            pattern = 'true'
+        self.boolean = asbool(pattern)
+
+    def __call__(self, s):
+        try:
+            value = asbool(s)
+        except ValueError:
+            value = False
+        if not self.boolean:
+            return not value
+        else:
+            return value
+
+_add_matcher(BooleanMatcher)
+
 class HeaderMatcher(object):
+    """
+    Matches simple "Header: pattern".  Does not match wildcard headers.
+    """
 
     def __init__(self, header, pattern):
         self.header = header
         self.pattern = pattern
 
     def __call__(self, headers):
-        return self.pattern(headers.get(self.header, ''))
+        return self.pattern(headers.get(self.header, '')), [self.header]
 
     def __unicode__(self):
         return u'%s: %s' % (self.header, self.pattern)
 
+    def __str__(self):
+        return unicode(self).encode('utf8')
+
 class HeaderWildcardMatcher(object):
+    """
+    Matches "Header*: pattern", where the header contains a wildcard.
+    """
 
     def __init__(self, header, pattern):
         self.header = header
@@ -184,11 +265,16 @@
 
     def __call__(self, headers):
         matches = self.header_re.match
+        matched = []
         for key in headers:
             if matches(key):
+                matched.append(key)
                 if self.pattern(headers[key]):
-                    return True
-        return False
+                    return True, [key]
+        return False, matched
 
     def __unicode__(self):
         return u'%s: %s' % (self.header, self.pattern)
+
+    def __str__(self):
+        return unicode(self).encode('utf8')

Added: z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/tests/test_middleware.txt
==============================================================================
--- (empty file)
+++ z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/tests/test_middleware.txt	Fri Jun 27 00:41:33 2008
@@ -0,0 +1,232 @@
+This tests the middleware, using a basically static site and applying themes.
+
+First we'll set up the site, using urlmap:
+
+    >>> from paste.urlmap import URLMap
+    >>> from webob import Request, Response
+    >>> app = URLMap()
+
+A theme:
+
+    >>> app['/theme.html'] = Response('''\
+    ... <html>
+    ...  <head>
+    ...   <title>This is a theme title</title>
+    ...   <link rel=Stylesheet type="text/css" href="style.css">
+    ...   <style type="text/css">
+    ...     @import "style2.css";
+    ...   </style>
+    ...  </head>
+    ...  <body>
+    ... 
+    ...   <div id="header" class="title-bar">
+    ...     <h1 id="title">This is the theme title</h1>
+    ...     <div class="topnav"></div>
+    ...   </div>
+    ...   <div id="content-wrapper">
+    ...     <div id="content">
+    ...       This content will be replaced.
+    ...     </div>
+    ...   </div>
+    ... 
+    ...   <div id="footer">
+    ...     Copyright (C) 2000 Some Corporation
+    ...   </div>
+    ... 
+    ...  </body>
+    ... </html>''')
+
+The rule file:
+
+    >>> app['/rules.xml'] = Response('''\
+    ... <ruleset>
+    ...   <match path="/blog" class="blog" />
+    ...   <match path="exact:/about.html" class="breakout" />
+    ...   <match request-header="X-No-Deliverate: boolean:true" abort="1" />
+    ...   <match environ="wsgi.url_scheme: https" class="via-https" />
+    ...   <theme href="/theme.html" />
+    ...   <rule class="default">
+    ...     <replace content="children:#footer" theme="children:#footer" nocontent="ignore" />
+    ...     <replace content="children:body" theme="children:#content" nocontent="abort" />
+    ...   </rule>
+    ...   <rule class="breakout">
+    ...     <replace content="children:#footer" theme="children:#footer" nocontent="ignore" />
+    ...     <replace content="children:body" theme="children:#content-wrapper" nocontent="abort" />
+    ...   </rule>
+    ...   <rule class="blog">
+    ...     <replace content="children:#content" theme="children:#content" nocontent="abort" />
+    ...   </rule>
+    ... </ruleset>''', content_type="application/xml")
+
+Some pages:
+
+    >>> app['/blog/index.html'] = Response('''\
+    ... <html><head><title>A blog post</title></head>
+    ... <body>
+    ... Some junk
+    ... <div id="content">the blog post <b>with some style</b></div>
+    ... some more junk
+    ... <div id="footer">a footer that will be ignored</div>
+    ... </body></html>
+    ... ''')
+    >>> app['/about.html'] = Response('''\
+    ... <html><title>About this site</title></html>
+    ... <body>
+    ... This is all about this site.
+    ... <div id="footer">a footer that will be ignored</div>
+    ... </body></html>
+    ... ''')
+    >>> app['/magic'] = Response('''\
+    ... <html><head></head><body>A simple page</body></html>''')
+    >>> app['/magic'].headers['x-no-deliverate'] = '1'
+
+Now to deliverate:
+
+    >>> from deliverance.middleware import DeliveranceMiddleware, SubrequestRuleGetter
+    >>> deliv = DeliveranceMiddleware(app, SubrequestRuleGetter('/rules.xml'))
+
+Now let's look at some plain content and its deliverated equivalent:
+
+    >>> def compare_request(path):
+    ...     resp = Request.blank(path).get_response(app)
+    ...     print 'Original content:'
+    ...     print resp.body.strip()
+    ...     resp = Request.blank(path).get_response(deliv)
+    ...     print 'Themed content:'
+    ...     print resp.body.strip()
+    >>> compare_request('/blog/index.html')
+    Original content:
+    <html><head><title>A blog post</title></head>
+    <body>
+    Some junk
+    <div id="content">the blog post <b>with some style</b></div>
+    some more junk
+    <div id="footer">a footer that will be ignored</div>
+    </body></html>
+    Themed content:
+    <html><head><title>This is a theme title</title><link rel="Stylesheet" type="text/css" href="http://localhost/style.css"><style type="text/css">
+        @import "http://localhost/style2.css";
+      </style></head><body>
+    <BLANKLINE>
+      <div id="header" class="title-bar">
+        <h1 id="title">This is the theme title</h1>
+        <div class="topnav"></div>
+      </div>
+      <div id="content-wrapper">
+        <div id="content">the blog post <b>with some style</b></div>
+      </div>
+    <BLANKLINE>
+      <div id="footer">
+        Copyright (C) 2000 Some Corporation
+      </div>
+    <BLANKLINE>
+     </body></html>
+
+
+Other rule formats
+==================
+
+One could imagine the rules looking more like:
+
+    default_theme = /theme.html
+
+    [match:blog]
+    path = /blog
+
+    [match:breakout]
+    path = exact:/about.html
+
+    [match abort]
+    request-header X-No-Deliverate = boolean: true
+
+    [rule:default]
+    append content="children:body" theme="children:#content" nocontent=abort
+    replace content="children:#footer" theme="children:#footer" nocontent=ignore
+
+    [rule:breakout]
+    append content="children:body" theme="children:#content-wrapper" nocontent=abort
+    replace content="children:#footer" theme="children:#footer" nocontent=ignore
+
+    [rule:blog]
+    append content="children:#content" theme="children:#content" nocontent=abort
+
+Or something like:
+
+    theme "/theme.html";
+    match (path=/blog) {
+        class: blog;
+    }
+    match (path="exact:/about.html") {
+        class: breakout;
+    }
+    match (request X-No-Deliverate="boolean: true") {
+        abort;
+    }
+    .default {
+        append content "children:body"
+               theme "children:#content"
+               nocontent abort;
+        replace content "children:#footer" theme "children:#footer" nocontent ignore;
+    }
+
+Blech.  Maybe:
+
+    theme "/theme.html"
+    match (path="/blog") blog
+    match (path="exact:/about.html") (breakout)
+    match (request X-No-Deliverate="boolean: true") abort
+
+    rule default {
+      append "children:body" "children:#content" nocontent=abort
+      replace "children:#footer" "children:#footer" nocontent=ignore
+    }
+    rule breakout {
+      append "children:body" "children:#content-wrapper" nocontent=abort
+      replace "children:#footer" "children:#footer" nocontent=ignore
+    }
+    rule blog {
+      append "children:#content" "children:#content" nocontent=abort
+    }
+
+Or YAMLish:
+
+    theme: /theme.html
+    match: blog
+      path: /blog
+    match: breakout
+      path: exact:/about.html
+    match: abort
+      request X-No-Deliverate: boolean:true
+
+    rule default:
+      append "children:body" "children:#content" nocontent=abort
+      replace "children:#footer" "children:#footer" nocontent=ignore
+    rule breakout:
+      append "children:body" "children:#content-wrapper" nocontent=abort
+      replace "children:#footer" "children:#footer" nocontent=ignore
+    rule blog:
+      append "children:#content" "children:#content" nocontent=abort
+
+
+
+
+    
+
+    <ruleset>
+      <match path="/blog" class="blog" />
+      <match path="exact:/about.html" class="breakout" />
+      <match request-header="X-No-Deliverate: boolean:true" abort="1" />
+      <match environ="wsgi.url_scheme: https" class="via-https" />
+      <theme href="/theme.html" />
+      <rule class="default">
+        <append content="children:body" theme="children:#content" nocontent="abort" />
+        <replace content="children:#footer" theme="children:#footer" nocontent="ignore" />
+      </rule>
+      <rule class="breakout">
+        <append content="children:body" theme="children:#content-wrapper" nocontent="abort" />
+        <replace content="children:#footer" theme="children:#footer" nocontent="ignore" />
+      </rule>
+      <rule class="blog">
+        <append content="children:#content" theme="children:#content" nocontent="abort" />
+      </rule>
+    </ruleset>

Added: z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/tests/test_pagematch.txt
==============================================================================
--- (empty file)
+++ z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/tests/test_pagematch.txt	Fri Jun 27 00:41:33 2008
@@ -0,0 +1,50 @@
+This tests pagematch, which tests whether a request/response matches a <match> tag.
+
+    >>> from deliverance.pagematch import Match
+    >>> from lxml.etree import XML
+    >>> from webob import Request
+    >>> from webob.headerdict import HeaderDict
+    >>> from deliverance.log import SavingLogger
+    >>> def make(xml):
+    ...     el = XML(xml)
+    ...     return Match.parse_xml(el, source_location=None)
+    >>> def match(matcher, request, response_headers, show_log=True):
+    ...     if isinstance(matcher, basestring):
+    ...         matcher = make(matcher)
+    ...     log = SavingLogger()
+    ...     if isinstance(response_headers, list):
+    ...         response_headers = HeaderDict(response_headers)
+    ...     result = matcher(request, response_headers, log)
+    ...     if show_log:
+    ...         for level, rule, message in log.messages:
+    ...             print 'log:', message
+    ...     return result
+
+If you don't provide a class attribute, it is an error:
+
+    >>> make('<match path="foo" />')
+    Traceback (most recent call last):
+        ...
+    MatchSyntaxError: You must provide some classes in the class attribute
+
+Matches get normalized:
+
+    >>> print make('<match path="/foo" last="0" class="  a b"/>')
+    <match class="a b" path="path:/foo/" />
+
+Now, some matches:
+
+    >>> m = make('<match path="/foo" class="a" />')
+    >>> match(m, Request.blank('/foo'), [])
+    True
+    >>> match(m, Request.blank('/foobar'), [])
+    log: Skipping class a because request URL (/foobar) does not match path="path:/foo/"
+    False
+    >>> match(m, Request.blank('/foo/bar'), [])
+    True
+    >>> m = make('<match response-header="Content-Type: contains: html" class="x" />')
+    >>> match(m, Request.blank('/'), [('content-type', 'text/plain')])
+    log: Skipping class x because the response headers Content-Type do not match response-header="Content-Type: contains:html"
+    False
+    >>> match(m, Request.blank('/'), [('content-type', 'text/html')])
+    True

Modified: z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/tests/test_selection.txt
==============================================================================
--- z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/tests/test_selection.txt	(original)
+++ z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/tests/test_selection.txt	Fri Jun 27 00:41:33 2008
@@ -88,11 +88,11 @@
 
     >>> from lxml.etree import XML
     >>> import copy
-    >>> from deliverance.rules import parse_rule, remove_content_attribs
+    >>> from deliverance.rules import parse_action, remove_content_attribs
     >>> from deliverance.log import SavingLogger
     >>> def t_rule_head(rule, selector='//head', show_log=False):
     ...     rule = XML(rule)
-    ...     rule = parse_rule(rule, None)
+    ...     rule = parse_action(rule, None)
     ...     theme_copy = copy.deepcopy(theme)
     ...     theme_copy.make_links_absolute()
     ...     logger = SavingLogger()

Modified: z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/tests/test_stringmatch.txt
==============================================================================
--- z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/tests/test_stringmatch.txt	(original)
+++ z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/tests/test_stringmatch.txt	Fri Jun 27 00:41:33 2008
@@ -38,13 +38,14 @@
      >>> def mheader(pattern, headers):
      ...     return compile_header_matcher(pattern)(headers)
      >>> mheader('Something: foo', {'Something': 'foo'})
-     True
+     (True, ['Something'])
      >>> mheader('Something: foo', {'Something': 'foobar'})
-     False
+     (False, ['Something'])
      >>> mheader('Something: contains:foo', {'Something': 'foobar'})
-     True
+     (True, ['Something'])
      >>> mheader('X-*: contains:evil', {'X-Other': 'nothing', 'X-Foo-Bar': 'some evil!'})
-     True
+     (True, ['X-Foo-Bar'])
      >>> mheader('X-*: contains:evil', {'X-Foo-Bar': 'okay'})
-     False
-     
+     (False, ['X-Foo-Bar'])
+     >>> mheader('X-*: contains:other', {'X-Other': 'nothing', 'X-Foo-Bar': 'some evil!'})
+     (False, ['X-Foo-Bar', 'X-Other'])

Modified: z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/util/converters.py
==============================================================================
--- z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/util/converters.py	(original)
+++ z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/util/converters.py	Fri Jun 27 00:41:33 2008
@@ -9,3 +9,8 @@
             raise ValueError(
                 "String is not true/false: %r" % obj)
     return bool(obj)
+
+from cgi import escape as cgi_escape
+def html_quote(s):
+    s = unicode(s)
+    return cgi_escape(s, True)


More information about the z3-checkins mailing list