[z3-checkins] r56054 - in z3/deliverance/sandbox/ianb/deliverance/trunk: . deliverance deliverance/tests deliverance/util

ianb at codespeak.net ianb at codespeak.net
Tue Jun 24 22:47:45 CEST 2008


Author: ianb
Date: Tue Jun 24 22:47:43 2008
New Revision: 56054

Added:
   z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/log.py   (contents, props changed)
   z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/rules.py   (contents, props changed)
   z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/selector.py   (contents, props changed)
   z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/tests/
   z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/tests/test_selection.txt   (contents, props changed)
   z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/util/
   z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/util/__init__.py   (contents, props changed)
   z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/util/converters.py   (contents, props changed)
Modified:
   z3/deliverance/sandbox/ianb/deliverance/trunk/setup.py
Log:
A basic implementation of rules and selectors

Added: z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/log.py
==============================================================================
--- (empty file)
+++ z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/log.py	Tue Jun 24 22:47:43 2008
@@ -0,0 +1,29 @@
+import logging
+
+class SavingLogger(object):
+    """
+    Logger that keeps its messages in memory instead of emitting them.
+
+    Every message is stored in ``self.messages`` as a ``(level, el, msg)``
+    tuple, where ``level`` is a numeric level from the standard
+    ``logging`` module and ``el`` is whatever object the caller passed
+    as the subject of the message.
+
+    When created with ``description=True`` (the default) the logger also
+    collects descriptions in ``self.descriptions``; callers add one with
+    ``log.describe(msg)``.
+    """
+
+    # Callers test ``if log.describe:`` before building a description, so
+    # the attribute has to exist (and be false) when descriptions are off.
+    describe = None
+
+    def __init__(self, description=True):
+        self.messages = []
+        if description:
+            self.descriptions = []
+            self.describe = self.add_description
+
+    def add_description(self, msg):
+        """Record one description string."""
+        self.descriptions.append(msg)
+
+    def message(self, level, el, msg, *args, **kw):
+        """
+        Store a message at the given numeric level.
+
+        `msg` is %-formatted with the positional arguments if there are
+        any, otherwise with the keyword arguments if there are any.
+        """
+        if args:
+            msg = msg % args
+        elif kw:
+            msg = msg % kw
+        self.messages.append((level, el, msg))
+
+    def debug(self, el, msg, *args, **kw):
+        self.message(logging.DEBUG, el, msg, *args, **kw)
+
+    def info(self, el, msg, *args, **kw):
+        self.message(logging.INFO, el, msg, *args, **kw)
+
+    def notify(self, el, msg, *args, **kw):
+        # One step above INFO, below WARN
+        self.message(logging.INFO+1, el, msg, *args, **kw)
+
+    def warn(self, el, msg, *args, **kw):
+        self.message(logging.WARN, el, msg, *args, **kw)
+
+    warning = warn
+
+    def error(self, el, msg, *args, **kw):
+        self.message(logging.ERROR, el, msg, *args, **kw)
+
+    def fatal(self, el, msg, *args, **kw):
+        self.message(logging.FATAL, el, msg, *args, **kw)

Added: z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/rules.py
==============================================================================
--- (empty file)
+++ z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/rules.py	Tue Jun 24 22:47:43 2008
@@ -0,0 +1,702 @@
+"""
+Represents individual rules
+"""
+
+from deliverance.exceptions import add_exception_info
+from deliverance.util.converters import asbool
+from deliverance.selector import Selector
+from lxml import etree
+
+## A dictionary mapping element names to their rule classes:
+rules = {}
+
+class RuleSyntaxError(Exception):
+    """
+    Exception raised when a rule itself is invalid: an unknown rule
+    name, a bad value for one of the error-handling attributes, or
+    content and theme selector types that cannot be combined.
+    """
+
+class SelectionError(Exception):
+    """
+    Exception raised when a selection somehow isn't right (e.g., it
+    returns no elements when it should return an element).
+    """
+    # NOTE(review): nothing in this module raises this yet -- confirm
+    # whether another module does.
+
+class AbortTheme(Exception):
+    """
+    Raised when a rule gives up on theming altogether, e.g. when the
+    rule has nocontent="abort" and its content selector matches nothing.
+    """
+
+# Marker attribute set by mark_content_els() on elements taken from the
+# content document.  is_content_element() looks for it so that theme
+# selectors skip such elements; remove_content_attribs() strips it again.
+CONTENT_ATTRIB = 'x-a-marker-attribute-for-deliverance'
+
+def parse_rule(el, source_location):
+    """
+    Build the rule object described by the XML element `el`.
+
+    The element's tag picks a class out of the ``rules`` registry and
+    that class's ``from_xml`` does the actual parsing.  Raises
+    RuleSyntaxError when nothing is registered under the tag.
+    """
+    try:
+        rule_class = rules[el.tag]
+    except KeyError:
+        raise RuleSyntaxError(
+            "There is no rule with the name %s"
+            % el.tag)
+    return rule_class.from_xml(el, source_location)
+
+class AbstractRule(object):
+
+    _no_allowed = (None, 'ignore', 'abort', 'warn')
+    _many_allowed = _no_allowed + ('last', 'first', 'ignore:first', 'ignore:last',
+                                   'warn:first', 'warn:last')
+
+    def convert_error(self, name, value):
+        """
+        Validate and normalize the value of an error-handling attribute.
+
+        `name` is the attribute name (nocontent, notheme, manycontent or
+        manytheme) and `value` its raw string value, or None/'' when
+        the attribute is absent.  Values are matched case-insensitively.
+
+        For manytheme/manycontent the result is always a
+        ``(handler, position)`` tuple, e.g. ``('warn', 'first')`` (the
+        default), ``('ignore', 'last')`` or ``('abort', None)``.
+
+        For nocontent/notheme an explicit value is returned as the plain
+        string ('ignore', 'abort' or 'warn'); an absent value gives
+        ``('warn', None)``.
+
+        Raises RuleSyntaxError if the value is not allowed for `name`.
+        """
+        if value == '':
+            value = None
+        if value:
+            value = value.lower()
+        if name in ('manytheme', 'manycontent'):
+            allowed = self._many_allowed
+        else:
+            allowed = self._no_allowed
+        if value not in allowed:
+            raise RuleSyntaxError(
+                'The attribute %s="%s" should have a value of one of: %s'
+                % (name, value, ', '.join(v for v in allowed if v)))
+        if value and ':' in value:
+            value = tuple(value.split(':', 1))
+        elif value == 'first':
+            value = ('ignore', 'first')
+        elif value == 'last':
+            value = ('ignore', 'last')
+        if name in ('manytheme', 'manycontent'):
+            if value == 'ignore':
+                value = ('ignore', 'first')
+            elif value == 'warn' or not value:
+                value = ('warn', 'first')
+            elif value == 'abort':
+                value = ('abort', None)
+        elif not value:
+            value = ('warn', None)
+        return value
+
+    def format_error(self, attr, value):
+        """
+        Render a value produced by convert_error() back into
+        ``attr="..."`` form for use in rule descriptions.
+
+        `value` may be a ``(handler, position)`` tuple or, for
+        nocontent/notheme, a plain handler string.  Returns None for a
+        nocontent/notheme value of "warn", since that is the default and
+        need not be shown.
+        """
+        if isinstance(value, tuple):
+            handler, pos = value
+        else:
+            # convert_error() leaves explicit nocontent/notheme values as
+            # plain strings; indexing one would give its first character.
+            handler, pos = value, None
+        if attr in ('manytheme', 'manycontent'):
+            if pos == 'last':
+                text = '%s:%s' % (handler, pos)
+            else:
+                text = handler
+        else:
+            text = handler
+            if text == 'warn':
+                return None
+        return '%s="%s"' % (attr, html_quote(text))
+
+    def if_content_matches(self, content_doc, log):
+        """
+        Tell whether this rule should be executed, judging by its
+        if-content selector.
+
+        With no if-content the rule always runs.  Otherwise the selector
+        is evaluated against `content_doc`; what counts as a match
+        depends on the selector type (any element; any element with text
+        or children; any element carrying one of the named attributes, or
+        any attribute at all).  An inverted selector flips the outcome.
+        A skipped rule is reported through `log`.
+        """
+        condition = self.if_content
+        if condition is None:
+            # No if-content means always run
+            return True
+        sel_type, els, attributes = self.select_elements(condition, content_doc, theme=False)
+        matched = bool(els)
+        if sel_type == 'children':
+            matched = any(el.text or len(el) for el in els)
+        elif sel_type == 'attributes':
+            if attributes:
+                matched = any(attr in el.attrib
+                              for el in els
+                              for attr in attributes)
+            else:
+                matched = any(el.attrib for el in els)
+        elif sel_type != 'elements':
+            ## FIXME: need to make sure 'tag' can't get in here:
+            assert 0
+        if matched != bool(condition.inverted):
+            return True
+        log.info(self, 'skipping rule because if-content="%s" does not match', self.if_content)
+        if log.describe:
+            log.describe('skipping rule %s because if-content="%s" does not match anything'
+                         % (self, self.if_content))
+        return False
+
+    name = None
+    move_supported = True
+
+    def describe_self(self):
+        """
+        Return an XML-like one-line description of this rule, e.g.
+        ``<replace content="..." theme="..." />``.
+
+        Only attributes that differ from their defaults are shown:
+        ``move="0"`` when moving is supported but turned off, and the
+        no*/many* error-handling attributes when they are not "warn".
+        """
+        # Values convert_error() produces for an absent or "warn" attribute
+        defaults = (None, 'warn', ('warn', None), ('warn', 'first'))
+        parts = ['<%s' % self.name]
+        if getattr(self, 'content', None):
+            parts.append('content="%s"' % html_quote(self.content))
+        if getattr(self, 'content_href', None):
+            parts.append('href="%s"' % html_quote(self.content_href))
+        # Moving is the default, so only its absence is worth showing
+        if self.move_supported and not getattr(self, 'move', True):
+            parts.append('move="0"')
+        for attr in 'nocontent', 'manycontent':
+            value = getattr(self, attr, None)
+            if value in defaults:
+                continue
+            text = self.format_error(attr, value)
+            if text:
+                parts.append(text)
+        if getattr(self, 'theme', None):
+            parts.append('theme="%s"' % html_quote(self.theme))
+        for attr in 'notheme', 'manytheme':
+            value = getattr(self, attr, None)
+            if value in defaults:
+                continue
+            text = self.format_error(attr, value)
+            if text:
+                parts.append(text)
+        ## FIXME: add source_location
+        return ' '.join(parts) + ' />'
+
+    def describe_content_elements(self, els, children=False):
+        """
+        Return the comma-separated tag names of `els`, prefixed with
+        "children of" when `children` is true.
+        """
+        tags = [el.tag for el in els]
+        description = ', '.join(tags)
+        if not children:
+            return description
+        return 'children of %s' % description
+
+    def describe_theme_element(self, el):
+        """
+        Return a short label for a theme element (just its tag name),
+        for use in descriptions.
+        """
+        return el.tag
+
+    @classmethod
+    def compile_selector(cls, tag, attr, source_location):
+        """
+        Parse the attribute `attr` of the rule element `tag` into a
+        Selector.  Returns None when the attribute is missing.
+        `source_location` is accepted but not used here.
+        """
+        expr = tag.get(attr)
+        if expr is not None:
+            return Selector.parse(expr)
+        return None
+    
+    def prepare_content_children(self, els):
+        """
+        Takes a list of elements and prepares their children as a list and text,
+        so that you can do::
+
+          text, els = self.prepare_content_children(els)
+          add_text(theme_el, text)
+          theme_el.extend(els)
+
+        This is generally for use in content="children:..." rules.
+
+        Returns ``(text, children)``.  `children` holds the children of
+        all the elements, in order.  `text` is the text that comes before
+        the first child (None if there is none).  The leading text of a
+        later element is appended to the tail of the child collected just
+        before it, so it keeps its position; note that this modifies
+        those tails.
+        """
+        leading_text = None
+        children = []
+        for el in els:
+            if el.text:
+                if children:
+                    add_tail(children[-1], el.text)
+                else:
+                    # Nothing collected yet to hang the text on (the
+                    # elements so far had no children), so it belongs
+                    # to the leading text.
+                    leading_text = (leading_text or '') + el.text
+            children.extend(el)
+        return leading_text, children
+
+    def select_elements(self, selector, doc, theme):
+        """
+        Run `selector` over `doc` and return its
+        ``(type, elements, attributes)`` result.
+
+        `theme` is a boolean saying that `doc` is the theme document; in
+        that case elements that originated in the content are removed
+        from the element list, as they are not selectable.
+        """
+        sel_type, elements, attributes = selector(doc)
+        if theme:
+            from_content = [el for el in elements if is_content_element(el)]
+            for el in from_content:
+                elements.remove(el)
+        return sel_type, elements, attributes
+
+class TransformRule(AbstractRule):
+    """
+    Abstract class for the rules that move from the content to the theme (replace, append, prepend)
+    """
+
+    def __init__(self, source_location, content, theme, if_content=None, content_href=None,
+                 move=True, nocontent=None, notheme=None, manytheme=None, manycontent=None):
+        """
+        `content` and `theme` are the (required) selectors.  Every pairing
+        of their selector types has to be listed in the subclass's
+        ``_compatible_types`` as ``(theme_type, content_type)``, else
+        RuleSyntaxError is raised.  The four error-handling values are
+        passed through convert_error().
+        """
+        self.source_location = source_location
+        assert content is not None
+        self.content = content
+        assert theme is not None
+        self.theme = theme
+        for content_type in content.selector_types():
+            for theme_type in theme.selector_types():
+                if (theme_type, content_type) in self._compatible_types:
+                    continue
+                raise RuleSyntaxError(
+                    'Selector type %s (from content="%s") and type %s (from theme="%s") are not compatible'
+                    % (content_type, content, theme_type, theme))
+        self.if_content = if_content
+        self.content_href = content_href
+        self.move = move
+        # Converted in this order so the first invalid value is the one reported
+        for attr_name, raw_value in [('nocontent', nocontent),
+                                     ('notheme', notheme),
+                                     ('manytheme', manytheme),
+                                     ('manycontent', manycontent)]:
+            setattr(self, attr_name, self.convert_error(attr_name, raw_value))
+
+    @classmethod
+    def from_xml(cls, tag, source_location):
+        """
+        Create the rule from its XML element `tag`.
+
+        Reads the content, theme and if-content selectors, the href of
+        an alternate content document, the move flag (default on) and
+        the nocontent/notheme/manytheme/manycontent attributes.
+        """
+        content = cls.compile_selector(tag, 'content', source_location)
+        theme = cls.compile_selector(tag, 'theme', source_location)
+        # The log messages name this attribute if-content="..."; the
+        # underscore spelling this method first looked for is still accepted.
+        if_content = cls.compile_selector(tag, 'if-content', source_location)
+        if if_content is None:
+            if_content = cls.compile_selector(tag, 'if_content', source_location)
+        content_href = tag.get('href')
+        move = asbool(tag.get('move', '1'))
+        return cls(source_location, content, theme, if_content=if_content,
+                   content_href=content_href, move=move,
+                   nocontent=tag.get('nocontent'),
+                   notheme=tag.get('notheme'),
+                   manytheme=tag.get('manytheme'),
+                   manycontent=tag.get('manycontent'))
+
+    def apply(self, content_doc, theme_doc, resource_fetcher, log):
+        """
+        Apply this rule to the theme document.
+
+        Selects the content elements (from the document at the rule's
+        href, fetched with `resource_fetcher`, if one is given) and the
+        theme element, then hands them to apply_transformation().
+
+        The rule is skipped, with a log message, when if-content does
+        not match or when either selector matches nothing.  When several
+        theme elements match, manytheme decides which one is used.
+        Raises AbortTheme when the relevant attribute is "abort".
+        """
+        import copy
+        describe = log.describe
+        if self.content_href:
+            content_doc = resource_fetcher(self.content_href)
+        if not self.if_content_matches(content_doc, log):
+            return
+        content_type, content_els, content_attributes = self.select_elements(self.content, content_doc, theme=False)
+        if not content_els:
+            if self.nocontent == 'abort':
+                log.debug(self, 'aborting theme because no content matches rule content="%s"', self.content)
+                raise AbortTheme('No content matches content="%s"' % self.content)
+            elif self.nocontent == 'ignore':
+                log_meth = log.debug
+            else:
+                log_meth = log.warn
+            log_meth(self, 'skipping rule because no content matches rule content="%s"', self.content)
+            if describe:
+                describe(
+                    'skipping rule %s because content="%s" does not match anything'
+                    % (self.describe_self(), html_quote(self.content)))
+            return
+        theme_type, theme_els, theme_attributes = self.select_elements(self.theme, theme_doc, theme=True)
+        attributes = self.join_attributes(content_attributes, theme_attributes)
+        if not theme_els:
+            if self.notheme == 'abort':
+                raise AbortTheme('No theme element matches theme="%s"' % self.theme)
+            elif self.notheme == 'ignore':
+                log_meth = log.debug
+            else:
+                log_meth = log.warn
+            log_meth(self, 'skipping rule because no theme element matches rule theme="%s"', self.theme)
+            if describe:
+                describe('skipping rule %s because theme="%s" does not match anything'
+                         % (self.describe_self(), html_quote(self.theme)))
+            return
+        if len(theme_els) > 1:
+            if self.manytheme[0] == 'warn':
+                log.warn(self, '%s elements match theme="%s", using the %s match',
+                         len(theme_els), self.theme, self.manytheme[1])
+            elif self.manytheme[0] == 'abort':
+                raise AbortTheme('Many elements match theme="%s"' % self.theme)
+            if self.manytheme[1] == 'first':
+                theme_els = [theme_els[0]]
+            else:
+                theme_els = [theme_els[-1]]
+        theme_el = theme_els[0]
+        if not self.move and theme_type in ('children', 'elements'):
+            # Work on copies so that the content document is left intact
+            log.debug(self, 'content elements are being copied into theme (not moved)')
+            content_els = copy.deepcopy(content_els)
+        mark_content_els(content_els)
+        self.apply_transformation(content_type, content_els, attributes, theme_type, theme_el, log)
+
+    def join_attributes(self, attr1, attr2):
+        """
+        Combine the attribute-name lists of the content and theme
+        selectors.
+
+        Returns None if neither has any, the non-empty one if only one
+        has, and otherwise a new list with the names of both, without
+        duplicates and in order of first appearance.
+        """
+        if not attr1 and not attr2:
+            return None
+        if attr1 and not attr2:
+            return attr1
+        if not attr1 and attr2:
+            return attr2
+        ## FIXME: is a join really the right method?
+        merged = []
+        for name in list(attr1) + list(attr2):
+            if name not in merged:
+                merged.append(name)
+        return merged
+
+class Replace(TransformRule):
+
+    _compatible_types = [
+        ('children', 'elements'),
+        ('children', 'children'),
+        ('elements', 'elements'),
+        ('elements', 'children'),
+        ('attributes', 'attributes'),
+        ('tag', 'tag'),
+        ]
+
+    def apply_transformation(self, content_type, content_els, attributes, theme_type, theme_el, log):
+        """
+        Replace part of the theme with the selected content.
+
+        `theme_type` decides what is replaced on `theme_el`:
+
+        * 'children': its children and text are replaced, by the content
+          elements or by their children.
+        * 'elements': `theme_el` itself is replaced in its parent.
+        * 'attributes': its attributes are cleared and then set from one
+          content element (manycontent picks which when several match);
+          `attributes`, when given, restricts the names that are copied.
+        * 'tag': its tag name and attributes are taken from the first
+          content element.
+
+        Raises AbortTheme when several content elements match an
+        attributes rule and manycontent is "abort".
+        """
+        describe = log.describe
+        if theme_type == 'children':
+            existing_children = len(theme_el) or theme_el.text
+            theme_el[:] = []
+            theme_el.text = ''
+            if content_type == 'elements':
+                if self.move:
+                    # The elements leave the content document; hand the
+                    # text that follows each of them to its neighbour there.
+                    for el in reversed(content_els):
+                        move_tail_upwards(el)
+                else:
+                    # If we are working with copies, then we can just throw away the tails
+                    for el in content_els:
+                        el.tail = None
+                theme_el.extend(content_els)
+                if describe:
+                    if existing_children:
+                        extra = ' and removed its children'
+                    else:
+                        extra = ''
+                    describe(
+                        "Rule %s moved elements %s into element %s%s"
+                        % (self.describe_self(), self.describe_content_elements(content_els), self.describe_theme_element(theme_el), extra))
+            elif content_type == 'children':
+                text, els = self.prepare_content_children(content_els)
+                add_text(theme_el, text)
+                theme_el.extend(els)
+                if describe:
+                    if existing_children:
+                        extra = ' and removed its children'
+                    else:
+                        extra = ''
+                    describe(
+                        "Rule %s moved the children of elements %s into element %s%s"
+                        % (self.describe_self(), self.describe_content_elements(content_els, children=True),
+                           self.describe_theme_element(theme_el), extra))
+                if self.move:
+                    # Since we moved just the children of the content elements, we still need to remove the parent
+                    # elements.
+                    for el in content_els:
+                        # Removing an element takes its tail along, so
+                        # pass the tail on first
+                        move_tail_upwards(el)
+                        el.getparent().remove(el)
+            else:
+                assert 0
+
+        if theme_type == 'elements':
+            move_tail_upwards(theme_el)
+            parent = theme_el.getparent()
+            pos = parent.index(theme_el)
+            if content_type == 'elements':
+                if self.move:
+                    for el in reversed(content_els):
+                        move_tail_upwards(el)
+                else:
+                    for el in content_els:
+                        el.tail = None
+                parent[pos:pos+1] = content_els
+            elif content_type == 'children':
+                text, els = self.prepare_content_children(content_els)
+                # The leading text goes in front of where theme_el was
+                if pos == 0:
+                    add_text(parent, text)
+                else:
+                    add_tail(parent[pos-1], text)
+                parent[pos:pos+1] = els
+                if self.move:
+                    for el in content_els:
+                        move_tail_upwards(el)
+                        el.getparent().remove(el)
+            else:
+                assert 0
+
+        if theme_type == 'attributes':
+            ## FIXME: handle named attributes, e.g., attributes(class):
+            assert content_type == 'attributes'
+            if len(content_els) > 1:
+                if self.manycontent[0] == 'abort':
+                    log.debug(self, 'aborting because %s elements in the content match content="%s"',
+                              len(content_els), self.content)
+                    raise AbortTheme()
+                else:
+                    if self.manycontent[0] == 'warn':
+                        log_meth = log.warn
+                    else:
+                        log_meth = log.debug
+                    log_meth(self, '%s elements match content="%s" (but only one expected), using the %s match',
+                             len(content_els), self.content, self.manycontent[1])
+                    if self.manycontent[1] == 'first':
+                        content_els = [content_els[0]]
+                    else:
+                        content_els = [content_els[-1]]
+            theme_el.attrib.clear()
+            if attributes:
+                c_attrib = content_els[0].attrib
+                for name in attributes:
+                    if name in c_attrib:
+                        theme_el.set(name, c_attrib[name])
+                if self.move:
+                    for name in attributes:
+                        if name in c_attrib:
+                            del c_attrib[name]
+            else:
+                theme_el.attrib.update(content_els[0].attrib)
+                if self.move:
+                    content_els[0].attrib.clear()
+
+        if theme_type == 'tag':
+            assert content_type == 'tag'
+            theme_el.tag = content_els[0].tag
+            theme_el.attrib.clear()
+            theme_el.attrib.update(content_els[0].attrib)
+            # "move" in this case doesn't mean anything
+
+rules['replace'] = Replace
+
+class Append(TransformRule):
+
+    _append = True
+
+    _compatible_types = [
+        ('children', 'elements'),
+        ('children', 'children'),
+        ('elements', 'elements'),
+        ('elements', 'children'),
+        ('attributes', 'attributes'),
+        # Removing 'tag'
+        ]
+
+    def apply_transformation(self, content_type, content_els, attributes, theme_type, theme_el, log):
+        """
+        Add the selected content to the theme, after the existing theme
+        material when ``self._append`` is true and before it otherwise
+        (the Prepend subclass sets it to false).
+
+        `theme_type` decides where the content goes:
+
+        * 'children': at the end/start of `theme_el`'s children.
+        * 'elements': as siblings right after/before `theme_el`.
+        * 'attributes': content attributes are copied onto `theme_el`;
+          when appending, attributes the theme element already has are
+          kept, when prepending they are overwritten.
+
+        Raises AbortTheme when several content elements match an
+        attributes rule and manycontent is "abort".
+        """
+        if theme_type == 'children':
+            if content_type == 'elements':
+                if self.move:
+                    for el in reversed(content_els):
+                        move_tail_upwards(el)
+                else:
+                    for el in content_els:
+                        el.tail = None
+                if self._append:
+                    theme_el.extend(content_els)
+                else:
+                    # The theme element's text has to follow the new elements
+                    add_tail(content_els[-1], theme_el.text)
+                    theme_el.text = None
+                    theme_el[:0] = content_els
+            elif content_type == 'children':
+                text, els = self.prepare_content_children(content_els)
+                if self._append:
+                    if len(theme_el):
+                        add_tail(theme_el[-1], text)
+                    else:
+                        add_text(theme_el, text)
+                    theme_el.extend(els)
+                elif els:
+                    add_tail(els[-1], theme_el.text)
+                    theme_el.text = text
+                    theme_el[:0] = els
+                else:
+                    # Only text to prepend; there is no element to
+                    # carry the theme element's own text
+                    theme_el.text = ((text or '') + (theme_el.text or '')) or None
+            else:
+                assert 0
+
+        if theme_type == 'elements':
+            parent = theme_el.getparent()
+            pos = parent.index(theme_el)
+            if content_type == 'elements':
+                if self.move:
+                    for el in reversed(content_els):
+                        move_tail_upwards(el)
+                else:
+                    for el in content_els:
+                        el.tail = None
+                if self._append:
+                    parent[pos+1:pos+1] = content_els
+                else:
+                    parent[pos:pos] = content_els
+            elif content_type == 'children':
+                text, els = self.prepare_content_children(content_els)
+                # It is the children (els) that get inserted, not the
+                # content elements that contained them
+                if self._append:
+                    add_tail(theme_el, text)
+                    parent[pos+1:pos+1] = els
+                else:
+                    if pos == 0:
+                        add_text(parent, text)
+                    else:
+                        add_tail(parent[pos-1], text)
+                    parent[pos:pos] = els
+
+        if theme_type == 'attributes':
+            ## FIXME: handle named attributes
+            assert content_type == 'attributes'
+            if len(content_els) > 1:
+                if self.manycontent[0] == 'abort':
+                    log.debug(self, 'aborting because %s elements in the content match content="%s"',
+                              len(content_els), self.content)
+                    raise AbortTheme()
+                else:
+                    if self.manycontent[0] == 'warn':
+                        log_meth = log.warn
+                    else:
+                        log_meth = log.debug
+                    log_meth(self, '%s elements match content="%s" (but only one expected), using the %s match',
+                             len(content_els), self.content, self.manycontent[1])
+                    if self.manycontent[1] == 'first':
+                        content_els = [content_els[0]]
+                    else:
+                        content_els = [content_els[-1]]
+            content_attrib = content_els[0].attrib
+            theme_attrib = theme_el.attrib
+            if attributes:
+                names = [name for name in attributes if name in content_attrib]
+            else:
+                names = list(content_attrib.keys())
+            # Plain item access only, rather than relying on the attrib
+            # object offering set()/setdefault()
+            for name in names:
+                if self._append and name in theme_attrib:
+                    # Appending never overrides what the theme has
+                    continue
+                theme_attrib[name] = content_attrib[name]
+            if self.move:
+                if attributes:
+                    for name in names:
+                        del content_attrib[name]
+                else:
+                    content_attrib.clear()
+
+rules['append'] = Append
+
+class Prepend(Append):
+    """
+    Same as Append, except that the content is put in front of the
+    theme material instead of after it.
+    """
+    # Flag that Append.apply_transformation branches on
+    _append = False
+
+rules['prepend'] = Prepend
+
+class Drop(AbstractRule):
+    
+    def __init__(self, source_location, content, theme, if_content=None,
+                 nocontent=None, notheme=None):
+        """
+        `content` and `theme` are the selectors of what to drop from the
+        content and theme documents.  Either may be None (apply() skips
+        a missing selector), but not both: that raises RuleSyntaxError.
+        """
+        if content is None and theme is None:
+            raise RuleSyntaxError(
+                'A drop rule must have a content or a theme selector')
+        self.source_location = source_location
+        self.content = content
+        self.theme = theme
+        self.if_content = if_content
+        self.nocontent = self.convert_error('nocontent', nocontent)
+        self.notheme = self.convert_error('notheme', notheme)
+
+    def apply(self, content_doc, theme_doc, resource_fetcher, log):
+        """
+        Drop the selected things from the theme and then from the content.
+
+        For each selector that is set, depending on its type, every
+        matched element is removed ('elements'), emptied ('children'),
+        stripped of the named or of all attributes ('attributes'), or
+        replaced by its own text and children ('tag').
+
+        If a selector matches nothing the rest of the rule is skipped
+        with a log message, or AbortTheme is raised when the matching
+        no* attribute is "abort".  `resource_fetcher` is not used.
+        """
+        if not self.if_content_matches(content_doc, log):
+            return
+        for doc, selector, error, name in [(theme_doc, self.theme, self.notheme, 'theme'), (content_doc, self.content, self.nocontent, 'content')]:
+            if selector is None:
+                continue
+            sel_type, els, attributes = self.select_elements(selector, doc, name=='theme')
+            if not els:
+                if error == 'abort':
+                    log.debug(self, 'aborting %s because no %s element matches rule %s="%s"', name, name, name, selector)
+                    raise AbortTheme('No %s matches %s="%s"' % (name, name, selector))
+                elif error == 'ignore':
+                    log_meth = log.debug
+                else:
+                    log_meth = log.warn
+                log_meth(self, 'skipping rule because no %s matches rule %s="%s"', name, name, selector)
+                return
+            for el in els:
+                if sel_type == 'elements':
+                    move_tail_upwards(el)
+                    el.getparent().remove(el)
+                elif sel_type == 'children':
+                    el[:] = []
+                    el.text = ''
+                elif sel_type == 'attributes':
+                    attrib = el.attrib
+                    if attributes:
+                        # Not called "name": that is the outer loop's variable
+                        for attr_name in attributes:
+                            if attr_name in attrib:
+                                del attrib[attr_name]
+                    else:
+                        attrib.clear()
+                elif sel_type == 'tag':
+                    # Unwrap the element: its text and children take its
+                    # place in the parent, and its tail follows them.
+                    children = list(el)
+                    if children:
+                        add_tail(children[-1], el.tail)
+                    else:
+                        add_text(el, el.tail)
+                    parent = el.getparent()
+                    pos = parent.index(el)
+                    if pos == 0:
+                        add_text(parent, el.text)
+                    else:
+                        add_tail(parent[pos-1], el.text)
+                    parent[pos:pos+1] = children
+                else:
+                    assert 0
+
+    @classmethod
+    def from_xml(cls, tag, source_location):
+        """
+        Create the rule from its XML element `tag`, reading the content,
+        theme and if-content selectors and the nocontent/notheme
+        attributes.
+        """
+        content = cls.compile_selector(tag, 'content', source_location)
+        theme = cls.compile_selector(tag, 'theme', source_location)
+        # The log messages name this attribute if-content="..."; the
+        # underscore spelling this method first looked for is still accepted.
+        if_content = cls.compile_selector(tag, 'if-content', source_location)
+        if if_content is None:
+            if_content = cls.compile_selector(tag, 'if_content', source_location)
+        return cls(source_location, content, theme, if_content=if_content,
+                   nocontent=tag.get('nocontent'),
+                   notheme=tag.get('notheme'))
+
+rules['drop'] = Drop
+            
+## Element utilities ##
+
+def add_text(el, text):
+    """
+    Append `text` to the text of `el`.  Does nothing for empty text.
+    """
+    if text:
+        # el.text can be None, in which case there is nothing to add to
+        el.text = (el.text or '') + text
+
+def add_tail(el, tail):
+    """
+    Append `tail` to the tail text of `el`.  Does nothing for an empty
+    tail.
+    """
+    if tail:
+        # el.tail can be None, in which case there is nothing to add to
+        el.tail = (el.tail or '') + tail
+
+def move_tail_upwards(el):
+    """
+    Move the tail of the el into its previous sibling or parent
+
+    The text ends up on the tail of the previous sibling, or on the
+    text of the parent when `el` is the first child.  The el's own
+    tail is then cleared so the text is not carried along (and so
+    duplicated) when the el is put somewhere else.  An el without a
+    tail, or without a sibling or parent to take it, is left alone.
+    """
+    tail = el.tail
+    if not tail:
+        return
+    dest = el.getprevious()
+    if dest is not None:
+        add_tail(dest, tail)
+    else:
+        parent = el.getparent()
+        if parent is None:
+            # Nowhere to put the text
+            return
+        add_text(parent, tail)
+    el.tail = None
+
+def iter_self_and_ancestors(el):
+    """
+    Yield `el` itself and then everything el.iterancestors() yields.
+    """
+    yield el
+    for item in el.iterancestors():
+        yield item
+
+def mark_content_els(els):
+    """
+    Flag each of the elements as originating in the content by setting
+    the CONTENT_ATTRIB marker attribute on it.
+    """
+    for el in els:
+        ## FIXME: maybe put something that is trackable to the rule that moved the element
+        el.set(CONTENT_ATTRIB, '1')
+
+def is_content_element(el):
+    """
+    True if the element or one of its ancestors carries the
+    CONTENT_ATTRIB marker.
+    """
+    ## FIXME: should this check children too?
+    return any(node.get(CONTENT_ATTRIB)
+               for node in iter_self_and_ancestors(el))
+
+def remove_content_attribs(doc):
+    """
+    Delete the CONTENT_ATTRIB marker attribute from every node in `doc`
+    that has it.
+    """
+    for p in doc.getiterator():
+        if p.get(CONTENT_ATTRIB, None) is not None:
+            del p.attrib[CONTENT_ATTRIB]
+
+from cgi import escape as cgi_escape
+def html_quote(s):
+    """
+    Convert `s` to unicode and escape it with cgi.escape, with the quote
+    flag set so the result can be used inside a quoted attribute value.
+    """
+    # NOTE(review): relies on the Python 2 builtin `unicode`
+    s = unicode(s)
+    return cgi_escape(s, True)

Added: z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/selector.py
==============================================================================
--- (empty file)
+++ z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/selector.py	Tue Jun 24 22:47:43 2008
@@ -0,0 +1,108 @@
+from lxml.etree import XPath
+from lxml.cssselect import CSSSelector
+import re
+
+# Matches a leading type prefix such as "elements:" or "children:";
+# group 1 is the type name (singular or plural spelling)
+type_re = re.compile(r'^(elements?|children|tag|attributes?):')
+# Maps the singular spellings accepted by type_re to the plural form
+type_map = dict(element='elements', attribute='attributes')
+# Matches a leading "attributes(name1,name2):" prefix; group 1 is the
+# comma-separated list of attribute names
+attributes_re = re.compile(r'^attributes[(]([a-zA-Z0-9_,-]+)[)]:')
+
+class SelectorSyntaxError(Exception):
+    """
+    Raised when a selector expression mixes incompatible types
+    """
+
+class Selector(object):
+    """
+    A compiled selector expression.
+
+    An expression is an optional type prefix (``elements:``,
+    ``children:``, ``tag:``, ``attributes:`` or
+    ``attributes(name1,name2):``) followed by a CSS selector or, when
+    it starts with ``/``, an XPath expression.  Alternatives can be
+    joined with ``||``; calling the selector uses the first
+    alternative that matches anything.
+    """
+
+    def __init__(self, major_type, attributes, selectors):
+        """
+        ``major_type`` and ``attributes`` come from the prefix of the
+        whole expression; ``selectors`` is the list of source strings
+        of the ``||`` alternatives.
+        """
+        self.major_type = major_type
+        self.attributes = attributes
+        self.selectors_source = selectors
+        # Each entry is a (type, compiled selector, source, attributes) tuple
+        self.selectors = [self.compile_selector(selector, default_type=major_type)
+                          for selector in selectors]
+
+    @classmethod
+    def parse(cls, expr):
+        """
+        Create a Selector from an expression string, splitting the
+        alternatives on ``||`` and stripping whitespace around each.
+        """
+        major_type, attributes, expr = cls.parse_prefix(expr)
+        selectors = [e.strip()
+                     for e in expr.split('||')]
+        return cls(major_type, attributes, selectors)
+
+    @staticmethod
+    def parse_prefix(expr, default_type='elements'):
+        """
+        Parses the elements:, etc, prefix.
+
+        Returns (type, attributes, rest_expr).  attributes is a list of
+        names for the ``attributes(...)`` form and None otherwise; when
+        no prefix is present the type is default_type and rest_expr is
+        the unchanged expr.
+        """
+        assert isinstance(expr, basestring), "Bad value for expr: %r" % expr
+        match = type_re.match(expr)
+        if match:
+            major_type = match.group(1)
+            # Normalize singular spellings (element -> elements, ...)
+            major_type = type_map.get(major_type, major_type)
+            rest_expr = expr[match.end():]
+            return (major_type, None, rest_expr)
+        match = attributes_re.match(expr)
+        if match:
+            attributes = [name.strip() for name in match.group(1).split(',') if name.strip()]
+            rest_expr = expr[match.end():]
+            return ('attributes', attributes, rest_expr)
+        return (default_type, None, expr)
+
+    @staticmethod
+    def types_compatible(type1, type2):
+        """
+        Return true if the two selector types may be combined in one
+        expression: children and elements can be mixed with each
+        other, any other type only with itself.
+        """
+        if type1 in ('children', 'elements'):
+            return type2 in ('children', 'elements')
+        else:
+            return type1 == type2
+
+    def compile_selector(self, expr, default_type):
+        """
+        Compile one ``||`` alternative.
+
+        Returns (type, selector, expr, attributes) where selector is an
+        XPath object when the expression (after its prefix) starts with
+        ``/`` and a CSSSelector otherwise.  Raises SelectorSyntaxError
+        when the alternative's type is not compatible with
+        self.major_type.
+        """
+        sel_type, attributes, rest_expr = self.parse_prefix(expr, default_type=default_type)
+        if not self.types_compatible(sel_type, self.major_type):
+            # This runs while __init__ is still building self.selectors,
+            # so describe the selector by its source rather than by self
+            raise SelectorSyntaxError(
+                "Expression %s in selector %r uses the type %r, but this is not compatible "
+                "with the type %r already declared earlier in the selector"
+                % (expr, ' || '.join(self.selectors_source), sel_type, self.major_type))
+        if rest_expr.startswith('/'):
+            selector = XPath(rest_expr)
+        else:
+            selector = CSSSelector(rest_expr)
+        return (sel_type, selector, expr, attributes)
+
+    def __call__(self, doc):
+        """
+        Match this selector against the doc.  Returns (type, elements,
+        attributes), where type is one of elements, children, tag,
+        attributes.  attributes is the list of attributes, if that was
+        given.
+        """
+        for sel_type, selector, sel_expr, sel_attributes in self.selectors:
+            result = selector(doc)
+            if result:
+                # The first alternative that matches anything wins
+                matched_type = sel_type or self.major_type
+                attributes = sel_attributes or self.attributes
+                return (matched_type, result, attributes)
+        return (self.major_type, [], self.attributes)
+
+    def selector_types(self):
+        """
+        Returns a set of all types used in this expression (usually a
+        single-item set, but some selectors can use multiple types).
+        """
+        return set([sel_type
+                    for sel_type, selector, sel_expr, sel_attributes in self.selectors])
+
+    def __unicode__(self):
+        """
+        Rebuild the expression source, with an explicit type prefix on
+        every alternative.
+        """
+        parts = []
+        for sel_type, selector, sel_expr, sel_attributes in self.selectors:
+            # Same fallback as __call__: attributes declared on the
+            # expression-wide prefix apply to every alternative
+            attributes = sel_attributes or self.attributes
+            if attributes:
+                sel_type = '%s(%s)' % (sel_type, ','.join(attributes))
+            parts.append('%s:%s' % (sel_type, sel_expr))
+        return ' || '.join(parts)
+
+    def __str__(self):
+        return str(unicode(self))

Added: z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/tests/test_selection.txt
==============================================================================
--- (empty file)
+++ z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/tests/test_selection.txt	Tue Jun 24 22:47:43 2008
@@ -0,0 +1,146 @@
+First we create a theme document to test out:
+
+    >>> from lxml.html import fromstring, tostring
+    >>> theme = fromstring('''\
+    ... <html>
+    ...  <head>
+    ...   <title>This is a theme title</title>
+    ...   <link rel=Stylesheet type="text/css" href="style.css">
+    ...   <style type="text/css">
+    ...     @import "style2.css";
+    ...   </style>
+    ...  </head>
+    ...  <body>
+    ... 
+    ...   <div id="header" class="title-bar">
+    ...     <h1 id="title">This is the theme title</h1>
+    ...     <div class="topnav"></div>
+    ...   </div>
+    ... 
+    ...   <div id="content">
+    ...     This content will be replaced.
+    ...   </div>
+    ... 
+    ...   <div id="footer">
+    ...     Copyright (C) 2000 Some Corporation
+    ...   </div>
+    ... 
+    ...  </body>
+    ... </html>''',
+    ... base_url='http://somesite.com/theme/theme.html')
+
+Then, let's select something:
+
+    >>> from deliverance.selector import Selector
+    >>> def t_select(selection):
+    ...     # Run the selection against the theme and print what matched
+    ...     selector = Selector.parse(selection)
+    ...     type, elements, attributes = selector(theme)
+    ...     if type == 'attributes':
+    ...         for element in elements:
+    ...             # Without an explicit list, show each element's own
+    ...             # attributes (not those of the first element matched)
+    ...             names = attributes or element.attrib.keys()
+    ...             text = []
+    ...             for key in sorted(names):
+    ...                 text.append('%s="%s"' % (key, element.attrib[key]))
+    ...             print 'attributes:%s %s' % (element.tag, ' '.join(text))
+    ...         return
+    ...     if type == 'tag':
+    ...         for element in elements:
+    ...             # Only the opening tag is shown
+    ...             tag = tostring(element).split('>')[0] + '>'
+    ...             print 'tag:%s' % tag
+    ...         return
+    ...     if type == 'elements':
+    ...         type = ''
+    ...     else:
+    ...         type += ':'
+    ...     for element in elements:
+    ...         print '%s%s' % (type, tostring(element).strip())
+    >>> t_select('link')
+    <link rel="Stylesheet" type="text/css" href="style.css">
+    >>> t_select('/html/head/title')
+    <title>This is a theme title</title>
+    >>> t_select('children:title')
+    children:<title>This is a theme title</title>
+    >>> t_select('attributes(class):#header')
+    attributes:div class="title-bar"
+    >>> t_select('#nothing')
+    >>> t_select('div')
+    <div id="header" class="title-bar">
+        <h1 id="title">This is the theme title</h1>
+        <div class="topnav"></div>
+      </div>
+    <div class="topnav"></div>
+    <div id="content">
+        This content will be replaced.
+      </div>
+    <div id="footer">
+        Copyright (C) 2000 Some Corporation
+      </div>
+    >>> t_select('div#header')
+    <div id="header" class="title-bar">
+        <h1 id="title">This is the theme title</h1>
+        <div class="topnav"></div>
+      </div>
+    >>> t_select("tag://div[@id='header']")
+    tag:<div id="header" class="title-bar">
+
+Now we'll select from some content:
+
+    >>> from lxml.etree import XML
+    >>> import copy
+    >>> from deliverance.rules import parse_rule, remove_content_attribs
+    >>> from deliverance.log import SavingLogger
+    >>> def t_rule_head(rule, selector='//head', show_log=False):
+    ...     # Apply the rule (given as XML source) to copies of content and
+    ...     # theme, then print the first theme element matching selector
+    ...     rule = parse_rule(XML(rule), None)
+    ...     theme_copy = copy.deepcopy(theme)
+    ...     theme_copy.make_links_absolute()
+    ...     logger = SavingLogger()
+    ...     content_copy = copy.deepcopy(content)
+    ...     rule.apply(content_copy, theme_copy, None, logger)
+    ...     remove_content_attribs(theme_copy)
+    ...     el = theme_copy.xpath(selector)[0]
+    ...     if show_log:
+    ...         for level, log_el, message in logger.messages:
+    ...             print 'log:', message
+    ...     print tostring(el)
+
+And the tests:
+
+    >>> content = fromstring('''\
+    ... <html>
+    ...  <head>
+    ...   <title>User: Bob</title>
+    ...   <link rel="Stylesheet" type="text/css" href="/users.css">
+    ...   <link rel="Stylesheet" type="text/css" href="/theme/style.css">
+    ...  </head>
+    ...  <body>
+    ...   <div id="some-stupid-app-content">blah blah blah</div>
+    ...   <h1 id="title">The user <b>Bob</b></h1>
+    ...   <div id="content">
+    ...     Some information about Bob.
+    ...   </div>
+    ...  </body>
+    ... </html>''',
+    ... base_url='http://somesite.com/users/bob/')
+    >>> t_rule_head('<append content="link" theme="children:head" />')
+    <head><title>This is a theme title</title><link rel="Stylesheet" type="text/css" href="http://somesite.com/theme/style.css"><style type="text/css">
+        @import "http://somesite.com/theme/style2.css";
+      </style><link rel="Stylesheet" type="text/css" href="/users.css"><link rel="Stylesheet" type="text/css" href="/theme/style.css"></head>
+
+Note that href has to be normalized for this to work (FIXME: remove-dups?):
+
+    >>> t_rule_head('<append content="link" theme="children:head" remove-dups="1" />')
+    <head><title>This is a theme title</title><link rel="Stylesheet" type="text/css" href="http://somesite.com/theme/style.css"><style type="text/css">
+        @import "http://somesite.com/theme/style2.css";
+      </style><link rel="Stylesheet" type="text/css" href="/users.css"><link rel="Stylesheet" type="text/css" href="/theme/style.css"></head>
+    >>> t_rule_head('<replace content="children:/html/head/title" theme="children:/html/head/title" />')
+    <head><title>User: Bob</title><link rel="Stylesheet" type="text/css" href="http://somesite.com/theme/style.css"><style type="text/css">
+        @import "http://somesite.com/theme/style2.css";
+      </style></head>
+    >>> t_rule_head('<drop theme="link, style" />')
+    <head><title>This is a theme title</title></head>
+    >>> t_rule_head('<drop theme="link || style" />')
+    <head><title>This is a theme title</title><style type="text/css">
+        @import "http://somesite.com/theme/style2.css";
+      </style></head>

Added: z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/util/__init__.py
==============================================================================
--- (empty file)
+++ z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/util/__init__.py	Tue Jun 24 22:47:43 2008
@@ -0,0 +1 @@
+#

Added: z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/util/converters.py
==============================================================================
--- (empty file)
+++ z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/util/converters.py	Tue Jun 24 22:47:43 2008
@@ -0,0 +1,11 @@
+def asbool(obj):
+    """
+    Convert obj to a boolean.
+
+    Strings are stripped and lower-cased, then must be one of
+    true/yes/on/y/t/1 or false/no/off/n/f/0; any other string raises
+    ValueError.  Non-string objects are passed through bool().
+    """
+    if not isinstance(obj, (str, unicode)):
+        return bool(obj)
+    normalized = obj.strip().lower()
+    if normalized in ['true', 'yes', 'on', 'y', 't', '1']:
+        return True
+    if normalized in ['false', 'no', 'off', 'n', 'f', '0']:
+        return False
+    raise ValueError(
+        "String is not true/false: %r" % normalized)

Modified: z3/deliverance/sandbox/ianb/deliverance/trunk/setup.py
==============================================================================
--- z3/deliverance/sandbox/ianb/deliverance/trunk/setup.py	(original)
+++ z3/deliverance/sandbox/ianb/deliverance/trunk/setup.py	Tue Jun 24 22:47:43 2008
@@ -7,11 +7,20 @@
       version=version,
       description="",
       long_description="""\
+Deliverance does transformations of HTML to 'theme' pages, similar in
+function to XSLT but using a simpler XML-based language to express the
+transformation.
 """,
-      classifiers=[], # Get strings from http://pypi.python.org/pypi?%3Aaction=list_classifiers
-      keywords='',
-      author='',
-      author_email='',
+      classifiers=[
+        "Development Status :: 4 - Beta",
+        "Environment :: Web Environment",
+        "License :: OSI Approved :: MIT License",
+        "Topic :: Internet :: WWW/HTTP :: WSGI",
+        "Topic :: Internet :: WWW/HTTP :: WSGI :: Middleware",
+      ],
+      keywords='wsgi theming html',
+      author='Ian Bicking, The Open Planning Project',
+      author_email='deliverance-discuss at lists.openplans.org',
       url='http://openplans.org/projects/deliverance/',
       license='MIT',
       packages=find_packages(exclude=['ez_setup', 'examples', 'tests']),
@@ -21,6 +30,5 @@
         "lxml",
       ],
       entry_points="""
-      # -*- Entry points: -*-
       """,
       )


More information about the z3-checkins mailing list