[z3-checkins] r56054 - in z3/deliverance/sandbox/ianb/deliverance/trunk: . deliverance deliverance/tests deliverance/util
ianb at codespeak.net
ianb at codespeak.net
Tue Jun 24 22:47:45 CEST 2008
Author: ianb
Date: Tue Jun 24 22:47:43 2008
New Revision: 56054
Added:
z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/log.py (contents, props changed)
z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/rules.py (contents, props changed)
z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/selector.py (contents, props changed)
z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/tests/
z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/tests/test_selection.txt (contents, props changed)
z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/util/
z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/util/__init__.py (contents, props changed)
z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/util/converters.py (contents, props changed)
Modified:
z3/deliverance/sandbox/ianb/deliverance/trunk/setup.py
Log:
A basic implementation of rules and selectors
Added: z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/log.py
==============================================================================
--- (empty file)
+++ z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/log.py Tue Jun 24 22:47:43 2008
@@ -0,0 +1,29 @@
+import logging
+
class SavingLogger(object):
    """
    Logger that keeps its messages in memory instead of emitting them.

    Every message is stored in ``self.messages`` as a
    ``(level, el, msg)`` tuple, where ``el`` is whatever object the
    caller passed as the subject of the message.

    When ``description`` is true, ``self.describe`` is a callable that
    appends to ``self.descriptions``; otherwise ``self.describe`` is
    ``None`` so callers can simply test ``if log.describe:``.
    """

    # Class-level default: without it, ``log.describe`` raised
    # AttributeError whenever descriptions were switched off.
    describe = None

    def __init__(self, description=True):
        self.messages = []
        if description:
            self.descriptions = []
            self.describe = self.add_description

    def add_description(self, msg):
        """Record a human-readable description of what a rule did."""
        self.descriptions.append(msg)

    def message(self, level, el, msg, *args, **kw):
        """
        Store a message; ``msg`` is %-interpolated with the positional
        arguments if any were given, otherwise with the keyword
        arguments if any were given.
        """
        if args:
            msg = msg % args
        elif kw:
            msg = msg % kw
        self.messages.append((level, el, msg))

    def debug(self, el, msg, *args, **kw):
        self.message(logging.DEBUG, el, msg, *args, **kw)

    def info(self, el, msg, *args, **kw):
        self.message(logging.INFO, el, msg, *args, **kw)

    def notify(self, el, msg, *args, **kw):
        # One step above INFO
        self.message(logging.INFO+1, el, msg, *args, **kw)

    def warn(self, el, msg, *args, **kw):
        self.message(logging.WARN, el, msg, *args, **kw)

    warning = warn

    def error(self, el, msg, *args, **kw):
        self.message(logging.ERROR, el, msg, *args, **kw)

    def fatal(self, el, msg, *args, **kw):
        self.message(logging.FATAL, el, msg, *args, **kw)
Added: z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/rules.py
==============================================================================
--- (empty file)
+++ z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/rules.py Tue Jun 24 22:47:43 2008
@@ -0,0 +1,702 @@
+"""
+Represents individual rules
+"""
+
+from deliverance.exceptions import add_exception_info
+from deliverance.util.converters import asbool
+from deliverance.selector import Selector
+from lxml import etree
+
+## A dictionary mapping element names to their rule classes:
+rules = {}
+
class RuleSyntaxError(Exception):
    """
    Raised when the definition of a rule is itself not valid.
    """
+
class SelectionError(Exception):
    """
    Raised when the result of a selection is not acceptable, for
    instance when no element is returned although one is required.
    """
+
class AbortTheme(Exception):
    """
    Raised to abort theming, e.g. as requested by nocontent="abort"
    """
+
+CONTENT_ATTRIB = 'x-a-marker-attribute-for-deliverance'
+
def parse_rule(el, source_location):
    """
    Build the rule object for the element ``el``.

    The element's tag is looked up in the module-level ``rules``
    registry and the registered class's ``from_xml`` creates the
    instance.  Raises RuleSyntaxError for an unregistered tag.
    """
    tag_name = el.tag
    try:
        rule_class = rules[tag_name]
    except KeyError:
        raise RuleSyntaxError(
            "There is no rule with the name %s"
            % tag_name)
    return rule_class.from_xml(el, source_location)
+
class AbstractRule(object):
    """
    Base class with the helpers shared by the rules in this module:
    validation of the nocontent/notheme/manycontent/manytheme
    attributes, the if-content test, element selection, and the
    descriptions used in log output.
    """

    # Element name used by describe_self()
    name = None
    # Whether describe_self() reports the ``move`` attribute
    move_supported = True

    _no_allowed = (None, 'ignore', 'abort', 'warn')
    _many_allowed = _no_allowed + ('last', 'first', 'ignore:first', 'ignore:last',
                                   'warn:first', 'warn:last')
    _many_attrs = ('manytheme', 'manycontent')

    def convert_error(self, name, value):
        """
        Validate and normalize the value of an error-handling attribute.

        For manytheme/manycontent the result is always a
        ``(handler, position)`` tuple.  For the other attributes the
        result is ``('warn', None)`` when no value was given, otherwise
        the lower-cased string.

        Raises RuleSyntaxError if the value is not allowed for ``name``.
        """
        if value == '':
            value = None
        if value:
            value = value.lower()
        is_many = name in self._many_attrs
        if is_many:
            allowed = self._many_allowed
        else:
            allowed = self._no_allowed
        if value not in allowed:
            # (the original assigned to a misspelled variable in the
            # non-"many" case, so bad values were silently accepted)
            raise RuleSyntaxError(
                'The attribute %s="%s" should have a value of one of: %s'
                % (name, value, ', '.join(v for v in allowed if v)))
        if value and ':' in value:
            value = tuple(value.split(':', 1))
        elif value == 'first':
            value = ('ignore', 'first')
        elif value == 'last':
            value = ('ignore', 'last')
        if is_many:
            if value == 'ignore':
                value = ('ignore', 'first')
            elif value == 'warn' or not value:
                value = ('warn', 'first')
            elif value == 'abort':
                value = ('abort', None)
        elif not value:
            value = ('warn', None)
        return value

    def format_error(self, attr, value):
        """
        Format a value produced by convert_error() back into
        ``attr="..."`` form.  Returns None for the default ('warn')
        handling, which does not need to be displayed.
        """
        # convert_error() returns either a (handler, position) tuple or
        # a bare string such as 'abort'; accept both.
        if isinstance(value, tuple):
            handler, pos = value
        else:
            handler, pos = value, None
        if attr in self._many_attrs and pos == 'last':
            text = '%s:%s' % (handler, pos)
        else:
            text = handler
        if not text or text == 'warn':
            return None
        return '%s="%s"' % (attr, html_quote(text))

    def if_content_matches(self, content_doc, log):
        """
        Returns true if the if-content selector matches something,
        i.e., if this rule should be executed.
        """
        if self.if_content is None:
            # No if-content means always run
            return True
        sel_type, els, attributes = self.select_elements(
            self.if_content, content_doc, theme=False)
        if sel_type == 'elements':
            matched = bool(els)
        elif sel_type == 'children':
            # Only counts if some selected element has text or children
            matched = False
            for el in els:
                if el.text or len(el):
                    matched = True
                    break
        elif sel_type == 'attributes':
            # With named attributes one of them must be present,
            # otherwise any attribute at all will do
            matched = False
            for el in els:
                if attributes:
                    if any(attr in el.attrib for attr in attributes):
                        matched = True
                        break
                elif el.attrib:
                    matched = True
                    break
        else:
            ## FIXME: need to make sure 'tag' can't get in here:
            assert 0, 'Unexpected selector type for if-content: %r' % sel_type
        # The selector may not define ``inverted``; treat that as
        # "not inverted" instead of failing with AttributeError.
        inverted = bool(getattr(self.if_content, 'inverted', False))
        if matched == inverted:
            log.info(self, 'skipping rule because if-content="%s" does not match', self.if_content)
            describe = getattr(log, 'describe', None)
            if describe:
                describe('skipping rule %s because if-content="%s" does not match anything'
                         % (self, self.if_content))
            return False
        return True

    def describe_self(self):
        """
        Returns an XML-like description of this rule, built from
        whichever of the rule attributes are set on the instance.
        """
        parts = ['<%s' % self.name]
        if getattr(self, 'content', None):
            parts.append('content="%s"' % html_quote(self.content))
        if getattr(self, 'content_href', None):
            parts.append('href="%s"' % html_quote(self.content_href))
        # Moving is the default, so only a disabled move is worth showing
        if self.move_supported and not getattr(self, 'move', True):
            parts.append('move="0"')
        self._append_error_attrs(parts, ('nocontent', 'manycontent'))
        if getattr(self, 'theme', None):
            parts.append('theme="%s"' % html_quote(self.theme))
        self._append_error_attrs(parts, ('notheme', 'manytheme'))
        ## FIXME: add source_location
        return ' '.join(parts) + ' />'

    def _append_error_attrs(self, parts, attrs):
        """Append the non-default error-handling attributes to parts."""
        for attr in attrs:
            value = getattr(self, attr, None)
            if value is None:
                continue
            text = self.format_error(attr, value)
            # format_error() returns None for the default handling
            if text:
                parts.append(text)

    def describe_content_elements(self, els, children=False):
        """Returns the comma-separated tag names of ``els`` for log output."""
        text = ', '.join(el.tag for el in els)
        if children:
            return 'children of %s' % text
        else:
            return text

    def describe_theme_element(self, el):
        """Returns the tag name of the theme element for log output."""
        return el.tag

    @classmethod
    def compile_selector(cls, tag, attr, source_location):
        """
        Parses the attribute ``attr`` of the rule element ``tag`` into
        a Selector; returns None if the attribute is not present.
        """
        value = tag.get(attr)
        if value is None:
            return None
        return Selector.parse(value)

    def prepare_content_children(self, els):
        """
        Takes a list of elements and prepares their children as a list and text,
        so that you can do::

            text, els = prepare_content_children(self, els)
            add_text(theme_el, text)
            theme_el.extend(els)

        This is generally for use in content="children:..." rules.
        """
        if not els:
            return None, []
        # The leading text of every element but the first is attached
        # to the preceding element (or to its last child)
        for i in range(1, len(els)):
            if els[i].text:
                append_to = els[i-1]
                if len(append_to):
                    add_tail(append_to[-1], els[i].text)
                else:
                    add_tail(append_to, els[i].text)
        result = []
        for el in els:
            result.extend(el)
        return els[0].text, result

    def select_elements(self, selector, doc, theme):
        """
        Selects the elements from the document.  `theme` is a boolean,
        true if the document is the theme (in which case elements
        originating in the content are not selectable).

        Returns (type, elements, attributes) as produced by the selector.
        """
        sel_type, elements, attributes = selector(doc)
        if theme:
            elements = [el for el in elements
                        if not is_content_element(el)]
        return sel_type, elements, attributes
+
class TransformRule(AbstractRule):
    """
    Abstract class for the rules that move from the content to the theme (replace, append, prepend)

    Subclasses provide ``_compatible_types``, a list of allowed
    ``(theme_type, content_type)`` pairs, and ``apply_transformation()``.
    """

    def __init__(self, source_location, content, theme, if_content=None, content_href=None,
                 move=True, nocontent=None, notheme=None, manytheme=None, manycontent=None):
        self.source_location = source_location
        assert content is not None
        self.content = content
        assert theme is not None
        self.theme = theme
        # Every combination of selector types the two selectors can
        # produce has to be supported by the rule
        for content_type in self.content.selector_types():
            for theme_type in self.theme.selector_types():
                if (theme_type, content_type) not in self._compatible_types:
                    raise RuleSyntaxError(
                        'Selector type %s (from content="%s") and type %s (from theme="%s") are not compatible'
                        % (content_type, self.content, theme_type, self.theme))
        self.if_content = if_content
        self.content_href = content_href
        self.move = move
        self.nocontent = self.convert_error('nocontent', nocontent)
        self.notheme = self.convert_error('notheme', notheme)
        self.manytheme = self.convert_error('manytheme', manytheme)
        self.manycontent = self.convert_error('manycontent', manycontent)

    @classmethod
    def from_xml(cls, tag, source_location):
        """Creates the rule from its XML element ``tag``."""
        content = cls.compile_selector(tag, 'content', source_location)
        theme = cls.compile_selector(tag, 'theme', source_location)
        # The attribute is referred to as if-content in the messages of
        # this module; the underscore spelling that was read originally
        # is still accepted as a fallback.
        if_content = cls.compile_selector(tag, 'if-content', source_location)
        if if_content is None:
            if_content = cls.compile_selector(tag, 'if_content', source_location)
        content_href = tag.get('href')
        move = asbool(tag.get('move', '1'))
        return cls(source_location, content, theme, if_content=if_content,
                   content_href=content_href, move=move,
                   nocontent=tag.get('nocontent'),
                   notheme=tag.get('notheme'),
                   manytheme=tag.get('manytheme'),
                   manycontent=tag.get('manycontent'))

    def apply(self, content_doc, theme_doc, resource_fetcher, log):
        """
        Applies the rule: selects the content and theme elements,
        handles the no-match/many-match cases as configured, and hands
        the result to ``apply_transformation()``.

        Raises AbortTheme when an "abort" handler is triggered.
        """
        # Imported here because the module does not import copy at the top
        import copy
        describe = getattr(log, 'describe', None)
        if self.content_href:
            content_doc = resource_fetcher(self.content_href)
        if not self.if_content_matches(content_doc, log):
            return
        content_type, content_els, content_attributes = self.select_elements(
            self.content, content_doc, theme=False)
        if not content_els:
            if self.nocontent == 'abort':
                log.debug(self, 'aborting theme because no content matches rule content="%s"', self.content)
                raise AbortTheme('No content matches content="%s"' % self.content)
            elif self.nocontent == 'ignore':
                log_meth = log.debug
            else:
                log_meth = log.warn
            log_meth(self, 'skipping rule because no content matches rule content="%s"', self.content)
            if describe:
                describe(
                    'skipping rule %s because content="%s" does not match anything'
                    % (self.describe_self(), html_quote(self.content)))
            return
        theme_type, theme_els, theme_attributes = self.select_elements(
            self.theme, theme_doc, theme=True)
        attributes = self.join_attributes(content_attributes, theme_attributes)
        if not theme_els:
            if self.notheme == 'abort':
                log.debug(self, 'aborting theme because no theme element matches rule theme="%s"', self.theme)
                raise AbortTheme('No theme element matches theme="%s"' % self.theme)
            elif self.notheme == 'ignore':
                log_meth = log.debug
            else:
                log_meth = log.warn
            log_meth(self, 'skipping rule because no theme element matches rule theme="%s"', self.theme)
            if describe:
                # (the original reported the content selector here)
                describe('skipping rule %s because theme="%s" does not match anything'
                         % (self.describe_self(), html_quote(self.theme)))
            return
        if len(theme_els) > 1:
            handler, position = self.manytheme
            if handler == 'warn':
                log.warn(self, '%s elements match theme="%s", using the %s match',
                         len(theme_els), self.theme, position)
            elif handler == 'abort':
                raise AbortTheme('Many elements match theme="%s"' % self.theme)
            if position == 'first':
                theme_els = [theme_els[0]]
            else:
                theme_els = [theme_els[-1]]
        theme_el = theme_els[0]
        if not self.move and theme_type in ('children', 'elements'):
            log.debug(self, 'content elements are being copied into theme (not moved)')
            content_els = copy.deepcopy(content_els)
        mark_content_els(content_els)
        self.apply_transformation(content_type, content_els, attributes, theme_type, theme_el, log)

    def join_attributes(self, attr1, attr2):
        """
        Combines two lists of attribute names.  Returns None if both
        are empty, the non-empty one if only one is given, and
        otherwise a list with the union of the names.
        """
        if not attr1 and not attr2:
            return None
        if attr1 and not attr2:
            return attr1
        if not attr1 and attr2:
            return attr2
        ## FIXME: is a join really the right method?
        attr = set(attr1)
        # update() accepts any iterable; ``|=`` failed when attr2 is a list
        attr.update(attr2)
        return list(attr)
+
class Replace(TransformRule):
    """
    Rule that replaces something in the theme (an element, its
    children, its attributes or its tag) with the selected content.
    """

    # Allowed (theme_type, content_type) pairs
    _compatible_types = [
        ('children', 'elements'),
        ('children', 'children'),
        ('elements', 'elements'),
        ('elements', 'children'),
        ('attributes', 'attributes'),
        ('tag', 'tag'),
        ]

    def apply_transformation(self, content_type, content_els, attributes, theme_type, theme_el, log):
        """
        Performs the replacement on ``theme_el``.  ``content_els`` are
        the selected content elements (copies when ``self.move`` is
        false) and ``attributes`` the attribute names to restrict an
        attribute replacement to, if any.
        """
        describe = getattr(log, 'describe', None)
        if theme_type == 'children':
            existing_children = len(theme_el) or theme_el.text
            if existing_children:
                extra = ' and removed its children'
            else:
                extra = ''
            theme_el[:] = []
            theme_el.text = ''
            if content_type == 'elements':
                if self.move:
                    # The elements leave the content document, so their
                    # tails are handed to the preceding sibling or the
                    # parent there
                    for el in reversed(content_els):
                        move_tail_upwards(el)
                else:
                    # If we are working with copies, then we can just throw away the tails
                    for el in content_els:
                        el.tail = None
                theme_el.extend(content_els)
                if describe:
                    describe(
                        "Rule %s moved elements %s into element %s%s"
                        % (self.describe_self(), self.describe_content_elements(content_els),
                           self.describe_theme_element(theme_el), extra))
            elif content_type == 'children':
                text, els = self.prepare_content_children(content_els)
                add_text(theme_el, text)
                theme_el.extend(els)
                if describe:
                    describe(
                        "Rule %s moved the children of elements %s into element %s%s"
                        % (self.describe_self(), self.describe_content_elements(content_els, children=True),
                           self.describe_theme_element(theme_el), extra))
                if self.move:
                    # Since we moved just the children of the content elements, we still need to remove the parent
                    # elements.
                    for el in content_els:
                        el.getparent().remove(el)
            else:
                assert 0

        elif theme_type == 'elements':
            move_tail_upwards(theme_el)
            parent = theme_el.getparent()
            pos = parent.index(theme_el)
            if content_type == 'elements':
                if self.move:
                    for el in reversed(content_els):
                        move_tail_upwards(el)
                else:
                    for el in content_els:
                        el.tail = None
                parent[pos:pos+1] = content_els
            elif content_type == 'children':
                text, els = self.prepare_content_children(content_els)
                # The leading text goes in front of the replaced element
                if pos == 0:
                    add_text(parent, text)
                else:
                    add_tail(parent[pos-1], text)
                parent[pos:pos+1] = els
                if self.move:
                    for el in content_els:
                        el.getparent().remove(el)
            else:
                assert 0

        elif theme_type == 'attributes':
            ## FIXME: handle named attributes, e.g., attributes(class):
            assert content_type == 'attributes'
            if len(content_els) > 1:
                if self.manycontent[0] == 'abort':
                    log.debug(self, 'aborting because %s elements in the content match content="%s"',
                              len(content_els), self.content)
                    raise AbortTheme()
                if self.manycontent[0] == 'warn':
                    log_meth = log.warn
                else:
                    log_meth = log.debug
                log_meth(self, '%s elements match content="%s" (but only one expected), using the %s match',
                         len(content_els), self.content, self.manycontent[1])
                if self.manycontent[1] == 'first':
                    content_els = [content_els[0]]
                else:
                    content_els = [content_els[-1]]
            c_attrib = content_els[0].attrib
            theme_el.attrib.clear()
            if attributes:
                for name in attributes:
                    if name in c_attrib:
                        theme_el.set(name, c_attrib[name])
                if self.move:
                    for name in attributes:
                        if name in c_attrib:
                            del c_attrib[name]
            else:
                theme_el.attrib.update(c_attrib)
                if self.move:
                    c_attrib.clear()

        elif theme_type == 'tag':
            assert content_type == 'tag'
            theme_el.tag = content_els[0].tag
            theme_el.attrib.clear()
            theme_el.attrib.update(content_els[0].attrib)
            # "move" in this case doesn't mean anything
+
+rules['replace'] = Replace
+
class Append(TransformRule):
    """
    Rule that adds the selected content after the theme element (or
    at the end of its children).  With ``_append`` set to false the
    content is added in front instead (see Prepend).
    """

    _append = True

    # Allowed (theme_type, content_type) pairs
    _compatible_types = [
        ('children', 'elements'),
        ('children', 'children'),
        ('elements', 'elements'),
        ('elements', 'children'),
        ('attributes', 'attributes'),
        # Removing 'tag'
        ]

    def apply_transformation(self, content_type, content_els, attributes, theme_type, theme_el, log):
        """
        Adds the content to ``theme_el``.  ``content_els`` are the
        selected content elements (copies when ``self.move`` is false)
        and ``attributes`` the attribute names to restrict an attribute
        transfer to, if any.
        """
        if theme_type == 'children':
            if content_type == 'elements':
                if self.move:
                    for el in reversed(content_els):
                        move_tail_upwards(el)
                else:
                    for el in content_els:
                        el.tail = None
                if self._append:
                    theme_el.extend(content_els)
                else:
                    # The existing leading text has to follow the
                    # inserted elements
                    add_tail(content_els[-1], theme_el.text)
                    theme_el.text = None
                    theme_el[:0] = content_els
            elif content_type == 'children':
                text, els = self.prepare_content_children(content_els)
                if self._append:
                    if len(theme_el):
                        add_tail(theme_el[-1], text)
                    else:
                        add_text(theme_el, text)
                    theme_el.extend(els)
                elif els:
                    add_tail(els[-1], theme_el.text)
                    theme_el.text = text
                    theme_el[:0] = els
                else:
                    # Only text is being prepended; there is no element
                    # to carry the existing text as its tail
                    theme_el.text = (text or '') + (theme_el.text or '')
            else:
                assert 0

        elif theme_type == 'elements':
            parent = theme_el.getparent()
            pos = parent.index(theme_el)
            if content_type == 'elements':
                if self.move:
                    for el in reversed(content_els):
                        move_tail_upwards(el)
                else:
                    for el in content_els:
                        el.tail = None
                if self._append:
                    parent[pos+1:pos+1] = content_els
                else:
                    parent[pos:pos] = content_els
            elif content_type == 'children':
                # The prepared children are inserted, not the selected
                # parent elements themselves
                text, els = self.prepare_content_children(content_els)
                if self._append:
                    add_tail(theme_el, text)
                    parent[pos+1:pos+1] = els
                else:
                    if pos == 0:
                        add_text(parent, text)
                    else:
                        add_tail(parent[pos-1], text)
                    parent[pos:pos] = els

        elif theme_type == 'attributes':
            ## FIXME: handle named attributes
            assert content_type == 'attributes'
            if len(content_els) > 1:
                if self.manycontent[0] == 'abort':
                    log.debug(self, 'aborting because %s elements in the content match content="%s"',
                              len(content_els), self.content)
                    raise AbortTheme()
                if self.manycontent[0] == 'warn':
                    log_meth = log.warn
                else:
                    log_meth = log.debug
                log_meth(self, '%s elements match content="%s" (but only one expected), using the %s match',
                         len(content_els), self.content, self.manycontent[1])
                if self.manycontent[1] == 'first':
                    content_els = [content_els[0]]
                else:
                    content_els = [content_els[-1]]
            content_attrib = content_els[0].attrib
            theme_attrib = theme_el.attrib
            if attributes:
                names = [name for name in attributes if name in content_attrib]
            else:
                names = list(content_attrib.keys())
            for name in names:
                # Appending keeps attributes the theme already has;
                # prepending lets the content override them
                if self._append and name in theme_attrib:
                    continue
                theme_attrib[name] = content_attrib[name]
            if self.move:
                if attributes:
                    for name in names:
                        del content_attrib[name]
                else:
                    content_attrib.clear()
+
+rules['append'] = Append
+
class Prepend(Append):
    """
    Same as Append, with ``_append`` switched off so that the content
    is placed in front.
    """

    _append = False
+
+rules['prepend'] = Prepend
+
class Drop(AbstractRule):
    """
    Rule that removes what its theme and/or content selectors match:
    whole elements, their children, their attributes, or just the tag
    (keeping text and children in place).
    """

    def __init__(self, source_location, content, theme, if_content=None,
                 nocontent=None, notheme=None):
        self.source_location = source_location
        # One selector is enough.  (A second assertion used to require
        # a theme selector, contradicting this check.)
        if content is None and theme is None:
            raise RuleSyntaxError(
                'The drop rule needs a content or a theme attribute')
        self.content = content
        self.theme = theme
        self.if_content = if_content
        self.nocontent = self.convert_error('nocontent', nocontent)
        self.notheme = self.convert_error('notheme', notheme)

    def apply(self, content_doc, theme_doc, resource_fetcher, log):
        """
        Applies the rule to the theme first and then to the content.

        Raises AbortTheme when a selector matches nothing and the
        matching notheme/nocontent handler is "abort".
        """
        if not self.if_content_matches(content_doc, log):
            return
        targets = [(theme_doc, self.theme, self.notheme, 'theme'),
                   (content_doc, self.content, self.nocontent, 'content')]
        for doc, selector, error, name in targets:
            if selector is None:
                continue
            sel_type, els, attributes = self.select_elements(selector, doc, name == 'theme')
            if not els:
                if error == 'abort':
                    log.debug(self, 'aborting %s because no %s element matches rule %s="%s"',
                              name, name, name, selector)
                    raise AbortTheme('No %s matches %s="%s"' % (name, name, selector))
                elif error == 'ignore':
                    log_meth = log.debug
                else:
                    log_meth = log.warn
                log_meth(self, 'skipping rule because no %s matches rule %s="%s"', name, name, selector)
                return
            # Every matched element is handled (the original only
            # looped in the 'elements' case)
            for el in els:
                self._drop(sel_type, el, attributes)

    def _drop(self, sel_type, el, attributes):
        """Removes from ``el`` whatever ``sel_type`` designates."""
        if sel_type == 'elements':
            move_tail_upwards(el)
            el.getparent().remove(el)
        elif sel_type == 'children':
            el[:] = []
            el.text = ''
        elif sel_type == 'attributes':
            attrib = el.attrib
            if attributes:
                for attr_name in attributes:
                    if attr_name in attrib:
                        del attrib[attr_name]
            else:
                attrib.clear()
        elif sel_type == 'tag':
            # The element goes away, its text and children stay where
            # it was
            children = list(el)
            if children:
                add_tail(children[-1], el.tail)
            else:
                add_text(el, el.tail)
            parent = el.getparent()
            pos = parent.index(el)
            if pos == 0:
                add_text(parent, el.text)
            else:
                add_tail(parent[pos-1], el.text)
            parent[pos:pos+1] = children
        else:
            assert 0

    @classmethod
    def from_xml(cls, tag, source_location):
        """Creates the rule from its XML element ``tag``."""
        content = cls.compile_selector(tag, 'content', source_location)
        theme = cls.compile_selector(tag, 'theme', source_location)
        # The attribute is referred to as if-content in the messages of
        # this module; the underscore spelling that was read originally
        # is still accepted as a fallback.
        if_content = cls.compile_selector(tag, 'if-content', source_location)
        if if_content is None:
            if_content = cls.compile_selector(tag, 'if_content', source_location)
        return cls(source_location, content, theme, if_content=if_content,
                   nocontent=tag.get('nocontent'),
                   notheme=tag.get('notheme'))
+
+rules['drop'] = Drop
+
+## Element utilities ##
+
def add_text(el, text):
    """
    Append ``text`` to whatever text ``el`` already has
    """
    if text:
        # el.text may be None, hence the fallback to ''
        el.text = (el.text or '') + text
+
def add_tail(el, tail):
    """
    Append ``tail`` to whatever tail text ``el`` already has
    """
    if tail:
        # el.tail may be None, hence the fallback to ''
        el.tail = (el.tail or '') + tail
+
def move_tail_upwards(el):
    """
    Give the tail text of el to its previous sibling (as tail), or to
    its parent (as text) when there is no previous sibling
    """
    previous = el.getprevious()
    if previous is None:
        add_text(el.getparent(), el.tail)
        return
    add_tail(previous, el.tail)
+
def iter_self_and_ancestors(el):
    """
    Yield el itself, followed by everything el.iterancestors() yields
    """
    yield el
    ancestors = el.iterancestors()
    for ancestor in ancestors:
        yield ancestor
+
def mark_content_els(els):
    """
    Set the marker attribute (CONTENT_ATTRIB) on each of the elements
    """
    ## FIXME: maybe put something that is trackable to the rule that moved the element
    marker_value = '1'
    for content_el in els:
        content_el.set(CONTENT_ATTRIB, marker_value)
+
def is_content_element(el):
    """
    True if el or one of its ancestors carries the marker attribute
    """
    ## FIXME: should this check children too?
    return any(node.get(CONTENT_ATTRIB)
               for node in iter_self_and_ancestors(el))
+
def remove_content_attribs(doc):
    """
    Delete the marker attribute from every element doc.getiterator() yields
    """
    for node in doc.getiterator():
        if node.get(CONTENT_ATTRIB, None) is None:
            continue
        del node.attrib[CONTENT_ATTRIB]
+
+from cgi import escape as cgi_escape
def html_quote(s):
    """
    Convert s to unicode and escape it with cgi.escape, quotes included
    """
    return cgi_escape(unicode(s), True)
Added: z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/selector.py
==============================================================================
--- (empty file)
+++ z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/selector.py Tue Jun 24 22:47:43 2008
@@ -0,0 +1,108 @@
+from lxml.etree import XPath
+from lxml.cssselect import CSSSelector
+import re
+
+type_re = re.compile(r'^(elements?|children|tag|attributes?):')
+type_map = dict(element='elements', attribute='attributes')
+attributes_re = re.compile(r'^attributes[(]([a-zA-Z0-9_,-]+)[)]:')
+
class SelectorSyntaxError(Exception):
    """
    Raised by Selector.compile_selector when an expression uses a type
    that is incompatible with the selector's major type.
    """
+
class Selector(object):
    """
    A selection expression such as ``children:#content || //body``.

    The expression is a list of alternatives separated by ``||``; each
    is an XPath expression (when it starts with ``/``) or a CSS
    selector, optionally prefixed with a type (``elements:``,
    ``children:``, ``tag:``, ``attributes:`` or
    ``attributes(name,...):``).  Calling the selector with a document
    returns the result of the first alternative that matches anything.
    """

    def __init__(self, major_type, attributes, selectors):
        self.major_type = major_type
        self.attributes = attributes
        self.selectors_source = selectors
        # List of (type, compiled selector, source expression, attributes)
        self.selectors = [self.compile_selector(selector, default_type=major_type)
                          for selector in selectors]

    @classmethod
    def parse(cls, expr):
        """Creates a Selector from the expression string ``expr``."""
        major_type, attributes, expr = cls.parse_prefix(expr)
        selectors = [e.strip()
                     for e in expr.split('||')]
        return cls(major_type, attributes, selectors)

    @staticmethod
    def parse_prefix(expr, default_type='elements'):
        """
        Parses the elements:, etc, prefix.

        Returns (type, attributes, rest_expr)
        """
        try:
            string_types = basestring
        except NameError:
            # Python 3 has no basestring
            string_types = str
        assert isinstance(expr, string_types), "Bad value for expr: %r" % expr
        match = type_re.match(expr)
        if match:
            major_type = match.group(1)
            # Normalize the singular spellings
            major_type = type_map.get(major_type, major_type)
            rest_expr = expr[match.end():]
            return (major_type, None, rest_expr)
        match = attributes_re.match(expr)
        if match:
            attributes = [name.strip() for name in match.group(1).split(',') if name.strip()]
            rest_expr = expr[match.end():]
            return ('attributes', attributes, rest_expr)
        return (default_type, None, expr)

    @staticmethod
    def types_compatible(type1, type2):
        """
        True if the two types can be used in one selector: children
        and elements can be mixed, any other type only goes with itself.
        """
        if type1 in ('children', 'elements'):
            return type2 in ('children', 'elements')
        else:
            return type1 == type2

    # (An earlier __unicode__ definition without a return statement
    # used to precede this one; it was shadowed and has been removed.)
    def __unicode__(self):
        parts = []
        for sel_type, selector, sel_expr, sel_attributes in self.selectors:
            if sel_attributes:
                sel_type = '%s(%s)' % (sel_type, ','.join(sel_attributes))
            parts.append('%s:%s' % (sel_type, sel_expr))
        return ' || '.join(parts)

    def __str__(self):
        # Calls __unicode__ directly so that the unicode builtin (not
        # available on Python 3) is not needed
        return str(self.__unicode__())

    def compile_selector(self, expr, default_type):
        """
        Compiles one alternative of the selector.

        Returns (type, compiled selector, expr, attributes).  Raises
        SelectorSyntaxError if the type of the expression is not
        compatible with the major type of the selector.
        """
        sel_type, attributes, rest_expr = self.parse_prefix(expr, default_type=default_type)
        if not self.types_compatible(sel_type, self.major_type):
            raise SelectorSyntaxError(
                "Expression %s in selector %r uses the type %r, but this is not compatible "
                "with the type %r already declared earlier in the selector"
                % (expr, self, sel_type, self.major_type))
        if rest_expr.startswith('/'):
            selector = XPath(rest_expr)
        else:
            selector = CSSSelector(rest_expr)
        return (sel_type, selector, expr, attributes)

    def __call__(self, doc):
        """
        Match this selector against the doc.  Returns (type, elements,
        attributes), where type is one of elements, children, tag,
        attributes.  attributes is the list of attributes, if that was
        given.
        """
        for sel_type, selector, sel_expr, sel_attributes in self.selectors:
            result = selector(doc)
            if result:
                # First alternative that matches anything wins
                return (sel_type or self.major_type,
                        result,
                        sel_attributes or self.attributes)
        return (self.major_type, [], self.attributes)

    def selector_types(self):
        """
        Returns a set of all types used in this expression (usually a
        single-item set, but some selectors can use multiple types).
        """
        return set([sel_type
                    for sel_type, selector, sel_expr, sel_attributes in self.selectors])
Added: z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/tests/test_selection.txt
==============================================================================
--- (empty file)
+++ z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/tests/test_selection.txt Tue Jun 24 22:47:43 2008
@@ -0,0 +1,146 @@
+First we create a theme document to test out:
+
+ >>> from lxml.html import fromstring, tostring
+ >>> theme = fromstring('''\
+ ... <html>
+ ... <head>
+ ... <title>This is a theme title</title>
+ ... <link rel=Stylesheet type="text/css" href="style.css">
+ ... <style type="text/css">
+ ... @import "style2.css";
+ ... </style>
+ ... </head>
+ ... <body>
+ ...
+ ... <div id="header" class="title-bar">
+ ... <h1 id="title">This is the theme title</h1>
+ ... <div class="topnav"></div>
+ ... </div>
+ ...
+ ... <div id="content">
+ ... This content will be replaced.
+ ... </div>
+ ...
+ ... <div id="footer">
+ ... Copyright (C) 2000 Some Corporation
+ ... </div>
+ ...
+ ... </body>
+ ... </html>''',
+ ... base_url='http://somesite.com/theme/theme.html')
+
+Then, let's select something:
+
+ >>> from deliverance.selector import Selector
+ >>> def t_select(selection):
+ ... selector = Selector.parse(selection)
+ ... type, elements, attributes = selector(theme)
+ ... if type == 'attributes':
+ ... for element in elements:
+ ... if not attributes:
+ ... attributes = element.attrib.keys()
+ ... text = []
+ ... for key in sorted(attributes):
+ ... text.append('%s="%s"' % (key, element.attrib[key]))
+ ... print 'attributes:%s %s' % (element.tag, ' '.join(text))
+ ... return
+ ... if type == 'tag':
+ ... for element in elements:
+ ... tag = tostring(element).split('>')[0] + '>'
+ ... print 'tag:%s' % tag
+ ... return
+ ... if type == 'elements':
+ ... type = ''
+ ... else:
+ ... type += ':'
+ ... for element in elements:
+ ... print '%s%s' % (type, tostring(element).strip())
+ >>> t_select('link')
+ <link rel="Stylesheet" type="text/css" href="style.css">
+ >>> t_select('/html/head/title')
+ <title>This is a theme title</title>
+ >>> t_select('children:title')
+ children:<title>This is a theme title</title>
+ >>> t_select('attributes(class):#header')
+ attributes:div class="title-bar"
+ >>> t_select('#nothing')
+ >>> t_select('div')
+ <div id="header" class="title-bar">
+ <h1 id="title">This is the theme title</h1>
+ <div class="topnav"></div>
+ </div>
+ <div class="topnav"></div>
+ <div id="content">
+ This content will be replaced.
+ </div>
+ <div id="footer">
+ Copyright (C) 2000 Some Corporation
+ </div>
+ >>> t_select('div#header')
+ <div id="header" class="title-bar">
+ <h1 id="title">This is the theme title</h1>
+ <div class="topnav"></div>
+ </div>
+ >>> t_select("tag://div[@id='header']")
+ tag:<div id="header" class="title-bar">
+
+Now we'll select from some content:
+
+ >>> from lxml.etree import XML
+ >>> import copy
+ >>> from deliverance.rules import parse_rule, remove_content_attribs
+ >>> from deliverance.log import SavingLogger
+ >>> def t_rule_head(rule, selector='//head', show_log=False):
+ ... rule = XML(rule)
+ ... rule = parse_rule(rule, None)
+ ... theme_copy = copy.deepcopy(theme)
+ ... theme_copy.make_links_absolute()
+ ... logger = SavingLogger()
+ ... content_copy = copy.deepcopy(content)
+ ... rule.apply(content_copy, theme_copy, None, logger)
+ ... remove_content_attribs(theme_copy)
+ ... el = theme_copy.xpath(selector)[0]
+ ... if show_log:
+ ... for level, rule, message in logger.messages:
+ ... print 'log:', message
+ ... print tostring(el)
+
+And the tests:
+
+ >>> content = fromstring('''\
+ ... <html>
+ ... <head>
+ ... <title>User: Bob</title>
+ ... <link rel="Stylesheet" type="text/css" href="/users.css">
+ ... <link rel="Stylesheet" type="text/css" href="/theme/style.css">
+ ... </head>
+ ... <body>
+ ... <div id="some-stupid-app-content">blah blah blah</div>
+ ... <h1 id="title">The user <b>Bob</b></h1>
+ ... <div id="content">
+ ... Some information about Bob.
+ ... </div>
+ ... </body>
+ ... </html>''',
+ ... base_url='http://somesite.com/users/bob/')
+ >>> t_rule_head('<append content="link" theme="children:head" />')
+ <head><title>This is a theme title</title><link rel="Stylesheet" type="text/css" href="http://somesite.com/theme/style.css"><style type="text/css">
+ @import "http://somesite.com/theme/style2.css";
+ </style><link rel="Stylesheet" type="text/css" href="/users.css"><link rel="Stylesheet" type="text/css" href="/theme/style.css"></head>
+
+Note that href has to be normalized for this to work (FIXME: remove-dups?):
+
+ >>> t_rule_head('<append content="link" theme="children:head" remove-dups="1" />')
+ <head><title>This is a theme title</title><link rel="Stylesheet" type="text/css" href="http://somesite.com/theme/style.css"><style type="text/css">
+ @import "http://somesite.com/theme/style2.css";
+ </style><link rel="Stylesheet" type="text/css" href="/users.css"><link rel="Stylesheet" type="text/css" href="/theme/style.css"></head>
+ >>> t_rule_head('<replace content="children:/html/head/title" theme="children:/html/head/title" />')
+ <head><title>User: Bob</title><link rel="Stylesheet" type="text/css" href="http://somesite.com/theme/style.css"><style type="text/css">
+ @import "http://somesite.com/theme/style2.css";
+ </style></head>
+ >>> t_rule_head('<drop theme="link, style" />')
+ <head><title>This is a theme title</title></head>
+ >>> t_rule_head('<drop theme="link || style" />')
+ <head><title>This is a theme title</title><style type="text/css">
+ @import "http://somesite.com/theme/style2.css";
+ </style></head>
Added: z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/util/__init__.py
==============================================================================
--- (empty file)
+++ z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/util/__init__.py Tue Jun 24 22:47:43 2008
@@ -0,0 +1 @@
+#
Added: z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/util/converters.py
==============================================================================
--- (empty file)
+++ z3/deliverance/sandbox/ianb/deliverance/trunk/deliverance/util/converters.py Tue Jun 24 22:47:43 2008
@@ -0,0 +1,11 @@
def asbool(obj):
    """
    Convert obj to a boolean.

    Strings are stripped and lower-cased, then have to be one of
    true/yes/on/y/t/1 or false/no/off/n/f/0; any other string raises
    ValueError.  All other objects are converted with bool().
    """
    try:
        string_types = (str, unicode)
    except NameError:
        # Python 3: there is no separate unicode type
        string_types = (str,)
    if isinstance(obj, string_types):
        obj = obj.strip().lower()
        if obj in ('true', 'yes', 'on', 'y', 't', '1'):
            return True
        if obj in ('false', 'no', 'off', 'n', 'f', '0'):
            return False
        raise ValueError(
            "String is not true/false: %r" % obj)
    return bool(obj)
Modified: z3/deliverance/sandbox/ianb/deliverance/trunk/setup.py
==============================================================================
--- z3/deliverance/sandbox/ianb/deliverance/trunk/setup.py (original)
+++ z3/deliverance/sandbox/ianb/deliverance/trunk/setup.py Tue Jun 24 22:47:43 2008
@@ -7,11 +7,20 @@
version=version,
description="",
long_description="""\
+Deliverance does transformations of HTML to 'theme' pages, similar in
+function to XSLT but using a simpler XML-based language to express the
+transformation.
""",
- classifiers=[], # Get strings from http://pypi.python.org/pypi?%3Aaction=list_classifiers
- keywords='',
- author='',
- author_email='',
+ classifiers=[
+ "Development Status :: 4 - Beta",
+ "Environment :: Web Environment",
+ "License :: OSI Approved :: MIT License",
+ "Topic :: Internet :: WWW/HTTP :: WSGI",
+ "Topic :: Internet :: WWW/HTTP :: WSGI :: Middleware",
+ ],
+ keywords='wsgi theming html',
+ author='Ian Bicking, The Open Planning Project',
+ author_email='deliverance-discuss at lists.openplans.org',
url='http://openplans.org/projects/deliverance/',
license='MIT',
packages=find_packages(exclude=['ez_setup', 'examples', 'tests']),
@@ -21,6 +30,5 @@
"lxml",
],
entry_points="""
- # -*- Entry points: -*-
""",
)
More information about the z3-checkins
mailing list