First we create a theme document to test out:
>>> from lxml.html import fromstring, tostring
>>> theme = fromstring('''\
...
...
... This is a theme title
...
...
...
...
...
...
...
This is the theme title
...
...
...
...
... This content will be replaced.
...
...
...
...
...
... ''',
... base_url='http://somesite.com/theme/theme.html')
Then, let's select something:
>>> from deliverance.selector import Selector
>>> def t_select(selection):
... selector = Selector.parse(selection)
... type, elements, attributes = selector(theme)
... if type == 'attributes':
... for element in elements:
... if not attributes:
... attributes = element.attrib.keys()
... text = []
... for key in sorted(attributes):
... text.append('%s="%s"' % (key, element.attrib[key]))
... print 'attributes:%s %s' % (element.tag, ' '.join(text))
... return
... if type == 'tag':
... for element in elements:
... tag = tostring(element).split('>')[0] + '>'
... print 'tag:%s' % tag
... return
... if type == 'elements':
... type = ''
... else:
... type += ':'
... for element in elements:
... print '%s%s' % (type, tostring(element).strip())
>>> t_select('link')
>>> t_select('/html/head/title')
This is a theme title
>>> t_select('children:title')
children:This is a theme title
>>> t_select('attributes(class):#header')
attributes:div class="title-bar"
>>> t_select('#nothing')
>>> t_select('div')
This is the theme title
This content will be replaced.
>>> t_select('div#header')
This is the theme title
>>> t_select("tag://div[@id='header']")
tag:
Now we'll select from some content:
>>> from lxml.etree import XML
>>> import copy
>>> from deliverance.rules import parse_action, remove_content_attribs
>>> from deliverance.log import SavingLogger
>>> def t_rule_head(rule, selector='//head', show_log=False):
... rule = XML(rule)
... rule = parse_action(rule, None)
... theme_copy = copy.deepcopy(theme)
... theme_copy.make_links_absolute()
... logger = SavingLogger(request=None, middleware=None)
... content_copy = copy.deepcopy(content)
... rule.apply(content_copy, theme_copy, None, logger)
... remove_content_attribs(theme_copy)
... el = theme_copy.xpath(selector)[0]
... if show_log:
... for level, rule, message in logger.messages:
... print 'log:', message
... print tostring(el)
And the tests:
>>> content = fromstring('''\
...
...
... User: Bob
...
...
...
...
...
blah blah blah
...
The user Bob
...
... Some information about Bob.
...
...
... ''',
... base_url='http://somesite.com/users/bob/')
>>> t_rule_head('')
This is a theme title
Note that href has to be normalized for this to work (FIXME: remove-dups?):
>>> t_rule_head('')
This is a theme title
>>> t_rule_head('')
User: Bob
>>> t_rule_head('')
This is a theme title
>>> t_rule_head('')
This is a theme title