from lxml import etree
import re
import urlparse
import copy
from deliverance import htmlserialize
import sys
# Test if the libxml2 fix is in place: parse a tiny document and make sure
# the text of the first grandchild (html -> head -> script) survives.
# NOTE(review): the markup inside this literal was missing from the source
# (the string was split over two lines, which is a syntax error); it has been
# reconstructed so that html[0][0].text is 'some text' on a working parser --
# confirm against the upstream Deliverance source.
html = etree.HTML('<html><head><script>some text</script></head></html>')
if html[0][0].text != 'some text':
    import warnings
    warnings.warn(
        'Deliverance requires a newer version of libxml2 (1.1.18 or later)')
# Second sanity check: deep-copying a comment node must yield a node.
# A copy that comes back as None is treated as the sign of an lxml that
# is too old, and a warning is emitted.
html_comment = etree.Comment('test comment')
if copy.deepcopy(html_comment) is None:
    import warnings
    warnings.warn('Deliverance requires a newer version of lxml (1.2 or later)')
# sys.version_info carries five items (major, minor, micro, releaselevel,
# serial).  Comparing the whole tuple against (2, 4, 1) is False for 2.4.1
# itself because the longer tuple sorts after its own prefix, so only the
# first three items are compared here.
if sys.version_info[:3] <= (2, 4, 1):
    # There are reported threading issues for this version
    import warnings
    warnings.warn(
        'lxml has threading problems for Python 2.4.1 and earlier')
class DeliveranceError(Exception):
    """Root of the Deliverance exception hierarchy."""
class RuleSyntaxError(DeliveranceError):
    """
    Signals that a renderer came across an invalid or unknown rule
    while processing rules.
    """
# Template for the error page.  It takes two %s substitutions: the first
# follows the "An error occurred processing the request" line and the second
# follows "Stack Trace:".
# NOTE(review): the name suggests an HTML page but the literal holds no
# markup (and the heading text appears twice) -- the tags may have been lost;
# confirm against the upstream source before relying on this text.
DELIVERANCE_ERROR_PAGE = """
Deliverance Error
Deliverance Error
An error occurred processing the request
%s
Stack Trace:
%s
"""
class RendererBase(object):
    """
    Base class for deliverance renderers.

    Subclasses should implement::

        render(self, content)

    which should accept an lxml.etree structure and produce
    an lxml.etree structure representing the content having been 'themed'
    with this renderer's theme according to its rules.
    """

    # Tag names (namespace-qualified) of the rule elements.
    APPEND_RULE_TAG = "{http://www.plone.org/deliverance}append"
    PREPEND_RULE_TAG = "{http://www.plone.org/deliverance}prepend"
    REPLACE_RULE_TAG = "{http://www.plone.org/deliverance}replace"
    COPY_RULE_TAG = "{http://www.plone.org/deliverance}copy"
    APPEND_OR_REPLACE_RULE_TAG = "{http://www.plone.org/deliverance}append-or-replace"
    SUBRULES_TAG = "{http://www.plone.org/deliverance}rules"
    DROP_RULE_TAG = "{http://www.plone.org/deliverance}drop"

    # Attribute names and keyword values looked up on rule elements.
    RULE_CONTENT_KEY = "content"
    RULE_THEME_KEY = "theme"
    RULE_MOVE_KEY = "move"
    RULE_HREF_KEY = "href"
    NOCONTENT_KEY = "nocontent"
    NOTHEME_KEY = "notheme"
    IGNORE_KEYWORD = "ignore"

    # Value of the "content" attribute marking the requested document
    # inside the aggregated content tree (see aggregate()).
    REQUEST_CONTENT = "deliverance:request-content"

    # Types treated as text when a list mixes strings and elements.
    # basestring covers both str and unicode (and their subclasses); fall
    # back to str on interpreters that have no basestring builtin.
    try:
        _STRING_TYPES = basestring
    except NameError:
        _STRING_TYPES = str

    def shouldnt_theme(self, document):
        """
        Return True when the document has a
        meta[@http-equiv="x-deliverance-no-theme"] element under its head.
        Returns False when document is None.
        """
        if document is None:
            return False
        nt = document.xpath('//head/meta[@http-equiv="x-deliverance-no-theme"]')
        return len(nt) != 0

    def get_theme_el(self, rule, theme):
        """
        Get the element referred to by the "theme" attribute of the
        rule given in the theme document given.  theme and rule
        should be lxml etree structures.

        Returns the single matching element.  When nothing matches, or
        more than one element matches, returns None; an error node is
        inserted at the start of the theme body in those cases, except
        that a missing element is not reported when the rule's "notheme"
        attribute is "ignore".
        """
        theme_els = theme.xpath(rule.get(self.RULE_THEME_KEY))
        if len(theme_els) == 0:
            if rule.get(self.NOTHEME_KEY) != self.IGNORE_KEYWORD:
                e = self.format_error("no element found in theme", rule)
                self.add_to_body_start(theme, e)
            return None
        elif len(theme_els) > 1:
            e = self.format_error("multiple elements found in theme", rule, theme_els)
            self.add_to_body_start(theme, e)
            return None
        return theme_els[0]

    def format_error(self, message, rule, elts=None):
        """
        Returns a node containing the error message;
        if the onerror attribute of the rule element is set to ignore,
        returns None.

        The node is a div with class "deliverance-error" holding the
        message and the serialized rule.  When elts is given and
        non-empty, a read-only textarea with the serialized elements is
        added as well.
        """
        if rule.get('onerror', None) == self.IGNORE_KEYWORD:
            return None
        d = etree.Element('div')
        d.attrib['class'] = 'deliverance-error'
        d.text = 'Deliverance error: %s' % message
        br = etree.Element('br')
        br.tail = 'rule: %s' % rule_tostring(rule)
        d.append(br)
        if elts:
            d.append(etree.Element('br'))
            textArea = etree.Element('textarea')
            textArea.attrib['rows'] = '24'
            textArea.attrib['cols'] = '80'
            textArea.attrib['readonly'] = 'readonly'
            textArea.text = ''.join([etree.tostring(el) for el in elts])
            d.append(textArea)
        return d

    # Picks the tag name of the last step of a simple xpath into the
    # named group "tag".  The character class rejects the characters
    # * ^ ( ) : [ . and / inside the tag name.
    TAG_MATCHER = re.compile(
        r'^\.?/?/?(.*?/)*(?P<tag>[^*^():\[./]+?)(\[.*\])*$', re.I)

    def get_tag_from_xpath(self, xpath):
        """
        Attempts to extract the tag type that an xpath expression
        selects (if any).  Returns None when TAG_MATCHER does not match.
        """
        match = self.TAG_MATCHER.match(xpath)
        if match:
            return match.group('tag')
        else:
            return None

    def add_to_body_start(self, doc, el):
        """
        Inserts the element el at the beginning of the body
        element of the document given.  When the document has no body
        child, the document's first child is used instead.  Does nothing
        when el is None.
        """
        if el is None:
            return
        body = doc.find('body')
        if body is None:
            body = doc[0]
        body[:0] = [el]

    def replace_element(self, old_el, new_el):
        """
        Replaces old_el with new_el in the parent
        element of old_el.  The tail of
        new_el is replaced by the tail of old_el.
        """
        new_el.tail = old_el.tail
        parent = old_el.getparent()
        parent[parent.index(old_el)] = new_el

    def fixup_links(self, doc, uri):
        """
        Replaces relative urls found in the document given
        with absolute urls by joining them onto the uri given.
        If the document has base elements with an href, the href of the
        first one is used instead of uri, and all of them are removed
        from the document.

        Affects urls in href attributes, src attributes and
        css of the form url(...) in head/style elements.
        Returns the (modified) document.
        """
        base_uri = uri
        basetags = doc.xpath('//base[@href]')
        if basetags:
            base_uri = basetags[0].attrib['href']
            for b in basetags:
                b.getparent().remove(b)
        elts = doc.xpath('//*[@href]')
        self.fixup_link_attrs(elts, base_uri, 'href')
        elts = doc.xpath('//*[@src]')
        self.fixup_link_attrs(elts, base_uri, 'src')
        elts = doc.xpath('//head/style')
        self.fixup_css_links(elts, base_uri)
        return doc

    def fixup_link_attrs(self, elts, base_uri, attr):
        """
        Joins base_uri with the attribute given by attr for
        all elements given in elts, storing the result back on
        the element.
        """
        for el in elts:
            el.attrib[attr] = urlparse.urljoin(base_uri, el.attrib[attr])

    def separate_drop_rules(self, rules):
        """
        Separates out drop rules from a list of rules, returns two
        lists: first the list of all drop rules, second all other rules.
        Order is preserved.
        """
        regular_rules = []
        drop_rules = []
        for rule in rules:
            if rule.tag == self.DROP_RULE_TAG:
                drop_rules.append(rule)
            else:
                regular_rules.append(rule)
        return drop_rules, regular_rules

    def separate_move_rules(self, rules):
        """
        Separates out move rules (rules with a non-empty "move"
        attribute) from a list of rules, returns two lists: first the
        list of all move rules, second all other rules.
        Order is preserved.
        """
        regular_rules = []
        move_rules = []
        for rule in rules:
            if rule.get(self.RULE_MOVE_KEY):
                move_rules.append(rule)
            else:
                regular_rules.append(rule)
        return move_rules, regular_rules

    CSS_URL_PAT = re.compile(r'url\(\s*[\"\']*(.*?)[\"\']*\s*\)', re.I)
    CSS_IMPORT_PAT = re.compile(r'\@import\s*[\"\'](.*?)[\"\']', re.I)

    def fixup_css_links(self, elts, base_uri):
        """
        Rewrites url(...) references and quoted @import targets in the
        text of the style elements given so that they are absolute
        links based on base_uri.
        """
        def absuri(matchobj):
            return 'url(' + urlparse.urljoin(base_uri, matchobj.group(1)) + ')'

        def imp_absuri(matchobj):
            return '@import url(' + urlparse.urljoin(base_uri, matchobj.group(1)) + ')'

        for el in elts:
            if el.text:
                # imports first: they are turned into url(...) form, which
                # the second substitution then passes over as well
                el.text = self.CSS_IMPORT_PAT.sub(imp_absuri, el.text)
                el.text = self.CSS_URL_PAT.sub(absuri, el.text)

    def append_text(self, parent, text):
        """
        Appends text at the end of parent's content.  Does nothing when
        text is None.

        In the lxml model, text that follows the last child of an element
        is stored as that child's tail, so when parent has children the
        text is added to the tail of the last child (adding it to the
        child's .text would place it inside the child).  A parent
        without children gets the text added to its own .text.
        """
        if text is None:
            return
        if len(parent) == 0:
            parent.text = (parent.text or '') + text
        else:
            last_child = parent[-1]
            last_child.tail = (last_child.tail or '') + text

    def attach_text_to_previous(self, el, text):
        """
        Attaches the text given to the nearest previous node to el,
        ie the tail of its preceding sibling or, when el is the first
        child, the text of its parent.  Does nothing when text is None.
        el must have a parent.
        """
        if text is None:
            return
        parent = el.getparent()
        assert parent is not None, (
            "Element %r has no parent" % el)
        el_i = parent.index(el)
        if el_i > 0:
            sib_el = parent[el_i - 1]
            sib_el.tail = (sib_el.tail or '') + text
        else:
            parent.text = (parent.text or '') + text

    def elements_in(self, els):
        """
        Return a list containing elements from els which are not strings.
        """
        return [x for x in els if not isinstance(x, self._STRING_TYPES)]

    def strip_tails(self, els):
        """
        For each lxml etree element in the list els,
        set the tail of the element to None.
        """
        for el in els:
            el.tail = None

    def attach_tails(self, els):
        """
        Wherever an lxml element in the list is followed by
        a string, set the tail of the lxml element to that string.
        """
        for index, el in enumerate(els):
            # if we run into a string after the current element,
            # attach it to the current element as the tail
            if (not isinstance(el, self._STRING_TYPES) and
                index + 1 < len(els) and
                isinstance(els[index + 1], self._STRING_TYPES)):
                el.tail = els[index + 1]

    def append_many(self, parent, children):
        """
        Appends a mixed list of strings and elements to parent.
        A leading string is appended as text (see append_text); a string
        following an element becomes that element's tail.  Does nothing
        when children is None or empty.
        """
        if children is None or len(children) == 0:
            return
        if isinstance(children[0], self._STRING_TYPES):
            self.append_text(parent, children[0])
            children = children[1:]
        non_text_els = self.elements_in(children)
        self.strip_tails(non_text_els)
        self.attach_tails(children)
        for el in non_text_els:
            parent.append(el)

    def replace_many(self, theme_el, content_els):
        """
        Replaces theme_el with the content given.  content_els is a
        non-empty list that may mix strings and elements (as an xpath
        may return); strings are attached to the proper neighbouring
        node.  The tail of theme_el is kept after the inserted content.
        """
        non_text_els = self.elements_in(content_els)
        self.strip_tails(non_text_els)
        # the xpath may return a mixture of strings and elements, handle strings
        # by attaching them to the proper element
        if isinstance(content_els[0], self._STRING_TYPES):
            # text must be appended to the tail of the most recent sibling or
            # appended to the text of the parent of the replaced element
            self.attach_text_to_previous(theme_el, content_els[0])
        if len(non_text_els) == 0:
            # nothing but text: keep the theme element's tail, drop the element
            self.attach_text_to_previous(theme_el, theme_el.tail)
            theme_el.getparent().remove(theme_el)
            return
        self.attach_tails(content_els)
        first_el = non_text_els[0]
        last_el = non_text_els[-1]
        # this tail, if there is one, should stick around
        preserve_tail = first_el.tail
        # replaces first element (this overwrites its tail with theme_el's)
        self.replace_element(theme_el, first_el)
        temptail = first_el.tail
        first_el.tail = None
        parent = first_el.getparent()
        # appends the rest of the elements
        i = parent.index(first_el)
        parent[i+1:i+1] = non_text_els[1:]
        # the replaced element's tail goes after the last inserted element;
        # it may be None, in which case there is nothing to add
        if temptail:
            last_el.tail = (last_el.tail or '') + temptail
        # tack in any preserved tail we stored above
        if preserve_tail:
            first_el.tail = preserve_tail + (first_el.tail or '')

    def aggregate(self, resolve_uri, rules, content):
        """
        Aggregates the requested document and documents
        referred to in the "href" attribute of
        rules into a single document structured like::

            <content>
              <document content="deliverance:request-content">...</document>
              <document content="(href of a rule)">...</document>
            </content>

        content is an lxml etree structure representing the
        requested content, which appears in the document node
        with content attribute set to the value of REQUEST_CONTENT
        (no such node is created when content is None).

        resolve_uri is a function used to get the content of other
        documents referred to in rules; it is called as
        resolve_uri(href, parse="html").  When it is None only the
        requested content is aggregated.  Each href is resolved once, and
        hrefs that resolve to None are skipped.
        """
        root = etree.Element("content")
        ## FIXME: this was originally "if content:", but I think it actually
        ## was meant to catch content==None.  But I'm not sure.
        if content is not None:
            request_doc = etree.SubElement(root, "document")
            request_doc.set("content", self.REQUEST_CONTENT)
            request_doc.append(content)
        if resolve_uri is None:
            return root
        aggregated = {}
        for rule in rules:
            href = rule.get(self.RULE_HREF_KEY, None)
            if href is None or href in aggregated:
                continue
            doc = resolve_uri(href, parse="html")
            aggregated[href] = True
            if doc is None:
                continue
            self.fixup_links(doc, href)
            doc_node = etree.SubElement(root, "document")
            doc_node.set("content", href)
            doc_node.append(doc)
        return root

    def get_content_xpath(self, rule):
        """
        Gets the xpath to lookup the content referred to by rule
        in the aggregated content document.  Returns None when the rule
        has no "content" attribute.

        The attribute may hold several alternatives separated by '||';
        each alternative after the first gets a 'count(...) = 0'
        condition for every alternative before it, and the results are
        joined with ' | '.
        """
        content_xpath = rule.get(self.RULE_CONTENT_KEY)
        if content_xpath is None:
            return None
        content_doc = rule.get(self.RULE_HREF_KEY, self.REQUEST_CONTENT)
        ored_parts = content_xpath.split('||')
        ored_parts = [self._translate_content_xpath(part, content_doc)
                      for part in ored_parts]
        if len(ored_parts) == 1:
            # Simplest/commonest case, we'll just skip the rest:
            return ored_parts[0]
        prev_parts = []
        new_parts = []
        for part in ored_parts:
            for prev_part in prev_parts:
                part = self._add_xpath_condition(
                    part, 'count(%s) = 0' % (prev_part))
            new_parts.append(part)
            # note: the part is remembered with its conditions applied
            prev_parts.append(part)
        content_xpath = ' | '.join(new_parts)
        return content_xpath

    def _translate_content_xpath(self, xpath, content_doc):
        """
        Prefixes every '|'-separated path in xpath with the location of
        the document node for content_doc in the aggregated content
        document, and returns the paths joined with ' | '.  Paths not
        starting with '/' get one prepended.
        """
        paths = xpath.split('|')
        new_paths = []
        for path in paths:
            path = path.strip()
            if not path.startswith('/'):
                path = '/' + path
            new_paths.append(
                "/content/document[@content='%s']%s"
                % (content_doc, path))
        return ' | '.join(new_paths)

    def _add_xpath_condition(self, xpath, cond, operator='and'):
        """
        Adds the condition cond to xpath.  When xpath already ends with
        a predicate (']'), cond is merged into that predicate using
        operator; otherwise a new predicate is appended.
        """
        if xpath.endswith(']'):
            return '%s %s (%s)]' % (
                xpath[:-1], operator, cond)
        else:
            return '%s[%s]' % (xpath, cond)

    def x_get_content_xpath(self, rule):
        """
        Gets the xpath to lookup the content referred to by rule
        in the aggregated content document.  Unlike get_content_xpath,
        the "content" attribute is used as a single path without any
        '|' / '||' handling.  Returns None when the rule has no
        "content" attribute.
        """
        content_xpath = rule.get(self.RULE_CONTENT_KEY)
        if content_xpath is None:
            return None
        if not content_xpath.startswith('/'):
            content_xpath = '/%s' % content_xpath
        content_doc = rule.get(self.RULE_HREF_KEY, self.REQUEST_CONTENT)
        new_xpath = "/content/document[@content='%s']%s" % (content_doc, content_xpath)
        return new_xpath
def rule_tostring(rule, include_xmlns=False):
    """
    Serialize a rule element back into a string.

    Unless include_xmlns is true, the default-namespace declaration for
    the deliverance namespace is removed from the serialized text.
    """
    serialized = etree.tostring(rule)
    if not include_xmlns:
        serialized = serialized.replace(
            ' xmlns="http://www.plone.org/deliverance"', '')
    return serialized
# API for sharing overridden theme / rule URIs with other middleware layers.

# Key in the environ mapping under which the theme URI is kept.
_THEME_URI_KEY = 'deliverance.theme_uri'


def set_theme_uri(environ, uri):
    """Record uri as the theme URI in the environ mapping."""
    environ[_THEME_URI_KEY] = uri


def get_theme_uri(environ, default=None):
    """Return the theme URI kept in environ, or default when none is set."""
    theme_uri = environ.get(_THEME_URI_KEY, default)
    return theme_uri
# Key in the environ mapping under which the rule URI is kept.
_RULE_URI_KEY = 'deliverance.rule_uri'


def set_rule_uri(environ, uri):
    """Record uri as the rule URI in the environ mapping."""
    environ[_RULE_URI_KEY] = uri


def get_rule_uri(environ, default=None):
    """Return the rule URI kept in environ, or default when none is set."""
    rule_uri = environ.get(_RULE_URI_KEY, default)
    return rule_uri
# Key in the environ mapping under which the serializer setting is kept.
_SERIALIZER_KEY = 'deliverance.serializer'


def set_serializer(environ, dotted_or_egg):
    """Store the serializer setting (unresolved, as given) in environ."""
    environ[_SERIALIZER_KEY] = dotted_or_egg


def get_serializer(environ, default=None):
    """
    Look up the serializer setting in environ (default when absent) and
    return whatever resolve_callable() makes of it.
    """
    configured = environ.get(_SERIALIZER_KEY, default)
    return resolve_callable(configured)
def resolve_callable(dotted_or_egg):
    """
    Return the object a serializer-style setting refers to.

    A string is handed to resolve_dotted_or_egg() and the loaded object
    is returned; any other value (a callable, None, ...) is returned
    unchanged.
    """
    try:
        string_types = basestring
    except NameError:
        # no basestring builtin on this interpreter; str is the text type
        string_types = str
    if isinstance(dotted_or_egg, string_types):
        return resolve_dotted_or_egg(dotted_or_egg)
    return dotted_or_egg
def resolve_dotted_or_egg(dotted_or_egg):
    """
    Load the object named by dotted_or_egg.

    The value is parsed as the right-hand side of a pkg_resources entry
    point specification (under the throwaway name "x") and loaded with
    load(False).
    """
    from pkg_resources import EntryPoint
    entry_point = EntryPoint.parse('x=%s' % dotted_or_egg)
    return entry_point.load(False)
def bool_from_string(value):
    """
    Interpret value as a boolean.

    The strings 'false' and 'no' (in any letter case) give False.
    Every other value, string or not, gives bool(value), so for example
    '' is False while '0' is True.
    """
    try:
        string_types = basestring
    except NameError:
        # no basestring builtin on this interpreter; str is the text type
        string_types = str
    if isinstance(value, string_types) and value.lower() in ('false', 'no'):
        return False
    return bool(value)