[Lxml-checkins] r42695 - in lxml/trunk: . doc src/lxml src/lxml/tests
scoder at codespeak.net
scoder at codespeak.net
Sat May 5 12:29:51 CEST 2007
Author: scoder
Date: Sat May 5 12:29:50 2007
New Revision: 42695
Modified:
lxml/trunk/CHANGES.txt
lxml/trunk/doc/sax.txt
lxml/trunk/src/lxml/apihelpers.pxi
lxml/trunk/src/lxml/etree.pyx
lxml/trunk/src/lxml/sax.py
lxml/trunk/src/lxml/serializer.pxi
lxml/trunk/src/lxml/tests/test_etree.py
lxml/trunk/src/lxml/tests/test_sax.py
Log:
comment/PI fixes for lxml.sax, support for serialising top-level PIs and comments, appending and prepending comments and PIs to the root node
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Sat May 5 12:29:50 2007
@@ -8,12 +8,17 @@
Features added
--------------
+* ``Element.addnext(el)`` and ``Element.addprevious(el)`` methods to support
+ adding processing instructions and comments around the root node
+
* Element.attrib now has a ``pop()`` method
* Extended type annotation in objectify: cleaner annotation namespace setup
plus new ``xsiannotate()`` and ``deannotate()`` functions
-* Support for custom Element class instantiation in lxml.sax
+* Support for custom Element class instantiation in lxml.sax: passing a
+ ``makeelement()`` function to the ElementTreeContentHandler will reuse the
+ lookup context of that function
* '.' represents empty ObjectPath (identity)
@@ -30,6 +35,11 @@
Bugs fixed
----------
+* Documents lost their top-level PIs and comments on serialisation
+
+* lxml.sax failed on comments and PIs. Comments are now properly ignored and
+ PIs are copied.
+
* Thread safety in XPath evaluators
* Raise AssertionError when passing strings containing '\0' bytes
Modified: lxml/trunk/doc/sax.txt
==============================================================================
--- lxml/trunk/doc/sax.txt (original)
+++ lxml/trunk/doc/sax.txt Sat May 5 12:29:50 2007
@@ -39,6 +39,10 @@
>>> lxml.etree.tostring(tree.getroot())
'<a><b foo="bar">Hello world</b></a>'
+By passing a ``makeelement`` function to the constructor of
+``ElementTreeContentHandler``, e.g. the one of a parser you configured, you
+can determine which element class lookup scheme should be used.
+
Producing SAX events from an ElementTree or Element
---------------------------------------------------
Modified: lxml/trunk/src/lxml/apihelpers.pxi
==============================================================================
--- lxml/trunk/src/lxml/apihelpers.pxi (original)
+++ lxml/trunk/src/lxml/apihelpers.pxi Sat May 5 12:29:50 2007
@@ -541,7 +541,6 @@
c_node = child._c_node
# store possible text node
c_next = c_node.next
- # XXX what if element is coming from a different document?
tree.xmlUnlinkNode(c_node)
# move node itself
tree.xmlAddChild(parent._c_node, c_node)
@@ -550,6 +549,38 @@
# parent element has moved; change them too..
moveNodeToDocument(child, parent._doc)
+cdef void _appendSibling(_Element element, _Element sibling):
+ """Append a new child to a parent element.
+ """
+ cdef xmlNode* c_next
+ cdef xmlNode* c_node
+ c_node = sibling._c_node
+ # store possible text node
+ c_next = c_node.next
+ tree.xmlUnlinkNode(c_node)
+ # move node itself
+ tree.xmlAddNextSibling(element._c_node, c_node)
+ _moveTail(c_next, c_node)
+ # uh oh, elements may be pointing to different doc when
+ # parent element has moved; change them too..
+ moveNodeToDocument(sibling, element._doc)
+
+cdef void _prependSibling(_Element element, _Element sibling):
+ """Append a new child to a parent element.
+ """
+ cdef xmlNode* c_next
+ cdef xmlNode* c_node
+ c_node = sibling._c_node
+ # store possible text node
+ c_next = c_node.next
+ tree.xmlUnlinkNode(c_node)
+ # move node itself
+ tree.xmlAddPrevSibling(element._c_node, c_node)
+ _moveTail(c_next, c_node)
+ # uh oh, elements may be pointing to different doc when
+ # parent element has moved; change them too..
+ moveNodeToDocument(sibling, element._doc)
+
cdef int isutf8(char* s):
cdef char c
c = s[0]
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Sat May 5 12:29:50 2007
@@ -531,6 +531,36 @@
"""
_appendChild(self, element)
+ def addnext(self, _Element element):
+ """Adds the element as a following sibling directly after this
+ element.
+
+ This is normally used to set a processing instruction or comment after
+ the root node of a document. Note that tail text is automatically
+ discarded when adding at the root level.
+ """
+ if self._c_node.parent != NULL and not _isElement(self._c_node.parent):
+ if element._c_node.type != tree.XML_PI_NODE:
+ if element._c_node.type != tree.XML_COMMENT_NODE:
+ raise TypeError, "Only processing instructions and comments can be siblings of the root element"
+ element.tail = None
+ _appendSibling(self, element)
+
+ def addprevious(self, _Element element):
+ """Adds the element as a preceding sibling directly before this
+ element.
+
+ This is normally used to set a processing instruction or comment
+ before the root node of a document. Note that tail text is
+ automatically discarded when adding at the root level.
+ """
+ if self._c_node.parent != NULL and not _isElement(self._c_node.parent):
+ if element._c_node.type != tree.XML_PI_NODE:
+ if element._c_node.type != tree.XML_COMMENT_NODE:
+ raise TypeError, "Only processing instructions and comments can be siblings of the root element"
+ element.tail = None
+ _prependSibling(self, element)
+
def extend(self, elements):
"""Extends the current children by the elements in the iterable.
"""
Modified: lxml/trunk/src/lxml/sax.py
==============================================================================
--- lxml/trunk/src/lxml/sax.py (original)
+++ lxml/trunk/src/lxml/sax.py Sat May 5 12:29:50 2007
@@ -1,5 +1,6 @@
from xml.sax.handler import ContentHandler
from etree import ElementTree, Element, SubElement, LxmlError
+from etree import XML, Comment, ProcessingInstruction
class SaxError(LxmlError):
pass
@@ -15,6 +16,7 @@
"""
def __init__(self, makeelement=None):
self._root = None
+ self._root_siblings = []
self._element_stack = []
self._default_ns = None
self._ns_mapping = { None : [None] }
@@ -82,6 +84,10 @@
if self._root is None:
element = self._root = \
self._makeelement(el_name, attrs, self._new_mappings)
+ if self._root_siblings and hasattr(element, 'addprevious'):
+ for sibling in self._root_siblings:
+ element.addprevious(sibling)
+ del self._root_siblings[:]
else:
element = SubElement(element_stack[-1], el_name,
attrs, self._new_mappings)
@@ -89,10 +95,16 @@
self._new_mappings.clear()
+ def processingInstruction(self, target, data):
+ pi = ProcessingInstruction(target, data)
+ if self._root is None:
+ self._root_siblings.append(pi)
+ else:
+ self._element_stack[-1].append(pi)
+
def endElementNS(self, ns_name, qname):
element = self._element_stack.pop()
- tag = element.tag
- if ns_name != _getNsTag(tag):
+ if ns_name != _getNsTag(element.tag):
raise SaxError, "Unexpected element closed: {%s}%s" % ns_name
def startElement(self, name, attributes=None):
@@ -106,10 +118,13 @@
try:
# if there already is a child element, we must append to its tail
last_element = last_element[-1]
- last_element.tail = (last_element.tail or u'') + data
+ last_element.tail = (last_element.tail or '') + data
except IndexError:
# otherwise: append to the text
- last_element.text = (last_element.text or u'') + data
+ last_element.text = (last_element.text or '') + data
+
+ ignorableWhitespace = characters
+
class ElementTreeProducer(object):
"""Produces SAX events for an element and children.
@@ -124,13 +139,41 @@
from xml.sax.xmlreader import AttributesNSImpl as attr_class
self._attr_class = attr_class
self._empty_attributes = attr_class({}, {})
-
+
def saxify(self):
self._content_handler.startDocument()
- self._recursive_saxify(self._element, {})
+
+ element = self._element
+ if hasattr(element, 'getprevious'):
+ siblings = []
+ sibling = element.getprevious()
+ while getattr(sibling, 'tag', None) is ProcessingInstruction:
+ siblings.append(sibling)
+ sibling = sibling.getprevious()
+ for sibling in siblings[::-1]:
+ self._recursive_saxify(sibling, {})
+
+ self._recursive_saxify(element, {})
+
+ if hasattr(element, 'getnext'):
+ sibling = element.getnext()
+ while getattr(sibling, 'tag', None) is ProcessingInstruction:
+ self._recursive_saxify(sibling, {})
+ sibling = sibling.getnext()
+
self._content_handler.endDocument()
def _recursive_saxify(self, element, prefixes):
+ content_handler = self._content_handler
+ tag = element.tag
+ if tag is Comment or tag is ProcessingInstruction:
+ if tag is ProcessingInstruction:
+ content_handler.processingInstruction(
+ element.target, element.text)
+ if element.tail:
+ content_handler.characters(element.tail)
+ return
+
new_prefixes = []
build_qname = self._build_qname
attribs = element.items()
@@ -146,10 +189,9 @@
else:
sax_attributes = self._empty_attributes
- ns_uri, local_name = _getNsTag(element.tag)
+ ns_uri, local_name = _getNsTag(tag)
qname = build_qname(ns_uri, local_name, prefixes, new_prefixes)
- content_handler = self._content_handler
for prefix, uri in new_prefixes:
content_handler.startPrefixMapping(prefix, uri)
content_handler.startElementNS((ns_uri, local_name),
Modified: lxml/trunk/src/lxml/serializer.pxi
==============================================================================
--- lxml/trunk/src/lxml/serializer.pxi (original)
+++ lxml/trunk/src/lxml/serializer.pxi Sat May 5 12:29:50 2007
@@ -78,8 +78,10 @@
if write_xml_declaration:
_writeDeclarationToBuffer(c_buffer, c_doc.version, encoding)
+ _writePrevSiblings(c_buffer, c_node, encoding, pretty_print)
tree.xmlNodeDumpOutput(c_buffer, c_doc, c_node, 0, pretty_print, encoding)
_writeTail(c_buffer, c_node, encoding, pretty_print)
+ _writeNextSiblings(c_buffer, c_node, encoding, pretty_print)
cdef void _writeDeclarationToBuffer(tree.xmlOutputBuffer* c_buffer,
char* version, char* encoding):
@@ -100,6 +102,36 @@
pretty_print, encoding)
c_node = c_node.next
+cdef void _writePrevSiblings(tree.xmlOutputBuffer* c_buffer, xmlNode* c_node,
+ char* encoding, int pretty_print):
+ cdef xmlNode* c_sibling
+ if c_node.parent is not NULL and _isElement(c_node.parent):
+ return
+ # we are at a root node, so add PI and comment siblings
+ c_sibling = c_node
+ while c_sibling.prev != NULL and \
+ (c_sibling.prev.type == tree.XML_PI_NODE or \
+ c_sibling.prev.type == tree.XML_COMMENT_NODE):
+ c_sibling = c_sibling.prev
+ while c_sibling != c_node:
+ tree.xmlNodeDumpOutput(c_buffer, c_node.doc, c_sibling, 0,
+ pretty_print, encoding)
+ c_sibling = c_sibling.next
+
+cdef void _writeNextSiblings(tree.xmlOutputBuffer* c_buffer, xmlNode* c_node,
+ char* encoding, int pretty_print):
+ cdef xmlNode* c_sibling
+ if c_node.parent is not NULL and _isElement(c_node.parent):
+ return
+ # we are at a root node, so add PI and comment siblings
+ c_sibling = c_node.next
+ while c_sibling != NULL and \
+ (c_sibling.type == tree.XML_PI_NODE or \
+ c_sibling.type == tree.XML_COMMENT_NODE):
+ tree.xmlNodeDumpOutput(c_buffer, c_node.doc, c_sibling, 0,
+ pretty_print, encoding)
+ c_sibling = c_sibling.next
+
# output to file-like objects
cdef class _FilelikeWriter:
Modified: lxml/trunk/src/lxml/tests/test_etree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_etree.py (original)
+++ lxml/trunk/src/lxml/tests/test_etree.py Sat May 5 12:29:50 2007
@@ -404,6 +404,156 @@
Element = self.etree.Element
self.assertRaises(TypeError, Element('a').append, None)
+ def test_addnext(self):
+ Element = self.etree.Element
+ SubElement = self.etree.SubElement
+ root = Element('root')
+ SubElement(root, 'a')
+ SubElement(root, 'b')
+
+ self.assertEquals(['a', 'b'],
+ [c.tag for c in root])
+ root[1].addnext(root[0])
+ self.assertEquals(['b', 'a'],
+ [c.tag for c in root])
+
+ def test_addprevious(self):
+ Element = self.etree.Element
+ SubElement = self.etree.SubElement
+ root = Element('root')
+ SubElement(root, 'a')
+ SubElement(root, 'b')
+
+ self.assertEquals(['a', 'b'],
+ [c.tag for c in root])
+ root[0].addprevious(root[1])
+ self.assertEquals(['b', 'a'],
+ [c.tag for c in root])
+
+ def test_addnext_root(self):
+ Element = self.etree.Element
+ a = Element('a')
+ b = Element('b')
+ self.assertRaises(TypeError, a.addnext, b)
+
+ def test_addnext_root(self):
+ Element = self.etree.Element
+ a = Element('a')
+ b = Element('b')
+ self.assertRaises(TypeError, a.addnext, b)
+
+ def test_addprevious_pi(self):
+ Element = self.etree.Element
+ SubElement = self.etree.SubElement
+ PI = self.etree.PI
+ root = Element('root')
+ SubElement(root, 'a')
+ pi = PI('TARGET', 'TEXT')
+ pi.tail = "TAIL"
+
+ self.assertEquals('<root><a></a></root>',
+ self._writeElement(root))
+ root[0].addprevious(pi)
+ self.assertEquals('<root><?TARGET TEXT?>TAIL<a></a></root>',
+ self._writeElement(root))
+
+ def test_addprevious_root_pi(self):
+ Element = self.etree.Element
+ PI = self.etree.PI
+ root = Element('root')
+ pi = PI('TARGET', 'TEXT')
+ pi.tail = "TAIL"
+
+ self.assertEquals('<root></root>',
+ self._writeElement(root))
+ root.addprevious(pi)
+ self.assertEquals('<?TARGET TEXT?>\n<root></root>',
+ self._writeElement(root))
+
+ def test_addnext_pi(self):
+ Element = self.etree.Element
+ SubElement = self.etree.SubElement
+ PI = self.etree.PI
+ root = Element('root')
+ SubElement(root, 'a')
+ pi = PI('TARGET', 'TEXT')
+ pi.tail = "TAIL"
+
+ self.assertEquals('<root><a></a></root>',
+ self._writeElement(root))
+ root[0].addnext(pi)
+ self.assertEquals('<root><a></a><?TARGET TEXT?>TAIL</root>',
+ self._writeElement(root))
+
+ def test_addnext_root_pi(self):
+ Element = self.etree.Element
+ PI = self.etree.PI
+ root = Element('root')
+ pi = PI('TARGET', 'TEXT')
+ pi.tail = "TAIL"
+
+ self.assertEquals('<root></root>',
+ self._writeElement(root))
+ root.addnext(pi)
+ self.assertEquals('<root></root>\n<?TARGET TEXT?>',
+ self._writeElement(root))
+
+ def test_addnext_comment(self):
+ Element = self.etree.Element
+ SubElement = self.etree.SubElement
+ Comment = self.etree.Comment
+ root = Element('root')
+ SubElement(root, 'a')
+ comment = Comment('TEXT ')
+ comment.tail = "TAIL"
+
+ self.assertEquals('<root><a></a></root>',
+ self._writeElement(root))
+ root[0].addnext(comment)
+ self.assertEquals('<root><a></a><!--TEXT -->TAIL</root>',
+ self._writeElement(root))
+
+ def test_addnext_root_comment(self):
+ Element = self.etree.Element
+ Comment = self.etree.Comment
+ root = Element('root')
+ comment = Comment('TEXT ')
+ comment.tail = "TAIL"
+
+ self.assertEquals('<root></root>',
+ self._writeElement(root))
+ root.addnext(comment)
+ self.assertEquals('<root></root>\n<!--TEXT -->',
+ self._writeElement(root))
+
+ def test_addprevious_comment(self):
+ Element = self.etree.Element
+ SubElement = self.etree.SubElement
+ Comment = self.etree.Comment
+ root = Element('root')
+ SubElement(root, 'a')
+ comment = Comment('TEXT ')
+ comment.tail = "TAIL"
+
+ self.assertEquals('<root><a></a></root>',
+ self._writeElement(root))
+ root[0].addprevious(comment)
+ self.assertEquals('<root><!--TEXT -->TAIL<a></a></root>',
+ self._writeElement(root))
+
+ def test_addprevious_root_comment(self):
+ Element = self.etree.Element
+ Comment = self.etree.Comment
+ root = Element('root')
+ comment = Comment('TEXT ')
+ comment.tail = "TAIL"
+
+ self.assertEquals('<root></root>',
+ self._writeElement(root))
+ root.addprevious(comment)
+ self.assertEquals('<!--TEXT -->\n<root></root>',
+ self._writeElement(root))
+
# ET's Elements have items() and key(), but not values()
def test_attribute_values(self):
XML = self.etree.XML
Modified: lxml/trunk/src/lxml/tests/test_sax.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_sax.py (original)
+++ lxml/trunk/src/lxml/tests/test_sax.py Sat May 5 12:29:50 2007
@@ -25,6 +25,30 @@
self.assertEquals('<a>ab<b>bb</b>ba</a>',
xml_out)
+ def test_etree_sax_comment(self):
+ tree = self.parse('<a>ab<!-- TEST -->ba</a>')
+ xml_out = self._saxify_serialize(tree)
+ self.assertEquals('<a>abba</a>',
+ xml_out)
+
+ def test_etree_sax_pi(self):
+ tree = self.parse('<a>ab<?this and that?>ba</a>')
+ xml_out = self._saxify_serialize(tree)
+ self.assertEquals('<a>ab<?this and that?>ba</a>',
+ xml_out)
+
+ def test_etree_sax_comment_root(self):
+ tree = self.parse('<!-- TEST --><a>ab</a>')
+ xml_out = self._saxify_serialize(tree)
+ self.assertEquals('<a>ab</a>',
+ xml_out)
+
+ def test_etree_sax_pi_root(self):
+ tree = self.parse('<?this and that?><a>ab</a>')
+ xml_out = self._saxify_serialize(tree)
+ self.assertEquals('<?this and that?><a>ab</a>',
+ xml_out)
+
def test_etree_sax_attributes(self):
tree = self.parse('<a aa="5">ab<b b="5"/>ba</a>')
xml_out = self._saxify_serialize(tree)
More information about the lxml-checkins
mailing list