[Lxml-checkins] r42695 - in lxml/trunk: . doc src/lxml src/lxml/tests

scoder at codespeak.net scoder at codespeak.net
Sat May 5 12:29:51 CEST 2007


Author: scoder
Date: Sat May  5 12:29:50 2007
New Revision: 42695

Modified:
   lxml/trunk/CHANGES.txt
   lxml/trunk/doc/sax.txt
   lxml/trunk/src/lxml/apihelpers.pxi
   lxml/trunk/src/lxml/etree.pyx
   lxml/trunk/src/lxml/sax.py
   lxml/trunk/src/lxml/serializer.pxi
   lxml/trunk/src/lxml/tests/test_etree.py
   lxml/trunk/src/lxml/tests/test_sax.py
Log:
comment/PI fixes for lxml.sax, support for serialising top-level PIs and comments, appending and prepending comments and PIs to the root node

Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt	(original)
+++ lxml/trunk/CHANGES.txt	Sat May  5 12:29:50 2007
@@ -8,12 +8,17 @@
 Features added
 --------------
 
+* ``Element.addnext(el)`` and ``Element.addprevious(el)`` methods to support
+  adding processing instructions and comments around the root node
+
 * Element.attrib now has a ``pop()`` method
 
 * Extended type annotation in objectify: cleaner annotation namespace setup
   plus new ``xsiannotate()`` and ``deannotate()`` functions
 
-* Support for custom Element class instantiation in lxml.sax
+* Support for custom Element class instantiation in lxml.sax: passing a
+  ``makeelement()`` function to the ElementTreeContentHandler will reuse the
+  lookup context of that function
 
 * '.' represents empty ObjectPath (identity)
 
@@ -30,6 +35,11 @@
 Bugs fixed
 ----------
 
+* Documents lost their top-level PIs and comments on serialisation
+
+* lxml.sax failed on comments and PIs. Comments are now properly ignored and
+  PIs are copied.
+
 * Thread safety in XPath evaluators
 
 * Raise AssertionError when passing strings containing '\0' bytes

Modified: lxml/trunk/doc/sax.txt
==============================================================================
--- lxml/trunk/doc/sax.txt	(original)
+++ lxml/trunk/doc/sax.txt	Sat May  5 12:29:50 2007
@@ -39,6 +39,10 @@
   >>> lxml.etree.tostring(tree.getroot())
   '<a><b foo="bar">Hello world</b></a>'
 
+By passing a ``makeelement`` function to the constructor of
+``ElementTreeContentHandler``, e.g. the one of a parser you configured, you
+can determine which element class lookup scheme should be used.
+
 
 Producing SAX events from an ElementTree or Element
 ---------------------------------------------------

Modified: lxml/trunk/src/lxml/apihelpers.pxi
==============================================================================
--- lxml/trunk/src/lxml/apihelpers.pxi	(original)
+++ lxml/trunk/src/lxml/apihelpers.pxi	Sat May  5 12:29:50 2007
@@ -541,7 +541,6 @@
     c_node = child._c_node
     # store possible text node
     c_next = c_node.next
-    # XXX what if element is coming from a different document?
     tree.xmlUnlinkNode(c_node)
     # move node itself
     tree.xmlAddChild(parent._c_node, c_node)
@@ -550,6 +549,38 @@
     # parent element has moved; change them too..
     moveNodeToDocument(child, parent._doc)
 
+cdef void _appendSibling(_Element element, _Element sibling):
+    """Insert ``sibling`` as the next sibling directly after ``element``.
+    """
+    cdef xmlNode* c_next
+    cdef xmlNode* c_node
+    c_node = sibling._c_node
+    # remember the node that follows the sibling (possibly its tail text)
+    c_next = c_node.next
+    tree.xmlUnlinkNode(c_node)
+    # move node itself
+    tree.xmlAddNextSibling(element._c_node, c_node)
+    _moveTail(c_next, c_node)
+    # uh oh, elements may be pointing to different doc when
+    # the sibling has moved; change them too..
+    moveNodeToDocument(sibling, element._doc)
+
+cdef void _prependSibling(_Element element, _Element sibling):
+    """Insert ``sibling`` as the previous sibling directly before ``element``.
+    """
+    cdef xmlNode* c_next
+    cdef xmlNode* c_node
+    c_node = sibling._c_node
+    # remember the node that follows the sibling (possibly its tail text)
+    c_next = c_node.next
+    tree.xmlUnlinkNode(c_node)
+    # move node itself
+    tree.xmlAddPrevSibling(element._c_node, c_node)
+    _moveTail(c_next, c_node)
+    # uh oh, elements may be pointing to different doc when
+    # the sibling has moved; change them too..
+    moveNodeToDocument(sibling, element._doc)
+
 cdef int isutf8(char* s):
     cdef char c
     c = s[0]

Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx	(original)
+++ lxml/trunk/src/lxml/etree.pyx	Sat May  5 12:29:50 2007
@@ -531,6 +531,36 @@
         """
         _appendChild(self, element)
 
+    def addnext(self, _Element element):
+        """Adds the element as a following sibling directly after this
+        element.
+
+        This is normally used to set a processing instruction or comment after
+        the root node of a document.  Note that tail text is automatically
+        discarded when adding at the root level.
+        """
+        if self._c_node.parent != NULL and not _isElement(self._c_node.parent):
+            if element._c_node.type != tree.XML_PI_NODE:
+                if element._c_node.type != tree.XML_COMMENT_NODE:
+                    raise TypeError, "Only processing instructions and comments can be siblings of the root element"
+            element.tail = None
+        _appendSibling(self, element)
+
+    def addprevious(self, _Element element):
+        """Adds the element as a preceding sibling directly before this
+        element.
+
+        This is normally used to set a processing instruction or comment
+        before the root node of a document.  Note that tail text is
+        automatically discarded when adding at the root level.
+        """
+        if self._c_node.parent != NULL and not _isElement(self._c_node.parent):
+            if element._c_node.type != tree.XML_PI_NODE:
+                if element._c_node.type != tree.XML_COMMENT_NODE:
+                    raise TypeError, "Only processing instructions and comments can be siblings of the root element"
+            element.tail = None
+        _prependSibling(self, element)
+
     def extend(self, elements):
         """Extends the current children by the elements in the iterable.
         """

Modified: lxml/trunk/src/lxml/sax.py
==============================================================================
--- lxml/trunk/src/lxml/sax.py	(original)
+++ lxml/trunk/src/lxml/sax.py	Sat May  5 12:29:50 2007
@@ -1,5 +1,6 @@
 from xml.sax.handler import ContentHandler
 from etree import ElementTree, Element, SubElement, LxmlError
+from etree import XML, Comment, ProcessingInstruction
 
 class SaxError(LxmlError):
     pass
@@ -15,6 +16,7 @@
     """
     def __init__(self, makeelement=None):
         self._root = None
+        self._root_siblings = []
         self._element_stack = []
         self._default_ns = None
         self._ns_mapping = { None : [None] }
@@ -82,6 +84,10 @@
         if self._root is None:
             element = self._root = \
                       self._makeelement(el_name, attrs, self._new_mappings)
+            if self._root_siblings and hasattr(element, 'addprevious'):
+                for sibling in self._root_siblings:
+                    element.addprevious(sibling)
+            del self._root_siblings[:]
         else:
             element = SubElement(element_stack[-1], el_name,
                                  attrs, self._new_mappings)
@@ -89,10 +95,16 @@
 
         self._new_mappings.clear()
 
+    def processingInstruction(self, target, data):
+        pi = ProcessingInstruction(target, data)
+        if self._root is None:
+            self._root_siblings.append(pi)
+        else:
+            self._element_stack[-1].append(pi)
+
     def endElementNS(self, ns_name, qname):
         element = self._element_stack.pop()
-        tag = element.tag
-        if ns_name != _getNsTag(tag):
+        if ns_name != _getNsTag(element.tag):
             raise SaxError, "Unexpected element closed: {%s}%s" % ns_name
 
     def startElement(self, name, attributes=None):
@@ -106,10 +118,13 @@
         try:
             # if there already is a child element, we must append to its tail
             last_element = last_element[-1]
-            last_element.tail = (last_element.tail or u'') + data
+            last_element.tail = (last_element.tail or '') + data
         except IndexError:
             # otherwise: append to the text
-            last_element.text = (last_element.text or u'') + data
+            last_element.text = (last_element.text or '') + data
+
+    ignorableWhitespace = characters
+        
 
 class ElementTreeProducer(object):
     """Produces SAX events for an element and children.
@@ -124,13 +139,41 @@
         from xml.sax.xmlreader import AttributesNSImpl as attr_class
         self._attr_class = attr_class
         self._empty_attributes = attr_class({}, {})
-        
+
     def saxify(self):
         self._content_handler.startDocument()
-        self._recursive_saxify(self._element, {})
+
+        element = self._element
+        if hasattr(element, 'getprevious'):
+            siblings = []
+            sibling = element.getprevious()
+            while getattr(sibling, 'tag', None) is ProcessingInstruction:
+                siblings.append(sibling)
+                sibling = sibling.getprevious()
+            for sibling in siblings[::-1]:
+                self._recursive_saxify(sibling, {})
+
+        self._recursive_saxify(element, {})
+
+        if hasattr(element, 'getnext'):
+            sibling = element.getnext()
+            while getattr(sibling, 'tag', None) is ProcessingInstruction:
+                self._recursive_saxify(sibling, {})
+                sibling = sibling.getnext()
+
         self._content_handler.endDocument()
 
     def _recursive_saxify(self, element, prefixes):
+        content_handler = self._content_handler
+        tag = element.tag
+        if tag is Comment or tag is ProcessingInstruction:
+            if tag is ProcessingInstruction:
+                content_handler.processingInstruction(
+                    element.target, element.text)
+            if element.tail:
+                content_handler.characters(element.tail)
+            return
+
         new_prefixes = []
         build_qname = self._build_qname
         attribs = element.items()
@@ -146,10 +189,9 @@
         else:
             sax_attributes = self._empty_attributes
 
-        ns_uri, local_name = _getNsTag(element.tag)
+        ns_uri, local_name = _getNsTag(tag)
         qname = build_qname(ns_uri, local_name, prefixes, new_prefixes)
 
-        content_handler = self._content_handler
         for prefix, uri in new_prefixes:
             content_handler.startPrefixMapping(prefix, uri)
         content_handler.startElementNS((ns_uri, local_name),

Modified: lxml/trunk/src/lxml/serializer.pxi
==============================================================================
--- lxml/trunk/src/lxml/serializer.pxi	(original)
+++ lxml/trunk/src/lxml/serializer.pxi	Sat May  5 12:29:50 2007
@@ -78,8 +78,10 @@
     if write_xml_declaration:
         _writeDeclarationToBuffer(c_buffer, c_doc.version, encoding)
 
+    _writePrevSiblings(c_buffer, c_node, encoding, pretty_print)
     tree.xmlNodeDumpOutput(c_buffer, c_doc, c_node, 0, pretty_print, encoding)
     _writeTail(c_buffer, c_node, encoding, pretty_print)
+    _writeNextSiblings(c_buffer, c_node, encoding, pretty_print)
 
 cdef void _writeDeclarationToBuffer(tree.xmlOutputBuffer* c_buffer,
                                     char* version, char* encoding):
@@ -100,6 +102,36 @@
                                pretty_print, encoding)
         c_node = c_node.next
 
+cdef void _writePrevSiblings(tree.xmlOutputBuffer* c_buffer, xmlNode* c_node,
+                             char* encoding, int pretty_print):
+    cdef xmlNode* c_sibling
+    if c_node.parent is not NULL and _isElement(c_node.parent):
+        return
+    # we are at a root node, so add PI and comment siblings
+    c_sibling = c_node
+    while c_sibling.prev != NULL and \
+              (c_sibling.prev.type == tree.XML_PI_NODE or \
+               c_sibling.prev.type == tree.XML_COMMENT_NODE):
+        c_sibling = c_sibling.prev
+    while c_sibling != c_node:
+        tree.xmlNodeDumpOutput(c_buffer, c_node.doc, c_sibling, 0,
+                               pretty_print, encoding)
+        c_sibling = c_sibling.next
+
+cdef void _writeNextSiblings(tree.xmlOutputBuffer* c_buffer, xmlNode* c_node,
+                             char* encoding, int pretty_print):
+    cdef xmlNode* c_sibling
+    if c_node.parent is not NULL and _isElement(c_node.parent):
+        return
+    # we are at a root node, so add PI and comment siblings
+    c_sibling = c_node.next
+    while c_sibling != NULL and \
+              (c_sibling.type == tree.XML_PI_NODE or \
+               c_sibling.type == tree.XML_COMMENT_NODE):
+        tree.xmlNodeDumpOutput(c_buffer, c_node.doc, c_sibling, 0,
+                               pretty_print, encoding)
+        c_sibling = c_sibling.next
+
 # output to file-like objects
 
 cdef class _FilelikeWriter:

Modified: lxml/trunk/src/lxml/tests/test_etree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_etree.py	(original)
+++ lxml/trunk/src/lxml/tests/test_etree.py	Sat May  5 12:29:50 2007
@@ -404,6 +404,156 @@
         Element = self.etree.Element
         self.assertRaises(TypeError, Element('a').append, None)
 
+    def test_addnext(self):
+        Element = self.etree.Element
+        SubElement = self.etree.SubElement
+        root = Element('root')
+        SubElement(root, 'a')
+        SubElement(root, 'b')
+
+        self.assertEquals(['a', 'b'],
+                          [c.tag for c in root])
+        root[1].addnext(root[0])
+        self.assertEquals(['b', 'a'],
+                          [c.tag for c in root])
+
+    def test_addprevious(self):
+        Element = self.etree.Element
+        SubElement = self.etree.SubElement
+        root = Element('root')
+        SubElement(root, 'a')
+        SubElement(root, 'b')
+
+        self.assertEquals(['a', 'b'],
+                          [c.tag for c in root])
+        root[0].addprevious(root[1])
+        self.assertEquals(['b', 'a'],
+                          [c.tag for c in root])
+
+    def test_addnext_root(self):
+        Element = self.etree.Element
+        a = Element('a')
+        b = Element('b')
+        self.assertRaises(TypeError, a.addnext, b)
+
+    def test_addprevious_root(self):  # an Element cannot precede the root
+        Element = self.etree.Element
+        a = Element('a')
+        b = Element('b')
+        self.assertRaises(TypeError, a.addprevious, b)
+
+    def test_addprevious_pi(self):
+        Element = self.etree.Element
+        SubElement = self.etree.SubElement
+        PI = self.etree.PI
+        root = Element('root')
+        SubElement(root, 'a')
+        pi = PI('TARGET', 'TEXT')
+        pi.tail = "TAIL"
+
+        self.assertEquals('<root><a></a></root>',
+                          self._writeElement(root))
+        root[0].addprevious(pi)
+        self.assertEquals('<root><?TARGET TEXT?>TAIL<a></a></root>',
+                          self._writeElement(root))
+
+    def test_addprevious_root_pi(self):
+        Element = self.etree.Element
+        PI = self.etree.PI
+        root = Element('root')
+        pi = PI('TARGET', 'TEXT')
+        pi.tail = "TAIL"
+
+        self.assertEquals('<root></root>',
+                          self._writeElement(root))
+        root.addprevious(pi)
+        self.assertEquals('<?TARGET TEXT?>\n<root></root>',
+                          self._writeElement(root))
+
+    def test_addnext_pi(self):
+        Element = self.etree.Element
+        SubElement = self.etree.SubElement
+        PI = self.etree.PI
+        root = Element('root')
+        SubElement(root, 'a')
+        pi = PI('TARGET', 'TEXT')
+        pi.tail = "TAIL"
+
+        self.assertEquals('<root><a></a></root>',
+                          self._writeElement(root))
+        root[0].addnext(pi)
+        self.assertEquals('<root><a></a><?TARGET TEXT?>TAIL</root>',
+                          self._writeElement(root))
+
+    def test_addnext_root_pi(self):
+        Element = self.etree.Element
+        PI = self.etree.PI
+        root = Element('root')
+        pi = PI('TARGET', 'TEXT')
+        pi.tail = "TAIL"
+
+        self.assertEquals('<root></root>',
+                          self._writeElement(root))
+        root.addnext(pi)
+        self.assertEquals('<root></root>\n<?TARGET TEXT?>',
+                          self._writeElement(root))
+
+    def test_addnext_comment(self):
+        Element = self.etree.Element
+        SubElement = self.etree.SubElement
+        Comment = self.etree.Comment
+        root = Element('root')
+        SubElement(root, 'a')
+        comment = Comment('TEXT ')
+        comment.tail = "TAIL"
+
+        self.assertEquals('<root><a></a></root>',
+                          self._writeElement(root))
+        root[0].addnext(comment)
+        self.assertEquals('<root><a></a><!--TEXT -->TAIL</root>',
+                          self._writeElement(root))
+
+    def test_addnext_root_comment(self):
+        Element = self.etree.Element
+        Comment = self.etree.Comment
+        root = Element('root')
+        comment = Comment('TEXT ')
+        comment.tail = "TAIL"
+
+        self.assertEquals('<root></root>',
+                          self._writeElement(root))
+        root.addnext(comment)
+        self.assertEquals('<root></root>\n<!--TEXT -->',
+                          self._writeElement(root))
+
+    def test_addprevious_comment(self):
+        Element = self.etree.Element
+        SubElement = self.etree.SubElement
+        Comment = self.etree.Comment
+        root = Element('root')
+        SubElement(root, 'a')
+        comment = Comment('TEXT ')
+        comment.tail = "TAIL"
+
+        self.assertEquals('<root><a></a></root>',
+                          self._writeElement(root))
+        root[0].addprevious(comment)
+        self.assertEquals('<root><!--TEXT -->TAIL<a></a></root>',
+                          self._writeElement(root))
+
+    def test_addprevious_root_comment(self):
+        Element = self.etree.Element
+        Comment = self.etree.Comment
+        root = Element('root')
+        comment = Comment('TEXT ')
+        comment.tail = "TAIL"
+
+        self.assertEquals('<root></root>',
+                          self._writeElement(root))
+        root.addprevious(comment)
+        self.assertEquals('<!--TEXT -->\n<root></root>',
+                          self._writeElement(root))
+
     # ET's Elements have items() and key(), but not values()
     def test_attribute_values(self):
         XML = self.etree.XML

Modified: lxml/trunk/src/lxml/tests/test_sax.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_sax.py	(original)
+++ lxml/trunk/src/lxml/tests/test_sax.py	Sat May  5 12:29:50 2007
@@ -25,6 +25,30 @@
         self.assertEquals('<a>ab<b>bb</b>ba</a>',
                           xml_out)
 
+    def test_etree_sax_comment(self):
+        tree = self.parse('<a>ab<!-- TEST -->ba</a>')
+        xml_out = self._saxify_serialize(tree)
+        self.assertEquals('<a>abba</a>',
+                          xml_out)
+
+    def test_etree_sax_pi(self):
+        tree = self.parse('<a>ab<?this and that?>ba</a>')
+        xml_out = self._saxify_serialize(tree)
+        self.assertEquals('<a>ab<?this and that?>ba</a>',
+                          xml_out)
+
+    def test_etree_sax_comment_root(self):
+        tree = self.parse('<!-- TEST --><a>ab</a>')
+        xml_out = self._saxify_serialize(tree)
+        self.assertEquals('<a>ab</a>',
+                          xml_out)
+
+    def test_etree_sax_pi_root(self):
+        tree = self.parse('<?this and that?><a>ab</a>')
+        xml_out = self._saxify_serialize(tree)
+        self.assertEquals('<?this and that?><a>ab</a>',
+                          xml_out)
+
     def test_etree_sax_attributes(self):
         tree = self.parse('<a aa="5">ab<b b="5"/>ba</a>')
         xml_out = self._saxify_serialize(tree)


More information about the lxml-checkins mailing list