[Lxml-checkins] r32623 - in lxml/branch/lxml-1.1: . src/lxml src/lxml/tests

scoder at codespeak.net scoder at codespeak.net
Mon Sep 25 11:15:15 CEST 2006


Author: scoder
Date: Mon Sep 25 11:15:13 2006
New Revision: 32623

Modified:
   lxml/branch/lxml-1.1/CHANGES.txt
   lxml/branch/lxml-1.1/src/lxml/apihelpers.pxi
   lxml/branch/lxml-1.1/src/lxml/tests/test_etree.py
Log:
fix: skip over xinclude nodes when collecting text nodes

Modified: lxml/branch/lxml-1.1/CHANGES.txt
==============================================================================
--- lxml/branch/lxml-1.1/CHANGES.txt	(original)
+++ lxml/branch/lxml-1.1/CHANGES.txt	Mon Sep 25 11:15:13 2006
@@ -12,6 +12,8 @@
 Bugs fixed
 ----------
 
+* Show text xincluded between text nodes correctly in .text and .tail
+
 * 'integer * objectify.StringElement' operation was not supported
 
 

Modified: lxml/branch/lxml-1.1/src/lxml/apihelpers.pxi
==============================================================================
--- lxml/branch/lxml-1.1/src/lxml/apihelpers.pxi	(original)
+++ lxml/branch/lxml-1.1/src/lxml/apihelpers.pxi	Mon Sep 25 11:15:13 2006
@@ -233,12 +233,12 @@
     # check for multiple text nodes
     scount = 0
     text = NULL
-    c_node_cur = c_node
-    while c_node_cur is not NULL and c_node_cur.type == tree.XML_TEXT_NODE:
+    c_node_cur = c_node = _textNodeOrSkip(c_node)
+    while c_node_cur is not NULL:
         if c_node_cur.content[0] != c'\0':
             text = c_node_cur.content
         scount = scount + 1
-        c_node_cur = c_node_cur.next
+        c_node_cur = _textNodeOrSkip(c_node_cur.next)
 
     # handle two most common cases first
     if text is NULL:
@@ -251,9 +251,9 @@
 
     # the rest is not performance critical anymore
     result = ''
-    while c_node is not NULL and c_node.type == tree.XML_TEXT_NODE:
+    while c_node is not NULL:
         result = result + c_node.content
-        c_node = c_node.next
+        c_node = _textNodeOrSkip(c_node.next)
     return funicode(result)
 
 cdef void _removeText(xmlNode* c_node):
@@ -262,10 +262,10 @@
     Start removing at c_node.
     """
     cdef xmlNode* c_next
-    while c_node is not NULL and c_node.type == tree.XML_TEXT_NODE:
-        c_next = c_node.next
+    c_node = _textNodeOrSkip(c_node)
+    while c_node is not NULL:
+        c_next = _textNodeOrSkip(c_node.next)
         tree.xmlUnlinkNode(c_node)
-        # XXX cannot safely free in case of direct text node proxies..
         tree.xmlFreeNode(c_node)
         c_node = c_next
 
@@ -333,6 +333,23 @@
         c_child = c_child.prev
     return NULL
     
+cdef xmlNode* _textNodeOrSkip(xmlNode* c_node):
+    """Return the node if it's a text node.  Skip over ignorable nodes in a
+    series of text nodes.  Return NULL if a non-ignorable node is found.
+
+    This is used to skip over XInclude nodes when collecting adjacent text
+    nodes.
+    """
+    while c_node is not NULL:
+        if c_node.type == tree.XML_TEXT_NODE:
+            return c_node
+        elif c_node.type == tree.XML_XINCLUDE_START or \
+                 c_node.type == tree.XML_XINCLUDE_END:
+            c_node = c_node.next
+        else:
+            return NULL
+    return NULL
+
 cdef xmlNode* _nextElement(xmlNode* c_node):
     """Given a node, find the next sibling that is an element.
     """
@@ -410,8 +427,9 @@
     cdef xmlNode* c_next
     # tail support: look for any text nodes trailing this node and 
     # move them too
-    while c_tail is not NULL and c_tail.type == tree.XML_TEXT_NODE:
-        c_next = c_tail.next
+    c_tail = _textNodeOrSkip(c_tail)
+    while c_tail is not NULL:
+        c_next = _textNodeOrSkip(c_tail.next)
         tree.xmlUnlinkNode(c_tail)
         tree.xmlAddNextSibling(c_target, c_tail)
         c_target = c_tail
@@ -421,14 +439,15 @@
     cdef xmlNode* c_new_tail
     # tail copying support: look for any text nodes trailing this node and
     # copy it to the target node
-    while c_tail is not NULL and c_tail.type == tree.XML_TEXT_NODE:
+    c_tail = _textNodeOrSkip(c_tail)
+    while c_tail is not NULL:
         if c_target.doc is not c_tail.doc:
             c_new_tail = tree.xmlDocCopyNode(c_tail, c_target.doc, 0)
         else:
             c_new_tail = tree.xmlCopyNode(c_tail, 0)
         tree.xmlAddNextSibling(c_target, c_new_tail)
         c_target = c_new_tail
-        c_tail = c_tail.next
+        c_tail = _textNodeOrSkip(c_tail.next)
 
 cdef xmlNode* _deleteSlice(xmlNode* c_node, Py_ssize_t start, Py_ssize_t stop):
     """Delete slice, starting with c_node, start counting at start, end at stop.

Modified: lxml/branch/lxml-1.1/src/lxml/tests/test_etree.py
==============================================================================
--- lxml/branch/lxml-1.1/src/lxml/tests/test_etree.py	(original)
+++ lxml/branch/lxml-1.1/src/lxml/tests/test_etree.py	Mon Sep 25 11:15:13 2006
@@ -1195,6 +1195,22 @@
         self.assertEquals(
             'a',
             tree.getroot()[1].tag)
+
+    def test_xinclude_text(self):
+        filename = fileInTestDir('test_broken.xml')
+        root = etree.XML('''\
+        <doc xmlns:xi="http://www.w3.org/2001/XInclude">
+          <xi:include href="%s" parse="text"/>
+        </doc>
+        ''' % filename)
+        old_text = root.text
+        content = open(filename).read()
+        old_tail = root[0].tail
+
+        etree.ElementTree(root).xinclude()
+        self.assertEquals(old_text + content + old_tail,
+                          root.text)
+        
         
 class ETreeC14NTestCase(HelperTestCase):
     def test_c14n(self):


More information about the lxml-checkins mailing list