[Lxml-checkins] r32623 - in lxml/branch/lxml-1.1: . src/lxml src/lxml/tests
scoder at codespeak.net
scoder at codespeak.net
Mon Sep 25 11:15:15 CEST 2006
Author: scoder
Date: Mon Sep 25 11:15:13 2006
New Revision: 32623
Modified:
lxml/branch/lxml-1.1/CHANGES.txt
lxml/branch/lxml-1.1/src/lxml/apihelpers.pxi
lxml/branch/lxml-1.1/src/lxml/tests/test_etree.py
Log:
fix: skip over xinclude nodes when collecting text nodes
Modified: lxml/branch/lxml-1.1/CHANGES.txt
==============================================================================
--- lxml/branch/lxml-1.1/CHANGES.txt (original)
+++ lxml/branch/lxml-1.1/CHANGES.txt Mon Sep 25 11:15:13 2006
@@ -12,6 +12,8 @@
Bugs fixed
----------
+* Show text xincluded between text nodes correctly in .text and .tail
+
* 'integer * objectify.StringElement' operation was not supported
Modified: lxml/branch/lxml-1.1/src/lxml/apihelpers.pxi
==============================================================================
--- lxml/branch/lxml-1.1/src/lxml/apihelpers.pxi (original)
+++ lxml/branch/lxml-1.1/src/lxml/apihelpers.pxi Mon Sep 25 11:15:13 2006
@@ -233,12 +233,12 @@
# check for multiple text nodes
scount = 0
text = NULL
- c_node_cur = c_node
- while c_node_cur is not NULL and c_node_cur.type == tree.XML_TEXT_NODE:
+ c_node_cur = c_node = _textNodeOrSkip(c_node)
+ while c_node_cur is not NULL:
if c_node_cur.content[0] != c'\0':
text = c_node_cur.content
scount = scount + 1
- c_node_cur = c_node_cur.next
+ c_node_cur = _textNodeOrSkip(c_node_cur.next)
# handle two most common cases first
if text is NULL:
@@ -251,9 +251,9 @@
# the rest is not performance critical anymore
result = ''
- while c_node is not NULL and c_node.type == tree.XML_TEXT_NODE:
+ while c_node is not NULL:
result = result + c_node.content
- c_node = c_node.next
+ c_node = _textNodeOrSkip(c_node.next)
return funicode(result)
cdef void _removeText(xmlNode* c_node):
@@ -262,10 +262,10 @@
Start removing at c_node.
"""
cdef xmlNode* c_next
- while c_node is not NULL and c_node.type == tree.XML_TEXT_NODE:
- c_next = c_node.next
+ c_node = _textNodeOrSkip(c_node)
+ while c_node is not NULL:
+ c_next = _textNodeOrSkip(c_node.next)
tree.xmlUnlinkNode(c_node)
- # XXX cannot safely free in case of direct text node proxies..
tree.xmlFreeNode(c_node)
c_node = c_next
@@ -333,6 +333,23 @@
c_child = c_child.prev
return NULL
+cdef xmlNode* _textNodeOrSkip(xmlNode* c_node):
+ """Return the first text node at or after c_node, stepping over
+ XInclude start/end marker nodes on the way.
+
+ Returns NULL when any other node type is reached first or the sibling
+ chain ends. Lets callers treat text split by XInclude markers as one run.
+ """
+ while c_node is not NULL:
+ if c_node.type == tree.XML_TEXT_NODE:
+ return c_node
+ elif c_node.type == tree.XML_XINCLUDE_START or \
+ c_node.type == tree.XML_XINCLUDE_END:
+ c_node = c_node.next  # marker node: keep scanning the siblings
+ else:
+ return NULL  # any other node type ends the run of text nodes
+ return NULL
+
cdef xmlNode* _nextElement(xmlNode* c_node):
"""Given a node, find the next sibling that is an element.
"""
@@ -410,8 +427,9 @@
cdef xmlNode* c_next
# tail support: look for any text nodes trailing this node and
# move them too
- while c_tail is not NULL and c_tail.type == tree.XML_TEXT_NODE:
- c_next = c_tail.next
+ c_tail = _textNodeOrSkip(c_tail)
+ while c_tail is not NULL:
+ c_next = _textNodeOrSkip(c_tail.next)
tree.xmlUnlinkNode(c_tail)
tree.xmlAddNextSibling(c_target, c_tail)
c_target = c_tail
@@ -421,14 +439,15 @@
cdef xmlNode* c_new_tail
# tail copying support: look for any text nodes trailing this node and
# copy it to the target node
- while c_tail is not NULL and c_tail.type == tree.XML_TEXT_NODE:
+ c_tail = _textNodeOrSkip(c_tail)
+ while c_tail is not NULL:
if c_target.doc is not c_tail.doc:
c_new_tail = tree.xmlDocCopyNode(c_tail, c_target.doc, 0)
else:
c_new_tail = tree.xmlCopyNode(c_tail, 0)
tree.xmlAddNextSibling(c_target, c_new_tail)
c_target = c_new_tail
- c_tail = c_tail.next
+ c_tail = _textNodeOrSkip(c_tail.next)
cdef xmlNode* _deleteSlice(xmlNode* c_node, Py_ssize_t start, Py_ssize_t stop):
"""Delete slice, starting with c_node, start counting at start, end at stop.
Modified: lxml/branch/lxml-1.1/src/lxml/tests/test_etree.py
==============================================================================
--- lxml/branch/lxml-1.1/src/lxml/tests/test_etree.py (original)
+++ lxml/branch/lxml-1.1/src/lxml/tests/test_etree.py Mon Sep 25 11:15:13 2006
@@ -1195,6 +1195,22 @@
self.assertEquals(
'a',
tree.getroot()[1].tag)
+
+ def test_xinclude_text(self):  # parse="text" include must merge into root.text
+ filename = fileInTestDir('test_broken.xml')
+ root = etree.XML('''\
+ <doc xmlns:xi="http://www.w3.org/2001/XInclude">
+ <xi:include href="%s" parse="text"/>
+ </doc>
+ ''' % filename)
+ old_text = root.text  # text before the xi:include element
+ content = open(filename).read()  # raw file contents expected to be included
+ old_tail = root[0].tail  # text after the xi:include element
+
+ etree.ElementTree(root).xinclude()  # run XInclude processing on the tree
+ self.assertEquals(old_text + content + old_tail,
+ root.text)
+
class ETreeC14NTestCase(HelperTestCase):
def test_c14n(self):
More information about the lxml-checkins mailing list