[Lxml-checkins] r46532 - in lxml/trunk: . src/lxml src/lxml/tests
scoder at codespeak.net
scoder at codespeak.net
Thu Sep 13 11:15:42 CEST 2007
Author: scoder
Date: Thu Sep 13 11:15:41 2007
New Revision: 46532
Modified:
lxml/trunk/CHANGES.txt
lxml/trunk/src/lxml/etree.pyx
lxml/trunk/src/lxml/iterparse.pxi
lxml/trunk/src/lxml/tests/test_etree.py
Log:
itertext() method on elements
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Thu Sep 13 11:15:41 2007
@@ -8,6 +8,8 @@
Features added
--------------
+* ``itertext()`` method on Elements
+
* Setting a QName object as value of the .text property or as an attribute
will resolve its prefix in the respective context
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Thu Sep 13 11:15:41 2007
@@ -1092,6 +1092,17 @@
"""
return ElementDepthFirstIterator(self, tag)
+    def itertext(self, tag=None, with_tail=True):
+        """Returns an iterator over the text content of this subtree.
+
+        Pass the ``tag`` keyword argument to restrict the text content to
+        a specific tag name.
+
+        Pass ``with_tail=False`` to skip over tail text.
+        """
+        text_iterator = ElementTextIterator(self, tag, with_tail)
+        return text_iterator
+
def makeelement(self, _tag, attrib=None, nsmap=None, **_extra):
"""Creates a new element associated with the same document.
"""
@@ -1897,6 +1908,36 @@
tree.END_FOR_EACH_ELEMENT_FROM(c_node)
return NULL
+cdef class ElementTextIterator:
+    """Iterates over the text content of a subtree.
+
+    You can pass the ``tag`` keyword argument to restrict text content to a
+    specific tag name.
+
+    You can set the ``with_tail`` keyword argument to ``False`` to skip over
+    tail text.
+    """
+    # bound ``next`` method of the iterwalk() iterator that drives this one
+    cdef object _nextEvent
+    def __init__(self, _Element element not None, tag=None, with_tail=True):
+        # __next__ maps "start" events to .text and every other event to
+        # .tail, so tail text is skipped by not requesting "end" events.
+        if with_tail:
+            events = ("start", "end")
+        else:
+            events = ("start",)
+        self._nextEvent = iterwalk(element, events=events, tag=tag).next
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        """Returns the next text or tail value that is not ``None``."""
+        cdef _Element element
+        # Bind explicitly: the loop condition below reads ``result`` before
+        # the loop body has had a chance to assign it.
+        result = None
+        while result is None:
+            # Any exception raised by the walker (presumably StopIteration
+            # once it is exhausted) propagates to the caller unchanged.
+            event, element = self._nextEvent()
+            if event == "start":
+                result = element.text
+            else:
+                result = element.tail
+        return result
+
cdef xmlNode* _createElement(xmlDoc* c_doc, object name_utf) except NULL:
cdef xmlNode* c_node
c_node = tree.xmlNewDocNode(c_doc, NULL, _cstr(name_utf), NULL)
Modified: lxml/trunk/src/lxml/iterparse.pxi
==============================================================================
--- lxml/trunk/src/lxml/iterparse.pxi (original)
+++ lxml/trunk/src/lxml/iterparse.pxi Thu Sep 13 11:15:41 2007
@@ -444,6 +444,7 @@
cdef _Element _end_node(self):
cdef _Element node
+ cdef int i, ns_count
node, ns_count = self._pop_node()
if self._event_filter & ITERPARSE_FILTER_END:
if self._tag_tuple is None or \
Modified: lxml/trunk/src/lxml/tests/test_etree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_etree.py (original)
+++ lxml/trunk/src/lxml/tests/test_etree.py Thu Sep 13 11:15:41 2007
@@ -1420,6 +1420,15 @@
[a, b, c],
list(a.getiterator('*')))
+ def test_itertext(self):
+ # ET 1.3+
+ XML = self.etree.XML
+ root = XML("<root>RTEXT<a></a>ATAIL<b/><c>CTEXT</c>CTAIL</root>")
+
+ text = list(root.itertext())
+ self.assertEquals(["RTEXT", "ATAIL", "CTEXT", "CTAIL"],
+ text)
+
def test_findall_ns(self):
XML = self.etree.XML
root = XML('<a xmlns:x="X" xmlns:y="Y"><x:b><c/></x:b><b/><c><x:b/><b/></c><b/></a>')
More information about the lxml-checkins
mailing list