[Lxml-checkins] r46532 - in lxml/trunk: . src/lxml src/lxml/tests

scoder at codespeak.net scoder at codespeak.net
Thu Sep 13 11:15:42 CEST 2007


Author: scoder
Date: Thu Sep 13 11:15:41 2007
New Revision: 46532

Modified:
   lxml/trunk/CHANGES.txt
   lxml/trunk/src/lxml/etree.pyx
   lxml/trunk/src/lxml/iterparse.pxi
   lxml/trunk/src/lxml/tests/test_etree.py
Log:
itertext() method on elements

Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt	(original)
+++ lxml/trunk/CHANGES.txt	Thu Sep 13 11:15:41 2007
@@ -8,6 +8,8 @@
 Features added
 --------------
 
+* ``itertext()`` method on Elements
+
 * Setting a QName object as value of the .text property or as an attribute
   will resolve its prefix in the respective context
 

Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx	(original)
+++ lxml/trunk/src/lxml/etree.pyx	Thu Sep 13 11:15:41 2007
@@ -1092,6 +1092,17 @@
         """
         return ElementDepthFirstIterator(self, tag)
 
+    def itertext(self, tag=None, with_tail=True):
+        """Returns an iterator over the text content of this subtree.
+
+        You can pass the ``tag`` keyword argument to restrict text content to
+        a specific tag name.
+
+        You can set the ``with_tail`` keyword argument to ``False`` to get
+        only ``.text`` values and skip over all ``.tail`` text.
+        """
+        return ElementTextIterator(self, tag, with_tail)
+
     def makeelement(self, _tag, attrib=None, nsmap=None, **_extra):
         """Creates a new element associated with the same document.
         """
@@ -1897,6 +1908,46 @@
         tree.END_FOR_EACH_ELEMENT_FROM(c_node)
         return NULL
 
+cdef class ElementTextIterator:
+    """Iterates over the text content of a subtree.
+
+    You can pass the ``tag`` keyword argument to restrict text content to a
+    specific tag name.
+
+    You can set the ``with_tail`` keyword argument to ``False`` to skip over
+    tail text.
+
+    Text and tail values that are ``None`` are skipped.  The tail of the
+    start element itself is never returned: it lies outside the subtree.
+    """
+    cdef object _nextEvent
+    cdef _Element _start_element
+    def __init__(self, _Element element not None, tag=None, with_tail=True):
+        if with_tail:
+            # "end" events are the ones that deliver tail text in __next__
+            events = ("start", "end")
+        else:
+            events = ("start",)
+        # kept so that __next__ can recognise (and skip) its tail
+        self._start_element = element
+        self._nextEvent = iterwalk(element, events=events, tag=tag).next
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        cdef _Element element
+        # start from an explicit None instead of an unassigned local
+        result = None
+        while result is None:
+            # the walker ends the iteration by raising StopIteration
+            event, element = self._nextEvent()
+            if event == "start":
+                result = element.text
+            elif element is not self._start_element:
+                result = element.tail
+        return result
+
 cdef xmlNode* _createElement(xmlDoc* c_doc, object name_utf) except NULL:
     cdef xmlNode* c_node
     c_node = tree.xmlNewDocNode(c_doc, NULL, _cstr(name_utf), NULL)

Modified: lxml/trunk/src/lxml/iterparse.pxi
==============================================================================
--- lxml/trunk/src/lxml/iterparse.pxi	(original)
+++ lxml/trunk/src/lxml/iterparse.pxi	Thu Sep 13 11:15:41 2007
@@ -444,6 +444,7 @@
 
     cdef _Element _end_node(self):
         cdef _Element node
+        cdef int i, ns_count
         node, ns_count = self._pop_node()
         if self._event_filter & ITERPARSE_FILTER_END:
             if self._tag_tuple is None or \

Modified: lxml/trunk/src/lxml/tests/test_etree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_etree.py	(original)
+++ lxml/trunk/src/lxml/tests/test_etree.py	Thu Sep 13 11:15:41 2007
@@ -1420,6 +1420,15 @@
             [a, b, c],
             list(a.getiterator('*')))
 
+    def test_itertext(self):
+        # ET 1.3+
+        root = self.etree.XML(
+            "<root>RTEXT<a></a>ATAIL<b/><c>CTEXT</c>CTAIL</root>")
+        expected = ["RTEXT", "ATAIL", "CTEXT", "CTAIL"]
+
+        # text and tail are reported in document order
+        self.assertEquals(expected, list(root.itertext()))
+
     def test_findall_ns(self):
         XML = self.etree.XML
         root = XML('<a xmlns:x="X" xmlns:y="Y"><x:b><c/></x:b><b/><c><x:b/><b/></c><b/></a>')


More information about the lxml-checkins mailing list