[Lxml-checkins] r51012 - in lxml/trunk: . src/lxml src/lxml/tests

scoder at codespeak.net scoder at codespeak.net
Fri Jan 25 10:36:01 CET 2008


Author: scoder
Date: Fri Jan 25 10:35:59 2008
New Revision: 51012

Modified:
   lxml/trunk/   (props changed)
   lxml/trunk/CHANGES.txt
   lxml/trunk/src/lxml/lxml.etree.pyx
   lxml/trunk/src/lxml/serializer.pxi
   lxml/trunk/src/lxml/tests/test_etree.py
Log:
 r3314 at delle:  sbehnel | 2008-01-25 07:08:35 +0100
 'with_tail' keyword in serialiser functions


Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt	(original)
+++ lxml/trunk/CHANGES.txt	Fri Jan 25 10:35:59 2008
@@ -8,6 +8,8 @@
 Features added
 --------------
 
+* ``with_tail`` option in serialiser functions.
+
 * More accurate exception messages in validator creation.
 
 Bugs fixed

Modified: lxml/trunk/src/lxml/lxml.etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/lxml.etree.pyx	(original)
+++ lxml/trunk/src/lxml/lxml.etree.pyx	Fri Jan 25 10:35:59 2008
@@ -1444,7 +1444,7 @@
             return None
 
     def write(self, file, *, encoding=None, method="xml",
-              pretty_print=False, xml_declaration=None):
+              pretty_print=False, xml_declaration=None, with_tail=True):
         """Write the tree to a file or file-like object.
 
         Defaults to ASCII encoding and writing a declaration as needed.
@@ -1467,7 +1467,7 @@
             write_declaration = encoding not in \
                                   ('US-ASCII', 'ASCII', 'UTF8', 'UTF-8')
         _tofilelike(file, self._context_node, encoding, method,
-                    write_declaration, 1, pretty_print)
+                    write_declaration, 1, pretty_print, with_tail)
 
     def getpath(self, _Element element not None):
         """Returns a structural, absolute XPath expression to find that element.
@@ -2233,14 +2233,14 @@
     """
     return isinstance(element, _Element)
 
-def dump(_Element elem not None, *, pretty_print=True):
+def dump(_Element elem not None, *, pretty_print=True, with_tail=True):
     """Writes an element tree or element structure to sys.stdout. This function
     should be used for debugging only.
     """
-    _dumpToFile(sys.stdout, elem._c_node, pretty_print)
+    _dumpToFile(sys.stdout, elem._c_node, pretty_print, with_tail)
 
 def tostring(element_or_tree, *, encoding=None, method="xml",
-             xml_declaration=None, pretty_print=False):
+             xml_declaration=None, pretty_print=False, with_tail=True):
     """Serialize an element to an encoded string representation of its XML
     tree.
 
@@ -2253,6 +2253,10 @@
 
     The keyword argument 'method' selects the output method: 'xml',
     'html' or plain 'text'.
+
+    You can prevent the tail text of the element from being serialised
+    by passing ``with_tail=False`` (the default is True).  This has no
+    impact on the tail text of children, which is always serialised.
     """
     cdef bint write_declaration
     if xml_declaration is None:
@@ -2266,10 +2270,11 @@
 
     if isinstance(element_or_tree, _Element):
         return _tostring(<_Element>element_or_tree, encoding, method,
-                         write_declaration, 0, pretty_print)
+                         write_declaration, 0, pretty_print, with_tail)
     elif isinstance(element_or_tree, _ElementTree):
         return _tostring((<_ElementTree>element_or_tree)._context_node,
-                         encoding, method, write_declaration, 1, pretty_print)
+                         encoding, method, write_declaration, 1, pretty_print,
+                         with_tail)
     else:
         raise TypeError("Type '%s' cannot be serialized." %
                         type(element_or_tree))
@@ -2283,7 +2288,8 @@
     """
     return [tostring(element_or_tree, *args, **kwargs)]
 
-def tounicode(element_or_tree, *, method="xml", pretty_print=False):
+def tounicode(element_or_tree, *, method="xml", pretty_print=False,
+              with_tail=True):
     """Serialize an element to the Python unicode representation of its XML
     tree.
 
@@ -2295,12 +2301,17 @@
 
     The keyword argument 'method' selects the output method: 'xml',
     'html' or plain 'text'.
+
+    You can prevent the tail text of the element from being serialised
+    by passing ``with_tail=False`` (the default is True).  This has no
+    impact on the tail text of children, which is always serialised.
     """
     if isinstance(element_or_tree, _Element):
-        return _tounicode(<_Element>element_or_tree, method, 0, pretty_print)
+        return _tounicode(<_Element>element_or_tree, method, 0, pretty_print,
+                           with_tail)
     elif isinstance(element_or_tree, _ElementTree):
         return _tounicode((<_ElementTree>element_or_tree)._context_node,
-                          method, 1, pretty_print)
+                          method, 1, pretty_print, with_tail)
     else:
         raise TypeError("Type '%s' cannot be serialized." %
                         type(element_or_tree))

Modified: lxml/trunk/src/lxml/serializer.pxi
==============================================================================
--- lxml/trunk/src/lxml/serializer.pxi	(original)
+++ lxml/trunk/src/lxml/serializer.pxi	Fri Jan 25 10:35:59 2008
@@ -17,7 +17,7 @@
         return OUTPUT_METHOD_TEXT
     raise ValueError("unknown output method %r" % method)
 
-cdef _textToString(xmlNode* c_node, encoding):
+cdef _textToString(xmlNode* c_node, encoding, bint with_tail):
     cdef char* c_text
     with nogil:
         c_text = tree.xmlNodeGetContent(c_node)
@@ -27,7 +27,7 @@
     text = c_text
     tree.xmlFree(c_text)
 
-    if _hasTail(c_node):
+    if with_tail and _hasTail(c_node):
         tail = _collectText(c_node.next)
         if tail:
             text = text + tail
@@ -43,7 +43,7 @@
 
 cdef _tostring(_Element element, encoding, method,
                bint write_xml_declaration, bint write_complete_document,
-               bint pretty_print):
+               bint pretty_print, bint with_tail):
     """Serialize an element to an encoded string representation of its XML
     tree.
     """
@@ -62,7 +62,7 @@
         c_enc = _cstr(encoding)
     c_method = _findOutputMethod(method)
     if c_method == OUTPUT_METHOD_TEXT:
-        return _textToString(element._c_node, encoding)
+        return _textToString(element._c_node, encoding, with_tail)
     # it is necessary to *and* find the encoding handler *and* use
     # encoding during output
     enchandler = tree.xmlFindCharEncodingHandler(c_enc)
@@ -77,7 +77,7 @@
     with nogil:
         _writeNodeToBuffer(c_buffer, element._c_node, c_enc, c_method,
                            write_xml_declaration, write_complete_document,
-                           pretty_print)
+                           pretty_print, with_tail)
         tree.xmlOutputBufferFlush(c_buffer)
         if c_buffer.conv is not NULL:
             c_result_buffer = c_buffer.conv
@@ -92,8 +92,8 @@
         tree.xmlOutputBufferClose(c_buffer)
     return result
 
-cdef _tounicode(_Element element, method,
-                bint write_complete_document, bint pretty_print):
+cdef _tounicode(_Element element, method, bint write_complete_document,
+                bint pretty_print, bint with_tail):
     """Serialize an element to the Python unicode representation of its XML
     tree.
     """
@@ -104,7 +104,7 @@
         return None
     c_method = _findOutputMethod(method)
     if c_method == OUTPUT_METHOD_TEXT:
-        text = _textToString(element._c_node, None)
+        text = _textToString(element._c_node, None, with_tail)
         return python.PyUnicode_FromEncodedObject(text, 'utf-8', 'strict')
     c_buffer = tree.xmlAllocOutputBuffer(NULL)
     if c_buffer is NULL:
@@ -112,7 +112,7 @@
 
     with nogil:
         _writeNodeToBuffer(c_buffer, element._c_node, NULL, c_method, 0,
-                           write_complete_document, pretty_print)
+                           write_complete_document, pretty_print, with_tail)
         tree.xmlOutputBufferFlush(c_buffer)
         if c_buffer.conv is not NULL:
             c_result_buffer = c_buffer.conv
@@ -132,7 +132,7 @@
                              xmlNode* c_node, char* encoding, int c_method,
                              bint write_xml_declaration,
                              bint write_complete_document,
-                             bint pretty_print) nogil:
+                             bint pretty_print, bint with_tail) nogil:
     cdef xmlDoc* c_doc
     cdef xmlNode* c_nsdecl_node
     c_doc = c_node.doc
@@ -169,7 +169,8 @@
         tree.xmlFreeNode(c_nsdecl_node)
 
     # write tail, trailing comments, etc.
-    _writeTail(c_buffer, c_node, encoding, pretty_print)
+    if with_tail:
+        _writeTail(c_buffer, c_node, encoding, pretty_print)
     if write_complete_document:
         _writeNextSiblings(c_buffer, c_node, encoding, pretty_print)
     if pretty_print:
@@ -312,7 +313,7 @@
 
 cdef _tofilelike(f, _Element element, encoding, method,
                  bint write_xml_declaration, bint write_doctype,
-                 bint pretty_print):
+                 bint pretty_print, bint with_tail):
     cdef python.PyThreadState* state
     cdef _FilelikeWriter writer
     cdef tree.xmlOutputBuffer* c_buffer
@@ -328,10 +329,10 @@
         if _isString(f):
             filename8 = _encodeFilename(f)
             f = open(filename8, 'wb')
-            f.write(_textToString(element._c_node, encoding))
+            f.write(_textToString(element._c_node, encoding, with_tail))
             f.close()
         else:
-            f.write(_textToString(element._c_node, encoding))
+            f.write(_textToString(element._c_node, encoding, with_tail))
         return
     enchandler = tree.xmlFindCharEncodingHandler(c_enc)
     if enchandler is NULL:
@@ -353,7 +354,8 @@
         raise TypeError("File or filename expected, got '%s'" % type(f))
 
     _writeNodeToBuffer(c_buffer, element._c_node, c_enc, c_method,
-                       write_xml_declaration, write_doctype, pretty_print)
+                       write_xml_declaration, write_doctype,
+                       pretty_print, with_tail)
     tree.xmlOutputBufferClose(c_buffer)
     tree.xmlCharEncCloseFunc(enchandler)
     if writer is None:
@@ -403,13 +405,14 @@
 
 # dump node to file (mainly for debug)
 
-cdef _dumpToFile(f, xmlNode* c_node, bint pretty_print):
+cdef _dumpToFile(f, xmlNode* c_node, bint pretty_print, bint with_tail):
     cdef tree.xmlOutputBuffer* c_buffer
     if not python.PyFile_Check(f):
         raise ValueError("not a file")
     c_buffer = tree.xmlOutputBufferCreateFile(python.PyFile_AsFile(f), NULL)
     tree.xmlNodeDumpOutput(c_buffer, c_node.doc, c_node, 0, pretty_print, NULL)
-    _writeTail(c_buffer, c_node, NULL, 0)
+    if with_tail:
+        _writeTail(c_buffer, c_node, NULL, 0)
     if not pretty_print:
         # not written yet
         tree.xmlOutputBufferWriteString(c_buffer, '\n')

Modified: lxml/trunk/src/lxml/tests/test_etree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_etree.py	(original)
+++ lxml/trunk/src/lxml/tests/test_etree.py	Fri Jan 25 10:35:59 2008
@@ -1916,6 +1916,26 @@
         result = tostring(a, pretty_print=True)
         self.assertEquals(result, "<a>\n  <b/>\n  <c/>\n</a>\n")
 
+    def test_tostring_with_tail(self):
+        tostring = self.etree.tostring
+        Element = self.etree.Element
+        SubElement = self.etree.SubElement
+
+        a = Element('a')
+        a.tail = "aTAIL"
+        b = SubElement(a, 'b')
+        b.tail = "bTAIL"
+        c = SubElement(a, 'c')
+
+        result = tostring(a)
+        self.assertEquals(result, "<a><b/>bTAIL<c/></a>aTAIL")
+
+        result = tostring(a, with_tail=False)
+        self.assertEquals(result, "<a><b/>bTAIL<c/></a>")
+
+        result = tostring(a, with_tail=True)
+        self.assertEquals(result, "<a><b/>bTAIL<c/></a>aTAIL")
+
     def test_tostring_method_text_encoding(self):
         tostring = self.etree.tostring
         Element = self.etree.Element


More information about the lxml-checkins mailing list