[Lxml-checkins] r51012 - in lxml/trunk: . src/lxml src/lxml/tests
scoder at codespeak.net
scoder at codespeak.net
Fri Jan 25 10:36:01 CET 2008
Author: scoder
Date: Fri Jan 25 10:35:59 2008
New Revision: 51012
Modified:
lxml/trunk/ (props changed)
lxml/trunk/CHANGES.txt
lxml/trunk/src/lxml/lxml.etree.pyx
lxml/trunk/src/lxml/serializer.pxi
lxml/trunk/src/lxml/tests/test_etree.py
Log:
r3314 at delle: sbehnel | 2008-01-25 07:08:35 +0100
'with_tail' keyword in serialiser functions
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Fri Jan 25 10:35:59 2008
@@ -8,6 +8,8 @@
Features added
--------------
+* ``with_tail`` option in serialiser functions.
+
* More accurate exception messages in validator creation.
Bugs fixed
Modified: lxml/trunk/src/lxml/lxml.etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/lxml.etree.pyx (original)
+++ lxml/trunk/src/lxml/lxml.etree.pyx Fri Jan 25 10:35:59 2008
@@ -1444,7 +1444,7 @@
return None
def write(self, file, *, encoding=None, method="xml",
- pretty_print=False, xml_declaration=None):
+ pretty_print=False, xml_declaration=None, with_tail=True):
"""Write the tree to a file or file-like object.
Defaults to ASCII encoding and writing a declaration as needed.
@@ -1467,7 +1467,7 @@
write_declaration = encoding not in \
('US-ASCII', 'ASCII', 'UTF8', 'UTF-8')
_tofilelike(file, self._context_node, encoding, method,
- write_declaration, 1, pretty_print)
+ write_declaration, 1, pretty_print, with_tail)
def getpath(self, _Element element not None):
"""Returns a structural, absolute XPath expression to find that element.
@@ -2233,14 +2233,14 @@
"""
return isinstance(element, _Element)
-def dump(_Element elem not None, *, pretty_print=True):
+def dump(_Element elem not None, *, pretty_print=True, with_tail=True):
"""Writes an element tree or element structure to sys.stdout. This function
should be used for debugging only.
"""
- _dumpToFile(sys.stdout, elem._c_node, pretty_print)
+ _dumpToFile(sys.stdout, elem._c_node, pretty_print, with_tail)
def tostring(element_or_tree, *, encoding=None, method="xml",
- xml_declaration=None, pretty_print=False):
+ xml_declaration=None, pretty_print=False, with_tail=True):
"""Serialize an element to an encoded string representation of its XML
tree.
@@ -2253,6 +2253,10 @@
The keyword argument 'method' selects the output method: 'xml',
'html' or plain 'text'.
+
+ You can prevent the tail text of the element from being serialised
+ by passing ``with_tail=False`` (the default is ``True``). This has no
+ impact on the tail text of children, which will always be serialised.
"""
cdef bint write_declaration
if xml_declaration is None:
@@ -2266,10 +2270,11 @@
if isinstance(element_or_tree, _Element):
return _tostring(<_Element>element_or_tree, encoding, method,
- write_declaration, 0, pretty_print)
+ write_declaration, 0, pretty_print, with_tail)
elif isinstance(element_or_tree, _ElementTree):
return _tostring((<_ElementTree>element_or_tree)._context_node,
- encoding, method, write_declaration, 1, pretty_print)
+ encoding, method, write_declaration, 1, pretty_print,
+ with_tail)
else:
raise TypeError("Type '%s' cannot be serialized." %
type(element_or_tree))
@@ -2283,7 +2288,8 @@
"""
return [tostring(element_or_tree, *args, **kwargs)]
-def tounicode(element_or_tree, *, method="xml", pretty_print=False):
+def tounicode(element_or_tree, *, method="xml", pretty_print=False,
+ with_tail=True):
"""Serialize an element to the Python unicode representation of its XML
tree.
@@ -2295,12 +2301,17 @@
The keyword argument 'method' selects the output method: 'xml',
'html' or plain 'text'.
+
+ You can prevent the tail text of the element from being serialised
+ by passing ``with_tail=False`` (the default is ``True``). This has no
+ impact on the tail text of children, which will always be serialised.
"""
if isinstance(element_or_tree, _Element):
- return _tounicode(<_Element>element_or_tree, method, 0, pretty_print)
+ return _tounicode(<_Element>element_or_tree, method, 0, pretty_print,
+ with_tail)
elif isinstance(element_or_tree, _ElementTree):
return _tounicode((<_ElementTree>element_or_tree)._context_node,
- method, 1, pretty_print)
+ method, 1, pretty_print, with_tail)
else:
raise TypeError("Type '%s' cannot be serialized." %
type(element_or_tree))
Modified: lxml/trunk/src/lxml/serializer.pxi
==============================================================================
--- lxml/trunk/src/lxml/serializer.pxi (original)
+++ lxml/trunk/src/lxml/serializer.pxi Fri Jan 25 10:35:59 2008
@@ -17,7 +17,7 @@
return OUTPUT_METHOD_TEXT
raise ValueError("unknown output method %r" % method)
-cdef _textToString(xmlNode* c_node, encoding):
+cdef _textToString(xmlNode* c_node, encoding, bint with_tail):
cdef char* c_text
with nogil:
c_text = tree.xmlNodeGetContent(c_node)
@@ -27,7 +27,7 @@
text = c_text
tree.xmlFree(c_text)
- if _hasTail(c_node):
+ if with_tail and _hasTail(c_node):
tail = _collectText(c_node.next)
if tail:
text = text + tail
@@ -43,7 +43,7 @@
cdef _tostring(_Element element, encoding, method,
bint write_xml_declaration, bint write_complete_document,
- bint pretty_print):
+ bint pretty_print, bint with_tail):
"""Serialize an element to an encoded string representation of its XML
tree.
"""
@@ -62,7 +62,7 @@
c_enc = _cstr(encoding)
c_method = _findOutputMethod(method)
if c_method == OUTPUT_METHOD_TEXT:
- return _textToString(element._c_node, encoding)
+ return _textToString(element._c_node, encoding, with_tail)
# it is necessary to *and* find the encoding handler *and* use
# encoding during output
enchandler = tree.xmlFindCharEncodingHandler(c_enc)
@@ -77,7 +77,7 @@
with nogil:
_writeNodeToBuffer(c_buffer, element._c_node, c_enc, c_method,
write_xml_declaration, write_complete_document,
- pretty_print)
+ pretty_print, with_tail)
tree.xmlOutputBufferFlush(c_buffer)
if c_buffer.conv is not NULL:
c_result_buffer = c_buffer.conv
@@ -92,8 +92,8 @@
tree.xmlOutputBufferClose(c_buffer)
return result
-cdef _tounicode(_Element element, method,
- bint write_complete_document, bint pretty_print):
+cdef _tounicode(_Element element, method, bint write_complete_document,
+ bint pretty_print, bint with_tail):
"""Serialize an element to the Python unicode representation of its XML
tree.
"""
@@ -104,7 +104,7 @@
return None
c_method = _findOutputMethod(method)
if c_method == OUTPUT_METHOD_TEXT:
- text = _textToString(element._c_node, None)
+ text = _textToString(element._c_node, None, with_tail)
return python.PyUnicode_FromEncodedObject(text, 'utf-8', 'strict')
c_buffer = tree.xmlAllocOutputBuffer(NULL)
if c_buffer is NULL:
@@ -112,7 +112,7 @@
with nogil:
_writeNodeToBuffer(c_buffer, element._c_node, NULL, c_method, 0,
- write_complete_document, pretty_print)
+ write_complete_document, pretty_print, with_tail)
tree.xmlOutputBufferFlush(c_buffer)
if c_buffer.conv is not NULL:
c_result_buffer = c_buffer.conv
@@ -132,7 +132,7 @@
xmlNode* c_node, char* encoding, int c_method,
bint write_xml_declaration,
bint write_complete_document,
- bint pretty_print) nogil:
+ bint pretty_print, bint with_tail) nogil:
cdef xmlDoc* c_doc
cdef xmlNode* c_nsdecl_node
c_doc = c_node.doc
@@ -169,7 +169,8 @@
tree.xmlFreeNode(c_nsdecl_node)
# write tail, trailing comments, etc.
- _writeTail(c_buffer, c_node, encoding, pretty_print)
+ if with_tail:
+ _writeTail(c_buffer, c_node, encoding, pretty_print)
if write_complete_document:
_writeNextSiblings(c_buffer, c_node, encoding, pretty_print)
if pretty_print:
@@ -312,7 +313,7 @@
cdef _tofilelike(f, _Element element, encoding, method,
bint write_xml_declaration, bint write_doctype,
- bint pretty_print):
+ bint pretty_print, bint with_tail):
cdef python.PyThreadState* state
cdef _FilelikeWriter writer
cdef tree.xmlOutputBuffer* c_buffer
@@ -328,10 +329,10 @@
if _isString(f):
filename8 = _encodeFilename(f)
f = open(filename8, 'wb')
- f.write(_textToString(element._c_node, encoding))
+ f.write(_textToString(element._c_node, encoding, with_tail))
f.close()
else:
- f.write(_textToString(element._c_node, encoding))
+ f.write(_textToString(element._c_node, encoding, with_tail))
return
enchandler = tree.xmlFindCharEncodingHandler(c_enc)
if enchandler is NULL:
@@ -353,7 +354,8 @@
raise TypeError("File or filename expected, got '%s'" % type(f))
_writeNodeToBuffer(c_buffer, element._c_node, c_enc, c_method,
- write_xml_declaration, write_doctype, pretty_print)
+ write_xml_declaration, write_doctype,
+ pretty_print, with_tail)
tree.xmlOutputBufferClose(c_buffer)
tree.xmlCharEncCloseFunc(enchandler)
if writer is None:
@@ -403,13 +405,14 @@
# dump node to file (mainly for debug)
-cdef _dumpToFile(f, xmlNode* c_node, bint pretty_print):
+cdef _dumpToFile(f, xmlNode* c_node, bint pretty_print, bint with_tail):
cdef tree.xmlOutputBuffer* c_buffer
if not python.PyFile_Check(f):
raise ValueError("not a file")
c_buffer = tree.xmlOutputBufferCreateFile(python.PyFile_AsFile(f), NULL)
tree.xmlNodeDumpOutput(c_buffer, c_node.doc, c_node, 0, pretty_print, NULL)
- _writeTail(c_buffer, c_node, NULL, 0)
+ if with_tail:
+ _writeTail(c_buffer, c_node, NULL, 0)
if not pretty_print:
# not written yet
tree.xmlOutputBufferWriteString(c_buffer, '\n')
Modified: lxml/trunk/src/lxml/tests/test_etree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_etree.py (original)
+++ lxml/trunk/src/lxml/tests/test_etree.py Fri Jan 25 10:35:59 2008
@@ -1916,6 +1916,26 @@
result = tostring(a, pretty_print=True)
self.assertEquals(result, "<a>\n <b/>\n <c/>\n</a>\n")
+ def test_tostring_with_tail(self):
+ tostring = self.etree.tostring
+ Element = self.etree.Element
+ SubElement = self.etree.SubElement
+
+ a = Element('a')
+ a.tail = "aTAIL"
+ b = SubElement(a, 'b')
+ b.tail = "bTAIL"
+ c = SubElement(a, 'c')
+
+ result = tostring(a)
+ self.assertEquals(result, "<a><b/>bTAIL<c/></a>aTAIL")
+
+ result = tostring(a, with_tail=False)
+ self.assertEquals(result, "<a><b/>bTAIL<c/></a>")
+
+ result = tostring(a, with_tail=True)
+ self.assertEquals(result, "<a><b/>bTAIL<c/></a>aTAIL")
+
def test_tostring_method_text_encoding(self):
tostring = self.etree.tostring
Element = self.etree.Element
More information about the lxml-checkins
mailing list