[Lxml-checkins] r45623 - in lxml/branch/lxml-1.3/src/lxml: . tests
scoder at codespeak.net
scoder at codespeak.net
Mon Aug 13 15:11:28 CEST 2007
Author: scoder
Date: Mon Aug 13 15:11:28 2007
New Revision: 45623
Modified:
lxml/branch/lxml-1.3/src/lxml/etree.pyx
lxml/branch/lxml-1.3/src/lxml/serializer.pxi
lxml/branch/lxml-1.3/src/lxml/tests/test_etree.py
lxml/branch/lxml-1.3/src/lxml/tree.pxd
Log:
trunk merge: let DTDs that get parsed in also go out if serialising an ElementTree
Modified: lxml/branch/lxml-1.3/src/lxml/etree.pyx
==============================================================================
--- lxml/branch/lxml-1.3/src/lxml/etree.pyx (original)
+++ lxml/branch/lxml-1.3/src/lxml/etree.pyx Mon Aug 13 15:11:28 2007
@@ -254,22 +254,22 @@
return _elementFactory(self, c_node)
cdef getdoctype(self):
- cdef tree.xmlDtd* dtd
+ cdef tree.xmlDtd* c_dtd
cdef xmlNode* c_root_node
public_id = None
sys_url = None
- dtd = self._c_doc.intSubset
- if dtd is not NULL:
- if dtd.ExternalID is not NULL:
- public_id = funicode(dtd.ExternalID)
- if dtd.SystemID is not NULL:
- sys_url = funicode(dtd.SystemID)
- dtd = self._c_doc.extSubset
- if dtd is not NULL:
- if not public_id and dtd.ExternalID is not NULL:
- public_id = funicode(dtd.ExternalID)
- if not sys_url and dtd.SystemID is not NULL:
- sys_url = funicode(dtd.SystemID)
+ c_dtd = self._c_doc.intSubset
+ if c_dtd is not NULL:
+ if c_dtd.ExternalID is not NULL:
+ public_id = funicode(c_dtd.ExternalID)
+ if c_dtd.SystemID is not NULL:
+ sys_url = funicode(c_dtd.SystemID)
+ c_dtd = self._c_doc.extSubset
+ if c_dtd is not NULL:
+ if not public_id and c_dtd.ExternalID is not NULL:
+ public_id = funicode(c_dtd.ExternalID)
+ if not sys_url and c_dtd.SystemID is not NULL:
+ sys_url = funicode(c_dtd.SystemID)
c_root_node = tree.xmlDocGetRootElement(self._c_doc)
if c_root_node is NULL:
root_name = None
@@ -1278,7 +1278,7 @@
c_write_declaration = encoding not in \
('US-ASCII', 'ASCII', 'UTF8', 'UTF-8')
_tofilelike(file, self._context_node, encoding,
- c_write_declaration, bool(pretty_print))
+ c_write_declaration, 1, bool(pretty_print))
def getpath(self, _Element element not None):
"""Returns a structural, absolute XPath expression to find that element.
@@ -1967,10 +1967,10 @@
if isinstance(element_or_tree, _Element):
return _tostring(<_Element>element_or_tree,
- encoding, write_declaration, c_pretty_print)
+ encoding, write_declaration, 0, c_pretty_print)
elif isinstance(element_or_tree, _ElementTree):
return _tostring((<_ElementTree>element_or_tree)._context_node,
- encoding, write_declaration, c_pretty_print)
+ encoding, write_declaration, 1, c_pretty_print)
else:
raise TypeError, "Type '%s' cannot be serialized." % type(element_or_tree)
@@ -1987,10 +1987,10 @@
cdef int c_pretty_print
c_pretty_print = bool(pretty_print)
if isinstance(element_or_tree, _Element):
- return _tounicode(<_Element>element_or_tree, c_pretty_print)
+ return _tounicode(<_Element>element_or_tree, 0, c_pretty_print)
elif isinstance(element_or_tree, _ElementTree):
return _tounicode((<_ElementTree>element_or_tree)._context_node,
- c_pretty_print)
+ 1, c_pretty_print)
else:
raise TypeError, "Type '%s' cannot be serialized." % type(element_or_tree)
Modified: lxml/branch/lxml-1.3/src/lxml/serializer.pxi
==============================================================================
--- lxml/branch/lxml-1.3/src/lxml/serializer.pxi (original)
+++ lxml/branch/lxml-1.3/src/lxml/serializer.pxi Mon Aug 13 15:11:28 2007
@@ -1,7 +1,7 @@
# XML serialization and output functions
cdef _tostring(_Element element, encoding,
- int write_xml_declaration, int pretty_print):
+ int write_xml_declaration, int write_doctype, int pretty_print):
"Serialize an element to an encoded string representation of its XML tree."
cdef python.PyThreadState* state
cdef tree.xmlOutputBuffer* c_buffer
@@ -29,7 +29,8 @@
try:
state = python.PyEval_SaveThread()
_writeNodeToBuffer(c_buffer, element._c_node, c_enc,
- write_xml_declaration, pretty_print)
+ write_xml_declaration, write_doctype,
+ pretty_print)
tree.xmlOutputBufferFlush(c_buffer)
python.PyEval_RestoreThread(state)
if c_buffer.conv is not NULL:
@@ -43,7 +44,7 @@
tree.xmlOutputBufferClose(c_buffer)
return result
-cdef _tounicode(_Element element, int pretty_print):
+cdef _tounicode(_Element element, int write_doctype, int pretty_print):
"Serialize an element to the Python unicode representation of its XML tree."
cdef python.PyThreadState* state
cdef tree.xmlOutputBuffer* c_buffer
@@ -55,7 +56,8 @@
raise LxmlError, "Failed to create output buffer"
try:
state = python.PyEval_SaveThread()
- _writeNodeToBuffer(c_buffer, element._c_node, NULL, 0, pretty_print)
+ _writeNodeToBuffer(c_buffer, element._c_node, NULL, 0,
+ write_doctype, pretty_print)
tree.xmlOutputBufferFlush(c_buffer)
python.PyEval_RestoreThread(state)
if c_buffer.conv is not NULL:
@@ -72,12 +74,15 @@
cdef void _writeNodeToBuffer(tree.xmlOutputBuffer* c_buffer,
xmlNode* c_node, char* encoding,
- int write_xml_declaration, int pretty_print):
+ int write_xml_declaration, int write_doctype,
+ int pretty_print):
cdef xmlDoc* c_doc
c_doc = c_node.doc
if write_xml_declaration:
_writeDeclarationToBuffer(c_buffer, c_doc.version, encoding)
+ if write_doctype:
+ _writeDtdToBuffer(c_buffer, c_doc, c_node.name, encoding)
_writePrevSiblings(c_buffer, c_node, encoding, pretty_print)
tree.xmlNodeDumpOutput(c_buffer, c_doc, c_node, 0, pretty_print, encoding)
_writeTail(c_buffer, c_node, encoding, pretty_print)
@@ -93,6 +98,41 @@
tree.xmlOutputBufferWriteString(c_buffer, encoding)
tree.xmlOutputBufferWriteString(c_buffer, "'?>\n")
+cdef void _writeDtdToBuffer(tree.xmlOutputBuffer* c_buffer,
+ xmlDoc* c_doc, char* c_root_name, char* encoding):
+ cdef tree.xmlDtd* c_dtd
+ cdef xmlNode* c_node
+ c_dtd = c_doc.intSubset
+ if c_dtd == NULL or c_dtd.name == NULL:
+ return
+ if c_dtd.ExternalID == NULL and c_dtd.SystemID == NULL:
+ return
+ if cstd.strcmp(c_root_name, c_dtd.name) != 0:
+ return
+ tree.xmlOutputBufferWrite(c_buffer, 10, "<!DOCTYPE ")
+ tree.xmlOutputBufferWriteString(c_buffer, c_dtd.name)
+ if c_dtd.ExternalID != NULL:
+ tree.xmlOutputBufferWrite(c_buffer, 9, ' PUBLIC "')
+ tree.xmlOutputBufferWriteString(c_buffer, c_dtd.ExternalID)
+ tree.xmlOutputBufferWrite(c_buffer, 3, '" "')
+ else:
+ tree.xmlOutputBufferWrite(c_buffer, 9, ' SYSTEM "')
+ tree.xmlOutputBufferWriteString(c_buffer, c_dtd.SystemID)
+ if c_dtd.entities == NULL and c_dtd.elements == NULL and \
+ c_dtd.attributes == NULL and c_dtd.notations == NULL and \
+ c_dtd.pentities == NULL:
+ tree.xmlOutputBufferWrite(c_buffer, 3, '">\n')
+ return
+ tree.xmlOutputBufferWrite(c_buffer, 4, '" [\n')
+ if c_dtd.notations != NULL:
+ tree.xmlDumpNotationTable(c_buffer.buffer,
+ <tree.xmlNotationTable*>c_dtd.notations)
+ c_node = c_dtd.children
+ while c_node is not NULL:
+ tree.xmlNodeDumpOutput(c_buffer, c_node.doc, c_node, 0, 0, encoding)
+ c_node = c_node.next
+ tree.xmlOutputBufferWrite(c_buffer, 3, "]>\n")
+
cdef void _writeTail(tree.xmlOutputBuffer* c_buffer, xmlNode* c_node,
char* encoding, int pretty_print):
"Write the element tail."
@@ -179,7 +219,8 @@
return (<_FilelikeWriter>ctxt).close()
cdef _tofilelike(f, _Element element, encoding,
- int write_xml_declaration, int pretty_print):
+ int write_xml_declaration, int write_doctype,
+ int pretty_print):
cdef python.PyThreadState* state
cdef _FilelikeWriter writer
cdef tree.xmlOutputBuffer* c_buffer
@@ -209,7 +250,7 @@
raise TypeError, "File or filename expected, got '%s'" % type(f)
_writeNodeToBuffer(c_buffer, element._c_node, c_enc,
- write_xml_declaration, pretty_print)
+ write_xml_declaration, write_doctype, pretty_print)
tree.xmlOutputBufferClose(c_buffer)
tree.xmlCharEncCloseFunc(enchandler)
if writer is None:
Modified: lxml/branch/lxml-1.3/src/lxml/tests/test_etree.py
==============================================================================
--- lxml/branch/lxml-1.3/src/lxml/tests/test_etree.py (original)
+++ lxml/branch/lxml-1.3/src/lxml/tests/test_etree.py Mon Aug 13 15:11:28 2007
@@ -1502,6 +1502,20 @@
self.assertEquals(docinfo.system_url, None)
self.assertEquals(docinfo.root_name, 'html')
self.assertEquals(docinfo.doctype, '')
+
+ def test_dtd_io(self):
+ # check that DTDs that go in also go back out
+ xml = '''\
+ <!DOCTYPE test SYSTEM "test.dtd" [
+ <!ENTITY entity "tasty">
+ <!ELEMENT test (a)>
+ <!ELEMENT a (#PCDATA)>
+ ]>
+ <test><a>test-test</a></test>\
+ '''
+ root = self.etree.parse(StringIO(xml))
+ self.assertEqual(self.etree.tostring(root).replace(" ", ""),
+ xml.replace(" ", ""))
def test_byte_zero(self):
Element = self.etree.Element
Modified: lxml/branch/lxml-1.3/src/lxml/tree.pxd
==============================================================================
--- lxml/branch/lxml-1.3/src/lxml/tree.pxd (original)
+++ lxml/branch/lxml-1.3/src/lxml/tree.pxd Mon Aug 13 15:11:28 2007
@@ -58,7 +58,8 @@
ctypedef struct xmlDoc
ctypedef struct xmlAttr
-
+ ctypedef struct xmlNotationTable
+
ctypedef enum xmlElementType:
XML_ELEMENT_NODE= 1
XML_ATTRIBUTE_NODE= 2
@@ -103,8 +104,16 @@
unsigned short line
ctypedef struct xmlDtd:
+ char* name
char* ExternalID
char* SystemID
+ void* notations
+ void* entities
+ void* pentities
+ void* attributes
+ void* elements
+ xmlNode* children
+ xmlDoc* doc
ctypedef struct xmlDoc:
xmlElementType type
@@ -152,7 +161,7 @@
xmlDoc* doc
ctypedef struct xmlBuffer
-
+
ctypedef struct xmlOutputBuffer:
xmlBuffer* buffer
xmlBuffer* conv
@@ -223,9 +232,12 @@
cdef extern from "libxml/valid.h":
cdef xmlAttr* xmlGetID(xmlDoc* doc, char* ID)
+ cdef void xmlDumpNotationTable(xmlBuffer* buffer, xmlNotationTable* table)
cdef extern from "libxml/xmlIO.h":
+ cdef void xmlBufferWriteQuotedString(xmlOutputBuffer* out, char* str)
cdef int xmlOutputBufferWriteString(xmlOutputBuffer* out, char* str)
+ cdef int xmlOutputBufferWrite(xmlOutputBuffer* out, int len, char* str)
cdef int xmlOutputBufferFlush(xmlOutputBuffer* out)
cdef int xmlOutputBufferClose(xmlOutputBuffer* out)
More information about the lxml-checkins mailing list