[Lxml-checkins] r45623 - in lxml/branch/lxml-1.3/src/lxml: . tests

scoder at codespeak.net scoder at codespeak.net
Mon Aug 13 15:11:28 CEST 2007


Author: scoder
Date: Mon Aug 13 15:11:28 2007
New Revision: 45623

Modified:
   lxml/branch/lxml-1.3/src/lxml/etree.pyx
   lxml/branch/lxml-1.3/src/lxml/serializer.pxi
   lxml/branch/lxml-1.3/src/lxml/tests/test_etree.py
   lxml/branch/lxml-1.3/src/lxml/tree.pxd
Log:
trunk merge: when serialising an ElementTree, also write out the DTD that was parsed in

Modified: lxml/branch/lxml-1.3/src/lxml/etree.pyx
==============================================================================
--- lxml/branch/lxml-1.3/src/lxml/etree.pyx	(original)
+++ lxml/branch/lxml-1.3/src/lxml/etree.pyx	Mon Aug 13 15:11:28 2007
@@ -254,22 +254,22 @@
         return _elementFactory(self, c_node)
 
     cdef getdoctype(self):
-        cdef tree.xmlDtd* dtd
+        cdef tree.xmlDtd* c_dtd
         cdef xmlNode* c_root_node
         public_id = None
         sys_url   = None
-        dtd = self._c_doc.intSubset
-        if dtd is not NULL:
-            if dtd.ExternalID is not NULL:
-                public_id = funicode(dtd.ExternalID)
-            if dtd.SystemID is not NULL:
-                sys_url = funicode(dtd.SystemID)
-        dtd = self._c_doc.extSubset
-        if dtd is not NULL:
-            if not public_id and dtd.ExternalID is not NULL:
-                public_id = funicode(dtd.ExternalID)
-            if not sys_url and dtd.SystemID is not NULL:
-                sys_url = funicode(dtd.SystemID)
+        c_dtd = self._c_doc.intSubset
+        if c_dtd is not NULL:
+            if c_dtd.ExternalID is not NULL:
+                public_id = funicode(c_dtd.ExternalID)
+            if c_dtd.SystemID is not NULL:
+                sys_url = funicode(c_dtd.SystemID)
+        c_dtd = self._c_doc.extSubset
+        if c_dtd is not NULL:
+            if not public_id and c_dtd.ExternalID is not NULL:
+                public_id = funicode(c_dtd.ExternalID)
+            if not sys_url and c_dtd.SystemID is not NULL:
+                sys_url = funicode(c_dtd.SystemID)
         c_root_node = tree.xmlDocGetRootElement(self._c_doc)
         if c_root_node is NULL:
             root_name = None
@@ -1278,7 +1278,7 @@
             c_write_declaration = encoding not in \
                                   ('US-ASCII', 'ASCII', 'UTF8', 'UTF-8')
         _tofilelike(file, self._context_node, encoding,
-                    c_write_declaration, bool(pretty_print))
+                    c_write_declaration, 1, bool(pretty_print))
 
     def getpath(self, _Element element not None):
         """Returns a structural, absolute XPath expression to find that element.
@@ -1967,10 +1967,10 @@
 
     if isinstance(element_or_tree, _Element):
         return _tostring(<_Element>element_or_tree,
-                         encoding, write_declaration, c_pretty_print)
+                         encoding, write_declaration, 0, c_pretty_print)
     elif isinstance(element_or_tree, _ElementTree):
         return _tostring((<_ElementTree>element_or_tree)._context_node,
-                         encoding, write_declaration, c_pretty_print)
+                         encoding, write_declaration, 1, c_pretty_print)
     else:
         raise TypeError, "Type '%s' cannot be serialized." % type(element_or_tree)
 
@@ -1987,10 +1987,10 @@
     cdef int c_pretty_print
     c_pretty_print = bool(pretty_print)
     if isinstance(element_or_tree, _Element):
-        return _tounicode(<_Element>element_or_tree, c_pretty_print)
+        return _tounicode(<_Element>element_or_tree, 0, c_pretty_print)
     elif isinstance(element_or_tree, _ElementTree):
         return _tounicode((<_ElementTree>element_or_tree)._context_node,
-                          c_pretty_print)
+                          1, c_pretty_print)
     else:
         raise TypeError, "Type '%s' cannot be serialized." % type(element_or_tree)
 

Modified: lxml/branch/lxml-1.3/src/lxml/serializer.pxi
==============================================================================
--- lxml/branch/lxml-1.3/src/lxml/serializer.pxi	(original)
+++ lxml/branch/lxml-1.3/src/lxml/serializer.pxi	Mon Aug 13 15:11:28 2007
@@ -1,7 +1,7 @@
 # XML serialization and output functions
 
 cdef _tostring(_Element element, encoding,
-               int write_xml_declaration, int pretty_print):
+               int write_xml_declaration, int write_doctype, int pretty_print):
     "Serialize an element to an encoded string representation of its XML tree."
     cdef python.PyThreadState* state
     cdef tree.xmlOutputBuffer* c_buffer
@@ -29,7 +29,8 @@
     try:
         state = python.PyEval_SaveThread()
         _writeNodeToBuffer(c_buffer, element._c_node, c_enc,
-                           write_xml_declaration, pretty_print)
+                           write_xml_declaration, write_doctype,
+                           pretty_print)
         tree.xmlOutputBufferFlush(c_buffer)
         python.PyEval_RestoreThread(state)
         if c_buffer.conv is not NULL:
@@ -43,7 +44,7 @@
         tree.xmlOutputBufferClose(c_buffer)
     return result
 
-cdef _tounicode(_Element element, int pretty_print):
+cdef _tounicode(_Element element, int write_doctype, int pretty_print):
     "Serialize an element to the Python unicode representation of its XML tree."
     cdef python.PyThreadState* state
     cdef tree.xmlOutputBuffer* c_buffer
@@ -55,7 +56,8 @@
         raise LxmlError, "Failed to create output buffer"
     try:
         state = python.PyEval_SaveThread()
-        _writeNodeToBuffer(c_buffer, element._c_node, NULL, 0, pretty_print)
+        _writeNodeToBuffer(c_buffer, element._c_node, NULL, 0,
+                           write_doctype, pretty_print)
         tree.xmlOutputBufferFlush(c_buffer)
         python.PyEval_RestoreThread(state)
         if c_buffer.conv is not NULL:
@@ -72,12 +74,15 @@
 
 cdef void _writeNodeToBuffer(tree.xmlOutputBuffer* c_buffer,
                              xmlNode* c_node, char* encoding,
-                             int write_xml_declaration, int pretty_print):
+                             int write_xml_declaration, int write_doctype,
+                             int pretty_print):
     cdef xmlDoc* c_doc
     c_doc = c_node.doc
     if write_xml_declaration:
         _writeDeclarationToBuffer(c_buffer, c_doc.version, encoding)
 
+    if write_doctype:
+        _writeDtdToBuffer(c_buffer, c_doc, c_node.name, encoding)
     _writePrevSiblings(c_buffer, c_node, encoding, pretty_print)
     tree.xmlNodeDumpOutput(c_buffer, c_doc, c_node, 0, pretty_print, encoding)
     _writeTail(c_buffer, c_node, encoding, pretty_print)
@@ -93,6 +98,41 @@
     tree.xmlOutputBufferWriteString(c_buffer, encoding)
     tree.xmlOutputBufferWriteString(c_buffer, "'?>\n")
 
+cdef void _writeDtdToBuffer(tree.xmlOutputBuffer* c_buffer,
+                            xmlDoc* c_doc, char* c_root_name, char* encoding):
+    cdef tree.xmlDtd* c_dtd
+    cdef xmlNode* c_node
+    c_dtd = c_doc.intSubset
+    if c_dtd == NULL or c_dtd.name == NULL:
+        return
+    if c_dtd.ExternalID == NULL and c_dtd.SystemID == NULL:
+        return
+    if cstd.strcmp(c_root_name, c_dtd.name) != 0:
+        return
+    tree.xmlOutputBufferWrite(c_buffer, 10, "<!DOCTYPE ")
+    tree.xmlOutputBufferWriteString(c_buffer, c_dtd.name)
+    if c_dtd.ExternalID != NULL:
+        tree.xmlOutputBufferWrite(c_buffer, 9, ' PUBLIC "')
+        tree.xmlOutputBufferWriteString(c_buffer, c_dtd.ExternalID)
+        tree.xmlOutputBufferWrite(c_buffer, 3, '" "')
+    else:
+        tree.xmlOutputBufferWrite(c_buffer, 9, ' SYSTEM "')
+    tree.xmlOutputBufferWriteString(c_buffer, c_dtd.SystemID)
+    if c_dtd.entities == NULL and c_dtd.elements == NULL and \
+           c_dtd.attributes == NULL and c_dtd.notations == NULL and \
+           c_dtd.pentities == NULL:
+        tree.xmlOutputBufferWrite(c_buffer, 3, '">\n')
+        return
+    tree.xmlOutputBufferWrite(c_buffer, 4, '" [\n')
+    if c_dtd.notations != NULL:
+        tree.xmlDumpNotationTable(c_buffer.buffer,
+                                  <tree.xmlNotationTable*>c_dtd.notations)
+    c_node = c_dtd.children
+    while c_node is not NULL:
+        tree.xmlNodeDumpOutput(c_buffer, c_node.doc, c_node, 0, 0, encoding)
+        c_node = c_node.next
+    tree.xmlOutputBufferWrite(c_buffer, 3, "]>\n")
+
 cdef void _writeTail(tree.xmlOutputBuffer* c_buffer, xmlNode* c_node,
                      char* encoding, int pretty_print):
     "Write the element tail."
@@ -179,7 +219,8 @@
     return (<_FilelikeWriter>ctxt).close()
 
 cdef _tofilelike(f, _Element element, encoding,
-                 int write_xml_declaration, int pretty_print):
+                 int write_xml_declaration, int write_doctype,
+                 int pretty_print):
     cdef python.PyThreadState* state
     cdef _FilelikeWriter writer
     cdef tree.xmlOutputBuffer* c_buffer
@@ -209,7 +250,7 @@
         raise TypeError, "File or filename expected, got '%s'" % type(f)
 
     _writeNodeToBuffer(c_buffer, element._c_node, c_enc,
-                       write_xml_declaration, pretty_print)
+                       write_xml_declaration, write_doctype, pretty_print)
     tree.xmlOutputBufferClose(c_buffer)
     tree.xmlCharEncCloseFunc(enchandler)
     if writer is None:

Modified: lxml/branch/lxml-1.3/src/lxml/tests/test_etree.py
==============================================================================
--- lxml/branch/lxml-1.3/src/lxml/tests/test_etree.py	(original)
+++ lxml/branch/lxml-1.3/src/lxml/tests/test_etree.py	Mon Aug 13 15:11:28 2007
@@ -1502,6 +1502,20 @@
         self.assertEquals(docinfo.system_url,  None)
         self.assertEquals(docinfo.root_name,   'html')
         self.assertEquals(docinfo.doctype, '')
+        
+    def test_dtd_io(self):
+        # check that DTDs that go in also go back out
+        xml = '''\
+        <!DOCTYPE test SYSTEM "test.dtd" [
+          <!ENTITY entity "tasty">
+          <!ELEMENT test (a)>
+          <!ELEMENT a (#PCDATA)>
+        ]>
+        <test><a>test-test</a></test>\
+        '''
+        root = self.etree.parse(StringIO(xml))
+        self.assertEqual(self.etree.tostring(root).replace(" ", ""),
+                         xml.replace(" ", ""))
 
     def test_byte_zero(self):
         Element = self.etree.Element

Modified: lxml/branch/lxml-1.3/src/lxml/tree.pxd
==============================================================================
--- lxml/branch/lxml-1.3/src/lxml/tree.pxd	(original)
+++ lxml/branch/lxml-1.3/src/lxml/tree.pxd	Mon Aug 13 15:11:28 2007
@@ -58,7 +58,8 @@
     
     ctypedef struct xmlDoc
     ctypedef struct xmlAttr
-    
+    ctypedef struct xmlNotationTable
+
     ctypedef enum xmlElementType:
         XML_ELEMENT_NODE=           1
         XML_ATTRIBUTE_NODE=         2
@@ -103,8 +104,16 @@
         unsigned short line
 
     ctypedef struct xmlDtd:
+        char* name
         char* ExternalID
         char* SystemID
+        void* notations
+        void* entities
+        void* pentities
+        void* attributes
+        void* elements
+        xmlNode* children
+        xmlDoc* doc
 
     ctypedef struct xmlDoc:
         xmlElementType type
@@ -152,7 +161,7 @@
         xmlDoc* doc
         
     ctypedef struct xmlBuffer
-    
+
     ctypedef struct xmlOutputBuffer:
         xmlBuffer* buffer
         xmlBuffer* conv
@@ -223,9 +232,12 @@
 
 cdef extern from "libxml/valid.h":
     cdef xmlAttr* xmlGetID(xmlDoc* doc, char* ID)
+    cdef void xmlDumpNotationTable(xmlBuffer* buffer, xmlNotationTable* table)
 
 cdef extern from "libxml/xmlIO.h":
+    cdef void xmlBufferWriteQuotedString(xmlOutputBuffer* out, char* str)
     cdef int xmlOutputBufferWriteString(xmlOutputBuffer* out, char* str)
+    cdef int xmlOutputBufferWrite(xmlOutputBuffer* out, int len, char* str)
     cdef int xmlOutputBufferFlush(xmlOutputBuffer* out)
     cdef int xmlOutputBufferClose(xmlOutputBuffer* out)
 


More information about the lxml-checkins mailing list