[Lxml-checkins] r45756 - in lxml/branch/lxml-1.3: . src/lxml src/lxml/tests

scoder at codespeak.net scoder at codespeak.net
Thu Aug 16 22:41:17 CEST 2007


Author: scoder
Date: Thu Aug 16 22:41:16 2007
New Revision: 45756

Modified:
   lxml/branch/lxml-1.3/CHANGES.txt
   lxml/branch/lxml-1.3/src/lxml/dtd.pxi
   lxml/branch/lxml-1.3/src/lxml/etree.pyx
   lxml/branch/lxml-1.3/src/lxml/tests/test_dtd.py
   lxml/branch/lxml-1.3/src/lxml/tree.pxd
Log:
trunk merge: support for retrieving the DTD defined internally in a document for validation

Modified: lxml/branch/lxml-1.3/CHANGES.txt
==============================================================================
--- lxml/branch/lxml-1.3/CHANGES.txt	(original)
+++ lxml/branch/lxml-1.3/CHANGES.txt	Thu Aug 16 22:41:16 2007
@@ -8,6 +8,10 @@
 Features added
 --------------
 
+* The ``docinfo`` on ElementTree objects has new properties ``internalDTD``
+  and ``externalDTD`` that return a DTD object for the internal or external
+  subset of the document, respectively.
+
 * Serialising an ElementTree now includes any internal DTD subsets that are
   part of the document, as well as comments and PIs that are siblings of the
   root node.

Modified: lxml/branch/lxml-1.3/src/lxml/dtd.pxi
==============================================================================
--- lxml/branch/lxml-1.3/src/lxml/dtd.pxi	(original)
+++ lxml/branch/lxml-1.3/src/lxml/dtd.pxi	Thu Aug 16 22:41:16 2007
@@ -96,3 +96,19 @@
     if c_dtd is NULL:
         raise DTDParseError, "error parsing DTD"
     return c_dtd
+
+cdef extern from "etree_defs.h":
+    # macro call to 't->tp_new()' for fast instantiation
+    cdef DTD NEW_DTD "PY_NEW" (object t)
+
+cdef DTD _dtdFactory(tree.xmlDtd* c_dtd):
+    # do not run through DTD.__init__()!
+    cdef DTD dtd
+    if c_dtd is NULL:
+        return None
+    dtd = NEW_DTD(DTD)
+    dtd._c_dtd = tree.xmlCopyDtd(c_dtd)
+    if dtd._c_dtd is NULL:
+        python.PyErr_NoMemory()
+    _Validator.__init__(dtd)
+    return dtd

Modified: lxml/branch/lxml-1.3/src/lxml/etree.pyx
==============================================================================
--- lxml/branch/lxml-1.3/src/lxml/etree.pyx	(original)
+++ lxml/branch/lxml-1.3/src/lxml/etree.pyx	Thu Aug 16 22:41:16 2007
@@ -384,37 +384,76 @@
 
 cdef class DocInfo:
     "Document information provided by parser and DTD."
-    cdef readonly object root_name
-    cdef readonly object public_id
-    cdef readonly object system_url
-    cdef readonly object xml_version
-    cdef readonly object encoding
-    cdef readonly object URL
+    cdef _Document _doc
     def __init__(self, tree):
         "Create a DocInfo object for an ElementTree object or root Element."
-        cdef _Document doc
-        doc = _documentOrRaise(tree)
-        self.root_name, self.public_id, self.system_url = doc.getdoctype()
-        if not self.root_name and (self.public_id or self.system_url):
+        self._doc = _documentOrRaise(tree)
+        root_name, public_id, system_url = self._doc.getdoctype()
+        if not root_name and (public_id or system_url):
             raise ValueError, "Could not find root node"
-        self.xml_version, self.encoding = doc.getxmlinfo()
-        self.URL = doc.getURL()
+
+    property root_name:
+        "Returns the name of the root node as defined by the DOCTYPE."
+        def __get__(self):
+            root_name, public_id, system_url = self._doc.getdoctype()
+            return root_name
+
+    property public_id:
+        "Returns the public ID of the DOCTYPE."
+        def __get__(self):
+            root_name, public_id, system_url = self._doc.getdoctype()
+            return public_id
+
+    property system_url:
+        "Returns the system ID of the DOCTYPE."
+        def __get__(self):
+            root_name, public_id, system_url = self._doc.getdoctype()
+            return system_url
+
+    property xml_version:
+        "Returns the XML version as declared by the document."
+        def __get__(self):
+            xml_version, encoding = self._doc.getxmlinfo()
+            return xml_version
+
+    property encoding:
+        "Returns the encoding name as declared by the document."
+        def __get__(self):
+            xml_version, encoding = self._doc.getxmlinfo()
+            return encoding
+
+    property URL:
+        "Returns the source URL of the document (or None if unknown)."
+        def __get__(self):
+            return self._doc.getURL()
 
     property doctype:
+        "Returns a DOCTYPE declaration string for the document."
         def __get__(self):
-            if self.public_id:
-                if self.system_url:
+            root_name, public_id, system_url = self._doc.getdoctype()
+            if public_id:
+                if system_url:
                     return '<!DOCTYPE %s PUBLIC "%s" "%s">' % (
-                        self.root_name, self.public_id, self.system_url)
+                        root_name, public_id, system_url)
                 else:
                     return '<!DOCTYPE %s PUBLIC "%s">' % (
-                        self.root_name, self.public_id)
-            elif self.system_url:
+                        root_name, public_id)
+            elif system_url:
                 return '<!DOCTYPE %s SYSTEM "%s">' % (
-                    self.root_name, self.system_url)
+                    root_name, system_url)
             else:
                 return ""
 
+    property internalDTD:
+        "Returns a DTD validator based on the internal subset of the document."
+        def __get__(self):
+            return _dtdFactory(self._doc._c_doc.intSubset)
+
+    property externalDTD:
+        "Returns a DTD validator based on the external subset of the document."
+        def __get__(self):
+            return _dtdFactory(self._doc._c_doc.extSubset)
+
 
 cdef public class _Element [ type LxmlElementType, object LxmlElement ]:
     """Element class.  References a document object and a libxml node.

Modified: lxml/branch/lxml-1.3/src/lxml/tests/test_dtd.py
==============================================================================
--- lxml/branch/lxml-1.3/src/lxml/tests/test_dtd.py	(original)
+++ lxml/branch/lxml-1.3/src/lxml/tests/test_dtd.py	Thu Aug 16 22:41:16 2007
@@ -36,6 +36,31 @@
         dtd = etree.DTD(StringIO("<!ELEMENT b (a)><!ELEMENT a EMPTY>"))
         dtd.assertValid(root)
 
+    def test_dtd_internal(self):
+        root = etree.XML('''
+        <!DOCTYPE b SYSTEM "none" [
+        <!ELEMENT b (a)>
+        <!ELEMENT a EMPTY>
+        ]>
+        <b><a/></b>
+        ''')
+        dtd = etree.ElementTree(root).docinfo.internalDTD
+        self.assert_(dtd)
+        dtd.assertValid(root)
+
+    def test_dtd_internal_invalid(self):
+        root = etree.XML('''
+        <!DOCTYPE b SYSTEM "none" [
+        <!ELEMENT b (a)>
+        <!ELEMENT a (c)>
+        <!ELEMENT c EMPTY>
+        ]>
+        <b><a/></b>
+        ''')
+        dtd = etree.ElementTree(root).docinfo.internalDTD
+        self.assert_(dtd)
+        self.assertFalse(dtd.validate(root))
+
     def test_dtd_broken(self):
         self.assertRaises(etree.DTDParseError, etree.DTD,
                           StringIO("<!ELEMENT b HONKEY>"))

Modified: lxml/branch/lxml-1.3/src/lxml/tree.pxd
==============================================================================
--- lxml/branch/lxml-1.3/src/lxml/tree.pxd	(original)
+++ lxml/branch/lxml-1.3/src/lxml/tree.pxd	Thu Aug 16 22:41:16 2007
@@ -218,6 +218,7 @@
                                 int format, char* encoding)
     cdef void xmlNodeSetName(xmlNode* cur, char* name)
     cdef void xmlNodeSetContent(xmlNode* cur, char* content)
+    cdef xmlDtd* xmlCopyDtd(xmlDtd* dtd)
     cdef xmlDoc* xmlCopyDoc(xmlDoc* doc, int recursive)
     cdef xmlNode* xmlCopyNode(xmlNode* node, int extended)
     cdef xmlNode* xmlDocCopyNode(xmlNode* node, xmlDoc* doc, int extended)


More information about the lxml-checkins mailing list