[Lxml-checkins] r39452 - in lxml/trunk: . src/lxml src/lxml/tests

scoder at codespeak.net scoder at codespeak.net
Mon Feb 26 17:27:40 CET 2007


Author: scoder
Date: Mon Feb 26 17:27:38 2007
New Revision: 39452

Added:
   lxml/trunk/src/lxml/dtd.pxi
   lxml/trunk/src/lxml/dtdvalid.pxd
   lxml/trunk/src/lxml/tests/test_dtd.py
Modified:
   lxml/trunk/CHANGES.txt
   lxml/trunk/src/lxml/etree.pyx
   lxml/trunk/src/lxml/parser.pxi
   lxml/trunk/src/lxml/tests/test_etree.py
   lxml/trunk/src/lxml/tree.pxd
   lxml/trunk/src/lxml/xmlparser.pxd
Log:
new DTD() validator to validate against external DTDs

Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt	(original)
+++ lxml/trunk/CHANGES.txt	Mon Feb 26 17:27:38 2007
@@ -8,6 +8,8 @@
 Features added
 --------------
 
+* ``DTD`` validator class (like ``RelaxNG`` and ``XMLSchema``)
+
 * HTML generator helpers by Fredrik Lundh in ``lxml.htmlbuilder``
 
 * ``ElementMaker`` XML generator by Fredrik Lundh in ``lxml.builder.E``

Added: lxml/trunk/src/lxml/dtd.pxi
==============================================================================
--- (empty file)
+++ lxml/trunk/src/lxml/dtd.pxi	Mon Feb 26 17:27:38 2007
@@ -0,0 +1,109 @@
+# support for DTD validation
+cimport dtdvalid
+
+class DTDError(LxmlError):
+    """Base class for all errors raised by the DTD support."""
+    pass
+
+class DTDParseError(DTDError):
+    """Raised when a DTD cannot be loaded or parsed."""
+    pass
+
+class DTDValidateError(DTDError):
+    """Raised when DTD validation fails with an internal error."""
+    pass
+
+################################################################################
+# DTD
+
+cdef class DTD(_Validator):
+    """A DTD validator.
+
+    Can load from filesystem directly given a filename, or from a file-like
+    object (anything that has a ``read`` attribute).  Alternatively, pass
+    the keyword parameter ``external_id`` to load from a catalog.
+
+    Raises ``DTDParseError`` if neither argument is given, if ``file`` is
+    neither a string nor file-like, or if the DTD cannot be parsed.
+    """
+    cdef tree.xmlDtd* _c_dtd
+    def __init__(self, file=None, external_id=None):
+        self._c_dtd = NULL
+        if file is not None:
+            if python._isString(file):
+                self._c_dtd = xmlparser.xmlParseDTD(NULL, _cstr(file))
+            elif hasattr(file, 'read'):
+                self._c_dtd = _parseDtdFromFilelike(file)
+            else:
+                # neither a filename nor an object we can read from
+                raise DTDParseError, "file must be a filename or file-like object"
+        elif external_id is not None:
+            self._c_dtd = xmlparser.xmlParseDTD(external_id, NULL)
+        else:
+            raise DTDParseError, "either filename or external ID required"
+
+        if self._c_dtd is NULL:
+            raise DTDParseError, "error parsing DTD"
+        _Validator.__init__(self)
+
+    def __dealloc__(self):
+        # free the parsed DTD that __init__ stored on this instance
+        tree.xmlFreeDtd(self._c_dtd)
+
+    def __call__(self, etree):
+        """Validate doc using the DTD.
+
+        Returns true if the document is valid, false if not.  Raises
+        ``DTDError`` if no validation context can be created and
+        ``DTDValidateError`` if validation reports an internal error.
+        """
+        cdef python.PyThreadState* state
+        cdef _Document doc
+        cdef _Element root_node
+        cdef xmlDoc* c_doc
+        cdef dtdvalid.xmlValidCtxt* valid_ctxt
+        cdef int ret
+
+        doc = _documentOrRaise(etree)
+        root_node = _rootNodeOrRaise(etree)
+
+        self._error_log.connect()
+        valid_ctxt = dtdvalid.xmlNewValidCtxt()
+        if valid_ctxt is NULL:
+            self._error_log.disconnect()
+            raise DTDError, "Failed to create validation context"
+
+        # NOTE(review): _fakeRootDoc presumably yields a document rooted at
+        # root_node, undone again by _destroyFakeDoc -- confirm
+        c_doc = _fakeRootDoc(doc._c_doc, root_node._c_node)
+        # release the GIL while libxml2 runs the validation
+        state = python.PyEval_SaveThread()
+        ret = dtdvalid.xmlValidateDtd(valid_ctxt, c_doc, self._c_dtd)
+        python.PyEval_RestoreThread(state)
+        _destroyFakeDoc(doc._c_doc, c_doc)
+
+        dtdvalid.xmlFreeValidCtxt(valid_ctxt)
+
+        self._error_log.disconnect()
+        if ret == -1:
+            raise DTDValidateError, "Internal error in DTD validation"
+        return ret == 1
+
+
+cdef tree.xmlDtd* _parseDtdFromFilelike(file) except NULL:
+    # Parse a DTD read from the file-like object ``file``.  Never returns
+    # NULL without an exception set: a NULL parse result is turned into
+    # DTDParseError.
+    cdef _ExceptionContext exc_context
+    cdef _FileParserContext dtd_parser
+    cdef tree.xmlDtd* c_dtd
+    exc_context = _ExceptionContext()
+    dtd_parser = _FileParserContext(file, exc_context)
+
+    c_dtd = dtd_parser._readDtd()
+
+    # presumably re-raises an error captured while reading from ``file``
+    exc_context._raise_if_stored()
+    if c_dtd is NULL:
+        raise DTDParseError, "error parsing DTD"
+    return c_dtd

Added: lxml/trunk/src/lxml/dtdvalid.pxd
==============================================================================
--- (empty file)
+++ lxml/trunk/src/lxml/dtdvalid.pxd	Mon Feb 26 17:27:38 2007
@@ -0,0 +1,10 @@
+cimport tree
+from tree cimport xmlDoc, xmlDtd
+
+cdef extern from "libxml/valid.h":  # libxml2 DTD validation API
+    ctypedef struct xmlValidCtxt  # opaque: no fields declared here
+
+    cdef xmlValidCtxt* xmlNewValidCtxt()  # may return NULL
+    cdef void xmlFreeValidCtxt(xmlValidCtxt* cur)
+
+    cdef int xmlValidateDtd(xmlValidCtxt* ctxt, xmlDoc* doc, xmlDtd* dtd)

Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx	(original)
+++ lxml/trunk/src/lxml/etree.pyx	Mon Feb 26 17:27:38 2007
@@ -1893,6 +1893,7 @@
         def __get__(self):
             return self._error_log.copy()
 
+include "dtd.pxi"       # DTD
 include "relaxng.pxi"   # RelaxNG
 include "xmlschema.pxi" # XMLSchema
 

Modified: lxml/trunk/src/lxml/parser.pxi
==============================================================================
--- lxml/trunk/src/lxml/parser.pxi	(original)
+++ lxml/trunk/src/lxml/parser.pxi	Mon Feb 26 17:27:38 2007
@@ -220,6 +220,27 @@
         python.PyEval_RestoreThread(state)
         return result
 
+    cdef tree.xmlDtd* _readDtd(self):
+        # Parse a DTD by pulling data through _readFilelikeParser with this
+        # object as the read context.  Returns the parsed DTD or NULL on
+        # failure; declared without an except clause, so callers must check
+        # the result themselves.
+        cdef python.PyThreadState* state
+        cdef tree.xmlDtd* result
+        cdef xmlparser.xmlParserInputBuffer* c_buffer
+        c_buffer = xmlparser.xmlAllocParserInputBuffer(0)
+        if c_buffer is NULL:
+            # allocation failed: report it as a parse failure instead of
+            # dereferencing NULL below
+            return NULL
+        c_buffer.context = <python.PyObject*>self
+        c_buffer.readcallback = _readFilelikeParser
+        # release the GIL while libxml2 parses; per the libxml2 docs,
+        # xmlIOParseDTD frees c_buffer itself in any case
+        state = python.PyEval_SaveThread()
+        result = xmlparser.xmlIOParseDTD(NULL, c_buffer, 0)
+        python.PyEval_RestoreThread(state)
+        return result
+
     cdef int copyToBuffer(self, char* c_buffer, int c_size):
         cdef char* c_start
         cdef Py_ssize_t byte_count, remaining

Added: lxml/trunk/src/lxml/tests/test_dtd.py
==============================================================================
--- (empty file)
+++ lxml/trunk/src/lxml/tests/test_dtd.py	Mon Feb 26 17:27:38 2007
@@ -0,0 +1,67 @@
+# -*- coding: utf-8 -*-
+
+"""
+Test cases related to DTD parsing and validation
+"""
+
+import unittest
+
+from common_imports import etree, StringIO, HelperTestCase, fileInTestDir
+
+class ETreeDtdTestCase(HelperTestCase):
+    # Covers etree.DTD and parsing with attribute_defaults=True.
+
+    def test_dtd(self):
+        pass
+
+    def test_dtd_file(self):
+        # a DTD loaded by filename must accept the test document
+        document = etree.parse(fileInTestDir("test.xml"))
+        element = document.getroot()
+
+        validator = etree.DTD(fileInTestDir("test.dtd"))
+        self.assert_(validator.validate(element))
+
+    def test_dtd_stringio(self):
+        # a DTD can also be read from a file-like object
+        element = etree.XML("<b/>")
+        validator = etree.DTD(StringIO("<!ELEMENT b EMPTY>"))
+        self.assert_(validator.validate(element))
+
+    def test_dtd_invalid(self):
+        # <b> is declared EMPTY, so a child element must be rejected
+        element = etree.XML("<b><a/></b>")
+        validator = etree.DTD(StringIO("<!ELEMENT b EMPTY>"))
+        self.assertRaises(
+            etree.DocumentInvalid, validator.assertValid, element)
+
+    def test_dtd_broken(self):
+        # an unparsable declaration must fail when the DTD is constructed
+        broken_dtd = StringIO("<!ELEMENT b HONKEY>")
+        self.assertRaises(etree.DTDParseError, etree.DTD, broken_dtd)
+
+    def test_parse_file_dtd(self):
+        # parse with attribute_defaults=True, then check "default" values
+        dtd_parser = etree.XMLParser(attribute_defaults=True)
+        element = etree.parse(fileInTestDir('test.xml'), dtd_parser).getroot()
+
+        self.assertEquals("valueA", element.get("default"))
+        self.assertEquals("valueB", element[0].get("default"))
+
+    def test_iterparse_file_dtd(self):
+        # same expectation as test_parse_file_dtd, driven via iterparse()
+        events = etree.iterparse(fileInTestDir("test.xml"), events=("start",),
+                                 attribute_defaults=True)
+        defaults = []
+        for _, element in events:
+            defaults.append(element.get("default"))
+        self.assertEquals(["valueA", "valueB"], defaults)
+
+
+def test_suite():
+    # bundle every test of ETreeDtdTestCase into a single suite
+    cases = unittest.makeSuite(ETreeDtdTestCase)
+    return unittest.TestSuite([cases])
+
+if __name__ == '__main__':
+    unittest.main()

Modified: lxml/trunk/src/lxml/tests/test_etree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_etree.py	(original)
+++ lxml/trunk/src/lxml/tests/test_etree.py	Mon Feb 26 17:27:38 2007
@@ -70,20 +70,6 @@
         self.assertEquals("TEST", root.get("attr"))
         self.assertRaises(TypeError, root.set, "newattr", 5)
 
-    def test_parse_file_dtd(self):
-        parse = self.etree.parse
-        parser = self.etree.XMLParser(attribute_defaults=True)
-
-        tree = parse(fileInTestDir('test.xml'), parser)
-        root = tree.getroot()
-
-        self.assertEquals(
-            "valueA",
-            root.get("default"))
-        self.assertEquals(
-            "valueB",
-            root[0].get("default"))
-
     def test_pi(self):
         # lxml.etree separates target and text
         Element = self.etree.Element
@@ -178,16 +164,6 @@
         # ET raises ExpatError, lxml raises XMLSyntaxError
         self.assertRaises(self.etree.XMLSyntaxError, list, iterparse(f))
 
-    def test_iterparse_file_dtd(self):
-        iterparse = self.etree.iterparse
-        iterator = iterparse(fileInTestDir("test.xml"), events=("start",),
-                             attribute_defaults=True)
-        attributes = [ element.get("default")
-                       for event, element in iterator ]
-        self.assertEquals(
-            ["valueA", "valueB"],
-            attributes)
-
     def test_iterparse_strip(self):
         iterparse = self.etree.iterparse
         f = StringIO("""

Modified: lxml/trunk/src/lxml/tree.pxd
==============================================================================
--- lxml/trunk/src/lxml/tree.pxd	(original)
+++ lxml/trunk/src/lxml/tree.pxd	Mon Feb 26 17:27:38 2007
@@ -153,7 +153,8 @@
         xmlBuffer* buffer
         xmlBuffer* conv
         
-    cdef void xmlFreeDoc(xmlDoc *cur)
+    cdef void xmlFreeDoc(xmlDoc* cur)
+    cdef void xmlFreeDtd(xmlDtd* cur)
     cdef void xmlFreeNode(xmlNode* cur)
     cdef void xmlFreeNsList(xmlNs* ns)
     cdef void xmlFreeNs(xmlNs* ns)

Modified: lxml/trunk/src/lxml/xmlparser.pxd
==============================================================================
--- lxml/trunk/src/lxml/xmlparser.pxd	(original)
+++ lxml/trunk/src/lxml/xmlparser.pxd	Mon Feb 26 17:27:38 2007
@@ -1,4 +1,4 @@
-from tree cimport xmlDoc, xmlNode, xmlDict
+from tree cimport xmlDoc, xmlNode, xmlDict, xmlDtd
 from tree cimport xmlInputReadCallback, xmlInputCloseCallback
 from xmlerror cimport xmlError
 
@@ -125,6 +125,13 @@
     cdef xmlExternalEntityLoader xmlGetExternalEntityLoader()
     cdef void xmlSetExternalEntityLoader(xmlExternalEntityLoader f)
 
+# DTDs:
+
+    cdef xmlDtd* xmlParseDTD(char* ExternalID, char* SystemID)
+    cdef xmlDtd* xmlIOParseDTD(xmlSAXHandler* sax,
+                               xmlParserInputBuffer* input,
+                               int enc)
+
 cdef extern from "libxml/parserInternals.h":
     cdef xmlParserInput* xmlNewStringInputStream(xmlParserCtxt* ctxt, 
                                                  char* buffer)


More information about the lxml-checkins mailing list