[Lxml-checkins] r39452 - in lxml/trunk: . src/lxml src/lxml/tests
scoder at codespeak.net
scoder at codespeak.net
Mon Feb 26 17:27:40 CET 2007
Author: scoder
Date: Mon Feb 26 17:27:38 2007
New Revision: 39452
Added:
lxml/trunk/src/lxml/dtd.pxi
lxml/trunk/src/lxml/dtdvalid.pxd
lxml/trunk/src/lxml/tests/test_dtd.py
Modified:
lxml/trunk/CHANGES.txt
lxml/trunk/src/lxml/etree.pyx
lxml/trunk/src/lxml/parser.pxi
lxml/trunk/src/lxml/tests/test_etree.py
lxml/trunk/src/lxml/tree.pxd
lxml/trunk/src/lxml/xmlparser.pxd
Log:
new DTD() validator to validate against external DTDs
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Mon Feb 26 17:27:38 2007
@@ -8,6 +8,8 @@
Features added
--------------
+* ``DTD`` validator class (like ``RelaxNG`` and ``XMLSchema``)
+
* HTML generator helpers by Fredrik Lundh in ``lxml.htmlbuilder``
* ``ElementMaker`` XML generator by Fredrik Lundh in ``lxml.builder.E``
Added: lxml/trunk/src/lxml/dtd.pxi
==============================================================================
--- (empty file)
+++ lxml/trunk/src/lxml/dtd.pxi Mon Feb 26 17:27:38 2007
@@ -0,0 +1,91 @@
+# support for DTD validation
+cimport dtdvalid
+
+class DTDError(LxmlError):
+    """Base class for all errors raised by the DTD support code."""
+
+class DTDParseError(DTDError):
+    """Raised when a DTD cannot be loaded or parsed."""
+
+class DTDValidateError(DTDError):
+    """Raised when DTD validation itself fails with an internal error."""
+
+################################################################################
+# DTD
+
+cdef class DTD(_Validator):
+    """A DTD validator.
+
+    Can load from filesystem directly given a filename or a file-like object
+    (anything that has a ``read`` attribute).  Alternatively, pass the keyword
+    parameter ``external_id`` to load from a catalog.
+
+    Raises ``DTDParseError`` if neither argument is given, if ``file`` is of
+    an unsupported type, or if the DTD cannot be parsed.
+    """
+    cdef tree.xmlDtd* _c_dtd
+    def __init__(self, file=None, external_id=None):
+        self._c_dtd = NULL
+        if file is not None:
+            if python._isString(file):
+                # a filename: let libxml2 load it as the system ID
+                self._c_dtd = xmlparser.xmlParseDTD(NULL, _cstr(file))
+            elif hasattr(file, 'read'):
+                # a file-like object: parse through its read() method
+                self._c_dtd = _parseDtdFromFilelike(file)
+            else:
+                # neither a filename nor a file-like object was passed
+                raise DTDParseError, "file must be a filename or file-like object"
+        elif external_id is not None:
+            self._c_dtd = xmlparser.xmlParseDTD(external_id, NULL)
+        else:
+            raise DTDParseError, "either filename or external ID required"
+
+        if self._c_dtd is NULL:
+            raise DTDParseError, "error parsing DTD"
+        _Validator.__init__(self)
+
+    def __dealloc__(self):
+        # release the parsed DTD owned by this validator
+        tree.xmlFreeDtd(self._c_dtd)
+
+    def __call__(self, etree):
+        """Validate doc using the DTD.
+
+        Returns true if the document is valid, false if not.
+
+        Raises ``DTDError`` if no validation context can be created and
+        ``DTDValidateError`` if the validation reports an internal error.
+        """
+        cdef python.PyThreadState* state
+        cdef _Document doc
+        cdef _Element root_node
+        cdef xmlDoc* c_doc
+        cdef dtdvalid.xmlValidCtxt* valid_ctxt
+        cdef int ret
+
+        doc = _documentOrRaise(etree)
+        root_node = _rootNodeOrRaise(etree)
+
+        self._error_log.connect()
+        valid_ctxt = dtdvalid.xmlNewValidCtxt()
+        if valid_ctxt is NULL:
+            # undo the connect() above before bailing out
+            self._error_log.disconnect()
+            raise DTDError, "Failed to create validation context"
+
+        # validate a temporary document view of root_node, then drop it again
+        c_doc = _fakeRootDoc(doc._c_doc, root_node._c_node)
+        # the validation runs between PyEval_SaveThread/RestoreThread,
+        # i.e. without holding the interpreter lock
+        state = python.PyEval_SaveThread()
+        ret = dtdvalid.xmlValidateDtd(valid_ctxt, c_doc, self._c_dtd)
+        python.PyEval_RestoreThread(state)
+        _destroyFakeDoc(doc._c_doc, c_doc)
+
+        dtdvalid.xmlFreeValidCtxt(valid_ctxt)
+
+        self._error_log.disconnect()
+        if ret == -1:
+            raise DTDValidateError, "Internal error in DTD validation"
+        # only a result of 1 counts as "valid"
+        return ret == 1
+
+
+cdef tree.xmlDtd* _parseDtdFromFilelike(file) except NULL:
+    # Parse a DTD from a file-like object and return the resulting xmlDtd.
+    # Re-raises any exception recorded in the exception context during the
+    # read; raises DTDParseError if no DTD was produced.
+    cdef _ExceptionContext context
+    cdef _FileParserContext reader
+    cdef tree.xmlDtd* c_result
+    context = _ExceptionContext()
+    reader = _FileParserContext(file, context)
+
+    c_result = reader._readDtd()
+
+    # NOTE(review): if an exception was stored while c_result is non-NULL,
+    # the parsed DTD does not appear to be freed on this path - confirm.
+    context._raise_if_stored()
+    if c_result is not NULL:
+        return c_result
+    raise DTDParseError, "error parsing DTD"
Added: lxml/trunk/src/lxml/dtdvalid.pxd
==============================================================================
--- (empty file)
+++ lxml/trunk/src/lxml/dtdvalid.pxd Mon Feb 26 17:27:38 2007
@@ -0,0 +1,10 @@
+cimport tree
+from tree cimport xmlDoc, xmlDtd
+
+# Declarations from libxml2's valid.h that the DTD validator uses.
+cdef extern from "libxml/valid.h":
+    # validation context; declared without fields, only used via pointers
+    ctypedef struct xmlValidCtxt
+
+    # create and free a validation context
+    cdef xmlValidCtxt* xmlNewValidCtxt()
+    cdef void xmlFreeValidCtxt(xmlValidCtxt* cur)
+
+    # validate document 'doc' against 'dtd' using the context 'ctxt'
+    cdef int xmlValidateDtd(xmlValidCtxt* ctxt, xmlDoc* doc, xmlDtd* dtd)
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Mon Feb 26 17:27:38 2007
@@ -1893,6 +1893,7 @@
def __get__(self):
return self._error_log.copy()
+include "dtd.pxi" # DTD
include "relaxng.pxi" # RelaxNG
include "xmlschema.pxi" # XMLSchema
Modified: lxml/trunk/src/lxml/parser.pxi
==============================================================================
--- lxml/trunk/src/lxml/parser.pxi (original)
+++ lxml/trunk/src/lxml/parser.pxi Mon Feb 26 17:27:38 2007
@@ -220,6 +220,18 @@
python.PyEval_RestoreThread(state)
return result
+    cdef tree.xmlDtd* _readDtd(self):
+        # Parse a DTD by pulling data through this context's read callback.
+        # Returns the parsed xmlDtd, or NULL if the input buffer could not
+        # be allocated or the DTD could not be parsed.
+        cdef python.PyThreadState* state
+        cdef tree.xmlDtd* result
+        cdef xmlparser.xmlParserInputBuffer* c_buffer
+        c_buffer = xmlparser.xmlAllocParserInputBuffer(0)
+        if c_buffer is NULL:
+            # allocation failed: report it like a failed parse instead of
+            # dereferencing a NULL pointer below
+            return NULL
+        c_buffer.context = <python.PyObject*>self
+        c_buffer.readcallback = _readFilelikeParser
+        # NOTE(review): libxml2 documents xmlIOParseDTD() as freeing the
+        # input buffer itself, so it is not released here - confirm.
+        state = python.PyEval_SaveThread()
+        result = xmlparser.xmlIOParseDTD(NULL, c_buffer, 0)
+        python.PyEval_RestoreThread(state)
+        return result
+
cdef int copyToBuffer(self, char* c_buffer, int c_size):
cdef char* c_start
cdef Py_ssize_t byte_count, remaining
Added: lxml/trunk/src/lxml/tests/test_dtd.py
==============================================================================
--- (empty file)
+++ lxml/trunk/src/lxml/tests/test_dtd.py Mon Feb 26 17:27:38 2007
@@ -0,0 +1,67 @@
+# -*- coding: utf-8 -*-
+
+"""
+Test cases related to DTD parsing and validation
+"""
+
+import unittest
+
+from common_imports import etree, StringIO, HelperTestCase, fileInTestDir
+
+class ETreeDtdTestCase(HelperTestCase):
+    # Tests for loading DTDs and validating / parsing documents with them.
+
+    def test_dtd(self):
+        # placeholder test without assertions
+        pass
+
+    def test_dtd_file(self):
+        # a DTD loaded by filename validates the matching test document
+        document = etree.parse(fileInTestDir("test.xml"))
+        root = document.getroot()
+
+        validator = etree.DTD(fileInTestDir("test.dtd"))
+        is_valid = validator.validate(root)
+        self.assert_(is_valid)
+
+    def test_dtd_stringio(self):
+        # a DTD can be read from a file-like object
+        root = etree.XML("<b/>")
+        dtd_source = StringIO("<!ELEMENT b EMPTY>")
+        validator = etree.DTD(dtd_source)
+        self.assert_(validator.validate(root))
+
+    def test_dtd_invalid(self):
+        # <b> is declared EMPTY, so a <b> with a child must be rejected
+        root = etree.XML("<b><a/></b>")
+        dtd_source = StringIO("<!ELEMENT b EMPTY>")
+        validator = etree.DTD(dtd_source)
+        self.assertRaises(etree.DocumentInvalid, validator.assertValid, root)
+
+    def test_dtd_broken(self):
+        # a syntactically broken DTD is reported as a parse error
+        broken_source = StringIO("<!ELEMENT b HONKEY>")
+        self.assertRaises(etree.DTDParseError, etree.DTD, broken_source)
+
+    def test_parse_file_dtd(self):
+        # attribute defaults are filled in when parsing with the option set
+        parser = etree.XMLParser(attribute_defaults=True)
+
+        root = etree.parse(fileInTestDir('test.xml'), parser).getroot()
+
+        self.assertEquals("valueA", root.get("default"))
+        self.assertEquals("valueB", root[0].get("default"))
+
+    def test_iterparse_file_dtd(self):
+        # the same attribute defaults show up on iterparse "start" events
+        events = etree.iterparse(fileInTestDir("test.xml"),
+                                 events=("start",),
+                                 attribute_defaults=True)
+        defaults = []
+        for _event, element in events:
+            defaults.append(element.get("default"))
+        self.assertEquals(["valueA", "valueB"], defaults)
+
+
+def test_suite():
+    """Return a suite containing all DTD test cases of this module."""
+    dtd_tests = unittest.makeSuite(ETreeDtdTestCase)
+    suite = unittest.TestSuite()
+    suite.addTests([dtd_tests])
+    return suite
+
+if __name__ == '__main__':
+    # run this module's tests when it is executed as a script
+    unittest.main()
Modified: lxml/trunk/src/lxml/tests/test_etree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_etree.py (original)
+++ lxml/trunk/src/lxml/tests/test_etree.py Mon Feb 26 17:27:38 2007
@@ -70,20 +70,6 @@
self.assertEquals("TEST", root.get("attr"))
self.assertRaises(TypeError, root.set, "newattr", 5)
- def test_parse_file_dtd(self):
- parse = self.etree.parse
- parser = self.etree.XMLParser(attribute_defaults=True)
-
- tree = parse(fileInTestDir('test.xml'), parser)
- root = tree.getroot()
-
- self.assertEquals(
- "valueA",
- root.get("default"))
- self.assertEquals(
- "valueB",
- root[0].get("default"))
-
def test_pi(self):
# lxml.etree separates target and text
Element = self.etree.Element
@@ -178,16 +164,6 @@
# ET raises ExpatError, lxml raises XMLSyntaxError
self.assertRaises(self.etree.XMLSyntaxError, list, iterparse(f))
- def test_iterparse_file_dtd(self):
- iterparse = self.etree.iterparse
- iterator = iterparse(fileInTestDir("test.xml"), events=("start",),
- attribute_defaults=True)
- attributes = [ element.get("default")
- for event, element in iterator ]
- self.assertEquals(
- ["valueA", "valueB"],
- attributes)
-
def test_iterparse_strip(self):
iterparse = self.etree.iterparse
f = StringIO("""
Modified: lxml/trunk/src/lxml/tree.pxd
==============================================================================
--- lxml/trunk/src/lxml/tree.pxd (original)
+++ lxml/trunk/src/lxml/tree.pxd Mon Feb 26 17:27:38 2007
@@ -153,7 +153,8 @@
xmlBuffer* buffer
xmlBuffer* conv
- cdef void xmlFreeDoc(xmlDoc *cur)
+ cdef void xmlFreeDoc(xmlDoc* cur)
+ cdef void xmlFreeDtd(xmlDtd* cur)
cdef void xmlFreeNode(xmlNode* cur)
cdef void xmlFreeNsList(xmlNs* ns)
cdef void xmlFreeNs(xmlNs* ns)
Modified: lxml/trunk/src/lxml/xmlparser.pxd
==============================================================================
--- lxml/trunk/src/lxml/xmlparser.pxd (original)
+++ lxml/trunk/src/lxml/xmlparser.pxd Mon Feb 26 17:27:38 2007
@@ -1,4 +1,4 @@
-from tree cimport xmlDoc, xmlNode, xmlDict
+from tree cimport xmlDoc, xmlNode, xmlDict, xmlDtd
from tree cimport xmlInputReadCallback, xmlInputCloseCallback
from xmlerror cimport xmlError
@@ -125,6 +125,13 @@
cdef xmlExternalEntityLoader xmlGetExternalEntityLoader()
cdef void xmlSetExternalEntityLoader(xmlExternalEntityLoader f)
+# DTDs:
+
+ cdef xmlDtd* xmlParseDTD(char* ExternalID, char* SystemID)
+ cdef xmlDtd* xmlIOParseDTD(xmlSAXHandler* sax,
+ xmlParserInputBuffer* input,
+ int enc)
+
cdef extern from "libxml/parserInternals.h":
cdef xmlParserInput* xmlNewStringInputStream(xmlParserCtxt* ctxt,
char* buffer)
More information about the lxml-checkins
mailing list