[Lxml-checkins] r45756 - in lxml/branch/lxml-1.3: . src/lxml src/lxml/tests
scoder at codespeak.net
scoder at codespeak.net
Thu Aug 16 22:41:17 CEST 2007
Author: scoder
Date: Thu Aug 16 22:41:16 2007
New Revision: 45756
Modified:
lxml/branch/lxml-1.3/CHANGES.txt
lxml/branch/lxml-1.3/src/lxml/dtd.pxi
lxml/branch/lxml-1.3/src/lxml/etree.pyx
lxml/branch/lxml-1.3/src/lxml/tests/test_dtd.py
lxml/branch/lxml-1.3/src/lxml/tree.pxd
Log:
trunk merge: support for retrieving the DTD defined internally in a document for validation
Modified: lxml/branch/lxml-1.3/CHANGES.txt
==============================================================================
--- lxml/branch/lxml-1.3/CHANGES.txt (original)
+++ lxml/branch/lxml-1.3/CHANGES.txt Thu Aug 16 22:41:16 2007
@@ -8,6 +8,10 @@
Features added
--------------
+* The ``docinfo`` on ElementTree objects has new properties ``internalDTD``
+ and ``externalDTD`` that return a DTD object for the internal or external
+ subset of the document respectively.
+
* Serialising an ElementTree now includes any internal DTD subsets that are
part of the document, as well as comments and PIs that are siblings of the
root node.
Modified: lxml/branch/lxml-1.3/src/lxml/dtd.pxi
==============================================================================
--- lxml/branch/lxml-1.3/src/lxml/dtd.pxi (original)
+++ lxml/branch/lxml-1.3/src/lxml/dtd.pxi Thu Aug 16 22:41:16 2007
@@ -96,3 +96,19 @@
if c_dtd is NULL:
raise DTDParseError, "error parsing DTD"
return c_dtd
+
+cdef extern from "etree_defs.h":
+ # macro call to 't->tp_new()' for fast instantiation
+ cdef DTD NEW_DTD "PY_NEW" (object t)
+
+cdef DTD _dtdFactory(tree.xmlDtd* c_dtd):
+ # do not run through DTD.__init__()!
+ cdef DTD dtd
+ if c_dtd is NULL:
+ return None
+ dtd = NEW_DTD(DTD)
+ dtd._c_dtd = tree.xmlCopyDtd(c_dtd)
+ if dtd._c_dtd is NULL:
+ python.PyErr_NoMemory()
+ _Validator.__init__(dtd)
+ return dtd
Modified: lxml/branch/lxml-1.3/src/lxml/etree.pyx
==============================================================================
--- lxml/branch/lxml-1.3/src/lxml/etree.pyx (original)
+++ lxml/branch/lxml-1.3/src/lxml/etree.pyx Thu Aug 16 22:41:16 2007
@@ -384,37 +384,76 @@
cdef class DocInfo:
"Document information provided by parser and DTD."
- cdef readonly object root_name
- cdef readonly object public_id
- cdef readonly object system_url
- cdef readonly object xml_version
- cdef readonly object encoding
- cdef readonly object URL
+ cdef _Document _doc
def __init__(self, tree):
"Create a DocInfo object for an ElementTree object or root Element."
- cdef _Document doc
- doc = _documentOrRaise(tree)
- self.root_name, self.public_id, self.system_url = doc.getdoctype()
- if not self.root_name and (self.public_id or self.system_url):
+ self._doc = _documentOrRaise(tree)
+ root_name, public_id, system_url = self._doc.getdoctype()
+ if not root_name and (public_id or system_url):
raise ValueError, "Could not find root node"
- self.xml_version, self.encoding = doc.getxmlinfo()
- self.URL = doc.getURL()
+
+ property root_name:
+ "Returns the name of the root node as defined by the DOCTYPE."
+ def __get__(self):
+ root_name, public_id, system_url = self._doc.getdoctype()
+ return root_name
+
+ property public_id:
+ "Returns the public ID of the DOCTYPE."
+ def __get__(self):
+ root_name, public_id, system_url = self._doc.getdoctype()
+ return public_id
+
+ property system_url:
+ "Returns the system ID of the DOCTYPE."
+ def __get__(self):
+ root_name, public_id, system_url = self._doc.getdoctype()
+ return system_url
+
+ property xml_version:
+ "Returns the XML version as declared by the document."
+ def __get__(self):
+ xml_version, encoding = self._doc.getxmlinfo()
+ return xml_version
+
+ property encoding:
+ "Returns the encoding name as declared by the document."
+ def __get__(self):
+ xml_version, encoding = self._doc.getxmlinfo()
+ return encoding
+
+ property URL:
+ "Returns the source URL of the document (or None if unknown)."
+ def __get__(self):
+ return self._doc.getURL()
property doctype:
+ "Returns a DOCTYPE declaration string for the document."
def __get__(self):
- if self.public_id:
- if self.system_url:
+ root_name, public_id, system_url = self._doc.getdoctype()
+ if public_id:
+ if system_url:
return '<!DOCTYPE %s PUBLIC "%s" "%s">' % (
- self.root_name, self.public_id, self.system_url)
+ root_name, public_id, system_url)
else:
return '<!DOCTYPE %s PUBLIC "%s">' % (
- self.root_name, self.public_id)
- elif self.system_url:
+ root_name, public_id)
+ elif system_url:
return '<!DOCTYPE %s SYSTEM "%s">' % (
- self.root_name, self.system_url)
+ root_name, system_url)
else:
return ""
+ property internalDTD:
+ "Returns a DTD validator based on the internal subset of the document."
+ def __get__(self):
+ return _dtdFactory(self._doc._c_doc.intSubset)
+
+ property externalDTD:
+ "Returns a DTD validator based on the external subset of the document."
+ def __get__(self):
+ return _dtdFactory(self._doc._c_doc.extSubset)
+
cdef public class _Element [ type LxmlElementType, object LxmlElement ]:
"""Element class. References a document object and a libxml node.
Modified: lxml/branch/lxml-1.3/src/lxml/tests/test_dtd.py
==============================================================================
--- lxml/branch/lxml-1.3/src/lxml/tests/test_dtd.py (original)
+++ lxml/branch/lxml-1.3/src/lxml/tests/test_dtd.py Thu Aug 16 22:41:16 2007
@@ -36,6 +36,31 @@
dtd = etree.DTD(StringIO("<!ELEMENT b (a)><!ELEMENT a EMPTY>"))
dtd.assertValid(root)
+ def test_dtd_internal(self):
+ root = etree.XML('''
+ <!DOCTYPE b SYSTEM "none" [
+ <!ELEMENT b (a)>
+ <!ELEMENT a EMPTY>
+ ]>
+ <b><a/></b>
+ ''')
+ dtd = etree.ElementTree(root).docinfo.internalDTD
+ self.assert_(dtd)
+ dtd.assertValid(root)
+
+ def test_dtd_internal_invalid(self):
+ root = etree.XML('''
+ <!DOCTYPE b SYSTEM "none" [
+ <!ELEMENT b (a)>
+ <!ELEMENT a (c)>
+ <!ELEMENT c EMPTY>
+ ]>
+ <b><a/></b>
+ ''')
+ dtd = etree.ElementTree(root).docinfo.internalDTD
+ self.assert_(dtd)
+ self.assertFalse(dtd.validate(root))
+
def test_dtd_broken(self):
self.assertRaises(etree.DTDParseError, etree.DTD,
StringIO("<!ELEMENT b HONKEY>"))
Modified: lxml/branch/lxml-1.3/src/lxml/tree.pxd
==============================================================================
--- lxml/branch/lxml-1.3/src/lxml/tree.pxd (original)
+++ lxml/branch/lxml-1.3/src/lxml/tree.pxd Thu Aug 16 22:41:16 2007
@@ -218,6 +218,7 @@
int format, char* encoding)
cdef void xmlNodeSetName(xmlNode* cur, char* name)
cdef void xmlNodeSetContent(xmlNode* cur, char* content)
+ cdef xmlDtd* xmlCopyDtd(xmlDtd* dtd)
cdef xmlDoc* xmlCopyDoc(xmlDoc* doc, int recursive)
cdef xmlNode* xmlCopyNode(xmlNode* node, int extended)
cdef xmlNode* xmlDocCopyNode(xmlNode* node, xmlDoc* doc, int extended)
More information about the lxml-checkins mailing list