[Lxml-checkins] r39788 - in lxml/trunk: doc src/lxml

scoder at codespeak.net scoder at codespeak.net
Sat Mar 3 13:38:23 CET 2007


Author: scoder
Date: Sat Mar  3 13:38:21 2007
New Revision: 39788

Added:
   lxml/trunk/src/lxml/schematron.pxd
   lxml/trunk/src/lxml/schematron.pxi
Modified:
   lxml/trunk/doc/validation.txt
   lxml/trunk/src/lxml/etree.pyx
Log:
schematron support (disabled by default: requires libxml2 2.6.21+, better 2.6.27)

Modified: lxml/trunk/doc/validation.txt
==============================================================================
--- lxml/trunk/doc/validation.txt	(original)
+++ lxml/trunk/doc/validation.txt	Sat Mar  3 13:38:21 2007
@@ -11,11 +11,17 @@
 .. _`Relax NG`:   http://www.relaxng.org/
 .. _`XML Schema`: http://www.w3.org/XML/Schema
 
+There is also initial support for Schematron_.  However, it is currently
+disabled in lxml builds due to insufficiencies in the implementation as of
+libxml2 2.6.27.
+
+.. _Schematron:   http://www.ascc.net/xml/schematron
+
 .. contents::
 .. 
    1  DTD
    2  RelaxNG
-   2  XMLSchema
+   3  XMLSchema
 
 The usual setup procedure::
 
@@ -114,10 +120,9 @@
     [...]
   AssertionError: Document does not comply with schema
 
-Starting with version 0.9, lxml now has a simple API to report the errors
-generated by libxml2. If you want to find out why the validation failed in the
-second case, you can look up the error log of the validation process and check
-it for relevant messages::
+If you want to find out why the validation failed in the second case, you can
+look up the error log of the validation process and check it for relevant
+messages::
 
   >>> log = relaxng.error_log
   >>> print log.last_error
@@ -126,7 +131,7 @@
 You can see that the error (ERROR) happened during RelaxNG validation
 (RELAXNGV).  The message then tells you what went wrong.  Note that this error
 is local to the RelaxNG object.  It will only contain log entries that
-appeares during the validation.  The DocumentInvalid exception raised by the
+appeared during the validation.  The DocumentInvalid exception raised by the
 ``assertValid`` method above provides access to the global error log (like all
 other lxml exceptions).
 

Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx	(original)
+++ lxml/trunk/src/lxml/etree.pyx	Sat Mar  3 13:38:21 2007
@@ -1893,9 +1893,10 @@
         def __get__(self):
             return self._error_log.copy()
 
-include "dtd.pxi"       # DTD
-include "relaxng.pxi"   # RelaxNG
-include "xmlschema.pxi" # XMLSchema
+include "dtd.pxi"        # DTD
+include "relaxng.pxi"    # RelaxNG
+include "xmlschema.pxi"  # XMLSchema
+#include "schematron.pxi" # Schematron
 
 ################################################################################
 # Public C API

Added: lxml/trunk/src/lxml/schematron.pxd
==============================================================================
--- (empty file)
+++ lxml/trunk/src/lxml/schematron.pxd	Sat Mar  3 13:38:21 2007
@@ -0,0 +1,28 @@
+cimport tree
+from tree cimport xmlDoc, xmlDtd
+
+cdef extern from "libxml/schematron.h":
+    ctypedef struct xmlSchematron            # schema, as returned by xmlSchematronParse()
+    ctypedef struct xmlSchematronParserCtxt  # declared without fields: only used via pointer
+    ctypedef struct xmlSchematronValidCtxt   # declared without fields: only used via pointer
+    # flag values for the 'options' argument of xmlSchematronNewValidCtxt()
+    ctypedef enum xmlSchematronValidOptions:
+        XML_SCHEMATRON_OUT_QUIET           =    1 # quiet no report
+        XML_SCHEMATRON_OUT_TEXT            =    2 # build a textual report
+        XML_SCHEMATRON_OUT_XML             =    4 # output SVRL
+        XML_SCHEMATRON_OUT_FILE            =  256 # output to a file descriptor
+        XML_SCHEMATRON_OUT_BUFFER          =  512 # output to a buffer
+        XML_SCHEMATRON_OUT_IO              = 1024 # output to I/O mechanism
+    # context constructors; the Schematron class checks each result for NULL
+    cdef xmlSchematronParserCtxt* xmlSchematronNewDocParserCtxt(xmlDoc* doc)
+    cdef xmlSchematronParserCtxt* xmlSchematronNewParserCtxt(char* filename)
+    cdef xmlSchematronValidCtxt* xmlSchematronNewValidCtxt(xmlSchematron* schema,
+                                                           int options)
+    # parsing and validation; Schematron.__call__ reads 0 as valid, -1 as internal error
+    cdef xmlSchematron* xmlSchematronParse(xmlSchematronParserCtxt* ctxt)
+    cdef int xmlSchematronValidateDoc(xmlSchematronValidCtxt* ctxt,
+                                      xmlDoc* instance)
+    # matching release functions for the three struct types declared above
+    cdef void xmlSchematronFreeParserCtxt(xmlSchematronParserCtxt* ctxt)
+    cdef void xmlSchematronFreeValidCtxt(xmlSchematronValidCtxt* ctxt)
+    cdef void xmlSchematronFree(xmlSchematron* schema)

Added: lxml/trunk/src/lxml/schematron.pxi
==============================================================================
--- (empty file)
+++ lxml/trunk/src/lxml/schematron.pxi	Sat Mar  3 13:38:21 2007
@@ -0,0 +1,145 @@
+# support for Schematron validation
+cimport schematron
+
+'''
+Schematron
+----------
+
+Schematron is a less well known, but very powerful schema language.  The main
+idea is to use the capabilities of XPath to put restrictions on the structure
+and the content of XML documents.  Here is a simple example::
+
+  >>> schematron = etree.Schematron(etree.XML("""
+  ... <schema xmlns="http://www.ascc.net/xml/schematron" >
+  ...   <pattern name="id is the only permitted attribute name">
+  ...     <rule context="*">
+  ...       <report test="@*[not(name()='id')]">Attribute
+  ...         <name path="@*[not(name()='id')]"/> is forbidden<name/>
+  ...       </report>
+  ...     </rule>
+  ...   </pattern>
+  ... </schema>
+  ... """))
+
+  >>> xml = etree.XML("""
+  ... <AAA name="aaa">
+  ...   <BBB id="bbb"/>
+  ...   <CCC color="ccc"/>
+  ... </AAA>
+  ... """)
+
+  >>> schematron.validate(xml)
+  0
+
+  >>> xml = etree.XML("""
+  ... <AAA id="aaa">
+  ...   <BBB id="bbb"/>
+  ...   <CCC/>
+  ... </AAA>
+  ... """)
+
+  >>> schematron.validate(xml)
+  1
+
+Schematron was added to libxml2 in version 2.6.21.  As of version 2.6.27,
+however, Schematron lacks support for error reporting other than to stderr.
+It is therefore not possible to retrieve validation warnings and errors in
+lxml.
+'''
+
+class SchematronError(LxmlError):
+    """Base class of all Schematron errors."""
+
+class SchematronParseError(SchematronError):
+    """Error while parsing an XML document as a Schematron schema."""
+
+class SchematronValidateError(SchematronError):
+    """Error while validating an XML document with a Schematron schema."""
+
+################################################################################
+# Schematron
+
+cdef class Schematron(_Validator):
+    """A Schematron validator.
+
+    Pass a root Element or an ElementTree to turn it into a validator.
+    Alternatively, pass a filename as keyword argument 'file' to parse from
+    the file system.  Raises SchematronParseError if neither is given or if
+    the input cannot be turned into a Schematron schema.
+    """
+    cdef schematron.xmlSchematron* _c_schema
+    cdef tree.xmlDoc* _c_doc  # copy of the schema tree; NULL for file input
+    def __init__(self, etree=None, file=None):
+        cdef _Document doc
+        cdef _Element root_node
+        cdef schematron.xmlSchematronParserCtxt* parser_ctxt
+        self._c_schema = NULL
+        self._c_doc = NULL
+        if etree is not None:
+            doc = _documentOrRaise(etree)
+            root_node = _rootNodeOrRaise(etree)
+            self._c_doc = _copyDocRoot(doc._c_doc, root_node._c_node)
+            parser_ctxt = schematron.xmlSchematronNewDocParserCtxt(self._c_doc)
+        elif file is not None:
+            filename = _getFilenameForFile(file)
+            if filename is None:
+                # XXX assume a string object
+                filename = file
+            filename = _encodeFilename(filename)
+            parser_ctxt = schematron.xmlSchematronNewParserCtxt(_cstr(filename))
+        else:
+            raise SchematronParseError, "No tree or file given"
+
+        if parser_ctxt is NULL:
+            if self._c_doc is not NULL:
+                tree.xmlFreeDoc(self._c_doc)
+                self._c_doc = NULL # __dealloc__ must not free it again
+            raise SchematronParseError, "Document is not parsable as Schematron"
+        self._c_schema = schematron.xmlSchematronParse(parser_ctxt)
+        # the context is done on success and on failure, for file input as well
+        schematron.xmlSchematronFreeParserCtxt(parser_ctxt)
+
+        if self._c_schema is NULL:
+            if self._c_doc is not NULL:
+                tree.xmlFreeDoc(self._c_doc)
+                self._c_doc = NULL # __dealloc__ must not free it again
+            raise SchematronParseError, "Document is not a valid Schematron schema"
+        _Validator.__init__(self)
+
+    def __dealloc__(self):
+        schematron.xmlSchematronFree(self._c_schema)
+        tree.xmlFreeDoc(self._c_doc)  # NULL if parsed from a file or already freed
+
+    def __call__(self, etree):
+        """Validate doc using Schematron.
+
+        Returns true if document is valid, false if not.  Raises
+        SchematronError (or a subclass) if the validation could not be run."""
+        cdef python.PyThreadState* state
+        cdef _Document doc
+        cdef _Element root_node
+        cdef xmlDoc* c_doc
+        cdef schematron.xmlSchematronValidCtxt* valid_ctxt
+        cdef int ret
+
+        doc = _documentOrRaise(etree)
+        root_node = _rootNodeOrRaise(etree)
+
+        self._error_log.connect()
+        valid_ctxt = schematron.xmlSchematronNewValidCtxt(
+            self._c_schema, schematron.XML_SCHEMATRON_OUT_QUIET)
+        if valid_ctxt is NULL:
+            self._error_log.disconnect()
+            raise SchematronError, "Failed to create validation context"
+
+        c_doc = _fakeRootDoc(doc._c_doc, root_node._c_node)
+        state = python.PyEval_SaveThread()  # no GIL held during validation
+        ret = schematron.xmlSchematronValidateDoc(valid_ctxt, c_doc)
+        python.PyEval_RestoreThread(state)
+        _destroyFakeDoc(doc._c_doc, c_doc)
+        schematron.xmlSchematronFreeValidCtxt(valid_ctxt)
+
+        self._error_log.disconnect()
+        if ret == -1:
+            raise SchematronValidateError, "Internal error in Schematron validation"
+        return ret == 0


More information about the lxml-checkins mailing list