From scoder at codespeak.net Fri Dec 4 12:14:40 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 4 Dec 2009 12:14:40 +0100 (CET) Subject: [Lxml-checkins] r69889 - in lxml/trunk: . src/lxml Message-ID: <20091204111440.9872F168020@codespeak.net> Author: scoder Date: Fri Dec 4 12:14:40 2009 New Revision: 69889 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/python.pxd Log: r5354 at delle: sbehnel | 2009-12-04 12:14:33 +0100 make sure we use the correct 'unicode' type instead of the redeclared one Modified: lxml/trunk/src/lxml/python.pxd ============================================================================== --- lxml/trunk/src/lxml/python.pxd (original) +++ lxml/trunk/src/lxml/python.pxd Fri Dec 4 12:14:40 2009 @@ -1,4 +1,5 @@ from cstd cimport FILE +cimport cython cdef extern from "Python.h": ctypedef struct PyObject @@ -26,15 +27,15 @@ cdef bint PyBytes_Check(object obj) cdef bint PyBytes_CheckExact(object obj) - cdef unicode PyUnicode_FromEncodedObject(object s, char* encoding, - char* errors) + cdef cython.unicode PyUnicode_FromEncodedObject(object s, char* encoding, + char* errors) cdef bytes PyUnicode_AsEncodedString(object u, char* encoding, - char* errors) - cdef unicode PyUnicode_FromFormat(char* format, ...) # Python 3 - cdef unicode PyUnicode_Decode(char* s, Py_ssize_t size, - char* encoding, char* errors) - cdef unicode PyUnicode_DecodeUTF8(char* s, Py_ssize_t size, char* errors) - cdef unicode PyUnicode_DecodeLatin1(char* s, Py_ssize_t size, char* errors) + char* errors) + cdef cython.unicode PyUnicode_FromFormat(char* format, ...) 
# Python 3 + cdef cython.unicode PyUnicode_Decode(char* s, Py_ssize_t size, + char* encoding, char* errors) + cdef cython.unicode PyUnicode_DecodeUTF8(char* s, Py_ssize_t size, char* errors) + cdef cython.unicode PyUnicode_DecodeLatin1(char* s, Py_ssize_t size, char* errors) cdef bytes PyUnicode_AsUTF8String(object ustring) cdef bytes PyUnicode_AsASCIIString(object ustring) cdef char* PyUnicode_AS_DATA(object ustring) From jholg at codespeak.net Sat Dec 5 22:14:23 2009 From: jholg at codespeak.net (jholg at codespeak.net) Date: Sat, 5 Dec 2009 22:14:23 +0100 (CET) Subject: [Lxml-checkins] r69913 - lxml/branch/iso-schematron Message-ID: <20091205211423.A80BD168020@codespeak.net> Author: jholg Date: Sat Dec 5 22:14:21 2009 New Revision: 69913 Added: lxml/branch/iso-schematron/ - copied from r69912, lxml/trunk/ Log: Created branch for iso schematron addition. From scoder at codespeak.net Sun Dec 6 00:43:28 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 6 Dec 2009 00:43:28 +0100 (CET) Subject: [Lxml-checkins] r69914 - in lxml/trunk: . doc Message-ID: <20091205234328.73702168022@codespeak.net> Author: scoder Date: Sun Dec 6 00:43:26 2009 New Revision: 69914 Modified: lxml/trunk/ (props changed) lxml/trunk/doc/build.txt Log: r5357 at delle: sbehnel | 2009-12-06 00:43:19 +0100 require Cython 0.12 Modified: lxml/trunk/doc/build.txt ============================================================================== --- lxml/trunk/doc/build.txt (original) +++ lxml/trunk/doc/build.txt Sun Dec 6 00:43:26 2009 @@ -46,9 +46,9 @@ want to be an lxml developer, then you do need a working Cython installation. You can use EasyInstall_ to install it:: - easy_install Cython==0.11 + easy_install Cython>=0.12 -lxml currently requires Cython 0.11, later release versions should +lxml currently requires Cython 0.12, later release versions should work as well. 
From scoder at codespeak.net Sun Dec 6 01:33:02 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 6 Dec 2009 01:33:02 +0100 (CET) Subject: [Lxml-checkins] r69915 - in lxml/trunk: . src/lxml Message-ID: <20091206003302.9031A168023@codespeak.net> Author: scoder Date: Sun Dec 6 01:33:01 2009 New Revision: 69915 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/apihelpers.pxi lxml/trunk/src/lxml/objectpath.pxi lxml/trunk/src/lxml/serializer.pxi lxml/trunk/src/lxml/xslt.pxi Log: r5360 at delle: sbehnel | 2009-12-06 01:32:54 +0100 code simplifications based on Cython 0.12 Modified: lxml/trunk/src/lxml/apihelpers.pxi ============================================================================== --- lxml/trunk/src/lxml/apihelpers.pxi (original) +++ lxml/trunk/src/lxml/apihelpers.pxi Sun Dec 6 01:33:01 2009 @@ -1269,7 +1269,7 @@ cdef bint is_non_ascii if python.IS_PYTHON3: slen = cstd.strlen(s) - return python.PyUnicode_DecodeUTF8(s, slen, NULL) + return s[:slen].decode('UTF-8') spos = s is_non_ascii = 0 while spos[0] != c'\0': @@ -1281,8 +1281,8 @@ spos += 1 slen = spos - s if is_non_ascii: - return python.PyUnicode_DecodeUTF8(s, slen, NULL) - return python.PyBytes_FromStringAndSize(s, slen) + return s[:slen].decode('UTF-8') + return s[:slen] cdef object _utf8(object s): cdef int invalid @@ -1396,13 +1396,14 @@ cdef char* c_ns_end cdef Py_ssize_t taglen cdef Py_ssize_t nslen + cdef bytes ns = None # _isString() is much faster than isinstance() if not _isString(tag) and isinstance(tag, QName): tag = (tag).text tag = _utf8(tag) c_tag = _cstr(tag) if c_tag[0] == c'{': - c_tag = c_tag + 1 + c_tag += 1 c_ns_end = cstd.strchr(c_tag, c'}') if c_ns_end is NULL: raise ValueError, u"Invalid tag name" @@ -1411,8 +1412,8 @@ if taglen == 0: raise ValueError, u"Empty tag name" if nslen > 0: - ns = python.PyBytes_FromStringAndSize(c_tag, nslen) - tag = python.PyBytes_FromStringAndSize(c_ns_end+1, taglen) + ns = c_tag[:nslen] + tag = c_ns_end[1:taglen+1] elif 
python.PyBytes_GET_SIZE(tag) == 0: raise ValueError, u"Empty tag name" return ns, tag Modified: lxml/trunk/src/lxml/objectpath.pxi ============================================================================== --- lxml/trunk/src/lxml/objectpath.pxi (original) +++ lxml/trunk/src/lxml/objectpath.pxi Sun Dec 6 01:33:01 2009 @@ -152,13 +152,10 @@ index_end = cstd.strchr(index_pos + 1, c']') if index_end is NULL: raise ValueError, u"index must be enclosed in []" - index = int( - python.PyBytes_FromStringAndSize( - index_pos + 1, (index_end - index_pos - 1))) + index = int(index_pos[1:index_end - index_pos]) if python.PyList_GET_SIZE(new_path) == 0 and index != 0: raise ValueError, u"index not allowed on root node" - name = python.PyBytes_FromStringAndSize( - c_name, (index_pos - c_name)) + name = c_name[:index_pos - c_name] new_path.append( (ns, name, index) ) if python.PyList_GET_SIZE(new_path) == 0: raise ValueError, u"invalid path" Modified: lxml/trunk/src/lxml/serializer.pxi ============================================================================== --- lxml/trunk/src/lxml/serializer.pxi (original) +++ lxml/trunk/src/lxml/serializer.pxi Sun Dec 6 01:33:01 2009 @@ -125,14 +125,11 @@ try: if encoding is _unicode: - result = python.PyUnicode_DecodeUTF8( - tree.xmlBufferContent(c_result_buffer), - tree.xmlBufferLength(c_result_buffer), - 'strict') + result = tree.xmlBufferContent( + c_result_buffer)[:tree.xmlBufferLength(c_result_buffer)].decode('UTF-8') else: - result = python.PyBytes_FromStringAndSize( - tree.xmlBufferContent(c_result_buffer), - tree.xmlBufferLength(c_result_buffer)) + result = tree.xmlBufferContent( + c_result_buffer)[:tree.xmlBufferLength(c_result_buffer)] finally: error_result = tree.xmlOutputBufferClose(c_buffer) if error_result < 0: @@ -332,7 +329,7 @@ try: if self._filelike is None: raise IOError, u"File is already closed" - py_buffer = python.PyBytes_FromStringAndSize(c_buffer, size) + py_buffer = c_buffer[:size] 
self._filelike.write(py_buffer) return size except: Modified: lxml/trunk/src/lxml/xslt.pxi ============================================================================== --- lxml/trunk/src/lxml/xslt.pxi (original) +++ lxml/trunk/src/lxml/xslt.pxi Sun Dec 6 01:33:01 2009 @@ -689,7 +689,7 @@ return '' # we must not use 'funicode' here as this is not always UTF-8 try: - result = python.PyBytes_FromStringAndSize(s, l) + result = s[:l] finally: tree.xmlFree(s) return result From scoder at codespeak.net Tue Dec 15 14:38:01 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 15 Dec 2009 14:38:01 +0100 (CET) Subject: [Lxml-checkins] r70132 - in lxml/trunk: . src/lxml/html Message-ID: <20091215133801.CC849168015@codespeak.net> Author: scoder Date: Tue Dec 15 14:38:00 2009 New Revision: 70132 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/html/__init__.py Log: r5362 at delle: sbehnel | 2009-12-15 14:37:39 +0100 fix urllib usage in Py3 Modified: lxml/trunk/src/lxml/html/__init__.py ============================================================================== --- lxml/trunk/src/lxml/html/__init__.py (original) +++ lxml/trunk/src/lxml/html/__init__.py Tue Dec 15 14:38:00 2009 @@ -846,20 +846,24 @@ return open_http(form.method, url, values) def open_http_urllib(method, url, values): - import urllib if not url: raise ValueError("cannot submit, no URL provided") ## FIXME: should test that it's not a relative URL or something + try: + from urllib import urlencode, urlopen + except ImportError: # Python 3 + from urllib.request import urlopen + from urllib.parse import urlencode if method == 'GET': if '?' in url: url += '&' else: url += '?' 
- url += urllib.urlencode(values) + url += urlencode(values) data = None else: - data = urllib.urlencode(values) - return urllib.urlopen(url, data) + data = urlencode(values) + return urlopen(url, data) class FieldsDict(DictMixin): From scoder at codespeak.net Tue Dec 15 14:49:43 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 15 Dec 2009 14:49:43 +0100 (CET) Subject: [Lxml-checkins] r70134 - in lxml/branch/lxml-2.2: . src/lxml/html Message-ID: <20091215134943.95929168015@codespeak.net> Author: scoder Date: Tue Dec 15 14:49:42 2009 New Revision: 70134 Modified: lxml/branch/lxml-2.2/ (props changed) lxml/branch/lxml-2.2/CHANGES.txt lxml/branch/lxml-2.2/src/lxml/html/__init__.py Log: trunk merge: fix lxml.html.open_http_urllib() in Py3.1 Modified: lxml/branch/lxml-2.2/CHANGES.txt ============================================================================== --- lxml/branch/lxml-2.2/CHANGES.txt (original) +++ lxml/branch/lxml-2.2/CHANGES.txt Tue Dec 15 14:49:42 2009 @@ -14,6 +14,8 @@ Bugs fixed ---------- +* lxml.html.open_http_urllib() did not work in Python 3. + * The functions ``strip_tags()`` and ``strip_elements()`` in ``lxml.etree`` did not remove all occurrences of a tag in all cases. Modified: lxml/branch/lxml-2.2/src/lxml/html/__init__.py ============================================================================== --- lxml/branch/lxml-2.2/src/lxml/html/__init__.py (original) +++ lxml/branch/lxml-2.2/src/lxml/html/__init__.py Tue Dec 15 14:49:42 2009 @@ -821,18 +821,22 @@ return open_http(form.method, form.action, values) def open_http_urllib(method, url, values): - import urllib ## FIXME: should test that it's not a relative URL or something + try: + from urllib import urlencode, urlopen + except ImportError: # Python 3 + from urllib.request import urlopen + from urllib.parse import urlencode if method == 'GET': if '?' in url: url += '&' else: url += '?' 
- url += urllib.urlencode(values) + url += urlencode(values) data = None else: - data = urllib.urlencode(values) - return urllib.urlopen(url, data) + data = urlencode(values) + return urlopen(url, data) class FieldsDict(DictMixin): From scoder at codespeak.net Tue Dec 15 14:52:57 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 15 Dec 2009 14:52:57 +0100 (CET) Subject: [Lxml-checkins] r70136 - lxml/trunk Message-ID: <20091215135257.8C764168015@codespeak.net> Author: scoder Date: Tue Dec 15 14:52:56 2009 New Revision: 70136 Modified: lxml/trunk/ (props changed) lxml/trunk/test.py Log: 2.2 branch merge: fix test runner in Py3.1 Modified: lxml/trunk/test.py ============================================================================== --- lxml/trunk/test.py (original) +++ lxml/trunk/test.py Tue Dec 15 14:52:56 2009 @@ -71,11 +71,18 @@ import getopt import unittest import traceback + try: set except NameError: from sets import Set as set +try: + # Python >=2.7 and >=3.2 + from unittest.runner import _TextTestResult +except ImportError: + from unittest import _TextTestResult + __metaclass__ = type def stderr(text): @@ -302,14 +309,14 @@ return results -class CustomTestResult(unittest._TextTestResult): +class CustomTestResult(_TextTestResult): """Customised TestResult. It can show a progress bar, and displays tracebacks for errors and failures as soon as they happen, in addition to listing them all at the end. """ - __super = unittest._TextTestResult + __super = _TextTestResult __super_init = __super.__init__ __super_startTest = __super.startTest __super_stopTest = __super.stopTest From jholg at codespeak.net Thu Dec 17 00:23:29 2009 From: jholg at codespeak.net (jholg at codespeak.net) Date: Thu, 17 Dec 2009 00:23:29 +0100 (CET) Subject: [Lxml-checkins] r70160 - in lxml/branch/iso-schematron: . 
doc src/lxml src/lxml/isoschematron src/lxml/isoschematron/resources src/lxml/isoschematron/resources/rng src/lxml/isoschematron/resources/xsl src/lxml/isoschematron/resources/xsl/iso-schematron-xslt1 src/lxml/tests Message-ID: <20091216232329.522DA168011@codespeak.net> Author: jholg Date: Thu Dec 17 00:23:29 2009 New Revision: 70160 Added: lxml/branch/iso-schematron/src/lxml/isoschematron/ lxml/branch/iso-schematron/src/lxml/isoschematron/__init__.py lxml/branch/iso-schematron/src/lxml/isoschematron/resources/ lxml/branch/iso-schematron/src/lxml/isoschematron/resources/rng/ lxml/branch/iso-schematron/src/lxml/isoschematron/resources/rng/iso-schematron.rng lxml/branch/iso-schematron/src/lxml/isoschematron/resources/xsl/ lxml/branch/iso-schematron/src/lxml/isoschematron/resources/xsl/RNG2Schtrn.xsl lxml/branch/iso-schematron/src/lxml/isoschematron/resources/xsl/XSD2Schtrn.xsl lxml/branch/iso-schematron/src/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/ lxml/branch/iso-schematron/src/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_abstract_expand.xsl lxml/branch/iso-schematron/src/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_dsdl_include.xsl lxml/branch/iso-schematron/src/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_schematron_message.xsl lxml/branch/iso-schematron/src/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_schematron_skeleton_for_xslt1.xsl lxml/branch/iso-schematron/src/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_svrl_for_xslt1.xsl lxml/branch/iso-schematron/src/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/readme.txt lxml/branch/iso-schematron/src/lxml/tests/test_isoschematron.py Modified: lxml/branch/iso-schematron/CHANGES.txt lxml/branch/iso-schematron/LICENSES.txt lxml/branch/iso-schematron/doc/validation.txt lxml/branch/iso-schematron/setup.py lxml/branch/iso-schematron/src/lxml/xmlerror.pxi Log: Added iso-schematron support. 
Modified: lxml/branch/iso-schematron/CHANGES.txt ============================================================================== --- lxml/branch/iso-schematron/CHANGES.txt (original) +++ lxml/branch/iso-schematron/CHANGES.txt Thu Dec 17 00:23:29 2009 @@ -25,6 +25,9 @@ * Target parsers show their target object in the ``.target`` property (compatible with ElementTree). +* ISO-Schematron support based on the de-facto Schematron reference + 'skeleton implementation' + Bugs fixed ---------- Modified: lxml/branch/iso-schematron/LICENSES.txt ============================================================================== --- lxml/branch/iso-schematron/LICENSES.txt (original) +++ lxml/branch/iso-schematron/LICENSES.txt Thu Dec 17 00:23:29 2009 @@ -13,3 +13,17 @@ the doctest.py module is taken from the Python library and falls under the PSF Python License. + +The isoschematron implementation uses several XSL and RelaxNG resources: + * The (XML syntax) RelaxNG schema for schematron, copyright International + Organization for Standardization (see + src/lxml/isoschematron/resources/rng/iso-schematron.rng for the license + text) + * The skeleton iso-schematron-xslt1 pure-xslt schematron implementation + xsl stylesheets, copyright Rick Jelliffe and Academia Sinica Computing + Center, Taiwan (see the xsl files here for the license text: + src/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/) + * The xsd/rng schema schematron extraction xsl transformations are unlicensed + and copyright the respective authors as noted (see + src/lxml/isoschematron/resources/xsl/RNG2Schtrn.xsl and + src/lxml/isoschematron/resources/xsl/XSD2Schtrn.xsl) Modified: lxml/branch/iso-schematron/doc/validation.txt ============================================================================== --- lxml/branch/iso-schematron/doc/validation.txt (original) +++ lxml/branch/iso-schematron/doc/validation.txt Thu Dec 17 00:23:29 2009 @@ -11,11 +11,18 @@ .. _`Relax NG`: http://www.relaxng.org/ ..
_`XML Schema`: http://www.w3.org/XML/Schema -There is also initial support for Schematron_. However, it does not currently -support error reporting in the validation phase due to insufficiencies in the -implementation as of libxml2 2.6.30. +lxml also provides support for ISO-`Schematron`_, based on the pure-XSLT +`skeleton implementation`_ of Schematron: -.. _Schematron: http://www.ascc.net/xml/schematron +.. _Schematron: http://www.schematron.com +.. _`skeleton implementation`: http://www.schematron.com/implementation.html + +There is also basic support for `pre-ISO-Schematron`_ through the libxml2 +Schematron features. However, this does not currently support error reporting +in the validation phase due to insufficiencies in the implementation as of +libxml2 2.6.30. + +.. _`pre-ISO-Schematron`: http://www.ascc.net/xml/schematron .. contents:: .. @@ -24,6 +31,7 @@ 3 RelaxNG 4 XMLSchema 5 Schematron + 6 (Pre-ISO-Schematron) The usual setup procedure: @@ -341,11 +349,103 @@ >>> doc2.xmlschema(xmlschema_doc) False - Schematron ---------- -Since version 2.0, lxml.etree features Schematron_ support, using the +From version 2.3 on lxml features ISO-`Schematron`_ support built on the +de-facto reference implementation of Schematron, the pure-XSLT-1.0 +`skeleton implementation`_. This is provided by the lxml.isoschematron package +that implements the Schematron class, with an API compatible to the other +validators'. Pass an ElementTree object to construct a Schematron validator: + +.. sourcecode:: pycon + >>> from lxml import isoschematron + >>> f = StringIO('''\ + ... + ... + ... Sum equals 100%. + ... + ... Sum is not 100%. + ... + ... + ... + ... ''') + + >>> sct_doc = etree.parse(f) + >>> schematron = isoschematron.Schematron(sct_doc) + +You can then validate some ElementTree document with this. Just like with +XMLSchema or RelaxNG, you'll get back true if the document is valid against the +schema, and false if not: + +..
sourcecode:: pycon + + >>> valid = StringIO('''\ + ... + ... 20 + ... 30 + ... 50 + ... + ... ''') + + >>> doc = etree.parse(valid) + >>> schematron.validate(doc) + True + + >>> etree.SubElement(doc.getroot(), "Percent").text = "10" + + >>> schematron.validate(doc) + False + +Calling the schema object has the same effect as calling its validate method. +This can be useful for conditional statements: + +.. sourcecode:: pycon + + >>> is_valid = isoschematron.Schematron(sct_doc) + + >>> if not is_valid(doc): + ... print("invalid!") + invalid! + +Built on a pure-xslt implementation, the actual validator is created as an +XSLT 1.0 stylesheet using these steps: + +0. (Extract from XML Schema or RelaxNG schema) +1. Process inclusions +2. Process abstract patterns +3. Compile the schematron schema to XSLT + +To allow more control over the individual steps, isoschematron.Schematron +supports an extended API: + +The ``include`` and ``expand`` keyword arguments can be used to switch off +steps 1) and 2). + +To set parameters for steps 1), 2) and 3) dictionaries containing XSLT +parameters can be provided using the keyword arguments ``include_params``, +``expand_params`` or ``compile_params``. Note that these parameters are +stylesheet parameters so you need to set string parameters using quotes or the +XSLT.strparam() classmethod, see XPath and XSLT with lxml: +Stylesheet-parameters_. + +By setting ``store_schematron`` to True, the (included-and-expanded) schematron +document tree is stored and made available through the ``schematron`` property. + +Similarly, setting ``store_xslt`` to True will result in the validation XSLT +document tree being kept; it can be retrieved through the ``validator_xslt`` +property. + +Finally, with ``store_report`` set to True (the default), the resulting +validation report document gets stored and can be accessed as the +``validation_report`` property. + +.. 
_Stylesheet-parameters: xpathxslt.html#stylesheet-parameters + +(Pre-ISO-Schematron) +-------------------- + +Since version 2.0, lxml.etree features `pre-ISO-Schematron`_ support, using the class lxml.etree.Schematron. It requires at least libxml2 2.6.21 to work. The API is the same as for the other validators. Pass an ElementTree object to construct a Schematron validator: Modified: lxml/branch/iso-schematron/setup.py ============================================================================== --- lxml/branch/iso-schematron/setup.py (original) +++ lxml/branch/iso-schematron/setup.py Thu Dec 17 00:23:29 2009 @@ -113,7 +113,12 @@ ], package_dir = {'': 'src'}, - packages = ['lxml', 'lxml.html'], + packages = ['lxml', 'lxml.html', 'lxml.isoschematron'], + package_data = {'lxml.isoschematron': + ['resources/rng/iso-schematron.rng', + 'resources/xsl/*.xsl', + 'resources/xsl/iso-schematron-xslt1/*.xsl', + 'resources/xsl/iso-schematron-xslt1/readme.txt']}, ext_modules = setupinfo.ext_modules( STATIC_INCLUDE_DIRS, STATIC_LIBRARY_DIRS, STATIC_CFLAGS, STATIC_BINARIES), Added: lxml/branch/iso-schematron/src/lxml/isoschematron/__init__.py ============================================================================== --- (empty file) +++ lxml/branch/iso-schematron/src/lxml/isoschematron/__init__.py Thu Dec 17 00:23:29 2009 @@ -0,0 +1,266 @@ +"""The ``lxml.isoschematron`` package implements ISO Schematron support on top +of the pure-xslt 'skeleton' implementation. +""" + +import os.path +from lxml import etree as _etree # due to validator __init__ signature + + +# some namespaces +#FIXME: Maybe lxml should provide a dedicated place for common namespace +#FIXME: definitions? 
+XML_SCHEMA_NS = "http://www.w3.org/2001/XMLSchema" +RELAXNG_NS = "http://relaxng.org/ns/structure/1.0" +SCHEMATRON_NS = "http://purl.oclc.org/dsdl/schematron" +SVRL_NS = "http://purl.oclc.org/dsdl/svrl" + + +# some helpers +_schematron_root = '{%s}schema' % SCHEMATRON_NS +_xml_schema_root = '{%s}schema' % XML_SCHEMA_NS +_resources_dir = os.path.join(os.path.dirname(__file__), 'resources') + + +# the iso-schematron skeleton implementation steps aka xsl transformations +extract_from_xsd = _etree.XSLT(_etree.parse( + os.path.join(_resources_dir, 'xsl', 'XSD2Schtrn.xsl'))) +extract_from_rng = _etree.XSLT(_etree.parse( + os.path.join(_resources_dir, 'xsl', 'RNG2Schtrn.xsl'))) +iso_dsdl_include = _etree.XSLT(_etree.parse( + os.path.join(_resources_dir, 'xsl', 'iso-schematron-xslt1', + 'iso_dsdl_include.xsl'))) +iso_abstract_expand = _etree.XSLT(_etree.parse( + os.path.join(_resources_dir, 'xsl', 'iso-schematron-xslt1', + 'iso_abstract_expand.xsl'))) +iso_svrl_for_xslt1 = _etree.XSLT(_etree.parse( + os.path.join(_resources_dir, + 'xsl', 'iso-schematron-xslt1', 'iso_svrl_for_xslt1.xsl'))) +# if you want to use another "meta-stylesheet" for compilation to xslt, plug it +# here +iso_compile2xslt = iso_svrl_for_xslt1 + + +# svrl result accessors +svrl_validation_errors = _etree.XPath( + '//svrl:failed-assert', namespaces={'sch': SCHEMATRON_NS, 'svrl': SVRL_NS}) + + +# RelaxNG validator for schematron schemas +schematron_schema_valid = _etree.RelaxNG(_etree.parse( + os.path.join(_resources_dir, 'rng', 'iso-schematron.rng'))) + + +#FIXME: This wouldn't be necessary if etree._Validator made _error_log +#FIXME: accessible from python +class _Validator(object): + "Base class for Schematron validators." + + def __init__(self): + "__init__(self)" + self._error_log = _etree._ErrorLog() + + def validate(self, etree): + """validate(self, etree) + + Validate the document using this schema. + + Returns true if document is valid, false if not. 
+ """ + return self(etree) + + def assertValid(self, etree): + """assertValid(self, etree) + + Raises `DocumentInvalid` if the document does not comply with the schema. + """ + if not self(etree): + raise _etree.DocumentInvalid(self._error_log._buildExceptionMessage( + "Document does not comply with schema"), + self._error_log) + + def assert_(self, etree): + """assert_(self, etree) + + Raises `AssertionError` if the document does not comply with the schema. + """ + if not self(etree): + raise AssertionError, self._error_log._buildExceptionMessage( + "Document does not comply with schema") + + def error_log(self): + """The log of validation errors and warnings. + """ + return self._error_log.copy() + error_log = property(error_log, doc=error_log.__doc__) + + +class Schematron(_Validator): + """Schematron(self, etree=None, file=None, include=True, expand=True, + include_params={}, expand_params={}, compile_params={}, + store_schematron=False, store_xslt=False, store_report=True) + An ISO Schematron validator. + + Pass a root Element or an ElementTree to turn it into a validator. + Alternatively, pass a filename as keyword argument 'file' to parse from + the file system. + Built on the Schematron language 'reference' skeleton pure-xslt + implementation, the validator is created as an XSLT 1.0 stylesheet using + these steps: + (0) (Extract from XML Schema or RelaxNG schema) + 1) Process inclusions + 2) Process abstract patterns + 3) Compile the schematron schema to XSLT + The ``include`` and ``expand`` keyword arguments can be used to switch off + steps 1) and 2). + To set parameters for steps 1), 2) and 3) hand dictionaries containing xslt + parameters to the keyword arguments ``include_params``, ``expand_params`` + or ``compile_params``. + If ``store_schematron`` is set to True, the (included-and-expanded) + schematron document tree is stored and available through the ``schematron`` + property. 
+ If ``store_xslt`` is set to True, the validation XSLT document tree will be + stored and can be retrieved through the ``validator_xslt`` property. + With ``store_report`` set to True (the default), the resulting validation + report document gets stored and can be accessed as the ``validation_report`` + property. + + Schematron is a less well known, but very powerful schema language. The main + idea is to use the capabilities of XPath to put restrictions on the structure + and the content of XML documents. Here is a simple example:: + + >>> from lxml import isoschematron + >>> schematron = isoschematron.Schematron(etree.XML(''' + ... + ... + ... id is the only permitted attribute name + ... + ... Attribute + ... is forbidden + ... + ... + ... + ... + ... ''')) + + >>> xml = etree.XML(''' + ... + ... + ... + ... + ... ''') + + >>> schematron.validate(xml) + 0 + + >>> xml = etree.XML(''' + ... + ... + ... + ... + ... ''') + + >>> schematron.validate(xml) + 1 + """ + + _domain = _etree.ErrorDomains.SCHEMATRONV + _level = _etree.ErrorLevels.ERROR + _error_type = _etree.ErrorTypes.SCHEMATRONV_ASSERT + + def __init__(self, etree=None, file=None, include=True, expand=True, + include_params={}, expand_params={}, compile_params={}, + store_schematron=False, store_xslt=False, store_report=True): + super(self.__class__, self).__init__() + + self._store_report = store_report + self._schematron = None + self._validator_xslt = None + self._validation_report = None + + # parse schema document, may be a schematron schema or an XML Schema or + # a RelaxNG schema with embedded schematron rules + try: + if etree is not None: + root = etree.getroot() + elif file is not None: + root = _etree.parse(file).getroot() + except Exception, e: + raise _etree.SchematronParseError( + "No tree or file given: %s" % e) + if root is None: + raise ValueError("Empty tree") + if root.tag == _schematron_root: + schematron = root + elif root.tag == _xml_schema_root: + schematron = extract_from_xsd(root) 
+ elif root.nsmap[root.prefix] == RELAXNG_NS: + # RelaxNG does not have a single unique root element + schematron = extract_from_rng(root) + else: + raise _etree.SchematronParseError( + "Document is not a schematron schema or schematron-extractable") + # perform the iso-schematron skeleton implementation steps to get a + # validating xslt + if include: + schematron = iso_dsdl_include(schematron, **include_params) + if expand: + schematron = iso_abstract_expand(schematron, **expand_params) + if not schematron_schema_valid(schematron): + raise _etree.SchematronParseError( + "invalid schematron schema: %s" % + schematron_schema_valid.error_log) + if store_schematron: + self._schematron = schematron + validator_xslt = iso_compile2xslt(schematron, **compile_params) + if store_xslt: + self._validator_xslt = validator_xslt + self._validator = _etree.XSLT(validator_xslt) + + def __call__(self, etree): + """__call__(self, etree) + + Validate doc using Schematron. + + Returns true if document is valid, false if not. + """ + result = self._validator(etree) + if self._store_report: + self._validation_report = result + errors = svrl_validation_errors(result) + self._error_log.clear() + if errors: + if isinstance(etree, _etree._Element): + fname = etree.getroottree().docinfo.URL or '' + else: + fname = etree.docinfo.URL or '' + for error in errors: + logEntry = _etree._SettableLogEntry() + # Does svrl report the line number, anywhere? Don't think so. + logEntry.setGeneric( + domain=self._domain, type=self._error_type, + level=self._level, line=0, message=_etree.tounicode(error), + filename=fname) + self._error_log.receive(logEntry) + return False + return True + + def schematron(self): + """ISO-schematron schema document (None if object has been initialized + with store_schematron=False). 
+ """ + return self._schematron + schematron = property(schematron, doc=schematron.__doc__) + + def validator_xslt(self): + """ISO-schematron skeleton implementation XSLT validator document (None + if object has been initialized with store_xslt=False). + """ + return self._validator_xslt + validator_xslt = property(validator_xslt, doc=validator_xslt.__doc__) + + def validation_report(self): + """ISO-schematron validation result report (None if result-storing has + been turned off). + """ + return self._validation_report + validation_report = property(validation_report, doc=validation_report.__doc__) Added: lxml/branch/iso-schematron/src/lxml/isoschematron/resources/rng/iso-schematron.rng ============================================================================== --- (empty file) +++ lxml/branch/iso-schematron/src/lxml/isoschematron/resources/rng/iso-schematron.rng Thu Dec 17 00:23:29 2009 @@ -0,0 +1,622 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ltr + rtl + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true + + + + + + + + + + + + + + + + + + + + + + + false + + + + + + + + + + + + + + + + + + + + + + + + + + false + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true + + + + + + + + + + + + + + + + + + + + + + + + + + + false + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + preserve + default + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 1 + + + Added: lxml/branch/iso-schematron/src/lxml/isoschematron/resources/xsl/RNG2Schtrn.xsl ============================================================================== --- (empty file) +++ lxml/branch/iso-schematron/src/lxml/isoschematron/resources/xsl/RNG2Schtrn.xsl Thu Dec 17 00:23:29 2009 @@ -0,0 +1,75 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Added: lxml/branch/iso-schematron/src/lxml/isoschematron/resources/xsl/XSD2Schtrn.xsl ============================================================================== --- (empty file) +++ lxml/branch/iso-schematron/src/lxml/isoschematron/resources/xsl/XSD2Schtrn.xsl Thu Dec 17 00:23:29 2009 @@ -0,0 +1,77 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Added: lxml/branch/iso-schematron/src/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_abstract_expand.xsl ============================================================================== --- (empty file) +++ lxml/branch/iso-schematron/src/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_abstract_expand.xsl Thu Dec 17 00:23:29 2009 @@ -0,0 +1,296 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Suppressed abstract pattern was here + + + + + + + Start pattern based on abstract + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file Added: 
lxml/branch/iso-schematron/src/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_dsdl_include.xsl ============================================================================== --- (empty file) +++ lxml/branch/iso-schematron/src/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_dsdl_include.xsl Thu Dec 17 00:23:29 2009 @@ -0,0 +1,1160 @@ + + + + + + + + + + true + true + true + true + true + true + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Error: Impossible URL in RELAX NG extRef + include + + + + + + + + + + + + + + Unable to open referenced included file: + + + + + + + + + Unable to locate id attribute: + + + + + + + + + + + + + Unable to open referenced included file: + + + + + + + Unable to locate id attribute: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Error: Impossible URL in Schematron include + + + + + + + + + + + + + + + + + + + Unable to open referenced included file: + + + + + + + + + + + + + Unable to locate id attribute: + + + + + + + + + + + Schema error: Use include to + include fragments, not a whole + schema + + + + + + + + + + + + + + + + + + + + Unable to open referenced included file: + + + + + + + + + + Unable to locate id attribute: + + + + + + + + + + Schema error: Use include to include + fragments, not a whole schema + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Error: Impossible URL in Schematron include + + + + + + + + + + + + + + + + + + + Unable to open referenced included file: + + + + + + + + + + + + + Unable to locate id attribute: + + + + + + + + + + + + + + + + + + + + + + + + + + + + Unable to open referenced included file: + + + + + + + + + + Unable to locate id attribute: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Error: Impossible URL in Schematron include + + + + + + + + + + + + + + Unable to open 
referenced included file: + + + + + + + + + Schema error: Use include to include + fragments, not a whole schema + + + + + Unable to locate id attribute: + + + + + + + + + + + + + + + + Unable to open referenced included file: + + + + + + + Schema error: Use include to include + fragments, not a whole schema + + + + + Unable to locate id attribute: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Error: Impossible URL in DTLL include + + + + + + + + + + + + + Unable to open referenced included file: + + + + + + + + + Unable to locate id attribute: + + + + + + + + + + + + + + Unable to open referenced included file: + + + + + + + Unable to locate id attribute: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Error: Impossible URL in CRDL include + + + + + + + + + + + + + + Unable to open referenced included file: + + + + + + + + + + Unable to locate id attribute: + + + + + + + + + + + + + + Unable to open referenced included file: + + + + + + Unable to locate id attribute: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Fatal error: Xinclude href contains fragment + identifier # + + + + + + + Fatal error: Sorry, this software only + supports simple ids in XInclude xpointers + + + + + + + Fatal Error: Impossible URL in XInclude + include + + + + + + + + + + + + + + + + + + + + + + + + + + + Unable to open referenced included file and fallback + file: + + + + + + + Unable to open referenced included file: + + + + + + + + + + + + + + + + Unable to open referenced included file: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Error: Impossible URL in XLink embedding + link + + + + + + + + + + + + + Unable to open referenced included file: + + + + + + + + + Unable to locate id attribute: + + + + + + + + + + + + + + Unable to open referenced included file: + + + + + + + Unable to locate id attribute: + + + + + + + 
+ + + + + + + + + + \ No newline at end of file Added: lxml/branch/iso-schematron/src/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_schematron_message.xsl ============================================================================== --- (empty file) +++ lxml/branch/iso-schematron/src/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_schematron_message.xsl Thu Dec 17 00:23:29 2009 @@ -0,0 +1,55 @@ + + + + + + + + + + + + + + + + + + + + + + + ( + / + ) + + + \ No newline at end of file Added: lxml/branch/iso-schematron/src/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_schematron_skeleton_for_xslt1.xsl ============================================================================== --- (empty file) +++ lxml/branch/iso-schematron/src/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_schematron_skeleton_for_xslt1.xsl Thu Dec 17 00:23:29 2009 @@ -0,0 +1,1796 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + #ALL + + + +false + +true + + + + + true + false + + + + + + + true + false + + + + + + + + + @*| + + * + node() + *|comment()|processing-instruction() + + + + + + + + + + + + +default + +false + + + +1 + + + + + Schema error: Schematron elements in old and new namespaces found + + + + + + + + + + + + + + + + + Schema error: in the queryBinding attribute, use 'xslt' + + + + + 1.0 + + + + + + + + + This XSLT was automatically generated from a Schematron schema. + + + + + 1.0 + + + + + + + + + + Fail: This implementation of ISO Schematron does not work with + schemas using the "" query language. + + + + + Implementers: please note that overriding process-prolog or process-root is + the preferred method for meta-stylesheets to use where possible. + + + + + + + + + + PHASES + + PROLOG + + KEYS + + DEFAULT RULES + + SCHEMA METADATA + + SCHEMATRON PATTERNS + + + + + + + + + + + + + + + + + + + + + + + Phase Error: no phase with name has been defined. 
+ + + + + + + MODE: SCHEMATRON-SELECT-FULL-PATH + This mode can be used to generate an ugly though full XPath for locators + + + + + + + + + + + + + + + + + + + + + + + + + MODE: SCHEMATRON-FULL-PATH + This mode can be used to generate an ugly though full XPath for locators + + + + + + / + + + + + + [] + + + + *[local-name()=' + ' and namespace-uri()=' + + '] + + + [] + + + + + + + + + + / + + @ + + @*[local-name()=' + + ' and namespace-uri()=' + + '] + + + + + + + + + MODE: SCHEMATRON-FULL-PATH-2 + + This mode can be used to generate prefixed XPath for humans + + + + + + / + + + [ + + ] + + + + + /@ + + + + + MODE: GENERATE-ID-FROM-PATH + + + + + + + + + + + + + + + + + + + + + + . + + + + + + + MODE: SCHEMATRON-FULL-PATH-3 + + + This mode can be used to generate prefixed XPath for humans + (Top-level element has index) + + + + + + / + + + [ + + ] + + + + + /@ + + + + + MODE: GENERATE-ID-2 + + + U + + + U + + + + + U. + + n + + + + + U. + + _ + + _ + + + + + Strip characters + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Markup Error: no pattern attribute in <active> + + + + Reference Error: the pattern "" has been activated but is not declared + + + + + + + + Markup Error: no test attribute in <assert + + + ASSERT + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Markup Error: no test attribute in <report> + + + + REPORT + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Markup Error: no id attribute in <diagnostic> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Markup Error: no rule attribute in <extends> + + + Reference Error: the abstract rule "" has been referenced but is not declared + + + + + + + + + + + + + + Markup Error: no name attribute in <key> + + + Markup Error: no path or use attribute in <key> + + + + + + + + + + + + + + + + Markup Error: no path or use attribute in <key> + + + + + + + + + + + + Schema error: The key element is not in the ISO Schematron namespace. 
Use the XSLT namespace. + + + + + + + + Schema error: Empty href= attribute for include directive. + + + + + + + + + + + + + + Error: Impossible URL in Schematron include + + + + + + + Schema error: Use include to include fragments, not a whole schema + + + + + + + + + + Schema error: Use include to include fragments, not a whole schema + + + + + + + + + + + + + + + Error: Impossible URL in Schematron include + + + + + + + Schema error: Use include to include fragments, not a whole schema + + + + + + + + + + + Schema error: Use include to include fragments, not a whole schema + + + + + + + + + + Warning: Variables should not be used with the "xpath" query language binding. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Markup Error: no uri attribute in <ns> + + + Markup Error: no prefix attribute in <ns> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + //( + + ( + + ) + | + + ) + [not(self::text())] + + + + + + + + + + + + + Schema implementation error: This schema has abstract patterns, yet they are supposed to be preprocessed out already + + + + + + + + + + PATTERN + + + + + + + + + + + + + + + + + + + + Markup Error: no id attribute in <phase> + + + + + + + + Markup Error: no context attribute in <rule> + + + RULE + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Markup Error: no id attribute on abstract <rule> + + + Markup Error: (2) context attribute on abstract <rule> + + + + + + Markup Error: context attribute on abstract <rule> + + + + + + + + + + + + + + + + + + + + + + + + + + + Markup Error: no select attribute in <value-of> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Warning: + + must not contain any child elements + + + + + + + + + + + + + + + + + + + + + + + + + Reference error: A diagnostic "" has been referenced but is not declared + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + Using the XSLT namespace with a prefix other than "xsl" in + Schematron rules is not supported + in this processor: + + + + + + + + + + + + + + + + + + + + Error: unrecognized element in ISO Schematron namespace: check spelling + and capitalization + + + + + + + + + + + + + Warning: unrecognized element + + + + + + + + + + + + + + + Warning: unrecognized element + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + title + + + + + + + schema-title + + + + + + + + + + + + + + + + + + + + + + + + + + + Added: lxml/branch/iso-schematron/src/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_svrl_for_xslt1.xsl ============================================================================== --- (empty file) +++ lxml/branch/iso-schematron/src/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_svrl_for_xslt1.xsl Thu Dec 17 00:23:29 2009 @@ -0,0 +1,588 @@ + + + + + + + + + + + + + + + + +true + + + + + + + + + + + #ALL + + +false +true +true + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + xslt1 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +   +   +   + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Added: lxml/branch/iso-schematron/src/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/readme.txt ============================================================================== --- (empty file) +++ lxml/branch/iso-schematron/src/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/readme.txt Thu Dec 17 00:23:29 2009 @@ -0,0 +1,83 @@ +ISO SCHEMATRON 2009 + +XSLT implementation by Rick Jelliffe with assistance from members of Schematron-love-in maillist. + +2009-03-18 + +Two distributions are available. One is for XSLT1 engines. +The other is for XSLT2 engines, such as SAXON 9. + + +This version of Schematron splits the process into a pipeline of several different XSLT stages. + +1) First, preprocess your Schematron schema with iso_dsdl_include.xsl. +This is a macro processor to assemble the schema from various parts. +If your schema is not in separate parts, you can skip this stage. + +2) Second, preprocess the output from stage 1 with iso_abstract_expand.xsl. +This is a macro processor to convert abstract patterns to real patterns. +If your schema does not use abstract patterns, you can skip this +stage. + +3) Third, compile the Schematron schema into an XSLT script. +This will typically use iso_svrl_for_xslt1.xsl or iso_svrl_for_xslt2.xsl +(which in turn invoke iso_schematron_skeleton_for_xslt1.xsl or iso_schematron_skeleton_for_saxon.xsl) +However, other "meta-styleseets" are also in common use; the principle of operation is the same. +If your schema uses Schematron phases, supply these as command line/invocation parameters +to this process. + +4) Fourth, run the script generated by stage 3 against the document being validated. +If you are using the SVRL script, then the output of validation will be an XML document. 
+If your schema uses Schematron parameters, supply these as command line/invocation parameters +to this process. + + +The XSLT2 distribution also features several next generation features, +such as validating multiple documents. See the source code for details. + +Schematron assertions can be written in any language, of course; the file +sch-messages-en.xhtml contains the diagnostics messages from the XSLT2 skeleton +in English, and this can be used as template to localize the skeleton's +error messages. Note that typically programming errors in Schematron are XPath +errors, which requires localized messages from the XSLT engine. + +ANT +--- +To give an example of how to process a document, here is a sample ANT task. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file Added: lxml/branch/iso-schematron/src/lxml/tests/test_isoschematron.py ============================================================================== --- (empty file) +++ lxml/branch/iso-schematron/src/lxml/tests/test_isoschematron.py Thu Dec 17 00:23:29 2009 @@ -0,0 +1,585 @@ +# -*- coding: utf-8 -*- + +""" +Test cases related to ISO-Schematron parsing and validation +""" + +import unittest, sys, os.path +from lxml import isoschematron + +this_dir = os.path.dirname(__file__) +if this_dir not in sys.path: + sys.path.insert(0, this_dir) # needed for Py3 + +from common_imports import etree, HelperTestCase, fileInTestDir +from common_imports import doctest, make_doctest + +class ETreeISOSchematronTestCase(HelperTestCase): + def test_schematron(self): + tree_valid = self.parse('') + tree_invalid = self.parse('') + schema = self.parse('''\ + + + Open Model + + BBB element is not present + CCC element is not present + + + + Closed model" + + BBB element is not present + CCC element is not present + There is an extra element + + + +''') + schema = isoschematron.Schematron(schema) + self.assert_(schema.validate(tree_valid)) + self.assert_(not 
schema.validate(tree_invalid)) + + def test_schematron_elementtree_error(self): + self.assertRaises(ValueError, isoschematron.Schematron, etree.ElementTree()) + + # an empty pattern is valid in iso schematron + def test_schematron_empty_pattern(self): + schema = self.parse('''\ + + + Open model + + +''') + schema = isoschematron.Schematron(schema) + self.assert_(schema) + + def test_schematron_invalid_schema_empty(self): + schema = self.parse('''\ + +''') + self.assertRaises(etree.SchematronParseError, + isoschematron.Schematron, schema) + + def test_schematron_invalid_schema_namespace(self): + schema = self.parse('''\ + +''') + self.assertRaises(etree.SchematronParseError, + isoschematron.Schematron, schema) + + def test_schematron_validate(self): + schema = self.parse('''\ + + + mandatory number_of_entries tests + + [ERROR] number_of_entries () must equal the number of entries/entry elements () + + + +''') + tree_valid = self.parse('''\ + + 0 + + + +''') + tree_invalid = self.parse('''\ + + 3 + + Entry 1 + Entry 2 + + +''') + schematron = isoschematron.Schematron(schema) + self.assert_(schematron.validate(tree_valid), schematron.error_log) + valid = schematron.validate(tree_invalid) + self.assert_(not valid) + + def test_schematron_assertValid(self): + schema = self.parse('''\ + + + mandatory number_of_entries tests + + [ERROR] number_of_entries () must equal the number of entries/entry elements () + + + +''') + tree_valid = self.parse('''\ + + 0 + + + +''') + tree_invalid = self.parse('''\ + + 3 + + Entry 1 + Entry 2 + + +''') + schematron = isoschematron.Schematron(schema) + self.assert_(schematron(tree_valid), schematron.error_log) + self.assertRaises(etree.DocumentInvalid, schematron.assertValid, + tree_invalid) + + def test_schematron_error_log(self): + schema = self.parse('''\ + + + mandatory number_of_entries tests + + [ERROR] number_of_entries () must equal the number of entries/entry elements () + + + +''') + tree_valid = self.parse('''\ + + 0 + + + 
+''') + tree_invalid = self.parse('''\ + + 3 + + Entry 1 + Entry 2 + + +''') + schematron = isoschematron.Schematron(schema) + self.assert_(schematron(tree_valid), schematron.error_log) + valid = schematron(tree_invalid) + self.assert_(not valid) + self.assertEquals(len(schematron.error_log), 1, + 'expected single error: %s (%s errors)' % + (schematron.error_log, len(schematron.error_log))) + + def test_schematron_result_report(self): + schema = self.parse('''\ + + + mandatory number_of_entries tests + + [ERROR] number_of_entries () must equal the number of entries/entry elements () + + + +''') + tree_valid = self.parse('''\ + + 0 + + + +''') + tree_invalid = self.parse('''\ + + 3 + + Entry 1 + Entry 2 + + +''') + schematron = isoschematron.Schematron(schema) + self.assert_(schematron(tree_valid), schematron.error_log) + valid = schematron(tree_invalid) + self.assert_(not valid) + self.assert_( + isinstance(schematron.validation_report, etree._ElementTree), + 'expected a validation report result tree, got: %s' % + (schematron.validation_report)) + + schematron = isoschematron.Schematron(schema, store_report=False) + self.assert_(schematron(tree_valid), schematron.error_log) + valid = schematron(tree_invalid) + self.assert_(not valid) + self.assert_(schematron.validation_report is None, + 'validation reporting switched off, still: %s' % + (schematron.validation_report)) + + def test_schematron_store_schematron(self): + schema = self.parse('''\ + + + mandatory number_of_entries tests + + [ERROR] number_of_entries () must equal the number of entries/entry elements () + + + +''') + schematron = isoschematron.Schematron(schema) + self.assert_(schematron.validator_xslt is None) + + schematron = isoschematron.Schematron(schema, store_schematron=True) + self.assert_(isinstance(schematron.schematron, etree._ElementTree), + 'expected schematron schema to be stored') + + def test_schematron_store_xslt(self): + schema = self.parse('''\ + + + mandatory number_of_entries tests + 
+ [ERROR] number_of_entries () must equal the number of entries/entry elements () + + + +''') + schematron = isoschematron.Schematron(schema) + self.assert_(schematron.validator_xslt is None) + + schematron = isoschematron.Schematron(schema, store_xslt=True) + self.assert_(isinstance(schematron.validator_xslt, etree._ElementTree), + 'expected validator xslt to be stored') + + def test_schematron_abstract(self): + schema = self.parse('''\ + + iso schematron validation + + + + + + + + + + [ERROR] element () dateTime value () is not qualified as UTC (tz: ) + + + + + + + + [ERROR] element () dateTime value () is not qualified as UTC (tz: ) + + + + + + + + + + + + +''') + valid_trees = [ + self.parse('''\ + + 2009-12-10T15:21:00Z + + +'''), + self.parse('''\ + + 2009-12-10T15:21:00Z + 2009-12-10T15:21:00Z + +'''), + self.parse('''\ + + 2009-12-10T15:21:00+00:00 + 2009-12-10T15:21:00-00:00 + +'''), + ] + + schematron = isoschematron.Schematron(schema) + for tree_valid in valid_trees: + self.assert_(schematron(tree_valid), schematron.error_log) + + tree_invalid = self.parse('''\ + + 2009-12-10T16:21:00+01:00 + 2009-12-10T16:21:00+01:00 + +''') + expected = 2 + valid = schematron(tree_invalid) + self.assert_(not valid) + self.assertEquals( + len(schematron.error_log), expected, 'expected %s errors: %s (%s errors)' % + (expected, schematron.error_log, len(schematron.error_log))) + + tree_invalid = self.parse('''\ + + + 2009-12-10T16:21:00Z + +''') + expected = 1 + valid = schematron(tree_invalid) + self.assert_(not valid) + self.assertEquals( + len(schematron.error_log), expected, 'expected %s errors: %s (%s errors)' % + (expected, schematron.error_log, len(schematron.error_log))) + + def test_schematron_phases(self): + schema = self.parse('''\ + + iso schematron validation + + + + + + + + + + + + + + + + + + + + + + + + + [ERROR] element () dateTime value () is not qualified as UTC (tz: ) + + + + + + + + [ERROR] element () dateTime value () is not qualified as UTC (tz: ) + 
+ + + + mandatory number_of_entries test + + [ERROR] number_of_entries () must equal the number of entries/entry elements () + + + + + + + + + + + + +''') + tree_valid = self.parse('''\ + + 2009-12-10T15:21:00Z + + 0 + + + +''') + tree_invalid = self.parse('''\ + + 2009-12-10T16:21:00+01:00 + 2009-12-10T16:21:00+01:00 + 3 + + Entry 1 + Entry 2 + + +''') + # check everything (default phase #ALL) + schematron = isoschematron.Schematron(schema) + self.assert_(schematron(tree_valid), schematron.error_log) + expected = 3 + valid = schematron(tree_invalid) + self.assert_(not valid) + self.assertEquals( + len(schematron.error_log), expected, 'expected %s errors: %s (%s errors)' % + (expected, schematron.error_log, len(schematron.error_log))) + + # check phase mandatory + schematron = isoschematron.Schematron( + schema, compile_params={'phase': '"mandatory"'}) + self.assert_(schematron(tree_valid), schematron.error_log) + expected = 1 + valid = schematron(tree_invalid) + self.assert_(not valid) + self.assertEquals( + len(schematron.error_log), expected, 'expected %s errors: %s (%s errors)' % + (expected, schematron.error_log, len(schematron.error_log))) + + # check phase datetime_checks + schematron = isoschematron.Schematron( + schema, compile_params={'phase': '"datetime_checks"'}) + self.assert_(schematron(tree_valid), schematron.error_log) + expected = 2 + valid = schematron(tree_invalid) + self.assert_(not valid) + self.assertEquals( + len(schematron.error_log), expected, 'expected %s errors: %s (%s errors)' % + (expected, schematron.error_log, len(schematron.error_log))) + + # check phase full + schematron = isoschematron.Schematron( + schema, compile_params={'phase': '"full"'}) + self.assert_(schematron(tree_valid), schematron.error_log) + expected = 3 + valid = schematron(tree_invalid) + self.assert_(not valid) + self.assertEquals( + len(schematron.error_log), expected, 'expected %s errors: %s (%s errors)' % + (expected, schematron.error_log, 
len(schematron.error_log))) + + def test_schematron_xmlschema_embedded(self): + schema = self.parse('''\ + + + + + + + + + mandatory number_of_entries tests + + [ERROR] number_of_entries () must equal the number of entries/entry elements () + + + + + + + + + + + + + + + + +''') + tree_valid = self.parse('''\ + + 2 + + Entry 1 + Entry 2 + + +''') + tree_invalid = self.parse('''\ + + 1 + + Entry 1 + Entry 2 + + +''') + xmlschema = etree.XMLSchema(schema) + schematron = isoschematron.Schematron(schema) + # fwiw, this must also be XMLSchema-valid + self.assert_(xmlschema(tree_valid), xmlschema.error_log) + self.assert_(schematron(tree_valid)) + # still schema-valid + self.assert_(xmlschema(tree_invalid), xmlschema.error_log) + self.assert_(not schematron(tree_invalid)) + + def test_schematron_relaxng_embedded(self): + schema = self.parse('''\ + + + + + + + + + + mandatory number_of_entries tests + + [ERROR] number_of_entries () must equal the number of entries/entry elements () + + + + + + + + + + + + +''') + tree_valid = self.parse('''\ + + 2 + + Entry 1 + Entry 2 + + +''') + tree_invalid = self.parse('''\ + + 1 + + Entry 1 + Entry 2 + + +''') + relaxng = etree.RelaxNG(schema) + schematron = isoschematron.Schematron(schema) + # fwiw, this must also be RelaxNG-valid + self.assert_(relaxng(tree_valid), relaxng.error_log) + self.assert_(schematron(tree_valid)) + # still schema-valid + self.assert_(relaxng(tree_invalid), relaxng.error_log) + self.assert_(not schematron(tree_invalid)) + + #TODO: test xslt parameters for inclusion, expand & compile steps (?) 
+ + +def test_suite(): + suite = unittest.TestSuite() + suite.addTests([unittest.makeSuite(ETreeISOSchematronTestCase)]) + suite.addTests( + [make_doctest('../../../doc/validation.txt')]) + return suite + +if __name__ == '__main__': + print('to test use test.py %s' % __file__) Modified: lxml/branch/iso-schematron/src/lxml/xmlerror.pxi ============================================================================== --- lxml/branch/iso-schematron/src/lxml/xmlerror.pxi (original) +++ lxml/branch/iso-schematron/src/lxml/xmlerror.pxi Thu Dec 17 00:23:29 2009 @@ -71,6 +71,8 @@ else: self.filename = _decodeFilename(error.file) + #FIXME: This seems not to have been used anywhere, so far. Is my addition + #FIXME: of _utf8()-ing message & filename correct? cdef _setGeneric(self, int domain, int type, int level, int line, message, filename): self.domain = domain @@ -78,8 +80,8 @@ self.level = level self.line = line self.column = 0 - self.message = message - self.filename = filename + self.message = _utf8(message) + self.filename = _utf8(filename) def __repr__(self): return u"%s:%d:%d:%s:%s:%s: %s" % ( @@ -102,6 +104,12 @@ def __get__(self): return ErrorLevels._getName(self.level, u"unknown") +#FIXME: Can _LogEntry be settable itself so we don't need this? 
+cdef class _SettableLogEntry(_LogEntry): + cpdef setGeneric(self, int domain, int type, int level, int line, + message, filename): + self._setGeneric(domain, type, level, line, message, filename) + cdef class _BaseErrorLog: cdef _LogEntry _first_error cdef readonly object last_error @@ -172,7 +180,7 @@ message = u"%s, line %d" % (message, line) return exctype(message, code, line, column) - cdef _buildExceptionMessage(self, default_message): + cpdef _buildExceptionMessage(self, default_message): if self._first_error is None: return default_message if self._first_error.message is not None and self._first_error.message: From jholg at codespeak.net Fri Dec 18 22:32:44 2009 From: jholg at codespeak.net (jholg at codespeak.net) Date: Fri, 18 Dec 2009 22:32:44 +0100 (CET) Subject: [Lxml-checkins] r70210 - in lxml/branch/iso-schematron/src/lxml: . isoschematron tests Message-ID: <20091218213244.1F461168007@codespeak.net> Author: jholg Date: Fri Dec 18 22:32:43 2009 New Revision: 70210 Modified: lxml/branch/iso-schematron/src/lxml/isoschematron/__init__.py lxml/branch/iso-schematron/src/lxml/lxml.etree.pyx lxml/branch/iso-schematron/src/lxml/tests/test_isoschematron.py lxml/branch/iso-schematron/src/lxml/xmlerror.pxi Log: Changes: * more tests for parsing from file, initializing from tree vs element * removed erroneous _utf8() calls in _LogEntry * added _append_log_message and _clear_error_log methods to etree._Validator * removed now-obsolete isoschematron._Validator class, fix for initializing from element Modified: lxml/branch/iso-schematron/src/lxml/isoschematron/__init__.py ============================================================================== --- lxml/branch/iso-schematron/src/lxml/isoschematron/__init__.py (original) +++ lxml/branch/iso-schematron/src/lxml/isoschematron/__init__.py Fri Dec 18 22:32:43 2009 @@ -50,51 +50,7 @@ os.path.join(_resources_dir, 'rng', 'iso-schematron.rng'))) -#FIXME: This wouldn't be necessary if etree._Validator made _error_log 
-#FIXME: accessible from python -class _Validator(object): - "Base class for Schematron validators." - - def __init__(self): - "__init__(self)" - self._error_log = _etree._ErrorLog() - - def validate(self, etree): - """validate(self, etree) - - Validate the document using this schema. - - Returns true if document is valid, false if not. - """ - return self(etree) - - def assertValid(self, etree): - """assertValid(self, etree) - - Raises `DocumentInvalid` if the document does not comply with the schema. - """ - if not self(etree): - raise _etree.DocumentInvalid(self._error_log._buildExceptionMessage( - "Document does not comply with schema"), - self._error_log) - - def assert_(self, etree): - """assert_(self, etree) - - Raises `AssertionError` if the document does not comply with the schema. - """ - if not self(etree): - raise AssertionError, self._error_log._buildExceptionMessage( - "Document does not comply with schema") - - def error_log(self): - """The log of validation errors and warnings. 
- """ - return self._error_log.copy() - error_log = property(error_log, doc=error_log.__doc__) - - -class Schematron(_Validator): +class Schematron(_etree._Validator): """Schematron(self, etree=None, file=None, include=True, expand=True, include_params={}, expand_params={}, compile_params={}, store_schematron=False, store_xslt=False, store_report=True) @@ -169,7 +125,7 @@ def __init__(self, etree=None, file=None, include=True, expand=True, include_params={}, expand_params={}, compile_params={}, - store_schematron=False, store_xslt=False, store_report=True): + store_schematron=False, store_xslt=False, store_report=False): super(self.__class__, self).__init__() self._store_report = store_report @@ -181,7 +137,10 @@ # a RelaxNG schema with embedded schematron rules try: if etree is not None: - root = etree.getroot() + if isinstance(etree, _etree._Element): + root = etree + else: + root = etree.getroot() elif file is not None: root = _etree.parse(file).getroot() except Exception, e: @@ -223,11 +182,11 @@ Returns true if document is valid, false if not. """ + self._clear_error_log() result = self._validator(etree) if self._store_report: self._validation_report = result errors = svrl_validation_errors(result) - self._error_log.clear() if errors: if isinstance(etree, _etree._Element): fname = etree.getroottree().docinfo.URL or '' @@ -236,11 +195,10 @@ for error in errors: logEntry = _etree._SettableLogEntry() # Does svrl report the line number, anywhere? Don't think so. 
- logEntry.setGeneric( + self._append_log_message( domain=self._domain, type=self._error_type, level=self._level, line=0, message=_etree.tounicode(error), filename=fname) - self._error_log.receive(logEntry) return False return True Modified: lxml/branch/iso-schematron/src/lxml/lxml.etree.pyx ============================================================================== --- lxml/branch/iso-schematron/src/lxml/lxml.etree.pyx (original) +++ lxml/branch/iso-schematron/src/lxml/lxml.etree.pyx Fri Dec 18 22:32:43 2009 @@ -2783,6 +2783,14 @@ raise AssertionError, self._error_log._buildExceptionMessage( u"Document does not comply with schema") + cpdef _append_log_message(self, int domain, int type, int level, int line, + message, filename): + self._error_log._receiveGeneric(domain, type, level, line, message, + filename) + + cpdef _clear_error_log(self): + self._error_log.clear() + property error_log: u"The log of validation errors and warnings." def __get__(self): Modified: lxml/branch/iso-schematron/src/lxml/tests/test_isoschematron.py ============================================================================== --- lxml/branch/iso-schematron/src/lxml/tests/test_isoschematron.py (original) +++ lxml/branch/iso-schematron/src/lxml/tests/test_isoschematron.py Fri Dec 18 22:32:43 2009 @@ -70,6 +70,70 @@ self.assertRaises(etree.SchematronParseError, isoschematron.Schematron, schema) + def test_schematron_from_tree(self): + schema = self.parse('''\ + + + mandatory number_of_entries tests + + [ERROR] number_of_entries () must equal the number of entries/entry elements () + + + +''') + schematron = isoschematron.Schematron(schema) + self.assert_(isinstance(schematron, isoschematron.Schematron)) + + def test_schematron_from_element(self): + schema = self.parse('''\ + + + mandatory number_of_entries tests + + [ERROR] number_of_entries () must equal the number of entries/entry elements () + + + +''') + schematron = isoschematron.Schematron(schema.getroot()) + 
self.assert_(isinstance(schematron, isoschematron.Schematron)) + + def test_schematron_from_file(self): + schematron = isoschematron.Schematron(file=fileInTestDir('test.sch')) + self.assert_(isinstance(schematron, isoschematron.Schematron)) + + def test_schematron_call(self): + schema = self.parse('''\ + + + mandatory number_of_entries tests + + [ERROR] number_of_entries () must equal the number of entries/entry elements () + + + +''') + tree_valid = self.parse('''\ + + 0 + + + +''') + tree_invalid = self.parse('''\ + + 3 + + Entry 1 + Entry 2 + + +''') + schematron = isoschematron.Schematron(schema) + self.assert_(schematron(tree_valid), schematron.error_log) + valid = schematron(tree_invalid) + self.assert_(not valid) + def test_schematron_validate(self): schema = self.parse('''\ @@ -196,7 +260,7 @@ ''') - schematron = isoschematron.Schematron(schema) + schematron = isoschematron.Schematron(schema, store_report=True) self.assert_(schematron(tree_valid), schematron.error_log) valid = schematron(tree_invalid) self.assert_(not valid) Modified: lxml/branch/iso-schematron/src/lxml/xmlerror.pxi ============================================================================== --- lxml/branch/iso-schematron/src/lxml/xmlerror.pxi (original) +++ lxml/branch/iso-schematron/src/lxml/xmlerror.pxi Fri Dec 18 22:32:43 2009 @@ -71,8 +71,6 @@ else: self.filename = _decodeFilename(error.file) - #FIXME: This seems not to have been used anywhere, so far. Is my addition - #FIXME: of _utf8()-ing message & filename correct? 
cdef _setGeneric(self, int domain, int type, int level, int line, message, filename): self.domain = domain @@ -80,8 +78,8 @@ self.level = level self.line = line self.column = 0 - self.message = _utf8(message) - self.filename = _utf8(filename) + self.message = message + self.filename = filename def __repr__(self): return u"%s:%d:%d:%s:%s:%s: %s" % ( From jholg at codespeak.net Sat Dec 19 02:33:19 2009 From: jholg at codespeak.net (jholg at codespeak.net) Date: Sat, 19 Dec 2009 02:33:19 +0100 (CET) Subject: [Lxml-checkins] r70213 - lxml/branch/iso-schematron/src/lxml Message-ID: <20091219013319.CC4B4168015@codespeak.net> Author: jholg Date: Sat Dec 19 02:33:18 2009 New Revision: 70213 Modified: lxml/branch/iso-schematron/src/lxml/xmlerror.pxi Log: Took out the last change that's now obsolete for making isoschematron work with the existing xmlerror infrastructure. Modified: lxml/branch/iso-schematron/src/lxml/xmlerror.pxi ============================================================================== --- lxml/branch/iso-schematron/src/lxml/xmlerror.pxi (original) +++ lxml/branch/iso-schematron/src/lxml/xmlerror.pxi Sat Dec 19 02:33:18 2009 @@ -178,7 +178,7 @@ message = u"%s, line %d" % (message, line) return exctype(message, code, line, column) - cpdef _buildExceptionMessage(self, default_message): + cdef _buildExceptionMessage(self, default_message): if self._first_error is None: return default_message if self._first_error.message is not None and self._first_error.message: From jholg at codespeak.net Mon Dec 21 22:02:19 2009 From: jholg at codespeak.net (jholg at codespeak.net) Date: Mon, 21 Dec 2009 22:02:19 +0100 (CET) Subject: [Lxml-checkins] r70239 - in lxml/branch/iso-schematron: . 
src/lxml src/lxml/tests Message-ID: <20091221210219.27FF2168016@codespeak.net> Author: jholg Date: Mon Dec 21 22:02:18 2009 New Revision: 70239 Modified: lxml/branch/iso-schematron/CHANGES.txt lxml/branch/iso-schematron/src/lxml/tests/test_xslt.py lxml/branch/iso-schematron/src/lxml/xslt.pxi Log: XSLT now takes XPath object as __call__ stylesheet parameter. Modified: lxml/branch/iso-schematron/CHANGES.txt ============================================================================== --- lxml/branch/iso-schematron/CHANGES.txt (original) +++ lxml/branch/iso-schematron/CHANGES.txt Mon Dec 21 22:02:18 2009 @@ -25,6 +25,8 @@ * Target parsers show their target object in the ``.target`` property (compatible with ElementTree). +* XSLT objects now take XPath object as __call__ stylesheet parameters + * ISO-Schematron support based on the de-facto Schematron reference 'skeleton implementation' Modified: lxml/branch/iso-schematron/src/lxml/tests/test_xslt.py ============================================================================== --- lxml/branch/iso-schematron/src/lxml/tests/test_xslt.py (original) +++ lxml/branch/iso-schematron/src/lxml/tests/test_xslt.py Mon Dec 21 22:02:18 2009 @@ -420,6 +420,24 @@ ''', str(res)) + def test_xslt_parameter_xpath_object(self): + tree = self.parse('BC') + style = self.parse('''\ + + + + + +''') + + st = etree.XSLT(style) + res = st(tree, bar=etree.XPath("/a/b/text()")) + self.assertEquals('''\ + +B +''', + str(res)) def test_xslt_default_parameters(self): tree = self.parse('BC') Modified: lxml/branch/iso-schematron/src/lxml/xslt.pxi ============================================================================== --- lxml/branch/iso-schematron/src/lxml/xslt.pxi (original) +++ lxml/branch/iso-schematron/src/lxml/xslt.pxi Mon Dec 21 22:02:18 2009 @@ -609,7 +609,10 @@ xslt.xsltQuoteOneUserParam( transform_ctxt, _cstr(k), _cstr(v)) else: - v = _utf8(value) + if isinstance(value, XPath): + v = _utf8((value).path) + else: + v = _utf8(value) 
params[i] = _cstr(k) i += 1 params[i] = _cstr(v) From jholg at codespeak.net Tue Dec 22 01:22:32 2009 From: jholg at codespeak.net (jholg at codespeak.net) Date: Tue, 22 Dec 2009 01:22:32 +0100 (CET) Subject: [Lxml-checkins] r70243 - lxml/branch/iso-schematron/doc Message-ID: <20091222002232.6DC9E168020@codespeak.net> Author: jholg Date: Tue Dec 22 01:22:30 2009 New Revision: 70243 Modified: lxml/branch/iso-schematron/doc/xpathxslt.txt Log: XPath object as stylesheet param option documented Modified: lxml/branch/iso-schematron/doc/xpathxslt.txt ============================================================================== --- lxml/branch/iso-schematron/doc/xpathxslt.txt (original) +++ lxml/branch/iso-schematron/doc/xpathxslt.txt Tue Dec 22 01:22:30 2009 @@ -541,6 +541,14 @@ >>> str(result) '\nText\n' +It's also possible to pass an XPath object as a parameter: + +.. sourcecode:: pycon + + >>> result = transform(doc, a=etree.XPath("/a/b/text()")) + >>> str(result) + '\nText\n' + Passing a string expression looks like this: .. 
sourcecode:: pycon From jholg at codespeak.net Tue Dec 22 01:38:02 2009 From: jholg at codespeak.net (jholg at codespeak.net) Date: Tue, 22 Dec 2009 01:38:02 +0100 (CET) Subject: [Lxml-checkins] r70244 - in lxml/branch/iso-schematron: doc src/lxml/isoschematron src/lxml/tests Message-ID: <20091222003802.6B760168020@codespeak.net> Author: jholg Date: Tue Dec 22 01:38:00 2009 New Revision: 70244 Modified: lxml/branch/iso-schematron/doc/validation.txt lxml/branch/iso-schematron/src/lxml/isoschematron/__init__.py lxml/branch/iso-schematron/src/lxml/tests/test_isoschematron.py Log: Changes: - more convenient parameter api, automagically converting to stylesheet parameters - added documentation for new 'phase' keyword arg, plus tests Modified: lxml/branch/iso-schematron/doc/validation.txt ============================================================================== --- lxml/branch/iso-schematron/doc/validation.txt (original) +++ lxml/branch/iso-schematron/doc/validation.txt Tue Dec 22 01:38:00 2009 @@ -356,7 +356,8 @@ de-facto reference implementation of Schematron, the pure-XSLT-1.0 `skeleton implementation`_. This is provided by the lxml.isoschematron package that implements the Schematron class, with an API compatible to the other -validators'. Pass an ElementTree object to construct a Schematron validator: +validators'. Pass an Element or ElementTree object to construct a Schematron +validator: .. sourcecode:: pycon >>> from lxml import isoschematron @@ -411,7 +412,7 @@ Built on a pure-xslt implementation, the actual validator is created as an XSLT 1.0 stylesheet using these steps: -0. (Extract from XML Schema or RelaxNG schema) +0. (Extract embedded Schematron from XML Schema or RelaxNG schema) 1. Process inclusions 2. Process abstract patterns 3. Compile the schematron schema to XSLT @@ -422,12 +423,18 @@ The ``include`` and ``expand`` keyword arguments can be used to switch off steps 1) and 2). 
-To set parameters for steps 1), 2) and 3) dictionaries containing XSLT -parameters can be provided using the keyword arguments ``include_params``, -``expand_params`` or ``compile_params``. Note that these parameters are -stylesheet parameters so you need to set string parameters using quotes or the -XSLT.strparam() classmethod, see XPath and XSLT with lxml: -Stylesheet-parameters_. +To set parameters for steps 1), 2) and 3) dictionaries containing parameters +for XSLT can be provided using the keyword arguments ``include_params``, +``expand_params`` or ``compile_params``. Schematron automatically converts these +parameters to stylesheet parameters so you need not worry to set string +parameters using quotes or to use XSLT.strparam(). If you ever need to pass an +XPath as argument to the XSLT stylesheet you can pass in an etree.XPath object +(see XPath and XSLT with lxml: Stylesheet-parameters_ for background on this). + +The ``phase`` parameter of the compile step is additionally exposed as a keyword +argument. If set, it overrides occurrence in ``compile_params``. Note that +isoschematron.Schematron might expose more common parameters as additional keyword +args in the future. By setting ``store_schematron`` to True, the (included-and-expanded) schematron document tree is stored and made available through the ``schematron`` property. @@ -436,12 +443,81 @@ document tree being kept; it can be retrieved through the ``validator_xslt`` property. -Finally, with ``store_report`` set to True (the default), the resulting +Finally, with ``store_report`` set to True (default: False), the resulting validation report document gets stored and can be accessed as the ``validation_report`` property. .. _Stylesheet-parameters: xpathxslt.html#stylesheet-parameters +Using the ``phase`` parameter of isoschematron.Schematron allows for selective +validation of predefined pattern groups: + +.. sourcecode:: pycon + + >>> f = StringIO('''\ + ... + ... + ... + ... + ... + ... + ... + ... 
+ ... Sum equals 100%. + ... + ... Sum is not 100%. + ... + ... + ... + ... All entries must be positive. + ... + ... Number () not positive + ... + ... + ... + ... ''') + + >>> sct_doc = etree.parse(f) + >>> schematron = isoschematron.Schematron(sct_doc) + + >>> valid = StringIO('''\ + ... + ... 20 + ... 30 + ... 50 + ... + ... ''') + + >>> doc = etree.parse(valid) + >>> schematron.validate(doc) + True + + >>> invalid_positive = StringIO('''\ + ... + ... 0 + ... 50 + ... 50 + ... + ... ''') + + >>> doc = etree.parse(invalid_positive) + + >>> schematron.validate(doc) + False + +If the constraint of Percent entries being positive is not of interest in a +certain validation scenario, it can now be disabled: + +.. sourcecode:: pycon + + >>> selective = isoschematron.Schematron(sct_doc, phase="phase.sum_check") + >>> selective.validate(doc) + True + +The usage of validation phases is a unique feature of ISO-Schematron and can be +a very powerful tool e.g. for establishing validation stages or to provide +different validators for different "validation audiences". 
+ (Pre-ISO-Schematron) -------------------- Modified: lxml/branch/iso-schematron/src/lxml/isoschematron/__init__.py ============================================================================== --- lxml/branch/iso-schematron/src/lxml/isoschematron/__init__.py (original) +++ lxml/branch/iso-schematron/src/lxml/isoschematron/__init__.py Tue Dec 22 01:38:00 2009 @@ -3,8 +3,26 @@ """ import os.path +from copy import copy from lxml import etree as _etree # due to validator __init__ signature +# some compat stuff, borrowed from lxml.html +try: + bytes = __builtins__["bytes"] +except (KeyError, NameError): + # Python < 2.6 + bytes = str +try: + unicode = __builtins__["unicode"] +except (KeyError, NameError): + # Python 3 + unicode = str +try: + basestring = __builtins__["basestring"] +except (KeyError, NameError): + # Python 3 + basestring = (str, bytes) + # some namespaces #FIXME: Maybe lxml should provide a dedicated place for common namespace @@ -50,6 +68,43 @@ os.path.join(_resources_dir, 'rng', 'iso-schematron.rng'))) +def stylesheet_params(**kwargs): + """Convert keyword args to a dictionary of stylesheet parameters. + Conversion follows these rules: + If an arg is string wrap it with XSLT.strparam(). + If an arg is an XPath object just use its .path property. + If arg is None pass None. + Else convert arg to string. + """ + result = {} + for key, val in kwargs.iteritems(): + if isinstance(val, basestring): + val = _etree.XSLT.strparam(val) + elif isinstance(val, _etree.XPath): + val = val.path + elif val is None: + pass + else: + val = unicode(val) + result[key] = val + return result + + +# helper function for use in Schematron __init__ +def _stylesheet_param_dict(paramsDict, kwargsDict): + """Return a copy of paramsDict, updated with kwargsDict entries, wrapped as + stylesheet arguments. + kwargsDict entries with a value of None are ignored. 
+ """ + kwargsDict = dict([ (k, v) for k, v in kwargsDict.iteritems() + if v is not None ]) + # beware of changing mutable default arg + paramsDict = copy(paramsDict) + paramsDict.update(kwargsDict) + paramsDict = stylesheet_params(**paramsDict) + return paramsDict + + class Schematron(_etree._Validator): """Schematron(self, etree=None, file=None, include=True, expand=True, include_params={}, expand_params={}, compile_params={}, @@ -76,7 +131,7 @@ property. If ``store_xslt`` is set to True, the validation XSLT document tree will be stored and can be retrieved through the ``validator_xslt`` property. - With ``store_report`` set to True (the default), the resulting validation + With ``store_report`` set to True (default: False), the resulting validation report document gets stored and can be accessed as the ``validation_report`` property. @@ -125,14 +180,15 @@ def __init__(self, etree=None, file=None, include=True, expand=True, include_params={}, expand_params={}, compile_params={}, - store_schematron=False, store_xslt=False, store_report=False): + store_schematron=False, store_xslt=False, store_report=False, + phase=None, **kwargs): super(self.__class__, self).__init__() self._store_report = store_report self._schematron = None self._validator_xslt = None self._validation_report = None - + # parse schema document, may be a schematron schema or an XML Schema or # a RelaxNG schema with embedded schematron rules try: @@ -170,6 +226,9 @@ schematron_schema_valid.error_log) if store_schematron: self._schematron = schematron + # add new compile keyword args here if exposing them + compile_kwargs = {'phase': phase} + compile_params = _stylesheet_param_dict(compile_params, compile_kwargs) validator_xslt = iso_compile2xslt(schematron, **compile_params) if store_xslt: self._validator_xslt = validator_xslt Modified: lxml/branch/iso-schematron/src/lxml/tests/test_isoschematron.py ============================================================================== --- 
lxml/branch/iso-schematron/src/lxml/tests/test_isoschematron.py (original) +++ lxml/branch/iso-schematron/src/lxml/tests/test_isoschematron.py Tue Dec 22 01:38:00 2009 @@ -384,7 +384,8 @@ valid = schematron(tree_invalid) self.assert_(not valid) self.assertEquals( - len(schematron.error_log), expected, 'expected %s errors: %s (%s errors)' % + len(schematron.error_log), expected, + 'expected %s errors: %s (%s errors)' % (expected, schematron.error_log, len(schematron.error_log))) tree_invalid = self.parse('''\ @@ -397,7 +398,8 @@ valid = schematron(tree_invalid) self.assert_(not valid) self.assertEquals( - len(schematron.error_log), expected, 'expected %s errors: %s (%s errors)' % + len(schematron.error_log), expected, + 'expected %s errors: %s (%s errors)' % (expected, schematron.error_log, len(schematron.error_log))) def test_schematron_phases(self): @@ -485,34 +487,159 @@ valid = schematron(tree_invalid) self.assert_(not valid) self.assertEquals( - len(schematron.error_log), expected, 'expected %s errors: %s (%s errors)' % + len(schematron.error_log), expected, + 'expected %s errors: %s (%s errors)' % (expected, schematron.error_log, len(schematron.error_log))) # check phase mandatory schematron = isoschematron.Schematron( - schema, compile_params={'phase': '"mandatory"'}) + schema, compile_params={'phase': 'mandatory'}) self.assert_(schematron(tree_valid), schematron.error_log) expected = 1 valid = schematron(tree_invalid) self.assert_(not valid) self.assertEquals( - len(schematron.error_log), expected, 'expected %s errors: %s (%s errors)' % + len(schematron.error_log), expected, + 'expected %s errors: %s (%s errors)' % (expected, schematron.error_log, len(schematron.error_log))) # check phase datetime_checks schematron = isoschematron.Schematron( - schema, compile_params={'phase': '"datetime_checks"'}) + schema, compile_params={'phase': 'datetime_checks'}) self.assert_(schematron(tree_valid), schematron.error_log) expected = 2 valid = schematron(tree_invalid) 
self.assert_(not valid) self.assertEquals( - len(schematron.error_log), expected, 'expected %s errors: %s (%s errors)' % + len(schematron.error_log), expected, + 'expected %s errors: %s (%s errors)' % (expected, schematron.error_log, len(schematron.error_log))) # check phase full schematron = isoschematron.Schematron( - schema, compile_params={'phase': '"full"'}) + schema, compile_params={'phase': 'full'}) + self.assert_(schematron(tree_valid), schematron.error_log) + expected = 3 + valid = schematron(tree_invalid) + self.assert_(not valid) + self.assertEquals( + len(schematron.error_log), expected, + 'expected %s errors: %s (%s errors)' % + (expected, schematron.error_log, len(schematron.error_log))) + + def test_schematron_phases_kwarg(self): + schema = self.parse('''\ + + iso schematron validation + + + + + + + + + + + + + + + + + + + + + + + + + [ERROR] element () dateTime value () is not qualified as UTC (tz: ) + + + + + + + + [ERROR] element () dateTime value () is not qualified as UTC (tz: ) + + + + + mandatory number_of_entries test + + [ERROR] number_of_entries () must equal the number of entries/entry elements () + + + + + + + + + + + + +''') + tree_valid = self.parse('''\ + + 2009-12-10T15:21:00Z + + 0 + + + +''') + tree_invalid = self.parse('''\ + + 2009-12-10T16:21:00+01:00 + 2009-12-10T16:21:00+01:00 + 3 + + Entry 1 + Entry 2 + + +''') + # check everything (default phase #ALL) + schematron = isoschematron.Schematron(schema) + self.assert_(schematron(tree_valid), schematron.error_log) + expected = 3 + valid = schematron(tree_invalid) + self.assert_(not valid) + self.assertEquals( + len(schematron.error_log), expected, + 'expected %s errors: %s (%s errors)' % + (expected, schematron.error_log, len(schematron.error_log))) + + # check phase mandatory + schematron = isoschematron.Schematron(schema, phase='mandatory') + self.assert_(schematron(tree_valid), schematron.error_log) + expected = 1 + valid = schematron(tree_invalid) + self.assert_(not valid) + 
self.assertEquals( + len(schematron.error_log), expected, + 'expected %s errors: %s (%s errors)' % + (expected, schematron.error_log, len(schematron.error_log))) + + # check phase datetime_checks + schematron = isoschematron.Schematron(schema, phase='datetime_checks') + self.assert_(schematron(tree_valid), schematron.error_log) + expected = 2 + valid = schematron(tree_invalid) + self.assert_(not valid) + self.assertEquals( + len(schematron.error_log), expected, + 'expected %s errors: %s (%s errors)' % + (expected, schematron.error_log, len(schematron.error_log))) + + # check phase full + schematron = isoschematron.Schematron(schema, phase='full') self.assert_(schematron(tree_valid), schematron.error_log) expected = 3 valid = schematron(tree_invalid) From scoder at codespeak.net Sun Dec 27 10:54:20 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 27 Dec 2009 10:54:20 +0100 (CET) Subject: [Lxml-checkins] r70277 - in lxml/trunk: . doc Message-ID: <20091227095420.AEDFE168020@codespeak.net> Author: scoder Date: Sun Dec 27 10:54:19 2009 New Revision: 70277 Modified: lxml/trunk/ (props changed) lxml/trunk/doc/xpathxslt.txt Log: r5367 at lenny: sbehnel | 2009-12-22 18:51:01 +0100 doc fix Modified: lxml/trunk/doc/xpathxslt.txt ============================================================================== --- lxml/trunk/doc/xpathxslt.txt (original) +++ lxml/trunk/doc/xpathxslt.txt Sun Dec 27 10:54:19 2009 @@ -192,6 +192,8 @@ of interest. For these cases, you can deactivate the parental relationship using the keyword argument ``smart_strings``. +.. sourcecode:: pycon + >>> root = etree.XML("TEXT") >>> find_text = etree.XPath("//text()") From scoder at codespeak.net Sun Dec 27 11:05:34 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 27 Dec 2009 11:05:34 +0100 (CET) Subject: [Lxml-checkins] r70278 - in lxml/trunk: . 
src/lxml Message-ID: <20091227100534.57DA9168020@codespeak.net> Author: scoder Date: Sun Dec 27 11:05:33 2009 New Revision: 70278 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/apihelpers.pxi Log: r5368 at lenny: sbehnel | 2009-12-22 18:52:31 +0100 code cleanup with Cython 0.12 Modified: lxml/trunk/src/lxml/apihelpers.pxi ============================================================================== --- lxml/trunk/src/lxml/apihelpers.pxi (original) +++ lxml/trunk/src/lxml/apihelpers.pxi Sun Dec 27 11:05:33 2009 @@ -1284,7 +1284,7 @@ return s[:slen].decode('UTF-8') return s[:slen] -cdef object _utf8(object s): +cdef bytes _utf8(object s): cdef int invalid if python.PyBytes_CheckExact(s): invalid = check_string_utf8(s) @@ -1354,10 +1354,10 @@ except UnicodeDecodeError: pass try: - return python.PyUnicode_DecodeUTF8(c_path, c_len, NULL) + return c_path[:c_len].decode('UTF-8') except UnicodeDecodeError: # this is a stupid fallback, but it might still work... - return python.PyUnicode_DecodeLatin1(c_path, c_len, 'replace') + return c_path[:c_len].decode('latin-1', 'replace') cdef object _encodeFilenameUTF8(object filename): u"""Recode filename as UTF-8. Tries ASCII, local filesystem encoding and From scoder at codespeak.net Sun Dec 27 11:05:37 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 27 Dec 2009 11:05:37 +0100 (CET) Subject: [Lxml-checkins] r70279 - in lxml/trunk: . 
src/lxml Message-ID: <20091227100537.8D642168021@codespeak.net> Author: scoder Date: Sun Dec 27 11:05:37 2009 New Revision: 70279 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/xslt.pxi Log: r5369 at lenny: sbehnel | 2009-12-25 12:31:21 +0100 fix class attribute type Modified: lxml/trunk/src/lxml/xslt.pxi ============================================================================== --- lxml/trunk/src/lxml/xslt.pxi (original) +++ lxml/trunk/src/lxml/xslt.pxi Sun Dec 27 11:05:37 2009 @@ -319,7 +319,7 @@ u"""A wrapper class for literal XSLT string parameters that require quote escaping. """ - cdef str strval + cdef bytes strval def __init__(self, strval): self.strval = _utf8(strval) From scoder at codespeak.net Sun Dec 27 11:05:40 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 27 Dec 2009 11:05:40 +0100 (CET) Subject: [Lxml-checkins] r70280 - in lxml/trunk: . src/lxml Message-ID: <20091227100540.7CDDB168020@codespeak.net> Author: scoder Date: Sun Dec 27 11:05:40 2009 New Revision: 70280 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/apihelpers.pxi Log: r5370 at lenny: sbehnel | 2009-12-25 14:01:54 +0100 make sure _utf8() always returns a plain byte string object, not a subtype Modified: lxml/trunk/src/lxml/apihelpers.pxi ============================================================================== --- lxml/trunk/src/lxml/apihelpers.pxi (original) +++ lxml/trunk/src/lxml/apihelpers.pxi Sun Dec 27 11:05:40 2009 @@ -1292,6 +1292,7 @@ s = python.PyUnicode_AsUTF8String(s) invalid = check_string_utf8(s) == -1 elif python.PyBytes_Check(s): + s = bytes(s) invalid = check_string_utf8(s) else: raise TypeError, u"Argument must be string or unicode." From scoder at codespeak.net Sun Dec 27 11:05:43 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sun, 27 Dec 2009 11:05:43 +0100 (CET) Subject: [Lxml-checkins] r70281 - in lxml/trunk: . 
src/lxml Message-ID: <20091227100543.E596B168021@codespeak.net> Author: scoder Date: Sun Dec 27 11:05:43 2009 New Revision: 70281 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/apihelpers.pxi Log: r5371 at lenny: sbehnel | 2009-12-25 15:33:46 +0100 code cleanup Modified: lxml/trunk/src/lxml/apihelpers.pxi ============================================================================== --- lxml/trunk/src/lxml/apihelpers.pxi (original) +++ lxml/trunk/src/lxml/apihelpers.pxi Sun Dec 27 11:05:43 2009 @@ -1159,10 +1159,8 @@ u"""Append a new child to a parent element. """ cdef xmlNode* c_next - cdef xmlNode* c_node - cdef xmlDoc* c_source_doc - c_node = child._c_node - c_source_doc = c_node.doc + cdef xmlNode* c_node = child._c_node + cdef xmlDoc* c_source_doc = c_node.doc # store possible text node c_next = c_node.next # move node itself @@ -1178,10 +1176,8 @@ """ cdef xmlNode* c_next cdef xmlNode* c_child - cdef xmlNode* c_node - cdef xmlDoc* c_source_doc - c_node = child._c_node - c_source_doc = c_node.doc + cdef xmlNode* c_node = child._c_node + cdef xmlDoc* c_source_doc = c_node.doc # store possible text node c_next = c_node.next # move node itself @@ -1199,11 +1195,9 @@ cdef int _appendSibling(_Element element, _Element sibling) except -1: u"""Append a new child to a parent element. """ + cdef xmlNode* c_node = sibling._c_node + cdef xmlDoc* c_source_doc = c_node.doc cdef xmlNode* c_next - cdef xmlNode* c_node - cdef xmlDoc* c_source_doc - c_node = sibling._c_node - c_source_doc = c_node.doc # store possible text node c_next = c_node.next # move node itself @@ -1216,11 +1210,9 @@ cdef int _prependSibling(_Element element, _Element sibling) except -1: u"""Append a new child to a parent element. 
""" + cdef xmlNode* c_node = sibling._c_node + cdef xmlDoc* c_source_doc = c_node.doc cdef xmlNode* c_next - cdef xmlNode* c_node - cdef xmlDoc* c_source_doc - c_node = sibling._c_node - c_source_doc = c_node.doc # store possible text node c_next = c_node.next # move node itself @@ -1231,8 +1223,7 @@ moveNodeToDocument(element._doc, c_source_doc, c_node) cdef inline int isutf8(char* s): - cdef char c - c = s[0] + cdef char c = s[0] while c != c'\0': if c & 0x80: return 1 @@ -1245,13 +1236,9 @@ for ASCII, 1 for UTF-8 and -1 in the case of errors, such as NULL bytes or ASCII control characters. """ - cdef char* s - cdef char* c_end - cdef char c - cdef bint is_non_ascii - s = _cstr(pystring) - c_end = s + python.PyBytes_GET_SIZE(pystring) - is_non_ascii = 0 + cdef char* s = _cstr(pystring) + cdef char* c_end = s + python.PyBytes_GET_SIZE(pystring) + cdef bint is_non_ascii = 0 while s < c_end: if s[0] & 0x80: # skip the entire multi byte sequence From scoder at codespeak.net Mon Dec 28 20:19:20 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 28 Dec 2009 20:19:20 +0100 (CET) Subject: [Lxml-checkins] r70310 - in lxml/trunk: . 
doc src/lxml src/lxml/tests Message-ID: <20091228191920.8B5C9168015@codespeak.net> Author: scoder Date: Mon Dec 28 20:19:18 2009 New Revision: 70310 Modified: lxml/trunk/ (props changed) lxml/trunk/CHANGES.txt lxml/trunk/doc/tutorial.txt lxml/trunk/src/lxml/apihelpers.pxi lxml/trunk/src/lxml/lxml.etree.pyx lxml/trunk/src/lxml/proxy.pxi lxml/trunk/src/lxml/public-api.pxi lxml/trunk/src/lxml/tests/test_elementtree.py lxml/trunk/src/lxml/tree.pxd Log: r5383 at lenny: sbehnel | 2009-12-28 20:17:38 +0100 always assign a prefix to namespaced attributes using a dedicated _searchNsByHref() implementation Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Mon Dec 28 20:19:18 2009 @@ -8,6 +8,12 @@ Features added -------------- +* Setting the value of a namespaced attribute always uses a prefixed + namespace instead of the default namespace even if both declare the + same namespace URI. This avoids serialisation problems when an + attribute from a default namespace is set on an element from a + different namespace. + * XSLT extension elements: support for XSLT context nodes other than elements: document root, comments, processing instructions. Modified: lxml/trunk/doc/tutorial.txt ============================================================================== --- lxml/trunk/doc/tutorial.txt (original) +++ lxml/trunk/doc/tutorial.txt Mon Dec 28 20:19:18 2009 @@ -1013,7 +1013,13 @@ Hello World -Namespaces on attributes work alike: +Namespaces on attributes work alike, but since version 2.3, lxml.etree +will make sure that the attribute uses a prefixed namespace +declaration. This is because unprefixed attribute names are not +considered to be in a namespace by the XML namespace specification +(`section 6.2`_), so they may end up losing their namespace on a +serialise-parse roundtrip, even if they appear in a namespaced +element. .. 
sourcecode:: pycon @@ -1021,7 +1027,7 @@ >>> print(etree.tostring(xhtml, pretty_print=True)) - Hello World + Hello World >>> print(body.get("bgcolor")) @@ -1029,7 +1035,9 @@ >>> body.get(XHTML + "bgcolor") '#CCFFAA' -You can also use XPath in this way: +.. _`section 6.2`: http://www.w3.org/TR/2009/REC-xml-names-20091208/#defaulting + +You can also use XPath with fully qualified names: .. sourcecode:: pycon Modified: lxml/trunk/src/lxml/apihelpers.pxi ============================================================================== --- lxml/trunk/src/lxml/apihelpers.pxi (original) +++ lxml/trunk/src/lxml/apihelpers.pxi Mon Dec 28 20:19:18 2009 @@ -289,7 +289,7 @@ tree.xmlNewProp(c_node, _cstr(attr_name_utf), _cstr(value_utf)) else: _uriValidOrRaise(attr_ns_utf) - c_ns = doc._findOrBuildNodeNs(c_node, _cstr(attr_ns_utf), NULL) + c_ns = doc._findOrBuildNodeNs(c_node, _cstr(attr_ns_utf), NULL, 1) tree.xmlNewNsProp(c_node, c_ns, _cstr(attr_name_utf), _cstr(value_utf)) @@ -399,6 +399,64 @@ cstd.free(c_ns_list) return 0 +cdef xmlNs* _searchNsByHref(xmlNode* c_node, char* c_href, bint is_attribute): + u"""Search a namespace declaration that covers a node (element or + attribute). + + For attributes, try to find a prefixed namespace declaration + instead of the default namespaces. This helps in supporting + round-trips for attributes on elements with a different namespace. 
+ """ + cdef xmlNs* c_ns + cdef xmlNs* c_default_ns = NULL + cdef xmlNode* c_element + if c_href is NULL or c_node is NULL or c_node.type == tree.XML_ENTITY_REF_NODE: + return NULL + if cstd.strcmp(c_href, tree.XML_XML_NAMESPACE) == 0: + # no special cases here, let libxml2 handle this + return tree.xmlSearchNsByHref(c_node.doc, c_node, c_href) + if c_node.type == tree.XML_ATTRIBUTE_NODE: + is_attribute = 1 + while c_node is not NULL and c_node.type != tree.XML_ELEMENT_NODE: + c_node = c_node.parent + c_element = c_node + while c_node is not NULL: + if c_node.type == tree.XML_ELEMENT_NODE: + c_ns = c_node.nsDef + while c_ns is not NULL: + if c_ns.href is not NULL and cstd.strcmp(c_href, c_ns.href) == 0: + if c_ns.prefix is NULL and is_attribute: + # for attributes, continue searching a named + # prefix, but keep the first default namespace + # declaration that we found + if c_default_ns is NULL: + c_default_ns = c_ns + elif tree.xmlSearchNs( + c_element.doc, c_element, c_ns.prefix) is c_ns: + # start node is in namespace scope => found! + return c_ns + c_ns = c_ns.next + if c_node is not c_element and c_node.ns is not NULL: + # optimise: the node may have the namespace itself + c_ns = c_node.ns + if c_ns.href is not NULL and cstd.strcmp(c_href, c_ns.href) == 0: + if c_ns.prefix is NULL and is_attribute: + # for attributes, continue searching a named + # prefix, but keep the first default namespace + # declaration that we found + if c_default_ns is NULL: + c_default_ns = c_ns + elif tree.xmlSearchNs( + c_element.doc, c_element, c_ns.prefix) is c_ns: + # start node is in namespace scope => found! + return c_ns + c_node = c_node.parent + # nothing found => use a matching default namespace or fail + if c_default_ns is not NULL: + if tree.xmlSearchNs(c_element.doc, c_element, NULL) is c_default_ns: + return c_default_ns + return NULL + cdef int _replaceNodeByChildren(_Document doc, xmlNode* c_node) except -1: # NOTE: this does not deallocate the node, just unlink it! 
cdef xmlNode* c_parent @@ -496,7 +554,7 @@ c_ns = NULL else: c_ns = element._doc._findOrBuildNodeNs(element._c_node, - _cstr(ns), NULL) + _cstr(ns), NULL, 1) tree.xmlSetNsProp(element._c_node, c_ns, c_tag, c_value) return 0 @@ -683,7 +741,7 @@ return tag else: c_ns = element._doc._findOrBuildNodeNs( - element._c_node, _cstr(ns), NULL) + element._c_node, _cstr(ns), NULL, 0) return python.PyBytes_FromFormat('%s:%s', c_ns.prefix, _cstr(tag)) cdef inline bint _hasChild(xmlNode* c_node): Modified: lxml/trunk/src/lxml/lxml.etree.pyx ============================================================================== --- lxml/trunk/src/lxml/lxml.etree.pyx (original) +++ lxml/trunk/src/lxml/lxml.etree.pyx Mon Dec 28 20:19:18 2009 @@ -346,7 +346,8 @@ return ns cdef xmlNs* _findOrBuildNodeNs(self, xmlNode* c_node, - char* c_href, char* c_prefix) except NULL: + char* c_href, char* c_prefix, + bint is_attribute) except NULL: u"""Get or create namespace structure for a node. Reuses the prefix if possible. """ @@ -358,9 +359,14 @@ u"invalid node type %d, expected %d" % ( c_node.type, tree.XML_ELEMENT_NODE) # look for existing ns declaration - c_ns = tree.xmlSearchNsByHref(self._c_doc, c_node, c_href) + c_ns = _searchNsByHref(c_node, c_href, is_attribute) if c_ns is not NULL: - return c_ns + if is_attribute and c_ns.prefix is NULL: + # do not put namespaced attributes into the default + # namespace as this would break serialisation + pass + else: + return c_ns # none found => determine a suitable new prefix if c_prefix is NULL: @@ -386,7 +392,7 @@ cdef int _setNodeNs(self, xmlNode* c_node, char* href) except -1: u"Lookup namespace structure and set it for the node." 
cdef xmlNs* c_ns - c_ns = self._findOrBuildNodeNs(c_node, href, NULL) + c_ns = self._findOrBuildNodeNs(c_node, href, NULL, 0) tree.xmlSetNs(c_node, c_ns) cdef __initPrefixCache(): Modified: lxml/trunk/src/lxml/proxy.pxi ============================================================================== --- lxml/trunk/src/lxml/proxy.pxi (original) +++ lxml/trunk/src/lxml/proxy.pxi Mon Dec 28 20:19:18 2009 @@ -261,11 +261,8 @@ _appendToNsCache(c_ns_cache, c_nsdef[0], c_nsdef[0]) c_nsdef = &c_nsdef[0].next else: - # known namespace href => strip the ns - if c_ns is tree.xmlSearchNs(c_element.doc, c_element.parent, - c_ns.prefix): - # prefix is not shadowed by parents => ns is reusable - _appendToNsCache(c_ns_cache, c_nsdef[0], c_ns) + # known namespace href => cache mapping and strip old ns + _appendToNsCache(c_ns_cache, c_nsdef[0], c_ns) # cut out c_nsdef.next and prepend it to garbage chain c_ns_next = c_nsdef[0].next c_nsdef[0].next = c_del_ns_list[0] @@ -348,7 +345,8 @@ else: # not in cache => find a replacement from this document c_ns = doc._findOrBuildNodeNs( - c_start_node, c_node.ns.href, c_node.ns.prefix) + c_start_node, c_node.ns.href, c_node.ns.prefix, + c_node.type == tree.XML_ATTRIBUTE_NODE) _appendToNsCache(&c_ns_cache, c_node.ns, c_ns) c_node.ns = c_ns Modified: lxml/trunk/src/lxml/public-api.pxi ============================================================================== --- lxml/trunk/src/lxml/public-api.pxi (original) +++ lxml/trunk/src/lxml/public-api.pxi Mon Dec 28 20:19:18 2009 @@ -154,4 +154,4 @@ _Document doc, xmlNode* c_node, char* href, char* prefix) except NULL: if doc is None: raise TypeError - return doc._findOrBuildNodeNs(c_node, href, prefix) + return doc._findOrBuildNodeNs(c_node, href, prefix, 0) Modified: lxml/trunk/src/lxml/tests/test_elementtree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_elementtree.py (original) +++ lxml/trunk/src/lxml/tests/test_elementtree.py 
Mon Dec 28 20:19:18 2009 @@ -2585,6 +2585,40 @@ parsed = parse(BytesIO( tostring(baz) )).getroot() self.assertEquals('{%s}baz' % ns_href, parsed.tag) + def test_attribute_namespace_roundtrip(self): + fromstring = self.etree.fromstring + tostring = self.etree.tostring + + ns_href = "http://a.b.c" + xml = _bytes('' % ( + ns_href,ns_href)) + root = fromstring(xml) + self.assertEquals('test', root[0].get('{%s}a' % ns_href)) + + xml2 = tostring(root) + self.assertTrue(':a=' in xml2, xml2) + + root2 = fromstring(xml2) + self.assertEquals('test', root[0].get('{%s}a' % ns_href)) + + def test_attribute_namespace_roundtrip_replaced(self): + fromstring = self.etree.fromstring + tostring = self.etree.tostring + + ns_href = "http://a.b.c" + xml = _bytes('' % ( + ns_href,ns_href)) + root = fromstring(xml) + self.assertEquals('test', root[0].get('{%s}a' % ns_href)) + + root[0].set('{%s}a' % ns_href, 'TEST') + + xml2 = tostring(root) + self.assertTrue(':a=' in xml2, xml2) + + root2 = fromstring(xml2) + self.assertEquals('TEST', root[0].get('{%s}a' % ns_href)) + def test_tostring(self): tostring = self.etree.tostring Element = self.etree.Element Modified: lxml/trunk/src/lxml/tree.pxd ============================================================================== --- lxml/trunk/src/lxml/tree.pxd (original) +++ lxml/trunk/src/lxml/tree.pxd Mon Dec 28 20:19:18 2009 @@ -172,6 +172,8 @@ xmlBuffer* buffer xmlBuffer* conv int error + + char* XML_XML_NAMESPACE cdef void xmlFreeDoc(xmlDoc* cur) nogil cdef void xmlFreeDtd(xmlDtd* cur) nogil From scoder at codespeak.net Mon Dec 28 20:19:22 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 28 Dec 2009 20:19:22 +0100 (CET) Subject: [Lxml-checkins] r70311 - in lxml/trunk: . 
doc Message-ID: <20091228191922.0E3EC168015@codespeak.net> Author: scoder Date: Mon Dec 28 20:19:21 2009 New Revision: 70311 Modified: lxml/trunk/ (props changed) lxml/trunk/doc/build.txt Log: r5384 at lenny: sbehnel | 2009-12-28 20:19:11 +0100 doc fix Modified: lxml/trunk/doc/build.txt ============================================================================== --- lxml/trunk/doc/build.txt (original) +++ lxml/trunk/doc/build.txt Mon Dec 28 20:19:21 2009 @@ -43,7 +43,7 @@ *Only* if you are interested in building lxml from a Subversion checkout (e.g. to test a bug fix that has not been released yet) or if -want to be an lxml developer, then you do need a working Cython +you want to be an lxml developer, then you do need a working Cython installation. You can use EasyInstall_ to install it:: easy_install Cython>=0.12 From scoder at codespeak.net Tue Dec 29 10:42:18 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 29 Dec 2009 10:42:18 +0100 (CET) Subject: [Lxml-checkins] r70331 - in lxml/trunk: .
src/lxml Message-ID: <20091229094218.1CCC4168011@codespeak.net> Author: scoder Date: Tue Dec 29 10:42:16 2009 New Revision: 70331 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/cleanup.pxi Log: r5387 at lenny: sbehnel | 2009-12-29 10:07:42 +0100 Py3 fix Modified: lxml/trunk/src/lxml/cleanup.pxi ============================================================================== --- lxml/trunk/src/lxml/cleanup.pxi (original) +++ lxml/trunk/src/lxml/cleanup.pxi Tue Dec 29 10:42:16 2009 @@ -57,7 +57,7 @@ tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node, c_node, 1) if c_node.type == tree.XML_ELEMENT_NODE: if c_node.properties is not NULL: - for i in xrange(c_tag_count): + for i in range(c_tag_count): c_href = c_ns_tags[2*i] c_name = c_ns_tags[2*i+1] # must compare attributes manually to make sure we @@ -152,7 +152,7 @@ while c_child is not NULL: c_next = _nextElement(c_child) if c_child.type == tree.XML_ELEMENT_NODE: - for i in xrange(c_tag_count): + for i in range(c_tag_count): if _tagMatchesExactly(c_child, c_ns_tags[2*i], c_ns_tags[2*i+1]): if not with_tail: tree.xmlUnlinkNode(c_child) @@ -241,7 +241,7 @@ c_child = _findChildForwards(c_node, 0) while c_child is not NULL: if c_child.type == tree.XML_ELEMENT_NODE: - for i in xrange(c_tag_count): + for i in range(c_tag_count): if _tagMatchesExactly(c_child, c_ns_tags[2*i], c_ns_tags[2*i+1]): c_next = _findChildForwards(c_child, 0) or _nextElement(c_child) _replaceNodeByChildren(doc, c_child) From scoder at codespeak.net Tue Dec 29 10:42:21 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Tue, 29 Dec 2009 10:42:21 +0100 (CET) Subject: [Lxml-checkins] r70332 - in lxml/trunk: . 
src/lxml Message-ID: <20091229094221.B0E4C168022@codespeak.net> Author: scoder Date: Tue Dec 29 10:42:21 2009 New Revision: 70332 Modified: lxml/trunk/ (props changed) lxml/trunk/src/lxml/extensions.pxi Log: r5388 at lenny: sbehnel | 2009-12-29 10:42:09 +0100 Py3 fix for Cython 0.12 Modified: lxml/trunk/src/lxml/extensions.pxi ============================================================================== --- lxml/trunk/src/lxml/extensions.pxi (original) +++ lxml/trunk/src/lxml/extensions.pxi Tue Dec 29 10:42:21 2009 @@ -448,10 +448,10 @@ return rexpc.sub(replacement, s, count) cdef _register_in_context(self, _BaseContext context): - ns = "http://exslt.org/regular-expressions" - context._addLocalExtensionFunction(ns, "test", self.test) - context._addLocalExtensionFunction(ns, "match", self.match) - context._addLocalExtensionFunction(ns, "replace", self.replace) + ns = b"http://exslt.org/regular-expressions" + context._addLocalExtensionFunction(ns, b"test", self.test) + context._addLocalExtensionFunction(ns, b"match", self.match) + context._addLocalExtensionFunction(ns, b"replace", self.replace) ################################################################################ From scoder at codespeak.net Wed Dec 30 13:01:16 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 30 Dec 2009 13:01:16 +0100 (CET) Subject: [Lxml-checkins] r70351 - in lxml/trunk: . 
src/lxml Message-ID: <20091230120116.19563168021@codespeak.net> Author: scoder Date: Wed Dec 30 13:01:15 2009 New Revision: 70351 Modified: lxml/trunk/ (props changed) lxml/trunk/CHANGES.txt lxml/trunk/src/lxml/_elementpath.py Log: r5391 at lenny: sbehnel | 2009-12-30 12:48:32 +0100 enable caching for ElementPath parser Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Wed Dec 30 13:01:15 2009 @@ -8,6 +8,9 @@ Features added -------------- +* Enable path caching in ElementPath (``el.find*()``) to avoid parsing + overhead. + * Setting the value of a namespaced attribute always uses a prefixed namespace instead of the default namespace even if both declare the same namespace URI. This avoids serialisation problems when an Modified: lxml/trunk/src/lxml/_elementpath.py ============================================================================== --- lxml/trunk/src/lxml/_elementpath.py (original) +++ lxml/trunk/src/lxml/_elementpath.py Wed Dec 30 13:01:15 2009 @@ -190,6 +190,7 @@ token = _next() except StopIteration: break + _cache[path] = selector return selector ## From scoder at codespeak.net Wed Dec 30 13:23:09 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 30 Dec 2009 13:23:09 +0100 (CET) Subject: [Lxml-checkins] r70353 - in lxml/branch/iso-schematron/src/lxml: . 
isoschematron Message-ID: <20091230122309.ACBB8168022@codespeak.net> Author: scoder Date: Wed Dec 30 13:23:09 2009 New Revision: 70353 Modified: lxml/branch/iso-schematron/src/lxml/isoschematron/__init__.py lxml/branch/iso-schematron/src/lxml/xmlerror.pxi Log: remove unused left-over class _SettableLogEntry Modified: lxml/branch/iso-schematron/src/lxml/isoschematron/__init__.py ============================================================================== --- lxml/branch/iso-schematron/src/lxml/isoschematron/__init__.py (original) +++ lxml/branch/iso-schematron/src/lxml/isoschematron/__init__.py Wed Dec 30 13:23:09 2009 @@ -252,7 +252,6 @@ else: fname = etree.docinfo.URL or '' for error in errors: - logEntry = _etree._SettableLogEntry() # Does svrl report the line number, anywhere? Don't think so. self._append_log_message( domain=self._domain, type=self._error_type, Modified: lxml/branch/iso-schematron/src/lxml/xmlerror.pxi ============================================================================== --- lxml/branch/iso-schematron/src/lxml/xmlerror.pxi (original) +++ lxml/branch/iso-schematron/src/lxml/xmlerror.pxi Wed Dec 30 13:23:09 2009 @@ -101,12 +101,6 @@ property level_name: def __get__(self): return ErrorLevels._getName(self.level, u"unknown") - -#FIXME: Can _LogEntry be settable itself so we don't need this? 
-cdef class _SettableLogEntry(_LogEntry): - cpdef setGeneric(self, int domain, int type, int level, int line, - message, filename): - self._setGeneric(domain, type, level, line, message, filename) cdef class _BaseErrorLog: cdef _LogEntry _first_error From scoder at codespeak.net Wed Dec 30 13:24:26 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 30 Dec 2009 13:24:26 +0100 (CET) Subject: [Lxml-checkins] r70354 - lxml/branch/iso-schematron/src/lxml Message-ID: <20091230122426.31C02168022@codespeak.net> Author: scoder Date: Wed Dec 30 13:24:25 2009 New Revision: 70354 Modified: lxml/branch/iso-schematron/src/lxml/xmlerror.pxi Log: whitespace fix Modified: lxml/branch/iso-schematron/src/lxml/xmlerror.pxi ============================================================================== --- lxml/branch/iso-schematron/src/lxml/xmlerror.pxi (original) +++ lxml/branch/iso-schematron/src/lxml/xmlerror.pxi Wed Dec 30 13:24:25 2009 @@ -101,7 +101,7 @@ property level_name: def __get__(self): return ErrorLevels._getName(self.level, u"unknown") - + cdef class _BaseErrorLog: cdef _LogEntry _first_error cdef readonly object last_error From scoder at codespeak.net Wed Dec 30 14:49:40 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 30 Dec 2009 14:49:40 +0100 (CET) Subject: [Lxml-checkins] r70356 - lxml/branch/iso-schematron/src/lxml Message-ID: <20091230134940.8E566168021@codespeak.net> Author: scoder Date: Wed Dec 30 14:49:38 2009 New Revision: 70356 Modified: lxml/branch/iso-schematron/src/lxml/xpath.pxi lxml/branch/iso-schematron/src/lxml/xslt.pxi Log: safer stylesheet parameter handling: properly report conversion errors Modified: lxml/branch/iso-schematron/src/lxml/xpath.pxi ============================================================================== --- lxml/branch/iso-schematron/src/lxml/xpath.pxi (original) +++ lxml/branch/iso-schematron/src/lxml/xpath.pxi Wed Dec 30 14:49:38 2009 @@ -389,21 +389,20 @@ ``smart_strings=False``. 
""" cdef xpath.xmlXPathCompExpr* _xpath - cdef readonly object path + cdef bytes _path def __init__(self, path, *, namespaces=None, extensions=None, regexp=True, smart_strings=True): cdef xpath.xmlXPathContext* xpathCtxt _XPathEvaluatorBase.__init__(self, namespaces, extensions, regexp, smart_strings) - self.path = path - path = _utf8(path) + self._path = _utf8(path) xpathCtxt = xpath.xmlXPathNewContext(NULL) if xpathCtxt is NULL: python.PyErr_NoMemory() self.set_context(xpathCtxt) self._error_log.connect() - self._xpath = xpath.xmlXPathCtxtCompile(xpathCtxt, _cstr(path)) + self._xpath = xpath.xmlXPathCtxtCompile(xpathCtxt, _cstr(self._path)) self._error_log.disconnect() if self._xpath is NULL: self._raise_parse_error() @@ -435,6 +434,12 @@ self._unlock() return result + property path: + u"""The literal XPath expression. + """ + def __get__(self): + return self._path.decode(u'UTF-8') + def __dealloc__(self): if self._xpath is not NULL: xpath.xmlXPathFreeCompExpr(self._xpath) Modified: lxml/branch/iso-schematron/src/lxml/xslt.pxi ============================================================================== --- lxml/branch/iso-schematron/src/lxml/xslt.pxi (original) +++ lxml/branch/iso-schematron/src/lxml/xslt.pxi Wed Dec 30 14:49:38 2009 @@ -475,6 +475,7 @@ cdef xmlDoc* c_result = NULL cdef xmlDoc* c_doc cdef tree.xmlDict* c_dict + cdef char** params input_doc = _documentOrRaise(_input) root_node = _rootNodeOrRaise(_input) @@ -509,8 +510,13 @@ resolver_context = self._xslt_resolver_context._copy() transform_ctxt._private = resolver_context + live_refs = _convert_xslt_parameters(transform_ctxt, kw, ¶ms) c_result = self._run_transform( - c_doc, kw, context, transform_ctxt) + c_doc, params, context, transform_ctxt) + if params is not NULL: + # deallocate space for parameters + python.PyMem_Free(params) + live_refs = None if transform_ctxt.state != xslt.XSLT_STATE_OK: if c_result is not NULL: @@ -579,63 +585,59 @@ return _xsltResultTreeFactory(result_doc, self, 
profile_doc) cdef xmlDoc* _run_transform(self, xmlDoc* c_input_doc, - dict parameters, _XSLTContext context, + char** params, _XSLTContext context, xslt.xsltTransformContext* transform_ctxt): cdef xmlDoc* c_result - cdef char** params - cdef Py_ssize_t i, parameter_count - cdef list keep_ref - xslt.xsltSetTransformErrorFunc(transform_ctxt, self._error_log, _receiveXSLTError) - if self._access_control is not None: self._access_control._register_in_context(transform_ctxt) - - parameter_count = len(parameters) - if parameter_count > 0: - # allocate space for parameters - # * 2 as we want an entry for both key and value, - # and + 1 as array is NULL terminated - params = python.PyMem_Malloc( - sizeof(char*) * (parameter_count * 2 + 1)) - try: - i = 0 - keep_ref = [] - for key, value in parameters.iteritems(): - k = _utf8(key) - if isinstance(value, _XSLTQuotedStringParam): - v = (<_XSLTQuotedStringParam>value).strval - xslt.xsltQuoteOneUserParam( - transform_ctxt, _cstr(k), _cstr(v)) - else: - if isinstance(value, XPath): - v = _utf8((value).path) - else: - v = _utf8(value) - params[i] = _cstr(k) - i += 1 - params[i] = _cstr(v) - i += 1 - keep_ref.append(k) - keep_ref.append(v) - except: - python.PyMem_Free(params) - raise - params[i] = NULL - else: - params = NULL - with nogil: c_result = xslt.xsltApplyStylesheetUser( self._c_style, c_input_doc, params, NULL, NULL, transform_ctxt) - - if params is not NULL: - # deallocate space for parameters - python.PyMem_Free(params) - return c_result +cdef _convert_xslt_parameters(xslt.xsltTransformContext* transform_ctxt, + dict parameters, char*** params_ptr): + cdef Py_ssize_t i, parameter_count + cdef char** params + cdef list keep_ref + params_ptr[0] = NULL + parameter_count = len(parameters) + if parameter_count == 0: + return None + # allocate space for parameters + # * 2 as we want an entry for both key and value, + # and + 1 as array is NULL terminated + params = python.PyMem_Malloc( + sizeof(char*) * (parameter_count * 2 
+ 1)) + try: + i = 0 + keep_ref = [] + for key, value in parameters.iteritems(): + k = _utf8(key) + keep_ref.append(k) + if isinstance(value, _XSLTQuotedStringParam): + v = (<_XSLTQuotedStringParam>value).strval + xslt.xsltQuoteOneUserParam( + transform_ctxt, _cstr(k), _cstr(v)) + else: + if isinstance(value, XPath): + v = (value)._path + else: + v = _utf8(value) + keep_ref.append(v) + params[i] = _cstr(k) + i += 1 + params[i] = _cstr(v) + i += 1 + except: + python.PyMem_Free(params) + raise + params[i] = NULL + params_ptr[0] = params + return keep_ref + cdef extern from "etree_defs.h": # macro call to 't->tp_new()' for instantiation without calling __init__() cdef XSLT NEW_XSLT "PY_NEW" (object t) From scoder at codespeak.net Wed Dec 30 14:51:41 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 30 Dec 2009 14:51:41 +0100 (CET) Subject: [Lxml-checkins] r70357 - lxml/branch/iso-schematron/src/lxml/isoschematron Message-ID: <20091230135141.3C6FC168021@codespeak.net> Author: scoder Date: Wed Dec 30 14:51:40 2009 New Revision: 70357 Modified: lxml/branch/iso-schematron/src/lxml/isoschematron/__init__.py Log: cleanup and minor fixes Modified: lxml/branch/iso-schematron/src/lxml/isoschematron/__init__.py ============================================================================== --- lxml/branch/iso-schematron/src/lxml/isoschematron/__init__.py (original) +++ lxml/branch/iso-schematron/src/lxml/isoschematron/__init__.py Wed Dec 30 14:51:40 2009 @@ -3,7 +3,6 @@ """ import os.path -from copy import copy from lxml import etree as _etree # due to validator __init__ signature # some compat stuff, borrowed from lxml.html @@ -21,7 +20,7 @@ basestring = __builtins__["basestring"] except (KeyError, NameError): # Python 3 - basestring = (str, bytes) + basestring = str # some namespaces @@ -60,7 +59,7 @@ # svrl result accessors svrl_validation_errors = _etree.XPath( - '//svrl:failed-assert', namespaces={'sch': SCHEMATRON_NS, 'svrl': SVRL_NS}) + 
'//svrl:failed-assert', namespaces={'svrl': SVRL_NS}) # RelaxNG validator for schematron schemas @@ -71,20 +70,18 @@ def stylesheet_params(**kwargs): """Convert keyword args to a dictionary of stylesheet parameters. Conversion follows these rules: - If an arg is string wrap it with XSLT.strparam(). - If an arg is an XPath object just use its .path property. - If arg is None pass None. + If an arg is a string wrap it with XSLT.strparam(). + If an arg is an XPath object use its path string. + If arg is None ignore the parameter. Else convert arg to string. """ result = {} - for key, val in kwargs.iteritems(): + for key, val in kwargs.items(): if isinstance(val, basestring): val = _etree.XSLT.strparam(val) - elif isinstance(val, _etree.XPath): - val = val.path elif val is None: - pass - else: + continue + elif not isinstance(val, _etree.XPath): val = unicode(val) result[key] = val return result @@ -96,20 +93,20 @@ stylesheet arguments. kwargsDict entries with a value of None are ignored. """ - kwargsDict = dict([ (k, v) for k, v in kwargsDict.iteritems() - if v is not None ]) - # beware of changing mutable default arg - paramsDict = copy(paramsDict) - paramsDict.update(kwargsDict) + if paramsDict: + # beware of changing mutable default arg + paramsDict = dict(paramsDict) + for k, v in kwargsDict.items(): + if v is not None: # None values do not override + paramsDict[k] = v + else: + paramsDict = kwargsDict paramsDict = stylesheet_params(**paramsDict) return paramsDict class Schematron(_etree._Validator): - """Schematron(self, etree=None, file=None, include=True, expand=True, - include_params={}, expand_params={}, compile_params={}, - store_schematron=False, store_xslt=False, store_report=True) - An ISO Schematron validator. + """An ISO Schematron validator. Pass a root Element or an ElementTree to turn it into a validator. 
Alternatively, pass a filename as keyword argument 'file' to parse from @@ -117,7 +114,7 @@ Built on the Schematron language 'reference' skeleton pure-xslt implementation, the validator is created as an XSLT 1.0 stylesheet using these steps: - (0) (Extract from XML Schema or RelaxNG schema) + 0) (Extract from XML Schema or RelaxNG schema) 1) Process inclusions 2) Process abstract patterns 3) Compile the schematron schema to XSLT @@ -181,7 +178,7 @@ def __init__(self, etree=None, file=None, include=True, expand=True, include_params={}, expand_params={}, compile_params={}, store_schematron=False, store_xslt=False, store_report=False, - phase=None, **kwargs): + phase=None): super(self.__class__, self).__init__() self._store_report = store_report @@ -235,9 +232,7 @@ self._validator = _etree.XSLT(validator_xslt) def __call__(self, etree): - """__call__(self, etree) - - Validate doc using Schematron. + """Validate doc using Schematron. Returns true if document is valid, false if not. """ From scoder at codespeak.net Wed Dec 30 14:57:54 2009 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 30 Dec 2009 14:57:54 +0100 (CET) Subject: [Lxml-checkins] r70358 - in lxml/trunk: . 
doc src/lxml src/lxml/isoschematron src/lxml/isoschematron/resources src/lxml/isoschematron/resources/rng src/lxml/isoschematron/resources/xsl src/lxml/isoschematron/resources/xsl/iso-schematron-xslt1 src/lxml/tests Message-ID: <20091230135754.92226168021@codespeak.net> Author: scoder Date: Wed Dec 30 14:57:53 2009 New Revision: 70358 Added: lxml/trunk/src/lxml/isoschematron/ - copied from r70357, lxml/branch/iso-schematron/src/lxml/isoschematron/ lxml/trunk/src/lxml/isoschematron/__init__.py - copied unchanged from r70357, lxml/branch/iso-schematron/src/lxml/isoschematron/__init__.py lxml/trunk/src/lxml/isoschematron/resources/ - copied from r70357, lxml/branch/iso-schematron/src/lxml/isoschematron/resources/ lxml/trunk/src/lxml/isoschematron/resources/rng/ - copied from r70357, lxml/branch/iso-schematron/src/lxml/isoschematron/resources/rng/ lxml/trunk/src/lxml/isoschematron/resources/rng/iso-schematron.rng - copied unchanged from r70357, lxml/branch/iso-schematron/src/lxml/isoschematron/resources/rng/iso-schematron.rng lxml/trunk/src/lxml/isoschematron/resources/xsl/ - copied from r70357, lxml/branch/iso-schematron/src/lxml/isoschematron/resources/xsl/ lxml/trunk/src/lxml/isoschematron/resources/xsl/RNG2Schtrn.xsl - copied unchanged from r70357, lxml/branch/iso-schematron/src/lxml/isoschematron/resources/xsl/RNG2Schtrn.xsl lxml/trunk/src/lxml/isoschematron/resources/xsl/XSD2Schtrn.xsl - copied unchanged from r70357, lxml/branch/iso-schematron/src/lxml/isoschematron/resources/xsl/XSD2Schtrn.xsl lxml/trunk/src/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/ - copied from r70357, lxml/branch/iso-schematron/src/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/ lxml/trunk/src/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_abstract_expand.xsl - copied unchanged from r70357, lxml/branch/iso-schematron/src/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_abstract_expand.xsl 
lxml/trunk/src/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_dsdl_include.xsl - copied unchanged from r70357, lxml/branch/iso-schematron/src/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_dsdl_include.xsl lxml/trunk/src/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_schematron_message.xsl - copied unchanged from r70357, lxml/branch/iso-schematron/src/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_schematron_message.xsl lxml/trunk/src/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_schematron_skeleton_for_xslt1.xsl - copied unchanged from r70357, lxml/branch/iso-schematron/src/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_schematron_skeleton_for_xslt1.xsl lxml/trunk/src/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_svrl_for_xslt1.xsl - copied unchanged from r70357, lxml/branch/iso-schematron/src/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_svrl_for_xslt1.xsl lxml/trunk/src/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/readme.txt - copied unchanged from r70357, lxml/branch/iso-schematron/src/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/readme.txt lxml/trunk/src/lxml/tests/test_isoschematron.py - copied unchanged from r70357, lxml/branch/iso-schematron/src/lxml/tests/test_isoschematron.py Modified: lxml/trunk/ (props changed) lxml/trunk/CHANGES.txt lxml/trunk/LICENSES.txt lxml/trunk/doc/validation.txt lxml/trunk/doc/xpathxslt.txt lxml/trunk/setup.py lxml/trunk/src/lxml/lxml.etree.pyx lxml/trunk/src/lxml/tests/test_xslt.py lxml/trunk/src/lxml/xpath.pxi lxml/trunk/src/lxml/xslt.pxi Log: merge iso-schematron branch Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Wed Dec 30 14:57:53 2009 @@ -34,6 +34,11 @@ * Target parsers show their target object in the ``.target`` property (compatible with ElementTree). 
+* XSLT objects now take XPath object as __call__ stylesheet parameters + +* ISO-Schematron support based on the de-facto Schematron reference + 'skeleton implementation' + Bugs fixed ---------- Modified: lxml/trunk/LICENSES.txt ============================================================================== --- lxml/trunk/LICENSES.txt (original) +++ lxml/trunk/LICENSES.txt Wed Dec 30 14:57:53 2009 @@ -13,3 +13,17 @@ the doctest.py module is taken from the Python library and falls under the PSF Python License. + +The isoschematron implementation uses several XSL and RelaxNG resources: + * The (XML syntax) RelaxNG schema for schematron, copyright International + Organization for Standardization (see + src/lxml/isoschematron/resources/rng/iso-schematron.rng for the license + text) + * The skeleton iso-schematron-xslt1 pure-xslt schematron implementation + xsl stylesheets, copyright Rick Jelliffe and Academia Sinica Computing + Center, Taiwan (see the xsl files here for the license text: + src/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/) + * The xsd/rng schema schematron extraction xsl transformations are unlicensed + and copyright the respective authors as noted (see + src/lxml/isoschematron/resources/xsl/RNG2Schtrn.xsl and + src/lxml/isoschematron/resources/xsl/XSD2Schtrn.xsl) Modified: lxml/trunk/doc/validation.txt ============================================================================== --- lxml/trunk/doc/validation.txt (original) +++ lxml/trunk/doc/validation.txt Wed Dec 30 14:57:53 2009 @@ -11,11 +11,18 @@ .. _`Relax NG`: http://www.relaxng.org/ .. _`XML Schema`: http://www.w3.org/XML/Schema -There is also initial support for Schematron_. However, it does not currently -support error reporting in the validation phase due to insufficiencies in the -implementation as of libxml2 2.6.30. +lxml also provides support for ISO-`Schematron`_, based on the pure-XSLT +`skeleton implementation`_ of Schematron: -..
_Schematron: http://www.ascc.net/xml/schematron +.. _Schematron: http://www.schematron.com +.. _`skeleton implementation`: http://www.schematron.com/implementation.html + +There is also basic support for `pre-ISO-Schematron` through the libxml2 +Schematron features. However, this does not currently support error reporting +in the validation phase due to insufficiencies in the implementation as of +libxml2 2.6.30. + +.. _`pre-ISO-Schematron`: http://www.ascc.net/xml/schematron .. contents:: .. @@ -24,6 +31,7 @@ 3 RelaxNG 4 XMLSchema 5 Schematron + 6 (Pre-ISO-Schematron) The usual setup procedure: @@ -341,11 +349,179 @@ >>> doc2.xmlschema(xmlschema_doc) False - Schematron ---------- -Since version 2.0, lxml.etree features Schematron_ support, using the +From version 2.3 on lxml features ISO-`Schematron`_ support built on the +de-facto reference implementation of Schematron, the pure-XSLT-1.0 +`skeleton implementation`_. This is provided by the lxml.isoschematron package +that implements the Schematron class, with an API compatible to the other +validators'. Pass an Element or ElementTree object to construct a Schematron +validator: + +.. sourcecode:: pycon + >>> from lxml import isoschematron + >>> f = StringIO('''\ + ... + ... + ... Sum equals 100%. + ... + ... Sum is not 100%. + ... + ... + ... + ... ''') + + >>> sct_doc = etree.parse(f) + >>> schematron = isoschematron.Schematron(sct_doc) + +You can then validate some ElementTree document with this. Just like with +XMLSchema or RelaxNG, you'll get back true if the document is valid against the +schema, and false if not: + +.. sourcecode:: pycon + + >>> valid = StringIO('''\ + ... + ... 20 + ... 30 + ... 50 + ... + ... ''') + + >>> doc = etree.parse(valid) + >>> schematron.validate(doc) + True + + >>> etree.SubElement(doc.getroot(), "Percent").text = "10" + + >>> schematron.validate(doc) + False + +Calling the schema object has the same effect as calling its validate method. 
+This can be useful for conditional statements: + +.. sourcecode:: pycon + + >>> is_valid = isoschematron.Schematron(sct_doc) + + >>> if not is_valid(doc): + ... print("invalid!") + invalid! + +Built on a pure-xslt implementation, the actual validator is created as an +XSLT 1.0 stylesheet using these steps: + +0. (Extract embedded Schematron from XML Schema or RelaxNG schema) +1. Process inclusions +2. Process abstract patterns +3. Compile the schematron schema to XSLT + +To allow more control over the individual steps, isoschematron.Schematron +supports an extended API: + +The ``include`` and ``expand`` keyword arguments can be used to switch off +steps 1) and 2). + +To set parameters for steps 1), 2) and 3), dictionaries containing parameters +for XSLT can be provided using the keyword arguments ``include_params``, +``expand_params`` or ``compile_params``. Schematron automatically converts these +parameters to stylesheet parameters, so you need not worry about quoting string +parameters or wrapping them in XSLT.strparam(). If you ever need to pass an +XPath as argument to the XSLT stylesheet you can pass in an etree.XPath object +(see XPath and XSLT with lxml: Stylesheet-parameters_ for background on this). + +The ``phase`` parameter of the compile step is additionally exposed as a keyword +argument. If set, it overrides any ``phase`` entry in ``compile_params``. Note that +isoschematron.Schematron might expose more common parameters as additional keyword +args in the future. + +By setting ``store_schematron`` to True, the (included-and-expanded) schematron +document tree is stored and made available through the ``schematron`` property. + +Similarly, setting ``store_xslt`` to True will result in the validation XSLT +document tree being kept; it can be retrieved through the ``validator_xslt`` +property. + +Finally, with ``store_report`` set to True (default: False), the resulting +validation report document gets stored and can be accessed as the +``validation_report`` property.
+ +.. _Stylesheet-parameters: xpathxslt.html#stylesheet-parameters + +Using the ``phase`` parameter of isoschematron.Schematron allows for selective +validation of predefined pattern groups: + +.. sourcecode:: pycon + + >>> f = StringIO('''\ + ... + ... + ... + ... + ... + ... + ... + ... + ... Sum equals 100%. + ... + ... Sum is not 100%. + ... + ... + ... + ... All entries must be positive. + ... + ... Number () not positive + ... + ... + ... + ... ''') + + >>> sct_doc = etree.parse(f) + >>> schematron = isoschematron.Schematron(sct_doc) + + >>> valid = StringIO('''\ + ... + ... 20 + ... 30 + ... 50 + ... + ... ''') + + >>> doc = etree.parse(valid) + >>> schematron.validate(doc) + True + + >>> invalid_positive = StringIO('''\ + ... + ... 0 + ... 50 + ... 50 + ... + ... ''') + + >>> doc = etree.parse(invalid_positive) + + >>> schematron.validate(doc) + False + +If the constraint of Percent entries being positive is not of interest in a +certain validation scenario, it can now be disabled: + +.. sourcecode:: pycon + + >>> selective = isoschematron.Schematron(sct_doc, phase="phase.sum_check") + >>> selective.validate(doc) + True + +The usage of validation phases is a unique feature of ISO-Schematron and can be +a very powerful tool e.g. for establishing validation stages or to provide +different validators for different "validation audiences". + +(Pre-ISO-Schematron) +-------------------- + +Since version 2.0, lxml.etree features `pre-ISO-Schematron`_ support, using the class lxml.etree.Schematron. It requires at least libxml2 2.6.21 to work. The API is the same as for the other validators. 
Pass an ElementTree object to construct a Schematron validator: Modified: lxml/trunk/doc/xpathxslt.txt ============================================================================== --- lxml/trunk/doc/xpathxslt.txt (original) +++ lxml/trunk/doc/xpathxslt.txt Wed Dec 30 14:57:53 2009 @@ -543,6 +543,14 @@ >>> str(result) '\nText\n' +It's also possible to pass an XPath object as a parameter: + +.. sourcecode:: pycon + + >>> result = transform(doc, a=etree.XPath("/a/b/text()")) + >>> str(result) + '\nText\n' + Passing a string expression looks like this: .. sourcecode:: pycon Modified: lxml/trunk/setup.py ============================================================================== --- lxml/trunk/setup.py (original) +++ lxml/trunk/setup.py Wed Dec 30 14:57:53 2009 @@ -113,7 +113,12 @@ ], package_dir = {'': 'src'}, - packages = ['lxml', 'lxml.html'], + packages = ['lxml', 'lxml.html', 'lxml.isoschematron'], + package_data = {'lxml.isoschematron': + ['resources/rng/iso-schematron.rng', + 'resources/xsl/*.xsl', + 'resources/xsl/iso-schematron-xslt1/*.xsl', + 'resources/xsl/iso-schematron-xslt1/readme.txt']}, ext_modules = setupinfo.ext_modules( STATIC_INCLUDE_DIRS, STATIC_LIBRARY_DIRS, STATIC_CFLAGS, STATIC_BINARIES), Modified: lxml/trunk/src/lxml/lxml.etree.pyx ============================================================================== --- lxml/trunk/src/lxml/lxml.etree.pyx (original) +++ lxml/trunk/src/lxml/lxml.etree.pyx Wed Dec 30 14:57:53 2009 @@ -2789,6 +2789,14 @@ raise AssertionError, self._error_log._buildExceptionMessage( u"Document does not comply with schema") + cpdef _append_log_message(self, int domain, int type, int level, int line, + message, filename): + self._error_log._receiveGeneric(domain, type, level, line, message, + filename) + + cpdef _clear_error_log(self): + self._error_log.clear() + property error_log: u"The log of validation errors and warnings." 
def __get__(self): Modified: lxml/trunk/src/lxml/tests/test_xslt.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_xslt.py (original) +++ lxml/trunk/src/lxml/tests/test_xslt.py Wed Dec 30 14:57:53 2009 @@ -420,6 +420,24 @@ ''', str(res)) + def test_xslt_parameter_xpath_object(self): + tree = self.parse('BC') + style = self.parse('''\ + + + + + +''') + + st = etree.XSLT(style) + res = st(tree, bar=etree.XPath("/a/b/text()")) + self.assertEquals('''\ + +B +''', + str(res)) def test_xslt_default_parameters(self): tree = self.parse('BC') Modified: lxml/trunk/src/lxml/xpath.pxi ============================================================================== --- lxml/trunk/src/lxml/xpath.pxi (original) +++ lxml/trunk/src/lxml/xpath.pxi Wed Dec 30 14:57:53 2009 @@ -389,21 +389,20 @@ ``smart_strings=False``. """ cdef xpath.xmlXPathCompExpr* _xpath - cdef readonly object path + cdef bytes _path def __init__(self, path, *, namespaces=None, extensions=None, regexp=True, smart_strings=True): cdef xpath.xmlXPathContext* xpathCtxt _XPathEvaluatorBase.__init__(self, namespaces, extensions, regexp, smart_strings) - self.path = path - path = _utf8(path) + self._path = _utf8(path) xpathCtxt = xpath.xmlXPathNewContext(NULL) if xpathCtxt is NULL: python.PyErr_NoMemory() self.set_context(xpathCtxt) self._error_log.connect() - self._xpath = xpath.xmlXPathCtxtCompile(xpathCtxt, _cstr(path)) + self._xpath = xpath.xmlXPathCtxtCompile(xpathCtxt, _cstr(self._path)) self._error_log.disconnect() if self._xpath is NULL: self._raise_parse_error() @@ -435,6 +434,12 @@ self._unlock() return result + property path: + u"""The literal XPath expression. 
+ """ + def __get__(self): + return self._path.decode(u'UTF-8') + def __dealloc__(self): if self._xpath is not NULL: xpath.xmlXPathFreeCompExpr(self._xpath) Modified: lxml/trunk/src/lxml/xslt.pxi ============================================================================== --- lxml/trunk/src/lxml/xslt.pxi (original) +++ lxml/trunk/src/lxml/xslt.pxi Wed Dec 30 14:57:53 2009 @@ -475,6 +475,7 @@ cdef xmlDoc* c_result = NULL cdef xmlDoc* c_doc cdef tree.xmlDict* c_dict + cdef char** params input_doc = _documentOrRaise(_input) root_node = _rootNodeOrRaise(_input) @@ -509,8 +510,13 @@ resolver_context = self._xslt_resolver_context._copy() transform_ctxt._private = resolver_context + live_refs = _convert_xslt_parameters(transform_ctxt, kw, ¶ms) c_result = self._run_transform( - c_doc, kw, context, transform_ctxt) + c_doc, params, context, transform_ctxt) + if params is not NULL: + # deallocate space for parameters + python.PyMem_Free(params) + live_refs = None if transform_ctxt.state != xslt.XSLT_STATE_OK: if c_result is not NULL: @@ -579,60 +585,59 @@ return _xsltResultTreeFactory(result_doc, self, profile_doc) cdef xmlDoc* _run_transform(self, xmlDoc* c_input_doc, - dict parameters, _XSLTContext context, + char** params, _XSLTContext context, xslt.xsltTransformContext* transform_ctxt): cdef xmlDoc* c_result - cdef char** params - cdef Py_ssize_t i, parameter_count - cdef list keep_ref - xslt.xsltSetTransformErrorFunc(transform_ctxt, self._error_log, _receiveXSLTError) - if self._access_control is not None: self._access_control._register_in_context(transform_ctxt) - - parameter_count = len(parameters) - if parameter_count > 0: - # allocate space for parameters - # * 2 as we want an entry for both key and value, - # and + 1 as array is NULL terminated - params = python.PyMem_Malloc( - sizeof(char*) * (parameter_count * 2 + 1)) - try: - i = 0 - keep_ref = [] - for key, value in parameters.iteritems(): - k = _utf8(key) - if isinstance(value, _XSLTQuotedStringParam): - v = 
(<_XSLTQuotedStringParam>value).strval - xslt.xsltQuoteOneUserParam( - transform_ctxt, _cstr(k), _cstr(v)) - else: - v = _utf8(value) - params[i] = _cstr(k) - i += 1 - params[i] = _cstr(v) - i += 1 - keep_ref.append(k) - keep_ref.append(v) - except: - python.PyMem_Free(params) - raise - params[i] = NULL - else: - params = NULL - with nogil: c_result = xslt.xsltApplyStylesheetUser( self._c_style, c_input_doc, params, NULL, NULL, transform_ctxt) - - if params is not NULL: - # deallocate space for parameters - python.PyMem_Free(params) - return c_result +cdef _convert_xslt_parameters(xslt.xsltTransformContext* transform_ctxt, + dict parameters, char*** params_ptr): + cdef Py_ssize_t i, parameter_count + cdef char** params + cdef list keep_ref + params_ptr[0] = NULL + parameter_count = len(parameters) + if parameter_count == 0: + return None + # allocate space for parameters + # * 2 as we want an entry for both key and value, + # and + 1 as array is NULL terminated + params = python.PyMem_Malloc( + sizeof(char*) * (parameter_count * 2 + 1)) + try: + i = 0 + keep_ref = [] + for key, value in parameters.iteritems(): + k = _utf8(key) + keep_ref.append(k) + if isinstance(value, _XSLTQuotedStringParam): + v = (<_XSLTQuotedStringParam>value).strval + xslt.xsltQuoteOneUserParam( + transform_ctxt, _cstr(k), _cstr(v)) + else: + if isinstance(value, XPath): + v = (value)._path + else: + v = _utf8(value) + keep_ref.append(v) + params[i] = _cstr(k) + i += 1 + params[i] = _cstr(v) + i += 1 + except: + python.PyMem_Free(params) + raise + params[i] = NULL + params_ptr[0] = params + return keep_ref + cdef extern from "etree_defs.h": # macro call to 't->tp_new()' for instantiation without calling __init__() cdef XSLT NEW_XSLT "PY_NEW" (object t)