From scoder at codespeak.net Sat Mar 3 13:35:12 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 3 Mar 2007 13:35:12 +0100 (CET) Subject: [Lxml-checkins] r39785 - lxml/trunk/src/lxml Message-ID: <20070303123512.139F410060@code0.codespeak.net> Author: scoder Date: Sat Mar 3 13:35:10 2007 New Revision: 39785 Modified: lxml/trunk/src/lxml/tree.pxd Log: cleanup Modified: lxml/trunk/src/lxml/tree.pxd ============================================================================== --- lxml/trunk/src/lxml/tree.pxd (original) +++ lxml/trunk/src/lxml/tree.pxd Sat Mar 3 13:35:10 2007 @@ -242,8 +242,7 @@ char* URI, xmlCharEncodingHandler* encoder, int compression) cdef extern from "libxml/xmlsave.h": - ctypedef struct xmlSaveCtxt: - pass + ctypedef struct xmlSaveCtxt cdef xmlSaveCtxt* xmlSaveToFilename(char* filename, char* encoding, int options) From scoder at codespeak.net Sat Mar 3 13:35:46 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 3 Mar 2007 13:35:46 +0100 (CET) Subject: [Lxml-checkins] r39786 - lxml/trunk/src/lxml Message-ID: <20070303123546.8618410068@code0.codespeak.net> Author: scoder Date: Sat Mar 3 13:35:44 2007 New Revision: 39786 Modified: lxml/trunk/src/lxml/dtd.pxi Log: cleanup Modified: lxml/trunk/src/lxml/dtd.pxi ============================================================================== --- lxml/trunk/src/lxml/dtd.pxi (original) +++ lxml/trunk/src/lxml/dtd.pxi Sat Mar 3 13:35:44 2007 @@ -16,8 +16,9 @@ cdef class DTD(_Validator): """A DTD validator. - Can load from filesystem directly given a filename. Alternatively, pass - the keyword parameter ``external_id`` to load from a catalog. + Can load from filesystem directly given a filename or file-like object. + Alternatively, pass the keyword parameter ``external_id`` to load from a + catalog. 
""" cdef tree.xmlDtd* _c_dtd def __init__(self, file=None, external_id=None): From scoder at codespeak.net Sat Mar 3 13:38:23 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 3 Mar 2007 13:38:23 +0100 (CET) Subject: [Lxml-checkins] r39788 - in lxml/trunk: doc src/lxml Message-ID: <20070303123823.2D00610068@code0.codespeak.net> Author: scoder Date: Sat Mar 3 13:38:21 2007 New Revision: 39788 Added: lxml/trunk/src/lxml/schematron.pxd lxml/trunk/src/lxml/schematron.pxi Modified: lxml/trunk/doc/validation.txt lxml/trunk/src/lxml/etree.pyx Log: schematron support (disabled by default: requires libxml2 2.6.21+, better 2.6.27) Modified: lxml/trunk/doc/validation.txt ============================================================================== --- lxml/trunk/doc/validation.txt (original) +++ lxml/trunk/doc/validation.txt Sat Mar 3 13:38:21 2007 @@ -11,11 +11,17 @@ .. _`Relax NG`: http://www.relaxng.org/ .. _`XML Schema`: http://www.w3.org/XML/Schema +There is also initial support for Schematron_. However, it is currently +disabled in lxml builds due to insufficiencies in the implementation as of +libxml2 2.6.27. + +.. _Schematron: http://www.ascc.net/xml/schematron + .. contents:: .. 1 DTD 2 RelaxNG - 2 XMLSchema + 3 XMLSchema The usual setup procedure:: @@ -114,10 +120,9 @@ [...] AssertionError: Document does not comply with schema -Starting with version 0.9, lxml now has a simple API to report the errors -generated by libxml2. If you want to find out why the validation failed in the -second case, you can look up the error log of the validation process and check -it for relevant messages:: +If you want to find out why the validation failed in the second case, you can +look up the error log of the validation process and check it for relevant +messages:: >>> log = relaxng.error_log >>> print log.last_error @@ -126,7 +131,7 @@ You can see that the error (ERROR) happened during RelaxNG validation (RELAXNGV). The message then tells you what went wrong. 
Note that this error is local to the RelaxNG object. It will only contain log entries that -appeares during the validation. The DocumentInvalid exception raised by the +appeared during the validation. The DocumentInvalid exception raised by the ``assertValid`` method above provides access to the global error log (like all other lxml exceptions). Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Sat Mar 3 13:38:21 2007 @@ -1893,9 +1893,10 @@ def __get__(self): return self._error_log.copy() -include "dtd.pxi" # DTD -include "relaxng.pxi" # RelaxNG -include "xmlschema.pxi" # XMLSchema +include "dtd.pxi" # DTD +include "relaxng.pxi" # RelaxNG +include "xmlschema.pxi" # XMLSchema +#include "schematron.pxi" # Schematron ################################################################################ # Public C API Added: lxml/trunk/src/lxml/schematron.pxd ============================================================================== --- (empty file) +++ lxml/trunk/src/lxml/schematron.pxd Sat Mar 3 13:38:21 2007 @@ -0,0 +1,28 @@ +cimport tree +from tree cimport xmlDoc, xmlDtd + +cdef extern from "libxml/schematron.h": + ctypedef struct xmlSchematron + ctypedef struct xmlSchematronParserCtxt + ctypedef struct xmlSchematronValidCtxt + + ctypedef enum xmlSchematronValidOptions: + XML_SCHEMATRON_OUT_QUIET = 1 # quiet no report + XML_SCHEMATRON_OUT_TEXT = 2 # build a textual report + XML_SCHEMATRON_OUT_XML = 4 # output SVRL + XML_SCHEMATRON_OUT_FILE = 256 # output to a file descriptor + XML_SCHEMATRON_OUT_BUFFER = 512 # output to a buffer + XML_SCHEMATRON_OUT_IO = 1024 # output to I/O mechanism + + cdef xmlSchematronParserCtxt* xmlSchematronNewDocParserCtxt(xmlDoc* doc) + cdef xmlSchematronParserCtxt* xmlSchematronNewParserCtxt(char* filename) + cdef xmlSchematronValidCtxt* xmlSchematronNewValidCtxt(xmlSchematron* schema, + int 
options) + + cdef xmlSchematron* xmlSchematronParse(xmlSchematronParserCtxt* ctxt) + cdef int xmlSchematronValidateDoc(xmlSchematronValidCtxt* ctxt, + xmlDoc* instance) + + cdef void xmlSchematronFreeParserCtxt(xmlSchematronParserCtxt* ctxt) + cdef void xmlSchematronFreeValidCtxt(xmlSchematronValidCtxt* ctxt) + cdef void xmlSchematronFree(xmlSchematron* schema) Added: lxml/trunk/src/lxml/schematron.pxi ============================================================================== --- (empty file) +++ lxml/trunk/src/lxml/schematron.pxi Sat Mar 3 13:38:21 2007 @@ -0,0 +1,145 @@ +# support for Schematron validation +cimport schematron + +""" +Schematron +---------- + +Schematron is a less well known, but very powerful schema language. The main +idea is to use the capabilities of XPath to put restrictions on the structure +and the content of XML documents. Here is a simple example:: + + >>> schematron = etree.Schematron(etree.XML(""" + ... + ... + ... + ... Attribute + ... is forbidden + ... + ... + ... + ... + ... """)) + + >>> xml = etree.XML(""" + ... + ... + ... + ... + ... """) + + >>> schematron.validate(xml) + 0 + + >>> xml = etree.XML(""" + ... + ... + ... + ... + ... """) + + >>> schematron.validate(xml) + 1 + +Schematron was added to libxml2 in version 2.6.21. As of version 2.6.27, +however, Schematron lacks support for error reporting other than to stderr. +It is therefore not possible to retrieve validation warnings and errors in +lxml. +""" + +class SchematronError(LxmlError): + pass + +class SchematronParseError(SchematronError): + pass + +class SchematronValidateError(SchematronError): + pass + +################################################################################ +# Schematron + +cdef class Schematron(_Validator): + """A Schematron validator. + + Pass a root Element or an ElementTree to turn it into a validator. + Alternatively, pass a filename as keyword argument 'file' to parse from + the file system. 
+ """ + cdef schematron.xmlSchematron* _c_schema + cdef tree.xmlDoc* _c_doc + def __init__(self, etree=None, file=None): + cdef _Document doc + cdef _Element root_node + cdef xmlNode* c_node + cdef xmlDoc* c_doc + cdef char* c_href + cdef schematron.xmlSchematronParserCtxt* parser_ctxt + self._c_schema = NULL + self._c_doc = NULL + if etree is not None: + doc = _documentOrRaise(etree) + root_node = _rootNodeOrRaise(etree) + self._c_doc = _copyDocRoot(doc._c_doc, root_node._c_node) + parser_ctxt = schematron.xmlSchematronNewDocParserCtxt(self._c_doc) + elif file is not None: + filename = _getFilenameForFile(file) + if filename is None: + # XXX assume a string object + filename = file + filename = _encodeFilename(filename) + parser_ctxt = schematron.xmlSchematronNewParserCtxt(_cstr(filename)) + else: + raise SchematronParseError, "No tree or file given" + + if parser_ctxt is NULL: + if self._c_doc is not NULL: + tree.xmlFreeDoc(self._c_doc) + raise SchematronParseError, "Document is not parsable as Schematron" + self._c_schema = schematron.xmlSchematronParse(parser_ctxt) + + if self._c_schema is NULL: + if self._c_doc is not NULL: + schematron.xmlSchematronFreeParserCtxt(parser_ctxt) + tree.xmlFreeDoc(self._c_doc) + raise SchematronParseError, "Document is not a valid Schematron schema" + schematron.xmlSchematronFreeParserCtxt(parser_ctxt) + _Validator.__init__(self) + + def __dealloc__(self): + schematron.xmlSchematronFree(self._c_schema) + tree.xmlFreeDoc(self._c_doc) + + def __call__(self, etree): + """Validate doc using Schematron. 
+ + Returns true if document is valid, false if not.""" + cdef python.PyThreadState* state + cdef _Document doc + cdef _Element root_node + cdef xmlDoc* c_doc + cdef schematron.xmlSchematronValidCtxt* valid_ctxt + cdef int ret + + doc = _documentOrRaise(etree) + root_node = _rootNodeOrRaise(etree) + + self._error_log.connect() + valid_ctxt = schematron.xmlSchematronNewValidCtxt( + self._c_schema, schematron.XML_SCHEMATRON_OUT_QUIET) + if valid_ctxt is NULL: + self._error_log.disconnect() + raise SchematronError, "Failed to create validation context" + + c_doc = _fakeRootDoc(doc._c_doc, root_node._c_node) + state = python.PyEval_SaveThread() + ret = schematron.xmlSchematronValidateDoc(valid_ctxt, c_doc) + python.PyEval_RestoreThread(state) + _destroyFakeDoc(doc._c_doc, c_doc) + + schematron.xmlSchematronFreeValidCtxt(valid_ctxt) + + self._error_log.disconnect() + if ret == -1: + raise SchematronValidateError, "Internal error in Schematron validation" + return ret == 0 From scoder at codespeak.net Mon Mar 5 17:49:20 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 5 Mar 2007 17:49:20 +0100 (CET) Subject: [Lxml-checkins] r39965 - lxml/trunk/src/lxml Message-ID: <20070305164920.E3BE21007D@code0.codespeak.net> Author: scoder Date: Mon Mar 5 17:49:18 2007 New Revision: 39965 Modified: lxml/trunk/src/lxml/parser.pxi Log: set error return to -1 instead of 1 Modified: lxml/trunk/src/lxml/parser.pxi ============================================================================== --- lxml/trunk/src/lxml/parser.pxi (original) +++ lxml/trunk/src/lxml/parser.pxi Mon Mar 5 17:49:18 2007 @@ -406,7 +406,7 @@ if pctxt.spaceTab is not NULL: # work around bug in libxml2 xmlparser.xmlClearParserCtxt(pctxt) - cdef int _lockParser(self) except 1: + cdef int _lockParser(self) except -1: cdef python.PyThreadState* state cdef int result if config.ENABLE_THREADING and self._parser_lock != NULL: From scoder at codespeak.net Mon Mar 5 17:50:14 2007 From: scoder at 
codespeak.net (scoder at codespeak.net) Date: Mon, 5 Mar 2007 17:50:14 +0100 (CET) Subject: [Lxml-checkins] r39966 - lxml/trunk/src/lxml Message-ID: <20070305165014.5AB611007D@code0.codespeak.net> Author: scoder Date: Mon Mar 5 17:50:13 2007 New Revision: 39966 Modified: lxml/trunk/src/lxml/extensions.pxi Log: cleanups and C-ifications Modified: lxml/trunk/src/lxml/extensions.pxi ============================================================================== --- lxml/trunk/src/lxml/extensions.pxi (original) +++ lxml/trunk/src/lxml/extensions.pxi Mon Mar 5 17:50:13 2007 @@ -102,16 +102,16 @@ # namespaces (internal UTF-8 methods with leading '_') - def addNamespace(self, prefix, uri): + cdef addNamespace(self, prefix, uri): if self._namespaces is None: self._namespaces = {} python.PyDict_SetItem(self._namespaces, prefix, uri) - def registerNamespaces(self, namespaces): + cdef registerNamespaces(self, namespaces): for prefix, uri in namespaces.items(): self.registerNamespace(prefix, uri) - def registerNamespace(self, prefix, ns_uri): + cdef registerNamespace(self, prefix, ns_uri): prefix_utf = self._to_utf(prefix) ns_uri_utf = self._to_utf(ns_uri) xpath.xmlXPathRegisterNs(self._xpathCtxt, prefix_utf, ns_uri_utf) @@ -238,12 +238,14 @@ cdef xpath.xmlXPathFunction _function_check(void* ctxt, char* c_name, char* c_ns_uri): "Module level lookup function for XPath/XSLT functions" + cdef xpath.xmlXPathFunction c_func cdef _BaseContext context context = <_BaseContext>ctxt if context._prepare_function_call(c_ns_uri, c_name): - return _call_prepared_function + c_func = _call_prepared_function else: - return NULL + c_func = NULL + return c_func cdef xpath.xmlXPathObject* _wrapXPathObject(object obj) except NULL: cdef xpath.xmlNodeSet* resultSet @@ -358,7 +360,6 @@ cdef void _extension_function_call(_BaseContext context, function, xpath.xmlXPathParserContext* ctxt, int nargs): - cdef _Element node cdef _Document doc cdef xpath.xmlXPathObject* obj cdef int i From scoder at 
codespeak.net Mon Mar 5 17:52:51 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 5 Mar 2007 17:52:51 +0100 (CET) Subject: [Lxml-checkins] r39967 - lxml/trunk/src/lxml Message-ID: <20070305165251.5B7351007D@code0.codespeak.net> Author: scoder Date: Mon Mar 5 17:52:47 2007 New Revision: 39967 Modified: lxml/trunk/src/lxml/xslt.pxi Log: cleanup, doc strings Modified: lxml/trunk/src/lxml/xslt.pxi ============================================================================== --- lxml/trunk/src/lxml/xslt.pxi (original) +++ lxml/trunk/src/lxml/xslt.pxi Mon Mar 5 17:52:47 2007 @@ -1,4 +1,4 @@ -# XSLT and XPath classes, supports for extension functions +# XSLT cimport xslt @@ -265,6 +265,15 @@ cdef class XSLT: """Turn a document into an XSLT object. + + Keyword arguments of the constructor: + * regexp - enable exslt regular expression support in XPath (default: True) + * access_control - access restrictions for network or file system + + Keyword arguments of the XSLT run: + * profile_run - enable XSLT profiling + + Other keyword arguments are passed to the stylesheet. 
""" cdef _XSLTContext _context cdef xslt.xsltStylesheet* _c_style @@ -415,6 +424,7 @@ if params is not NULL: # deallocate space for parameters python.PyMem_Free(params) + keep_ref = None if transform_ctxt.profile: c_profile_doc = xslt.xsltGetProfileInformation(transform_ctxt) From scoder at codespeak.net Mon Mar 5 17:53:09 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Mon, 5 Mar 2007 17:53:09 +0100 (CET) Subject: [Lxml-checkins] r39968 - lxml/trunk Message-ID: <20070305165309.651CA1007E@code0.codespeak.net> Author: scoder Date: Mon Mar 5 17:53:08 2007 New Revision: 39968 Modified: lxml/trunk/TODO.txt Log: cleanup Modified: lxml/trunk/TODO.txt ============================================================================== --- lxml/trunk/TODO.txt (original) +++ lxml/trunk/TODO.txt Mon Mar 5 17:53:08 2007 @@ -41,5 +41,5 @@ Features -------- -* Relaxed NG compact notation (rnc versus rng) support. Currently not - supported by libxml2 (patch exists) +* RelaxNG compact notation (rnc versus rng) support. Currently not supported + by libxml2 (patch exists) From scoder at codespeak.net Sat Mar 10 20:05:37 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 10 Mar 2007 20:05:37 +0100 (CET) Subject: [Lxml-checkins] r40177 - lxml/trunk/src/lxml Message-ID: <20070310190537.158731006F@code0.codespeak.net> Author: scoder Date: Sat Mar 10 20:05:36 2007 New Revision: 40177 Modified: lxml/trunk/src/lxml/xpath.pxi Log: fix for compile problem Modified: lxml/trunk/src/lxml/xpath.pxi ============================================================================== --- lxml/trunk/src/lxml/xpath.pxi (original) +++ lxml/trunk/src/lxml/xpath.pxi Sat Mar 10 20:05:36 2007 @@ -143,9 +143,8 @@ def registerNamespaces(self, namespaces): """Register a prefix -> uri dict. 
""" - add = self._context.addNamespace for prefix, uri in namespaces.items(): - add(prefix, uri) + self._context.addNamespace(prefix, uri) def __call__(self, _path, **_variables): """Evaluate an XPath expression on the document. From scoder at codespeak.net Fri Mar 16 20:24:42 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 16 Mar 2007 20:24:42 +0100 (CET) Subject: [Lxml-checkins] r40611 - lxml/trunk/src/lxml Message-ID: <20070316192442.A40981008A@code0.codespeak.net> Author: scoder Date: Fri Mar 16 20:24:40 2007 New Revision: 40611 Modified: lxml/trunk/src/lxml/etree.pyx Log: allow threading in xinclude(), some docstring updates Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Fri Mar 16 20:24:40 2007 @@ -1289,7 +1289,7 @@ def relaxng(self, relaxng): """Validate this document using other document. - relaxng is a tree that should contain Relax NG XML + The relaxng argument is a tree that should contain a Relax NG schema. Returns True or False, depending on whether validation succeeded. @@ -1305,7 +1305,7 @@ def xmlschema(self, xmlschema): """Validate this document using other document. - xmlschema is a tree that should contain XML Schema XML. + The xmlschema argument is a tree that should contain an XML Schema. Returns True or False, depending on whether validation succeeded. @@ -1321,7 +1321,13 @@ def xinclude(self): """Process the XInclude nodes in this document and include the referenced XML fragments. + + There is support for loading files through the file system, HTTP and + FTP. + + Note that XInclude does not support custom resolvers in Python space. """ + cdef python.PyThreadState* state cdef int result # We cannot pass the XML_PARSE_NOXINCNODE option as this would free # the XInclude nodes - there may still be Python references to them! @@ -1331,13 +1337,15 @@ # typed as elements. 
The included fragment is added between the two, # i.e. as a sibling, which does not conflict with traversal. self._assertHasRoot() - if self._context_node._doc._parser != None: + state = python.PyEval_SaveThread() + if self._context_node._doc._parser is not None: result = xinclude.xmlXIncludeProcessTreeFlags( self._context_node._c_node, self._context_node._doc._parser._parse_options) else: result = xinclude.xmlXIncludeProcessTree( self._context_node._c_node) + python.PyEval_RestoreThread(state) if result == -1: raise XIncludeError, "XInclude processing failed" From scoder at codespeak.net Fri Mar 16 20:25:31 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 16 Mar 2007 20:25:31 +0100 (CET) Subject: [Lxml-checkins] r40612 - lxml/trunk/doc/html Message-ID: <20070316192531.55B3D1008A@code0.codespeak.net> Author: scoder Date: Fri Mar 16 20:25:29 2007 New Revision: 40612 Modified: lxml/trunk/doc/html/style.css Log: friendlier colours for the web page as a better match with the codespeak logo Modified: lxml/trunk/doc/html/style.css ============================================================================== --- lxml/trunk/doc/html/style.css (original) +++ lxml/trunk/doc/html/style.css Fri Mar 16 20:25:29 2007 @@ -67,10 +67,12 @@ font-size: 130%; } -div.sidemenu ul.menu.current > li { - color: orange; - border: groove orange; - background-color: #FFFACA; +div.sidemenu ul.menu.current li { + color: #CC0000; +} + +div.sidemenu ul.menu.current > li > a { + color: #CC0000; } div.sidemenu ul.menu.current ul.submenu { @@ -85,12 +87,13 @@ div.sidemenu ul.menu.foreign li.menu:hover ul.submenu { display: block; position: absolute; - border: groove orange; + border: groove #990000; padding: 1ex 1ex 1ex 3ex; margin-top: 0px; margin-left: 4em; margin-right: -20em; - background-color: #FFFACA; + color: #990000; + background-color: white; } div.sidemenu ul.submenu { @@ -121,7 +124,7 @@ @media screen { div.section > h1 > a:before { margin-left: -2ex; - color: 
orange; + color: #CC0000; content: "\00BB" " "; } } From scoder at codespeak.net Fri Mar 16 20:26:16 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 16 Mar 2007 20:26:16 +0100 (CET) Subject: [Lxml-checkins] r40613 - lxml/trunk/benchmark Message-ID: <20070316192616.5636F1008A@code0.codespeak.net> Author: scoder Date: Fri Mar 16 20:26:13 2007 New Revision: 40613 Modified: lxml/trunk/benchmark/bench_xpath.py Log: benchmark both the old and the new way of using extension functions Modified: lxml/trunk/benchmark/bench_xpath.py ============================================================================== --- lxml/trunk/benchmark/bench_xpath.py (original) +++ lxml/trunk/benchmark/bench_xpath.py Fri Mar 16 20:26:13 2007 @@ -34,7 +34,7 @@ child.xpath("./*[0]") @onlylib('lxe') - def bench_xpath_extensions_old(self, root): + def bench_xpath_old_extensions(self, root): def return_child(_, element): if element: return element[0] @@ -45,5 +45,21 @@ for child in root: xpath(child) + @onlylib('lxe') + def bench_xpath_extensions(self, root): + def return_child(_, element): + if element: + return element[0] + else: + return () + self.etree.FunctionNamespace("test")["t"] = return_child + + try: + xpath = self.etree.XPath("test:t(.)", {"test":"test"}) + for child in root: + xpath(child) + finally: + del self.etree.FunctionNamespace("test")["t"] + if __name__ == '__main__': benchbase.main(XPathBenchMark) From scoder at codespeak.net Fri Mar 16 20:26:56 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 16 Mar 2007 20:26:56 +0100 (CET) Subject: [Lxml-checkins] r40614 - lxml/trunk/src/lxml Message-ID: <20070316192656.4E7EC1008A@code0.codespeak.net> Author: scoder Date: Fri Mar 16 20:26:53 2007 New Revision: 40614 Modified: lxml/trunk/src/lxml/python.pxd Log: added a Python API function Modified: lxml/trunk/src/lxml/python.pxd ============================================================================== --- lxml/trunk/src/lxml/python.pxd 
(original) +++ lxml/trunk/src/lxml/python.pxd Fri Mar 16 20:26:53 2007 @@ -44,7 +44,8 @@ cdef int PyList_Append(object l, object obj) except -1 cdef int PyList_Reverse(object l) except -1 cdef int PyList_Insert(object l, Py_ssize_t index, object o) except -1 - cdef object PyList_AsTuple(object o) + cdef object PyList_AsTuple(object l) + cdef void PyList_Clear(object l) cdef int PyDict_SetItemString(object d, char* key, object value) except -1 cdef int PyDict_SetItem(object d, object key, object value) except -1 From scoder at codespeak.net Fri Mar 16 20:28:38 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Fri, 16 Mar 2007 20:28:38 +0100 (CET) Subject: [Lxml-checkins] r40615 - lxml/trunk/src/lxml/tests Message-ID: <20070316192838.2B2921008A@code0.codespeak.net> Author: scoder Date: Fri Mar 16 20:28:33 2007 New Revision: 40615 Modified: lxml/trunk/src/lxml/tests/test_xpathevaluator.py Log: test case split Modified: lxml/trunk/src/lxml/tests/test_xpathevaluator.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_xpathevaluator.py (original) +++ lxml/trunk/src/lxml/tests/test_xpathevaluator.py Fri Mar 16 20:28:33 2007 @@ -104,6 +104,10 @@ self.assertEquals( [root[0]], root.xpath('//baz:b', {'baz': 'uri:a'})) + + def test_xpath_ns_none(self): + tree = self.parse('') + root = tree.getroot() self.assertRaises( TypeError, root.xpath, '//b', {None: 'uri:a'}) From scoder at codespeak.net Sat Mar 17 06:48:03 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 17 Mar 2007 06:48:03 +0100 (CET) Subject: [Lxml-checkins] r40625 - lxml/trunk/doc Message-ID: <20070317054803.D5C3E10082@code0.codespeak.net> Author: scoder Date: Sat Mar 17 06:48:01 2007 New Revision: 40625 Modified: lxml/trunk/doc/FAQ.txt Log: FAQ entry on standard compliance Modified: lxml/trunk/doc/FAQ.txt ============================================================================== --- lxml/trunk/doc/FAQ.txt 
(original) +++ lxml/trunk/doc/FAQ.txt Sat Mar 17 06:48:01 2007 @@ -12,10 +12,11 @@ 1 General Questions 1.1 Is there a tutorial? 1.2 Where can I find more documentation about lxml? - 1.3 Where are the Windows binaries? - 1.4 What is the difference between lxml.etree and lxml.objectify? - 1.5 Why is my application so slow? - 1.6 Why do I get errors about missing UCS4 symbols when installing lxml? + 1.3 What standards does lxml implement? + 1.4 Where are the Windows binaries? + 1.5 What is the difference between lxml.etree and lxml.objectify? + 1.6 Why is my application so slow? + 1.7 Why do I get errors about missing UCS4 symbols when installing lxml? 2 Bugs 2.1 My application crashes! Why does lxml.etree do that? 2.2 I think I have found a bug in lxml. What should I do? @@ -64,6 +65,23 @@ .. _`the web page`: http://codespeak.net/lxml/#documentation +What standards does lxml implement? +----------------------------------- + +The compliance to XML Standards depends on the support in libxml2 and libxslt. +Here is a quote from `http://xmlsoft.org/`: + + In most cases libxml2 tries to implement the specifications in a relatively + strictly compliant way. As of release 2.4.16, libxml2 passed all 1800+ tests + from the OASIS XML Tests Suite. + +lxml currently supports libxml2 2.6.16 or later, which has even better support +for various XML standards. Some of the more important ones are: HTML, XML +namespaces, XPath, XInclude, XSLT, XML catalogs, canonical XML, RelaxNG, +XML:ID. Support for XML Schema and Schematron is currently incomplete. +libxml2 also supports loading documents through HTTP and FTP. + + Where are the Windows binaries? 
------------------------------- From scoder at codespeak.net Sat Mar 17 06:58:12 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 17 Mar 2007 06:58:12 +0100 (CET) Subject: [Lxml-checkins] r40626 - lxml/trunk/doc Message-ID: <20070317055812.56C8510082@code0.codespeak.net> Author: scoder Date: Sat Mar 17 06:58:10 2007 New Revision: 40626 Modified: lxml/trunk/doc/FAQ.txt Log: FAQ entry on standard compliance Modified: lxml/trunk/doc/FAQ.txt ============================================================================== --- lxml/trunk/doc/FAQ.txt (original) +++ lxml/trunk/doc/FAQ.txt Sat Mar 17 06:58:10 2007 @@ -78,8 +78,9 @@ lxml currently supports libxml2 2.6.16 or later, which has even better support for various XML standards. Some of the more important ones are: HTML, XML namespaces, XPath, XInclude, XSLT, XML catalogs, canonical XML, RelaxNG, -XML:ID. Support for XML Schema and Schematron is currently incomplete. -libxml2 also supports loading documents through HTTP and FTP. +XML:ID. Support for XML Schema and Schematron is currently incomplete in +libxml2, but is mostly usable and still being worked on. libxml2 also +supports loading documents through HTTP and FTP. Where are the Windows binaries? From scoder at codespeak.net Sat Mar 17 07:01:29 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 17 Mar 2007 07:01:29 +0100 (CET) Subject: [Lxml-checkins] r40627 - lxml/branch/extension_refactoring Message-ID: <20070317060129.C1CBC10082@code0.codespeak.net> Author: scoder Date: Sat Mar 17 07:01:27 2007 New Revision: 40627 Added: lxml/branch/extension_refactoring/ - copied from r40626, lxml/trunk/ Log: new branch for refactoring the extension function setup From scoder at codespeak.net Sat Mar 17 07:03:29 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Sat, 17 Mar 2007 07:03:29 +0100 (CET) Subject: [Lxml-checkins] r40628 - in lxml/branch/extension_refactoring: . 
src/lxml Message-ID: <20070317060329.7A93A10082@code0.codespeak.net> Author: scoder Date: Sat Mar 17 07:03:26 2007 New Revision: 40628 Modified: lxml/branch/extension_refactoring/CHANGES.txt lxml/branch/extension_refactoring/src/lxml/extensions.pxi lxml/branch/extension_refactoring/src/lxml/nsclasses.pxi lxml/branch/extension_refactoring/src/lxml/xpath.pxi lxml/branch/extension_refactoring/src/lxml/xslt.pxd lxml/branch/extension_refactoring/src/lxml/xslt.pxi Log: initial branch import, mainly complete but buggy Modified: lxml/branch/extension_refactoring/CHANGES.txt ============================================================================== --- lxml/branch/extension_refactoring/CHANGES.txt (original) +++ lxml/branch/extension_refactoring/CHANGES.txt Sat Mar 17 07:03:26 2007 @@ -2,6 +2,25 @@ lxml changelog ============== +Under Development +================= + +Features added +-------------- + +* EXSLT RegExp support in standard XPath (not only XSLT) + +Bugs fixed +---------- + +* Thread safety in XPath evaluators + +Other changes +------------- + +* major refactoring in XPath/XSLT extension function code + + 1.3beta (2007-02-27) ==================== Modified: lxml/branch/extension_refactoring/src/lxml/extensions.pxi ============================================================================== --- lxml/branch/extension_refactoring/src/lxml/extensions.pxi (original) +++ lxml/branch/extension_refactoring/src/lxml/extensions.pxi Sat Mar 17 07:03:26 2007 @@ -12,11 +12,14 @@ ################################################################################ # Base class for XSLT and XPath evaluation contexts: functions, namespaces, ... 
+ctypedef int _register_function(void* ctxt, name_utf, ns_uri_utf) + cdef class _BaseContext: cdef xpath.xmlXPathContext* _xpathCtxt cdef _Document _doc cdef object _extensions cdef object _namespaces + cdef object _registered_namespaces cdef object _utf_refs cdef object _function_cache cdef object _function_cache_ns @@ -28,9 +31,9 @@ def __init__(self, namespaces, extensions): self._xpathCtxt = NULL self._utf_refs = {} + self._registered_namespaces = [] self._function_cache = {} self._function_cache_ns = {} - self._called_function = None if extensions is not None: # convert extensions to UTF-8 @@ -90,7 +93,8 @@ self.registerNamespaces(namespaces) cdef _unregister_context(self): - xpath.xmlXPathRegisteredNsCleanup(self._xpathCtxt) + self._unregisterNamespaces() +# xpath.xmlXPathRegisteredNsCleanup(self._xpathCtxt) self._free_context() cdef _free_context(self): @@ -112,12 +116,86 @@ self.registerNamespace(prefix, uri) cdef registerNamespace(self, prefix, ns_uri): + if prefix is None: + raise TypeError, "empty prefix is not supported in XPath" prefix_utf = self._to_utf(prefix) ns_uri_utf = self._to_utf(ns_uri) - xpath.xmlXPathRegisterNs(self._xpathCtxt, prefix_utf, ns_uri_utf) + python.PyList_Append(self._registered_namespaces, prefix_utf) + xpath.xmlXPathRegisterNs(self._xpathCtxt, + _cstr(prefix_utf), _cstr(ns_uri_utf)) + + cdef _registerNamespace(self, prefix_utf, ns_uri_utf): + python.PyList_Append(self._registered_namespaces, prefix_utf) + xpath.xmlXPathRegisterNs(self._xpathCtxt, + _cstr(prefix_utf), _cstr(ns_uri_utf)) + + cdef void _unregisterNamespaces(self): + if python.PyList_GET_SIZE(self._registered_namespaces) > 0: + for prefix_utf in self._registered_namespaces: + sys.stderr.write(prefix_utf) + sys.stderr.flush() + xpath.xmlXPathRegisterNs(self._xpathCtxt, + _cstr(prefix_utf), NULL) + self._registered_namespaces = [] + + cdef void _unregisterNamespace(self, prefix_utf): + xpath.xmlXPathRegisterNs(self._xpathCtxt, + _cstr(prefix_utf), NULL) # extension 
functions + cdef void _addLocalExtensionFunction(self, ns_utf, name_utf, function): + if self._extensions is None: + self._extensions = {} + python.PyDict_SetItem(self._extensions, (ns_utf, name_utf), function) + + cdef void _registerAllFunctions(self, void* ctxt, + _register_function reg_func): + cdef python.PyObject* dict_result + for ns_utf, ns_functions in _iter_ns_extension_functions(): + if ns_utf is None: + d = self._function_cache + else: + dict_result = python.PyDict_GetItem( + self._function_cache_ns, ns_utf) + if dict_result is NULL: + d = {} + python.PyDict_SetItem( + self._function_cache_ns, ns_utf, d) + else: + d = dict_result + for name_utf, function in ns_functions.iteritems(): + python.PyDict_SetItem(d, name_utf, function) + reg_func(ctxt, name_utf, ns_utf) + if self._extensions is None: + return # done + last_ns = None + d = self._function_cache + for (ns_utf, name_utf), function in self._extensions.iteritems(): + if ns_utf is not last_ns: + last_ns = ns_utf + if ns_utf is None: + d = self._function_cache + else: + dict_result = python.PyDict_GetItem( + self._function_cache_ns, ns_utf) + if dict_result is NULL: + d = {} + python.PyDict_SetItem(self._function_cache_ns, + ns_utf, d) + else: + d = dict_result + python.PyDict_SetItem(d, name_utf, function) + reg_func(ctxt, name_utf, ns_utf) + + cdef void _unregisterAllFunctions(self, void* ctxt, + _register_function unreg_func): + for name_utf in self._function_cache: + unreg_func(ctxt, name_utf, None) + for ns_utf, functions in self._function_cache_ns.iteritems(): + for name_utf in functions: + unreg_func(ctxt, name_utf, ns_utf) + cdef _find_cached_function(self, char* c_ns_uri, char* c_name): """Lookup an extension function in the cache and return it. 
@@ -233,10 +311,99 @@ ################################################################################ +# EXSLT regexp implementation + +cdef class _ExsltRegExp: + cdef object _compile_map + def __init__(self): + self._compile_map = {} + + cdef _make_string(self, value): + if _isString(value): + return value + else: + raise TypeError, "Invalid argument type %s" % type(value) + + cdef _compile(self, rexp, ignore_case): + cdef python.PyObject* c_result + rexp = self._make_string(rexp) + key = (rexp, ignore_case) + c_result = python.PyDict_GetItem(self._compile_map, key) + if c_result is not NULL: + return c_result + py_flags = re.UNICODE + if ignore_case: + py_flags = py_flags | re.IGNORECASE + rexp_compiled = re.compile(rexp, py_flags) + python.PyDict_SetItem(self._compile_map, key, rexp_compiled) + return rexp_compiled + + def test(self, ctxt, s, rexp, flags=''): + flags = self._make_string(flags) + s = self._make_string(s) + rexpc = self._compile(rexp, 'i' in flags) + if rexpc.search(s) is None: + return False + else: + return True + + def match(self, ctxt, s, rexp, flags=''): + flags = self._make_string(flags) + s = self._make_string(s) + rexpc = self._compile(rexp, 'i' in flags) + if 'g' in flags: + results = rexpc.findall(s) + if not results: + return () + else: + result = rexpc.search(s) + if not result: + return () + results = [ result.group() ] + results.extend( result.groups('') ) + result_list = [] + root = Element('matches') + join_groups = ''.join + for s_match in results: + if python.PyTuple_CheckExact(s_match): + s_match = join_groups(s_match) + elem = SubElement(root, 'match') + elem.text = s_match + python.PyList_Append(result_list, elem) + return result_list + + def replace(self, ctxt, s, rexp, flags, replacement): + replacement = self._make_string(replacement) + flags = self._make_string(flags) + s = self._make_string(s) + rexpc = self._compile(rexp, 'i' in flags) + if 'g' in flags: + count = 0 + else: + count = 1 + return rexpc.sub(replacement, s, 
count) + + cdef _register_in_context(self, _BaseContext context): + ns = "http://exslt.org/regular-expressions" + context._addLocalExtensionFunction(ns, "test", self.test) + context._addLocalExtensionFunction(ns, "match", self.match) + context._addLocalExtensionFunction(ns, "replace", self.replace) + + +################################################################################ # helper functions cdef xpath.xmlXPathFunction _function_check(void* ctxt, char* c_name, char* c_ns_uri): + cdef python.PyGILState_STATE gil_state + cdef xpath.xmlXPathFunction c_func + gil_state = python.PyGILState_Ensure() + c_func = _python_function_check(ctxt, c_name, c_ns_uri) + python.PyGILState_Release(gil_state) + return c_func + +cdef xpath.xmlXPathFunction _python_function_check(void* ctxt, + char* c_name, char* c_ns_uri): "Module level lookup function for XPath/XSLT functions" cdef xpath.xmlXPathFunction c_func cdef _BaseContext context @@ -405,7 +572,7 @@ fref = "{%s}%s" % (rctxt.functionURI, rctxt.function) else: fref = rctxt.function - xpath.xmlXPathErr(ctxt, xpath.XML_XPATH_UNKNOWN_FUNC_ERROR) + xpath.xmlXPathErr(ctxt, xpath.XPATH_UNKNOWN_FUNC_ERROR) exception = XPathFunctionError("XPath function '%s' not found" % fref) context._exc._store_exception(exception) Modified: lxml/branch/extension_refactoring/src/lxml/nsclasses.pxi ============================================================================== --- lxml/branch/extension_refactoring/src/lxml/nsclasses.pxi (original) +++ lxml/branch/extension_refactoring/src/lxml/nsclasses.pxi Sat Mar 17 07:03:26 2007 @@ -75,6 +75,11 @@ name = _utf8(name) return self._get(name) + def __delitem__(self, name): + if name is not None: + name = _utf8(name) + python.PyDict_DelItem(self._entries, name) + cdef object _get(self, object name): cdef python.PyObject* dict_result dict_result = python.PyDict_GetItem(self._entries, name) @@ -99,7 +104,7 @@ return self._entries.iteritems() def clear(self): - self._entries.clear() + 
python.PyDict_Clear(self._entries) cdef class _ClassNamespaceRegistry(_NamespaceRegistry): "Dictionary-like registry for namespace implementation classes" @@ -130,32 +135,39 @@ cdef class _XPathFunctionNamespaceRegistry(_FunctionNamespaceRegistry): cdef object _prefix cdef object _prefix_utf + property prefix: "Namespace prefix for extension functions." def __del__(self): self._prefix = None # no prefix configured + self._prefix_utf = None def __get__(self): - return self._prefix + if self._prefix is None: + return '' + else: + return self._prefix def __set__(self, prefix): + if prefix == '': + prefix = None # empty prefix if prefix is None: - prefix = '' # empty prefix - self._prefix_utf = _utf8(prefix) + self._prefix_utf = None + else: + self._prefix_utf = _utf8(prefix) self._prefix = prefix cdef object _find_all_extension_prefixes(): "Internal lookup function to find all function prefixes for XSLT/XPath." cdef _XPathFunctionNamespaceRegistry registry - ns_prefixes = {} - for (ns_utf, registry) in __FUNCTION_NAMESPACE_REGISTRIES.iteritems(): + ns_prefixes = [] + for registry in __FUNCTION_NAMESPACE_REGISTRIES.itervalues(): if registry._prefix_utf is not None: - ns_prefixes[registry._prefix_utf] = ns_utf + if registry._ns_uri_utf is not None: + python.PyList_Append( + ns_prefixes, (registry._prefix_utf, registry._ns_uri_utf)) return ns_prefixes -cdef object _iter_extension_function_names(): - l = [] - for (ns_utf, registry) in __FUNCTION_NAMESPACE_REGISTRIES.iteritems(): - python.PyList_Append(l, (ns_utf, registry)) - return l +cdef object _iter_ns_extension_functions(): + return __FUNCTION_NAMESPACE_REGISTRIES.iteritems() cdef object _find_extension(ns_uri_utf, name_utf): cdef python.PyObject* dict_result Modified: lxml/branch/extension_refactoring/src/lxml/xpath.pxi ============================================================================== --- lxml/branch/extension_refactoring/src/lxml/xpath.pxi (original) +++ 
lxml/branch/extension_refactoring/src/lxml/xpath.pxi Sat Mar 17 07:03:26 2007 @@ -9,6 +9,25 @@ ################################################################################ # XPath +cdef int _register_xpath_function(void* ctxt, name_utf, ns_utf): + if ns_utf is None: + return xpath.xmlXPathRegisterFunc( + ctxt, _cstr(name_utf), + _xpath_function_call) + else: + return xpath.xmlXPathRegisterFuncNS( + ctxt, _cstr(name_utf), _cstr(ns_utf), + _xpath_function_call) + +cdef int _unregister_xpath_function(void* ctxt, name_utf, ns_utf): + if ns_utf is None: + return xpath.xmlXPathRegisterFunc( + ctxt, _cstr(name_utf), NULL) + else: + return xpath.xmlXPathRegisterFuncNS( + ctxt, _cstr(name_utf), _cstr(ns_utf), NULL) + + cdef class _XPathContext(_BaseContext): cdef object _variables def __init__(self, namespaces, extensions, variables): @@ -18,13 +37,13 @@ cdef register_context(self, xpath.xmlXPathContext* xpathCtxt, _Document doc): self._set_xpath_context(xpathCtxt) ns_prefixes = _find_all_extension_prefixes() - if ns_prefixes: - self.registerNamespaces(ns_prefixes) + if python.PyList_GET_SIZE(ns_prefixes) > 0: + for (prefix, ns_uri) in ns_prefixes: + self._registerNamespace(prefix, ns_uri) self._register_context(doc) if self._variables is not None: self.registerVariables(self._variables) - xpath.xmlXPathRegisterFuncLookup( - self._xpathCtxt, _function_check, self) + self._registerAllFunctions(xpathCtxt, _register_xpath_function) cdef unregister_context(self): cdef xpath.xmlXPathContext* xpathCtxt @@ -32,15 +51,16 @@ if xpathCtxt is NULL: return xpath.xmlXPathRegisteredVariablesCleanup(xpathCtxt) + self._unregisterAllFunctions(xpathCtxt, _unregister_xpath_function) self._unregister_context() - def registerVariables(self, variable_dict): + cdef registerVariables(self, variable_dict): for name, value in variable_dict.items(): name_utf = self._to_utf(name) xpath.xmlXPathRegisterVariable( self._xpathCtxt, _cstr(name_utf), _wrapXPathObject(value)) - def registerVariable(self, 
name, value): + cdef registerVariable(self, name, value): name_utf = self._to_utf(name) xpath.xmlXPathRegisterVariable( self._xpathCtxt, _cstr(name_utf), _wrapXPathObject(value)) @@ -55,9 +75,14 @@ cdef class _XPathEvaluatorBase: cdef xpath.xmlXPathContext* _xpathCtxt cdef _XPathContext _context + cdef python.PyThread_type_lock _eval_lock - def __init__(self, namespaces, extensions, variables=None): - self._context = _XPathContext(namespaces, extensions, variables) + def __init__(self, namespaces, extensions, regexp): + cdef _ExsltRegExp _regexp + self._context = _XPathContext(namespaces, extensions, None) + if regexp: + _regexp = _ExsltRegExp() + _regexp._register_in_context(self._context) def __dealloc__(self): if self._xpathCtxt is not NULL: @@ -84,6 +109,22 @@ c = path[0] return c == c'/' + cdef int _lock(self) except -1: + cdef python.PyThreadState* state + cdef int result + if config.ENABLE_THREADING and self._eval_lock != NULL: + state = python.PyEval_SaveThread() + result = python.PyThread_acquire_lock( + self._eval_lock, python.WAIT_LOCK) + python.PyEval_RestoreThread(state) + if result == 0: + raise ParserError, "parser locking failed" + return 0 + + cdef void _unlock(self): + if config.ENABLE_THREADING and self._eval_lock != NULL: + python.PyThread_release_lock(self._eval_lock) + cdef _raise_parse_error(self): if self._xpathCtxt is not NULL and \ self._xpathCtxt.lastError.message is not NULL: @@ -119,10 +160,13 @@ Absolute XPath expressions (starting with '/') will be evaluated against the ElementTree as returned by getroottree(). - XPath evaluators must not be shared between threads. + Additional namespace declarations can be passed with the 'namespace' + keyword argument. EXSLT regular expression support can be disabled with + the 'regexp' boolean keyword (defaults to True). 
""" cdef _Element _element - def __init__(self, _Element element not None, namespaces=None, extensions=None): + def __init__(self, _Element element not None, namespaces=None, + extensions=None, regexp=True): cdef xpath.xmlXPathContext* xpathCtxt cdef int ns_register_status cdef _Document doc @@ -133,7 +177,7 @@ raise XPathContextError, "Unable to create new XPath context" _setupDict(xpathCtxt) self._element = element - _XPathEvaluatorBase.__init__(self, namespaces, extensions) + _XPathEvaluatorBase.__init__(self, namespaces, extensions, regexp) def registerNamespace(self, prefix, uri): """Register a namespace with the XPath context. @@ -155,6 +199,7 @@ Absolute XPath expressions (starting with '/') will be evaluated against the ElementTree as returned by getroottree(). """ + cdef python.PyThreadState* state cdef xpath.xmlXPathContext* xpathCtxt cdef xpath.xmlXPathObject* xpathObj cdef _Document doc @@ -164,12 +209,16 @@ xpathCtxt.node = self._element._c_node doc = self._element._doc + self._lock() self._context.register_context(xpathCtxt, doc) try: self._context.registerVariables(_variables) + state = python.PyEval_SaveThread() xpathObj = xpath.xmlXPathEvalExpression(_cstr(path), xpathCtxt) finally: + python.PyEval_RestoreThread(state) self._context.unregister_context() + self._unlock() return self._handle_result(xpathObj, doc) @@ -177,11 +226,14 @@ cdef class XPathDocumentEvaluator(XPathElementEvaluator): """Create an XPath evaluator for an ElementTree. - XPath evaluators must not be shared between threads. + Additional namespace declarations can be passed with the 'namespace' + keyword argument. EXSLT regular expression support can be disabled with + the 'regexp' boolean keyword (defaults to True). 
""" - def __init__(self, _ElementTree etree not None, namespaces=None, extensions=None): + def __init__(self, _ElementTree etree not None, namespaces=None, + extensions=None, regexp=True): XPathElementEvaluator.__init__( - self, etree._context_node, namespaces, extensions) + self, etree._context_node, namespaces, extensions, regexp) def __call__(self, _path, **_variables): """Evaluate an XPath expression on the document. @@ -189,6 +241,7 @@ Variables may be provided as keyword arguments. Note that namespaces are currently not supported for variables. """ + cdef python.PyThreadState* state cdef xpath.xmlXPathContext* xpathCtxt cdef xpath.xmlXPathObject* xpathObj cdef xmlDoc* c_doc @@ -197,47 +250,57 @@ xpathCtxt = self._xpathCtxt doc = self._element._doc + self._lock() self._context.register_context(xpathCtxt, doc) c_doc = _fakeRootDoc(doc._c_doc, self._element._c_node) try: self._context.registerVariables(_variables) + state = python.PyEval_SaveThread() xpathCtxt.doc = c_doc xpathCtxt.node = tree.xmlDocGetRootElement(c_doc) xpathObj = xpath.xmlXPathEvalExpression(_cstr(path), xpathCtxt) finally: + python.PyEval_RestoreThread(state) _destroyFakeDoc(doc._c_doc, c_doc) self._context.unregister_context() + self._unlock() return self._handle_result(xpathObj, doc) -def XPathEvaluator(etree_or_element, namespaces=None, extensions=None): +def XPathEvaluator(etree_or_element, namespaces=None, extensions=None, + regexp=True): """Creates an XPath evaluator for an ElementTree or an Element. The resulting object can be called with an XPath expression as argument and XPath variables provided as keyword arguments. - XPath evaluators must not be shared between threads. + Additional namespace declarations can be passed with the 'namespace' + keyword argument. EXSLT regular expression support can be disabled with + the 'regexp' boolean keyword (defaults to True). 
""" if isinstance(etree_or_element, _ElementTree): - return XPathDocumentEvaluator(etree_or_element, namespaces, extensions) + return XPathDocumentEvaluator(etree_or_element, namespaces, + extensions, regexp) else: - return XPathElementEvaluator(etree_or_element, namespaces, extensions) + return XPathElementEvaluator(etree_or_element, namespaces, + extensions, regexp) cdef class XPath(_XPathEvaluatorBase): """A compiled XPath expression that can be called on Elements and ElementTrees. - Besides the XPath expression, you can pass namespace mappings and - extensions to the constructor through the keyword arguments ``namespaces`` - and ``extensions``. + Besides the XPath expression, you can pass prefix-namespace mappings and + extension functions to the constructor through the keyword arguments + ``namespaces`` and ``extensions``. EXSLT regular expression support can + be disabled with the 'regexp' boolean keyword (defaults to True). """ cdef xpath.xmlXPathCompExpr* _xpath cdef readonly object path - def __init__(self, path, namespaces=None, extensions=None): - _XPathEvaluatorBase.__init__(self, namespaces, extensions) + def __init__(self, path, namespaces=None, extensions=None, regexp=True): + _XPathEvaluatorBase.__init__(self, namespaces, extensions, regexp) self._xpath = NULL self.path = path path = _utf8(path) @@ -258,19 +321,21 @@ document = _documentOrRaise(_etree_or_element) element = _rootNodeOrRaise(_etree_or_element) - xpathCtxt = self._xpathCtxt - xpathCtxt.doc = document._c_doc - xpathCtxt.node = element._c_node + self._lock() + self._xpathCtxt.doc = document._c_doc + self._xpathCtxt.node = element._c_node context = self._context - context.register_context(xpathCtxt, document) + context.register_context(self._xpathCtxt, document) + context.registerVariables(_variables) try: - context.registerVariables(_variables) state = python.PyEval_SaveThread() - xpathObj = xpath.xmlXPathCompiledEval(self._xpath, xpathCtxt) - python.PyEval_RestoreThread(state) + 
xpathObj = xpath.xmlXPathCompiledEval( + self._xpath, self._xpathCtxt) finally: + python.PyEval_RestoreThread(state) context.unregister_context() + self._unlock() return self._handle_result(xpathObj, document) def __dealloc__(self): Modified: lxml/branch/extension_refactoring/src/lxml/xslt.pxd ============================================================================== --- lxml/branch/extension_refactoring/src/lxml/xslt.pxd (original) +++ lxml/branch/extension_refactoring/src/lxml/xslt.pxd Sat Mar 17 07:03:26 2007 @@ -35,6 +35,8 @@ xmlXPathFunction function) cdef int xsltUnregisterExtModuleFunction(char* name, char* URI) cdef xmlXPathFunction xsltExtModuleFunctionLookup(char* name, char* URI) + cdef int xsltRegisterExtPrefix(xsltStylesheet* style, + char* prefix, char* URI) cdef extern from "libxslt/documents.h": ctypedef enum xsltLoadType: Modified: lxml/branch/extension_refactoring/src/lxml/xslt.pxi ============================================================================== --- lxml/branch/extension_refactoring/src/lxml/xslt.pxi (original) +++ lxml/branch/extension_refactoring/src/lxml/xslt.pxi Sat Mar 17 07:03:26 2007 @@ -193,6 +193,21 @@ ################################################################################ # XSLT +cdef int _register_xslt_function(void* ctxt, name_utf, ns_utf): + if ns_utf is None: + return 0 + return xslt.xsltRegisterExtFunction( + ctxt, _cstr(name_utf), _cstr(ns_utf), + _xpath_function_call) + +cdef int _unregister_xslt_function(void* ctxt, name_utf, ns_utf): + if ns_utf is None: + return 0 + return xslt.xsltRegisterExtFunction( + ctxt, _cstr(name_utf), _cstr(ns_utf), + NULL) + + cdef class _XSLTContext(_BaseContext): cdef xslt.xsltTransformContext* _xsltCtxt def __init__(self, namespaces, extensions): @@ -207,7 +222,7 @@ self._set_xpath_context(xsltCtxt.xpathCtxt) self._register_context(doc) xsltCtxt.xpathCtxt.userData = self - self._registerExtensionFunctions() + self._registerAllFunctions(xsltCtxt, _register_xslt_function) 
cdef free_context(self): cdef xslt.xsltTransformContext* xsltCtxt @@ -219,49 +234,6 @@ xslt.xsltFreeTransformContext(xsltCtxt) self._release_temp_refs() - cdef void _addLocalExtensionFunction(self, ns_utf, name_utf, function): - if self._extensions is None: - self._extensions = {} - python.PyDict_SetItem(self._extensions, (ns_utf, name_utf), function) - - cdef void _registerExtensionFunctions(self): - cdef python.PyObject* dict_result - for ns_utf, functions in _iter_extension_function_names(): - if ns_utf is None: - continue - dict_result = python.PyDict_GetItem(self._function_cache_ns, ns_utf) - if dict_result is NULL: - d = {} - python.PyDict_SetItem(self._function_cache_ns, ns_utf, d) - else: - d = dict_result - for name_utf, function in functions.iteritems(): - python.PyDict_SetItem(d, name_utf, function) - xslt.xsltRegisterExtFunction( - self._xsltCtxt, _cstr(name_utf), _cstr(ns_utf), - _xpath_function_call) - if self._extensions is None: - return # done - last_ns = None - for (ns_utf, name_utf), function in self._extensions.iteritems(): - if ns_utf is None: - raise ValueError, \ - "extensions must have non empty namespaces" - elif ns_utf is not last_ns: - last_ns = ns_utf - dict_result = python.PyDict_GetItem( - self._function_cache_ns, ns_utf) - if dict_result is NULL: - d = {} - python.PyDict_SetItem(self._function_cache_ns, ns_utf, d) - else: - d = dict_result - python.PyDict_SetItem(d, name_utf, function) - xslt.xsltRegisterExtFunction( - self._xsltCtxt, _cstr(name_utf), _cstr(ns_utf), - _xpath_function_call) - -cdef class _ExsltRegExp # forward declaration cdef class XSLT: """Turn a document into an XSLT object. 
@@ -279,7 +251,6 @@ cdef xslt.xsltStylesheet* _c_style cdef _XSLTResolverContext _xslt_resolver_context cdef XSLTAccessControl _access_control - cdef _ExsltRegExp _regexp cdef _ErrorLog _error_log def __init__(self, xslt_input, extensions=None, regexp=True, access_control=None): @@ -289,6 +260,7 @@ cdef xmlDoc* fake_c_doc cdef _Document doc cdef _Element root_node + cdef _ExsltRegExp _regexp doc = _documentOrRaise(xslt_input) root_node = _rootNodeOrRaise(xslt_input) @@ -329,11 +301,8 @@ self._context = _XSLTContext(None, extensions) if regexp: - self._regexp = _ExsltRegExp() - self._regexp._register_in_context(self._context) - else: - self._regexp = None - # XXX is it worthwile to use xsltPrecomputeStylesheet here? + _regexp = _ExsltRegExp() + _regexp._register_in_context(self._context) def __dealloc__(self): if self._xslt_resolver_context is not None and \ @@ -649,82 +618,3 @@ if attr == key: return value return default - -################################################################################ -# EXSLT regexp implementation - -cdef class _ExsltRegExp: - cdef object _compile_map - def __init__(self): - self._compile_map = {} - - cdef _make_string(self, value): - if _isString(value): - return value - else: - raise TypeError, "Invalid argument type %s" % type(value) - - cdef _compile(self, rexp, ignore_case): - cdef python.PyObject* c_result - rexp = self._make_string(rexp) - key = (rexp, ignore_case) - c_result = python.PyDict_GetItem(self._compile_map, key) - if c_result is not NULL: - return c_result - py_flags = re.UNICODE - if ignore_case: - py_flags = py_flags | re.IGNORECASE - rexp_compiled = re.compile(rexp, py_flags) - python.PyDict_SetItem(self._compile_map, key, rexp_compiled) - return rexp_compiled - - def test(self, ctxt, s, rexp, flags=''): - flags = self._make_string(flags) - s = self._make_string(s) - rexpc = self._compile(rexp, 'i' in flags) - if rexpc.search(s) is None: - return False - else: - return True - - def match(self, ctxt, s, rexp, 
flags=''): - flags = self._make_string(flags) - s = self._make_string(s) - rexpc = self._compile(rexp, 'i' in flags) - if 'g' in flags: - results = rexpc.findall(s) - if not results: - return () - else: - result = rexpc.search(s) - if not result: - return () - results = [ result.group() ] - results.extend( result.groups('') ) - result_list = [] - root = Element('matches') - join_groups = ''.join - for s_match in results: - if python.PyTuple_CheckExact(s_match): - s_match = join_groups(s_match) - elem = SubElement(root, 'match') - elem.text = s_match - python.PyList_Append(result_list, elem) - return result_list - - def replace(self, ctxt, s, rexp, flags, replacement): - replacement = self._make_string(replacement) - flags = self._make_string(flags) - s = self._make_string(s) - rexpc = self._compile(rexp, 'i' in flags) - if 'g' in flags: - count = 0 - else: - count = 1 - return rexpc.sub(replacement, s, count) - - cdef _register_in_context(self, _XSLTContext context): - ns = "http://exslt.org/regular-expressions" - context._addLocalExtensionFunction(ns, "test", self.test) - context._addLocalExtensionFunction(ns, "match", self.match) - context._addLocalExtensionFunction(ns, "replace", self.replace) From scoder at codespeak.net Wed Mar 21 09:48:19 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 21 Mar 2007 09:48:19 +0100 (CET) Subject: [Lxml-checkins] r40876 - in lxml/trunk/src/lxml: . 
tests Message-ID: <20070321084819.2575E1007D@code0.codespeak.net> Author: scoder Date: Wed Mar 21 09:48:17 2007 New Revision: 40876 Modified: lxml/trunk/src/lxml/etree.pyx lxml/trunk/src/lxml/tests/test_etree.py lxml/trunk/src/lxml/tree.pxd Log: 'sourceline' property on Elements Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Wed Mar 21 09:48:17 2007 @@ -673,6 +673,24 @@ return None # not in ElementTree, read-only + property sourceline: + """Original line number as found by the parser or None if unknown. + """ + def __get__(self): + cdef long line + line = tree.xmlGetLineNo(self._c_node) + if line > 0: + return line + else: + return None + + def __set__(self, line): + if line < 0: + self._c_node.line = 0 + else: + self._c_node.line = line + + # not in ElementTree, read-only property nsmap: """Namespace prefix->URI mapping known in the context of this Element. 
""" Modified: lxml/trunk/src/lxml/tests/test_etree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_etree.py (original) +++ lxml/trunk/src/lxml/tests/test_etree.py Wed Mar 21 09:48:17 2007 @@ -1119,6 +1119,37 @@ ["tail0", "tail1", "tail2", "TAIL0", "TAIL1", "TAIL2"], [ el.tail for el in root ]) + def test_sourceline_XML(self): + XML = self.etree.XML + root = XML(''' + + + + + ''') + + self.assertEquals( + [2, 2, 4], + [ el.sourceline for el in root.getiterator() ]) + + def test_sourceline_parse(self): + parse = self.etree.parse + tree = parse(fileInTestDir('test_xinclude.xml')) + + self.assertEquals( + [1, 2, 3], + [ el.sourceline for el in tree.getiterator() ]) + + def test_sourceline_element(self): + Element = self.etree.Element + SubElement = self.etree.SubElement + el = Element("test") + self.assertEquals(None, el.sourceline) + + child = SubElement(el, "test") + self.assertEquals(None, el.sourceline) + self.assertEquals(None, child.sourceline) + def test_docinfo_public(self): etree = self.etree xml_header = '' Modified: lxml/trunk/src/lxml/tree.pxd ============================================================================== --- lxml/trunk/src/lxml/tree.pxd (original) +++ lxml/trunk/src/lxml/tree.pxd Wed Mar 21 09:48:17 2007 @@ -97,6 +97,7 @@ xmlAttr* properties xmlNs* ns xmlNs* nsDef + unsigned short line ctypedef struct xmlDtd: char* ExternalID @@ -198,6 +199,7 @@ cdef xmlNs* xmlSearchNs(xmlDoc* doc, xmlNode* node, char* prefix) cdef xmlNs* xmlSearchNsByHref(xmlDoc* doc, xmlNode* node, char* href) cdef int xmlIsBlankNode(xmlNode* node) + cdef long xmlGetLineNo(xmlNode* node) cdef void xmlElemDump(FILE* f, xmlDoc* doc, xmlNode* cur) cdef void xmlNodeDumpOutput(xmlOutputBuffer* buf, xmlDoc* doc, xmlNode* cur, int level, From scoder at codespeak.net Wed Mar 21 14:46:26 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 21 Mar 2007 14:46:26 +0100 (CET) Subject: 
[Lxml-checkins] r40927 - lxml/branch/extension_refactoring/doc Message-ID: <20070321134626.ADAB8100A0@code0.codespeak.net> Author: scoder Date: Wed Mar 21 14:46:25 2007 New Revision: 40927 Modified: lxml/branch/extension_refactoring/doc/xpathxslt.txt Log: cleanup Modified: lxml/branch/extension_refactoring/doc/xpathxslt.txt ============================================================================== --- lxml/branch/extension_refactoring/doc/xpathxslt.txt (original) +++ lxml/branch/extension_refactoring/doc/xpathxslt.txt Wed Mar 21 14:46:25 2007 @@ -72,11 +72,12 @@ >>> f = StringIO('''\ ... + ... xmlns:b="http://codespeak.net/ns/test2"> ... Text ... ... ''') >>> doc = etree.parse(f) + >>> r = doc.xpath('/t:foo/b:bar', {'t': 'http://codespeak.net/ns/test1', ... 'b': 'http://codespeak.net/ns/test2'}) >>> len(r) From scoder at codespeak.net Wed Mar 21 14:46:51 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 21 Mar 2007 14:46:51 +0100 (CET) Subject: [Lxml-checkins] r40928 - lxml/branch/extension_refactoring/benchmark Message-ID: <20070321134651.997B7100A2@code0.codespeak.net> Author: scoder Date: Wed Mar 21 14:46:50 2007 New Revision: 40928 Modified: lxml/branch/extension_refactoring/benchmark/bench_xpath.py Log: cleanup Modified: lxml/branch/extension_refactoring/benchmark/bench_xpath.py ============================================================================== --- lxml/branch/extension_refactoring/benchmark/bench_xpath.py (original) +++ lxml/branch/extension_refactoring/benchmark/bench_xpath.py Wed Mar 21 14:46:50 2007 @@ -35,31 +35,32 @@ @onlylib('lxe') def bench_xpath_old_extensions(self, root): - def return_child(_, element): - if element: - return element[0] + def return_child(_, elements): + if elements: + return elements[0][0] else: return () - extensions = {(None, 'child') : return_child} - xpath = self.etree.XPath("child(.)", extensions=extensions) + extensions = {("test", "child") : return_child} + xpath = 
self.etree.XPath("t:child(.)", namespaces={"test":"t"}, + extensions=extensions) for child in root: xpath(child) @onlylib('lxe') def bench_xpath_extensions(self, root): - def return_child(_, element): - if element: - return element[0] + def return_child(_, elements): + if elements: + return elements[0][0] else: return () - self.etree.FunctionNamespace("test")["t"] = return_child + self.etree.FunctionNamespace("testns")["t"] = return_child try: - xpath = self.etree.XPath("test:t(.)", {"test":"test"}) + xpath = self.etree.XPath("test:t(.)", {"test":"testns"}) for child in root: xpath(child) finally: - del self.etree.FunctionNamespace("test")["t"] + del self.etree.FunctionNamespace("testns")["t"] if __name__ == '__main__': benchbase.main(XPathBenchMark) From scoder at codespeak.net Wed Mar 21 14:48:10 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Wed, 21 Mar 2007 14:48:10 +0100 (CET) Subject: [Lxml-checkins] r40929 - lxml/branch/extension_refactoring/src/lxml Message-ID: <20070321134810.BA5ED100A2@code0.codespeak.net> Author: scoder Date: Wed Mar 21 14:48:09 2007 New Revision: 40929 Modified: lxml/branch/extension_refactoring/src/lxml/extensions.pxi lxml/branch/extension_refactoring/src/lxml/xpath.pxi lxml/branch/extension_refactoring/src/lxml/xslt.pxi Log: another major rewrite of extension function registration, cleanup Modified: lxml/branch/extension_refactoring/src/lxml/extensions.pxi ============================================================================== --- lxml/branch/extension_refactoring/src/lxml/extensions.pxi (original) +++ lxml/branch/extension_refactoring/src/lxml/extensions.pxi Wed Mar 21 14:48:09 2007 @@ -1,4 +1,4 @@ -# supports for extension functions in XPath and XSLT +# support for extension functions in XPath and XSLT class XPathError(LxmlError): pass @@ -9,17 +9,20 @@ class XPathResultError(XPathError): pass -################################################################################ -# Base class for XSLT and XPath 
evaluation contexts: functions, namespaces, ... +# forward declarations ctypedef int _register_function(void* ctxt, name_utf, ns_uri_utf) +cdef class _ExsltRegExp + +################################################################################ +# Base class for XSLT and XPath evaluation contexts: functions, namespaces, ... cdef class _BaseContext: cdef xpath.xmlXPathContext* _xpathCtxt cdef _Document _doc cdef object _extensions cdef object _namespaces - cdef object _registered_namespaces + cdef object _global_namespaces cdef object _utf_refs cdef object _function_cache cdef object _function_cache_ns @@ -28,10 +31,10 @@ cdef _TempStore _temp_refs cdef _ExceptionContext _exc - def __init__(self, namespaces, extensions): - self._xpathCtxt = NULL + def __init__(self, namespaces, extensions, enable_regexp): + cdef _ExsltRegExp _regexp self._utf_refs = {} - self._registered_namespaces = [] + self._global_namespaces = [] self._function_cache = {} self._function_cache_ns = {} @@ -39,7 +42,7 @@ # convert extensions to UTF-8 if python.PyDict_Check(extensions): extensions = (extensions,) - # format: [ {(ns,name):function} ] -> {(ns_utf,name_utf):function} + # format: [ {(ns, name):function} ] -> {(ns_utf, name_utf):function} new_extensions = {} for extension in extensions: for (ns_uri, name), function in extension.items(): @@ -52,17 +55,38 @@ new_extensions, (ns_utf, name_utf), function) extensions = new_extensions or None + if namespaces is not None: + if python.PyDict_Check(namespaces): + namespaces = namespaces.items() + if namespaces: + ns = [] + for prefix, ns_uri in namespaces: + if prefix is None: + raise TypeError, \ + "empty namespace prefix is not supported in XPath" + if ns_uri is None: + raise TypeError, \ + "setting default namespace is not supported in XPath" + prefix_utf = self._to_utf(prefix) + ns_uri_utf = self._to_utf(ns_uri) + python.PyList_Append(ns, (prefix_utf, ns_uri_utf)) + namespaces = ns + self._doc = None self._exc = _ExceptionContext() 
self._extensions = extensions self._namespaces = namespaces self._temp_refs = _TempStore() + if enable_regexp: + _regexp = _ExsltRegExp() + _regexp._register_in_context(self) + cdef _copy(self): cdef _BaseContext context if self._namespaces is not None: - namespaces = python.PyDict_Copy(self._namespaces) - context = self.__class__(namespaces, None) + namespaces = self._namespaces[:] + context = self.__class__(namespaces, None, False) if self._extensions is not None: context._extensions = python.PyDict_Copy(self._extensions) return context @@ -86,57 +110,72 @@ cdef _register_context(self, _Document doc): self._doc = doc self._exc.clear() - python.PyDict_Clear(self._function_cache) - python.PyDict_Clear(self._function_cache_ns) - namespaces = self._namespaces - if namespaces is not None: - self.registerNamespaces(namespaces) - cdef _unregister_context(self): - self._unregisterNamespaces() -# xpath.xmlXPathRegisteredNsCleanup(self._xpathCtxt) - self._free_context() - - cdef _free_context(self): + cdef _cleanup_context(self): + #xpath.xmlXPathRegisteredNsCleanup(self._xpathCtxt) + #self.unregisterGlobalNamespaces() python.PyDict_Clear(self._utf_refs) self._doc = None + + cdef _release_context(self): if self._xpathCtxt is not NULL: self._xpathCtxt.userData = NULL self._xpathCtxt = NULL # namespaces (internal UTF-8 methods with leading '_') - cdef addNamespace(self, prefix, uri): + cdef addNamespace(self, prefix, ns_uri): + if prefix is None: + raise TypeError, "empty prefix is not supported in XPath" + prefix_utf = self._to_utf(prefix) + ns_uri_utf = self._to_utf(ns_uri) + new_item = (prefix_utf, ns_uri_utf) if self._namespaces is None: - self._namespaces = {} - python.PyDict_SetItem(self._namespaces, prefix, uri) + self._namespaces = [new_item] + else: + namespaces = [] + for item in self._namespaces: + if item[0] == prefix_utf: + item = new_item + new_item = None + python.PyList_Append(namespaces, item) + if new_item is not None: + python.PyList_Append(namespaces, 
new_item) + self._namespaces = namespaces + if self._xpathCtxt is not NULL: + xpath.xmlXPathRegisterNs( + self._xpathCtxt, _cstr(prefix_utf), _cstr(ns_uri_utf)) - cdef registerNamespaces(self, namespaces): - for prefix, uri in namespaces.items(): - self.registerNamespace(prefix, uri) - cdef registerNamespace(self, prefix, ns_uri): if prefix is None: raise TypeError, "empty prefix is not supported in XPath" prefix_utf = self._to_utf(prefix) ns_uri_utf = self._to_utf(ns_uri) - python.PyList_Append(self._registered_namespaces, prefix_utf) + python.PyList_Append(self._global_namespaces, prefix_utf) xpath.xmlXPathRegisterNs(self._xpathCtxt, _cstr(prefix_utf), _cstr(ns_uri_utf)) - cdef _registerNamespace(self, prefix_utf, ns_uri_utf): - python.PyList_Append(self._registered_namespaces, prefix_utf) - xpath.xmlXPathRegisterNs(self._xpathCtxt, - _cstr(prefix_utf), _cstr(ns_uri_utf)) - - cdef void _unregisterNamespaces(self): - if python.PyList_GET_SIZE(self._registered_namespaces) > 0: - for prefix_utf in self._registered_namespaces: - sys.stderr.write(prefix_utf) - sys.stderr.flush() + cdef registerLocalNamespaces(self): + if self._namespaces is None: + return + for prefix_utf, ns_uri_utf in self._namespaces: + xpath.xmlXPathRegisterNs( + self._xpathCtxt, _cstr(prefix_utf), _cstr(ns_uri_utf)) + + cdef registerGlobalNamespaces(self): + ns_prefixes = _find_all_extension_prefixes() + if python.PyList_GET_SIZE(ns_prefixes) > 0: + for prefix_utf, ns_uri_utf in ns_prefixes: + python.PyList_Append(self._global_namespaces, prefix_utf) + xpath.xmlXPathRegisterNs( + self._xpathCtxt, _cstr(prefix_utf), _cstr(ns_uri_utf)) + + cdef unregisterGlobalNamespaces(self): + if python.PyList_GET_SIZE(self._global_namespaces) > 0: + for prefix_utf in self._global_namespaces: xpath.xmlXPathRegisterNs(self._xpathCtxt, _cstr(prefix_utf), NULL) - self._registered_namespaces = [] + del self._global_namespaces[:] cdef void _unregisterNamespace(self, prefix_utf): 
xpath.xmlXPathRegisterNs(self._xpathCtxt, @@ -149,7 +188,7 @@ self._extensions = {} python.PyDict_SetItem(self._extensions, (ns_utf, name_utf), function) - cdef void _registerAllFunctions(self, void* ctxt, + cdef void registerGlobalFunctions(self, void* ctxt, _register_function reg_func): cdef python.PyObject* dict_result for ns_utf, ns_functions in _iter_ns_extension_functions(): @@ -167,6 +206,10 @@ for name_utf, function in ns_functions.iteritems(): python.PyDict_SetItem(d, name_utf, function) reg_func(ctxt, name_utf, ns_utf) + + cdef void registerLocalFunctions(self, void* ctxt, + _register_function reg_func): + cdef python.PyObject* dict_result if self._extensions is None: return # done last_ns = None @@ -188,7 +231,7 @@ python.PyDict_SetItem(d, name_utf, function) reg_func(ctxt, name_utf, ns_utf) - cdef void _unregisterAllFunctions(self, void* ctxt, + cdef unregisterAllFunctions(self, void* ctxt, _register_function unreg_func): for name_utf in self._function_cache: unreg_func(ctxt, name_utf, None) @@ -196,6 +239,18 @@ for name_utf in functions: unreg_func(ctxt, name_utf, ns_utf) + cdef unregisterGlobalFunctions(self, void* ctxt, + _register_function unreg_func): + for name_utf in self._function_cache: + if self._extensions is None or \ + (None, name_utf) not in self._extensions: + unreg_func(ctxt, name_utf, None) + for ns_utf, functions in self._function_cache_ns.iteritems(): + for name_utf in functions: + if self._extensions is None or \ + (ns_utf, name_utf) not in self._extensions: + unreg_func(ctxt, name_utf, ns_utf) + cdef _find_cached_function(self, char* c_ns_uri, char* c_name): """Lookup an extension function in the cache and return it. @@ -215,7 +270,7 @@ return dict_result return None - cdef int _prepare_function_call(self, char* c_ns_uri, char* c_name): + cdef int __prepare_function_call(self, char* c_ns_uri, char* c_name): """Find an extension function and store it in 'self._called_function'. This is absolutely performance-critical for XPath/XSLT! 
@@ -393,27 +448,6 @@ ################################################################################ # helper functions -cdef xpath.xmlXPathFunction _function_check(void* ctxt, - char* c_name, char* c_ns_uri): - cdef python.PyGILState_STATE gil_state - cdef xpath.xmlXPathFunction c_func - gil_state = python.PyGILState_Ensure() - c_func = _python_function_check(ctxt, c_name, c_ns_uri) - python.PyGILState_Release(gil_state) - return c_func - -cdef xpath.xmlXPathFunction _python_function_check(void* ctxt, - char* c_name, char* c_ns_uri): - "Module level lookup function for XPath/XSLT functions" - cdef xpath.xmlXPathFunction c_func - cdef _BaseContext context - context = <_BaseContext>ctxt - if context._prepare_function_call(c_ns_uri, c_name): - c_func = _call_prepared_function - else: - c_func = NULL - return c_func - cdef xpath.xmlXPathObject* _wrapXPathObject(object obj) except NULL: cdef xpath.xmlNodeSet* resultSet cdef _Element node @@ -575,19 +609,3 @@ xpath.xmlXPathErr(ctxt, xpath.XPATH_UNKNOWN_FUNC_ERROR) exception = XPathFunctionError("XPath function '%s' not found" % fref) context._exc._store_exception(exception) - -# call the function that was stored in 'context._called_function' - -cdef void _call_prepared_function(xpath.xmlXPathParserContext* ctxt, int nargs): - cdef python.PyGILState_STATE gil_state - gil_state = python.PyGILState_Ensure() - _call_prepared_python_function(ctxt, nargs) - python.PyGILState_Release(gil_state) - -cdef void _call_prepared_python_function(xpath.xmlXPathParserContext* ctxt, - int nargs): - cdef xpath.xmlXPathContext* rctxt - cdef _BaseContext context - rctxt = ctxt.context - context = <_BaseContext>(rctxt.userData) - _extension_function_call(context, context._called_function, ctxt, nargs) Modified: lxml/branch/extension_refactoring/src/lxml/xpath.pxi ============================================================================== --- lxml/branch/extension_refactoring/src/lxml/xpath.pxi (original) +++ 
lxml/branch/extension_refactoring/src/lxml/xpath.pxi Wed Mar 21 14:48:09 2007 @@ -30,29 +30,29 @@ cdef class _XPathContext(_BaseContext): cdef object _variables - def __init__(self, namespaces, extensions, variables): + def __init__(self, namespaces, extensions, enable_regexp, variables): self._variables = variables - _BaseContext.__init__(self, namespaces, extensions) - - cdef register_context(self, xpath.xmlXPathContext* xpathCtxt, _Document doc): + _BaseContext.__init__(self, namespaces, extensions, enable_regexp) + + cdef set_context(self, xpath.xmlXPathContext* xpathCtxt): self._set_xpath_context(xpathCtxt) - ns_prefixes = _find_all_extension_prefixes() - if python.PyList_GET_SIZE(ns_prefixes) > 0: - for (prefix, ns_uri) in ns_prefixes: - self._registerNamespace(prefix, ns_uri) + self._setupDict(xpathCtxt) + self.registerLocalNamespaces() + self.registerLocalFunctions(xpathCtxt, _register_xpath_function) + + cdef register_context(self, _Document doc): self._register_context(doc) + self.registerGlobalNamespaces() + self.registerGlobalFunctions(self._xpathCtxt, _register_xpath_function) if self._variables is not None: self.registerVariables(self._variables) - self._registerAllFunctions(xpathCtxt, _register_xpath_function) cdef unregister_context(self): - cdef xpath.xmlXPathContext* xpathCtxt - xpathCtxt = self._xpathCtxt - if xpathCtxt is NULL: - return - xpath.xmlXPathRegisteredVariablesCleanup(xpathCtxt) - self._unregisterAllFunctions(xpathCtxt, _unregister_xpath_function) - self._unregister_context() + self.unregisterGlobalFunctions( + self._xpathCtxt, _unregister_xpath_function) + self.unregisterGlobalNamespaces() + xpath.xmlXPathRegisteredVariablesCleanup(self._xpathCtxt) + self._cleanup_context() cdef registerVariables(self, variable_dict): for name, value in variable_dict.items(): @@ -69,25 +69,26 @@ xpath.xmlXPathRegisterVariable( self._xpathCtxt, _cstr(name_utf), _wrapXPathObject(value)) -cdef void _setupDict(xpath.xmlXPathContext* xpathCtxt): - 
__GLOBAL_PARSER_CONTEXT.initXPathParserDict(xpathCtxt) + cdef void _setupDict(self, xpath.xmlXPathContext* xpathCtxt): + __GLOBAL_PARSER_CONTEXT.initXPathParserDict(xpathCtxt) cdef class _XPathEvaluatorBase: cdef xpath.xmlXPathContext* _xpathCtxt cdef _XPathContext _context cdef python.PyThread_type_lock _eval_lock - def __init__(self, namespaces, extensions, regexp): - cdef _ExsltRegExp _regexp - self._context = _XPathContext(namespaces, extensions, None) - if regexp: - _regexp = _ExsltRegExp() - _regexp._register_in_context(self._context) + def __init__(self, namespaces, extensions, enable_regexp): + self._context = _XPathContext(namespaces, extensions, + enable_regexp, None) def __dealloc__(self): if self._xpathCtxt is not NULL: xpath.xmlXPathFreeContext(self._xpathCtxt) + cdef set_context(self, xpath.xmlXPathContext* xpathCtxt): + self._xpathCtxt = xpathCtxt + self._context.set_context(xpathCtxt) + def evaluate(self, _eval_arg, **_variables): """Evaluate an XPath expression. @@ -170,14 +171,13 @@ cdef xpath.xmlXPathContext* xpathCtxt cdef int ns_register_status cdef _Document doc + self._element = element doc = element._doc + _XPathEvaluatorBase.__init__(self, namespaces, extensions, regexp) xpathCtxt = xpath.xmlXPathNewContext(doc._c_doc) - self._xpathCtxt = xpathCtxt if xpathCtxt is NULL: raise XPathContextError, "Unable to create new XPath context" - _setupDict(xpathCtxt) - self._element = element - _XPathEvaluatorBase.__init__(self, namespaces, extensions, regexp) + self.set_context(xpathCtxt) def registerNamespace(self, prefix, uri): """Register a namespace with the XPath context. @@ -200,27 +200,27 @@ against the ElementTree as returned by getroottree(). 
""" cdef python.PyThreadState* state - cdef xpath.xmlXPathContext* xpathCtxt cdef xpath.xmlXPathObject* xpathObj cdef _Document doc cdef char* c_path path = _utf8(_path) - xpathCtxt = self._xpathCtxt - xpathCtxt.node = self._element._c_node doc = self._element._doc self._lock() - self._context.register_context(xpathCtxt, doc) + self._xpathCtxt.node = self._element._c_node try: + self._context.register_context(doc) self._context.registerVariables(_variables) state = python.PyEval_SaveThread() - xpathObj = xpath.xmlXPathEvalExpression(_cstr(path), xpathCtxt) - finally: + xpathObj = xpath.xmlXPathEvalExpression( + _cstr(path), self._xpathCtxt) python.PyEval_RestoreThread(state) + result = self._handle_result(xpathObj, doc) + finally: self._context.unregister_context() self._unlock() - return self._handle_result(xpathObj, doc) + return result cdef class XPathDocumentEvaluator(XPathElementEvaluator): @@ -242,30 +242,32 @@ are currently not supported for variables. """ cdef python.PyThreadState* state - cdef xpath.xmlXPathContext* xpathCtxt cdef xpath.xmlXPathObject* xpathObj cdef xmlDoc* c_doc cdef _Document doc path = _utf8(_path) - xpathCtxt = self._xpathCtxt doc = self._element._doc self._lock() - self._context.register_context(xpathCtxt, doc) - c_doc = _fakeRootDoc(doc._c_doc, self._element._c_node) try: - self._context.registerVariables(_variables) - state = python.PyEval_SaveThread() - xpathCtxt.doc = c_doc - xpathCtxt.node = tree.xmlDocGetRootElement(c_doc) - xpathObj = xpath.xmlXPathEvalExpression(_cstr(path), xpathCtxt) + self._context.register_context(doc) + c_doc = _fakeRootDoc(doc._c_doc, self._element._c_node) + try: + self._context.registerVariables(_variables) + state = python.PyEval_SaveThread() + self._xpathCtxt.doc = c_doc + self._xpathCtxt.node = tree.xmlDocGetRootElement(c_doc) + xpathObj = xpath.xmlXPathEvalExpression( + _cstr(path), self._xpathCtxt) + python.PyEval_RestoreThread(state) + result = self._handle_result(xpathObj, doc) + finally: + 
_destroyFakeDoc(doc._c_doc, c_doc) + self._context.unregister_context() finally: - python.PyEval_RestoreThread(state) - _destroyFakeDoc(doc._c_doc, c_doc) - self._context.unregister_context() self._unlock() - return self._handle_result(xpathObj, doc) + return result def XPathEvaluator(etree_or_element, namespaces=None, extensions=None, @@ -300,19 +302,20 @@ cdef readonly object path def __init__(self, path, namespaces=None, extensions=None, regexp=True): + cdef xpath.xmlXPathContext* xpathCtxt _XPathEvaluatorBase.__init__(self, namespaces, extensions, regexp) - self._xpath = NULL self.path = path path = _utf8(path) - self._xpathCtxt = xpath.xmlXPathNewContext(NULL) - _setupDict(self._xpathCtxt) - self._xpath = xpath.xmlXPathCtxtCompile(self._xpathCtxt, _cstr(path)) + xpathCtxt = xpath.xmlXPathNewContext(NULL) + if xpathCtxt is NULL: + raise XPathContextError, "Unable to create new XPath context" + self.set_context(xpathCtxt) + self._xpath = xpath.xmlXPathCtxtCompile(xpathCtxt, _cstr(path)) if self._xpath is NULL: self._raise_parse_error() def __call__(self, _etree_or_element, **_variables): cdef python.PyThreadState* state - cdef xpath.xmlXPathContext* xpathCtxt cdef xpath.xmlXPathObject* xpathObj cdef _Document document cdef _Element element @@ -325,18 +328,18 @@ self._xpathCtxt.doc = document._c_doc self._xpathCtxt.node = element._c_node - context = self._context - context.register_context(self._xpathCtxt, document) - context.registerVariables(_variables) try: + self._context.register_context(document) + self._context.registerVariables(_variables) state = python.PyEval_SaveThread() xpathObj = xpath.xmlXPathCompiledEval( self._xpath, self._xpathCtxt) - finally: python.PyEval_RestoreThread(state) - context.unregister_context() + result = self._handle_result(xpathObj, document) + finally: + self._context.unregister_context() self._unlock() - return self._handle_result(xpathObj, document) + return result def __dealloc__(self): if self._xpath is not NULL: Modified: 
lxml/branch/extension_refactoring/src/lxml/xslt.pxi ============================================================================== --- lxml/branch/extension_refactoring/src/lxml/xslt.pxi (original) +++ lxml/branch/extension_refactoring/src/lxml/xslt.pxi Wed Mar 21 14:48:09 2007 @@ -210,28 +210,29 @@ cdef class _XSLTContext(_BaseContext): cdef xslt.xsltTransformContext* _xsltCtxt - def __init__(self, namespaces, extensions): + def __init__(self, namespaces, extensions, enable_regexp): self._xsltCtxt = NULL - if extensions and None in extensions: - raise XSLTExtensionError, "extensions must not have empty namespaces" - _BaseContext.__init__(self, namespaces, extensions) + if extensions is not None: + for ns, prefix in extensions: + if ns is None: + raise XSLTExtensionError, \ + "extensions must not have empty namespaces" + _BaseContext.__init__(self, namespaces, extensions, enable_regexp) cdef register_context(self, xslt.xsltTransformContext* xsltCtxt, _Document doc): self._xsltCtxt = xsltCtxt self._set_xpath_context(xsltCtxt.xpathCtxt) self._register_context(doc) - xsltCtxt.xpathCtxt.userData = self - self._registerAllFunctions(xsltCtxt, _register_xslt_function) + self.registerLocalFunctions(xsltCtxt, _register_xslt_function) + self.registerGlobalFunctions(xsltCtxt, _register_xslt_function) cdef free_context(self): - cdef xslt.xsltTransformContext* xsltCtxt - xsltCtxt = self._xsltCtxt - if xsltCtxt is NULL: - return - self._free_context() - self._xsltCtxt = NULL - xslt.xsltFreeTransformContext(xsltCtxt) + self._cleanup_context() + self._release_context() + if self._xsltCtxt is not NULL: + xslt.xsltFreeTransformContext(self._xsltCtxt) + self._xsltCtxt = NULL self._release_temp_refs() @@ -253,7 +254,8 @@ cdef XSLTAccessControl _access_control cdef _ErrorLog _error_log - def __init__(self, xslt_input, extensions=None, regexp=True, access_control=None): + def __init__(self, xslt_input, extensions=None, regexp=True, + access_control=None): cdef python.PyThreadState* 
state cdef xslt.xsltStylesheet* c_style cdef xmlDoc* c_doc @@ -299,10 +301,7 @@ c_doc._private = NULL # no longer used! self._c_style = c_style - self._context = _XSLTContext(None, extensions) - if regexp: - _regexp = _ExsltRegExp() - _regexp._register_in_context(self._context) + self._context = _XSLTContext(None, extensions, regexp) def __dealloc__(self): if self._xslt_resolver_context is not None and \ @@ -315,20 +314,24 @@ def __get__(self): return self._error_log.copy() + def apply(self, _input, profile_run=False, **_kw): + return self(_input, profile_run, **_kw) + + def tostring(self, _ElementTree result_tree): + """Save result doc to string based on stylesheet output method. + """ + return str(result_tree) + def __call__(self, _input, profile_run=False, **_kw): - cdef python.PyThreadState* state cdef _XSLTContext context cdef _Document input_doc cdef _Element root_node cdef _Document result_doc cdef _Document profile_doc cdef xmlDoc* c_profile_doc - cdef _XSLTResolverContext resolver_context cdef xslt.xsltTransformContext* transform_ctxt cdef xmlDoc* c_result cdef xmlDoc* c_doc - cdef char** params - cdef Py_ssize_t i, kw_count if not _checkThreadDict(self._c_style.doc.dict): raise RuntimeError, "stylesheet is not usable in this thread" @@ -336,9 +339,6 @@ input_doc = _documentOrRaise(_input) root_node = _rootNodeOrRaise(_input) - resolver_context = _XSLTResolverContext(input_doc._parser) - resolver_context._c_style_doc = self._xslt_resolver_context._c_style_doc - c_doc = _fakeRootDoc(input_doc._c_doc, root_node._c_node) transform_ctxt = xslt.xsltNewTransformContext(self._c_style, c_doc) @@ -348,28 +348,82 @@ initTransformDict(transform_ctxt) - self._error_log.connect() + if profile_run: + transform_ctxt.profile = 1 + + try: + self._error_log.connect() + context = self._context._copy() + context.register_context(transform_ctxt, input_doc) + + c_result = self._run_transform( + input_doc, c_doc, _kw, context, transform_ctxt) + + if transform_ctxt.profile: + 
c_profile_doc = xslt.xsltGetProfileInformation(transform_ctxt) + if c_profile_doc is not NULL: + profile_doc = _documentFactory( + c_profile_doc, input_doc._parser) + finally: + if context is not None: + context.free_context() + _destroyFakeDoc(input_doc._c_doc, c_doc) + self._error_log.disconnect() + + try: + if self._xslt_resolver_context._has_raised(): + if c_result is not NULL: + tree.xmlFreeDoc(c_result) + self._xslt_resolver_context._raise_if_stored() + + if c_result is NULL: + error = self._error_log.last_error + if error is not None and error.message: + if error.line >= 0: + message = "%s, line %d" % (error.message, error.line) + else: + message = error.message + elif error.line >= 0: + message = "Error applying stylesheet, line %d" % error.line + else: + message = "Error applying stylesheet" + raise XSLTApplyError, message + finally: + self._xslt_resolver_context.clear() + + result_doc = _documentFactory(c_result, input_doc._parser) + return _xsltResultTreeFactory(result_doc, self, profile_doc) + + cdef xmlDoc* _run_transform(self, _Document input_doc, xmlDoc* c_input_doc, + parameters, _XSLTContext context, + xslt.xsltTransformContext* transform_ctxt): + cdef python.PyThreadState* state + cdef _XSLTResolverContext resolver_context + cdef xmlDoc* c_result + cdef char** params + cdef Py_ssize_t i, parameter_count + + resolver_context = _XSLTResolverContext(input_doc._parser) + resolver_context._c_style_doc = self._xslt_resolver_context._c_style_doc + xslt.xsltSetTransformErrorFunc(transform_ctxt, self._error_log, _receiveXSLTError) if self._access_control is not None: self._access_control._register_in_context(transform_ctxt) - if profile_run: - transform_ctxt.profile = 1 - transform_ctxt._private = self._xslt_resolver_context - kw_count = python.PyDict_Size(_kw) - if kw_count > 0: + parameter_count = python.PyDict_Size(parameters) + if parameter_count > 0: # allocate space for parameters # * 2 as we want an entry for both key and value, # and + 1 as array 
is NULL terminated params = python.PyMem_Malloc( - sizeof(char*) * (kw_count * 2 + 1)) + sizeof(char*) * (parameter_count * 2 + 1)) i = 0 keep_ref = [] - for key, value in _kw.iteritems(): + for key, value in parameters.iteritems(): k = _utf8(key) python.PyList_Append(keep_ref, k) v = _utf8(value) @@ -382,59 +436,16 @@ else: params = NULL - context = self._context._copy() - context.register_context(transform_ctxt, input_doc) - state = python.PyEval_SaveThread() - c_result = xslt.xsltApplyStylesheetUser(self._c_style, c_doc, params, - NULL, NULL, transform_ctxt) + c_result = xslt.xsltApplyStylesheetUser( + self._c_style, c_input_doc, params, NULL, NULL, transform_ctxt) python.PyEval_RestoreThread(state) if params is not NULL: # deallocate space for parameters python.PyMem_Free(params) - keep_ref = None - - if transform_ctxt.profile: - c_profile_doc = xslt.xsltGetProfileInformation(transform_ctxt) - if c_profile_doc is not NULL: - profile_doc = _documentFactory(c_profile_doc, input_doc._parser) - context.free_context() - _destroyFakeDoc(input_doc._c_doc, c_doc) - - self._error_log.disconnect() - try: - if self._xslt_resolver_context._has_raised(): - if c_result is not NULL: - tree.xmlFreeDoc(c_result) - self._xslt_resolver_context._raise_if_stored() - - if c_result is NULL: - error = self._error_log.last_error - if error is not None and error.message: - if error.line >= 0: - message = "%s, line %d" % (error.message, error.line) - else: - message = error.message - elif error.line >= 0: - message = "Error applying stylesheet, line %d" % error.line - else: - message = "Error applying stylesheet" - raise XSLTApplyError, message - finally: - self._xslt_resolver_context.clear() - - result_doc = _documentFactory(c_result, input_doc._parser) - return _xsltResultTreeFactory(result_doc, self, profile_doc) - - def apply(self, _input, profile_run=False, **_kw): - return self(_input, profile_run, **_kw) - - def tostring(self, _ElementTree result_tree): - """Save result doc to 
string based on stylesheet output method. - """ - return str(result_tree) + return c_result cdef class _XSLTResultTree(_ElementTree): cdef XSLT _xslt @@ -511,17 +522,6 @@ # enable EXSLT support for XSLT xslt.exsltRegisterAll() -# extension function lookup for XSLT -cdef xpath.xmlXPathFunction _xslt_function_check(void* ctxt, - char* c_name, char* c_ns_uri): - "Find XSLT extension function from set of XPath and XSLT functions" - cdef xpath.xmlXPathFunction result - result = _function_check(ctxt, c_name, c_ns_uri) - if result is NULL: - return xslt.xsltExtModuleFunctionLookup(c_name, c_ns_uri) - else: - return result - cdef void initTransformDict(xslt.xsltTransformContext* transform_ctxt): __GLOBAL_PARSER_CONTEXT.initThreadDictRef(&transform_ctxt.dict) From scoder at codespeak.net Thu Mar 22 08:31:16 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 22 Mar 2007 08:31:16 +0100 (CET) Subject: [Lxml-checkins] r41008 - lxml/trunk/src/lxml/tests Message-ID: <20070322073116.B216C1006E@code0.codespeak.net> Author: scoder Date: Thu Mar 22 08:31:15 2007 New Revision: 41008 Modified: lxml/trunk/src/lxml/tests/test_objectify.py Log: doc cleanup Modified: lxml/trunk/src/lxml/tests/test_objectify.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_objectify.py (original) +++ lxml/trunk/src/lxml/tests/test_objectify.py Thu Mar 22 08:31:15 2007 @@ -1,10 +1,7 @@ # -*- coding: utf-8 -*- """ -Tests specific to the extended etree API - -Tests that apply to the general ElementTree API should go into -test_elementtree +Tests specific to the lxml.objectify API """ @@ -28,7 +25,7 @@ ''' class ObjectifyTestCase(HelperTestCase): - """Test cases for lxml.elementlib.objectify + """Test cases for lxml.objectify """ etree = etree From scoder at codespeak.net Thu Mar 22 08:31:28 2007 From: scoder at codespeak.net (scoder at codespeak.net) Date: Thu, 22 Mar 2007 08:31:28 +0100 (CET) Subject: [Lxml-checkins] 
r41009 - in lxml/trunk: . doc src/lxml src/lxml/tests Message-ID: <20070322073128.E5A5C1006F@code0.codespeak.net> Author: scoder Date: Thu Mar 22 08:31:26 2007 New Revision: 41009 Added: lxml/trunk/src/lxml/pyclasslookup.pyx lxml/trunk/src/lxml/tests/test_pyclasslookup.py Modified: lxml/trunk/CHANGES.txt lxml/trunk/doc/element_classes.txt lxml/trunk/setupinfo.py lxml/trunk/src/lxml/apihelpers.pxi lxml/trunk/src/lxml/classlookup.pxi lxml/trunk/src/lxml/etree.pyx lxml/trunk/src/lxml/etreepublic.pxd lxml/trunk/src/lxml/public-api.pxi lxml/trunk/src/lxml/tests/test_elementtree.py lxml/trunk/src/lxml/tests/test_etree.py Log: lxml.pyclasslookup - element class lookup mechanism with tree access in Python space, collectAttributes() C-function, general cleanup Modified: lxml/trunk/CHANGES.txt ============================================================================== --- lxml/trunk/CHANGES.txt (original) +++ lxml/trunk/CHANGES.txt Thu Mar 22 08:31:26 2007 @@ -2,6 +2,29 @@ lxml changelog ============== +under development +================= + +Features added +-------------- + +* ``lxml.pyclasslookup`` module that can access the entire tree to determine a + suitable Element class + +* ``Element.values()`` to accompany the existing ``keys()`` and ``items()`` + +* ``collectAttributes()`` C-function to build a list of attribute + keys/values/items for a libxml2 node + +Bugs fixed +---------- + +Other changes +------------- + +* major rewrite of internal extension function setup + + 1.3beta (2007-02-27) ==================== Modified: lxml/trunk/doc/element_classes.txt ============================================================================== --- lxml/trunk/doc/element_classes.txt (original) +++ lxml/trunk/doc/element_classes.txt Thu Mar 22 08:31:26 2007 @@ -89,7 +89,8 @@ >>> parser.setElementClassLookup(parser_lookup) There is one drawback of the parser based scheme: the ``Element()`` factory -creates a new document that deploys the default parser:: +does not know about 
your specialised parser and creates a new document that +deploys the default parser:: >>> el = etree.Element("root") >>> print isinstance(el, HonkElement) @@ -231,8 +232,8 @@ Custom element class lookup ........................... -This is the most customisable way of finding element classes. It allows you -to implement a custom lookup scheme in a subclass:: +This is the most customisable way of finding element classes on a per-element +basis. It allows you to implement a custom lookup scheme in a subclass:: >>> class MyLookup(etree.CustomElementClassLookup): ... def lookup(self, node_type, document, namespace, name): @@ -250,6 +251,45 @@ per-parser setup. +Tree based element class lookup in Python +......................................... + +Taking more elaborate decisions than allowed by the custom scheme is difficult +to achieve in pure Python. It would require access to the tree - before the +elements in the tree have been instantiated as Python Element objects. + +Luckily, there is a way to do this. The separate module +``lxml.pyclasslookup`` provides a lookup class called +``PythonElementClassLookup`` that works similarly to the custom lookup scheme:: + + >>> from lxml.pyclasslookup import PythonElementClassLookup + >>> class MyLookup(PythonElementClassLookup): + ... def lookup(self, document, element): + ... return MyElementClass # defined elsewhere + + >>> parser = etree.XMLParser() + >>> parser.setElementClassLookup(MyLookup()) + +As before, the first argument to the ``lookup()`` method is the opaque +document instance that contains the Element. The second argument is a +lightweight Element proxy implementation that is only valid during the lookup. +Do not try to keep a reference to it. Once the lookup is finished, the proxy +will become invalid. You will get an ``AssertionError`` if you access any of +the properties or methods outside the scope of the lookup call where they were +instantiated.
+ +During the lookup, the element object behaves mostly like a normal Element +instance. It provides the properties ``tag``, ``text``, ``tail`` etc. and +supports indexing, slicing and the ``getchildren()``, ``getparent()`` +etc. methods. It does *not* support iteration, nor does it support any kind +of modification. All of its properties are read-only and it cannot be removed +or inserted into other trees. You can use it as a starting point to freely +traverse the tree and collect any kind of information that its elements +provide. Once you have taken the decision which class to use for this +element, you can simply return it and have lxml take care of cleaning up the +instantiated proxy classes. + + Implementing namespaces ----------------------- Modified: lxml/trunk/setupinfo.py ============================================================================== --- lxml/trunk/setupinfo.py (original) +++ lxml/trunk/setupinfo.py Thu Mar 22 08:31:26 2007 @@ -8,8 +8,9 @@ PYREX_INSTALLED = False EXT_MODULES = [ - ("etree", "lxml.etree"), - ("objectify", "lxml.objectify") + ("etree", "lxml.etree"), + ("objectify", "lxml.objectify"), + ("pyclasslookup", "lxml.pyclasslookup") ] Modified: lxml/trunk/src/lxml/apihelpers.pxi ============================================================================== --- lxml/trunk/src/lxml/apihelpers.pxi (original) +++ lxml/trunk/src/lxml/apihelpers.pxi Thu Mar 22 08:31:26 2007 @@ -232,6 +232,29 @@ tree.xmlRemoveProp(c_attr) return 0 +cdef object _collectAttributes(xmlNode* c_node, int collecttype): + """Collect all attributes of a node in a list. Depending on collecttype, + it collects either the name (1), the value (2) or the name-value tuples. 
+ """ + cdef xmlAttr* c_attr + c_attr = c_node.properties + attributes = [] + while c_attr is not NULL: + if c_attr.type == tree.XML_ATTRIBUTE_NODE: + if collecttype == 1: + item = _namespacedName(c_attr) + elif collecttype == 2: + item = _attributeValue(c_node, c_attr) + else: + item = (_namespacedName(c_attr), + _attributeValue(c_node, c_attr)) + + ret = python.PyList_Append(attributes, item) + if ret: + raise + c_attr = c_attr.next + return attributes + cdef object __RE_XML_ENCODING __RE_XML_ENCODING = re.compile( r'^(\s*<\?\s*xml[^>]+)\s+encoding\s*=\s*"[^"]*"\s*', re.U) Modified: lxml/trunk/src/lxml/classlookup.pxi ============================================================================== --- lxml/trunk/src/lxml/classlookup.pxi (original) +++ lxml/trunk/src/lxml/classlookup.pxi Thu Mar 22 08:31:26 2007 @@ -206,7 +206,7 @@ You can inherit from this class and override the method - lookup(type, doc, namespace, name) + lookup(self, type, doc, namespace, name) to lookup the element class for a node. Arguments of the method: * type: one of 'element', 'comment', 'PI' Modified: lxml/trunk/src/lxml/etree.pyx ============================================================================== --- lxml/trunk/src/lxml/etree.pyx (original) +++ lxml/trunk/src/lxml/etree.pyx Thu Mar 22 08:31:26 2007 @@ -717,8 +717,8 @@ return "" % (self.tag, id(self)) def __getitem__(self, Py_ssize_t index): - """Returns the given subelement. - """ + """Returns the subelement at the given position. 
+ """ cdef xmlNode* c_node c_node = _findChild(self._c_node, index) if c_node is NULL: @@ -739,10 +739,10 @@ return [] c = start result = [] - doc = self._doc while c_node is not NULL and c < stop: if _isElement(c_node): - ret = python.PyList_Append(result, _elementFactory(doc, c_node)) + ret = python.PyList_Append( + result, _elementFactory(self._doc, c_node)) if ret: raise c = c + 1 @@ -858,29 +858,34 @@ return _getAttributeValue(self, key, default) def keys(self): - """Gets a list of attribute names. The names are returned in an arbitrary - order (just like for an ordinary Python dictionary). + """Gets a list of attribute names. The names are returned in an + arbitrary order (just like for an ordinary Python dictionary). """ - return python.PySequence_List( _attributeIteratorFactory(self, 1) ) + return _collectAttributes(self._c_node, 1) + + def values(self): + """Gets element attribute values as a sequence of strings. The + attributes are returned in an arbitrary order. + """ + return _collectAttributes(self._c_node, 2) def items(self): """Gets element attributes, as a sequence. The attributes are returned in an arbitrary order. """ - return python.PySequence_List( _attributeIteratorFactory(self, 3) ) + return _collectAttributes(self._c_node, 3) def getchildren(self): """Returns all subelements. The elements are returned in document order. 
""" cdef xmlNode* c_node - cdef _Document doc cdef int ret result = [] - doc = self._doc c_node = self._c_node.children while c_node is not NULL: if _isElement(c_node): - ret = python.PyList_Append(result, _elementFactory(doc, c_node)) + ret = python.PyList_Append( + result, _elementFactory(self._doc, c_node)) if ret: raise c_node = c_node.next @@ -1441,28 +1446,25 @@ return _getAttributeValue(self._element, key, default) def keys(self): - return python.PySequence_List( - _attributeIteratorFactory(self._element, 1) ) + return _collectAttributes(self._element._c_node, 1) def __iter__(self): - return iter(self.keys()) + return iter(_collectAttributes(self._element._c_node, 1)) def iterkeys(self): - return iter(self.keys()) + return iter(_collectAttributes(self._element._c_node, 1)) def values(self): - return python.PySequence_List( - _attributeIteratorFactory(self._element, 2) ) + return _collectAttributes(self._element._c_node, 2) def itervalues(self): - return iter(self.values()) + return iter(_collectAttributes(self._element._c_node, 2)) def items(self): - return python.PySequence_List( - _attributeIteratorFactory(self._element, 3) ) + return _collectAttributes(self._element._c_node, 3) def iteritems(self): - return iter(self.items()) + return iter(_collectAttributes(self._element._c_node, 3)) def has_key(self, key): if key in self: Modified: lxml/trunk/src/lxml/etreepublic.pxd ============================================================================== --- lxml/trunk/src/lxml/etreepublic.pxd (original) +++ lxml/trunk/src/lxml/etreepublic.pxd Thu Mar 22 08:31:26 2007 @@ -104,6 +104,9 @@ # attributes must not be removed during iteration! 
cdef object iterattributes(_Element element, int keysvalues) + # return the list of all attribute names (1), values (2) or items (3) + cdef object collectAttributes(tree.xmlNode* c_element, int keysvalues) + # set an attribute value on an element # on failure, sets an exception and returns -1 cdef int setAttributeValue(_Element element, key, value) except -1 Modified: lxml/trunk/src/lxml/public-api.pxi ============================================================================== --- lxml/trunk/src/lxml/public-api.pxi (original) +++ lxml/trunk/src/lxml/public-api.pxi Thu Mar 22 08:31:26 2007 @@ -83,6 +83,9 @@ cdef public object iterattributes(_Element element, int keysvalues): return _attributeIteratorFactory(element, keysvalues) +cdef public object collectAttributes(xmlNode* c_element, int keysvalues): + return _collectAttributes(c_element, keysvalues) + cdef public int setAttributeValue(_Element element, key, value) except -1: return _setAttributeValue(element, key, value) Added: lxml/trunk/src/lxml/pyclasslookup.pyx ============================================================================== --- (empty file) +++ lxml/trunk/src/lxml/pyclasslookup.pyx Thu Mar 22 08:31:26 2007 @@ -0,0 +1,277 @@ +from etreepublic cimport _Document, _Element, ElementBase +from etreepublic cimport ElementClassLookup, FallbackElementClassLookup +from etreepublic cimport elementFactory, import_etree +from python cimport str, repr, isinstance, issubclass, iter +from python cimport _cstr, Py_ssize_t +cimport etreepublic as cetree +cimport python +cimport tree +cimport cstd + +__all__ = ["PythonElementClassLookup"] + +cdef object etree +from lxml import etree +# initialize C-API of lxml.etree +import_etree(etree) + +cdef class _ElementProxy: + cdef tree.xmlNode* _c_node + cdef object _source_proxy + cdef object _dependent_proxies + + cdef int _assertNode(self) except -1: + """This is our way of saying: this proxy is invalid! + """ + assert self._c_node is not NULL, "Proxy invalidated!" 
+ return 0 + + property tag: + """Element tag + """ + def __get__(self): + self._assertNode() + return cetree.namespacedName(self._c_node) + + property text: + """Text before the first subelement. This is either a string or + the value None, if there was no text. + """ + def __get__(self): + self._assertNode() + return cetree.textOf(self._c_node) + + property tail: + """Text after this element's end tag, but before the next sibling + element's start tag. This is either a string or the value None, if + there was no text. + """ + def __get__(self): + self._assertNode() + return cetree.tailOf(self._c_node) + + property attrib: + def __get__(self): + self._assertNode() + return dict(cetree.collectAttributes(self._c_node, 3)) + + property prefix: + """Namespace prefix or None. + """ + def __get__(self): + self._assertNode() + if self._c_node.ns is not NULL: + if self._c_node.ns.prefix is not NULL: + return cetree.pyunicode(self._c_node.ns.prefix) + return None + + property sourceline: + """Original line number as found by the parser or None if unknown. + """ + def __get__(self): + cdef long line + self._assertNode() + line = tree.xmlGetLineNo(self._c_node) + if line > 0: + return line + else: + return None + + def __repr__(self): + return "<Element %s at %x>" % (self.tag, id(self)) + + def __getitem__(self, Py_ssize_t index): + """Returns the subelement at the given position. + """ + cdef tree.xmlNode* c_node + c_node = cetree.findChild(self._c_node, index) + if c_node is NULL: + raise IndexError, "list index out of range" + return _newProxy(self._source_proxy, c_node) + + def __getslice__(self, Py_ssize_t start, Py_ssize_t stop): + """Returns a list containing subelements in the given range.
+ """ + cdef tree.xmlNode* c_node + cdef Py_ssize_t c + c_node = cetree.findChild(self._c_node, start) + if c_node is NULL: + return [] + c = start + result = [] + while c_node is not NULL and c < stop: + if tree._isElement(c_node): + ret = python.PyList_Append( + result, _newProxy(self._source_proxy, c_node)) + if ret: + raise + c = c + 1 + c_node = c_node.next + return result + + def __len__(self): + """Returns the number of subelements. + """ + cdef Py_ssize_t c + cdef tree.xmlNode* c_node + self._assertNode() + c = 0 + c_node = self._c_node.children + while c_node is not NULL: + if tree._isElement(c_node): + c = c + 1 + c_node = c_node.next + return c + + def __nonzero__(self): + cdef tree.xmlNode* c_node + self._assertNode() + c_node = cetree.findChildBackwards(self._c_node, 0) + return c_node != NULL + + def get(self, key, default=None): + """Gets an element attribute. + """ + self._assertNode() + return _getAttributeValue(self._c_node, key, default) + + def keys(self): + """Gets a list of attribute names. The names are returned in an + arbitrary order (just like for an ordinary Python dictionary). + """ + self._assertNode() + return cetree.collectAttributes(self._c_node, 1) + + def values(self): + """Gets element attribute values as a sequence of strings. The + attributes are returned in an arbitrary order. + """ + self._assertNode() + return cetree.collectAttributes(self._c_node, 2) + + def items(self): + """Gets element attributes, as a sequence. The attributes are returned + in an arbitrary order. + """ + self._assertNode() + return cetree.collectAttributes(self._c_node, 3) + + def getchildren(self): + """Returns all subelements. The elements are returned in document + order.
+ """ + cdef tree.xmlNode* c_node + cdef int ret + self._assertNode() + result = [] + c_node = self._c_node.children + while c_node is not NULL: + if tree._isElement(c_node): + ret = python.PyList_Append( + result, _newProxy(self._source_proxy, c_node)) + if ret: + raise + c_node = c_node.next + return result + + def getparent(self): + """Returns the parent of this element or None for the root element. + """ + cdef tree.xmlNode* c_parent + self._assertNode() + c_parent = self._c_node.parent + if c_parent is NULL or not tree._isElement(c_parent): + return None + else: + return _newProxy(self._source_proxy, c_parent) + + def getnext(self): + """Returns the following sibling of this element or None. + """ + cdef tree.xmlNode* c_node + self._assertNode() + c_node = cetree.nextElement(self._c_node) + if c_node is not NULL: + return _newProxy(self._source_proxy, c_node) + return None + + def getprevious(self): + """Returns the preceding sibling of this element or None. + """ + cdef tree.xmlNode* c_node + self._assertNode() + c_node = cetree.previousElement(self._c_node) + if c_node is not NULL: + return _newProxy(self._source_proxy, c_node) + return None + +cdef _ElementProxy _newProxy(_ElementProxy sourceProxy, tree.xmlNode* c_node): + cdef _ElementProxy el + el = _ElementProxy() + el._c_node = c_node + if sourceProxy is None: + sourceProxy = el + el._dependent_proxies = [] + el._source_proxy = sourceProxy + python.PyList_Append(sourceProxy._dependent_proxies, el) + return el + +cdef _freeProxies(_ElementProxy sourceProxy): + cdef _ElementProxy el + if sourceProxy is None: + return + if sourceProxy._dependent_proxies is None: + return + for el in sourceProxy._dependent_proxies: + el._c_node = NULL + del sourceProxy._dependent_proxies[:] + +cdef object _getAttributeValue(tree.xmlNode* c_node, key, default): + cdef char* c_tag + cdef char* c_href + ns, tag = cetree.getNsTag(key) + c_tag = _cstr(tag) + if ns is None: + c_href = NULL + else: + c_href = _cstr(ns) + result = 
cetree.attributeValueFromNsName(c_node, c_href, c_tag) + if result is None: + return default + return result + + +cdef class PythonElementClassLookup(FallbackElementClassLookup): + """Element class lookup based on a subclass method. + + To use it, inherit from this class and override the method + + lookup(self, document, node_proxy) + + to lookup the element class for a node. The first argument is the opaque + document instance that contains the Element. The second argument is a + lightweight Element proxy implementation that is only valid during the + lookup. Do not try to keep a reference to it. Once the lookup is done, the + proxy will be invalid. + + If you return None from this method, the fallback will be called. + """ + def __init__(self, ElementClassLookup fallback=None): + FallbackElementClassLookup.__init__(self, fallback) + self._lookup_function = _lookup_class + + def lookup(self, doc, element): + return None + +cdef object _lookup_class(state, _Document doc, tree.xmlNode* c_node): + cdef PythonElementClassLookup lookup + cdef _ElementProxy proxy + lookup = state + + proxy = _newProxy(None, c_node) + cls = lookup.lookup(doc, proxy) + _freeProxies(proxy) + + if cls is not None: + return cls + return cetree.callLookupFallback(lookup, doc, c_node) Modified: lxml/trunk/src/lxml/tests/test_elementtree.py ============================================================================== --- lxml/trunk/src/lxml/tests/test_elementtree.py (original) +++ lxml/trunk/src/lxml/tests/test_elementtree.py Thu Mar 22 08:31:26 2007 @@ -360,6 +360,16 @@ keys.sort() self.assertEquals(['alpha', 'beta', 'gamma'], keys) + def test_attribute_items2(self): + XML = self.etree.XML + + root = XML('<foo alpha="Alpha" beta="Beta" gamma="Gamma"/>') + items = root.items() + items.sort() + self.assertEquals( + [('alpha','Alpha'), ('beta','Beta'), ('gamma','Gamma')], + items) + def test_attribute_keys_ns(self): XML = self.etree.XML Modified: lxml/trunk/src/lxml/tests/test_etree.py
============================================================================== --- lxml/trunk/src/lxml/tests/test_etree.py (original) +++ lxml/trunk/src/lxml/tests/test_etree.py Thu Mar 22 08:31:26 2007 @@ -371,6 +371,15 @@ Element = self.etree.Element self.assertRaises(TypeError, Element('a').append, None) + # ET's Elements have items() and key(), but not values() + def test_attribute_values(self): + XML = self.etree.XML + + root = XML('') + values = root.values() + values.sort() + self.assertEquals(['Alpha', 'Beta',