From scoder at codespeak.net Sat Mar 3 13:35:12 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sat, 3 Mar 2007 13:35:12 +0100 (CET)
Subject: [Lxml-checkins] r39785 - lxml/trunk/src/lxml
Message-ID: <20070303123512.139F410060@code0.codespeak.net>
Author: scoder
Date: Sat Mar 3 13:35:10 2007
New Revision: 39785
Modified:
lxml/trunk/src/lxml/tree.pxd
Log:
cleanup
Modified: lxml/trunk/src/lxml/tree.pxd
==============================================================================
--- lxml/trunk/src/lxml/tree.pxd (original)
+++ lxml/trunk/src/lxml/tree.pxd Sat Mar 3 13:35:10 2007
@@ -242,8 +242,7 @@
char* URI, xmlCharEncodingHandler* encoder, int compression)
cdef extern from "libxml/xmlsave.h":
- ctypedef struct xmlSaveCtxt:
- pass
+ ctypedef struct xmlSaveCtxt
cdef xmlSaveCtxt* xmlSaveToFilename(char* filename, char* encoding,
int options)
From scoder at codespeak.net Sat Mar 3 13:35:46 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sat, 3 Mar 2007 13:35:46 +0100 (CET)
Subject: [Lxml-checkins] r39786 - lxml/trunk/src/lxml
Message-ID: <20070303123546.8618410068@code0.codespeak.net>
Author: scoder
Date: Sat Mar 3 13:35:44 2007
New Revision: 39786
Modified:
lxml/trunk/src/lxml/dtd.pxi
Log:
cleanup
Modified: lxml/trunk/src/lxml/dtd.pxi
==============================================================================
--- lxml/trunk/src/lxml/dtd.pxi (original)
+++ lxml/trunk/src/lxml/dtd.pxi Sat Mar 3 13:35:44 2007
@@ -16,8 +16,9 @@
cdef class DTD(_Validator):
"""A DTD validator.
- Can load from filesystem directly given a filename. Alternatively, pass
- the keyword parameter ``external_id`` to load from a catalog.
+ Can load from filesystem directly given a filename or file-like object.
+ Alternatively, pass the keyword parameter ``external_id`` to load from a
+ catalog.
"""
cdef tree.xmlDtd* _c_dtd
def __init__(self, file=None, external_id=None):
From scoder at codespeak.net Sat Mar 3 13:38:23 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sat, 3 Mar 2007 13:38:23 +0100 (CET)
Subject: [Lxml-checkins] r39788 - in lxml/trunk: doc src/lxml
Message-ID: <20070303123823.2D00610068@code0.codespeak.net>
Author: scoder
Date: Sat Mar 3 13:38:21 2007
New Revision: 39788
Added:
lxml/trunk/src/lxml/schematron.pxd
lxml/trunk/src/lxml/schematron.pxi
Modified:
lxml/trunk/doc/validation.txt
lxml/trunk/src/lxml/etree.pyx
Log:
schematron support (disabled by default: requires libxml2 2.6.21+, better 2.6.27)
Modified: lxml/trunk/doc/validation.txt
==============================================================================
--- lxml/trunk/doc/validation.txt (original)
+++ lxml/trunk/doc/validation.txt Sat Mar 3 13:38:21 2007
@@ -11,11 +11,17 @@
.. _`Relax NG`: http://www.relaxng.org/
.. _`XML Schema`: http://www.w3.org/XML/Schema
+There is also initial support for Schematron_. However, it is currently
+disabled in lxml builds due to insufficiencies in the implementation as of
+libxml2 2.6.27.
+
+.. _Schematron: http://www.ascc.net/xml/schematron
+
.. contents::
..
1 DTD
2 RelaxNG
- 2 XMLSchema
+ 3 XMLSchema
The usual setup procedure::
@@ -114,10 +120,9 @@
[...]
AssertionError: Document does not comply with schema
-Starting with version 0.9, lxml now has a simple API to report the errors
-generated by libxml2. If you want to find out why the validation failed in the
-second case, you can look up the error log of the validation process and check
-it for relevant messages::
+If you want to find out why the validation failed in the second case, you can
+look up the error log of the validation process and check it for relevant
+messages::
>>> log = relaxng.error_log
>>> print log.last_error
@@ -126,7 +131,7 @@
You can see that the error (ERROR) happened during RelaxNG validation
(RELAXNGV). The message then tells you what went wrong. Note that this error
is local to the RelaxNG object. It will only contain log entries that
-appeares during the validation. The DocumentInvalid exception raised by the
+appeared during the validation. The DocumentInvalid exception raised by the
``assertValid`` method above provides access to the global error log (like all
other lxml exceptions).
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Sat Mar 3 13:38:21 2007
@@ -1893,9 +1893,10 @@
def __get__(self):
return self._error_log.copy()
-include "dtd.pxi" # DTD
-include "relaxng.pxi" # RelaxNG
-include "xmlschema.pxi" # XMLSchema
+include "dtd.pxi" # DTD
+include "relaxng.pxi" # RelaxNG
+include "xmlschema.pxi" # XMLSchema
+#include "schematron.pxi" # Schematron
################################################################################
# Public C API
Added: lxml/trunk/src/lxml/schematron.pxd
==============================================================================
--- (empty file)
+++ lxml/trunk/src/lxml/schematron.pxd Sat Mar 3 13:38:21 2007
@@ -0,0 +1,28 @@
+cimport tree
+from tree cimport xmlDoc, xmlDtd
+
+cdef extern from "libxml/schematron.h":
+ ctypedef struct xmlSchematron
+ ctypedef struct xmlSchematronParserCtxt
+ ctypedef struct xmlSchematronValidCtxt
+
+ ctypedef enum xmlSchematronValidOptions:
+ XML_SCHEMATRON_OUT_QUIET = 1 # quiet no report
+ XML_SCHEMATRON_OUT_TEXT = 2 # build a textual report
+ XML_SCHEMATRON_OUT_XML = 4 # output SVRL
+ XML_SCHEMATRON_OUT_FILE = 256 # output to a file descriptor
+ XML_SCHEMATRON_OUT_BUFFER = 512 # output to a buffer
+ XML_SCHEMATRON_OUT_IO = 1024 # output to I/O mechanism
+
+ cdef xmlSchematronParserCtxt* xmlSchematronNewDocParserCtxt(xmlDoc* doc)
+ cdef xmlSchematronParserCtxt* xmlSchematronNewParserCtxt(char* filename)
+ cdef xmlSchematronValidCtxt* xmlSchematronNewValidCtxt(xmlSchematron* schema,
+ int options)
+
+ cdef xmlSchematron* xmlSchematronParse(xmlSchematronParserCtxt* ctxt)
+ cdef int xmlSchematronValidateDoc(xmlSchematronValidCtxt* ctxt,
+ xmlDoc* instance)
+
+ cdef void xmlSchematronFreeParserCtxt(xmlSchematronParserCtxt* ctxt)
+ cdef void xmlSchematronFreeValidCtxt(xmlSchematronValidCtxt* ctxt)
+ cdef void xmlSchematronFree(xmlSchematron* schema)
Added: lxml/trunk/src/lxml/schematron.pxi
==============================================================================
--- (empty file)
+++ lxml/trunk/src/lxml/schematron.pxi Sat Mar 3 13:38:21 2007
@@ -0,0 +1,145 @@
+# support for Schematron validation
+cimport schematron
+
+"""
+Schematron
+----------
+
+Schematron is a less well known, but very powerful schema language. The main
+idea is to use the capabilities of XPath to put restrictions on the structure
+and the content of XML documents. Here is a simple example::
+
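+ >>> schematron = etree.Schematron(etree.XML("""
+ ... <schema xmlns="http://www.ascc.net/xml/schematron" >
+ ...   <pattern name="id is the only permitted attribute name">
+ ...     <rule context="*">
+ ...       <report test="@*[not(name()='id')]">Attribute
+ ...         <name path="@*[not(name()='id')]"/> is forbidden<name/>
+ ...       </report>
+ ...     </rule>
+ ...   </pattern>
+ ... </schema>
+ ... """))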
+
+ >>> xml = etree.XML("""
+ ... <AAA name="aaa">
+ ...   <BBB id="bbb"/>
+ ...   <CCC color="ccc"/>
+ ... </AAA>
+ ... """)
+
+ >>> schematron.validate(xml)
+ 0
+
+ >>> xml = etree.XML("""
+ ... <AAA id="aaa">
+ ...   <BBB id="bbb"/>
+ ...   <CCC/>
+ ... </AAA>
+ ... """)
+
+ >>> schematron.validate(xml)
+ 1
+
+Schematron was added to libxml2 in version 2.6.21. As of version 2.6.27,
+however, Schematron lacks support for error reporting other than to stderr.
+It is therefore not possible to retrieve validation warnings and errors in
+lxml.
+"""
+
+class SchematronError(LxmlError):
+ pass
+
+class SchematronParseError(SchematronError):
+ pass
+
+class SchematronValidateError(SchematronError):
+ pass
+
+################################################################################
+# Schematron
+
+cdef class Schematron(_Validator):
+ """A Schematron validator.
+
+ Pass a root Element or an ElementTree to turn it into a validator.
+ Alternatively, pass a filename as keyword argument 'file' to parse from
+ the file system.
+ """
+ cdef schematron.xmlSchematron* _c_schema
+ cdef tree.xmlDoc* _c_doc
+ def __init__(self, etree=None, file=None):
+ cdef _Document doc
+ cdef _Element root_node
+ cdef xmlNode* c_node
+ cdef xmlDoc* c_doc
+ cdef char* c_href
+ cdef schematron.xmlSchematronParserCtxt* parser_ctxt
+ self._c_schema = NULL
+ self._c_doc = NULL
+ if etree is not None:
+ doc = _documentOrRaise(etree)
+ root_node = _rootNodeOrRaise(etree)
+ self._c_doc = _copyDocRoot(doc._c_doc, root_node._c_node)
+ parser_ctxt = schematron.xmlSchematronNewDocParserCtxt(self._c_doc)
+ elif file is not None:
+ filename = _getFilenameForFile(file)
+ if filename is None:
+ # XXX assume a string object
+ filename = file
+ filename = _encodeFilename(filename)
+ parser_ctxt = schematron.xmlSchematronNewParserCtxt(_cstr(filename))
+ else:
+ raise SchematronParseError, "No tree or file given"
+
+ if parser_ctxt is NULL:
+ if self._c_doc is not NULL:
+ tree.xmlFreeDoc(self._c_doc)
+ raise SchematronParseError, "Document is not parsable as Schematron"
+ self._c_schema = schematron.xmlSchematronParse(parser_ctxt)
+
+ if self._c_schema is NULL:
+ if self._c_doc is not NULL:
+ schematron.xmlSchematronFreeParserCtxt(parser_ctxt)
+ tree.xmlFreeDoc(self._c_doc)
+ raise SchematronParseError, "Document is not a valid Schematron schema"
+ schematron.xmlSchematronFreeParserCtxt(parser_ctxt)
+ _Validator.__init__(self)
+
+ def __dealloc__(self):
+ schematron.xmlSchematronFree(self._c_schema)
+ tree.xmlFreeDoc(self._c_doc)
+
+ def __call__(self, etree):
+ """Validate doc using Schematron.
+
+ Returns true if document is valid, false if not."""
+ cdef python.PyThreadState* state
+ cdef _Document doc
+ cdef _Element root_node
+ cdef xmlDoc* c_doc
+ cdef schematron.xmlSchematronValidCtxt* valid_ctxt
+ cdef int ret
+
+ doc = _documentOrRaise(etree)
+ root_node = _rootNodeOrRaise(etree)
+
+ self._error_log.connect()
+ valid_ctxt = schematron.xmlSchematronNewValidCtxt(
+ self._c_schema, schematron.XML_SCHEMATRON_OUT_QUIET)
+ if valid_ctxt is NULL:
+ self._error_log.disconnect()
+ raise SchematronError, "Failed to create validation context"
+
+ c_doc = _fakeRootDoc(doc._c_doc, root_node._c_node)
+ state = python.PyEval_SaveThread()
+ ret = schematron.xmlSchematronValidateDoc(valid_ctxt, c_doc)
+ python.PyEval_RestoreThread(state)
+ _destroyFakeDoc(doc._c_doc, c_doc)
+
+ schematron.xmlSchematronFreeValidCtxt(valid_ctxt)
+
+ self._error_log.disconnect()
+ if ret == -1:
+ raise SchematronValidateError, "Internal error in Schematron validation"
+ return ret == 0
From scoder at codespeak.net Mon Mar 5 17:49:20 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Mon, 5 Mar 2007 17:49:20 +0100 (CET)
Subject: [Lxml-checkins] r39965 - lxml/trunk/src/lxml
Message-ID: <20070305164920.E3BE21007D@code0.codespeak.net>
Author: scoder
Date: Mon Mar 5 17:49:18 2007
New Revision: 39965
Modified:
lxml/trunk/src/lxml/parser.pxi
Log:
set error return to -1 instead of 1
Modified: lxml/trunk/src/lxml/parser.pxi
==============================================================================
--- lxml/trunk/src/lxml/parser.pxi (original)
+++ lxml/trunk/src/lxml/parser.pxi Mon Mar 5 17:49:18 2007
@@ -406,7 +406,7 @@
if pctxt.spaceTab is not NULL: # work around bug in libxml2
xmlparser.xmlClearParserCtxt(pctxt)
- cdef int _lockParser(self) except 1:
+ cdef int _lockParser(self) except -1:
cdef python.PyThreadState* state
cdef int result
if config.ENABLE_THREADING and self._parser_lock != NULL:
From scoder at codespeak.net Mon Mar 5 17:50:14 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Mon, 5 Mar 2007 17:50:14 +0100 (CET)
Subject: [Lxml-checkins] r39966 - lxml/trunk/src/lxml
Message-ID: <20070305165014.5AB611007D@code0.codespeak.net>
Author: scoder
Date: Mon Mar 5 17:50:13 2007
New Revision: 39966
Modified:
lxml/trunk/src/lxml/extensions.pxi
Log:
cleanups and C-ifications
Modified: lxml/trunk/src/lxml/extensions.pxi
==============================================================================
--- lxml/trunk/src/lxml/extensions.pxi (original)
+++ lxml/trunk/src/lxml/extensions.pxi Mon Mar 5 17:50:13 2007
@@ -102,16 +102,16 @@
# namespaces (internal UTF-8 methods with leading '_')
- def addNamespace(self, prefix, uri):
+ cdef addNamespace(self, prefix, uri):
if self._namespaces is None:
self._namespaces = {}
python.PyDict_SetItem(self._namespaces, prefix, uri)
- def registerNamespaces(self, namespaces):
+ cdef registerNamespaces(self, namespaces):
for prefix, uri in namespaces.items():
self.registerNamespace(prefix, uri)
- def registerNamespace(self, prefix, ns_uri):
+ cdef registerNamespace(self, prefix, ns_uri):
prefix_utf = self._to_utf(prefix)
ns_uri_utf = self._to_utf(ns_uri)
xpath.xmlXPathRegisterNs(self._xpathCtxt, prefix_utf, ns_uri_utf)
@@ -238,12 +238,14 @@
cdef xpath.xmlXPathFunction _function_check(void* ctxt,
char* c_name, char* c_ns_uri):
"Module level lookup function for XPath/XSLT functions"
+ cdef xpath.xmlXPathFunction c_func
cdef _BaseContext context
context = <_BaseContext>ctxt
if context._prepare_function_call(c_ns_uri, c_name):
- return _call_prepared_function
+ c_func = _call_prepared_function
else:
- return NULL
+ c_func = NULL
+ return c_func
cdef xpath.xmlXPathObject* _wrapXPathObject(object obj) except NULL:
cdef xpath.xmlNodeSet* resultSet
@@ -358,7 +360,6 @@
cdef void _extension_function_call(_BaseContext context, function,
xpath.xmlXPathParserContext* ctxt, int nargs):
- cdef _Element node
cdef _Document doc
cdef xpath.xmlXPathObject* obj
cdef int i
From scoder at codespeak.net Mon Mar 5 17:52:51 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Mon, 5 Mar 2007 17:52:51 +0100 (CET)
Subject: [Lxml-checkins] r39967 - lxml/trunk/src/lxml
Message-ID: <20070305165251.5B7351007D@code0.codespeak.net>
Author: scoder
Date: Mon Mar 5 17:52:47 2007
New Revision: 39967
Modified:
lxml/trunk/src/lxml/xslt.pxi
Log:
cleanup, doc strings
Modified: lxml/trunk/src/lxml/xslt.pxi
==============================================================================
--- lxml/trunk/src/lxml/xslt.pxi (original)
+++ lxml/trunk/src/lxml/xslt.pxi Mon Mar 5 17:52:47 2007
@@ -1,4 +1,4 @@
-# XSLT and XPath classes, supports for extension functions
+# XSLT
cimport xslt
@@ -265,6 +265,15 @@
cdef class XSLT:
"""Turn a document into an XSLT object.
+
+ Keyword arguments of the constructor:
+ * regexp - enable exslt regular expression support in XPath (default: True)
+ * access_control - access restrictions for network or file system
+
+ Keyword arguments of the XSLT run:
+ * profile_run - enable XSLT profiling
+
+ Other keyword arguments are passed to the stylesheet.
"""
cdef _XSLTContext _context
cdef xslt.xsltStylesheet* _c_style
@@ -415,6 +424,7 @@
if params is not NULL:
# deallocate space for parameters
python.PyMem_Free(params)
+ keep_ref = None
if transform_ctxt.profile:
c_profile_doc = xslt.xsltGetProfileInformation(transform_ctxt)
From scoder at codespeak.net Mon Mar 5 17:53:09 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Mon, 5 Mar 2007 17:53:09 +0100 (CET)
Subject: [Lxml-checkins] r39968 - lxml/trunk
Message-ID: <20070305165309.651CA1007E@code0.codespeak.net>
Author: scoder
Date: Mon Mar 5 17:53:08 2007
New Revision: 39968
Modified:
lxml/trunk/TODO.txt
Log:
cleanup
Modified: lxml/trunk/TODO.txt
==============================================================================
--- lxml/trunk/TODO.txt (original)
+++ lxml/trunk/TODO.txt Mon Mar 5 17:53:08 2007
@@ -41,5 +41,5 @@
Features
--------
-* Relaxed NG compact notation (rnc versus rng) support. Currently not
- supported by libxml2 (patch exists)
+* RelaxNG compact notation (rnc versus rng) support. Currently not supported
+ by libxml2 (patch exists)
From scoder at codespeak.net Sat Mar 10 20:05:37 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sat, 10 Mar 2007 20:05:37 +0100 (CET)
Subject: [Lxml-checkins] r40177 - lxml/trunk/src/lxml
Message-ID: <20070310190537.158731006F@code0.codespeak.net>
Author: scoder
Date: Sat Mar 10 20:05:36 2007
New Revision: 40177
Modified:
lxml/trunk/src/lxml/xpath.pxi
Log:
fix for compile problem
Modified: lxml/trunk/src/lxml/xpath.pxi
==============================================================================
--- lxml/trunk/src/lxml/xpath.pxi (original)
+++ lxml/trunk/src/lxml/xpath.pxi Sat Mar 10 20:05:36 2007
@@ -143,9 +143,8 @@
def registerNamespaces(self, namespaces):
"""Register a prefix -> uri dict.
"""
- add = self._context.addNamespace
for prefix, uri in namespaces.items():
- add(prefix, uri)
+ self._context.addNamespace(prefix, uri)
def __call__(self, _path, **_variables):
"""Evaluate an XPath expression on the document.
From scoder at codespeak.net Fri Mar 16 20:24:42 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 16 Mar 2007 20:24:42 +0100 (CET)
Subject: [Lxml-checkins] r40611 - lxml/trunk/src/lxml
Message-ID: <20070316192442.A40981008A@code0.codespeak.net>
Author: scoder
Date: Fri Mar 16 20:24:40 2007
New Revision: 40611
Modified:
lxml/trunk/src/lxml/etree.pyx
Log:
allow threading in xinclude(), some docstring updates
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Fri Mar 16 20:24:40 2007
@@ -1289,7 +1289,7 @@
def relaxng(self, relaxng):
"""Validate this document using other document.
- relaxng is a tree that should contain Relax NG XML
+ The relaxng argument is a tree that should contain a Relax NG schema.
Returns True or False, depending on whether validation
succeeded.
@@ -1305,7 +1305,7 @@
def xmlschema(self, xmlschema):
"""Validate this document using other document.
- xmlschema is a tree that should contain XML Schema XML.
+ The xmlschema argument is a tree that should contain an XML Schema.
Returns True or False, depending on whether validation
succeeded.
@@ -1321,7 +1321,13 @@
def xinclude(self):
"""Process the XInclude nodes in this document and include the
referenced XML fragments.
+
+ There is support for loading files through the file system, HTTP and
+ FTP.
+
+ Note that XInclude does not support custom resolvers in Python space.
"""
+ cdef python.PyThreadState* state
cdef int result
# We cannot pass the XML_PARSE_NOXINCNODE option as this would free
# the XInclude nodes - there may still be Python references to them!
@@ -1331,13 +1337,15 @@
# typed as elements. The included fragment is added between the two,
# i.e. as a sibling, which does not conflict with traversal.
self._assertHasRoot()
- if self._context_node._doc._parser != None:
+ state = python.PyEval_SaveThread()
+ if self._context_node._doc._parser is not None:
result = xinclude.xmlXIncludeProcessTreeFlags(
self._context_node._c_node,
self._context_node._doc._parser._parse_options)
else:
result = xinclude.xmlXIncludeProcessTree(
self._context_node._c_node)
+ python.PyEval_RestoreThread(state)
if result == -1:
raise XIncludeError, "XInclude processing failed"
From scoder at codespeak.net Fri Mar 16 20:25:31 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 16 Mar 2007 20:25:31 +0100 (CET)
Subject: [Lxml-checkins] r40612 - lxml/trunk/doc/html
Message-ID: <20070316192531.55B3D1008A@code0.codespeak.net>
Author: scoder
Date: Fri Mar 16 20:25:29 2007
New Revision: 40612
Modified:
lxml/trunk/doc/html/style.css
Log:
friendlier colours for the web page as a better match with the codespeak logo
Modified: lxml/trunk/doc/html/style.css
==============================================================================
--- lxml/trunk/doc/html/style.css (original)
+++ lxml/trunk/doc/html/style.css Fri Mar 16 20:25:29 2007
@@ -67,10 +67,12 @@
font-size: 130%;
}
-div.sidemenu ul.menu.current > li {
- color: orange;
- border: groove orange;
- background-color: #FFFACA;
+div.sidemenu ul.menu.current li {
+ color: #CC0000;
+}
+
+div.sidemenu ul.menu.current > li > a {
+ color: #CC0000;
}
div.sidemenu ul.menu.current ul.submenu {
@@ -85,12 +87,13 @@
div.sidemenu ul.menu.foreign li.menu:hover ul.submenu {
display: block;
position: absolute;
- border: groove orange;
+ border: groove #990000;
padding: 1ex 1ex 1ex 3ex;
margin-top: 0px;
margin-left: 4em;
margin-right: -20em;
- background-color: #FFFACA;
+ color: #990000;
+ background-color: white;
}
div.sidemenu ul.submenu {
@@ -121,7 +124,7 @@
@media screen {
div.section > h1 > a:before {
margin-left: -2ex;
- color: orange;
+ color: #CC0000;
content: "\00BB" " ";
}
}
From scoder at codespeak.net Fri Mar 16 20:26:16 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 16 Mar 2007 20:26:16 +0100 (CET)
Subject: [Lxml-checkins] r40613 - lxml/trunk/benchmark
Message-ID: <20070316192616.5636F1008A@code0.codespeak.net>
Author: scoder
Date: Fri Mar 16 20:26:13 2007
New Revision: 40613
Modified:
lxml/trunk/benchmark/bench_xpath.py
Log:
benchmark both the old and the new way of using extension functions
Modified: lxml/trunk/benchmark/bench_xpath.py
==============================================================================
--- lxml/trunk/benchmark/bench_xpath.py (original)
+++ lxml/trunk/benchmark/bench_xpath.py Fri Mar 16 20:26:13 2007
@@ -34,7 +34,7 @@
child.xpath("./*[0]")
@onlylib('lxe')
- def bench_xpath_extensions_old(self, root):
+ def bench_xpath_old_extensions(self, root):
def return_child(_, element):
if element:
return element[0]
@@ -45,5 +45,21 @@
for child in root:
xpath(child)
+ @onlylib('lxe')
+ def bench_xpath_extensions(self, root):
+ def return_child(_, element):
+ if element:
+ return element[0]
+ else:
+ return ()
+ self.etree.FunctionNamespace("test")["t"] = return_child
+
+ try:
+ xpath = self.etree.XPath("test:t(.)", {"test":"test"})
+ for child in root:
+ xpath(child)
+ finally:
+ del self.etree.FunctionNamespace("test")["t"]
+
if __name__ == '__main__':
benchbase.main(XPathBenchMark)
From scoder at codespeak.net Fri Mar 16 20:26:56 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 16 Mar 2007 20:26:56 +0100 (CET)
Subject: [Lxml-checkins] r40614 - lxml/trunk/src/lxml
Message-ID: <20070316192656.4E7EC1008A@code0.codespeak.net>
Author: scoder
Date: Fri Mar 16 20:26:53 2007
New Revision: 40614
Modified:
lxml/trunk/src/lxml/python.pxd
Log:
added a Python API function
Modified: lxml/trunk/src/lxml/python.pxd
==============================================================================
--- lxml/trunk/src/lxml/python.pxd (original)
+++ lxml/trunk/src/lxml/python.pxd Fri Mar 16 20:26:53 2007
@@ -44,7 +44,8 @@
cdef int PyList_Append(object l, object obj) except -1
cdef int PyList_Reverse(object l) except -1
cdef int PyList_Insert(object l, Py_ssize_t index, object o) except -1
- cdef object PyList_AsTuple(object o)
+ cdef object PyList_AsTuple(object l)
+ cdef void PyList_Clear(object l)
cdef int PyDict_SetItemString(object d, char* key, object value) except -1
cdef int PyDict_SetItem(object d, object key, object value) except -1
From scoder at codespeak.net Fri Mar 16 20:28:38 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 16 Mar 2007 20:28:38 +0100 (CET)
Subject: [Lxml-checkins] r40615 - lxml/trunk/src/lxml/tests
Message-ID: <20070316192838.2B2921008A@code0.codespeak.net>
Author: scoder
Date: Fri Mar 16 20:28:33 2007
New Revision: 40615
Modified:
lxml/trunk/src/lxml/tests/test_xpathevaluator.py
Log:
test case split
Modified: lxml/trunk/src/lxml/tests/test_xpathevaluator.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_xpathevaluator.py (original)
+++ lxml/trunk/src/lxml/tests/test_xpathevaluator.py Fri Mar 16 20:28:33 2007
@@ -104,6 +104,10 @@
self.assertEquals(
[root[0]],
root.xpath('//baz:b', {'baz': 'uri:a'}))
+
+ def test_xpath_ns_none(self):
+ tree = self.parse(' ')
+ root = tree.getroot()
self.assertRaises(
TypeError,
root.xpath, '//b', {None: 'uri:a'})
From scoder at codespeak.net Sat Mar 17 06:48:03 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sat, 17 Mar 2007 06:48:03 +0100 (CET)
Subject: [Lxml-checkins] r40625 - lxml/trunk/doc
Message-ID: <20070317054803.D5C3E10082@code0.codespeak.net>
Author: scoder
Date: Sat Mar 17 06:48:01 2007
New Revision: 40625
Modified:
lxml/trunk/doc/FAQ.txt
Log:
FAQ entry on standard compliance
Modified: lxml/trunk/doc/FAQ.txt
==============================================================================
--- lxml/trunk/doc/FAQ.txt (original)
+++ lxml/trunk/doc/FAQ.txt Sat Mar 17 06:48:01 2007
@@ -12,10 +12,11 @@
1 General Questions
1.1 Is there a tutorial?
1.2 Where can I find more documentation about lxml?
- 1.3 Where are the Windows binaries?
- 1.4 What is the difference between lxml.etree and lxml.objectify?
- 1.5 Why is my application so slow?
- 1.6 Why do I get errors about missing UCS4 symbols when installing lxml?
+ 1.3 What standards does lxml implement?
+ 1.4 Where are the Windows binaries?
+ 1.5 What is the difference between lxml.etree and lxml.objectify?
+ 1.6 Why is my application so slow?
+ 1.7 Why do I get errors about missing UCS4 symbols when installing lxml?
2 Bugs
2.1 My application crashes! Why does lxml.etree do that?
2.2 I think I have found a bug in lxml. What should I do?
@@ -64,6 +65,23 @@
.. _`the web page`: http://codespeak.net/lxml/#documentation
+What standards does lxml implement?
+-----------------------------------
+
+Compliance with XML standards depends on the support in libxml2 and libxslt.
+Here is a quote from `http://xmlsoft.org/`:
+
+ In most cases libxml2 tries to implement the specifications in a relatively
+ strictly compliant way. As of release 2.4.16, libxml2 passed all 1800+ tests
+ from the OASIS XML Tests Suite.
+
+lxml currently supports libxml2 2.6.16 or later, which has even better support
+for various XML standards. Some of the more important ones are: HTML, XML
+namespaces, XPath, XInclude, XSLT, XML catalogs, canonical XML, RelaxNG,
+XML:ID. Support for XML Schema and Schematron is currently incomplete.
+libxml2 also supports loading documents through HTTP and FTP.
+
+
Where are the Windows binaries?
-------------------------------
From scoder at codespeak.net Sat Mar 17 06:58:12 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sat, 17 Mar 2007 06:58:12 +0100 (CET)
Subject: [Lxml-checkins] r40626 - lxml/trunk/doc
Message-ID: <20070317055812.56C8510082@code0.codespeak.net>
Author: scoder
Date: Sat Mar 17 06:58:10 2007
New Revision: 40626
Modified:
lxml/trunk/doc/FAQ.txt
Log:
FAQ entry on standard compliance
Modified: lxml/trunk/doc/FAQ.txt
==============================================================================
--- lxml/trunk/doc/FAQ.txt (original)
+++ lxml/trunk/doc/FAQ.txt Sat Mar 17 06:58:10 2007
@@ -78,8 +78,9 @@
lxml currently supports libxml2 2.6.16 or later, which has even better support
for various XML standards. Some of the more important ones are: HTML, XML
namespaces, XPath, XInclude, XSLT, XML catalogs, canonical XML, RelaxNG,
-XML:ID. Support for XML Schema and Schematron is currently incomplete.
-libxml2 also supports loading documents through HTTP and FTP.
+XML:ID. Support for XML Schema and Schematron is currently incomplete in
+libxml2, but is mostly usable and still being worked on. libxml2 also
+supports loading documents through HTTP and FTP.
Where are the Windows binaries?
From scoder at codespeak.net Sat Mar 17 07:01:29 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sat, 17 Mar 2007 07:01:29 +0100 (CET)
Subject: [Lxml-checkins] r40627 - lxml/branch/extension_refactoring
Message-ID: <20070317060129.C1CBC10082@code0.codespeak.net>
Author: scoder
Date: Sat Mar 17 07:01:27 2007
New Revision: 40627
Added:
lxml/branch/extension_refactoring/
- copied from r40626, lxml/trunk/
Log:
new branch for refactoring the extension function setup
From scoder at codespeak.net Sat Mar 17 07:03:29 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sat, 17 Mar 2007 07:03:29 +0100 (CET)
Subject: [Lxml-checkins] r40628 - in lxml/branch/extension_refactoring: .
src/lxml
Message-ID: <20070317060329.7A93A10082@code0.codespeak.net>
Author: scoder
Date: Sat Mar 17 07:03:26 2007
New Revision: 40628
Modified:
lxml/branch/extension_refactoring/CHANGES.txt
lxml/branch/extension_refactoring/src/lxml/extensions.pxi
lxml/branch/extension_refactoring/src/lxml/nsclasses.pxi
lxml/branch/extension_refactoring/src/lxml/xpath.pxi
lxml/branch/extension_refactoring/src/lxml/xslt.pxd
lxml/branch/extension_refactoring/src/lxml/xslt.pxi
Log:
initial branch import, mainly complete but buggy
Modified: lxml/branch/extension_refactoring/CHANGES.txt
==============================================================================
--- lxml/branch/extension_refactoring/CHANGES.txt (original)
+++ lxml/branch/extension_refactoring/CHANGES.txt Sat Mar 17 07:03:26 2007
@@ -2,6 +2,25 @@
lxml changelog
==============
+Under Development
+=================
+
+Features added
+--------------
+
+* EXSLT RegExp support in standard XPath (not only XSLT)
+
+Bugs fixed
+----------
+
+* Thread safety in XPath evaluators
+
+Other changes
+-------------
+
+* major refactoring in XPath/XSLT extension function code
+
+
1.3beta (2007-02-27)
====================
Modified: lxml/branch/extension_refactoring/src/lxml/extensions.pxi
==============================================================================
--- lxml/branch/extension_refactoring/src/lxml/extensions.pxi (original)
+++ lxml/branch/extension_refactoring/src/lxml/extensions.pxi Sat Mar 17 07:03:26 2007
@@ -12,11 +12,14 @@
################################################################################
# Base class for XSLT and XPath evaluation contexts: functions, namespaces, ...
+ctypedef int _register_function(void* ctxt, name_utf, ns_uri_utf)
+
cdef class _BaseContext:
cdef xpath.xmlXPathContext* _xpathCtxt
cdef _Document _doc
cdef object _extensions
cdef object _namespaces
+ cdef object _registered_namespaces
cdef object _utf_refs
cdef object _function_cache
cdef object _function_cache_ns
@@ -28,9 +31,9 @@
def __init__(self, namespaces, extensions):
self._xpathCtxt = NULL
self._utf_refs = {}
+ self._registered_namespaces = []
self._function_cache = {}
self._function_cache_ns = {}
- self._called_function = None
if extensions is not None:
# convert extensions to UTF-8
@@ -90,7 +93,8 @@
self.registerNamespaces(namespaces)
cdef _unregister_context(self):
- xpath.xmlXPathRegisteredNsCleanup(self._xpathCtxt)
+ self._unregisterNamespaces()
+# xpath.xmlXPathRegisteredNsCleanup(self._xpathCtxt)
self._free_context()
cdef _free_context(self):
@@ -112,12 +116,86 @@
self.registerNamespace(prefix, uri)
cdef registerNamespace(self, prefix, ns_uri):
+ if prefix is None:
+ raise TypeError, "empty prefix is not supported in XPath"
prefix_utf = self._to_utf(prefix)
ns_uri_utf = self._to_utf(ns_uri)
- xpath.xmlXPathRegisterNs(self._xpathCtxt, prefix_utf, ns_uri_utf)
+ python.PyList_Append(self._registered_namespaces, prefix_utf)
+ xpath.xmlXPathRegisterNs(self._xpathCtxt,
+ _cstr(prefix_utf), _cstr(ns_uri_utf))
+
+ cdef _registerNamespace(self, prefix_utf, ns_uri_utf):
+ python.PyList_Append(self._registered_namespaces, prefix_utf)
+ xpath.xmlXPathRegisterNs(self._xpathCtxt,
+ _cstr(prefix_utf), _cstr(ns_uri_utf))
+
+ cdef void _unregisterNamespaces(self):
+ if python.PyList_GET_SIZE(self._registered_namespaces) > 0:
+ for prefix_utf in self._registered_namespaces:
+ sys.stderr.write(prefix_utf)
+ sys.stderr.flush()
+ xpath.xmlXPathRegisterNs(self._xpathCtxt,
+ _cstr(prefix_utf), NULL)
+ self._registered_namespaces = []
+
+ cdef void _unregisterNamespace(self, prefix_utf):
+ xpath.xmlXPathRegisterNs(self._xpathCtxt,
+ _cstr(prefix_utf), NULL)
# extension functions
+ cdef void _addLocalExtensionFunction(self, ns_utf, name_utf, function):
+ if self._extensions is None:
+ self._extensions = {}
+ python.PyDict_SetItem(self._extensions, (ns_utf, name_utf), function)
+
+ cdef void _registerAllFunctions(self, void* ctxt,
+ _register_function reg_func):
+ cdef python.PyObject* dict_result
+ for ns_utf, ns_functions in _iter_ns_extension_functions():
+ if ns_utf is None:
+ d = self._function_cache
+ else:
+ dict_result = python.PyDict_GetItem(
+ self._function_cache_ns, ns_utf)
+ if dict_result is NULL:
+ d = {}
+ python.PyDict_SetItem(
+ self._function_cache_ns, ns_utf, d)
+ else:
+ d = dict_result
+ for name_utf, function in ns_functions.iteritems():
+ python.PyDict_SetItem(d, name_utf, function)
+ reg_func(ctxt, name_utf, ns_utf)
+ if self._extensions is None:
+ return # done
+ last_ns = None
+ d = self._function_cache
+ for (ns_utf, name_utf), function in self._extensions.iteritems():
+ if ns_utf is not last_ns:
+ last_ns = ns_utf
+ if ns_utf is None:
+ d = self._function_cache
+ else:
+ dict_result = python.PyDict_GetItem(
+ self._function_cache_ns, ns_utf)
+ if dict_result is NULL:
+ d = {}
+ python.PyDict_SetItem(self._function_cache_ns,
+ ns_utf, d)
+ else:
+ d = dict_result
+ python.PyDict_SetItem(d, name_utf, function)
+ reg_func(ctxt, name_utf, ns_utf)
+
+ cdef void _unregisterAllFunctions(self, void* ctxt,
+ _register_function unreg_func):
+ for name_utf in self._function_cache:
+ unreg_func(ctxt, name_utf, None)
+ for ns_utf, functions in self._function_cache_ns.iteritems():
+ for name_utf in functions:
+ unreg_func(ctxt, name_utf, ns_utf)
+
cdef _find_cached_function(self, char* c_ns_uri, char* c_name):
"""Lookup an extension function in the cache and return it.
@@ -233,10 +311,99 @@
################################################################################
+# EXSLT regexp implementation
+
+cdef class _ExsltRegExp:
+ cdef object _compile_map
+ def __init__(self):
+ self._compile_map = {}
+
+ cdef _make_string(self, value):
+ if _isString(value):
+ return value
+ else:
+ raise TypeError, "Invalid argument type %s" % type(value)
+
+ cdef _compile(self, rexp, ignore_case):
+ cdef python.PyObject* c_result
+ rexp = self._make_string(rexp)
+ key = (rexp, ignore_case)
+ c_result = python.PyDict_GetItem(self._compile_map, key)
+ if c_result is not NULL:
+ return c_result
+ py_flags = re.UNICODE
+ if ignore_case:
+ py_flags = py_flags | re.IGNORECASE
+ rexp_compiled = re.compile(rexp, py_flags)
+ python.PyDict_SetItem(self._compile_map, key, rexp_compiled)
+ return rexp_compiled
+
+ def test(self, ctxt, s, rexp, flags=''):
+ flags = self._make_string(flags)
+ s = self._make_string(s)
+ rexpc = self._compile(rexp, 'i' in flags)
+ if rexpc.search(s) is None:
+ return False
+ else:
+ return True
+
+ def match(self, ctxt, s, rexp, flags=''):
+ flags = self._make_string(flags)
+ s = self._make_string(s)
+ rexpc = self._compile(rexp, 'i' in flags)
+ if 'g' in flags:
+ results = rexpc.findall(s)
+ if not results:
+ return ()
+ else:
+ result = rexpc.search(s)
+ if not result:
+ return ()
+ results = [ result.group() ]
+ results.extend( result.groups('') )
+ result_list = []
+ root = Element('matches')
+ join_groups = ''.join
+ for s_match in results:
+ if python.PyTuple_CheckExact(s_match):
+ s_match = join_groups(s_match)
+ elem = SubElement(root, 'match')
+ elem.text = s_match
+ python.PyList_Append(result_list, elem)
+ return result_list
+
+ def replace(self, ctxt, s, rexp, flags, replacement):
+ replacement = self._make_string(replacement)
+ flags = self._make_string(flags)
+ s = self._make_string(s)
+ rexpc = self._compile(rexp, 'i' in flags)
+ if 'g' in flags:
+ count = 0
+ else:
+ count = 1
+ return rexpc.sub(replacement, s, count)
+
+ cdef _register_in_context(self, _BaseContext context):
+ ns = "http://exslt.org/regular-expressions"
+ context._addLocalExtensionFunction(ns, "test", self.test)
+ context._addLocalExtensionFunction(ns, "match", self.match)
+ context._addLocalExtensionFunction(ns, "replace", self.replace)
+
+
+################################################################################
# helper functions
cdef xpath.xmlXPathFunction _function_check(void* ctxt,
char* c_name, char* c_ns_uri):
+ cdef python.PyGILState_STATE gil_state
+ cdef xpath.xmlXPathFunction c_func
+ gil_state = python.PyGILState_Ensure()
+ c_func = _python_function_check(ctxt, c_name, c_ns_uri)
+ python.PyGILState_Release(gil_state)
+ return c_func
+
+cdef xpath.xmlXPathFunction _python_function_check(void* ctxt,
+ char* c_name, char* c_ns_uri):
"Module level lookup function for XPath/XSLT functions"
cdef xpath.xmlXPathFunction c_func
cdef _BaseContext context
@@ -405,7 +572,7 @@
fref = "{%s}%s" % (rctxt.functionURI, rctxt.function)
else:
fref = rctxt.function
- xpath.xmlXPathErr(ctxt, xpath.XML_XPATH_UNKNOWN_FUNC_ERROR)
+ xpath.xmlXPathErr(ctxt, xpath.XPATH_UNKNOWN_FUNC_ERROR)
exception = XPathFunctionError("XPath function '%s' not found" % fref)
context._exc._store_exception(exception)
Modified: lxml/branch/extension_refactoring/src/lxml/nsclasses.pxi
==============================================================================
--- lxml/branch/extension_refactoring/src/lxml/nsclasses.pxi (original)
+++ lxml/branch/extension_refactoring/src/lxml/nsclasses.pxi Sat Mar 17 07:03:26 2007
@@ -75,6 +75,11 @@
name = _utf8(name)
return self._get(name)
+ def __delitem__(self, name):
+ if name is not None:
+ name = _utf8(name)
+ python.PyDict_DelItem(self._entries, name)
+
cdef object _get(self, object name):
cdef python.PyObject* dict_result
dict_result = python.PyDict_GetItem(self._entries, name)
@@ -99,7 +104,7 @@
return self._entries.iteritems()
def clear(self):
- self._entries.clear()
+ python.PyDict_Clear(self._entries)
cdef class _ClassNamespaceRegistry(_NamespaceRegistry):
"Dictionary-like registry for namespace implementation classes"
@@ -130,32 +135,39 @@
cdef class _XPathFunctionNamespaceRegistry(_FunctionNamespaceRegistry):
cdef object _prefix
cdef object _prefix_utf
+
property prefix:
"Namespace prefix for extension functions."
def __del__(self):
self._prefix = None # no prefix configured
+ self._prefix_utf = None
def __get__(self):
- return self._prefix
+ if self._prefix is None:
+ return ''
+ else:
+ return self._prefix
def __set__(self, prefix):
+ if prefix == '':
+ prefix = None # empty prefix
if prefix is None:
- prefix = '' # empty prefix
- self._prefix_utf = _utf8(prefix)
+ self._prefix_utf = None
+ else:
+ self._prefix_utf = _utf8(prefix)
self._prefix = prefix
cdef object _find_all_extension_prefixes():
"Internal lookup function to find all function prefixes for XSLT/XPath."
cdef _XPathFunctionNamespaceRegistry registry
- ns_prefixes = {}
- for (ns_utf, registry) in __FUNCTION_NAMESPACE_REGISTRIES.iteritems():
+ ns_prefixes = []
+ for registry in __FUNCTION_NAMESPACE_REGISTRIES.itervalues():
if registry._prefix_utf is not None:
- ns_prefixes[registry._prefix_utf] = ns_utf
+ if registry._ns_uri_utf is not None:
+ python.PyList_Append(
+ ns_prefixes, (registry._prefix_utf, registry._ns_uri_utf))
return ns_prefixes
-cdef object _iter_extension_function_names():
- l = []
- for (ns_utf, registry) in __FUNCTION_NAMESPACE_REGISTRIES.iteritems():
- python.PyList_Append(l, (ns_utf, registry))
- return l
+cdef object _iter_ns_extension_functions():
+ return __FUNCTION_NAMESPACE_REGISTRIES.iteritems()
cdef object _find_extension(ns_uri_utf, name_utf):
cdef python.PyObject* dict_result
Modified: lxml/branch/extension_refactoring/src/lxml/xpath.pxi
==============================================================================
--- lxml/branch/extension_refactoring/src/lxml/xpath.pxi (original)
+++ lxml/branch/extension_refactoring/src/lxml/xpath.pxi Sat Mar 17 07:03:26 2007
@@ -9,6 +9,25 @@
################################################################################
# XPath
+cdef int _register_xpath_function(void* ctxt, name_utf, ns_utf):
+ if ns_utf is None:
+ return xpath.xmlXPathRegisterFunc(
+ ctxt, _cstr(name_utf),
+ _xpath_function_call)
+ else:
+ return xpath.xmlXPathRegisterFuncNS(
+ ctxt, _cstr(name_utf), _cstr(ns_utf),
+ _xpath_function_call)
+
+cdef int _unregister_xpath_function(void* ctxt, name_utf, ns_utf):
+ if ns_utf is None:
+ return xpath.xmlXPathRegisterFunc(
+ ctxt, _cstr(name_utf), NULL)
+ else:
+ return xpath.xmlXPathRegisterFuncNS(
+ ctxt, _cstr(name_utf), _cstr(ns_utf), NULL)
+
+
cdef class _XPathContext(_BaseContext):
cdef object _variables
def __init__(self, namespaces, extensions, variables):
@@ -18,13 +37,13 @@
cdef register_context(self, xpath.xmlXPathContext* xpathCtxt, _Document doc):
self._set_xpath_context(xpathCtxt)
ns_prefixes = _find_all_extension_prefixes()
- if ns_prefixes:
- self.registerNamespaces(ns_prefixes)
+ if python.PyList_GET_SIZE(ns_prefixes) > 0:
+ for (prefix, ns_uri) in ns_prefixes:
+ self._registerNamespace(prefix, ns_uri)
self._register_context(doc)
if self._variables is not None:
self.registerVariables(self._variables)
- xpath.xmlXPathRegisterFuncLookup(
- self._xpathCtxt, _function_check, self)
+ self._registerAllFunctions(xpathCtxt, _register_xpath_function)
cdef unregister_context(self):
cdef xpath.xmlXPathContext* xpathCtxt
@@ -32,15 +51,16 @@
if xpathCtxt is NULL:
return
xpath.xmlXPathRegisteredVariablesCleanup(xpathCtxt)
+ self._unregisterAllFunctions(xpathCtxt, _unregister_xpath_function)
self._unregister_context()
- def registerVariables(self, variable_dict):
+ cdef registerVariables(self, variable_dict):
for name, value in variable_dict.items():
name_utf = self._to_utf(name)
xpath.xmlXPathRegisterVariable(
self._xpathCtxt, _cstr(name_utf), _wrapXPathObject(value))
- def registerVariable(self, name, value):
+ cdef registerVariable(self, name, value):
name_utf = self._to_utf(name)
xpath.xmlXPathRegisterVariable(
self._xpathCtxt, _cstr(name_utf), _wrapXPathObject(value))
@@ -55,9 +75,14 @@
cdef class _XPathEvaluatorBase:
cdef xpath.xmlXPathContext* _xpathCtxt
cdef _XPathContext _context
+ cdef python.PyThread_type_lock _eval_lock
- def __init__(self, namespaces, extensions, variables=None):
- self._context = _XPathContext(namespaces, extensions, variables)
+ def __init__(self, namespaces, extensions, regexp):
+ cdef _ExsltRegExp _regexp
+ self._context = _XPathContext(namespaces, extensions, None)
+ if regexp:
+ _regexp = _ExsltRegExp()
+ _regexp._register_in_context(self._context)
def __dealloc__(self):
if self._xpathCtxt is not NULL:
@@ -84,6 +109,22 @@
c = path[0]
return c == c'/'
+ cdef int _lock(self) except -1:
+ cdef python.PyThreadState* state
+ cdef int result
+ if config.ENABLE_THREADING and self._eval_lock != NULL:
+ state = python.PyEval_SaveThread()
+ result = python.PyThread_acquire_lock(
+ self._eval_lock, python.WAIT_LOCK)
+ python.PyEval_RestoreThread(state)
+ if result == 0:
+ raise ParserError, "parser locking failed"
+ return 0
+
+ cdef void _unlock(self):
+ if config.ENABLE_THREADING and self._eval_lock != NULL:
+ python.PyThread_release_lock(self._eval_lock)
+
cdef _raise_parse_error(self):
if self._xpathCtxt is not NULL and \
self._xpathCtxt.lastError.message is not NULL:
@@ -119,10 +160,13 @@
Absolute XPath expressions (starting with '/') will be evaluated against
the ElementTree as returned by getroottree().
- XPath evaluators must not be shared between threads.
+ Additional namespace declarations can be passed with the 'namespaces'
+ keyword argument. EXSLT regular expression support can be disabled with
+ the 'regexp' boolean keyword (defaults to True).
"""
cdef _Element _element
- def __init__(self, _Element element not None, namespaces=None, extensions=None):
+ def __init__(self, _Element element not None, namespaces=None,
+ extensions=None, regexp=True):
cdef xpath.xmlXPathContext* xpathCtxt
cdef int ns_register_status
cdef _Document doc
@@ -133,7 +177,7 @@
raise XPathContextError, "Unable to create new XPath context"
_setupDict(xpathCtxt)
self._element = element
- _XPathEvaluatorBase.__init__(self, namespaces, extensions)
+ _XPathEvaluatorBase.__init__(self, namespaces, extensions, regexp)
def registerNamespace(self, prefix, uri):
"""Register a namespace with the XPath context.
@@ -155,6 +199,7 @@
Absolute XPath expressions (starting with '/') will be evaluated
against the ElementTree as returned by getroottree().
"""
+ cdef python.PyThreadState* state
cdef xpath.xmlXPathContext* xpathCtxt
cdef xpath.xmlXPathObject* xpathObj
cdef _Document doc
@@ -164,12 +209,16 @@
xpathCtxt.node = self._element._c_node
doc = self._element._doc
+ self._lock()
self._context.register_context(xpathCtxt, doc)
try:
self._context.registerVariables(_variables)
+ state = python.PyEval_SaveThread()
xpathObj = xpath.xmlXPathEvalExpression(_cstr(path), xpathCtxt)
finally:
+ python.PyEval_RestoreThread(state)
self._context.unregister_context()
+ self._unlock()
return self._handle_result(xpathObj, doc)
@@ -177,11 +226,14 @@
cdef class XPathDocumentEvaluator(XPathElementEvaluator):
"""Create an XPath evaluator for an ElementTree.
- XPath evaluators must not be shared between threads.
+ Additional namespace declarations can be passed with the 'namespaces'
+ keyword argument. EXSLT regular expression support can be disabled with
+ the 'regexp' boolean keyword (defaults to True).
"""
- def __init__(self, _ElementTree etree not None, namespaces=None, extensions=None):
+ def __init__(self, _ElementTree etree not None, namespaces=None,
+ extensions=None, regexp=True):
XPathElementEvaluator.__init__(
- self, etree._context_node, namespaces, extensions)
+ self, etree._context_node, namespaces, extensions, regexp)
def __call__(self, _path, **_variables):
"""Evaluate an XPath expression on the document.
@@ -189,6 +241,7 @@
Variables may be provided as keyword arguments. Note that namespaces
are currently not supported for variables.
"""
+ cdef python.PyThreadState* state
cdef xpath.xmlXPathContext* xpathCtxt
cdef xpath.xmlXPathObject* xpathObj
cdef xmlDoc* c_doc
@@ -197,47 +250,57 @@
xpathCtxt = self._xpathCtxt
doc = self._element._doc
+ self._lock()
self._context.register_context(xpathCtxt, doc)
c_doc = _fakeRootDoc(doc._c_doc, self._element._c_node)
try:
self._context.registerVariables(_variables)
+ state = python.PyEval_SaveThread()
xpathCtxt.doc = c_doc
xpathCtxt.node = tree.xmlDocGetRootElement(c_doc)
xpathObj = xpath.xmlXPathEvalExpression(_cstr(path), xpathCtxt)
finally:
+ python.PyEval_RestoreThread(state)
_destroyFakeDoc(doc._c_doc, c_doc)
self._context.unregister_context()
+ self._unlock()
return self._handle_result(xpathObj, doc)
-def XPathEvaluator(etree_or_element, namespaces=None, extensions=None):
+def XPathEvaluator(etree_or_element, namespaces=None, extensions=None,
+ regexp=True):
"""Creates an XPath evaluator for an ElementTree or an Element.
The resulting object can be called with an XPath expression as argument
and XPath variables provided as keyword arguments.
- XPath evaluators must not be shared between threads.
+ Additional namespace declarations can be passed with the 'namespaces'
+ keyword argument. EXSLT regular expression support can be disabled with
+ the 'regexp' boolean keyword (defaults to True).
"""
if isinstance(etree_or_element, _ElementTree):
- return XPathDocumentEvaluator(etree_or_element, namespaces, extensions)
+ return XPathDocumentEvaluator(etree_or_element, namespaces,
+ extensions, regexp)
else:
- return XPathElementEvaluator(etree_or_element, namespaces, extensions)
+ return XPathElementEvaluator(etree_or_element, namespaces,
+ extensions, regexp)
cdef class XPath(_XPathEvaluatorBase):
"""A compiled XPath expression that can be called on Elements and
ElementTrees.
- Besides the XPath expression, you can pass namespace mappings and
- extensions to the constructor through the keyword arguments ``namespaces``
- and ``extensions``.
+ Besides the XPath expression, you can pass prefix-namespace mappings and
+ extension functions to the constructor through the keyword arguments
+ ``namespaces`` and ``extensions``. EXSLT regular expression support can
+ be disabled with the 'regexp' boolean keyword (defaults to True).
"""
cdef xpath.xmlXPathCompExpr* _xpath
cdef readonly object path
- def __init__(self, path, namespaces=None, extensions=None):
- _XPathEvaluatorBase.__init__(self, namespaces, extensions)
+ def __init__(self, path, namespaces=None, extensions=None, regexp=True):
+ _XPathEvaluatorBase.__init__(self, namespaces, extensions, regexp)
self._xpath = NULL
self.path = path
path = _utf8(path)
@@ -258,19 +321,21 @@
document = _documentOrRaise(_etree_or_element)
element = _rootNodeOrRaise(_etree_or_element)
- xpathCtxt = self._xpathCtxt
- xpathCtxt.doc = document._c_doc
- xpathCtxt.node = element._c_node
+ self._lock()
+ self._xpathCtxt.doc = document._c_doc
+ self._xpathCtxt.node = element._c_node
context = self._context
- context.register_context(xpathCtxt, document)
+ context.register_context(self._xpathCtxt, document)
+ context.registerVariables(_variables)
try:
- context.registerVariables(_variables)
state = python.PyEval_SaveThread()
- xpathObj = xpath.xmlXPathCompiledEval(self._xpath, xpathCtxt)
- python.PyEval_RestoreThread(state)
+ xpathObj = xpath.xmlXPathCompiledEval(
+ self._xpath, self._xpathCtxt)
finally:
+ python.PyEval_RestoreThread(state)
context.unregister_context()
+ self._unlock()
return self._handle_result(xpathObj, document)
def __dealloc__(self):
Modified: lxml/branch/extension_refactoring/src/lxml/xslt.pxd
==============================================================================
--- lxml/branch/extension_refactoring/src/lxml/xslt.pxd (original)
+++ lxml/branch/extension_refactoring/src/lxml/xslt.pxd Sat Mar 17 07:03:26 2007
@@ -35,6 +35,8 @@
xmlXPathFunction function)
cdef int xsltUnregisterExtModuleFunction(char* name, char* URI)
cdef xmlXPathFunction xsltExtModuleFunctionLookup(char* name, char* URI)
+ cdef int xsltRegisterExtPrefix(xsltStylesheet* style,
+ char* prefix, char* URI)
cdef extern from "libxslt/documents.h":
ctypedef enum xsltLoadType:
Modified: lxml/branch/extension_refactoring/src/lxml/xslt.pxi
==============================================================================
--- lxml/branch/extension_refactoring/src/lxml/xslt.pxi (original)
+++ lxml/branch/extension_refactoring/src/lxml/xslt.pxi Sat Mar 17 07:03:26 2007
@@ -193,6 +193,21 @@
################################################################################
# XSLT
+cdef int _register_xslt_function(void* ctxt, name_utf, ns_utf):
+ if ns_utf is None:
+ return 0
+ return xslt.xsltRegisterExtFunction(
+ ctxt, _cstr(name_utf), _cstr(ns_utf),
+ _xpath_function_call)
+
+cdef int _unregister_xslt_function(void* ctxt, name_utf, ns_utf):
+ if ns_utf is None:
+ return 0
+ return xslt.xsltRegisterExtFunction(
+ ctxt, _cstr(name_utf), _cstr(ns_utf),
+ NULL)
+
+
cdef class _XSLTContext(_BaseContext):
cdef xslt.xsltTransformContext* _xsltCtxt
def __init__(self, namespaces, extensions):
@@ -207,7 +222,7 @@
self._set_xpath_context(xsltCtxt.xpathCtxt)
self._register_context(doc)
xsltCtxt.xpathCtxt.userData = self
- self._registerExtensionFunctions()
+ self._registerAllFunctions(xsltCtxt, _register_xslt_function)
cdef free_context(self):
cdef xslt.xsltTransformContext* xsltCtxt
@@ -219,49 +234,6 @@
xslt.xsltFreeTransformContext(xsltCtxt)
self._release_temp_refs()
- cdef void _addLocalExtensionFunction(self, ns_utf, name_utf, function):
- if self._extensions is None:
- self._extensions = {}
- python.PyDict_SetItem(self._extensions, (ns_utf, name_utf), function)
-
- cdef void _registerExtensionFunctions(self):
- cdef python.PyObject* dict_result
- for ns_utf, functions in _iter_extension_function_names():
- if ns_utf is None:
- continue
- dict_result = python.PyDict_GetItem(self._function_cache_ns, ns_utf)
- if dict_result is NULL:
- d = {}
- python.PyDict_SetItem(self._function_cache_ns, ns_utf, d)
- else:
- d = dict_result
- for name_utf, function in functions.iteritems():
- python.PyDict_SetItem(d, name_utf, function)
- xslt.xsltRegisterExtFunction(
- self._xsltCtxt, _cstr(name_utf), _cstr(ns_utf),
- _xpath_function_call)
- if self._extensions is None:
- return # done
- last_ns = None
- for (ns_utf, name_utf), function in self._extensions.iteritems():
- if ns_utf is None:
- raise ValueError, \
- "extensions must have non empty namespaces"
- elif ns_utf is not last_ns:
- last_ns = ns_utf
- dict_result = python.PyDict_GetItem(
- self._function_cache_ns, ns_utf)
- if dict_result is NULL:
- d = {}
- python.PyDict_SetItem(self._function_cache_ns, ns_utf, d)
- else:
- d = dict_result
- python.PyDict_SetItem(d, name_utf, function)
- xslt.xsltRegisterExtFunction(
- self._xsltCtxt, _cstr(name_utf), _cstr(ns_utf),
- _xpath_function_call)
-
-cdef class _ExsltRegExp # forward declaration
cdef class XSLT:
"""Turn a document into an XSLT object.
@@ -279,7 +251,6 @@
cdef xslt.xsltStylesheet* _c_style
cdef _XSLTResolverContext _xslt_resolver_context
cdef XSLTAccessControl _access_control
- cdef _ExsltRegExp _regexp
cdef _ErrorLog _error_log
def __init__(self, xslt_input, extensions=None, regexp=True, access_control=None):
@@ -289,6 +260,7 @@
cdef xmlDoc* fake_c_doc
cdef _Document doc
cdef _Element root_node
+ cdef _ExsltRegExp _regexp
doc = _documentOrRaise(xslt_input)
root_node = _rootNodeOrRaise(xslt_input)
@@ -329,11 +301,8 @@
self._context = _XSLTContext(None, extensions)
if regexp:
- self._regexp = _ExsltRegExp()
- self._regexp._register_in_context(self._context)
- else:
- self._regexp = None
- # XXX is it worthwile to use xsltPrecomputeStylesheet here?
+ _regexp = _ExsltRegExp()
+ _regexp._register_in_context(self._context)
def __dealloc__(self):
if self._xslt_resolver_context is not None and \
@@ -649,82 +618,3 @@
if attr == key:
return value
return default
-
-################################################################################
-# EXSLT regexp implementation
-
-cdef class _ExsltRegExp:
- cdef object _compile_map
- def __init__(self):
- self._compile_map = {}
-
- cdef _make_string(self, value):
- if _isString(value):
- return value
- else:
- raise TypeError, "Invalid argument type %s" % type(value)
-
- cdef _compile(self, rexp, ignore_case):
- cdef python.PyObject* c_result
- rexp = self._make_string(rexp)
- key = (rexp, ignore_case)
- c_result = python.PyDict_GetItem(self._compile_map, key)
- if c_result is not NULL:
- return c_result
- py_flags = re.UNICODE
- if ignore_case:
- py_flags = py_flags | re.IGNORECASE
- rexp_compiled = re.compile(rexp, py_flags)
- python.PyDict_SetItem(self._compile_map, key, rexp_compiled)
- return rexp_compiled
-
- def test(self, ctxt, s, rexp, flags=''):
- flags = self._make_string(flags)
- s = self._make_string(s)
- rexpc = self._compile(rexp, 'i' in flags)
- if rexpc.search(s) is None:
- return False
- else:
- return True
-
- def match(self, ctxt, s, rexp, flags=''):
- flags = self._make_string(flags)
- s = self._make_string(s)
- rexpc = self._compile(rexp, 'i' in flags)
- if 'g' in flags:
- results = rexpc.findall(s)
- if not results:
- return ()
- else:
- result = rexpc.search(s)
- if not result:
- return ()
- results = [ result.group() ]
- results.extend( result.groups('') )
- result_list = []
- root = Element('matches')
- join_groups = ''.join
- for s_match in results:
- if python.PyTuple_CheckExact(s_match):
- s_match = join_groups(s_match)
- elem = SubElement(root, 'match')
- elem.text = s_match
- python.PyList_Append(result_list, elem)
- return result_list
-
- def replace(self, ctxt, s, rexp, flags, replacement):
- replacement = self._make_string(replacement)
- flags = self._make_string(flags)
- s = self._make_string(s)
- rexpc = self._compile(rexp, 'i' in flags)
- if 'g' in flags:
- count = 0
- else:
- count = 1
- return rexpc.sub(replacement, s, count)
-
- cdef _register_in_context(self, _XSLTContext context):
- ns = "http://exslt.org/regular-expressions"
- context._addLocalExtensionFunction(ns, "test", self.test)
- context._addLocalExtensionFunction(ns, "match", self.match)
- context._addLocalExtensionFunction(ns, "replace", self.replace)
From scoder at codespeak.net Wed Mar 21 09:48:19 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 21 Mar 2007 09:48:19 +0100 (CET)
Subject: [Lxml-checkins] r40876 - in lxml/trunk/src/lxml: . tests
Message-ID: <20070321084819.2575E1007D@code0.codespeak.net>
Author: scoder
Date: Wed Mar 21 09:48:17 2007
New Revision: 40876
Modified:
lxml/trunk/src/lxml/etree.pyx
lxml/trunk/src/lxml/tests/test_etree.py
lxml/trunk/src/lxml/tree.pxd
Log:
'sourceline' property on Elements
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Wed Mar 21 09:48:17 2007
@@ -673,6 +673,24 @@
return None
# not in ElementTree, read-only
+ property sourceline:
+ """Original line number as found by the parser or None if unknown.
+ """
+ def __get__(self):
+ cdef long line
+ line = tree.xmlGetLineNo(self._c_node)
+ if line > 0:
+ return line
+ else:
+ return None
+
+ def __set__(self, line):
+ if line < 0:
+ self._c_node.line = 0
+ else:
+ self._c_node.line = line
+
+ # not in ElementTree, read-only
property nsmap:
"""Namespace prefix->URI mapping known in the context of this Element.
"""
Modified: lxml/trunk/src/lxml/tests/test_etree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_etree.py (original)
+++ lxml/trunk/src/lxml/tests/test_etree.py Wed Mar 21 09:48:17 2007
@@ -1119,6 +1119,37 @@
["tail0", "tail1", "tail2", "TAIL0", "TAIL1", "TAIL2"],
[ el.tail for el in root ])
+ def test_sourceline_XML(self):
+ XML = self.etree.XML
+ root = XML('''
+
+
+
+
+ ''')
+
+ self.assertEquals(
+ [2, 2, 4],
+ [ el.sourceline for el in root.getiterator() ])
+
+ def test_sourceline_parse(self):
+ parse = self.etree.parse
+ tree = parse(fileInTestDir('test_xinclude.xml'))
+
+ self.assertEquals(
+ [1, 2, 3],
+ [ el.sourceline for el in tree.getiterator() ])
+
+ def test_sourceline_element(self):
+ Element = self.etree.Element
+ SubElement = self.etree.SubElement
+ el = Element("test")
+ self.assertEquals(None, el.sourceline)
+
+ child = SubElement(el, "test")
+ self.assertEquals(None, el.sourceline)
+ self.assertEquals(None, child.sourceline)
+
def test_docinfo_public(self):
etree = self.etree
xml_header = ''
Modified: lxml/trunk/src/lxml/tree.pxd
==============================================================================
--- lxml/trunk/src/lxml/tree.pxd (original)
+++ lxml/trunk/src/lxml/tree.pxd Wed Mar 21 09:48:17 2007
@@ -97,6 +97,7 @@
xmlAttr* properties
xmlNs* ns
xmlNs* nsDef
+ unsigned short line
ctypedef struct xmlDtd:
char* ExternalID
@@ -198,6 +199,7 @@
cdef xmlNs* xmlSearchNs(xmlDoc* doc, xmlNode* node, char* prefix)
cdef xmlNs* xmlSearchNsByHref(xmlDoc* doc, xmlNode* node, char* href)
cdef int xmlIsBlankNode(xmlNode* node)
+ cdef long xmlGetLineNo(xmlNode* node)
cdef void xmlElemDump(FILE* f, xmlDoc* doc, xmlNode* cur)
cdef void xmlNodeDumpOutput(xmlOutputBuffer* buf,
xmlDoc* doc, xmlNode* cur, int level,
From scoder at codespeak.net Wed Mar 21 14:46:26 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 21 Mar 2007 14:46:26 +0100 (CET)
Subject: [Lxml-checkins] r40927 - lxml/branch/extension_refactoring/doc
Message-ID: <20070321134626.ADAB8100A0@code0.codespeak.net>
Author: scoder
Date: Wed Mar 21 14:46:25 2007
New Revision: 40927
Modified:
lxml/branch/extension_refactoring/doc/xpathxslt.txt
Log:
cleanup
Modified: lxml/branch/extension_refactoring/doc/xpathxslt.txt
==============================================================================
--- lxml/branch/extension_refactoring/doc/xpathxslt.txt (original)
+++ lxml/branch/extension_refactoring/doc/xpathxslt.txt Wed Mar 21 14:46:25 2007
@@ -72,11 +72,12 @@
>>> f = StringIO('''\
...
+ ... xmlns:b="http://codespeak.net/ns/test2">
... Text
...
... ''')
>>> doc = etree.parse(f)
+
>>> r = doc.xpath('/t:foo/b:bar', {'t': 'http://codespeak.net/ns/test1',
... 'b': 'http://codespeak.net/ns/test2'})
>>> len(r)
From scoder at codespeak.net Wed Mar 21 14:46:51 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 21 Mar 2007 14:46:51 +0100 (CET)
Subject: [Lxml-checkins] r40928 - lxml/branch/extension_refactoring/benchmark
Message-ID: <20070321134651.997B7100A2@code0.codespeak.net>
Author: scoder
Date: Wed Mar 21 14:46:50 2007
New Revision: 40928
Modified:
lxml/branch/extension_refactoring/benchmark/bench_xpath.py
Log:
cleanup
Modified: lxml/branch/extension_refactoring/benchmark/bench_xpath.py
==============================================================================
--- lxml/branch/extension_refactoring/benchmark/bench_xpath.py (original)
+++ lxml/branch/extension_refactoring/benchmark/bench_xpath.py Wed Mar 21 14:46:50 2007
@@ -35,31 +35,32 @@
@onlylib('lxe')
def bench_xpath_old_extensions(self, root):
- def return_child(_, element):
- if element:
- return element[0]
+ def return_child(_, elements):
+ if elements:
+ return elements[0][0]
else:
return ()
- extensions = {(None, 'child') : return_child}
- xpath = self.etree.XPath("child(.)", extensions=extensions)
+ extensions = {("test", "child") : return_child}
+ xpath = self.etree.XPath("t:child(.)", namespaces={"test":"t"},
+ extensions=extensions)
for child in root:
xpath(child)
@onlylib('lxe')
def bench_xpath_extensions(self, root):
- def return_child(_, element):
- if element:
- return element[0]
+ def return_child(_, elements):
+ if elements:
+ return elements[0][0]
else:
return ()
- self.etree.FunctionNamespace("test")["t"] = return_child
+ self.etree.FunctionNamespace("testns")["t"] = return_child
try:
- xpath = self.etree.XPath("test:t(.)", {"test":"test"})
+ xpath = self.etree.XPath("test:t(.)", {"test":"testns"})
for child in root:
xpath(child)
finally:
- del self.etree.FunctionNamespace("test")["t"]
+ del self.etree.FunctionNamespace("testns")["t"]
if __name__ == '__main__':
benchbase.main(XPathBenchMark)
From scoder at codespeak.net Wed Mar 21 14:48:10 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Wed, 21 Mar 2007 14:48:10 +0100 (CET)
Subject: [Lxml-checkins] r40929 - lxml/branch/extension_refactoring/src/lxml
Message-ID: <20070321134810.BA5ED100A2@code0.codespeak.net>
Author: scoder
Date: Wed Mar 21 14:48:09 2007
New Revision: 40929
Modified:
lxml/branch/extension_refactoring/src/lxml/extensions.pxi
lxml/branch/extension_refactoring/src/lxml/xpath.pxi
lxml/branch/extension_refactoring/src/lxml/xslt.pxi
Log:
another major rewrite of extension function registration, cleanup
Modified: lxml/branch/extension_refactoring/src/lxml/extensions.pxi
==============================================================================
--- lxml/branch/extension_refactoring/src/lxml/extensions.pxi (original)
+++ lxml/branch/extension_refactoring/src/lxml/extensions.pxi Wed Mar 21 14:48:09 2007
@@ -1,4 +1,4 @@
-# supports for extension functions in XPath and XSLT
+# support for extension functions in XPath and XSLT
class XPathError(LxmlError):
pass
@@ -9,17 +9,20 @@
class XPathResultError(XPathError):
pass
-################################################################################
-# Base class for XSLT and XPath evaluation contexts: functions, namespaces, ...
+# forward declarations
ctypedef int _register_function(void* ctxt, name_utf, ns_uri_utf)
+cdef class _ExsltRegExp
+
+################################################################################
+# Base class for XSLT and XPath evaluation contexts: functions, namespaces, ...
cdef class _BaseContext:
cdef xpath.xmlXPathContext* _xpathCtxt
cdef _Document _doc
cdef object _extensions
cdef object _namespaces
- cdef object _registered_namespaces
+ cdef object _global_namespaces
cdef object _utf_refs
cdef object _function_cache
cdef object _function_cache_ns
@@ -28,10 +31,10 @@
cdef _TempStore _temp_refs
cdef _ExceptionContext _exc
- def __init__(self, namespaces, extensions):
- self._xpathCtxt = NULL
+ def __init__(self, namespaces, extensions, enable_regexp):
+ cdef _ExsltRegExp _regexp
self._utf_refs = {}
- self._registered_namespaces = []
+ self._global_namespaces = []
self._function_cache = {}
self._function_cache_ns = {}
@@ -39,7 +42,7 @@
# convert extensions to UTF-8
if python.PyDict_Check(extensions):
extensions = (extensions,)
- # format: [ {(ns,name):function} ] -> {(ns_utf,name_utf):function}
+ # format: [ {(ns, name):function} ] -> {(ns_utf, name_utf):function}
new_extensions = {}
for extension in extensions:
for (ns_uri, name), function in extension.items():
@@ -52,17 +55,38 @@
new_extensions, (ns_utf, name_utf), function)
extensions = new_extensions or None
+ if namespaces is not None:
+ if python.PyDict_Check(namespaces):
+ namespaces = namespaces.items()
+ if namespaces:
+ ns = []
+ for prefix, ns_uri in namespaces:
+ if prefix is None:
+ raise TypeError, \
+ "empty namespace prefix is not supported in XPath"
+ if ns_uri is None:
+ raise TypeError, \
+ "setting default namespace is not supported in XPath"
+ prefix_utf = self._to_utf(prefix)
+ ns_uri_utf = self._to_utf(ns_uri)
+ python.PyList_Append(ns, (prefix_utf, ns_uri_utf))
+ namespaces = ns
+
self._doc = None
self._exc = _ExceptionContext()
self._extensions = extensions
self._namespaces = namespaces
self._temp_refs = _TempStore()
+ if enable_regexp:
+ _regexp = _ExsltRegExp()
+ _regexp._register_in_context(self)
+
cdef _copy(self):
cdef _BaseContext context
if self._namespaces is not None:
- namespaces = python.PyDict_Copy(self._namespaces)
- context = self.__class__(namespaces, None)
+ namespaces = self._namespaces[:]
+ context = self.__class__(namespaces, None, False)
if self._extensions is not None:
context._extensions = python.PyDict_Copy(self._extensions)
return context
@@ -86,57 +110,72 @@
cdef _register_context(self, _Document doc):
self._doc = doc
self._exc.clear()
- python.PyDict_Clear(self._function_cache)
- python.PyDict_Clear(self._function_cache_ns)
- namespaces = self._namespaces
- if namespaces is not None:
- self.registerNamespaces(namespaces)
- cdef _unregister_context(self):
- self._unregisterNamespaces()
-# xpath.xmlXPathRegisteredNsCleanup(self._xpathCtxt)
- self._free_context()
-
- cdef _free_context(self):
+ cdef _cleanup_context(self):
+ #xpath.xmlXPathRegisteredNsCleanup(self._xpathCtxt)
+ #self.unregisterGlobalNamespaces()
python.PyDict_Clear(self._utf_refs)
self._doc = None
+
+ cdef _release_context(self):
if self._xpathCtxt is not NULL:
self._xpathCtxt.userData = NULL
self._xpathCtxt = NULL
# namespaces (internal UTF-8 methods with leading '_')
- cdef addNamespace(self, prefix, uri):
+ cdef addNamespace(self, prefix, ns_uri):
+ if prefix is None:
+ raise TypeError, "empty prefix is not supported in XPath"
+ prefix_utf = self._to_utf(prefix)
+ ns_uri_utf = self._to_utf(ns_uri)
+ new_item = (prefix_utf, ns_uri_utf)
if self._namespaces is None:
- self._namespaces = {}
- python.PyDict_SetItem(self._namespaces, prefix, uri)
+ self._namespaces = [new_item]
+ else:
+ namespaces = []
+ for item in self._namespaces:
+ if item[0] == prefix_utf:
+ item = new_item
+ new_item = None
+ python.PyList_Append(namespaces, item)
+ if new_item is not None:
+ python.PyList_Append(namespaces, new_item)
+ self._namespaces = namespaces
+ if self._xpathCtxt is not NULL:
+ xpath.xmlXPathRegisterNs(
+ self._xpathCtxt, _cstr(prefix_utf), _cstr(ns_uri_utf))
- cdef registerNamespaces(self, namespaces):
- for prefix, uri in namespaces.items():
- self.registerNamespace(prefix, uri)
-
cdef registerNamespace(self, prefix, ns_uri):
if prefix is None:
raise TypeError, "empty prefix is not supported in XPath"
prefix_utf = self._to_utf(prefix)
ns_uri_utf = self._to_utf(ns_uri)
- python.PyList_Append(self._registered_namespaces, prefix_utf)
+ python.PyList_Append(self._global_namespaces, prefix_utf)
xpath.xmlXPathRegisterNs(self._xpathCtxt,
_cstr(prefix_utf), _cstr(ns_uri_utf))
- cdef _registerNamespace(self, prefix_utf, ns_uri_utf):
- python.PyList_Append(self._registered_namespaces, prefix_utf)
- xpath.xmlXPathRegisterNs(self._xpathCtxt,
- _cstr(prefix_utf), _cstr(ns_uri_utf))
-
- cdef void _unregisterNamespaces(self):
- if python.PyList_GET_SIZE(self._registered_namespaces) > 0:
- for prefix_utf in self._registered_namespaces:
- sys.stderr.write(prefix_utf)
- sys.stderr.flush()
+ cdef registerLocalNamespaces(self):
+ if self._namespaces is None:
+ return
+ for prefix_utf, ns_uri_utf in self._namespaces:
+ xpath.xmlXPathRegisterNs(
+ self._xpathCtxt, _cstr(prefix_utf), _cstr(ns_uri_utf))
+
+ cdef registerGlobalNamespaces(self):
+ ns_prefixes = _find_all_extension_prefixes()
+ if python.PyList_GET_SIZE(ns_prefixes) > 0:
+ for prefix_utf, ns_uri_utf in ns_prefixes:
+ python.PyList_Append(self._global_namespaces, prefix_utf)
+ xpath.xmlXPathRegisterNs(
+ self._xpathCtxt, _cstr(prefix_utf), _cstr(ns_uri_utf))
+
+ cdef unregisterGlobalNamespaces(self):
+ if python.PyList_GET_SIZE(self._global_namespaces) > 0:
+ for prefix_utf in self._global_namespaces:
xpath.xmlXPathRegisterNs(self._xpathCtxt,
_cstr(prefix_utf), NULL)
- self._registered_namespaces = []
+ del self._global_namespaces[:]
cdef void _unregisterNamespace(self, prefix_utf):
xpath.xmlXPathRegisterNs(self._xpathCtxt,
@@ -149,7 +188,7 @@
self._extensions = {}
python.PyDict_SetItem(self._extensions, (ns_utf, name_utf), function)
- cdef void _registerAllFunctions(self, void* ctxt,
+ cdef void registerGlobalFunctions(self, void* ctxt,
_register_function reg_func):
cdef python.PyObject* dict_result
for ns_utf, ns_functions in _iter_ns_extension_functions():
@@ -167,6 +206,10 @@
for name_utf, function in ns_functions.iteritems():
python.PyDict_SetItem(d, name_utf, function)
reg_func(ctxt, name_utf, ns_utf)
+
+ cdef void registerLocalFunctions(self, void* ctxt,
+ _register_function reg_func):
+ cdef python.PyObject* dict_result
if self._extensions is None:
return # done
last_ns = None
@@ -188,7 +231,7 @@
python.PyDict_SetItem(d, name_utf, function)
reg_func(ctxt, name_utf, ns_utf)
- cdef void _unregisterAllFunctions(self, void* ctxt,
+ cdef unregisterAllFunctions(self, void* ctxt,
_register_function unreg_func):
for name_utf in self._function_cache:
unreg_func(ctxt, name_utf, None)
@@ -196,6 +239,18 @@
for name_utf in functions:
unreg_func(ctxt, name_utf, ns_utf)
+ cdef unregisterGlobalFunctions(self, void* ctxt,
+ _register_function unreg_func):
+ for name_utf in self._function_cache:
+ if self._extensions is None or \
+ (None, name_utf) not in self._extensions:
+ unreg_func(ctxt, name_utf, None)
+ for ns_utf, functions in self._function_cache_ns.iteritems():
+ for name_utf in functions:
+ if self._extensions is None or \
+ (ns_utf, name_utf) not in self._extensions:
+ unreg_func(ctxt, name_utf, ns_utf)
+
cdef _find_cached_function(self, char* c_ns_uri, char* c_name):
"""Lookup an extension function in the cache and return it.
@@ -215,7 +270,7 @@
return dict_result
return None
- cdef int _prepare_function_call(self, char* c_ns_uri, char* c_name):
+ cdef int __prepare_function_call(self, char* c_ns_uri, char* c_name):
"""Find an extension function and store it in 'self._called_function'.
This is absolutely performance-critical for XPath/XSLT!
@@ -393,27 +448,6 @@
################################################################################
# helper functions
-cdef xpath.xmlXPathFunction _function_check(void* ctxt,
- char* c_name, char* c_ns_uri):
- cdef python.PyGILState_STATE gil_state
- cdef xpath.xmlXPathFunction c_func
- gil_state = python.PyGILState_Ensure()
- c_func = _python_function_check(ctxt, c_name, c_ns_uri)
- python.PyGILState_Release(gil_state)
- return c_func
-
-cdef xpath.xmlXPathFunction _python_function_check(void* ctxt,
- char* c_name, char* c_ns_uri):
- "Module level lookup function for XPath/XSLT functions"
- cdef xpath.xmlXPathFunction c_func
- cdef _BaseContext context
- context = <_BaseContext>ctxt
- if context._prepare_function_call(c_ns_uri, c_name):
- c_func = _call_prepared_function
- else:
- c_func = NULL
- return c_func
-
cdef xpath.xmlXPathObject* _wrapXPathObject(object obj) except NULL:
cdef xpath.xmlNodeSet* resultSet
cdef _Element node
@@ -575,19 +609,3 @@
xpath.xmlXPathErr(ctxt, xpath.XPATH_UNKNOWN_FUNC_ERROR)
exception = XPathFunctionError("XPath function '%s' not found" % fref)
context._exc._store_exception(exception)
-
-# call the function that was stored in 'context._called_function'
-
-cdef void _call_prepared_function(xpath.xmlXPathParserContext* ctxt, int nargs):
- cdef python.PyGILState_STATE gil_state
- gil_state = python.PyGILState_Ensure()
- _call_prepared_python_function(ctxt, nargs)
- python.PyGILState_Release(gil_state)
-
-cdef void _call_prepared_python_function(xpath.xmlXPathParserContext* ctxt,
- int nargs):
- cdef xpath.xmlXPathContext* rctxt
- cdef _BaseContext context
- rctxt = ctxt.context
- context = <_BaseContext>(rctxt.userData)
- _extension_function_call(context, context._called_function, ctxt, nargs)
Modified: lxml/branch/extension_refactoring/src/lxml/xpath.pxi
==============================================================================
--- lxml/branch/extension_refactoring/src/lxml/xpath.pxi (original)
+++ lxml/branch/extension_refactoring/src/lxml/xpath.pxi Wed Mar 21 14:48:09 2007
@@ -30,29 +30,29 @@
cdef class _XPathContext(_BaseContext):
cdef object _variables
- def __init__(self, namespaces, extensions, variables):
+ def __init__(self, namespaces, extensions, enable_regexp, variables):
self._variables = variables
- _BaseContext.__init__(self, namespaces, extensions)
-
- cdef register_context(self, xpath.xmlXPathContext* xpathCtxt, _Document doc):
+ _BaseContext.__init__(self, namespaces, extensions, enable_regexp)
+
+ cdef set_context(self, xpath.xmlXPathContext* xpathCtxt):
self._set_xpath_context(xpathCtxt)
- ns_prefixes = _find_all_extension_prefixes()
- if python.PyList_GET_SIZE(ns_prefixes) > 0:
- for (prefix, ns_uri) in ns_prefixes:
- self._registerNamespace(prefix, ns_uri)
+ self._setupDict(xpathCtxt)
+ self.registerLocalNamespaces()
+ self.registerLocalFunctions(xpathCtxt, _register_xpath_function)
+
+ cdef register_context(self, _Document doc):
self._register_context(doc)
+ self.registerGlobalNamespaces()
+ self.registerGlobalFunctions(self._xpathCtxt, _register_xpath_function)
if self._variables is not None:
self.registerVariables(self._variables)
- self._registerAllFunctions(xpathCtxt, _register_xpath_function)
cdef unregister_context(self):
- cdef xpath.xmlXPathContext* xpathCtxt
- xpathCtxt = self._xpathCtxt
- if xpathCtxt is NULL:
- return
- xpath.xmlXPathRegisteredVariablesCleanup(xpathCtxt)
- self._unregisterAllFunctions(xpathCtxt, _unregister_xpath_function)
- self._unregister_context()
+ self.unregisterGlobalFunctions(
+ self._xpathCtxt, _unregister_xpath_function)
+ self.unregisterGlobalNamespaces()
+ xpath.xmlXPathRegisteredVariablesCleanup(self._xpathCtxt)
+ self._cleanup_context()
cdef registerVariables(self, variable_dict):
for name, value in variable_dict.items():
@@ -69,25 +69,26 @@
xpath.xmlXPathRegisterVariable(
self._xpathCtxt, _cstr(name_utf), _wrapXPathObject(value))
-cdef void _setupDict(xpath.xmlXPathContext* xpathCtxt):
- __GLOBAL_PARSER_CONTEXT.initXPathParserDict(xpathCtxt)
+ cdef void _setupDict(self, xpath.xmlXPathContext* xpathCtxt):
+ __GLOBAL_PARSER_CONTEXT.initXPathParserDict(xpathCtxt)
cdef class _XPathEvaluatorBase:
cdef xpath.xmlXPathContext* _xpathCtxt
cdef _XPathContext _context
cdef python.PyThread_type_lock _eval_lock
- def __init__(self, namespaces, extensions, regexp):
- cdef _ExsltRegExp _regexp
- self._context = _XPathContext(namespaces, extensions, None)
- if regexp:
- _regexp = _ExsltRegExp()
- _regexp._register_in_context(self._context)
+ def __init__(self, namespaces, extensions, enable_regexp):
+ self._context = _XPathContext(namespaces, extensions,
+ enable_regexp, None)
def __dealloc__(self):
if self._xpathCtxt is not NULL:
xpath.xmlXPathFreeContext(self._xpathCtxt)
+ cdef set_context(self, xpath.xmlXPathContext* xpathCtxt):
+ self._xpathCtxt = xpathCtxt
+ self._context.set_context(xpathCtxt)
+
def evaluate(self, _eval_arg, **_variables):
"""Evaluate an XPath expression.
@@ -170,14 +171,13 @@
cdef xpath.xmlXPathContext* xpathCtxt
cdef int ns_register_status
cdef _Document doc
+ self._element = element
doc = element._doc
+ _XPathEvaluatorBase.__init__(self, namespaces, extensions, regexp)
xpathCtxt = xpath.xmlXPathNewContext(doc._c_doc)
- self._xpathCtxt = xpathCtxt
if xpathCtxt is NULL:
raise XPathContextError, "Unable to create new XPath context"
- _setupDict(xpathCtxt)
- self._element = element
- _XPathEvaluatorBase.__init__(self, namespaces, extensions, regexp)
+ self.set_context(xpathCtxt)
def registerNamespace(self, prefix, uri):
"""Register a namespace with the XPath context.
@@ -200,27 +200,27 @@
against the ElementTree as returned by getroottree().
"""
cdef python.PyThreadState* state
- cdef xpath.xmlXPathContext* xpathCtxt
cdef xpath.xmlXPathObject* xpathObj
cdef _Document doc
cdef char* c_path
path = _utf8(_path)
- xpathCtxt = self._xpathCtxt
- xpathCtxt.node = self._element._c_node
doc = self._element._doc
self._lock()
- self._context.register_context(xpathCtxt, doc)
+ self._xpathCtxt.node = self._element._c_node
try:
+ self._context.register_context(doc)
self._context.registerVariables(_variables)
state = python.PyEval_SaveThread()
- xpathObj = xpath.xmlXPathEvalExpression(_cstr(path), xpathCtxt)
- finally:
+ xpathObj = xpath.xmlXPathEvalExpression(
+ _cstr(path), self._xpathCtxt)
python.PyEval_RestoreThread(state)
+ result = self._handle_result(xpathObj, doc)
+ finally:
self._context.unregister_context()
self._unlock()
- return self._handle_result(xpathObj, doc)
+ return result
cdef class XPathDocumentEvaluator(XPathElementEvaluator):
@@ -242,30 +242,32 @@
are currently not supported for variables.
"""
cdef python.PyThreadState* state
- cdef xpath.xmlXPathContext* xpathCtxt
cdef xpath.xmlXPathObject* xpathObj
cdef xmlDoc* c_doc
cdef _Document doc
path = _utf8(_path)
- xpathCtxt = self._xpathCtxt
doc = self._element._doc
self._lock()
- self._context.register_context(xpathCtxt, doc)
- c_doc = _fakeRootDoc(doc._c_doc, self._element._c_node)
try:
- self._context.registerVariables(_variables)
- state = python.PyEval_SaveThread()
- xpathCtxt.doc = c_doc
- xpathCtxt.node = tree.xmlDocGetRootElement(c_doc)
- xpathObj = xpath.xmlXPathEvalExpression(_cstr(path), xpathCtxt)
+ self._context.register_context(doc)
+ c_doc = _fakeRootDoc(doc._c_doc, self._element._c_node)
+ try:
+ self._context.registerVariables(_variables)
+ state = python.PyEval_SaveThread()
+ self._xpathCtxt.doc = c_doc
+ self._xpathCtxt.node = tree.xmlDocGetRootElement(c_doc)
+ xpathObj = xpath.xmlXPathEvalExpression(
+ _cstr(path), self._xpathCtxt)
+ python.PyEval_RestoreThread(state)
+ result = self._handle_result(xpathObj, doc)
+ finally:
+ _destroyFakeDoc(doc._c_doc, c_doc)
+ self._context.unregister_context()
finally:
- python.PyEval_RestoreThread(state)
- _destroyFakeDoc(doc._c_doc, c_doc)
- self._context.unregister_context()
self._unlock()
- return self._handle_result(xpathObj, doc)
+ return result
def XPathEvaluator(etree_or_element, namespaces=None, extensions=None,
@@ -300,19 +302,20 @@
cdef readonly object path
def __init__(self, path, namespaces=None, extensions=None, regexp=True):
+ cdef xpath.xmlXPathContext* xpathCtxt
_XPathEvaluatorBase.__init__(self, namespaces, extensions, regexp)
- self._xpath = NULL
self.path = path
path = _utf8(path)
- self._xpathCtxt = xpath.xmlXPathNewContext(NULL)
- _setupDict(self._xpathCtxt)
- self._xpath = xpath.xmlXPathCtxtCompile(self._xpathCtxt, _cstr(path))
+ xpathCtxt = xpath.xmlXPathNewContext(NULL)
+ if xpathCtxt is NULL:
+ raise XPathContextError, "Unable to create new XPath context"
+ self.set_context(xpathCtxt)
+ self._xpath = xpath.xmlXPathCtxtCompile(xpathCtxt, _cstr(path))
if self._xpath is NULL:
self._raise_parse_error()
def __call__(self, _etree_or_element, **_variables):
cdef python.PyThreadState* state
- cdef xpath.xmlXPathContext* xpathCtxt
cdef xpath.xmlXPathObject* xpathObj
cdef _Document document
cdef _Element element
@@ -325,18 +328,18 @@
self._xpathCtxt.doc = document._c_doc
self._xpathCtxt.node = element._c_node
- context = self._context
- context.register_context(self._xpathCtxt, document)
- context.registerVariables(_variables)
try:
+ self._context.register_context(document)
+ self._context.registerVariables(_variables)
state = python.PyEval_SaveThread()
xpathObj = xpath.xmlXPathCompiledEval(
self._xpath, self._xpathCtxt)
- finally:
python.PyEval_RestoreThread(state)
- context.unregister_context()
+ result = self._handle_result(xpathObj, document)
+ finally:
+ self._context.unregister_context()
self._unlock()
- return self._handle_result(xpathObj, document)
+ return result
def __dealloc__(self):
if self._xpath is not NULL:
Modified: lxml/branch/extension_refactoring/src/lxml/xslt.pxi
==============================================================================
--- lxml/branch/extension_refactoring/src/lxml/xslt.pxi (original)
+++ lxml/branch/extension_refactoring/src/lxml/xslt.pxi Wed Mar 21 14:48:09 2007
@@ -210,28 +210,29 @@
cdef class _XSLTContext(_BaseContext):
cdef xslt.xsltTransformContext* _xsltCtxt
- def __init__(self, namespaces, extensions):
+ def __init__(self, namespaces, extensions, enable_regexp):
self._xsltCtxt = NULL
- if extensions and None in extensions:
- raise XSLTExtensionError, "extensions must not have empty namespaces"
- _BaseContext.__init__(self, namespaces, extensions)
+ if extensions is not None:
+ for ns, prefix in extensions:
+ if ns is None:
+ raise XSLTExtensionError, \
+ "extensions must not have empty namespaces"
+ _BaseContext.__init__(self, namespaces, extensions, enable_regexp)
cdef register_context(self, xslt.xsltTransformContext* xsltCtxt,
_Document doc):
self._xsltCtxt = xsltCtxt
self._set_xpath_context(xsltCtxt.xpathCtxt)
self._register_context(doc)
- xsltCtxt.xpathCtxt.userData = self
- self._registerAllFunctions(xsltCtxt, _register_xslt_function)
+ self.registerLocalFunctions(xsltCtxt, _register_xslt_function)
+ self.registerGlobalFunctions(xsltCtxt, _register_xslt_function)
cdef free_context(self):
- cdef xslt.xsltTransformContext* xsltCtxt
- xsltCtxt = self._xsltCtxt
- if xsltCtxt is NULL:
- return
- self._free_context()
- self._xsltCtxt = NULL
- xslt.xsltFreeTransformContext(xsltCtxt)
+ self._cleanup_context()
+ self._release_context()
+ if self._xsltCtxt is not NULL:
+ xslt.xsltFreeTransformContext(self._xsltCtxt)
+ self._xsltCtxt = NULL
self._release_temp_refs()
@@ -253,7 +254,8 @@
cdef XSLTAccessControl _access_control
cdef _ErrorLog _error_log
- def __init__(self, xslt_input, extensions=None, regexp=True, access_control=None):
+ def __init__(self, xslt_input, extensions=None, regexp=True,
+ access_control=None):
cdef python.PyThreadState* state
cdef xslt.xsltStylesheet* c_style
cdef xmlDoc* c_doc
@@ -299,10 +301,7 @@
c_doc._private = NULL # no longer used!
self._c_style = c_style
- self._context = _XSLTContext(None, extensions)
- if regexp:
- _regexp = _ExsltRegExp()
- _regexp._register_in_context(self._context)
+ self._context = _XSLTContext(None, extensions, regexp)
def __dealloc__(self):
if self._xslt_resolver_context is not None and \
@@ -315,20 +314,24 @@
def __get__(self):
return self._error_log.copy()
+ def apply(self, _input, profile_run=False, **_kw):
+ return self(_input, profile_run, **_kw)
+
+ def tostring(self, _ElementTree result_tree):
+ """Save result doc to string based on stylesheet output method.
+ """
+ return str(result_tree)
+
def __call__(self, _input, profile_run=False, **_kw):
- cdef python.PyThreadState* state
cdef _XSLTContext context
cdef _Document input_doc
cdef _Element root_node
cdef _Document result_doc
cdef _Document profile_doc
cdef xmlDoc* c_profile_doc
- cdef _XSLTResolverContext resolver_context
cdef xslt.xsltTransformContext* transform_ctxt
cdef xmlDoc* c_result
cdef xmlDoc* c_doc
- cdef char** params
- cdef Py_ssize_t i, kw_count
if not _checkThreadDict(self._c_style.doc.dict):
raise RuntimeError, "stylesheet is not usable in this thread"
@@ -336,9 +339,6 @@
input_doc = _documentOrRaise(_input)
root_node = _rootNodeOrRaise(_input)
- resolver_context = _XSLTResolverContext(input_doc._parser)
- resolver_context._c_style_doc = self._xslt_resolver_context._c_style_doc
-
c_doc = _fakeRootDoc(input_doc._c_doc, root_node._c_node)
transform_ctxt = xslt.xsltNewTransformContext(self._c_style, c_doc)
@@ -348,28 +348,82 @@
initTransformDict(transform_ctxt)
- self._error_log.connect()
+ if profile_run:
+ transform_ctxt.profile = 1
+
+ try:
+ self._error_log.connect()
+ context = self._context._copy()
+ context.register_context(transform_ctxt, input_doc)
+
+ c_result = self._run_transform(
+ input_doc, c_doc, _kw, context, transform_ctxt)
+
+ if transform_ctxt.profile:
+ c_profile_doc = xslt.xsltGetProfileInformation(transform_ctxt)
+ if c_profile_doc is not NULL:
+ profile_doc = _documentFactory(
+ c_profile_doc, input_doc._parser)
+ finally:
+ if context is not None:
+ context.free_context()
+ _destroyFakeDoc(input_doc._c_doc, c_doc)
+ self._error_log.disconnect()
+
+ try:
+ if self._xslt_resolver_context._has_raised():
+ if c_result is not NULL:
+ tree.xmlFreeDoc(c_result)
+ self._xslt_resolver_context._raise_if_stored()
+
+ if c_result is NULL:
+ error = self._error_log.last_error
+ if error is not None and error.message:
+ if error.line >= 0:
+ message = "%s, line %d" % (error.message, error.line)
+ else:
+ message = error.message
+ elif error.line >= 0:
+ message = "Error applying stylesheet, line %d" % error.line
+ else:
+ message = "Error applying stylesheet"
+ raise XSLTApplyError, message
+ finally:
+ self._xslt_resolver_context.clear()
+
+ result_doc = _documentFactory(c_result, input_doc._parser)
+ return _xsltResultTreeFactory(result_doc, self, profile_doc)
+
+ cdef xmlDoc* _run_transform(self, _Document input_doc, xmlDoc* c_input_doc,
+ parameters, _XSLTContext context,
+ xslt.xsltTransformContext* transform_ctxt):
+ cdef python.PyThreadState* state
+ cdef _XSLTResolverContext resolver_context
+ cdef xmlDoc* c_result
+ cdef char** params
+ cdef Py_ssize_t i, parameter_count
+
+ resolver_context = _XSLTResolverContext(input_doc._parser)
+ resolver_context._c_style_doc = self._xslt_resolver_context._c_style_doc
+
xslt.xsltSetTransformErrorFunc(transform_ctxt, self._error_log,
_receiveXSLTError)
if self._access_control is not None:
self._access_control._register_in_context(transform_ctxt)
- if profile_run:
- transform_ctxt.profile = 1
-
transform_ctxt._private = self._xslt_resolver_context
- kw_count = python.PyDict_Size(_kw)
- if kw_count > 0:
+ parameter_count = python.PyDict_Size(parameters)
+ if parameter_count > 0:
# allocate space for parameters
# * 2 as we want an entry for both key and value,
# and + 1 as array is NULL terminated
params = python.PyMem_Malloc(
- sizeof(char*) * (kw_count * 2 + 1))
+ sizeof(char*) * (parameter_count * 2 + 1))
i = 0
keep_ref = []
- for key, value in _kw.iteritems():
+ for key, value in parameters.iteritems():
k = _utf8(key)
python.PyList_Append(keep_ref, k)
v = _utf8(value)
@@ -382,59 +436,16 @@
else:
params = NULL
- context = self._context._copy()
- context.register_context(transform_ctxt, input_doc)
-
state = python.PyEval_SaveThread()
- c_result = xslt.xsltApplyStylesheetUser(self._c_style, c_doc, params,
- NULL, NULL, transform_ctxt)
+ c_result = xslt.xsltApplyStylesheetUser(
+ self._c_style, c_input_doc, params, NULL, NULL, transform_ctxt)
python.PyEval_RestoreThread(state)
if params is not NULL:
# deallocate space for parameters
python.PyMem_Free(params)
- keep_ref = None
-
- if transform_ctxt.profile:
- c_profile_doc = xslt.xsltGetProfileInformation(transform_ctxt)
- if c_profile_doc is not NULL:
- profile_doc = _documentFactory(c_profile_doc, input_doc._parser)
- context.free_context()
- _destroyFakeDoc(input_doc._c_doc, c_doc)
-
- self._error_log.disconnect()
- try:
- if self._xslt_resolver_context._has_raised():
- if c_result is not NULL:
- tree.xmlFreeDoc(c_result)
- self._xslt_resolver_context._raise_if_stored()
-
- if c_result is NULL:
- error = self._error_log.last_error
- if error is not None and error.message:
- if error.line >= 0:
- message = "%s, line %d" % (error.message, error.line)
- else:
- message = error.message
- elif error.line >= 0:
- message = "Error applying stylesheet, line %d" % error.line
- else:
- message = "Error applying stylesheet"
- raise XSLTApplyError, message
- finally:
- self._xslt_resolver_context.clear()
-
- result_doc = _documentFactory(c_result, input_doc._parser)
- return _xsltResultTreeFactory(result_doc, self, profile_doc)
-
- def apply(self, _input, profile_run=False, **_kw):
- return self(_input, profile_run, **_kw)
-
- def tostring(self, _ElementTree result_tree):
- """Save result doc to string based on stylesheet output method.
- """
- return str(result_tree)
+ return c_result
cdef class _XSLTResultTree(_ElementTree):
cdef XSLT _xslt
@@ -511,17 +522,6 @@
# enable EXSLT support for XSLT
xslt.exsltRegisterAll()
-# extension function lookup for XSLT
-cdef xpath.xmlXPathFunction _xslt_function_check(void* ctxt,
- char* c_name, char* c_ns_uri):
- "Find XSLT extension function from set of XPath and XSLT functions"
- cdef xpath.xmlXPathFunction result
- result = _function_check(ctxt, c_name, c_ns_uri)
- if result is NULL:
- return xslt.xsltExtModuleFunctionLookup(c_name, c_ns_uri)
- else:
- return result
-
cdef void initTransformDict(xslt.xsltTransformContext* transform_ctxt):
__GLOBAL_PARSER_CONTEXT.initThreadDictRef(&transform_ctxt.dict)
From scoder at codespeak.net Thu Mar 22 08:31:16 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 22 Mar 2007 08:31:16 +0100 (CET)
Subject: [Lxml-checkins] r41008 - lxml/trunk/src/lxml/tests
Message-ID: <20070322073116.B216C1006E@code0.codespeak.net>
Author: scoder
Date: Thu Mar 22 08:31:15 2007
New Revision: 41008
Modified:
lxml/trunk/src/lxml/tests/test_objectify.py
Log:
doc cleanup
Modified: lxml/trunk/src/lxml/tests/test_objectify.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_objectify.py (original)
+++ lxml/trunk/src/lxml/tests/test_objectify.py Thu Mar 22 08:31:15 2007
@@ -1,10 +1,7 @@
# -*- coding: utf-8 -*-
"""
-Tests specific to the extended etree API
-
-Tests that apply to the general ElementTree API should go into
-test_elementtree
+Tests specific to the lxml.objectify API
"""
@@ -28,7 +25,7 @@
'''
class ObjectifyTestCase(HelperTestCase):
- """Test cases for lxml.elementlib.objectify
+ """Test cases for lxml.objectify
"""
etree = etree
From scoder at codespeak.net Thu Mar 22 08:31:28 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 22 Mar 2007 08:31:28 +0100 (CET)
Subject: [Lxml-checkins] r41009 - in lxml/trunk: . doc src/lxml
src/lxml/tests
Message-ID: <20070322073128.E5A5C1006F@code0.codespeak.net>
Author: scoder
Date: Thu Mar 22 08:31:26 2007
New Revision: 41009
Added:
lxml/trunk/src/lxml/pyclasslookup.pyx
lxml/trunk/src/lxml/tests/test_pyclasslookup.py
Modified:
lxml/trunk/CHANGES.txt
lxml/trunk/doc/element_classes.txt
lxml/trunk/setupinfo.py
lxml/trunk/src/lxml/apihelpers.pxi
lxml/trunk/src/lxml/classlookup.pxi
lxml/trunk/src/lxml/etree.pyx
lxml/trunk/src/lxml/etreepublic.pxd
lxml/trunk/src/lxml/public-api.pxi
lxml/trunk/src/lxml/tests/test_elementtree.py
lxml/trunk/src/lxml/tests/test_etree.py
Log:
lxml.pyclasslookup - element class lookup mechanism with tree access in Python space, collectAttributes() C-function, general cleanup
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Thu Mar 22 08:31:26 2007
@@ -2,6 +2,29 @@
lxml changelog
==============
+under development
+=================
+
+Features added
+--------------
+
+* ``lxml.pyclasslookup`` module that can access the entire tree to determine a
+ suitable Element class
+
+* ``Element.values()`` to accompany the existing ``keys()`` and ``items()``
+
+* ``collectAttributes()`` C-function to build a list of attribute
+ keys/values/items for a libxml2 node
+
+Bugs fixed
+----------
+
+Other changes
+-------------
+
+* major rewrite of internal extension function setup
+
+
1.3beta (2007-02-27)
====================
Modified: lxml/trunk/doc/element_classes.txt
==============================================================================
--- lxml/trunk/doc/element_classes.txt (original)
+++ lxml/trunk/doc/element_classes.txt Thu Mar 22 08:31:26 2007
@@ -89,7 +89,8 @@
>>> parser.setElementClassLookup(parser_lookup)
There is one drawback of the parser based scheme: the ``Element()`` factory
-creates a new document that deploys the default parser::
+does not know about your specialised parser and creates a new document that
+deploys the default parser::
>>> el = etree.Element("root")
>>> print isinstance(el, HonkElement)
@@ -231,8 +232,8 @@
Custom element class lookup
...........................
-This is the most customisable way of finding element classes. It allows you
-to implement a custom lookup scheme in a subclass::
+This is the most customisable way of finding element classes on a per-element
+basis. It allows you to implement a custom lookup scheme in a subclass::
>>> class MyLookup(etree.CustomElementClassLookup):
... def lookup(self, node_type, document, namespace, name):
@@ -250,6 +251,45 @@
per-parser setup.
+Tree based element class lookup in Python
+.........................................
+
+Taking more elaborate decisions than allowed by the custom scheme is difficult
+to achieve in pure Python. It would require access to the tree - before the
+elements in the tree have been instantiated as Python Element objects.
+
+Luckily, there is a way to do this. The separate module
+``lxml.pyclasslookup`` provides a lookup class called
+``PythonElementClassLookup`` that works similarly to the custom lookup scheme::
+
+ >>> from lxml.pyclasslookup import PythonElementClassLookup
+ >>> class MyLookup(PythonElementClassLookup):
+ ... def lookup(self, document, element):
+ ... return MyElementClass # defined elsewhere
+
+ >>> parser = etree.XMLParser()
+ >>> parser.setElementClassLookup(MyLookup())
+
+As before, the first argument to the ``lookup()`` method is the opaque
+document instance that contains the Element. The second argument is a
+lightweight Element proxy implementation that is only valid during the lookup.
+Do not try to keep a reference to it. Once the lookup is finished, the proxy
+will become invalid. You will get an ``AssertionError`` if you access any of
+the properties or methods outside the scope of the lookup call where they were
+instantiated.
+
+During the lookup, the element object behaves mostly like a normal Element
+instance. It provides the properties ``tag``, ``text``, ``tail`` etc. and
+supports indexing, slicing and the ``getchildren()``, ``getparent()``
+etc. methods. It does *not* support iteration, nor does it support any kind
+of modification. All of its properties are read-only and it cannot be removed
+or inserted into other trees. You can use it as a starting point to freely
+traverse the tree and collect any kind of information that its elements
+provide. Once you have taken the decision which class to use for this
+element, you can simply return it and have lxml take care of cleaning up the
+instantiated proxy classes.
+
+
Implementing namespaces
-----------------------
Modified: lxml/trunk/setupinfo.py
==============================================================================
--- lxml/trunk/setupinfo.py (original)
+++ lxml/trunk/setupinfo.py Thu Mar 22 08:31:26 2007
@@ -8,8 +8,9 @@
PYREX_INSTALLED = False
EXT_MODULES = [
- ("etree", "lxml.etree"),
- ("objectify", "lxml.objectify")
+ ("etree", "lxml.etree"),
+ ("objectify", "lxml.objectify"),
+ ("pyclasslookup", "lxml.pyclasslookup")
]
Modified: lxml/trunk/src/lxml/apihelpers.pxi
==============================================================================
--- lxml/trunk/src/lxml/apihelpers.pxi (original)
+++ lxml/trunk/src/lxml/apihelpers.pxi Thu Mar 22 08:31:26 2007
@@ -232,6 +232,29 @@
tree.xmlRemoveProp(c_attr)
return 0
+cdef object _collectAttributes(xmlNode* c_node, int collecttype):
+ """Collect all attributes of a node in a list. Depending on collecttype,
+ it collects either the name (1), the value (2) or the name-value tuples.
+ """
+ cdef xmlAttr* c_attr
+ c_attr = c_node.properties
+ attributes = []
+ while c_attr is not NULL:
+ if c_attr.type == tree.XML_ATTRIBUTE_NODE:
+ if collecttype == 1:
+ item = _namespacedName(c_attr)
+ elif collecttype == 2:
+ item = _attributeValue(c_node, c_attr)
+ else:
+ item = (_namespacedName(c_attr),
+ _attributeValue(c_node, c_attr))
+
+ ret = python.PyList_Append(attributes, item)
+ if ret:
+ raise
+ c_attr = c_attr.next
+ return attributes
+
cdef object __RE_XML_ENCODING
__RE_XML_ENCODING = re.compile(
r'^(\s*<\?\s*xml[^>]+)\s+encoding\s*=\s*"[^"]*"\s*', re.U)
Modified: lxml/trunk/src/lxml/classlookup.pxi
==============================================================================
--- lxml/trunk/src/lxml/classlookup.pxi (original)
+++ lxml/trunk/src/lxml/classlookup.pxi Thu Mar 22 08:31:26 2007
@@ -206,7 +206,7 @@
You can inherit from this class and override the method
- lookup(type, doc, namespace, name)
+ lookup(self, type, doc, namespace, name)
to lookup the element class for a node. Arguments of the method:
* type: one of 'element', 'comment', 'PI'
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Thu Mar 22 08:31:26 2007
@@ -717,8 +717,8 @@
return "" % (self.tag, id(self))
def __getitem__(self, Py_ssize_t index):
- """Returns the given subelement.
- """
+ """Returns the subelement at the given position.
+ """
cdef xmlNode* c_node
c_node = _findChild(self._c_node, index)
if c_node is NULL:
@@ -739,10 +739,10 @@
return []
c = start
result = []
- doc = self._doc
while c_node is not NULL and c < stop:
if _isElement(c_node):
- ret = python.PyList_Append(result, _elementFactory(doc, c_node))
+ ret = python.PyList_Append(
+ result, _elementFactory(self._doc, c_node))
if ret:
raise
c = c + 1
@@ -858,29 +858,34 @@
return _getAttributeValue(self, key, default)
def keys(self):
- """Gets a list of attribute names. The names are returned in an arbitrary
- order (just like for an ordinary Python dictionary).
+ """Gets a list of attribute names. The names are returned in an
+ arbitrary order (just like for an ordinary Python dictionary).
"""
- return python.PySequence_List( _attributeIteratorFactory(self, 1) )
+ return _collectAttributes(self._c_node, 1)
+
+ def values(self):
+ """Gets element attribute values as a sequence of strings. The
+ attributes are returned in an arbitrary order.
+ """
+ return _collectAttributes(self._c_node, 2)
def items(self):
"""Gets element attributes, as a sequence. The attributes are returned in
an arbitrary order.
"""
- return python.PySequence_List( _attributeIteratorFactory(self, 3) )
+ return _collectAttributes(self._c_node, 3)
def getchildren(self):
"""Returns all subelements. The elements are returned in document order.
"""
cdef xmlNode* c_node
- cdef _Document doc
cdef int ret
result = []
- doc = self._doc
c_node = self._c_node.children
while c_node is not NULL:
if _isElement(c_node):
- ret = python.PyList_Append(result, _elementFactory(doc, c_node))
+ ret = python.PyList_Append(
+ result, _elementFactory(self._doc, c_node))
if ret:
raise
c_node = c_node.next
@@ -1441,28 +1446,25 @@
return _getAttributeValue(self._element, key, default)
def keys(self):
- return python.PySequence_List(
- _attributeIteratorFactory(self._element, 1) )
+ return _collectAttributes(self._element._c_node, 1)
def __iter__(self):
- return iter(self.keys())
+ return iter(_collectAttributes(self._element._c_node, 1))
def iterkeys(self):
- return iter(self.keys())
+ return iter(_collectAttributes(self._element._c_node, 1))
def values(self):
- return python.PySequence_List(
- _attributeIteratorFactory(self._element, 2) )
+ return _collectAttributes(self._element._c_node, 2)
def itervalues(self):
- return iter(self.values())
+ return iter(_collectAttributes(self._element._c_node, 2))
def items(self):
- return python.PySequence_List(
- _attributeIteratorFactory(self._element, 3) )
+ return _collectAttributes(self._element._c_node, 3)
def iteritems(self):
- return iter(self.items())
+ return iter(_collectAttributes(self._element._c_node, 3))
def has_key(self, key):
if key in self:
Modified: lxml/trunk/src/lxml/etreepublic.pxd
==============================================================================
--- lxml/trunk/src/lxml/etreepublic.pxd (original)
+++ lxml/trunk/src/lxml/etreepublic.pxd Thu Mar 22 08:31:26 2007
@@ -104,6 +104,9 @@
# attributes must not be removed during iteration!
cdef object iterattributes(_Element element, int keysvalues)
+ # return the list of all attribute names (1), values (2) or items (3)
+ cdef object collectAttributes(tree.xmlNode* c_element, int keysvalues)
+
# set an attribute value on an element
# on failure, sets an exception and returns -1
cdef int setAttributeValue(_Element element, key, value) except -1
Modified: lxml/trunk/src/lxml/public-api.pxi
==============================================================================
--- lxml/trunk/src/lxml/public-api.pxi (original)
+++ lxml/trunk/src/lxml/public-api.pxi Thu Mar 22 08:31:26 2007
@@ -83,6 +83,9 @@
cdef public object iterattributes(_Element element, int keysvalues):
return _attributeIteratorFactory(element, keysvalues)
+cdef public object collectAttributes(xmlNode* c_element, int keysvalues):
+ return _collectAttributes(c_element, keysvalues)
+
cdef public int setAttributeValue(_Element element, key, value) except -1:
return _setAttributeValue(element, key, value)
Added: lxml/trunk/src/lxml/pyclasslookup.pyx
==============================================================================
--- (empty file)
+++ lxml/trunk/src/lxml/pyclasslookup.pyx Thu Mar 22 08:31:26 2007
@@ -0,0 +1,277 @@
+from etreepublic cimport _Document, _Element, ElementBase
+from etreepublic cimport ElementClassLookup, FallbackElementClassLookup
+from etreepublic cimport elementFactory, import_etree
+from python cimport str, repr, isinstance, issubclass, iter
+from python cimport _cstr, Py_ssize_t
+cimport etreepublic as cetree
+cimport python
+cimport tree
+cimport cstd
+
+__all__ = ["PythonElementClassLookup"]
+
+cdef object etree
+from lxml import etree
+# initialize C-API of lxml.etree
+import_etree(etree)
+
+cdef class _ElementProxy:
+ cdef tree.xmlNode* _c_node
+ cdef object _source_proxy
+ cdef object _dependent_proxies
+
+ cdef int _assertNode(self) except -1:
+ """This is our way of saying: this proxy is invalid!
+ """
+ assert self._c_node is not NULL, "Proxy invalidated!"
+ return 0
+
+ property tag:
+ """Element tag
+ """
+ def __get__(self):
+ self._assertNode()
+ return cetree.namespacedName(self._c_node)
+
+ property text:
+ """Text before the first subelement. This is either a string or
+ the value None, if there was no text.
+ """
+ def __get__(self):
+ self._assertNode()
+ return cetree.textOf(self._c_node)
+
+ property tail:
+ """Text after this element's end tag, but before the next sibling
+ element's start tag. This is either a string or the value None, if
+ there was no text.
+ """
+ def __get__(self):
+ self._assertNode()
+ return cetree.tailOf(self._c_node)
+
+ property attrib:
+ def __get__(self):
+ self._assertNode()
+ return dict(cetree.collectAttributes(self._c_node, 3))
+
+ property prefix:
+ """Namespace prefix or None.
+ """
+ def __get__(self):
+ self._assertNode()
+ if self._c_node.ns is not NULL:
+ if self._c_node.ns.prefix is not NULL:
+ return cetree.pyunicode(self._c_node.ns.prefix)
+ return None
+
+ property sourceline:
+ """Original line number as found by the parser or None if unknown.
+ """
+ def __get__(self):
+ cdef long line
+ self._assertNode()
+ line = tree.xmlGetLineNo(self._c_node)
+ if line > 0:
+ return line
+ else:
+ return None
+
+ def __repr__(self):
+ return "" % (self.tag, id(self))
+
+ def __getitem__(self, Py_ssize_t index):
+ """Returns the subelement at the given position.
+ """
+ cdef tree.xmlNode* c_node
+ c_node = cetree.findChild(self._c_node, index)
+ if c_node is NULL:
+ raise IndexError, "list index out of range"
+ return _newProxy(self._source_proxy, c_node)
+
+ def __getslice__(self, Py_ssize_t start, Py_ssize_t stop):
+ """Returns a list containing subelements in the given range.
+ """
+ cdef tree.xmlNode* c_node
+ cdef Py_ssize_t c
+ c_node = cetree.findChild(self._c_node, start)
+ if c_node is NULL:
+ return []
+ c = start
+ result = []
+ while c_node is not NULL and c < stop:
+ if tree._isElement(c_node):
+ ret = python.PyList_Append(
+ result, _newProxy(self._source_proxy, c_node))
+ if ret:
+ raise
+ c = c + 1
+ c_node = c_node.next
+ return result
+
+ def __len__(self):
+ """Returns the number of subelements.
+ """
+ cdef Py_ssize_t c
+ cdef tree.xmlNode* c_node
+ self._assertNode()
+ c = 0
+ c_node = self._c_node.children
+ while c_node is not NULL:
+ if tree._isElement(c_node):
+ c = c + 1
+ c_node = c_node.next
+ return c
+
+ def __nonzero__(self):
+ cdef tree.xmlNode* c_node
+ self._assertNode()
+ c_node = cetree.findChildBackwards(self._c_node, 0)
+ return c_node != NULL
+
+ def get(self, key, default=None):
+ """Gets an element attribute.
+ """
+ self._assertNode()
+ return _getAttributeValue(self._c_node, key, default)
+
+ def keys(self):
+ """Gets a list of attribute names. The names are returned in an
+ arbitrary order (just like for an ordinary Python dictionary).
+ """
+ self._assertNode()
+ return cetree.collectAttributes(self._c_node, 1)
+
+ def values(self):
+ """Gets element attribute values as a sequence of strings. The
+ attributes are returned in an arbitrary order.
+ """
+ self._assertNode()
+ return cetree.collectAttributes(self._c_node, 2)
+
+ def items(self):
+ """Gets element attributes, as a sequence. The attributes are returned
+ in an arbitrary order.
+ """
+ self._assertNode()
+ return cetree.collectAttributes(self._c_node, 3)
+
+ def getchildren(self):
+ """Returns all subelements. The elements are returned in document
+ order.
+ """
+ cdef tree.xmlNode* c_node
+ cdef int ret
+ self._assertNode()
+ result = []
+ c_node = self._c_node.children
+ while c_node is not NULL:
+ if tree._isElement(c_node):
+ ret = python.PyList_Append(
+ result, _newProxy(self._source_proxy, c_node))
+ if ret:
+ raise
+ c_node = c_node.next
+ return result
+
+ def getparent(self):
+ """Returns the parent of this element or None for the root element.
+ """
+ cdef tree.xmlNode* c_parent
+ self._assertNode()
+ c_parent = self._c_node.parent
+ if c_parent is NULL or not tree._isElement(c_parent):
+ return None
+ else:
+ return _newProxy(self._source_proxy, c_parent)
+
+ def getnext(self):
+ """Returns the following sibling of this element or None.
+ """
+ cdef tree.xmlNode* c_node
+ self._assertNode()
+ c_node = cetree.nextElement(self._c_node)
+ if c_node is not NULL:
+ return _newProxy(self._source_proxy, c_node)
+ return None
+
+ def getprevious(self):
+ """Returns the preceding sibling of this element or None.
+ """
+ cdef tree.xmlNode* c_node
+ self._assertNode()
+ c_node = cetree.previousElement(self._c_node)
+ if c_node is not NULL:
+ return _newProxy(self._source_proxy, c_node)
+ return None
+
+cdef _ElementProxy _newProxy(_ElementProxy sourceProxy, tree.xmlNode* c_node):
+ cdef _ElementProxy el
+ el = _ElementProxy()
+ el._c_node = c_node
+ if sourceProxy is None:
+ sourceProxy = el
+ el._dependent_proxies = []
+ el._source_proxy = sourceProxy
+ python.PyList_Append(sourceProxy._dependent_proxies, el)
+ return el
+
+cdef _freeProxies(_ElementProxy sourceProxy):
+ cdef _ElementProxy el
+ if sourceProxy is None:
+ return
+ if sourceProxy._dependent_proxies is None:
+ return
+ for el in sourceProxy._dependent_proxies:
+ el._c_node = NULL
+ del sourceProxy._dependent_proxies[:]
+
+cdef object _getAttributeValue(tree.xmlNode* c_node, key, default):
+ cdef char* c_tag
+ cdef char* c_href
+ ns, tag = cetree.getNsTag(key)
+ c_tag = _cstr(tag)
+ if ns is None:
+ c_href = NULL
+ else:
+ c_href = _cstr(ns)
+ result = cetree.attributeValueFromNsName(c_node, c_href, c_tag)
+ if result is None:
+ return default
+ return result
+
+
+cdef class PythonElementClassLookup(FallbackElementClassLookup):
+ """Element class lookup based on a subclass method.
+
+ To use it, inherit from this class and override the method
+
+ lookup(self, document, node_proxy)
+
+ to lookup the element class for a node. The first argument is the opaque
+ document instance that contains the Element. The second argument is a
+ lightweight Element proxy implementation that is only valid during the
+ lookup. Do not try to keep a reference to it. Once the lookup is done, the
+ proxy will be invalid.
+
+ If you return None from this method, the fallback will be called.
+ """
+ def __init__(self, ElementClassLookup fallback=None):
+ FallbackElementClassLookup.__init__(self, fallback)
+ self._lookup_function = _lookup_class
+
+ def lookup(self, doc, element):
+ return None
+
+cdef object _lookup_class(state, _Document doc, tree.xmlNode* c_node):
+ cdef PythonElementClassLookup lookup
+ cdef _ElementProxy proxy
+ lookup = state
+
+ proxy = _newProxy(None, c_node)
+ cls = lookup.lookup(doc, proxy)
+ _freeProxies(proxy)
+
+ if cls is not None:
+ return cls
+ return cetree.callLookupFallback(lookup, doc, c_node)
Modified: lxml/trunk/src/lxml/tests/test_elementtree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_elementtree.py (original)
+++ lxml/trunk/src/lxml/tests/test_elementtree.py Thu Mar 22 08:31:26 2007
@@ -360,6 +360,16 @@
keys.sort()
self.assertEquals(['alpha', 'beta', 'gamma'], keys)
+ def test_attribute_items2(self):
+ XML = self.etree.XML
+
+ root = XML(' ')
+ items = root.items()
+ items.sort()
+ self.assertEquals(
+ [('alpha','Alpha'), ('beta','Beta'), ('gamma','Gamma')],
+ items)
+
def test_attribute_keys_ns(self):
XML = self.etree.XML
Modified: lxml/trunk/src/lxml/tests/test_etree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_etree.py (original)
+++ lxml/trunk/src/lxml/tests/test_etree.py Thu Mar 22 08:31:26 2007
@@ -371,6 +371,15 @@
Element = self.etree.Element
self.assertRaises(TypeError, Element('a').append, None)
+ # ET's Elements have items() and keys(), but not values()
+ def test_attribute_values(self):
+ XML = self.etree.XML
+
+ root = XML(' ')
+ values = root.values()
+ values.sort()
+ self.assertEquals(['Alpha', 'Beta', 'Gamma'], values)
+
# gives error in ElementTree
def test_comment_empty(self):
Element = self.etree.Element
Added: lxml/trunk/src/lxml/tests/test_pyclasslookup.py
==============================================================================
--- (empty file)
+++ lxml/trunk/src/lxml/tests/test_pyclasslookup.py Thu Mar 22 08:31:26 2007
@@ -0,0 +1,290 @@
+# -*- coding: utf-8 -*-
+
+"""
+Tests specific to the Python based class lookup.
+"""
+
+
+import unittest, operator
+
+from common_imports import etree, StringIO, HelperTestCase, fileInTestDir
+from common_imports import SillyFileLike, canonicalize, doctest
+from common_imports import itemgetter
+
+from lxml.pyclasslookup import PythonElementClassLookup
+
+xml_str = '''\
+
+
+ 0
+ 1
+ 2
+ 3
+ 3
+
+ '''
+
+
+class PyClassLookupTestCase(HelperTestCase):
+ """Test cases for the lxml.pyclasslookup class lookup mechanism.
+ """
+ etree = etree
+ parser = etree.XMLParser()
+ Element = parser.makeelement
+
+ def tearDown(self):
+ self.parser.setElementClassLookup(None)
+
+ def _setClassLookup(self, lookup_function):
+ class Lookup(PythonElementClassLookup):
+ def lookup(self, *args):
+ return lookup_function(*args)
+ self.parser.setElementClassLookup( Lookup() )
+
+ def _buildElementClass(self):
+ class LocalElement(etree.ElementBase):
+ pass
+ return LocalElement
+
+ def XML(self, xml):
+ return self.etree.XML(xml, self.parser)
+
+ # --- Test cases
+
+ def test_lookup(self):
+ el_class = self._buildElementClass()
+ el_class.i = 1
+ def lookup(*args):
+ if el_class.i == 1:
+ el_class.i = 2
+ return el_class
+ self._setClassLookup(lookup)
+ root = self.XML(xml_str)
+ self.assertEquals(2, el_class.i)
+
+ def test_lookup_keep_ref_assertion(self):
+ el_class = self._buildElementClass()
+ el_class.EL = None
+ def lookup(doc, el):
+ if el_class.EL is None:
+ el_class.EL = el
+ return el_class
+ self._setClassLookup(lookup)
+ root = self.XML(xml_str)
+ self.assertNotEquals(None, el_class.EL)
+ self.assertRaises(AssertionError, el_class.EL.getchildren)
+
+ def test_lookup_tag(self):
+ el_class = self._buildElementClass()
+ el_class.TAG = None
+ def lookup(doc, el):
+ if el_class.TAG is None:
+ el_class.TAG = el.tag
+ return el_class
+ self._setClassLookup(lookup)
+ root = self.XML(xml_str)
+ self.assertNotEquals(None, root.TAG)
+ self.assertEquals(root.tag, root.TAG)
+
+ def test_lookup_text(self):
+ el_class = self._buildElementClass()
+ el_class.TEXT = None
+ def lookup(doc, el):
+ if el_class.TEXT is None:
+ el_class.TEXT = el.text
+ return el_class
+ self._setClassLookup(lookup)
+ root = self.XML(xml_str)
+ self.assertNotEquals(None, root.TEXT)
+ self.assertEquals(root.text, root.TEXT)
+
+ def test_lookup_tail(self):
+ el_class = self._buildElementClass()
+ el_class.TAIL = None
+ def lookup(doc, el):
+ if el_class.TAIL is None:
+ el_class.TAIL = el.tail
+ return el_class
+ self._setClassLookup(lookup)
+ root = self.XML(xml_str)
+ self.assertEquals(root.tail, root.TAIL)
+
+ def test_lookup_attrib(self):
+ el_class = self._buildElementClass()
+ el_class.ATTRIB = None
+ def lookup(doc, el):
+ if el_class.ATTRIB is None:
+ el_class.ATTRIB = el[0].attrib
+ return el_class
+ self._setClassLookup(lookup)
+ root = self.XML(xml_str)
+ items1 = root[0].attrib.items()
+ items1.sort()
+ items2 = root.ATTRIB.items()
+ items2.sort()
+ self.assertEquals(items1, items2)
+
+ def test_lookup_prefix(self):
+ el_class = self._buildElementClass()
+ el_class.PREFIX = None
+ def lookup(doc, el):
+ if el_class.PREFIX is None:
+ el_class.PREFIX = el.prefix
+ return el_class
+ self._setClassLookup(lookup)
+ root = self.XML(xml_str)
+ self.assertEquals(root.prefix, root.PREFIX)
+
+ def test_lookup_sourceline(self):
+ el_class = self._buildElementClass()
+ el_class.LINE = None
+ def lookup(doc, el):
+ if el_class.LINE is None:
+ el_class.LINE = el.sourceline
+ return el_class
+ self._setClassLookup(lookup)
+ root = self.XML(xml_str)
+ self.assertEquals(root.sourceline, root.LINE)
+
+ def test_lookup_getitem(self):
+ el_class = self._buildElementClass()
+ el_class.CHILD_TAG = None
+ def lookup(doc, el):
+ el_class.CHILD_TAG = el[0].tag
+ return el_class
+ self._setClassLookup(lookup)
+ root = self.XML(xml_str)
+ child_tag = root.CHILD_TAG
+ self.assertNotEquals(None, child_tag)
+ self.assertEquals(root[0].tag, child_tag)
+
+ def test_lookup_getitem_neg(self):
+ el_class = self._buildElementClass()
+ el_class.CHILD_TAG = None
+ def lookup(doc, el):
+ if el_class.CHILD_TAG is None:
+ el_class.CHILD_TAG = el[-1].tag
+ return el_class
+ self._setClassLookup(lookup)
+ root = self.XML(xml_str)
+ child_tag = root.CHILD_TAG
+ self.assertNotEquals(None, child_tag)
+ self.assertEquals(root[-1].tag, child_tag)
+
+ def test_lookup_getslice(self):
+ el_class = self._buildElementClass()
+ el_class.CHILD_TAGS = None
+ def lookup(doc, el):
+ if el_class.CHILD_TAGS is None:
+ el_class.CHILD_TAGS = [ c.tag for c in el[1:-1] ]
+ return el_class
+ self._setClassLookup(lookup)
+ root = self.XML(xml_str)
+ child_tags = root.CHILD_TAGS
+ self.assertNotEquals(None, child_tags)
+ self.assertEquals([ c.tag for c in root[1:-1] ],
+ child_tags)
+
+ def test_lookup_len(self):
+ el_class = self._buildElementClass()
+ el_class.LEN = None
+ def lookup(doc, el):
+ if el_class.LEN is None:
+ el_class.LEN = len(el)
+ return el_class
+ self._setClassLookup(lookup)
+ root = self.XML(xml_str)
+ self.assertEquals(1, el_class.LEN)
+
+ def test_lookup_bool(self):
+ el_class = self._buildElementClass()
+ el_class.TRUE = None
+ def lookup(doc, el):
+ if el_class.TRUE is None:
+ el_class.TRUE = bool(el)
+ return el_class
+ self._setClassLookup(lookup)
+ root = self.XML(xml_str)
+ self.assert_(el_class.TRUE)
+
+ def test_lookup_get(self):
+ el_class = self._buildElementClass()
+ el_class.VAL = None
+ def lookup(doc, el):
+ if el_class.VAL is None:
+ el_class.VAL = el[0].get('a1')
+ return el_class
+ self._setClassLookup(lookup)
+ root = self.XML(xml_str)
+ self.assertNotEquals(None, el_class.VAL)
+ self.assertEquals(root[0].get('a1'), el_class.VAL)
+
+ def test_lookup_get_default(self):
+ el_class = self._buildElementClass()
+ default = str(id(el_class))
+ el_class.VAL = None
+ def lookup(doc, el):
+ if el_class.VAL is None:
+ el_class.VAL = el[0].get('unknownattribute', default)
+ return el_class
+ self._setClassLookup(lookup)
+ root = self.XML(xml_str)
+ self.assertEquals(default, el_class.VAL)
+
+ def test_lookup_getchildren(self):
+ el_class = self._buildElementClass()
+ el_class.CHILD_TAGS = None
+ def lookup(doc, el):
+ if el_class.CHILD_TAGS is None:
+ el_class.CHILD_TAGS = [ c.tag for c in el.getchildren() ]
+ return el_class
+ self._setClassLookup(lookup)
+ root = self.XML(xml_str)
+ child_tags = root.CHILD_TAGS
+ self.assertNotEquals(None, child_tags)
+ self.assertEquals([ c.tag for c in root.getchildren() ],
+ child_tags)
+
+ def test_lookup_getparent(self):
+ el_class = self._buildElementClass()
+ el_class.PARENT = None
+ def lookup(doc, el):
+ if el_class.PARENT is None:
+ el_class.PARENT = el[0].getparent().tag
+ return el_class
+ self._setClassLookup(lookup)
+ root = self.XML(xml_str)
+ self.assertEquals(root.tag, root.PARENT)
+
+ def test_lookup_getnext(self):
+ el_class = self._buildElementClass()
+ el_class.NEXT = None
+ def lookup(doc, el):
+ if el_class.NEXT is None:
+ el_class.NEXT = el[0][1].getnext().tag
+ return el_class
+ self._setClassLookup(lookup)
+ root = self.XML(xml_str)
+ self.assertNotEquals(None, el_class.NEXT)
+ self.assertEquals(root[0][1].getnext().tag, el_class.NEXT)
+
+ def test_lookup_getprevious(self):
+ el_class = self._buildElementClass()
+ el_class.PREV = None
+ def lookup(doc, el):
+ if el_class.PREV is None:
+ el_class.PREV = el[0][1].getprevious().tag
+ return el_class
+ self._setClassLookup(lookup)
+ root = self.XML(xml_str)
+ self.assertNotEquals(None, el_class.PREV)
+ self.assertEquals(root[0][1].getprevious().tag, el_class.PREV)
+
+
+def test_suite():
+ suite = unittest.TestSuite()
+ suite.addTests([unittest.makeSuite(PyClassLookupTestCase)])
+ return suite
+
+if __name__ == '__main__':
+ unittest.main()
From scoder at codespeak.net Thu Mar 22 19:57:55 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 22 Mar 2007 19:57:55 +0100 (CET)
Subject: [Lxml-checkins] r41117 - lxml/trunk
Message-ID: <20070322185755.3074210072@code0.codespeak.net>
Author: scoder
Date: Thu Mar 22 19:57:53 2007
New Revision: 41117
Modified:
lxml/trunk/CHANGES.txt
Log:
cleanup
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Thu Mar 22 19:57:53 2007
@@ -8,10 +8,10 @@
Features added
--------------
-* ``lxml.pyclasslookup`` module that can access the entire tree to determine a
- suitable Element class
+* ``lxml.pyclasslookup`` module that can access the entire tree in read-only
+ mode to help determine a suitable Element class
-* ``Element.values()`` to accompany the existing ``keys()`` and ``items()``
+* ``Element.values()`` to accompany the existing ``.keys()`` and ``.items()``
* ``collectAttributes()`` C-function to build a list of attribute
keys/values/items for a libxml2 node
From scoder at codespeak.net Thu Mar 29 09:52:17 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 29 Mar 2007 09:52:17 +0200 (CEST)
Subject: [Lxml-checkins] r41601 - in lxml/trunk: . benchmark doc src/lxml
Message-ID: <20070329075217.5EAA510079@code0.codespeak.net>
Author: scoder
Date: Thu Mar 29 09:52:15 2007
New Revision: 41601
Modified:
lxml/trunk/CHANGES.txt
lxml/trunk/benchmark/bench_xpath.py
lxml/trunk/doc/xpathxslt.txt
lxml/trunk/src/lxml/extensions.pxi
lxml/trunk/src/lxml/nsclasses.pxi
lxml/trunk/src/lxml/xpath.pxi
lxml/trunk/src/lxml/xslt.pxd
lxml/trunk/src/lxml/xslt.pxi
Log:
merged extension_refactoring branch
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Thu Mar 29 09:52:15 2007
@@ -2,12 +2,14 @@
lxml changelog
==============
-under development
+Under Development
=================
Features added
--------------
+* EXSLT RegExp support in standard XPath (not only XSLT)
+
* ``lxml.pyclasslookup`` module that can access the entire tree in read-only
mode to help determine a suitable Element class
@@ -19,10 +21,12 @@
Bugs fixed
----------
+* Thread safety in XPath evaluators
+
Other changes
-------------
-* major rewrite of internal extension function setup
+* major refactoring in XPath/XSLT extension function code
1.3beta (2007-02-27)
Modified: lxml/trunk/benchmark/bench_xpath.py
==============================================================================
--- lxml/trunk/benchmark/bench_xpath.py (original)
+++ lxml/trunk/benchmark/bench_xpath.py Thu Mar 29 09:52:15 2007
@@ -35,31 +35,32 @@
@onlylib('lxe')
def bench_xpath_old_extensions(self, root):
- def return_child(_, element):
- if element:
- return element[0]
+ def return_child(_, elements):
+ if elements:
+ return elements[0][0]
else:
return ()
- extensions = {(None, 'child') : return_child}
- xpath = self.etree.XPath("child(.)", extensions=extensions)
+ extensions = {("test", "child") : return_child}
+ xpath = self.etree.XPath("t:child(.)", namespaces={"test":"t"},
+ extensions=extensions)
for child in root:
xpath(child)
@onlylib('lxe')
def bench_xpath_extensions(self, root):
- def return_child(_, element):
- if element:
- return element[0]
+ def return_child(_, elements):
+ if elements:
+ return elements[0][0]
else:
return ()
- self.etree.FunctionNamespace("test")["t"] = return_child
+ self.etree.FunctionNamespace("testns")["t"] = return_child
try:
- xpath = self.etree.XPath("test:t(.)", {"test":"test"})
+ xpath = self.etree.XPath("test:t(.)", {"test":"testns"})
for child in root:
xpath(child)
finally:
- del self.etree.FunctionNamespace("test")["t"]
+ del self.etree.FunctionNamespace("testns")["t"]
if __name__ == '__main__':
benchbase.main(XPathBenchMark)
Modified: lxml/trunk/doc/xpathxslt.txt
==============================================================================
--- lxml/trunk/doc/xpathxslt.txt (original)
+++ lxml/trunk/doc/xpathxslt.txt Thu Mar 29 09:52:15 2007
@@ -72,11 +72,12 @@
>>> f = StringIO('''\
...
+ ... xmlns:b="http://codespeak.net/ns/test2">
... Text
...
... ''')
>>> doc = etree.parse(f)
+
>>> r = doc.xpath('/t:foo/b:bar', {'t': 'http://codespeak.net/ns/test1',
... 'b': 'http://codespeak.net/ns/test2'})
>>> len(r)
Modified: lxml/trunk/src/lxml/extensions.pxi
==============================================================================
--- lxml/trunk/src/lxml/extensions.pxi (original)
+++ lxml/trunk/src/lxml/extensions.pxi Thu Mar 29 09:52:15 2007
@@ -1,4 +1,4 @@
-# supports for extension functions in XPath and XSLT
+# support for extension functions in XPath and XSLT
class XPathError(LxmlError):
pass
@@ -9,6 +9,11 @@
class XPathResultError(XPathError):
pass
+# forward declarations
+
+ctypedef int _register_function(void* ctxt, name_utf, ns_uri_utf)
+cdef class _ExsltRegExp
+
################################################################################
# Base class for XSLT and XPath evaluation contexts: functions, namespaces, ...
@@ -17,6 +22,7 @@
cdef _Document _doc
cdef object _extensions
cdef object _namespaces
+ cdef object _global_namespaces
cdef object _utf_refs
cdef object _function_cache
cdef object _function_cache_ns
@@ -25,18 +31,18 @@
cdef _TempStore _temp_refs
cdef _ExceptionContext _exc
- def __init__(self, namespaces, extensions):
- self._xpathCtxt = NULL
+ def __init__(self, namespaces, extensions, enable_regexp):
+ cdef _ExsltRegExp _regexp
self._utf_refs = {}
+ self._global_namespaces = []
self._function_cache = {}
self._function_cache_ns = {}
- self._called_function = None
if extensions is not None:
# convert extensions to UTF-8
if python.PyDict_Check(extensions):
extensions = (extensions,)
- # format: [ {(ns,name):function} ] -> {(ns_utf,name_utf):function}
+ # format: [ {(ns, name):function} ] -> {(ns_utf, name_utf):function}
new_extensions = {}
for extension in extensions:
for (ns_uri, name), function in extension.items():
@@ -49,17 +55,38 @@
new_extensions, (ns_utf, name_utf), function)
extensions = new_extensions or None
+ if namespaces is not None:
+ if python.PyDict_Check(namespaces):
+ namespaces = namespaces.items()
+ if namespaces:
+ ns = []
+ for prefix, ns_uri in namespaces:
+ if prefix is None:
+ raise TypeError, \
+ "empty namespace prefix is not supported in XPath"
+ if ns_uri is None:
+ raise TypeError, \
+ "setting default namespace is not supported in XPath"
+ prefix_utf = self._to_utf(prefix)
+ ns_uri_utf = self._to_utf(ns_uri)
+ python.PyList_Append(ns, (prefix_utf, ns_uri_utf))
+ namespaces = ns
+
self._doc = None
self._exc = _ExceptionContext()
self._extensions = extensions
self._namespaces = namespaces
self._temp_refs = _TempStore()
+ if enable_regexp:
+ _regexp = _ExsltRegExp()
+ _regexp._register_in_context(self)
+
cdef _copy(self):
cdef _BaseContext context
if self._namespaces is not None:
- namespaces = python.PyDict_Copy(self._namespaces)
- context = self.__class__(namespaces, None)
+ namespaces = self._namespaces[:]
+ context = self.__class__(namespaces, None, False)
if self._extensions is not None:
context._extensions = python.PyDict_Copy(self._extensions)
return context
@@ -83,41 +110,147 @@
cdef _register_context(self, _Document doc):
self._doc = doc
self._exc.clear()
- python.PyDict_Clear(self._function_cache)
- python.PyDict_Clear(self._function_cache_ns)
- namespaces = self._namespaces
- if namespaces is not None:
- self.registerNamespaces(namespaces)
- cdef _unregister_context(self):
- xpath.xmlXPathRegisteredNsCleanup(self._xpathCtxt)
- self._free_context()
-
- cdef _free_context(self):
+ cdef _cleanup_context(self):
+ #xpath.xmlXPathRegisteredNsCleanup(self._xpathCtxt)
+ #self.unregisterGlobalNamespaces()
python.PyDict_Clear(self._utf_refs)
self._doc = None
+
+ cdef _release_context(self):
if self._xpathCtxt is not NULL:
self._xpathCtxt.userData = NULL
self._xpathCtxt = NULL
# namespaces (internal UTF-8 methods with leading '_')
- cdef addNamespace(self, prefix, uri):
+ cdef addNamespace(self, prefix, ns_uri):
+ if prefix is None:
+ raise TypeError, "empty prefix is not supported in XPath"
+ prefix_utf = self._to_utf(prefix)
+ ns_uri_utf = self._to_utf(ns_uri)
+ new_item = (prefix_utf, ns_uri_utf)
if self._namespaces is None:
- self._namespaces = {}
- python.PyDict_SetItem(self._namespaces, prefix, uri)
+ self._namespaces = [new_item]
+ else:
+ namespaces = []
+ for item in self._namespaces:
+ if item[0] == prefix_utf:
+ item = new_item
+ new_item = None
+ python.PyList_Append(namespaces, item)
+ if new_item is not None:
+ python.PyList_Append(namespaces, new_item)
+ self._namespaces = namespaces
+ if self._xpathCtxt is not NULL:
+ xpath.xmlXPathRegisterNs(
+ self._xpathCtxt, _cstr(prefix_utf), _cstr(ns_uri_utf))
- cdef registerNamespaces(self, namespaces):
- for prefix, uri in namespaces.items():
- self.registerNamespace(prefix, uri)
-
cdef registerNamespace(self, prefix, ns_uri):
+ if prefix is None:
+ raise TypeError, "empty prefix is not supported in XPath"
prefix_utf = self._to_utf(prefix)
ns_uri_utf = self._to_utf(ns_uri)
- xpath.xmlXPathRegisterNs(self._xpathCtxt, prefix_utf, ns_uri_utf)
+ python.PyList_Append(self._global_namespaces, prefix_utf)
+ xpath.xmlXPathRegisterNs(self._xpathCtxt,
+ _cstr(prefix_utf), _cstr(ns_uri_utf))
+
+ cdef registerLocalNamespaces(self):
+ if self._namespaces is None:
+ return
+ for prefix_utf, ns_uri_utf in self._namespaces:
+ xpath.xmlXPathRegisterNs(
+ self._xpathCtxt, _cstr(prefix_utf), _cstr(ns_uri_utf))
+
+ cdef registerGlobalNamespaces(self):
+ ns_prefixes = _find_all_extension_prefixes()
+ if python.PyList_GET_SIZE(ns_prefixes) > 0:
+ for prefix_utf, ns_uri_utf in ns_prefixes:
+ python.PyList_Append(self._global_namespaces, prefix_utf)
+ xpath.xmlXPathRegisterNs(
+ self._xpathCtxt, _cstr(prefix_utf), _cstr(ns_uri_utf))
+
+ cdef unregisterGlobalNamespaces(self):
+ if python.PyList_GET_SIZE(self._global_namespaces) > 0:
+ for prefix_utf in self._global_namespaces:
+ xpath.xmlXPathRegisterNs(self._xpathCtxt,
+ _cstr(prefix_utf), NULL)
+ del self._global_namespaces[:]
+
+ cdef void _unregisterNamespace(self, prefix_utf):
+ xpath.xmlXPathRegisterNs(self._xpathCtxt,
+ _cstr(prefix_utf), NULL)
# extension functions
+ cdef void _addLocalExtensionFunction(self, ns_utf, name_utf, function):
+ if self._extensions is None:
+ self._extensions = {}
+ python.PyDict_SetItem(self._extensions, (ns_utf, name_utf), function)
+
+ cdef void registerGlobalFunctions(self, void* ctxt,
+ _register_function reg_func):
+ cdef python.PyObject* dict_result
+ for ns_utf, ns_functions in _iter_ns_extension_functions():
+ if ns_utf is None:
+ d = self._function_cache
+ else:
+ dict_result = python.PyDict_GetItem(
+ self._function_cache_ns, ns_utf)
+ if dict_result is NULL:
+ d = {}
+ python.PyDict_SetItem(
+ self._function_cache_ns, ns_utf, d)
+ else:
+ d = dict_result
+ for name_utf, function in ns_functions.iteritems():
+ python.PyDict_SetItem(d, name_utf, function)
+ reg_func(ctxt, name_utf, ns_utf)
+
+ cdef void registerLocalFunctions(self, void* ctxt,
+ _register_function reg_func):
+ cdef python.PyObject* dict_result
+ if self._extensions is None:
+ return # done
+ last_ns = None
+ d = self._function_cache
+ for (ns_utf, name_utf), function in self._extensions.iteritems():
+ if ns_utf is not last_ns:
+ last_ns = ns_utf
+ if ns_utf is None:
+ d = self._function_cache
+ else:
+ dict_result = python.PyDict_GetItem(
+ self._function_cache_ns, ns_utf)
+ if dict_result is NULL:
+ d = {}
+ python.PyDict_SetItem(self._function_cache_ns,
+ ns_utf, d)
+ else:
+ d = dict_result
+ python.PyDict_SetItem(d, name_utf, function)
+ reg_func(ctxt, name_utf, ns_utf)
+
+ cdef unregisterAllFunctions(self, void* ctxt,
+ _register_function unreg_func):
+ for name_utf in self._function_cache:
+ unreg_func(ctxt, name_utf, None)
+ for ns_utf, functions in self._function_cache_ns.iteritems():
+ for name_utf in functions:
+ unreg_func(ctxt, name_utf, ns_utf)
+
+ cdef unregisterGlobalFunctions(self, void* ctxt,
+ _register_function unreg_func):
+ for name_utf in self._function_cache:
+ if self._extensions is None or \
+ (None, name_utf) not in self._extensions:
+ unreg_func(ctxt, name_utf, None)
+ for ns_utf, functions in self._function_cache_ns.iteritems():
+ for name_utf in functions:
+ if self._extensions is None or \
+ (ns_utf, name_utf) not in self._extensions:
+ unreg_func(ctxt, name_utf, ns_utf)
+
cdef _find_cached_function(self, char* c_ns_uri, char* c_name):
"""Lookup an extension function in the cache and return it.
@@ -137,7 +270,7 @@
return dict_result
return None
- cdef int _prepare_function_call(self, char* c_ns_uri, char* c_name):
+ cdef int __prepare_function_call(self, char* c_ns_uri, char* c_name):
"""Find an extension function and store it in 'self._called_function'.
This is absolutely performance-critical for XPath/XSLT!
@@ -233,19 +366,87 @@
################################################################################
-# helper functions
+# EXSLT regexp implementation
-cdef xpath.xmlXPathFunction _function_check(void* ctxt,
- char* c_name, char* c_ns_uri):
- "Module level lookup function for XPath/XSLT functions"
- cdef xpath.xmlXPathFunction c_func
- cdef _BaseContext context
- context = <_BaseContext>ctxt
- if context._prepare_function_call(c_ns_uri, c_name):
- c_func = _call_prepared_function
- else:
- c_func = NULL
- return c_func
+cdef class _ExsltRegExp:
+ cdef object _compile_map
+ def __init__(self):
+ self._compile_map = {}
+
+ cdef _make_string(self, value):
+ if _isString(value):
+ return value
+ else:
+ raise TypeError, "Invalid argument type %s" % type(value)
+
+ cdef _compile(self, rexp, ignore_case):
+ cdef python.PyObject* c_result
+ rexp = self._make_string(rexp)
+ key = (rexp, ignore_case)
+ c_result = python.PyDict_GetItem(self._compile_map, key)
+ if c_result is not NULL:
+ return c_result
+ py_flags = re.UNICODE
+ if ignore_case:
+ py_flags = py_flags | re.IGNORECASE
+ rexp_compiled = re.compile(rexp, py_flags)
+ python.PyDict_SetItem(self._compile_map, key, rexp_compiled)
+ return rexp_compiled
+
+ def test(self, ctxt, s, rexp, flags=''):
+ flags = self._make_string(flags)
+ s = self._make_string(s)
+ rexpc = self._compile(rexp, 'i' in flags)
+ if rexpc.search(s) is None:
+ return False
+ else:
+ return True
+
+ def match(self, ctxt, s, rexp, flags=''):
+ flags = self._make_string(flags)
+ s = self._make_string(s)
+ rexpc = self._compile(rexp, 'i' in flags)
+ if 'g' in flags:
+ results = rexpc.findall(s)
+ if not results:
+ return ()
+ else:
+ result = rexpc.search(s)
+ if not result:
+ return ()
+ results = [ result.group() ]
+ results.extend( result.groups('') )
+ result_list = []
+ root = Element('matches')
+ join_groups = ''.join
+ for s_match in results:
+ if python.PyTuple_CheckExact(s_match):
+ s_match = join_groups(s_match)
+ elem = SubElement(root, 'match')
+ elem.text = s_match
+ python.PyList_Append(result_list, elem)
+ return result_list
+
+ def replace(self, ctxt, s, rexp, flags, replacement):
+ replacement = self._make_string(replacement)
+ flags = self._make_string(flags)
+ s = self._make_string(s)
+ rexpc = self._compile(rexp, 'i' in flags)
+ if 'g' in flags:
+ count = 0
+ else:
+ count = 1
+ return rexpc.sub(replacement, s, count)
+
+ cdef _register_in_context(self, _BaseContext context):
+ ns = "http://exslt.org/regular-expressions"
+ context._addLocalExtensionFunction(ns, "test", self.test)
+ context._addLocalExtensionFunction(ns, "match", self.match)
+ context._addLocalExtensionFunction(ns, "replace", self.replace)
+
+
+################################################################################
+# helper functions
cdef xpath.xmlXPathObject* _wrapXPathObject(object obj) except NULL:
cdef xpath.xmlNodeSet* resultSet
@@ -405,22 +606,6 @@
fref = "{%s}%s" % (rctxt.functionURI, rctxt.function)
else:
fref = rctxt.function
- xpath.xmlXPathErr(ctxt, xpath.XML_XPATH_UNKNOWN_FUNC_ERROR)
+ xpath.xmlXPathErr(ctxt, xpath.XPATH_UNKNOWN_FUNC_ERROR)
exception = XPathFunctionError("XPath function '%s' not found" % fref)
context._exc._store_exception(exception)
-
-# call the function that was stored in 'context._called_function'
-
-cdef void _call_prepared_function(xpath.xmlXPathParserContext* ctxt, int nargs):
- cdef python.PyGILState_STATE gil_state
- gil_state = python.PyGILState_Ensure()
- _call_prepared_python_function(ctxt, nargs)
- python.PyGILState_Release(gil_state)
-
-cdef void _call_prepared_python_function(xpath.xmlXPathParserContext* ctxt,
- int nargs):
- cdef xpath.xmlXPathContext* rctxt
- cdef _BaseContext context
- rctxt = ctxt.context
- context = <_BaseContext>(rctxt.userData)
- _extension_function_call(context, context._called_function, ctxt, nargs)
Modified: lxml/trunk/src/lxml/nsclasses.pxi
==============================================================================
--- lxml/trunk/src/lxml/nsclasses.pxi (original)
+++ lxml/trunk/src/lxml/nsclasses.pxi Thu Mar 29 09:52:15 2007
@@ -75,6 +75,11 @@
name = _utf8(name)
return self._get(name)
+ def __delitem__(self, name):
+ if name is not None:
+ name = _utf8(name)
+ python.PyDict_DelItem(self._entries, name)
+
cdef object _get(self, object name):
cdef python.PyObject* dict_result
dict_result = python.PyDict_GetItem(self._entries, name)
@@ -99,7 +104,7 @@
return self._entries.iteritems()
def clear(self):
- self._entries.clear()
+ python.PyDict_Clear(self._entries)
cdef class _ClassNamespaceRegistry(_NamespaceRegistry):
"Dictionary-like registry for namespace implementation classes"
@@ -130,32 +135,39 @@
cdef class _XPathFunctionNamespaceRegistry(_FunctionNamespaceRegistry):
cdef object _prefix
cdef object _prefix_utf
+
property prefix:
"Namespace prefix for extension functions."
def __del__(self):
self._prefix = None # no prefix configured
+ self._prefix_utf = None
def __get__(self):
- return self._prefix
+ if self._prefix is None:
+ return ''
+ else:
+ return self._prefix
def __set__(self, prefix):
+ if prefix == '':
+ prefix = None # empty prefix
if prefix is None:
- prefix = '' # empty prefix
- self._prefix_utf = _utf8(prefix)
+ self._prefix_utf = None
+ else:
+ self._prefix_utf = _utf8(prefix)
self._prefix = prefix
cdef object _find_all_extension_prefixes():
"Internal lookup function to find all function prefixes for XSLT/XPath."
cdef _XPathFunctionNamespaceRegistry registry
- ns_prefixes = {}
- for (ns_utf, registry) in __FUNCTION_NAMESPACE_REGISTRIES.iteritems():
+ ns_prefixes = []
+ for registry in __FUNCTION_NAMESPACE_REGISTRIES.itervalues():
if registry._prefix_utf is not None:
- ns_prefixes[registry._prefix_utf] = ns_utf
+ if registry._ns_uri_utf is not None:
+ python.PyList_Append(
+ ns_prefixes, (registry._prefix_utf, registry._ns_uri_utf))
return ns_prefixes
-cdef object _iter_extension_function_names():
- l = []
- for (ns_utf, registry) in __FUNCTION_NAMESPACE_REGISTRIES.iteritems():
- python.PyList_Append(l, (ns_utf, registry))
- return l
+cdef object _iter_ns_extension_functions():
+ return __FUNCTION_NAMESPACE_REGISTRIES.iteritems()
cdef object _find_extension(ns_uri_utf, name_utf):
cdef python.PyObject* dict_result
Modified: lxml/trunk/src/lxml/xpath.pxi
==============================================================================
--- lxml/trunk/src/lxml/xpath.pxi (original)
+++ lxml/trunk/src/lxml/xpath.pxi Thu Mar 29 09:52:15 2007
@@ -9,38 +9,58 @@
################################################################################
# XPath
+cdef int _register_xpath_function(void* ctxt, name_utf, ns_utf):
+ if ns_utf is None:
+ return xpath.xmlXPathRegisterFunc(
+ ctxt, _cstr(name_utf),
+ _xpath_function_call)
+ else:
+ return xpath.xmlXPathRegisterFuncNS(
+ ctxt, _cstr(name_utf), _cstr(ns_utf),
+ _xpath_function_call)
+
+cdef int _unregister_xpath_function(void* ctxt, name_utf, ns_utf):
+ if ns_utf is None:
+ return xpath.xmlXPathRegisterFunc(
+ ctxt, _cstr(name_utf), NULL)
+ else:
+ return xpath.xmlXPathRegisterFuncNS(
+ ctxt, _cstr(name_utf), _cstr(ns_utf), NULL)
+
+
cdef class _XPathContext(_BaseContext):
cdef object _variables
- def __init__(self, namespaces, extensions, variables):
+ def __init__(self, namespaces, extensions, enable_regexp, variables):
self._variables = variables
- _BaseContext.__init__(self, namespaces, extensions)
-
- cdef register_context(self, xpath.xmlXPathContext* xpathCtxt, _Document doc):
+ _BaseContext.__init__(self, namespaces, extensions, enable_regexp)
+
+ cdef set_context(self, xpath.xmlXPathContext* xpathCtxt):
self._set_xpath_context(xpathCtxt)
- ns_prefixes = _find_all_extension_prefixes()
- if ns_prefixes:
- self.registerNamespaces(ns_prefixes)
+ self._setupDict(xpathCtxt)
+ self.registerLocalNamespaces()
+ self.registerLocalFunctions(xpathCtxt, _register_xpath_function)
+
+ cdef register_context(self, _Document doc):
self._register_context(doc)
+ self.registerGlobalNamespaces()
+ self.registerGlobalFunctions(self._xpathCtxt, _register_xpath_function)
if self._variables is not None:
self.registerVariables(self._variables)
- xpath.xmlXPathRegisterFuncLookup(
- self._xpathCtxt, _function_check, self)
cdef unregister_context(self):
- cdef xpath.xmlXPathContext* xpathCtxt
- xpathCtxt = self._xpathCtxt
- if xpathCtxt is NULL:
- return
- xpath.xmlXPathRegisteredVariablesCleanup(xpathCtxt)
- self._unregister_context()
+ self.unregisterGlobalFunctions(
+ self._xpathCtxt, _unregister_xpath_function)
+ self.unregisterGlobalNamespaces()
+ xpath.xmlXPathRegisteredVariablesCleanup(self._xpathCtxt)
+ self._cleanup_context()
- def registerVariables(self, variable_dict):
+ cdef registerVariables(self, variable_dict):
for name, value in variable_dict.items():
name_utf = self._to_utf(name)
xpath.xmlXPathRegisterVariable(
self._xpathCtxt, _cstr(name_utf), _wrapXPathObject(value))
- def registerVariable(self, name, value):
+ cdef registerVariable(self, name, value):
name_utf = self._to_utf(name)
xpath.xmlXPathRegisterVariable(
self._xpathCtxt, _cstr(name_utf), _wrapXPathObject(value))
@@ -49,20 +69,26 @@
xpath.xmlXPathRegisterVariable(
self._xpathCtxt, _cstr(name_utf), _wrapXPathObject(value))
-cdef void _setupDict(xpath.xmlXPathContext* xpathCtxt):
- __GLOBAL_PARSER_CONTEXT.initXPathParserDict(xpathCtxt)
+ cdef void _setupDict(self, xpath.xmlXPathContext* xpathCtxt):
+ __GLOBAL_PARSER_CONTEXT.initXPathParserDict(xpathCtxt)
cdef class _XPathEvaluatorBase:
cdef xpath.xmlXPathContext* _xpathCtxt
cdef _XPathContext _context
+ cdef python.PyThread_type_lock _eval_lock
- def __init__(self, namespaces, extensions, variables=None):
- self._context = _XPathContext(namespaces, extensions, variables)
+ def __init__(self, namespaces, extensions, enable_regexp):
+ self._context = _XPathContext(namespaces, extensions,
+ enable_regexp, None)
def __dealloc__(self):
if self._xpathCtxt is not NULL:
xpath.xmlXPathFreeContext(self._xpathCtxt)
+ cdef set_context(self, xpath.xmlXPathContext* xpathCtxt):
+ self._xpathCtxt = xpathCtxt
+ self._context.set_context(xpathCtxt)
+
def evaluate(self, _eval_arg, **_variables):
"""Evaluate an XPath expression.
@@ -84,6 +110,22 @@
c = path[0]
return c == c'/'
+ cdef int _lock(self) except -1:
+ cdef python.PyThreadState* state
+ cdef int result
+ if config.ENABLE_THREADING and self._eval_lock != NULL:
+ state = python.PyEval_SaveThread()
+ result = python.PyThread_acquire_lock(
+ self._eval_lock, python.WAIT_LOCK)
+ python.PyEval_RestoreThread(state)
+ if result == 0:
+ raise ParserError, "parser locking failed"
+ return 0
+
+ cdef void _unlock(self):
+ if config.ENABLE_THREADING and self._eval_lock != NULL:
+ python.PyThread_release_lock(self._eval_lock)
+
cdef _raise_parse_error(self):
if self._xpathCtxt is not NULL and \
self._xpathCtxt.lastError.message is not NULL:
@@ -119,21 +161,23 @@
Absolute XPath expressions (starting with '/') will be evaluated against
the ElementTree as returned by getroottree().
- XPath evaluators must not be shared between threads.
+ Additional namespace declarations can be passed with the 'namespaces'
+ keyword argument. EXSLT regular expression support can be disabled with
+ the 'regexp' boolean keyword (defaults to True).
"""
cdef _Element _element
- def __init__(self, _Element element not None, namespaces=None, extensions=None):
+ def __init__(self, _Element element not None, namespaces=None,
+ extensions=None, regexp=True):
cdef xpath.xmlXPathContext* xpathCtxt
cdef int ns_register_status
cdef _Document doc
+ self._element = element
doc = element._doc
+ _XPathEvaluatorBase.__init__(self, namespaces, extensions, regexp)
xpathCtxt = xpath.xmlXPathNewContext(doc._c_doc)
- self._xpathCtxt = xpathCtxt
if xpathCtxt is NULL:
raise XPathContextError, "Unable to create new XPath context"
- _setupDict(xpathCtxt)
- self._element = element
- _XPathEvaluatorBase.__init__(self, namespaces, extensions)
+ self.set_context(xpathCtxt)
def registerNamespace(self, prefix, uri):
"""Register a namespace with the XPath context.
@@ -155,33 +199,41 @@
Absolute XPath expressions (starting with '/') will be evaluated
against the ElementTree as returned by getroottree().
"""
- cdef xpath.xmlXPathContext* xpathCtxt
+ cdef python.PyThreadState* state
cdef xpath.xmlXPathObject* xpathObj
cdef _Document doc
cdef char* c_path
path = _utf8(_path)
- xpathCtxt = self._xpathCtxt
- xpathCtxt.node = self._element._c_node
doc = self._element._doc
- self._context.register_context(xpathCtxt, doc)
+ self._lock()
+ self._xpathCtxt.node = self._element._c_node
try:
+ self._context.register_context(doc)
self._context.registerVariables(_variables)
- xpathObj = xpath.xmlXPathEvalExpression(_cstr(path), xpathCtxt)
+ state = python.PyEval_SaveThread()
+ xpathObj = xpath.xmlXPathEvalExpression(
+ _cstr(path), self._xpathCtxt)
+ python.PyEval_RestoreThread(state)
+ result = self._handle_result(xpathObj, doc)
finally:
self._context.unregister_context()
+ self._unlock()
- return self._handle_result(xpathObj, doc)
+ return result
cdef class XPathDocumentEvaluator(XPathElementEvaluator):
"""Create an XPath evaluator for an ElementTree.
- XPath evaluators must not be shared between threads.
+ Additional namespace declarations can be passed with the 'namespaces'
+ keyword argument. EXSLT regular expression support can be disabled with
+ the 'regexp' boolean keyword (defaults to True).
"""
- def __init__(self, _ElementTree etree not None, namespaces=None, extensions=None):
+ def __init__(self, _ElementTree etree not None, namespaces=None,
+ extensions=None, regexp=True):
XPathElementEvaluator.__init__(
- self, etree._context_node, namespaces, extensions)
+ self, etree._context_node, namespaces, extensions, regexp)
def __call__(self, _path, **_variables):
"""Evaluate an XPath expression on the document.
@@ -189,67 +241,81 @@
Variables may be provided as keyword arguments. Note that namespaces
are currently not supported for variables.
"""
- cdef xpath.xmlXPathContext* xpathCtxt
+ cdef python.PyThreadState* state
cdef xpath.xmlXPathObject* xpathObj
cdef xmlDoc* c_doc
cdef _Document doc
path = _utf8(_path)
- xpathCtxt = self._xpathCtxt
doc = self._element._doc
- self._context.register_context(xpathCtxt, doc)
- c_doc = _fakeRootDoc(doc._c_doc, self._element._c_node)
+ self._lock()
try:
- self._context.registerVariables(_variables)
- xpathCtxt.doc = c_doc
- xpathCtxt.node = tree.xmlDocGetRootElement(c_doc)
- xpathObj = xpath.xmlXPathEvalExpression(_cstr(path), xpathCtxt)
+ self._context.register_context(doc)
+ c_doc = _fakeRootDoc(doc._c_doc, self._element._c_node)
+ try:
+ self._context.registerVariables(_variables)
+ state = python.PyEval_SaveThread()
+ self._xpathCtxt.doc = c_doc
+ self._xpathCtxt.node = tree.xmlDocGetRootElement(c_doc)
+ xpathObj = xpath.xmlXPathEvalExpression(
+ _cstr(path), self._xpathCtxt)
+ python.PyEval_RestoreThread(state)
+ result = self._handle_result(xpathObj, doc)
+ finally:
+ _destroyFakeDoc(doc._c_doc, c_doc)
+ self._context.unregister_context()
finally:
- _destroyFakeDoc(doc._c_doc, c_doc)
- self._context.unregister_context()
+ self._unlock()
- return self._handle_result(xpathObj, doc)
+ return result
-def XPathEvaluator(etree_or_element, namespaces=None, extensions=None):
+def XPathEvaluator(etree_or_element, namespaces=None, extensions=None,
+ regexp=True):
"""Creates an XPath evaluator for an ElementTree or an Element.
The resulting object can be called with an XPath expression as argument
and XPath variables provided as keyword arguments.
- XPath evaluators must not be shared between threads.
+ Additional namespace declarations can be passed with the 'namespaces'
+ keyword argument. EXSLT regular expression support can be disabled with
+ the 'regexp' boolean keyword (defaults to True).
"""
if isinstance(etree_or_element, _ElementTree):
- return XPathDocumentEvaluator(etree_or_element, namespaces, extensions)
+ return XPathDocumentEvaluator(etree_or_element, namespaces,
+ extensions, regexp)
else:
- return XPathElementEvaluator(etree_or_element, namespaces, extensions)
+ return XPathElementEvaluator(etree_or_element, namespaces,
+ extensions, regexp)
cdef class XPath(_XPathEvaluatorBase):
"""A compiled XPath expression that can be called on Elements and
ElementTrees.
- Besides the XPath expression, you can pass namespace mappings and
- extensions to the constructor through the keyword arguments ``namespaces``
- and ``extensions``.
+ Besides the XPath expression, you can pass prefix-namespace mappings and
+ extension functions to the constructor through the keyword arguments
+ ``namespaces`` and ``extensions``. EXSLT regular expression support can
+ be disabled with the 'regexp' boolean keyword (defaults to True).
"""
cdef xpath.xmlXPathCompExpr* _xpath
cdef readonly object path
- def __init__(self, path, namespaces=None, extensions=None):
- _XPathEvaluatorBase.__init__(self, namespaces, extensions)
- self._xpath = NULL
+ def __init__(self, path, namespaces=None, extensions=None, regexp=True):
+ cdef xpath.xmlXPathContext* xpathCtxt
+ _XPathEvaluatorBase.__init__(self, namespaces, extensions, regexp)
self.path = path
path = _utf8(path)
- self._xpathCtxt = xpath.xmlXPathNewContext(NULL)
- _setupDict(self._xpathCtxt)
- self._xpath = xpath.xmlXPathCtxtCompile(self._xpathCtxt, _cstr(path))
+ xpathCtxt = xpath.xmlXPathNewContext(NULL)
+ if xpathCtxt is NULL:
+ raise XPathContextError, "Unable to create new XPath context"
+ self.set_context(xpathCtxt)
+ self._xpath = xpath.xmlXPathCtxtCompile(xpathCtxt, _cstr(path))
if self._xpath is NULL:
self._raise_parse_error()
def __call__(self, _etree_or_element, **_variables):
cdef python.PyThreadState* state
- cdef xpath.xmlXPathContext* xpathCtxt
cdef xpath.xmlXPathObject* xpathObj
cdef _Document document
cdef _Element element
@@ -258,20 +324,22 @@
document = _documentOrRaise(_etree_or_element)
element = _rootNodeOrRaise(_etree_or_element)
- xpathCtxt = self._xpathCtxt
- xpathCtxt.doc = document._c_doc
- xpathCtxt.node = element._c_node
+ self._lock()
+ self._xpathCtxt.doc = document._c_doc
+ self._xpathCtxt.node = element._c_node
- context = self._context
- context.register_context(xpathCtxt, document)
try:
- context.registerVariables(_variables)
+ self._context.register_context(document)
+ self._context.registerVariables(_variables)
state = python.PyEval_SaveThread()
- xpathObj = xpath.xmlXPathCompiledEval(self._xpath, xpathCtxt)
+ xpathObj = xpath.xmlXPathCompiledEval(
+ self._xpath, self._xpathCtxt)
python.PyEval_RestoreThread(state)
+ result = self._handle_result(xpathObj, document)
finally:
- context.unregister_context()
- return self._handle_result(xpathObj, document)
+ self._context.unregister_context()
+ self._unlock()
+ return result
def __dealloc__(self):
if self._xpath is not NULL:
Modified: lxml/trunk/src/lxml/xslt.pxd
==============================================================================
--- lxml/trunk/src/lxml/xslt.pxd (original)
+++ lxml/trunk/src/lxml/xslt.pxd Thu Mar 29 09:52:15 2007
@@ -35,6 +35,8 @@
xmlXPathFunction function)
cdef int xsltUnregisterExtModuleFunction(char* name, char* URI)
cdef xmlXPathFunction xsltExtModuleFunctionLookup(char* name, char* URI)
+ cdef int xsltRegisterExtPrefix(xsltStylesheet* style,
+ char* prefix, char* URI)
cdef extern from "libxslt/documents.h":
ctypedef enum xsltLoadType:
Modified: lxml/trunk/src/lxml/xslt.pxi
==============================================================================
--- lxml/trunk/src/lxml/xslt.pxi (original)
+++ lxml/trunk/src/lxml/xslt.pxi Thu Mar 29 09:52:15 2007
@@ -193,75 +193,48 @@
################################################################################
# XSLT
+cdef int _register_xslt_function(void* ctxt, name_utf, ns_utf):
+ if ns_utf is None:
+ return 0
+ return xslt.xsltRegisterExtFunction(
+ ctxt, _cstr(name_utf), _cstr(ns_utf),
+ _xpath_function_call)
+
+cdef int _unregister_xslt_function(void* ctxt, name_utf, ns_utf):
+ if ns_utf is None:
+ return 0
+ return xslt.xsltRegisterExtFunction(
+ ctxt, _cstr(name_utf), _cstr(ns_utf),
+ NULL)
+
+
cdef class _XSLTContext(_BaseContext):
cdef xslt.xsltTransformContext* _xsltCtxt
- def __init__(self, namespaces, extensions):
+ def __init__(self, namespaces, extensions, enable_regexp):
self._xsltCtxt = NULL
- if extensions and None in extensions:
- raise XSLTExtensionError, "extensions must not have empty namespaces"
- _BaseContext.__init__(self, namespaces, extensions)
+ if extensions is not None:
+ for ns, prefix in extensions:
+ if ns is None:
+ raise XSLTExtensionError, \
+ "extensions must not have empty namespaces"
+ _BaseContext.__init__(self, namespaces, extensions, enable_regexp)
cdef register_context(self, xslt.xsltTransformContext* xsltCtxt,
_Document doc):
self._xsltCtxt = xsltCtxt
self._set_xpath_context(xsltCtxt.xpathCtxt)
self._register_context(doc)
- xsltCtxt.xpathCtxt.userData = self
- self._registerExtensionFunctions()
+ self.registerLocalFunctions(xsltCtxt, _register_xslt_function)
+ self.registerGlobalFunctions(xsltCtxt, _register_xslt_function)
cdef free_context(self):
- cdef xslt.xsltTransformContext* xsltCtxt
- xsltCtxt = self._xsltCtxt
- if xsltCtxt is NULL:
- return
- self._free_context()
- self._xsltCtxt = NULL
- xslt.xsltFreeTransformContext(xsltCtxt)
+ self._cleanup_context()
+ self._release_context()
+ if self._xsltCtxt is not NULL:
+ xslt.xsltFreeTransformContext(self._xsltCtxt)
+ self._xsltCtxt = NULL
self._release_temp_refs()
- cdef void _addLocalExtensionFunction(self, ns_utf, name_utf, function):
- if self._extensions is None:
- self._extensions = {}
- python.PyDict_SetItem(self._extensions, (ns_utf, name_utf), function)
-
- cdef void _registerExtensionFunctions(self):
- cdef python.PyObject* dict_result
- for ns_utf, functions in _iter_extension_function_names():
- if ns_utf is None:
- continue
- dict_result = python.PyDict_GetItem(self._function_cache_ns, ns_utf)
- if dict_result is NULL:
- d = {}
- python.PyDict_SetItem(self._function_cache_ns, ns_utf, d)
- else:
- d = dict_result
- for name_utf, function in functions.iteritems():
- python.PyDict_SetItem(d, name_utf, function)
- xslt.xsltRegisterExtFunction(
- self._xsltCtxt, _cstr(name_utf), _cstr(ns_utf),
- _xpath_function_call)
- if self._extensions is None:
- return # done
- last_ns = None
- for (ns_utf, name_utf), function in self._extensions.iteritems():
- if ns_utf is None:
- raise ValueError, \
- "extensions must have non empty namespaces"
- elif ns_utf is not last_ns:
- last_ns = ns_utf
- dict_result = python.PyDict_GetItem(
- self._function_cache_ns, ns_utf)
- if dict_result is NULL:
- d = {}
- python.PyDict_SetItem(self._function_cache_ns, ns_utf, d)
- else:
- d = dict_result
- python.PyDict_SetItem(d, name_utf, function)
- xslt.xsltRegisterExtFunction(
- self._xsltCtxt, _cstr(name_utf), _cstr(ns_utf),
- _xpath_function_call)
-
-cdef class _ExsltRegExp # forward declaration
cdef class XSLT:
"""Turn a document into an XSLT object.
@@ -279,16 +252,17 @@
cdef xslt.xsltStylesheet* _c_style
cdef _XSLTResolverContext _xslt_resolver_context
cdef XSLTAccessControl _access_control
- cdef _ExsltRegExp _regexp
cdef _ErrorLog _error_log
- def __init__(self, xslt_input, extensions=None, regexp=True, access_control=None):
+ def __init__(self, xslt_input, extensions=None, regexp=True,
+ access_control=None):
cdef python.PyThreadState* state
cdef xslt.xsltStylesheet* c_style
cdef xmlDoc* c_doc
cdef xmlDoc* fake_c_doc
cdef _Document doc
cdef _Element root_node
+ cdef _ExsltRegExp _regexp
doc = _documentOrRaise(xslt_input)
root_node = _rootNodeOrRaise(xslt_input)
@@ -327,13 +301,7 @@
c_doc._private = NULL # no longer used!
self._c_style = c_style
- self._context = _XSLTContext(None, extensions)
- if regexp:
- self._regexp = _ExsltRegExp()
- self._regexp._register_in_context(self._context)
- else:
- self._regexp = None
- # XXX is it worthwile to use xsltPrecomputeStylesheet here?
+ self._context = _XSLTContext(None, extensions, regexp)
def __dealloc__(self):
if self._xslt_resolver_context is not None and \
@@ -346,20 +314,24 @@
def __get__(self):
return self._error_log.copy()
+ def apply(self, _input, profile_run=False, **_kw):
+ return self(_input, profile_run, **_kw)
+
+ def tostring(self, _ElementTree result_tree):
+ """Save result doc to string based on stylesheet output method.
+ """
+ return str(result_tree)
+
def __call__(self, _input, profile_run=False, **_kw):
- cdef python.PyThreadState* state
cdef _XSLTContext context
cdef _Document input_doc
cdef _Element root_node
cdef _Document result_doc
cdef _Document profile_doc
cdef xmlDoc* c_profile_doc
- cdef _XSLTResolverContext resolver_context
cdef xslt.xsltTransformContext* transform_ctxt
cdef xmlDoc* c_result
cdef xmlDoc* c_doc
- cdef char** params
- cdef Py_ssize_t i, kw_count
if not _checkThreadDict(self._c_style.doc.dict):
raise RuntimeError, "stylesheet is not usable in this thread"
@@ -367,9 +339,6 @@
input_doc = _documentOrRaise(_input)
root_node = _rootNodeOrRaise(_input)
- resolver_context = _XSLTResolverContext(input_doc._parser)
- resolver_context._c_style_doc = self._xslt_resolver_context._c_style_doc
-
c_doc = _fakeRootDoc(input_doc._c_doc, root_node._c_node)
transform_ctxt = xslt.xsltNewTransformContext(self._c_style, c_doc)
@@ -379,28 +348,82 @@
initTransformDict(transform_ctxt)
- self._error_log.connect()
+ if profile_run:
+ transform_ctxt.profile = 1
+
+ try:
+ self._error_log.connect()
+ context = self._context._copy()
+ context.register_context(transform_ctxt, input_doc)
+
+ c_result = self._run_transform(
+ input_doc, c_doc, _kw, context, transform_ctxt)
+
+ if transform_ctxt.profile:
+ c_profile_doc = xslt.xsltGetProfileInformation(transform_ctxt)
+ if c_profile_doc is not NULL:
+ profile_doc = _documentFactory(
+ c_profile_doc, input_doc._parser)
+ finally:
+ if context is not None:
+ context.free_context()
+ _destroyFakeDoc(input_doc._c_doc, c_doc)
+ self._error_log.disconnect()
+
+ try:
+ if self._xslt_resolver_context._has_raised():
+ if c_result is not NULL:
+ tree.xmlFreeDoc(c_result)
+ self._xslt_resolver_context._raise_if_stored()
+
+ if c_result is NULL:
+ error = self._error_log.last_error
+ if error is not None and error.message:
+ if error.line >= 0:
+ message = "%s, line %d" % (error.message, error.line)
+ else:
+ message = error.message
+ elif error.line >= 0:
+ message = "Error applying stylesheet, line %d" % error.line
+ else:
+ message = "Error applying stylesheet"
+ raise XSLTApplyError, message
+ finally:
+ self._xslt_resolver_context.clear()
+
+ result_doc = _documentFactory(c_result, input_doc._parser)
+ return _xsltResultTreeFactory(result_doc, self, profile_doc)
+
+ cdef xmlDoc* _run_transform(self, _Document input_doc, xmlDoc* c_input_doc,
+ parameters, _XSLTContext context,
+ xslt.xsltTransformContext* transform_ctxt):
+ cdef python.PyThreadState* state
+ cdef _XSLTResolverContext resolver_context
+ cdef xmlDoc* c_result
+ cdef char** params
+ cdef Py_ssize_t i, parameter_count
+
+ resolver_context = _XSLTResolverContext(input_doc._parser)
+ resolver_context._c_style_doc = self._xslt_resolver_context._c_style_doc
+
xslt.xsltSetTransformErrorFunc(transform_ctxt, self._error_log,
_receiveXSLTError)
if self._access_control is not None:
self._access_control._register_in_context(transform_ctxt)
- if profile_run:
- transform_ctxt.profile = 1
-
transform_ctxt._private = self._xslt_resolver_context
- kw_count = python.PyDict_Size(_kw)
- if kw_count > 0:
+ parameter_count = python.PyDict_Size(parameters)
+ if parameter_count > 0:
# allocate space for parameters
# * 2 as we want an entry for both key and value,
# and + 1 as array is NULL terminated
params = python.PyMem_Malloc(
- sizeof(char*) * (kw_count * 2 + 1))
+ sizeof(char*) * (parameter_count * 2 + 1))
i = 0
keep_ref = []
- for key, value in _kw.iteritems():
+ for key, value in parameters.iteritems():
k = _utf8(key)
python.PyList_Append(keep_ref, k)
v = _utf8(value)
@@ -413,59 +436,16 @@
else:
params = NULL
- context = self._context._copy()
- context.register_context(transform_ctxt, input_doc)
-
state = python.PyEval_SaveThread()
- c_result = xslt.xsltApplyStylesheetUser(self._c_style, c_doc, params,
- NULL, NULL, transform_ctxt)
+ c_result = xslt.xsltApplyStylesheetUser(
+ self._c_style, c_input_doc, params, NULL, NULL, transform_ctxt)
python.PyEval_RestoreThread(state)
if params is not NULL:
# deallocate space for parameters
python.PyMem_Free(params)
- keep_ref = None
-
- if transform_ctxt.profile:
- c_profile_doc = xslt.xsltGetProfileInformation(transform_ctxt)
- if c_profile_doc is not NULL:
- profile_doc = _documentFactory(c_profile_doc, input_doc._parser)
-
- context.free_context()
- _destroyFakeDoc(input_doc._c_doc, c_doc)
- self._error_log.disconnect()
- try:
- if self._xslt_resolver_context._has_raised():
- if c_result is not NULL:
- tree.xmlFreeDoc(c_result)
- self._xslt_resolver_context._raise_if_stored()
-
- if c_result is NULL:
- error = self._error_log.last_error
- if error is not None and error.message:
- if error.line >= 0:
- message = "%s, line %d" % (error.message, error.line)
- else:
- message = error.message
- elif error.line >= 0:
- message = "Error applying stylesheet, line %d" % error.line
- else:
- message = "Error applying stylesheet"
- raise XSLTApplyError, message
- finally:
- self._xslt_resolver_context.clear()
-
- result_doc = _documentFactory(c_result, input_doc._parser)
- return _xsltResultTreeFactory(result_doc, self, profile_doc)
-
- def apply(self, _input, profile_run=False, **_kw):
- return self(_input, profile_run, **_kw)
-
- def tostring(self, _ElementTree result_tree):
- """Save result doc to string based on stylesheet output method.
- """
- return str(result_tree)
+ return c_result
cdef class _XSLTResultTree(_ElementTree):
cdef XSLT _xslt
@@ -542,17 +522,6 @@
# enable EXSLT support for XSLT
xslt.exsltRegisterAll()
-# extension function lookup for XSLT
-cdef xpath.xmlXPathFunction _xslt_function_check(void* ctxt,
- char* c_name, char* c_ns_uri):
- "Find XSLT extension function from set of XPath and XSLT functions"
- cdef xpath.xmlXPathFunction result
- result = _function_check(ctxt, c_name, c_ns_uri)
- if result is NULL:
- return xslt.xsltExtModuleFunctionLookup(c_name, c_ns_uri)
- else:
- return result
-
cdef void initTransformDict(xslt.xsltTransformContext* transform_ctxt):
__GLOBAL_PARSER_CONTEXT.initThreadDictRef(&transform_ctxt.dict)
@@ -649,82 +618,3 @@
if attr == key:
return value
return default
-
-################################################################################
-# EXSLT regexp implementation
-
-cdef class _ExsltRegExp:
- cdef object _compile_map
- def __init__(self):
- self._compile_map = {}
-
- cdef _make_string(self, value):
- if _isString(value):
- return value
- else:
- raise TypeError, "Invalid argument type %s" % type(value)
-
- cdef _compile(self, rexp, ignore_case):
- cdef python.PyObject* c_result
- rexp = self._make_string(rexp)
- key = (rexp, ignore_case)
- c_result = python.PyDict_GetItem(self._compile_map, key)
- if c_result is not NULL:
- return c_result
- py_flags = re.UNICODE
- if ignore_case:
- py_flags = py_flags | re.IGNORECASE
- rexp_compiled = re.compile(rexp, py_flags)
- python.PyDict_SetItem(self._compile_map, key, rexp_compiled)
- return rexp_compiled
-
- def test(self, ctxt, s, rexp, flags=''):
- flags = self._make_string(flags)
- s = self._make_string(s)
- rexpc = self._compile(rexp, 'i' in flags)
- if rexpc.search(s) is None:
- return False
- else:
- return True
-
- def match(self, ctxt, s, rexp, flags=''):
- flags = self._make_string(flags)
- s = self._make_string(s)
- rexpc = self._compile(rexp, 'i' in flags)
- if 'g' in flags:
- results = rexpc.findall(s)
- if not results:
- return ()
- else:
- result = rexpc.search(s)
- if not result:
- return ()
- results = [ result.group() ]
- results.extend( result.groups('') )
- result_list = []
- root = Element('matches')
- join_groups = ''.join
- for s_match in results:
- if python.PyTuple_CheckExact(s_match):
- s_match = join_groups(s_match)
- elem = SubElement(root, 'match')
- elem.text = s_match
- python.PyList_Append(result_list, elem)
- return result_list
-
- def replace(self, ctxt, s, rexp, flags, replacement):
- replacement = self._make_string(replacement)
- flags = self._make_string(flags)
- s = self._make_string(s)
- rexpc = self._compile(rexp, 'i' in flags)
- if 'g' in flags:
- count = 0
- else:
- count = 1
- return rexpc.sub(replacement, s, count)
-
- cdef _register_in_context(self, _XSLTContext context):
- ns = "http://exslt.org/regular-expressions"
- context._addLocalExtensionFunction(ns, "test", self.test)
- context._addLocalExtensionFunction(ns, "match", self.match)
- context._addLocalExtensionFunction(ns, "replace", self.replace)
From scoder at codespeak.net Thu Mar 29 21:40:29 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 29 Mar 2007 21:40:29 +0200 (CEST)
Subject: [Lxml-checkins] r41643 - lxml/trunk/src/lxml
Message-ID: <20070329194029.A83C71007D@code0.codespeak.net>
Author: scoder
Date: Thu Mar 29 21:40:20 2007
New Revision: 41643
Modified:
lxml/trunk/src/lxml/apihelpers.pxi
Log:
raise AssertionError when strings with '\0' bytes are passed into the API
Modified: lxml/trunk/src/lxml/apihelpers.pxi
==============================================================================
--- lxml/trunk/src/lxml/apihelpers.pxi (original)
+++ lxml/trunk/src/lxml/apihelpers.pxi Thu Mar 29 21:40:20 2007
@@ -560,6 +560,21 @@
c = s[0]
return 0
+cdef int isutf8py(pystring):
+ cdef char* s
+ cdef char* c_end
+ cdef char c
+ s = _cstr(pystring)
+ c_end = s + python.PyString_GET_SIZE(pystring)
+ while s < c_end:
+ c = s[0]
+ if c == c'\0':
+ return -1 # invalid!
+ if c & 0x80:
+ return 1 # non-ASCII
+ s = s + 1
+ return 0 # plain 7-bit ASCII
+
cdef object funicode(char* s):
cdef Py_ssize_t slen
cdef char* spos
@@ -578,7 +593,8 @@
cdef object _utf8(object s):
if python.PyString_Check(s):
- assert not isutf8(_cstr(s)), "All strings must be Unicode or ASCII"
+ assert not isutf8py(s), \
+ "All strings must be Unicode or ASCII"
return s
elif python.PyUnicode_Check(s):
return python.PyUnicode_AsUTF8String(s)
@@ -604,10 +620,10 @@
if filename is None:
return None
elif python.PyString_Check(filename):
- c_filename = _cstr(filename)
- if not isutf8(c_filename):
+ if not isutf8py(filename):
# plain ASCII!
return filename
+ c_filename = _cstr(filename)
try:
# try to decode with default encoding
filename = python.PyUnicode_Decode(
From scoder at codespeak.net Thu Mar 29 21:40:45 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 29 Mar 2007 21:40:45 +0200 (CEST)
Subject: [Lxml-checkins] r41644 - lxml/trunk/src/lxml/tests
Message-ID: <20070329194045.8ADF71008A@code0.codespeak.net>
Author: scoder
Date: Thu Mar 29 21:40:38 2007
New Revision: 41644
Modified:
lxml/trunk/src/lxml/tests/test_etree.py
Log:
raise AssertionError when strings with '\0' bytes are passed into the API
Modified: lxml/trunk/src/lxml/tests/test_etree.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_etree.py (original)
+++ lxml/trunk/src/lxml/tests/test_etree.py Thu Mar 29 21:40:38 2007
@@ -1205,6 +1205,15 @@
self.assertEquals(docinfo.root_name, 'html')
self.assertEquals(docinfo.doctype, '')
+ def test_byte_zero(self):
+ Element = self.etree.Element
+
+ a = Element('a')
+ self.assertRaises(AssertionError, setattr, a, "text", 'ha\0ho')
+ self.assertRaises(AssertionError, setattr, a, "tail", 'ha\0ho')
+
+ self.assertRaises(AssertionError, Element, 'ha\0ho')
+
def test_encoding_tostring_utf16(self):
# ElementTree fails to serialize this
tostring = self.etree.tostring
From scoder at codespeak.net Thu Mar 29 21:42:02 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 29 Mar 2007 21:42:02 +0200 (CEST)
Subject: [Lxml-checkins] r41645 - lxml/trunk
Message-ID: <20070329194202.C631C1007D@code0.codespeak.net>
Author: scoder
Date: Thu Mar 29 21:42:01 2007
New Revision: 41645
Modified:
lxml/trunk/CHANGES.txt
Log:
raise AssertionError when strings with '\0' bytes are passed into the API
Modified: lxml/trunk/CHANGES.txt
==============================================================================
--- lxml/trunk/CHANGES.txt (original)
+++ lxml/trunk/CHANGES.txt Thu Mar 29 21:42:01 2007
@@ -23,6 +23,8 @@
* Thread safety in XPath evaluators
+* Raise AssertionError when passing strings containing '\0' bytes
+
Other changes
-------------
From scoder at codespeak.net Thu Mar 29 21:43:46 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 29 Mar 2007 21:43:46 +0200 (CEST)
Subject: [Lxml-checkins] r41646 - lxml/trunk/src/lxml
Message-ID: <20070329194346.204241007D@code0.codespeak.net>
Author: scoder
Date: Thu Mar 29 21:43:44 2007
New Revision: 41646
Modified:
lxml/trunk/src/lxml/objectify.pyx
Log:
cleanup and optimisations
Modified: lxml/trunk/src/lxml/objectify.pyx
==============================================================================
--- lxml/trunk/src/lxml/objectify.pyx (original)
+++ lxml/trunk/src/lxml/objectify.pyx Thu Mar 29 21:43:44 2007
@@ -42,8 +42,17 @@
cdef object AttributeError
AttributeError = __builtin__.AttributeError
+cdef object TypeError
+TypeError = __builtin__.TypeError
+cdef object ValueError
+ValueError = __builtin__.ValueError
cdef object IndexError
IndexError = __builtin__.IndexError
+cdef object StopIteration
+StopIteration = __builtin__.StopIteration
+
+cdef object IGNORABLE_ERRORS
+IGNORABLE_ERRORS = (ValueError, TypeError)
cdef object list
list = __builtin__.list
@@ -202,7 +211,7 @@
"""Return the (first) child with the given tag name. If no namespace
is provided, the child will be looked up in the same one as self.
"""
- return _lookupChild(self, tag)
+ return _lookupChildOrRaise(self, tag)
def __setattr__(self, tag, value):
"""Set the value of the (first) child with the given tag name. If no
@@ -223,15 +232,14 @@
return
tag = _buildChildTag(self, tag)
- try:
- element = _lookupChild(self, tag)
- except AttributeError:
+ element = _lookupChild(self, tag)
+ if element is None:
_appendValue(self, tag, value)
else:
_replaceElement(element, value)
def __delattr__(self, tag):
- child = _lookupChild(self, tag)
+ child = _lookupChildOrRaise(self, tag)
self.remove(child)
def addattr(self, tag, value):
@@ -253,7 +261,7 @@
cdef tree.xmlNode* c_parent
cdef tree.xmlNode* c_node
if python._isString(key):
- return _lookupChild(self, key)
+ return _lookupChildOrRaise(self, key)
c_self_node = self._c_node
c_parent = c_self_node.parent
if c_parent is NULL:
@@ -290,9 +298,8 @@
cdef tree.xmlNode* c_node
if python._isString(key):
key = _buildChildTag(self, key)
- try:
- element = _lookupChild(self, key)
- except AttributeError:
+ element = _lookupChild(self, key)
+ if element is None:
_appendValue(self, key, value)
else:
_replaceElement(element, value)
@@ -421,10 +428,16 @@
c_href = _cstr(ns)
c_result = _findFollowingSibling(c_node.children, c_href, c_tag, 0)
if c_result is NULL:
- raise AttributeError, "no such child: " + \
- cetree.namespacedNameFromNsName(c_href, c_tag)
+ return None
return elementFactory(parent._doc, c_result)
+cdef object _lookupChildOrRaise(_Element parent, tag):
+ element = _lookupChild(parent, tag)
+ if element is None:
+ raise AttributeError, "no such child: " + \
+ _buildChildTag(parent, tag)
+ return element
+
cdef object _buildChildTag(_Element parent, tag):
cdef char* c_href
cdef char* c_tag
@@ -910,16 +923,17 @@
"""
types = []
known = set()
+ add_to_known = known.add
for check, pytype in _TYPE_CHECKS:
name = pytype.name
if name not in known:
- known.add(name)
- types.append(pytype)
+ add_to_known(name)
+ python.PyList_Append(types, pytype)
for pytype in _PYTYPE_DICT.itervalues():
name = pytype.name
if name not in known:
- known.add(name)
- types.append(pytype)
+ add_to_known(name)
+ python.PyList_Append(types, pytype)
return types
cdef object _guessElementClass(tree.xmlNode* c_node):
@@ -928,12 +942,11 @@
return None
if value == '':
return StringElement
- errors = (ValueError, TypeError)
for type_check, pytype in _TYPE_CHECKS:
try:
type_check(value)
return (pytype)._type
- except errors:
+ except IGNORABLE_ERRORS:
pass
return None
@@ -1426,7 +1439,6 @@
doc = element._doc
ignore = bool(ignore_old)
- _ValueError = ValueError
StrType = _PYTYPE_DICT.get('str')
c_node = element._c_node
tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node, c_node, 1)
@@ -1443,7 +1455,7 @@
try:
if not (pytype).type_check(value):
pytype = None
- except _ValueError:
+ except ValueError:
pytype = None
if pytype is None:
@@ -1474,7 +1486,7 @@
if type_check(value) is not False:
pytype = tested_pytype
break
- except _ValueError:
+ except ValueError:
pass
else:
pytype = StrType
@@ -1579,13 +1591,12 @@
strval = str(_value)
if _pytype is None:
- errors = (ValueError, TypeError)
for type_check, pytype in _TYPE_CHECKS:
try:
type_check(strval)
_pytype = (pytype).name
break
- except errors:
+ except IGNORABLE_ERRORS:
pass
if _pytype is None:
if _value is None:
From scoder at codespeak.net Thu Mar 29 21:45:40 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 29 Mar 2007 21:45:40 +0200 (CEST)
Subject: [Lxml-checkins] r41648 - lxml/trunk
Message-ID: <20070329194540.DAA7F1007D@code0.codespeak.net>
Author: scoder
Date: Thu Mar 29 21:45:39 2007
New Revision: 41648
Modified:
lxml/trunk/TODO.txt
Log:
cleanup
Modified: lxml/trunk/TODO.txt
==============================================================================
--- lxml/trunk/TODO.txt (original)
+++ lxml/trunk/TODO.txt Thu Mar 29 21:45:39 2007
@@ -16,8 +16,6 @@
* more testing on multi-threading
-* the code on extension functions and XSLT needs some refactoring
-
ElementTree
-----------
@@ -34,8 +32,8 @@
Objectify
---------
-* set special __attributes__ on ObjectifiedElement's as Python attributes, not
- XML children
+* emulate setting special __attributes__ on ObjectifiedElement's as Python
+ attributes, not XML children
Features
From scoder at codespeak.net Thu Mar 29 21:46:58 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 29 Mar 2007 21:46:58 +0200 (CEST)
Subject: [Lxml-checkins] r41649 - lxml/trunk/src/lxml
Message-ID: <20070329194658.17F4B1007D@code0.codespeak.net>
Author: scoder
Date: Thu Mar 29 21:46:57 2007
New Revision: 41649
Modified:
lxml/trunk/src/lxml/extensions.pxi
Log:
cleanup
Modified: lxml/trunk/src/lxml/extensions.pxi
==============================================================================
--- lxml/trunk/src/lxml/extensions.pxi (original)
+++ lxml/trunk/src/lxml/extensions.pxi Thu Mar 29 21:46:57 2007
@@ -25,8 +25,6 @@
cdef object _global_namespaces
cdef object _utf_refs
cdef object _function_cache
- cdef object _function_cache_ns
- cdef object _called_function
# for exception handling and temporary reference keeping:
cdef _TempStore _temp_refs
cdef _ExceptionContext _exc
@@ -36,7 +34,6 @@
self._utf_refs = {}
self._global_namespaces = []
self._function_cache = {}
- self._function_cache_ns = {}
if extensions is not None:
# convert extensions to UTF-8
@@ -192,17 +189,14 @@
_register_function reg_func):
cdef python.PyObject* dict_result
for ns_utf, ns_functions in _iter_ns_extension_functions():
- if ns_utf is None:
- d = self._function_cache
+ dict_result = python.PyDict_GetItem(
+ self._function_cache, ns_utf)
+ if dict_result is not NULL:
+ d = dict_result
else:
- dict_result = python.PyDict_GetItem(
- self._function_cache_ns, ns_utf)
- if dict_result is NULL:
- d = {}
- python.PyDict_SetItem(
- self._function_cache_ns, ns_utf, d)
- else:
- d = dict_result
+ d = {}
+ python.PyDict_SetItem(
+ self._function_cache, ns_utf, d)
for name_utf, function in ns_functions.iteritems():
python.PyDict_SetItem(d, name_utf, function)
reg_func(ctxt, name_utf, ns_utf)
@@ -213,39 +207,30 @@
if self._extensions is None:
return # done
last_ns = None
- d = self._function_cache
+ d = None
for (ns_utf, name_utf), function in self._extensions.iteritems():
- if ns_utf is not last_ns:
+ if ns_utf is not last_ns or d is None:
last_ns = ns_utf
- if ns_utf is None:
- d = self._function_cache
+ dict_result = python.PyDict_GetItem(
+ self._function_cache, ns_utf)
+ if dict_result is not NULL:
+ d = dict_result
else:
- dict_result = python.PyDict_GetItem(
- self._function_cache_ns, ns_utf)
- if dict_result is NULL:
- d = {}
- python.PyDict_SetItem(self._function_cache_ns,
- ns_utf, d)
- else:
- d = dict_result
+ d = {}
+ python.PyDict_SetItem(self._function_cache,
+ ns_utf, d)
python.PyDict_SetItem(d, name_utf, function)
reg_func(ctxt, name_utf, ns_utf)
cdef unregisterAllFunctions(self, void* ctxt,
_register_function unreg_func):
- for name_utf in self._function_cache:
- unreg_func(ctxt, name_utf, None)
- for ns_utf, functions in self._function_cache_ns.iteritems():
+ for ns_utf, functions in self._function_cache.iteritems():
for name_utf in functions:
unreg_func(ctxt, name_utf, ns_utf)
cdef unregisterGlobalFunctions(self, void* ctxt,
_register_function unreg_func):
- for name_utf in self._function_cache:
- if self._extensions is None or \
- (None, name_utf) not in self._extensions:
- unreg_func(ctxt, name_utf, None)
- for ns_utf, functions in self._function_cache_ns.iteritems():
+ for ns_utf, functions in self._function_cache.iteritems():
for name_utf in functions:
if self._extensions is None or \
(ns_utf, name_utf) not in self._extensions:
@@ -259,65 +244,19 @@
cdef python.PyObject* c_dict
cdef python.PyObject* dict_result
if c_ns_uri is NULL:
- c_dict = self._function_cache
+ c_dict = python.PyDict_GetItem(
+ self._function_cache, None)
else:
c_dict = python.PyDict_GetItemString(
- self._function_cache_ns, c_ns_uri)
+ self._function_cache, c_ns_uri)
if c_dict is not NULL:
- dict_result = python.PyDict_GetItemString(c_dict, c_name)
+ dict_result = python.PyDict_GetItemString(
+ c_dict, c_name)
if dict_result is not NULL:
return dict_result
return None
- cdef int __prepare_function_call(self, char* c_ns_uri, char* c_name):
- """Find an extension function and store it in 'self._called_function'.
-
- This is absolutely performance-critical for XPath/XSLT!
- Return 1 if it was found, 0 otherwise.
- Parameters: c_ns_uri may be NULL, c_name must not be NULL
- """
- cdef python.PyObject* c_dict
- cdef python.PyObject* dict_result
- if c_ns_uri is NULL:
- d = self._function_cache
- c_dict = d
- else:
- c_dict = python.PyDict_GetItemString(
- self._function_cache_ns, c_ns_uri)
- if c_dict is NULL:
- d = {}
- python.PyDict_SetItem(self._function_cache_ns, ns_uri_utf, d)
- else:
- d = c_dict
-
- name_utf = c_name
- if c_dict is not NULL:
- dict_result = python.PyDict_GetItem(d, name_utf)
- if dict_result is not NULL:
- function = dict_result
- self._called_function = function
- return function is not None
-
- # first time we look up this function, so the rest is less critical
- if c_ns_uri is not NULL:
- ns_uri_utf = c_ns_uri
-
- if self._extensions is not None:
- dict_result = python.PyDict_GetItem(
- self._extensions, (ns_uri_utf, name_utf))
- else:
- dict_result = NULL
- if dict_result is not NULL:
- function = dict_result
- else:
- function = _find_extension(ns_uri_utf, name_utf)
-
- # we also store None values here to make sure we remember
- python.PyDict_SetItem(d, name_utf, function)
- self._called_function = function
- return function is not None
-
# Python reference keeping during XPath function evaluation
cdef _release_temp_refs(self):
@@ -340,11 +279,10 @@
return
for o in obj:
if isinstance(o, _Element):
- element = <_Element>o
#print "Holding element:", element._c_node
- self._temp_refs.add(element)
+ self._temp_refs.add(o)
#print "Holding document:", element._doc._c_doc
- self._temp_refs.add(element._doc)
+ self._temp_refs.add((<_Element>o)._doc)
def Extension(module, function_mapping, ns=None):
From scoder at codespeak.net Thu Mar 29 21:57:42 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Thu, 29 Mar 2007 21:57:42 +0200 (CEST)
Subject: [Lxml-checkins] r41651 - lxml/trunk/src/lxml
Message-ID: <20070329195742.0F72710090@code0.codespeak.net>
Author: scoder
Date: Thu Mar 29 21:57:41 2007
New Revision: 41651
Modified:
lxml/trunk/src/lxml/etree.pyx
Log:
cleanup
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Thu Mar 29 21:57:41 2007
@@ -1386,7 +1386,7 @@
cdef _ElementTree result
result = baseclass()
if context_node is None and doc is not None:
- context_node = doc.getroot()
+ context_node = doc.getroot()
if context_node is None:
result._doc = doc
result._context_node = context_node