From scoder at codespeak.net Sat Mar 3 13:35:12 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sat, 3 Mar 2007 13:35:12 +0100 (CET)
Subject: [Lxml-checkins] r39785 - lxml/trunk/src/lxml
Message-ID: <20070303123512.139F410060@code0.codespeak.net>
Author: scoder
Date: Sat Mar 3 13:35:10 2007
New Revision: 39785
Modified:
lxml/trunk/src/lxml/tree.pxd
Log:
cleanup
Modified: lxml/trunk/src/lxml/tree.pxd
==============================================================================
--- lxml/trunk/src/lxml/tree.pxd (original)
+++ lxml/trunk/src/lxml/tree.pxd Sat Mar 3 13:35:10 2007
@@ -242,8 +242,7 @@
char* URI, xmlCharEncodingHandler* encoder, int compression)
cdef extern from "libxml/xmlsave.h":
- ctypedef struct xmlSaveCtxt:
- pass
+ ctypedef struct xmlSaveCtxt
cdef xmlSaveCtxt* xmlSaveToFilename(char* filename, char* encoding,
int options)
From scoder at codespeak.net Sat Mar 3 13:35:46 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sat, 3 Mar 2007 13:35:46 +0100 (CET)
Subject: [Lxml-checkins] r39786 - lxml/trunk/src/lxml
Message-ID: <20070303123546.8618410068@code0.codespeak.net>
Author: scoder
Date: Sat Mar 3 13:35:44 2007
New Revision: 39786
Modified:
lxml/trunk/src/lxml/dtd.pxi
Log:
cleanup
Modified: lxml/trunk/src/lxml/dtd.pxi
==============================================================================
--- lxml/trunk/src/lxml/dtd.pxi (original)
+++ lxml/trunk/src/lxml/dtd.pxi Sat Mar 3 13:35:44 2007
@@ -16,8 +16,9 @@
cdef class DTD(_Validator):
"""A DTD validator.
- Can load from filesystem directly given a filename. Alternatively, pass
- the keyword parameter ``external_id`` to load from a catalog.
+ Can load from filesystem directly given a filename or file-like object.
+ Alternatively, pass the keyword parameter ``external_id`` to load from a
+ catalog.
"""
cdef tree.xmlDtd* _c_dtd
def __init__(self, file=None, external_id=None):
From scoder at codespeak.net Sat Mar 3 13:38:23 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sat, 3 Mar 2007 13:38:23 +0100 (CET)
Subject: [Lxml-checkins] r39788 - in lxml/trunk: doc src/lxml
Message-ID: <20070303123823.2D00610068@code0.codespeak.net>
Author: scoder
Date: Sat Mar 3 13:38:21 2007
New Revision: 39788
Added:
lxml/trunk/src/lxml/schematron.pxd
lxml/trunk/src/lxml/schematron.pxi
Modified:
lxml/trunk/doc/validation.txt
lxml/trunk/src/lxml/etree.pyx
Log:
schematron support (disabled by default: requires libxml2 2.6.21+, better 2.6.27)
Modified: lxml/trunk/doc/validation.txt
==============================================================================
--- lxml/trunk/doc/validation.txt (original)
+++ lxml/trunk/doc/validation.txt Sat Mar 3 13:38:21 2007
@@ -11,11 +11,17 @@
.. _`Relax NG`: http://www.relaxng.org/
.. _`XML Schema`: http://www.w3.org/XML/Schema
+There is also initial support for Schematron_. However, it is currently
+disabled in lxml builds due to insufficiencies in the implementation as of
+libxml2 2.6.27.
+
+.. _Schematron: http://www.ascc.net/xml/schematron
+
.. contents::
..
1 DTD
2 RelaxNG
- 2 XMLSchema
+ 3 XMLSchema
The usual setup procedure::
@@ -114,10 +120,9 @@
[...]
AssertionError: Document does not comply with schema
-Starting with version 0.9, lxml now has a simple API to report the errors
-generated by libxml2. If you want to find out why the validation failed in the
-second case, you can look up the error log of the validation process and check
-it for relevant messages::
+If you want to find out why the validation failed in the second case, you can
+look up the error log of the validation process and check it for relevant
+messages::
>>> log = relaxng.error_log
>>> print log.last_error
@@ -126,7 +131,7 @@
You can see that the error (ERROR) happened during RelaxNG validation
(RELAXNGV). The message then tells you what went wrong. Note that this error
is local to the RelaxNG object. It will only contain log entries that
-appeares during the validation. The DocumentInvalid exception raised by the
+appeared during the validation. The DocumentInvalid exception raised by the
``assertValid`` method above provides access to the global error log (like all
other lxml exceptions).
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Sat Mar 3 13:38:21 2007
@@ -1893,9 +1893,10 @@
def __get__(self):
return self._error_log.copy()
-include "dtd.pxi" # DTD
-include "relaxng.pxi" # RelaxNG
-include "xmlschema.pxi" # XMLSchema
+include "dtd.pxi" # DTD
+include "relaxng.pxi" # RelaxNG
+include "xmlschema.pxi" # XMLSchema
+#include "schematron.pxi" # Schematron
################################################################################
# Public C API
Added: lxml/trunk/src/lxml/schematron.pxd
==============================================================================
--- (empty file)
+++ lxml/trunk/src/lxml/schematron.pxd Sat Mar 3 13:38:21 2007
@@ -0,0 +1,28 @@
+cimport tree
+from tree cimport xmlDoc, xmlDtd
+
+cdef extern from "libxml/schematron.h":
+ ctypedef struct xmlSchematron
+ ctypedef struct xmlSchematronParserCtxt
+ ctypedef struct xmlSchematronValidCtxt
+
+ ctypedef enum xmlSchematronValidOptions:
+ XML_SCHEMATRON_OUT_QUIET = 1 # quiet no report
+ XML_SCHEMATRON_OUT_TEXT = 2 # build a textual report
+ XML_SCHEMATRON_OUT_XML = 4 # output SVRL
+ XML_SCHEMATRON_OUT_FILE = 256 # output to a file descriptor
+ XML_SCHEMATRON_OUT_BUFFER = 512 # output to a buffer
+ XML_SCHEMATRON_OUT_IO = 1024 # output to I/O mechanism
+
+ cdef xmlSchematronParserCtxt* xmlSchematronNewDocParserCtxt(xmlDoc* doc)
+ cdef xmlSchematronParserCtxt* xmlSchematronNewParserCtxt(char* filename)
+ cdef xmlSchematronValidCtxt* xmlSchematronNewValidCtxt(xmlSchematron* schema,
+ int options)
+
+ cdef xmlSchematron* xmlSchematronParse(xmlSchematronParserCtxt* ctxt)
+ cdef int xmlSchematronValidateDoc(xmlSchematronValidCtxt* ctxt,
+ xmlDoc* instance)
+
+ cdef void xmlSchematronFreeParserCtxt(xmlSchematronParserCtxt* ctxt)
+ cdef void xmlSchematronFreeValidCtxt(xmlSchematronValidCtxt* ctxt)
+ cdef void xmlSchematronFree(xmlSchematron* schema)
Added: lxml/trunk/src/lxml/schematron.pxi
==============================================================================
--- (empty file)
+++ lxml/trunk/src/lxml/schematron.pxi Sat Mar 3 13:38:21 2007
@@ -0,0 +1,145 @@
+# support for Schematron validation
+cimport schematron
+
+"""
+Schematron
+----------
+
+Schematron is a less well known, but very powerful schema language. The main
+idea is to use the capabilities of XPath to put restrictions on the structure
+and the content of XML documents. Here is a simple example::
+
+ >>> schematron = etree.Schematron(etree.XML("""
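+ ... <schema xmlns="http://www.ascc.net/xml/schematron" >
+ ...   <pattern name="id is the only permitted attribute name">
+ ...     <rule context="*">
+ ...       <report test="@*[not(name()='id')]">Attribute
+ ...         <name path="@*[not(name()='id')]"/> is forbidden<name/>
+ ...       </report>
+ ...     </rule>
+ ...   </pattern>
+ ... </schema>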
+ ... """))
+
+ >>> xml = etree.XML("""
+ ... <AAA name="aaa">
+ ...   <BBB id="bbb"/>
+ ...   <CCC color="ccc"/>
+ ... </AAA>
+ ... """)
+
+ >>> schematron.validate(xml)
+ 0
+
+ >>> xml = etree.XML("""
+ ... <AAA id="aaa">
+ ...   <BBB id="bbb"/>
+ ...   <CCC/>
+ ... </AAA>
+ ... """)
+
+ >>> schematron.validate(xml)
+ 1
+
+Schematron was added to libxml2 in version 2.6.21. As of version 2.6.27,
+however, Schematron lacks support for error reporting other than to stderr.
+It is therefore not possible to retrieve validation warnings and errors in
+lxml.
+"""
+
+class SchematronError(LxmlError):
+ pass
+
+class SchematronParseError(SchematronError):
+ pass
+
+class SchematronValidateError(SchematronError):
+ pass
+
+################################################################################
+# Schematron
+
+cdef class Schematron(_Validator):
+ """A Schematron validator.
+
+ Pass a root Element or an ElementTree to turn it into a validator.
+ Alternatively, pass a filename as keyword argument 'file' to parse from
+ the file system.
+ """
+ cdef schematron.xmlSchematron* _c_schema
+ cdef tree.xmlDoc* _c_doc
+ def __init__(self, etree=None, file=None):
+ cdef _Document doc
+ cdef _Element root_node
+ cdef xmlNode* c_node
+ cdef xmlDoc* c_doc
+ cdef char* c_href
+ cdef schematron.xmlSchematronParserCtxt* parser_ctxt
+ self._c_schema = NULL
+ self._c_doc = NULL
+ if etree is not None:
+ doc = _documentOrRaise(etree)
+ root_node = _rootNodeOrRaise(etree)
+ self._c_doc = _copyDocRoot(doc._c_doc, root_node._c_node)
+ parser_ctxt = schematron.xmlSchematronNewDocParserCtxt(self._c_doc)
+ elif file is not None:
+ filename = _getFilenameForFile(file)
+ if filename is None:
+ # XXX assume a string object
+ filename = file
+ filename = _encodeFilename(filename)
+ parser_ctxt = schematron.xmlSchematronNewParserCtxt(_cstr(filename))
+ else:
+ raise SchematronParseError, "No tree or file given"
+
+ if parser_ctxt is NULL:
+ if self._c_doc is not NULL:
+ tree.xmlFreeDoc(self._c_doc)
+ raise SchematronParseError, "Document is not parsable as Schematron"
+ self._c_schema = schematron.xmlSchematronParse(parser_ctxt)
+
+ if self._c_schema is NULL:
+ if self._c_doc is not NULL:
+ schematron.xmlSchematronFreeParserCtxt(parser_ctxt)
+ tree.xmlFreeDoc(self._c_doc)
+ raise SchematronParseError, "Document is not a valid Schematron schema"
+ schematron.xmlSchematronFreeParserCtxt(parser_ctxt)
+ _Validator.__init__(self)
+
+ def __dealloc__(self):
+ schematron.xmlSchematronFree(self._c_schema)
+ tree.xmlFreeDoc(self._c_doc)
+
+ def __call__(self, etree):
+ """Validate doc using Schematron.
+
+ Returns true if document is valid, false if not."""
+ cdef python.PyThreadState* state
+ cdef _Document doc
+ cdef _Element root_node
+ cdef xmlDoc* c_doc
+ cdef schematron.xmlSchematronValidCtxt* valid_ctxt
+ cdef int ret
+
+ doc = _documentOrRaise(etree)
+ root_node = _rootNodeOrRaise(etree)
+
+ self._error_log.connect()
+ valid_ctxt = schematron.xmlSchematronNewValidCtxt(
+ self._c_schema, schematron.XML_SCHEMATRON_OUT_QUIET)
+ if valid_ctxt is NULL:
+ self._error_log.disconnect()
+ raise SchematronError, "Failed to create validation context"
+
+ c_doc = _fakeRootDoc(doc._c_doc, root_node._c_node)
+ state = python.PyEval_SaveThread()
+ ret = schematron.xmlSchematronValidateDoc(valid_ctxt, c_doc)
+ python.PyEval_RestoreThread(state)
+ _destroyFakeDoc(doc._c_doc, c_doc)
+
+ schematron.xmlSchematronFreeValidCtxt(valid_ctxt)
+
+ self._error_log.disconnect()
+ if ret == -1:
+ raise SchematronValidateError, "Internal error in Schematron validation"
+ return ret == 0
From scoder at codespeak.net Mon Mar 5 17:49:20 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Mon, 5 Mar 2007 17:49:20 +0100 (CET)
Subject: [Lxml-checkins] r39965 - lxml/trunk/src/lxml
Message-ID: <20070305164920.E3BE21007D@code0.codespeak.net>
Author: scoder
Date: Mon Mar 5 17:49:18 2007
New Revision: 39965
Modified:
lxml/trunk/src/lxml/parser.pxi
Log:
set error return to -1 instead of 1
Modified: lxml/trunk/src/lxml/parser.pxi
==============================================================================
--- lxml/trunk/src/lxml/parser.pxi (original)
+++ lxml/trunk/src/lxml/parser.pxi Mon Mar 5 17:49:18 2007
@@ -406,7 +406,7 @@
if pctxt.spaceTab is not NULL: # work around bug in libxml2
xmlparser.xmlClearParserCtxt(pctxt)
- cdef int _lockParser(self) except 1:
+ cdef int _lockParser(self) except -1:
cdef python.PyThreadState* state
cdef int result
if config.ENABLE_THREADING and self._parser_lock != NULL:
From scoder at codespeak.net Mon Mar 5 17:50:14 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Mon, 5 Mar 2007 17:50:14 +0100 (CET)
Subject: [Lxml-checkins] r39966 - lxml/trunk/src/lxml
Message-ID: <20070305165014.5AB611007D@code0.codespeak.net>
Author: scoder
Date: Mon Mar 5 17:50:13 2007
New Revision: 39966
Modified:
lxml/trunk/src/lxml/extensions.pxi
Log:
cleanups and C-ifications
Modified: lxml/trunk/src/lxml/extensions.pxi
==============================================================================
--- lxml/trunk/src/lxml/extensions.pxi (original)
+++ lxml/trunk/src/lxml/extensions.pxi Mon Mar 5 17:50:13 2007
@@ -102,16 +102,16 @@
# namespaces (internal UTF-8 methods with leading '_')
- def addNamespace(self, prefix, uri):
+ cdef addNamespace(self, prefix, uri):
if self._namespaces is None:
self._namespaces = {}
python.PyDict_SetItem(self._namespaces, prefix, uri)
- def registerNamespaces(self, namespaces):
+ cdef registerNamespaces(self, namespaces):
for prefix, uri in namespaces.items():
self.registerNamespace(prefix, uri)
- def registerNamespace(self, prefix, ns_uri):
+ cdef registerNamespace(self, prefix, ns_uri):
prefix_utf = self._to_utf(prefix)
ns_uri_utf = self._to_utf(ns_uri)
xpath.xmlXPathRegisterNs(self._xpathCtxt, prefix_utf, ns_uri_utf)
@@ -238,12 +238,14 @@
cdef xpath.xmlXPathFunction _function_check(void* ctxt,
char* c_name, char* c_ns_uri):
"Module level lookup function for XPath/XSLT functions"
+ cdef xpath.xmlXPathFunction c_func
cdef _BaseContext context
context = <_BaseContext>ctxt
if context._prepare_function_call(c_ns_uri, c_name):
- return _call_prepared_function
+ c_func = _call_prepared_function
else:
- return NULL
+ c_func = NULL
+ return c_func
cdef xpath.xmlXPathObject* _wrapXPathObject(object obj) except NULL:
cdef xpath.xmlNodeSet* resultSet
@@ -358,7 +360,6 @@
cdef void _extension_function_call(_BaseContext context, function,
xpath.xmlXPathParserContext* ctxt, int nargs):
- cdef _Element node
cdef _Document doc
cdef xpath.xmlXPathObject* obj
cdef int i
From scoder at codespeak.net Mon Mar 5 17:52:51 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Mon, 5 Mar 2007 17:52:51 +0100 (CET)
Subject: [Lxml-checkins] r39967 - lxml/trunk/src/lxml
Message-ID: <20070305165251.5B7351007D@code0.codespeak.net>
Author: scoder
Date: Mon Mar 5 17:52:47 2007
New Revision: 39967
Modified:
lxml/trunk/src/lxml/xslt.pxi
Log:
cleanup, doc strings
Modified: lxml/trunk/src/lxml/xslt.pxi
==============================================================================
--- lxml/trunk/src/lxml/xslt.pxi (original)
+++ lxml/trunk/src/lxml/xslt.pxi Mon Mar 5 17:52:47 2007
@@ -1,4 +1,4 @@
-# XSLT and XPath classes, supports for extension functions
+# XSLT
cimport xslt
@@ -265,6 +265,15 @@
cdef class XSLT:
"""Turn a document into an XSLT object.
+
+ Keyword arguments of the constructor:
+ * regexp - enable exslt regular expression support in XPath (default: True)
+ * access_control - access restrictions for network or file system
+
+ Keyword arguments of the XSLT run:
+ * profile_run - enable XSLT profiling
+
+ Other keyword arguments are passed to the stylesheet.
"""
cdef _XSLTContext _context
cdef xslt.xsltStylesheet* _c_style
@@ -415,6 +424,7 @@
if params is not NULL:
# deallocate space for parameters
python.PyMem_Free(params)
+ keep_ref = None
if transform_ctxt.profile:
c_profile_doc = xslt.xsltGetProfileInformation(transform_ctxt)
From scoder at codespeak.net Mon Mar 5 17:53:09 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Mon, 5 Mar 2007 17:53:09 +0100 (CET)
Subject: [Lxml-checkins] r39968 - lxml/trunk
Message-ID: <20070305165309.651CA1007E@code0.codespeak.net>
Author: scoder
Date: Mon Mar 5 17:53:08 2007
New Revision: 39968
Modified:
lxml/trunk/TODO.txt
Log:
cleanup
Modified: lxml/trunk/TODO.txt
==============================================================================
--- lxml/trunk/TODO.txt (original)
+++ lxml/trunk/TODO.txt Mon Mar 5 17:53:08 2007
@@ -41,5 +41,5 @@
Features
--------
-* Relaxed NG compact notation (rnc versus rng) support. Currently not
- supported by libxml2 (patch exists)
+* RelaxNG compact notation (rnc versus rng) support. Currently not supported
+ by libxml2 (patch exists)
From scoder at codespeak.net Sat Mar 10 20:05:37 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sat, 10 Mar 2007 20:05:37 +0100 (CET)
Subject: [Lxml-checkins] r40177 - lxml/trunk/src/lxml
Message-ID: <20070310190537.158731006F@code0.codespeak.net>
Author: scoder
Date: Sat Mar 10 20:05:36 2007
New Revision: 40177
Modified:
lxml/trunk/src/lxml/xpath.pxi
Log:
fix for compile problem
Modified: lxml/trunk/src/lxml/xpath.pxi
==============================================================================
--- lxml/trunk/src/lxml/xpath.pxi (original)
+++ lxml/trunk/src/lxml/xpath.pxi Sat Mar 10 20:05:36 2007
@@ -143,9 +143,8 @@
def registerNamespaces(self, namespaces):
"""Register a prefix -> uri dict.
"""
- add = self._context.addNamespace
for prefix, uri in namespaces.items():
- add(prefix, uri)
+ self._context.addNamespace(prefix, uri)
def __call__(self, _path, **_variables):
"""Evaluate an XPath expression on the document.
From scoder at codespeak.net Fri Mar 16 20:24:42 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 16 Mar 2007 20:24:42 +0100 (CET)
Subject: [Lxml-checkins] r40611 - lxml/trunk/src/lxml
Message-ID: <20070316192442.A40981008A@code0.codespeak.net>
Author: scoder
Date: Fri Mar 16 20:24:40 2007
New Revision: 40611
Modified:
lxml/trunk/src/lxml/etree.pyx
Log:
allow threading in xinclude(), some docstring updates
Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx (original)
+++ lxml/trunk/src/lxml/etree.pyx Fri Mar 16 20:24:40 2007
@@ -1289,7 +1289,7 @@
def relaxng(self, relaxng):
"""Validate this document using other document.
- relaxng is a tree that should contain Relax NG XML
+ The relaxng argument is a tree that should contain a Relax NG schema.
Returns True or False, depending on whether validation
succeeded.
@@ -1305,7 +1305,7 @@
def xmlschema(self, xmlschema):
"""Validate this document using other document.
- xmlschema is a tree that should contain XML Schema XML.
+ The xmlschema argument is a tree that should contain an XML Schema.
Returns True or False, depending on whether validation
succeeded.
@@ -1321,7 +1321,13 @@
def xinclude(self):
"""Process the XInclude nodes in this document and include the
referenced XML fragments.
+
+ There is support for loading files through the file system, HTTP and
+ FTP.
+
+ Note that XInclude does not support custom resolvers in Python space.
"""
+ cdef python.PyThreadState* state
cdef int result
# We cannot pass the XML_PARSE_NOXINCNODE option as this would free
# the XInclude nodes - there may still be Python references to them!
@@ -1331,13 +1337,15 @@
# typed as elements. The included fragment is added between the two,
# i.e. as a sibling, which does not conflict with traversal.
self._assertHasRoot()
- if self._context_node._doc._parser != None:
+ state = python.PyEval_SaveThread()
+ if self._context_node._doc._parser is not None:
result = xinclude.xmlXIncludeProcessTreeFlags(
self._context_node._c_node,
self._context_node._doc._parser._parse_options)
else:
result = xinclude.xmlXIncludeProcessTree(
self._context_node._c_node)
+ python.PyEval_RestoreThread(state)
if result == -1:
raise XIncludeError, "XInclude processing failed"
From scoder at codespeak.net Fri Mar 16 20:25:31 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 16 Mar 2007 20:25:31 +0100 (CET)
Subject: [Lxml-checkins] r40612 - lxml/trunk/doc/html
Message-ID: <20070316192531.55B3D1008A@code0.codespeak.net>
Author: scoder
Date: Fri Mar 16 20:25:29 2007
New Revision: 40612
Modified:
lxml/trunk/doc/html/style.css
Log:
friendlier colours for the web page as a better match with the codespeak logo
Modified: lxml/trunk/doc/html/style.css
==============================================================================
--- lxml/trunk/doc/html/style.css (original)
+++ lxml/trunk/doc/html/style.css Fri Mar 16 20:25:29 2007
@@ -67,10 +67,12 @@
font-size: 130%;
}
-div.sidemenu ul.menu.current > li {
- color: orange;
- border: groove orange;
- background-color: #FFFACA;
+div.sidemenu ul.menu.current li {
+ color: #CC0000;
+}
+
+div.sidemenu ul.menu.current > li > a {
+ color: #CC0000;
}
div.sidemenu ul.menu.current ul.submenu {
@@ -85,12 +87,13 @@
div.sidemenu ul.menu.foreign li.menu:hover ul.submenu {
display: block;
position: absolute;
- border: groove orange;
+ border: groove #990000;
padding: 1ex 1ex 1ex 3ex;
margin-top: 0px;
margin-left: 4em;
margin-right: -20em;
- background-color: #FFFACA;
+ color: #990000;
+ background-color: white;
}
div.sidemenu ul.submenu {
@@ -121,7 +124,7 @@
@media screen {
div.section > h1 > a:before {
margin-left: -2ex;
- color: orange;
+ color: #CC0000;
content: "\00BB" " ";
}
}
From scoder at codespeak.net Fri Mar 16 20:26:16 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 16 Mar 2007 20:26:16 +0100 (CET)
Subject: [Lxml-checkins] r40613 - lxml/trunk/benchmark
Message-ID: <20070316192616.5636F1008A@code0.codespeak.net>
Author: scoder
Date: Fri Mar 16 20:26:13 2007
New Revision: 40613
Modified:
lxml/trunk/benchmark/bench_xpath.py
Log:
benchmark both the old and the new way of using extension functions
Modified: lxml/trunk/benchmark/bench_xpath.py
==============================================================================
--- lxml/trunk/benchmark/bench_xpath.py (original)
+++ lxml/trunk/benchmark/bench_xpath.py Fri Mar 16 20:26:13 2007
@@ -34,7 +34,7 @@
child.xpath("./*[0]")
@onlylib('lxe')
- def bench_xpath_extensions_old(self, root):
+ def bench_xpath_old_extensions(self, root):
def return_child(_, element):
if element:
return element[0]
@@ -45,5 +45,21 @@
for child in root:
xpath(child)
+ @onlylib('lxe')
+ def bench_xpath_extensions(self, root):
+ def return_child(_, element):
+ if element:
+ return element[0]
+ else:
+ return ()
+ self.etree.FunctionNamespace("test")["t"] = return_child
+
+ try:
+ xpath = self.etree.XPath("test:t(.)", {"test":"test"})
+ for child in root:
+ xpath(child)
+ finally:
+ del self.etree.FunctionNamespace("test")["t"]
+
if __name__ == '__main__':
benchbase.main(XPathBenchMark)
From scoder at codespeak.net Fri Mar 16 20:26:56 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 16 Mar 2007 20:26:56 +0100 (CET)
Subject: [Lxml-checkins] r40614 - lxml/trunk/src/lxml
Message-ID: <20070316192656.4E7EC1008A@code0.codespeak.net>
Author: scoder
Date: Fri Mar 16 20:26:53 2007
New Revision: 40614
Modified:
lxml/trunk/src/lxml/python.pxd
Log:
added a Python API function
Modified: lxml/trunk/src/lxml/python.pxd
==============================================================================
--- lxml/trunk/src/lxml/python.pxd (original)
+++ lxml/trunk/src/lxml/python.pxd Fri Mar 16 20:26:53 2007
@@ -44,7 +44,8 @@
cdef int PyList_Append(object l, object obj) except -1
cdef int PyList_Reverse(object l) except -1
cdef int PyList_Insert(object l, Py_ssize_t index, object o) except -1
- cdef object PyList_AsTuple(object o)
+ cdef object PyList_AsTuple(object l)
+ cdef void PyList_Clear(object l)
cdef int PyDict_SetItemString(object d, char* key, object value) except -1
cdef int PyDict_SetItem(object d, object key, object value) except -1
From scoder at codespeak.net Fri Mar 16 20:28:38 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Fri, 16 Mar 2007 20:28:38 +0100 (CET)
Subject: [Lxml-checkins] r40615 - lxml/trunk/src/lxml/tests
Message-ID: <20070316192838.2B2921008A@code0.codespeak.net>
Author: scoder
Date: Fri Mar 16 20:28:33 2007
New Revision: 40615
Modified:
lxml/trunk/src/lxml/tests/test_xpathevaluator.py
Log:
test case split
Modified: lxml/trunk/src/lxml/tests/test_xpathevaluator.py
==============================================================================
--- lxml/trunk/src/lxml/tests/test_xpathevaluator.py (original)
+++ lxml/trunk/src/lxml/tests/test_xpathevaluator.py Fri Mar 16 20:28:33 2007
@@ -104,6 +104,10 @@
self.assertEquals(
[root[0]],
root.xpath('//baz:b', {'baz': 'uri:a'}))
+
+ def test_xpath_ns_none(self):
+ tree = self.parse('<a xmlns="uri:a"><b></b></a>')
+ root = tree.getroot()
self.assertRaises(
TypeError,
root.xpath, '//b', {None: 'uri:a'})
From scoder at codespeak.net Sat Mar 17 06:48:03 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sat, 17 Mar 2007 06:48:03 +0100 (CET)
Subject: [Lxml-checkins] r40625 - lxml/trunk/doc
Message-ID: <20070317054803.D5C3E10082@code0.codespeak.net>
Author: scoder
Date: Sat Mar 17 06:48:01 2007
New Revision: 40625
Modified:
lxml/trunk/doc/FAQ.txt
Log:
FAQ entry on standard compliance
Modified: lxml/trunk/doc/FAQ.txt
==============================================================================
--- lxml/trunk/doc/FAQ.txt (original)
+++ lxml/trunk/doc/FAQ.txt Sat Mar 17 06:48:01 2007
@@ -12,10 +12,11 @@
1 General Questions
1.1 Is there a tutorial?
1.2 Where can I find more documentation about lxml?
- 1.3 Where are the Windows binaries?
- 1.4 What is the difference between lxml.etree and lxml.objectify?
- 1.5 Why is my application so slow?
- 1.6 Why do I get errors about missing UCS4 symbols when installing lxml?
+ 1.3 What standards does lxml implement?
+ 1.4 Where are the Windows binaries?
+ 1.5 What is the difference between lxml.etree and lxml.objectify?
+ 1.6 Why is my application so slow?
+ 1.7 Why do I get errors about missing UCS4 symbols when installing lxml?
2 Bugs
2.1 My application crashes! Why does lxml.etree do that?
2.2 I think I have found a bug in lxml. What should I do?
@@ -64,6 +65,23 @@
.. _`the web page`: http://codespeak.net/lxml/#documentation
+What standards does lxml implement?
+-----------------------------------
+
+The compliance with XML standards depends on the support in libxml2 and libxslt.
+Here is a quote from `http://xmlsoft.org/`:
+
+ In most cases libxml2 tries to implement the specifications in a relatively
+ strictly compliant way. As of release 2.4.16, libxml2 passed all 1800+ tests
+ from the OASIS XML Tests Suite.
+
+lxml currently supports libxml2 2.6.16 or later, which has even better support
+for various XML standards. Some of the more important ones are: HTML, XML
+namespaces, XPath, XInclude, XSLT, XML catalogs, canonical XML, RelaxNG,
+XML:ID. Support for XML Schema and Schematron is currently incomplete.
+libxml2 also supports loading documents through HTTP and FTP.
+
+
Where are the Windows binaries?
-------------------------------
From scoder at codespeak.net Sat Mar 17 06:58:12 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sat, 17 Mar 2007 06:58:12 +0100 (CET)
Subject: [Lxml-checkins] r40626 - lxml/trunk/doc
Message-ID: <20070317055812.56C8510082@code0.codespeak.net>
Author: scoder
Date: Sat Mar 17 06:58:10 2007
New Revision: 40626
Modified:
lxml/trunk/doc/FAQ.txt
Log:
FAQ entry on standard compliance
Modified: lxml/trunk/doc/FAQ.txt
==============================================================================
--- lxml/trunk/doc/FAQ.txt (original)
+++ lxml/trunk/doc/FAQ.txt Sat Mar 17 06:58:10 2007
@@ -78,8 +78,9 @@
lxml currently supports libxml2 2.6.16 or later, which has even better support
for various XML standards. Some of the more important ones are: HTML, XML
namespaces, XPath, XInclude, XSLT, XML catalogs, canonical XML, RelaxNG,
-XML:ID. Support for XML Schema and Schematron is currently incomplete.
-libxml2 also supports loading documents through HTTP and FTP.
+XML:ID. Support for XML Schema and Schematron is currently incomplete in
+libxml2, but is mostly usable and still being worked on. libxml2 also
+supports loading documents through HTTP and FTP.
Where are the Windows binaries?
From scoder at codespeak.net Sat Mar 17 07:01:29 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sat, 17 Mar 2007 07:01:29 +0100 (CET)
Subject: [Lxml-checkins] r40627 - lxml/branch/extension_refactoring
Message-ID: <20070317060129.C1CBC10082@code0.codespeak.net>
Author: scoder
Date: Sat Mar 17 07:01:27 2007
New Revision: 40627
Added:
lxml/branch/extension_refactoring/
- copied from r40626, lxml/trunk/
Log:
new branch for refactoring the extension function setup
From scoder at codespeak.net Sat Mar 17 07:03:29 2007
From: scoder at codespeak.net (scoder at codespeak.net)
Date: Sat, 17 Mar 2007 07:03:29 +0100 (CET)
Subject: [Lxml-checkins] r40628 - in lxml/branch/extension_refactoring: .
src/lxml
Message-ID: <20070317060329.7A93A10082@code0.codespeak.net>
Author: scoder
Date: Sat Mar 17 07:03:26 2007
New Revision: 40628
Modified:
lxml/branch/extension_refactoring/CHANGES.txt
lxml/branch/extension_refactoring/src/lxml/extensions.pxi
lxml/branch/extension_refactoring/src/lxml/nsclasses.pxi
lxml/branch/extension_refactoring/src/lxml/xpath.pxi
lxml/branch/extension_refactoring/src/lxml/xslt.pxd
lxml/branch/extension_refactoring/src/lxml/xslt.pxi
Log:
initial branch import, mainly complete but buggy
Modified: lxml/branch/extension_refactoring/CHANGES.txt
==============================================================================
--- lxml/branch/extension_refactoring/CHANGES.txt (original)
+++ lxml/branch/extension_refactoring/CHANGES.txt Sat Mar 17 07:03:26 2007
@@ -2,6 +2,25 @@
lxml changelog
==============
+Under Development
+=================
+
+Features added
+--------------
+
+* EXSLT RegExp support in standard XPath (not only XSLT)
+
+Bugs fixed
+----------
+
+* Thread safety in XPath evaluators
+
+Other changes
+-------------
+
+* major refactoring in XPath/XSLT extension function code
+
+
1.3beta (2007-02-27)
====================
Modified: lxml/branch/extension_refactoring/src/lxml/extensions.pxi
==============================================================================
--- lxml/branch/extension_refactoring/src/lxml/extensions.pxi (original)
+++ lxml/branch/extension_refactoring/src/lxml/extensions.pxi Sat Mar 17 07:03:26 2007
@@ -12,11 +12,14 @@
################################################################################
# Base class for XSLT and XPath evaluation contexts: functions, namespaces, ...
+ctypedef int _register_function(void* ctxt, name_utf, ns_uri_utf)
+
cdef class _BaseContext:
cdef xpath.xmlXPathContext* _xpathCtxt
cdef _Document _doc
cdef object _extensions
cdef object _namespaces
+ cdef object _registered_namespaces
cdef object _utf_refs
cdef object _function_cache
cdef object _function_cache_ns
@@ -28,9 +31,9 @@
def __init__(self, namespaces, extensions):
self._xpathCtxt = NULL
self._utf_refs = {}
+ self._registered_namespaces = []
self._function_cache = {}
self._function_cache_ns = {}
- self._called_function = None
if extensions is not None:
# convert extensions to UTF-8
@@ -90,7 +93,8 @@
self.registerNamespaces(namespaces)
cdef _unregister_context(self):
- xpath.xmlXPathRegisteredNsCleanup(self._xpathCtxt)
+ self._unregisterNamespaces()
+# xpath.xmlXPathRegisteredNsCleanup(self._xpathCtxt)
self._free_context()
cdef _free_context(self):
@@ -112,12 +116,86 @@
self.registerNamespace(prefix, uri)
cdef registerNamespace(self, prefix, ns_uri):
+ if prefix is None:
+ raise TypeError, "empty prefix is not supported in XPath"
prefix_utf = self._to_utf(prefix)
ns_uri_utf = self._to_utf(ns_uri)
- xpath.xmlXPathRegisterNs(self._xpathCtxt, prefix_utf, ns_uri_utf)
+ python.PyList_Append(self._registered_namespaces, prefix_utf)
+ xpath.xmlXPathRegisterNs(self._xpathCtxt,
+ _cstr(prefix_utf), _cstr(ns_uri_utf))
+
+ cdef _registerNamespace(self, prefix_utf, ns_uri_utf):
+ python.PyList_Append(self._registered_namespaces, prefix_utf)
+ xpath.xmlXPathRegisterNs(self._xpathCtxt,
+ _cstr(prefix_utf), _cstr(ns_uri_utf))
+
+ cdef void _unregisterNamespaces(self):
+ if python.PyList_GET_SIZE(self._registered_namespaces) > 0:
+ for prefix_utf in self._registered_namespaces:
+ sys.stderr.write(prefix_utf)
+ sys.stderr.flush()
+ xpath.xmlXPathRegisterNs(self._xpathCtxt,
+ _cstr(prefix_utf), NULL)
+ self._registered_namespaces = []
+
+ cdef void _unregisterNamespace(self, prefix_utf):
+ xpath.xmlXPathRegisterNs(self._xpathCtxt,
+ _cstr(prefix_utf), NULL)
# extension functions
+ cdef void _addLocalExtensionFunction(self, ns_utf, name_utf, function):
+ if self._extensions is None:
+ self._extensions = {}
+ python.PyDict_SetItem(self._extensions, (ns_utf, name_utf), function)
+
+ cdef void _registerAllFunctions(self, void* ctxt,
+ _register_function reg_func):
+ cdef python.PyObject* dict_result
+ for ns_utf, ns_functions in _iter_ns_extension_functions():
+ if ns_utf is None:
+ d = self._function_cache
+ else:
+ dict_result = python.PyDict_GetItem(
+ self._function_cache_ns, ns_utf)
+ if dict_result is NULL:
+ d = {}
+ python.PyDict_SetItem(
+ self._function_cache_ns, ns_utf, d)
+ else:
+ d =